Python numpy.unicode_() 使用实例

The following are code examples for showing how to use . They are extracted from open source Python projects. You can vote up the examples you like or vote down the exmaples you don’t like. You can also save this page to your account.

Example 1

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail) 

Example 2

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt) 

Example 3

def test_replace(self):
        R = self.A.replace(asbytes_nested(['3', 'a']),
                           asbytes_nested(['##########', '@']))
        tgt = asbytes_nested([[' abc ', ''],
                              ['12##########45', '[email protected]'],
                              ['12########## \t ##########45 \x00', 'UPPER']])
        assert_(issubclass(R.dtype.type, np.string_))
        assert_array_equal(R, tgt)

        if sys.version_info[0] < 3:
            # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
            R = self.A.replace(asbytes('a'), sixu('\u03a3'))
            tgt = [[sixu(' \u03a3bc '), ''],
                   ['12345', sixu('MixedC\u03a3se')],
                   ['123 \t 345 \x00', 'UPPER']]
            assert_(issubclass(R.dtype.type, np.unicode_))
            assert_array_equal(R, tgt) 

Example 4

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt) 

Example 5

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt) 

Example 6

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail) 

Example 7

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt) 

Example 8

def test_replace(self):
        R = self.A.replace(asbytes_nested(['3', 'a']),
                           asbytes_nested(['##########', '@']))
        tgt = asbytes_nested([[' abc ', ''],
                              ['12##########45', '[email protected]'],
                              ['12########## \t ##########45 \x00', 'UPPER']])
        assert_(issubclass(R.dtype.type, np.string_))
        assert_array_equal(R, tgt)

        if sys.version_info[0] < 3:
            # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
            R = self.A.replace(asbytes('a'), sixu('\u03a3'))
            tgt = [[sixu(' \u03a3bc '), ''],
                   ['12345', sixu('MixedC\u03a3se')],
                   ['123 \t 345 \x00', 'UPPER']]
            assert_(issubclass(R.dtype.type, np.unicode_))
            assert_array_equal(R, tgt) 

Example 9

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt) 

Example 10

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt) 

Example 11

def test_select_dtypes_str_raises(self):
        df = DataFrame({'a': list('abc'),
                        'g': list(u('abc')),
                        'b': list(range(1, 4)),
                        'c': np.arange(3, 6).astype('u1'),
                        'd': np.arange(4.0, 7.0, dtype='float64'),
                        'e': [True, False, True],
                        'f': pd.date_range('now', periods=3).values})
        string_dtypes = set((str, 'str', np.string_, 'S1',
                             'unicode', np.unicode_, 'U1'))
        try:
            string_dtypes.add(unicode)
        except NameError:
            pass
        for dt in string_dtypes:
            with tm.assertRaisesRegexp(TypeError,
                                       'string dtypes are not allowed'):
                df.select_dtypes(include=[dt])
            with tm.assertRaisesRegexp(TypeError,
                                       'string dtypes are not allowed'):
                df.select_dtypes(exclude=[dt]) 

Example 12

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail) 

Example 13

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt) 

Example 14

def test_lstrip(self):
        tgt = asbytes_nested([['abc ', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345 \0 ', 'UPPER']])
        assert_(issubclass(self.A.lstrip().dtype.type, np.string_))
        assert_array_equal(self.A.lstrip(), tgt)

        tgt = asbytes_nested([[' abc', ''],
                              ['2345', 'ixedCase'],
                              ['23 \t 345 \x00', 'UPPER']])
        assert_array_equal(self.A.lstrip(asbytes_nested(['1', 'M'])), tgt)

        tgt = [[sixu('\u03a3 '), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345 \0 ', 'UPPER']]
        assert_(issubclass(self.B.lstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.lstrip(), tgt) 

Example 15

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt) 

Example 16

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt) 

Example 17

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail) 

Example 18

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt) 

Example 19

def test_replace(self):
        R = self.A.replace(asbytes_nested(['3', 'a']),
                           asbytes_nested(['##########', '@']))
        tgt = asbytes_nested([[' abc ', ''],
                              ['12##########45', '[email protected]'],
                              ['12########## \t ##########45 \x00', 'UPPER']])
        assert_(issubclass(R.dtype.type, np.string_))
        assert_array_equal(R, tgt)

        if sys.version_info[0] < 3:
            # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
            R = self.A.replace(asbytes('a'), sixu('\u03a3'))
            tgt = [[sixu(' \u03a3bc '), ''],
                   ['12345', sixu('MixedC\u03a3se')],
                   ['123 \t 345 \x00', 'UPPER']]
            assert_(issubclass(R.dtype.type, np.unicode_))
            assert_array_equal(R, tgt) 

Example 20

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt) 

Example 21

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt) 

Example 22

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail) 

Example 23

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt) 

Example 24

def test_replace(self):
        R = self.A.replace(asbytes_nested(['3', 'a']),
                           asbytes_nested(['##########', '@']))
        tgt = asbytes_nested([[' abc ', ''],
                              ['12##########45', '[email protected]'],
                              ['12########## \t ##########45 \x00', 'UPPER']])
        assert_(issubclass(R.dtype.type, np.string_))
        assert_array_equal(R, tgt)

        if sys.version_info[0] < 3:
            # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
            R = self.A.replace(asbytes('a'), sixu('\u03a3'))
            tgt = [[sixu(' \u03a3bc '), ''],
                   ['12345', sixu('MixedC\u03a3se')],
                   ['123 \t 345 \x00', 'UPPER']]
            assert_(issubclass(R.dtype.type, np.unicode_))
            assert_array_equal(R, tgt) 

Example 25

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt) 

Example 26

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt) 

Example 27

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail) 

Example 28

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt) 

Example 29

def test_replace(self):
        R = self.A.replace(asbytes_nested(['3', 'a']),
                           asbytes_nested(['##########', '@']))
        tgt = asbytes_nested([[' abc ', ''],
                              ['12##########45', '[email protected]'],
                              ['12########## \t ##########45 \x00', 'UPPER']])
        assert_(issubclass(R.dtype.type, np.string_))
        assert_array_equal(R, tgt)

        if sys.version_info[0] < 3:
            # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
            R = self.A.replace(asbytes('a'), sixu('\u03a3'))
            tgt = [[sixu(' \u03a3bc '), ''],
                   ['12345', sixu('MixedC\u03a3se')],
                   ['123 \t 345 \x00', 'UPPER']]
            assert_(issubclass(R.dtype.type, np.unicode_))
            assert_array_equal(R, tgt) 

Example 30

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt) 

Example 31

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt) 

Example 32

def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
	"""
	Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.string_) objects
	"""
	if np.issubdtype(a.dtype, np.object_):
		if np.all([type(x) is str for x in a]) or np.all([type(x) is np.str_ for x in a]) or np.all([type(x) is np.unicode_ for x in a]):
			return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
		elif np.all([type(x) is np.string_ for x in a]) or np.all([type(x) is np.bytes_ for x in a]):
			return a.astype("string_")
		else:
			print(type(a[0]))
			raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
	elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
		return a
	elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
		return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
	else:
		raise ValueError("String values must be object, ascii or unicode.") 

Example 33

def materialize_attr_values(a: np.ndarray) -> np.ndarray:
	scalar = False
	if np.isscalar(a):
		scalar = True
		a = np.array([a])
	result: np.ndarray = None
	if np.issubdtype(a.dtype, np.string_):
		# First ensure that what we load is valid ascii (i.e. ignore anything outside 7-bit range)
		temp = np.array([x.decode('ascii', 'ignore') for x in a])
		# Then unescape XML entities and convert to unicode
		result = np.array([html.unescape(x) for x in temp.astype(str)], dtype=np.str_)
	elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
		result = np.array(a.astype(str), dtype=np.str_)
	else:
		result = a
	if scalar:
		return result[0]
	else:
		return result 

Example 34

def test_from_unicode_array(self):
        A = np.array([['abc', sixu('Sigma \u03a3')],
                      ['long   ', '0123456789']])
        assert_equal(A.dtype.type, np.unicode_)
        B = np.char.array(A)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)
        B = np.char.array(A, **kw_unicode_true)
        assert_array_equal(B, A)
        assert_equal(B.dtype, A.dtype)
        assert_equal(B.shape, A.shape)

        def fail():
            np.char.array(A, **kw_unicode_false)

        self.assertRaises(UnicodeEncodeError, fail) 

Example 35

def test_join(self):
        if sys.version_info[0] >= 3:
            # NOTE: list(b'123') == [49, 50, 51]
            #       so that b','.join(b'123') results to an error on Py3
            A0 = self.A.decode('ascii')
        else:
            A0 = self.A

        A = np.char.join([',', '#'], A0)
        if sys.version_info[0] >= 3:
            assert_(issubclass(A.dtype.type, np.unicode_))
        else:
            assert_(issubclass(A.dtype.type, np.string_))
        tgt = np.array([[' ,a,b,c, ', ''],
                        ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
                        ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
        assert_array_equal(np.char.join([',', '#'], A0), tgt) 

Example 36

def test_replace(self):
        R = self.A.replace(asbytes_nested(['3', 'a']),
                           asbytes_nested(['##########', '@']))
        tgt = asbytes_nested([[' abc ', ''],
                              ['12##########45', '[email protected]'],
                              ['12########## \t ##########45 \x00', 'UPPER']])
        assert_(issubclass(R.dtype.type, np.string_))
        assert_array_equal(R, tgt)

        if sys.version_info[0] < 3:
            # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
            R = self.A.replace(asbytes('a'), sixu('\u03a3'))
            tgt = [[sixu(' \u03a3bc '), ''],
                   ['12345', sixu('MixedC\u03a3se')],
                   ['123 \t 345 \x00', 'UPPER']]
            assert_(issubclass(R.dtype.type, np.unicode_))
            assert_array_equal(R, tgt) 

Example 37

def test_rstrip(self):
        assert_(issubclass(self.A.rstrip().dtype.type, np.string_))

        tgt = asbytes_nested([[' abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_array_equal(self.A.rstrip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['1234', 'MixedCase'],
                              ['123 \t 345 \x00', 'UPP']
                              ])
        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])), tgt)

        tgt = [[sixu(' \u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.rstrip().dtype.type, np.unicode_))
        assert_array_equal(self.B.rstrip(), tgt) 

Example 38

def test_strip(self):
        tgt = asbytes_nested([['abc', ''],
                              ['12345', 'MixedCase'],
                              ['123 \t 345', 'UPPER']])
        assert_(issubclass(self.A.strip().dtype.type, np.string_))
        assert_array_equal(self.A.strip(), tgt)

        tgt = asbytes_nested([[' abc ', ''],
                              ['234', 'ixedCas'],
                              ['23 \t 345 \x00', 'UPP']])
        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])), tgt)

        tgt = [[sixu('\u03a3'), ''],
               ['12345', 'MixedCase'],
               ['123 \t 345', 'UPPER']]
        assert_(issubclass(self.B.strip().dtype.type, np.unicode_))
        assert_array_equal(self.B.strip(), tgt) 

Example 39

def test_unicode_string_comparison(self,level=rlevel):
        # Ticket #190
        a = np.array('hello', np.unicode_)
        b = np.array('world')
        a == b 

Example 40

def test_pickle_py2_bytes_encoding(self):
        # Check that arrays and scalars pickled on Py2 are
        # unpickleable on Py3 using encoding='bytes'

        test_data = [
            # (original, py2_pickle)
            (np.unicode_('\u6f2c'),
             asbytes("cnumpy.core.multiarray\nscalar\np0\n(cnumpy\ndtype\np1\n"
                     "(S'U1'\np2\nI0\nI1\ntp3\nRp4\n(I3\nS'<'\np5\nNNNI4\nI4\n"
                     "I0\ntp6\nbS',o\\x00\\x00'\np7\ntp8\nRp9\n.")),

            (np.array([9e123], dtype=np.float64),
             asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\n"
                     "p1\n(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\n"
                     "p7\n(S'f8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'<'\np11\nNNNI-1\nI-1\n"
                     "I0\ntp12\nbI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np13\ntp14\nb.")),

            (np.array([(9e123,)], dtype=[('name', float)]),
             asbytes("cnumpy.core.multiarray\n_reconstruct\np0\n(cnumpy\nndarray\np1\n"
                     "(I0\ntp2\nS'b'\np3\ntp4\nRp5\n(I1\n(I1\ntp6\ncnumpy\ndtype\np7\n"
                     "(S'V8'\np8\nI0\nI1\ntp9\nRp10\n(I3\nS'|'\np11\nN(S'name'\np12\ntp13\n"
                     "(dp14\ng12\n(g7\n(S'f8'\np15\nI0\nI1\ntp16\nRp17\n(I3\nS'<'\np18\nNNNI-1\n"
                     "I-1\nI0\ntp19\nbI0\ntp20\nsI8\nI1\nI0\ntp21\n"
                     "bI00\nS'O\\x81\\xb7Z\\xaa:\\xabY'\np22\ntp23\nb.")),
        ]

        if sys.version_info[:2] >= (3, 4):
            # encoding='bytes' was added in Py3.4
            for original, data in test_data:
                result = pickle.loads(data, encoding='bytes')
                assert_equal(result, original)

                if isinstance(result, np.ndarray) and result.dtype.names:
                    for name in result.dtype.names:
                        assert_(isinstance(name, str)) 

Example 41

def test_unicode_upconvert(self):
        A = np.char.array(['abc'])
        B = np.char.array([sixu('\u03a3')])
        assert_(issubclass((A + B).dtype.type, np.unicode_)) 

Example 42

def setUp(self):
        TestComparisons.setUp(self)
        self.B = np.array([['efg', '123  '],
                           ['051', 'tuv']], np.unicode_).view(np.chararray) 

Example 43

def setUp(self):
        TestComparisons.setUp(self)
        self.A = np.array([['abc', '123'],
                           ['789', 'xyz']], np.unicode_).view(np.chararray) 

Example 44

def test_capitalize(self):
        tgt = asbytes_nested([[' abc ', ''],
                              ['12345', 'Mixedcase'],
                              ['123 \t 345 \0 ', 'Upper']])
        assert_(issubclass(self.A.capitalize().dtype.type, np.string_))
        assert_array_equal(self.A.capitalize(), tgt)

        tgt = [[sixu(' \u03c3 '), ''],
               ['12345', 'Mixedcase'],
               ['123 \t 345 \0 ', 'Upper']]
        assert_(issubclass(self.B.capitalize().dtype.type, np.unicode_))
        assert_array_equal(self.B.capitalize(), tgt) 

Example 45

def test_lower(self):
        tgt = asbytes_nested([[' abc ', ''],
                              ['12345', 'mixedcase'],
                              ['123 \t 345 \0 ', 'upper']])
        assert_(issubclass(self.A.lower().dtype.type, np.string_))
        assert_array_equal(self.A.lower(), tgt)

        tgt = [[sixu(' \u03c3 '), sixu('')],
               [sixu('12345'), sixu('mixedcase')],
               [sixu('123 \t 345 \0 '), sixu('upper')]]
        assert_(issubclass(self.B.lower().dtype.type, np.unicode_))
        assert_array_equal(self.B.lower(), tgt) 

Example 46

def test_swapcase(self):
        tgt = asbytes_nested([[' ABC ', ''],
                              ['12345', 'mIXEDcASE'],
                              ['123 \t 345 \0 ', 'upper']])
        assert_(issubclass(self.A.swapcase().dtype.type, np.string_))
        assert_array_equal(self.A.swapcase(), tgt)

        tgt = [[sixu(' \u03c3 '), sixu('')],
               [sixu('12345'), sixu('mIXEDcASE')],
               [sixu('123 \t 345 \0 '), sixu('upper')]]
        assert_(issubclass(self.B.swapcase().dtype.type, np.unicode_))
        assert_array_equal(self.B.swapcase(), tgt) 

Example 47

def test_title(self):
        tgt = asbytes_nested([[' Abc ', ''],
                              ['12345', 'Mixedcase'],
                              ['123 \t 345 \0 ', 'Upper']])
        assert_(issubclass(self.A.title().dtype.type, np.string_))
        assert_array_equal(self.A.title(), tgt)

        tgt = [[sixu(' \u03a3 '), sixu('')],
               [sixu('12345'), sixu('Mixedcase')],
               [sixu('123 \t 345 \0 '), sixu('Upper')]]
        assert_(issubclass(self.B.title().dtype.type, np.unicode_))
        assert_array_equal(self.B.title(), tgt) 

Example 48

def _can_convert_to_string(value):
        vtype = type(value)
        return isinstance(value, basestring) or vtype in [np.unicode_, np.string_, np.str_] 

Example 49

def toString(value):
        """
        Convert a value to a string, if possible.
        """
        if isinstance(value, basestring):
            return value
        elif type(value) in [np.string_, np.str_]:
            return str(value)
        elif type(value) == np.unicode_:
            return unicode(value)
        else:
            raise TypeError("Could not convert %s to string type" % type(value)) 

Example 50

def test_writeread(tmpdir):
    fname = os.path.join(tmpdir.dirname, 'temp.lbl')
    times = np.reshape(np.arange(0,20), (-1,2))
    labels = [chr(i) for i in np.arange(10) + 65]
    dtype = [('name', np.unicode_, max([len(x) for x in labels])),
             ('start', float), ('stop', float)]
    rec_array = np.array([(l, sta, sto) for l, (sta, sto) in zip(labels, times)],
                    dtype=dtype)
    lbl.write(fname, rec_array)
    rec_array2 = lbl.read(fname)
    for x, y in zip(rec_array['name'], rec_array2['name']):
        assert x == y, 'label named do not match'
    assert np.all(np.isclose(rec_array['start'], rec_array2['start'])), 'starts do not match'
    assert np.all(np.isclose(rec_array['stop'], rec_array2['stop'])), 'stops do not match' 
点赞