The following are code examples showing how to use numpy.object_. They are extracted from open source Python projects.
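For orientation, here is a minimal sketch (not taken from the projects above) of what the np.object_ dtype does: it stores references to arbitrary Python objects rather than fixed-size scalars.

import numpy as np

# an object array holds references to arbitrary Python objects
a = np.array([1, 'two', {'three': 3}], dtype=np.object_)
print(a.dtype)     # object
print(type(a[2]))  # <class 'dict'>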
Example 1
def categorize_columns(self, df):
    """Categorize columns of dataframe by data type

    :param df: input (pandas) data frame
    """
    # check presence and data type of requested columns
    # sort columns into numerical, timestamp and category based
    for c in self.columns:
        for col in c:
            if col not in df.columns:
                raise KeyError('column "{0:s}" not in dataframe "{1:s}"'.format(col, self.read_key))
            dt = self.get_data_type(df, col)
            if col not in self.var_dtype:
                self.var_dtype[col] = dt.type
                if (self.var_dtype[col] is np.string_) or (self.var_dtype[col] is np.object_):
                    self.var_dtype[col] = str
            if not any(dt in types for types in (STRING_SUBSTR, NUMERIC_SUBSTR, TIME_SUBSTR)):
                raise TypeError('cannot process column "{0:s}" of data type "{1:s}"'.format(col, str(dt)))
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)
            colset = self.num_cols if is_number else self.dt_cols if is_timestamp else self.str_cols
            if col not in colset:
                colset.append(col)
            self.log().debug('Data type of column "%s" is "%s"', col, self.var_dtype[col])
Example 2
def test_object_array_refcount_self_assign(self, level=rlevel):
    # Ticket #711
    class VictimObject(object):
        deleted = False

        def __del__(self):
            self.deleted = True

    d = VictimObject()
    arr = np.zeros(5, dtype=np.object_)
    arr[:] = d
    del d
    arr[:] = arr  # refcount of 'd' might hit zero here
    assert_(not arr[0].deleted)
    arr[:] = arr  # trying to induce a segfault by doing it again...
    assert_(not arr[0].deleted)
Example 3
def batch_loader(self, rnd_gen=np.random, shuffle=True):
    """load_mbs yields a new minibatch at each iteration"""
    batchsize = self.batchsize
    inds = np.arange(self.n_samples)
    if shuffle:
        rnd_gen.shuffle(inds)
    n_mbs = int(np.ceil(self.n_samples / batchsize))
    x = np.zeros(self.X_shape, np.float32)
    y = np.zeros(self.y_shape, np.float32)
    ids = np.empty((batchsize,), np.object_)
    for m in range(n_mbs):
        start = m * batchsize
        end = (m + 1) * batchsize
        if end > self.n_samples:
            end = self.n_samples
        mb_slice = slice(start, end)
        x[:end - start, :] = self.x[inds[mb_slice], :]
        y[:end - start, :] = self.y[inds[mb_slice], :]
        ids[:end - start] = self.ids[inds[mb_slice]]
        yield dict(X=x, y=y, ID=ids)
Example 4
def pad_1d(values, limit=None, mask=None, dtype=None):
    if dtype is None:
        dtype = values.dtype
    _method = None
    if com.is_float_dtype(values):
        _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        _method = _pad_1d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        _method = algos.pad_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.pad_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    mask = mask.view(np.uint8)
    _method(values, mask, limit=limit)
    return values
Example 5
def backfill_1d(values, limit=None, mask=None, dtype=None):
    if dtype is None:
        dtype = values.dtype
    _method = None
    if com.is_float_dtype(values):
        _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        _method = _backfill_1d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        _method = algos.backfill_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    mask = mask.view(np.uint8)
    _method(values, mask, limit=limit)
    return values
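pad_1d and backfill_1d are pandas internals; as a rough illustration of the behaviour they implement for object-dtype data, the public Series.ffill and Series.bfill methods perform the same forward and backward filling. A minimal sketch, assuming only numpy and pandas:

import numpy as np
import pandas as pd

s = pd.Series(['a', None, None, 'b'], dtype=np.object_)
print(s.ffill().tolist())  # ['a', 'a', 'a', 'b']  (pad_1d behaviour)
print(s.bfill().tolist())  # ['a', 'b', 'b', 'b']  (backfill_1d behaviour)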
Example 6
def is_bool_indexer(key):
    if isinstance(key, (ABCSeries, np.ndarray)):
        if key.dtype == np.object_:
            key = np.asarray(_values_from_object(key))

            if not lib.is_bool_array(key):
                if isnull(key).any():
                    raise ValueError('cannot index with vector containing '
                                     'NA / NaN values')
                return False
            return True
        elif key.dtype == np.bool_:
            return True
    elif isinstance(key, list):
        try:
            arr = np.asarray(key)
            return arr.dtype == np.bool_ and len(arr) == len(key)
        except TypeError:  # pragma: no cover
            return False

    return False
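is_bool_indexer depends on pandas internals (ABCSeries, lib.is_bool_array, _values_from_object). A simplified, self-contained sketch of the same check using only public numpy; the function name is illustrative, and it deliberately skips the object-dtype branch handled above:

import numpy as np

def looks_like_bool_indexer(key):
    # accepts only keys that coerce to a 1-D np.bool_ array;
    # the pandas version additionally inspects object-dtype arrays
    try:
        arr = np.asarray(key)
    except (TypeError, ValueError):
        return False
    return arr.ndim == 1 and arr.dtype == np.bool_

print(looks_like_bool_indexer([True, False, True]))  # True
print(looks_like_bool_indexer(['a', 'b']))           # False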
Example 7
def test_fromValue(self):
    nans = Series(np.NaN, index=self.ts.index)
    self.assertEqual(nans.dtype, np.float_)
    self.assertEqual(len(nans), len(self.ts))

    strings = Series('foo', index=self.ts.index)
    self.assertEqual(strings.dtype, np.object_)
    self.assertEqual(len(strings), len(self.ts))

    d = datetime.now()
    dates = Series(d, index=self.ts.index)
    self.assertEqual(dates.dtype, 'M8[ns]')
    self.assertEqual(len(dates), len(self.ts))

    # GH12336
    # Test construction of categorical series from value
    categorical = Series(0, index=self.ts.index, dtype="category")
    expected = Series(0, index=self.ts.index).astype("category")
    self.assertEqual(categorical.dtype, 'category')
    self.assertEqual(len(categorical), len(self.ts))
    tm.assert_series_equal(categorical, expected)
Example 8
def test_astype_datetimes(self):
    import pandas.tslib as tslib

    s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
    s = s.astype('O')
    self.assertEqual(s.dtype, np.object_)

    s = Series([datetime(2001, 1, 2, 0, 0)])
    s = s.astype('O')
    self.assertEqual(s.dtype, np.object_)

    s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
    s[1] = np.nan
    self.assertEqual(s.dtype, 'M8[ns]')

    s = s.astype('O')
    self.assertEqual(s.dtype, np.object_)
Example 9
def test_convert_objects_leave_decimal_alone(self):
    from decimal import Decimal

    s = Series(lrange(5))
    labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O')

    def convert_fast(x):
        return Decimal(str(x.mean()))

    def convert_force_pure(x):
        # base will be length 0
        assert (len(x.base) > 0)
        return Decimal(str(x.mean()))

    grouped = s.groupby(labels)

    result = grouped.agg(convert_fast)
    self.assertEqual(result.dtype, np.object_)
    tm.assertIsInstance(result[0], Decimal)

    result = grouped.agg(convert_force_pure)
    self.assertEqual(result.dtype, np.object_)
    tm.assertIsInstance(result[0], Decimal)
Example 10
def test_set_value_resize(self):
    res = self.frame.set_value('foobar', 'B', 0)
    self.assertIs(res, self.frame)
    self.assertEqual(res.index[-1], 'foobar')
    self.assertEqual(res.get_value('foobar', 'B'), 0)

    self.frame.loc['foobar', 'qux'] = 0
    self.assertEqual(self.frame.get_value('foobar', 'qux'), 0)

    res = self.frame.copy()
    res3 = res.set_value('foobar', 'baz', 'sam')
    self.assertEqual(res3['baz'].dtype, np.object_)

    res = self.frame.copy()
    res3 = res.set_value('foobar', 'baz', True)
    self.assertEqual(res3['baz'].dtype, np.object_)

    res = self.frame.copy()
    res3 = res.set_value('foobar', 'baz', 5)
    self.assertTrue(com.is_float_dtype(res3['baz']))
    self.assertTrue(isnull(res3['baz'].drop(['foobar'])).all())
    self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam')
Example 11
def test_stat_operators_attempt_obj_array(self):
    data = {
        'a': [-0.00049987540199591344, -0.0016467257772919831,
              0.00067695870775883013],
        'b': [-0, -0, 0.0],
        'c': [0.00031111847529610595, 0.0014902627951905339,
              -0.00094099200035979691]
    }
    df1 = DataFrame(data, index=['foo', 'bar', 'baz'], dtype='O')
    methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']

    # GH #676
    df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]},
                    dtype=object)

    for df in [df1, df2]:
        for meth in methods:
            self.assertEqual(df.values.dtype, np.object_)
            result = getattr(df, meth)(1)
            expected = getattr(df.astype('f8'), meth)(1)
            if not tm._incompat_bottleneck_version(meth):
                assert_series_equal(result, expected)
Example 12
def test_constructor_dict_cast(self):
    # cast float tests
    test_data = {
        'A': {'1': 1, '2': 2},
        'B': {'1': '1', '2': '2', '3': '3'},
    }
    frame = DataFrame(test_data, dtype=float)
    self.assertEqual(len(frame), 3)
    self.assertEqual(frame['B'].dtype, np.float64)
    self.assertEqual(frame['A'].dtype, np.float64)

    frame = DataFrame(test_data)
    self.assertEqual(len(frame), 3)
    self.assertEqual(frame['B'].dtype, np.object_)
    self.assertEqual(frame['A'].dtype, np.float64)

    # can't cast to float
    test_data = {
        'A': dict(zip(range(20), tm.makeStringIndex(20))),
        'B': dict(zip(range(15), randn(15)))
    }
    frame = DataFrame(test_data, dtype=float)
    self.assertEqual(len(frame), 20)
    self.assertEqual(frame['A'].dtype, np.object_)
    self.assertEqual(frame['B'].dtype, np.float64)
Example 13
def test_transpose(self):
    frame = self.frame
    dft = frame.T
    for idx, series in compat.iteritems(dft):
        for col, value in compat.iteritems(series):
            if np.isnan(value):
                self.assertTrue(np.isnan(frame[col][idx]))
            else:
                self.assertEqual(value, frame[col][idx])

    # mixed type
    index, data = tm.getMixedTypeDict()
    mixed = DataFrame(data, index=index)

    mixed_T = mixed.T
    for col, s in compat.iteritems(mixed_T):
        self.assertEqual(s.dtype, np.object_)
Example 14
def test_nan_handling(self):
    # Nans are represented as -1 in labels
    s = Series(Categorical(["a", "b", np.nan, "a"]))
    self.assert_numpy_array_equal(s.cat.categories, np.array(["a", "b"]))
    self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0]))

    # If categories have nan included, the label should point to that
    # instead
    with tm.assert_produces_warning(FutureWarning):
        s2 = Series(Categorical(["a", "b", np.nan, "a"],
                                categories=["a", "b", np.nan]))
    self.assert_numpy_array_equal(s2.cat.categories,
                                  np.array(["a", "b", np.nan], dtype=np.object_))
    self.assert_numpy_array_equal(s2.values.codes, np.array([0, 1, 2, 0]))

    # Changing categories should also make the replaced category np.nan
    s3 = Series(Categorical(["a", "b", "c", "a"]))
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        s3.cat.categories = ["a", "b", np.nan]
    self.assert_numpy_array_equal(s3.cat.categories,
                                  np.array(["a", "b", np.nan], dtype=np.object_))
    self.assert_numpy_array_equal(s3.values.codes, np.array([0, 1, 2, 0]))
Example 15
def reset_minmax(self):
    try:
        data = self.get_values(sample=True)
        color_value = self.color_func(data) if self.color_func is not None else data
        if color_value.dtype.type == np.object_:
            color_value = color_value[is_number_value(color_value)]
            # this is probably broken if we have complex numbers stored as objects
            # but I don't foresee this case happening anytime soon.
            color_value = color_value.astype(float)
        # ignore nan, -inf, inf (setting them to 0 or to very large numbers
        # is not an option)
        color_value = color_value[np.isfinite(color_value)]
        self.vmin = float(np.min(color_value))
        self.vmax = float(np.max(color_value))
        self.bgcolor_possible = True
    # ValueError for empty arrays, TypeError for object/string arrays
    except (TypeError, ValueError):
        self.vmin = None
        self.vmax = None
        self.bgcolor_possible = False
Example 16
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an
    array of ascii (np.string_) objects
    """
    if np.issubdtype(a.dtype, np.object_):
        if np.all([type(x) is str for x in a]) or \
           np.all([type(x) is np.str_ for x in a]) or \
           np.all([type(x) is np.unicode_ for x in a]):
            return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
        elif np.all([type(x) is np.string_ for x in a]) or \
             np.all([type(x) is np.bytes_ for x in a]):
            return a.astype("string_")
        else:
            print(type(a[0]))
            raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    elif np.issubdtype(a.dtype, np.string_) or np.issubdtype(a.dtype, np.object_):
        return a
    elif np.issubdtype(a.dtype, np.str_) or np.issubdtype(a.dtype, np.unicode_):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in a])
    else:
        raise ValueError("String values must be object, ascii or unicode.")
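A minimal usage sketch, assuming only numpy; the encode step rewrites non-ascii characters as XML character references:

import numpy as np

a = np.array(['alpha', 'βeta'], dtype=np.object_)
print(normalize_attr_strings(a))
# [b'alpha' b'&#946;eta']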
Example 17
def numpy_to_transform(arr):
    from tf import transformations

    shape, rest = arr.shape[:-2], arr.shape[-2:]
    assert rest == (4, 4)

    if len(shape) == 0:
        trans = transformations.translation_from_matrix(arr)
        quat = transformations.quaternion_from_matrix(arr)
        return Transform(
            translation=Vector3(*trans),
            rotation=Quaternion(*quat)
        )
    else:
        res = np.empty(shape, dtype=np.object_)
        for idx in np.ndindex(shape):
            res[idx] = Transform(
                translation=Vector3(*transformations.translation_from_matrix(arr[idx])),
                rotation=Quaternion(*transformations.quaternion_from_matrix(arr[idx]))
            )
        return res
Example 18
def numpy_to_pose(arr):
    from tf import transformations

    shape, rest = arr.shape[:-2], arr.shape[-2:]
    assert rest == (4, 4)

    if len(shape) == 0:
        trans = transformations.translation_from_matrix(arr)
        quat = transformations.quaternion_from_matrix(arr)
        return Pose(
            position=Vector3(*trans),
            orientation=Quaternion(*quat)
        )
    else:
        res = np.empty(shape, dtype=np.object_)
        for idx in np.ndindex(shape):
            res[idx] = Pose(
                position=Vector3(*transformations.translation_from_matrix(arr[idx])),
                orientation=Quaternion(*transformations.quaternion_from_matrix(arr[idx]))
            )
        return res
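Both converters above assume a ROS environment (tf for the matrix helpers, geometry_msgs for the Transform, Pose, Vector3 and Quaternion message types). A minimal usage sketch under that assumption:

import numpy as np

# a single 4x4 identity matrix -> one Pose at the origin
pose = numpy_to_pose(np.eye(4))

# a stack of matrices -> an object array of Pose messages
poses = numpy_to_pose(np.tile(np.eye(4), (3, 1, 1)))
print(poses.shape, poses.dtype)  # (3,) object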
Example 19
def initialize(self):
    """Initialize HistogramFillerBase"""
    # check basic attribute settings
    assert isinstance(self.read_key, str) and len(self.read_key), 'read_key has not been set correctly'
    if self.store_key is not None:
        assert isinstance(self.store_key, str) and len(self.store_key), 'store_key has not been set to string'

    # default histogram creation is at execute(). Storage at finalize is useful for
    # looping over datasets.
    if self.store_at_finalize:
        self.log().debug('Storing (and possible post-processing) at finalize, not execute')

    # check that columns are set correctly.
    for i, c in enumerate(self.columns):
        if isinstance(c, str):
            self.columns[i] = [c]
        if not isinstance(self.columns[i], list):
            raise TypeError('columns "{}" needs to be a string or list of strings'.format(self.columns[i]))

    # check for supported data types
    for k in self.var_dtype.keys():
        try:
            self.var_dtype[k] = np.dtype(self.var_dtype[k]).type
            if self.var_dtype[k] is np.string_ or self.var_dtype[k] is np.object_:
                self.var_dtype[k] = str
        except BaseException:
            raise RuntimeError('unknown assigned datatype to variable "{}"'.format(k))

    return StatusCode.Success
Example 20
def initialize(self):
    """Initialize FixPandasDataFrame"""
    self.check_arg_types(read_key=str, store_key=str)
    self.check_arg_types(recurse=True, allow_none=True, original_columns=str)
    self.check_arg_vals('read_key')

    if not isinstance(self.cleanup_string_columns, list) and not isinstance(self.cleanup_string_columns, bool):
        raise AssertionError('cleanup_string_columns should be a list of column names or boolean.')

    if self.read_key == self.store_key:
        self.inplace = True
        self.log().debug('store_key equals read_key; inplace has been set to "True"')

    if self.inplace:
        self.store_key = self.read_key
        self.log().debug('store_key has been set to read_key "%s"', self.store_key)

    if not self.store_key:
        self.store_key = self.read_key + '_fix'
        self.log().debug('store_key has been set to "%s"', self.store_key)

    # check data types
    for k in self.var_dtype.keys():
        if k not in self.contaminated_columns:
            self.contaminated_columns.append(k)
        try:
            # convert to consistent types
            dt = np.dtype(self.var_dtype[k]).type
            if dt is np.str_ or dt is np.object_:
                dt = str
            self.var_dtype[k] = dt
        except BaseException:
            raise TypeError('unknown assigned datatype to variable "%s"' % k)

    return StatusCode.Success
Example 21
def test_unpickle_dtype_with_object(self, level=rlevel):
    # Implemented in r2840
    dt = np.dtype([('x', int), ('y', np.object_), ('z', 'O')])
    f = BytesIO()
    pickle.dump(dt, f)
    f.seek(0)
    dt_ = pickle.load(f)
    f.close()
    assert_equal(dt, dt_)
Example 22
def test_mem_array_creation_invalid_specification(self, level=rlevel):
    # Ticket #196
    dt = np.dtype([('x', int), ('y', np.object_)])
    # Wrong way
    self.assertRaises(ValueError, np.array, [1, 'object'], dt)
    # Correct way
    np.array([(1, 'object')], dt)
Example 23
def test_for_object_scalar_creation(self, level=rlevel):
    # Ticket #816
    a = np.object_()
    b = np.object_(3)
    b2 = np.object_(3.0)
    c = np.object_([4, 5])
    d = np.object_([None, {}, []])
    assert_(a is None)
    assert_(type(b) is int)
    assert_(type(b2) is float)
    assert_(type(c) is np.ndarray)
    assert_(c.dtype == object)
    assert_(d.dtype == object)
Example 24
def test_object_array_to_fixed_string(self):
    # Ticket #1235.
    a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
    b = np.array(a, dtype=(np.str_, 8))
    assert_equal(a, b)
    c = np.array(a, dtype=(np.str_, 5))
    assert_equal(c, np.array(['abcde', 'ijklm']))
    d = np.array(a, dtype=(np.str_, 12))
    assert_equal(a, d)
    e = np.empty((2, ), dtype=(np.str_, 8))
    e[:] = a[:]
    assert_equal(a, e)
Example 25
def test_split(self):
    A = self.A.split(asbytes('3'))
    tgt = asbytes_nested([
        [[' abc '], ['']],
        [['12', '45'], ['MixedCase']],
        [['12', ' \t ', '45 \x00 '], ['UPPER']]])
    assert_(issubclass(A.dtype.type, np.object_))
    assert_equal(A.tolist(), tgt)
Example 26
def test_splitlines(self):
    A = np.char.array(['abc\nfds\nwer']).splitlines()
    assert_(issubclass(A.dtype.type, np.object_))
    assert_(A.shape == (1,))
    assert_(len(A[0]) == 3)
Example 27
def test_converters_cornercases(self):
    # Test the conversion to datetime.
    converter = {'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')}
    data = TextIO('2009-02-03 12:00:00Z, 72214.0')
    test = np.ndfromtxt(data, delimiter=',', dtype=None,
                        names=['date', 'stid'], converters=converter)
    control = np.array((datetime(2009, 2, 3), 72214.),
                       dtype=[('date', np.object_), ('stid', float)])
    assert_equal(test, control)
Example 28
def test_dtype_error(self):
    for f in self.nanfuncs:
        for dtype in [np.bool_, np.int_, np.object_]:
            assert_raises(TypeError, f, _ndat, axis=1, dtype=dtype)
Example 29
def test_out_dtype_error(self):
    for f in self.nanfuncs:
        for dtype in [np.bool_, np.int_, np.object_]:
            out = np.empty(_ndat.shape[0], dtype=dtype)
            assert_raises(TypeError, f, _ndat, axis=1, out=out)
Example 30
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old=False):
    # Splits a column into multiple columns
    # dataframe    : pandas dataframe to be processed
    # split_col    : name of the column to be split
    # split_char   : character to split the column on
    # new_colnames : list of new names for the columns
    # delete_old   : True / False, remove the original column?
    import pandas as pd
    import numpy as np

    # save the split column as a separate object
    new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series, 1)
    # if all values were NaN, no split occurred, only one col exists still
    if len(new_cols.columns) < len(new_colnames):
        # create the missing cols, fill with NaN
        for i in range(len(new_cols.columns), len(new_colnames)):
            new_cols[new_colnames[i]] = np.nan
    # rename the cols
    new_cols.columns = new_colnames
    # remove the original column from the df
    if delete_old is True:
        del dataframe[split_col]
    # merge with df
    new_df = dataframe.join(new_cols)
    return new_df
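A minimal usage sketch, assuming only pandas; the column names and values are illustrative:

import pandas as pd

df = pd.DataFrame({'sample': ['s1', 's2'], 'coords': ['1;2', '3;4']})
out = split_df_col2cols(df, split_col='coords', split_char=';',
                        new_colnames=['x', 'y'], delete_old=True)
print(out)
#   sample  x  y
# 0     s1  1  2
# 1     s2  3  4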