The following are code examples showing how to use numpy.intc. They are extracted from open source Python projects.
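Before the examples, a minimal standalone sketch of what np.intc is (illustrative values chosen here):

import numpy as np

# np.intc is the NumPy scalar type matching the platform C ``int``
# (32 bits on most platforms); C extensions and scipy.sparse index
# arrays commonly require it.
indices = np.array([0, 2, 5], dtype=np.intc)
print(indices.dtype)      # int32 on typical platforms
print(int(np.intc(7)))    # scalars convert cleanly to Python int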
Example 1
def _validate_X_predict(
        self, X: np.ndarray, check_input: bool) -> np.ndarray:
    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X) and (X.indices.dtype != np.intc
                            or X.indptr.dtype != np.intc):
            raise ValueError(
                "No support for np.int64 index based sparse matrices")

    n_features = X.shape[1]
    if self.n_features_ != n_features:
        raise ValueError(
            "Number of features of the model must match the input."
            " Model n_features is %s and input n_features is %s "
            % (self.n_features_, n_features))

    return X
Example 2
def default(self, obj):
    # convert dates and numpy objects in a json serializable format
    if isinstance(obj, datetime):
        return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
    elif isinstance(obj, date):
        return obj.strftime('%Y-%m-%d')
    elif type(obj) in (np.int_, np.intc, np.intp, np.int8, np.int16,
                       np.int32, np.int64, np.uint8, np.uint16,
                       np.uint32, np.uint64):
        return int(obj)
    elif type(obj) in (np.bool_,):
        return bool(obj)
    elif type(obj) in (np.float_, np.float16, np.float32, np.float64,
                       np.complex_, np.complex64, np.complex128):
        return float(obj)

    # Let the base class default method raise the TypeError
    return json.JSONEncoder.default(self, obj)
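A hedged usage sketch of an encoder in the spirit of Example 1's default method; the class name NumpyJSONEncoder and the isinstance-based checks are choices made here, not from the original project:

import json
import numpy as np

class NumpyJSONEncoder(json.JSONEncoder):
    """Minimal JSON encoder for NumPy scalars (hypothetical name)."""
    def default(self, obj):
        if isinstance(obj, np.integer):    # covers np.intc, np.int64, ...
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        return json.JSONEncoder.default(self, obj)

print(json.dumps({'count': np.intc(3)}, cls=NumpyJSONEncoder))  # {"count": 3}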
Example 3
def _validate_X_predict(self, X, check_input):
    """Validate X whenever one tries to predict, apply, predict_proba"""
    if self.tree_ is None:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X) and (X.indices.dtype != np.intc or
                            X.indptr.dtype != np.intc):
            raise ValueError("No support for np.int64 index based "
                             "sparse matrices")

    n_features = X.shape[1]
    if self.n_features_ != n_features:
        raise ValueError("Number of features of the model must "
                         "match the input. Model n_features is %s and "
                         "input n_features is %s "
                         % (self.n_features_, n_features))

    return X
Example 4
def pairFeatureMatrix(self, elementList):
    """ Construction of pair-distance matrices """
    # Initiate
    nSpecies = len(elementList)

    # Get the molecular structure
    pos = np.array(self.molecule.positions, dtype=float)   # Atomic positions
    elInd = np.array(self.molecule.elInd, dtype=np.intc)   # Element indices matching elementList
    natoms = len(self.molecule.names)                      # Total number of atoms in the molecule

    # Initiate the matrix (integer division so the dimensions stay ints)
    dim1 = natoms * (natoms - 1) // 2      # First dimension (pairwise distances)
    dim2 = nSpecies * (nSpecies + 1) // 2  # Number of possible pairs
    featMat = np.zeros((dim1, dim2))       # To be passed to fun_pairFeatures (compiled C code)

    # Call the C function to store the pairFeatures
    pairFeatures.fun_pairFeatures(nSpecies, natoms, elInd, pos, featMat)

    return featMat
Example 5
def execute(self, actions):
    """
    Pass action to universe environment, return reward, next step,
    terminal state and additional info.

    :param actions: action to execute as numpy array, should have dtype
        np.intc and should adhere to the specification given in
        DeepMindLabEnvironment.action_spec(level_id)
    :return: dict containing the next state, the reward, and a boolean
        indicating if the next state is a terminal state
    """
    adjusted_actions = list()
    for action_spec in self.level.action_spec():
        if action_spec['min'] == -1 and action_spec['max'] == 1:
            adjusted_actions.append(actions[action_spec['name']] - 1)
        else:
            adjusted_actions.append(actions[action_spec['name']])  # clip?
    actions = np.array(adjusted_actions, dtype=np.intc)

    reward = self.level.step(action=actions, num_steps=self.repeat_action)
    state = self.level.observations()['RGB_INTERLACED']
    terminal = not self.level.is_running()

    return state, terminal, reward
Example 6
def default(self, obj):
    # convert dates and numpy objects in a json serializable format
    if isinstance(obj, datetime):
        return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
    elif isinstance(obj, date):
        return obj.strftime('%Y-%m-%d')
    elif type(obj) in [np.int_, np.intc, np.intp, np.int8, np.int16,
                       np.int32, np.int64, np.uint8, np.uint16,
                       np.uint32, np.uint64]:
        return int(obj)
    elif type(obj) in [np.bool_]:
        return bool(obj)
    elif type(obj) in [np.float_, np.float16, np.float32, np.float64,
                       np.complex_, np.complex64, np.complex128]:
        return float(obj)

    # Let the base class default method raise the TypeError
    return json.JSONEncoder.default(self, obj)
Example 7
def predict(self, queries, n_jobs=1):
    '''
    Predict the ranking score for each individual document
    of the given queries.

    n_jobs: int, optional (default is 1)
        The number of working threads that will be spawned to compute
        the ranking scores. If -1, the current number of CPUs will
        be used.
    '''
    if self.trained is False:
        raise ValueError('the model has not been trained yet')

    predictions = np.zeros(queries.document_count(), dtype=np.float64)

    n_jobs = max(1, min(n_jobs if n_jobs >= 0
                        else n_jobs + cpu_count() + 1,
                        queries.document_count()))

    indices = np.linspace(0, queries.document_count(),
                          n_jobs + 1).astype(np.intc)

    Parallel(n_jobs=n_jobs, backend="threading")(
        delayed(parallel_helper, check_pickle=False)(
            LambdaRandomForest, '_LambdaRandomForest__predict',
            self.estimators,
            queries.feature_vectors[indices[i]:indices[i + 1]],
            predictions[indices[i]:indices[i + 1]])
        for i in range(indices.size - 1))

    predictions /= len(self.estimators)

    return predictions
Example 8
def perform(self, node, inputs, out):
    # TODO support broadcast!
    # TODO assert all inputs have the same shape
    z, = out
    if (z[0] is None or z[0].shape != inputs[0].shape or
            not z[0].is_c_contiguous()):
        z[0] = theano.sandbox.cuda.CudaNdarray.zeros(inputs[0].shape)
    if inputs[0].shape != inputs[1].shape:
        raise TypeError("PycudaElemwiseSourceModuleOp:"
                        " inputs don't have the same shape!")

    if inputs[0].size > 512:
        grid = (int(numpy.ceil(inputs[0].size / 512.)), 1)
        block = (512, 1, 1)
    else:
        grid = (1, 1)
        block = (inputs[0].shape[0], inputs[0].shape[1], 1)
    self.pycuda_fct(inputs[0], inputs[1], z[0],
                    numpy.intc(inputs[1].size), block=block, grid=grid)
Example 9
def make_thunk(self, node, storage_map, _, _2):
    mod = SourceModule("""
__global__ void my_fct(float * i0, float * o0, int size) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < size) {
        o0[i] = i0[i] * 2;
    }
}""")
    pycuda_fct = mod.get_function("my_fct")
    inputs = [storage_map[v] for v in node.inputs]
    outputs = [storage_map[v] for v in node.outputs]

    def thunk():
        z = outputs[0]
        if z[0] is None or z[0].shape != inputs[0][0].shape:
            z[0] = cuda.CudaNdarray.zeros(inputs[0][0].shape)
        grid = (int(numpy.ceil(inputs[0][0].size / 512.)), 1)
        pycuda_fct(inputs[0][0], z[0], numpy.intc(inputs[0][0].size),
                   block=(512, 1, 1), grid=grid)

    return thunk
Example 10
def npy2py_type(npy_type):
    int_types = [
        np.int_, np.intc, np.intp, np.int8, np.int16, np.int32,
        np.int64, np.uint8, np.uint16, np.uint32, np.uint64
    ]

    float_types = [np.float_, np.float16, np.float32, np.float64]

    bytes_types = [np.str_, np.string_]

    if npy_type in int_types:
        return int
    if npy_type in float_types:
        return float
    if npy_type in bytes_types:
        return bytes
    if hasattr(npy_type, 'char'):
        if npy_type.char in ['S', 'a']:
            return bytes
        raise TypeError

    return npy_type
Example 11
def _validate_X_predict(self, X, check_input):
    """Validate X whenever one tries to predict, apply, predict_proba"""
    if self.tree_ is None:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X) and (X.indices.dtype != np.intc or
                            X.indptr.dtype != np.intc):
            raise ValueError("No support for np.int64 index based "
                             "sparse matrices")

    n_features = X.shape[1]
    if self.n_features_ != n_features:
        raise ValueError("Number of features of the model must "
                         "match the input. Model n_features is %s and "
                         "input n_features is %s "
                         % (self.n_features_, n_features))

    return X
Example 12
def _open_and_load(f, dtype, multilabel, zero_based, query_id):
    if hasattr(f, "read"):
        actual_dtype, data, ind, indptr, labels, query = \
            _load_svmlight_file(f, dtype, multilabel, zero_based, query_id)
    # XXX remove closing when Python 2.7+/3.1+ required
    else:
        with closing(_gen_open(f)) as f:
            actual_dtype, data, ind, indptr, labels, query = \
                _load_svmlight_file(f, dtype, multilabel, zero_based,
                                    query_id)

    # convert from array.array, give data the right dtype
    if not multilabel:
        labels = frombuffer_empty(labels, np.float64)
    data = frombuffer_empty(data, actual_dtype)
    indices = frombuffer_empty(ind, np.intc)
    indptr = np.frombuffer(indptr, dtype=np.intc)  # never empty
    query = frombuffer_empty(query, np.intc)

    data = np.asarray(data, dtype=dtype)  # no-op for float{32,64}

    return data, indices, indptr, labels, query
Example 13
def to_dense(A):
    """
    Convert a sparse matrix A to dense. For debugging only.
    """
    if hasattr(A, "getrow"):
        n = A.size(0)
        m = A.size(1)
        B = np.zeros((n, m), dtype=np.float64)
        for i in range(0, n):
            [j, val] = A.getrow(i)
            B[i, j] = val
        return B
    else:
        x = Vector()
        Ax = Vector()
        A.init_vector(x, 1)
        A.init_vector(Ax, 0)

        n = get_local_size(Ax)
        m = get_local_size(x)
        B = np.zeros((n, m), dtype=np.float64)
        for i in range(0, m):
            i_ind = np.array([i], dtype=np.intc)
            x.set_local(np.ones(i_ind.shape), i_ind)
            A.mult(x, Ax)
            B[:, i] = Ax.get_local()
            x.set_local(np.zeros(i_ind.shape), i_ind)
        return B
Example 14
def _create_lookups(self, X):
    """
    Create document and term lookups for all tokens.
    """
    docs, terms = np.nonzero(X)
    if issparse(X):
        x = np.array(X[docs, terms])[0]
    else:
        x = X[docs, terms]
    doc_lookup = np.ascontiguousarray(np.repeat(docs, x), dtype=np.intc)
    term_lookup = np.ascontiguousarray(np.repeat(terms, x), dtype=np.intc)

    return doc_lookup, term_lookup
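To see what those lookups contain, here is a small standalone walk-through of the same np.repeat construction (example values chosen here, not from the original project):

import numpy as np

# X[d, t] = count of term t in document d
X = np.array([[2, 0],
              [1, 1]])
docs, terms = np.nonzero(X)          # docs=[0, 1, 1], terms=[0, 0, 1]
counts = X[docs, terms]              # [2, 1, 1]
doc_lookup = np.ascontiguousarray(np.repeat(docs, counts), dtype=np.intc)
term_lookup = np.ascontiguousarray(np.repeat(terms, counts), dtype=np.intc)
print(doc_lookup)    # [0 0 1 1] -> one entry per token occurrence
print(term_lookup)   # [0 0 0 1]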
Example 15
def _create_edges(self, y, order='tail'):
    y.sort(order=order)
    _docs, _counts = np.unique(y[order], return_counts=True)
    counts = np.zeros(self.n_docs)
    counts[_docs] = _counts
    docs = np.ascontiguousarray(
        np.concatenate(([0], np.cumsum(counts))), dtype=np.intc)
    edges = np.ascontiguousarray(y['index'].flatten(), dtype=np.intc)

    return docs, edges
Example 16
def fit(self, X, y):
    """
    Estimate the topic distributions per document (theta), term
    distributions per topic (phi), and regression coefficients (eta).

    Parameters
    ----------
    X : array-like, shape = (n_docs, n_terms)
        The document-term matrix.

    y : array-like, shape = (n_edges, 3)
        Each entry of y is an ordered triple (d_1, d_2, y_(d_1, d_2)),
        where d_1 and d_2 are documents and y_(d_1, d_2) is an
        indicator of a directed edge from d_1 to d_2.
    """
    self.doc_term_matrix = X
    self.n_docs, self.n_terms = X.shape
    self.n_tokens = X.sum()
    self.n_edges = y.shape[0]

    doc_lookup, term_lookup = self._create_lookups(X)

    # edge info
    y = np.ascontiguousarray(np.column_stack((range(self.n_edges), y)))
    # we use a view here so that we can sort in-place using named columns
    y_rec = y.view(dtype=list(zip(('index', 'tail', 'head', 'data'),
                                  4 * [y.dtype])))
    edge_tail = np.ascontiguousarray(y_rec['tail'].flatten(),
                                     dtype=np.intc)
    edge_head = np.ascontiguousarray(y_rec['head'].flatten(),
                                     dtype=np.intc)
    edge_data = np.ascontiguousarray(y_rec['data'].flatten(),
                                     dtype=np.float64)
    out_docs, out_edges = self._create_edges(y_rec, order='tail')
    in_docs, in_edges = self._create_edges(y_rec, order='head')

    # iterate
    self.theta, self.phi, self.H, self.loglikelihoods = gibbs_sampler_grtm(
        self.n_iter, self.n_report_iter, self.n_topics, self.n_docs,
        self.n_terms, self.n_tokens, self.n_edges, self.alpha, self.beta,
        self.mu, self.nu2, self.b, doc_lookup, term_lookup, out_docs,
        out_edges, in_docs, in_edges, edge_tail, edge_head, edge_data,
        self.seed)
Example 17
def fit(self, X, y, hier):
    """
    Estimate the topic distributions per document (theta), term
    distributions per topic (phi), and regression coefficients (eta).

    Parameters
    ----------
    X : array-like, shape = (n_docs, n_terms)
        The document-term matrix.

    y : array-like, shape = (n_docs, n_labels)
        Response values for each document for each label.

    hier : 1D array-like, size = n_labels
        The index of the list corresponds to the current label and the
        value at the indexed position is the parent of the label.
        Set -1 as the root.
    """
    self.doc_term_matrix = X
    self.n_docs, self.n_terms = X.shape
    self.n_tokens = X.sum()

    doc_lookup, term_lookup = self._create_lookups(X)

    # iterate
    self.theta, self.phi, self.eta, self.loglikelihoods = \
        gibbs_sampler_blhslda(
            self.n_iter, self.n_report_iter, self.n_topics, self.n_docs,
            self.n_terms, self.n_tokens, self.alpha, self.beta, self.mu,
            self.nu2, self.b, doc_lookup, term_lookup,
            np.ascontiguousarray(y, dtype=np.intc),
            np.ascontiguousarray(hier, dtype=np.intc), self.seed)
Example 18
def test_dtype(self):
    dt = np.intc
    p = ndpointer(dtype=dt)
    self.assertTrue(p.from_param(np.array([1], dt)))
    dt = '<i4'
    p = ndpointer(dtype=dt)
    self.assertTrue(p.from_param(np.array([1], dt)))
    dt = np.dtype('>i4')
    p = ndpointer(dtype=dt)
    p.from_param(np.array([1], dt))
    self.assertRaises(TypeError, p.from_param,
                      np.array([1], dt.newbyteorder('swap')))
    dtnames = ['x', 'y']
    dtformats = [np.intc, np.float64]
    dtdescr = {'names': dtnames, 'formats': dtformats}
    dt = np.dtype(dtdescr)
    p = ndpointer(dtype=dt)
    self.assertTrue(p.from_param(np.zeros((10,), dt)))
    samedt = np.dtype(dtdescr)
    p = ndpointer(dtype=samedt)
    self.assertTrue(p.from_param(np.zeros((10,), dt)))
    dt2 = np.dtype(dtdescr, align=True)
    if dt.itemsize != dt2.itemsize:
        self.assertRaises(TypeError, p.from_param, np.zeros((10,), dt2))
    else:
        self.assertTrue(p.from_param(np.zeros((10,), dt2)))
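As a companion to this test, a small standalone sketch of how ndpointer enforces the dtype at call time (variable names chosen here):

import numpy as np
from numpy.ctypeslib import ndpointer

p = ndpointer(dtype=np.intc, flags='C_CONTIGUOUS')
arr = np.ascontiguousarray([1, 2, 3], dtype=np.intc)
print(p.from_param(arr))              # accepted: dtype and flags match
try:
    p.from_param(arr.astype(np.int64))
except TypeError as exc:
    print('rejected:', exc)           # dtype mismatch raises TypeError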
Example 19
def predict(self, X, check_input=True):
    """Predict class or regression value for X.

    For a classification model, the predicted class for each sample
    in X is returned. For a regression model, the predicted value
    based on X is returned.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    y : array of shape = [n_samples] or [n_samples, n_outputs]
        The predicted classes, or the predict values.
    """
    X = check_array(X, dtype=DTYPE, accept_sparse="csr")
    if issparse(X) and (X.indices.dtype != np.intc or
                        X.indptr.dtype != np.intc):
        raise ValueError("No support for np.int64 index based "
                         "sparse matrices")

    n_samples, n_features = X.shape

    if self.tree_ is None:
        raise Exception("Tree not initialized. Perform a fit first")

    if self.n_features_ != n_features:
        raise ValueError("Number of features of the model must "
                         "match the input. Model n_features is %s and "
                         "input n_features is %s "
                         % (self.n_features_, n_features))

    return (self.tree_.get('coefficient') *
            (X[:, self.tree_.get('best_dim')] >
             self.tree_.get('threshold')) +
            self.tree_.get('constant'))
Example 20
def _action(*entries):
    return np.array(entries, dtype=np.intc)
Example 21
def __init__(self, points, fraction):
    super(Graph, self).__init__(points, fraction)
    self.order = _np.ascontiguousarray(
        _np.argsort(self.density).astype(_np.intc)[::-1])
    self.delta, self.neighbour = _core.get_delta_and_neighbour(
        self.order, self.distances, self.max_distance)
Example 22
def assign(self, min_density, min_delta, border_only=False):
    self.min_density = min_density
    self.min_delta = min_delta
    self.border_only = border_only
    if self.autoplot:
        self.draw_decision_graph(self.min_density, self.min_delta)
    self._get_cluster_indices()
    self.membership = _core.get_membership(self.clusters, self.order,
                                           self.neighbour)
    self.border_density, self.border_member = _core.get_border(
        self.kernel_size, self.distances, self.density, self.membership,
        self.nclusters)
    self.halo_idx, self.core_idx = _core.get_halo(
        self.density, self.membership, self.border_density,
        self.border_member.astype(_np.intc), border_only=border_only)
Example 23
def _get_cluster_indices(self):
    self.clusters = _np.intersect1d(
        _np.where(self.density > self.min_density)[0],
        _np.where(self.delta > self.min_delta)[0],
        assume_unique=True).astype(_np.intc)
    self.nclusters = self.clusters.shape[0]
Example 24
def _get_membership(self):
    self.membership = -1 * _np.ones(shape=self.order.shape,
                                    dtype=_np.intc)
    for i in range(self.ncl):
        self.membership[self.clusters[i]] = i
    for i in range(self.npoints):
        if self.membership[self.order[i]] == -1:
            self.membership[self.order[i]] = \
                self.membership[self.neighbour[self.order[i]]]
Example 25
def MapActions(self, action_raw):
    self.action = np.zeros([self.num_actions])

    if (action_raw == 0):
        self.action[self.indices["LOOK_LEFT_RIGHT_PIXELS_PER_FRAME"]] = -25
    elif (action_raw == 1):
        self.action[self.indices["LOOK_LEFT_RIGHT_PIXELS_PER_FRAME"]] = 25

    """if (action_raw==2):
        self.action[self.indices["LOOK_DOWN_UP_PIXELS_PER_FRAME"]] = -25
    elif (action_raw==3):
        self.action[self.indices["LOOK_DOWN_UP_PIXELS_PER_FRAME"]] = 25

    if (action_raw==4):
        self.action[self.indices["STRAFE_LEFT_RIGHT"]] = -1
    elif (action_raw==5):
        self.action[self.indices["STRAFE_LEFT_RIGHT"]] = 1

    if (action_raw==6):
        self.action[self.indices["MOVE_BACK_FORWARD"]] = -1
    el"""
    if (action_raw == 2):  # 7
        self.action[self.indices["MOVE_BACK_FORWARD"]] = 1

    # all binary actions need reset
    """if (action_raw==8):
        self.action[self.indices["FIRE"]] = 0
    elif (action_raw==9):
        self.action[self.indices["FIRE"]] = 1

    if (action_raw==10):
        self.action[self.indices["JUMP"]] = 0
    elif (action_raw==11):
        self.action[self.indices["JUMP"]] = 1

    if (action_raw==12):
        self.action[self.indices["CROUCH"]] = 0
    elif (action_raw==13):
        self.action[self.indices["CROUCH"]] = 1"""

    return np.clip(self.action, self.mins, self.maxs).astype(np.intc)
Example 26
def _to_ctypes_array(tup, dtype=numpy.intc):
    return numpy.array(tup, dtype=dtype).ctypes
Example 27
def __init__(self, bins, mapq_thresh=30, clip_thresh=1):
    # set parameters
    self.bins = bins
    self.mapQT = mapq_thresh
    self.clip_thresh = clip_thresh

    # initialise data structures
    self.depth_stats = DepthStats(bins, mapq_thresh=mapq_thresh,
                                  dtype=np.intc)
    self.aln_stats = np.zeros((bins.num, len(AlignStats.aln_stats_cols)),
                              dtype=np.intc)
    self.fwd_inserts = np.empty(bins.num, dtype=list)
    self.rvs_inserts = np.empty(bins.num, dtype=list)
    for j in range(0, bins.num):
        self.fwd_inserts[j] = []
        self.rvs_inserts[j] = []
Example 28
def generate_data(n_samples, n_features, size_groups, rho=0.5,
                  random_state=24):
    """ Data generation process with Toeplitz-like correlated features:
        this corresponds to the synthetic dataset used in our paper
        "GAP Safe Screening Rules for Sparse-Group Lasso".
    """
    rng = check_random_state(random_state)
    n_groups = len(size_groups)
    # g_start = np.zeros(n_groups, order='F', dtype=np.intc)
    # for i in range(1, n_groups):
    #     g_start[i] = size_groups[i - 1] + g_start[i - 1]
    g_start = np.cumsum(size_groups, dtype=np.intc) - size_groups[0]

    # 10% of groups are active
    gamma1 = int(np.ceil(n_groups * 0.1))
    selected_groups = rng.random_integers(0, n_groups - 1, gamma1)
    true_beta = np.zeros(n_features)

    for i in selected_groups:
        begin = g_start[i]
        end = g_start[i] + size_groups[i]
        # 10% of features are active
        gamma2 = int(np.ceil(size_groups[i] * 0.1))
        selected_features = rng.random_integers(begin, end - 1, gamma2)

        ns = len(selected_features)
        s = 2 * rng.rand(ns) - 1
        u = rng.rand(ns)
        true_beta[selected_features] = np.sign(s) * (10 * u + (1 - u) * 0.5)

    vect = rho ** np.arange(n_features)
    covar = toeplitz(vect, vect)

    X = rng.multivariate_normal(np.zeros(n_features), covar, n_samples)
    y = np.dot(X, true_beta) + 0.01 * rng.normal(0, 1, n_samples)

    return X, y
Example 29
def expected_support():
    numpy_datatypes = [numpy.bool_, numpy.bool, numpy.int_, numpy.intc,
                       numpy.intp, numpy.int8, numpy.int16, numpy.int32,
                       numpy.int64, numpy.uint8, numpy.uint16,
                       numpy.uint32, numpy.uint64, numpy.float_,
                       numpy.float16, numpy.float32, numpy.float64]

    python_datatypes = [bool, int, float, object]

    return numpy_datatypes + python_datatypes
Example 30
def predict_rankings(self, queries, compact=False, n_jobs=1):
    '''
    Predict rankings of the documents for the given queries.

    If `compact` is set to True then the output will be one long 1d
    array containing the rankings for all the queries instead of
    a list of 1d arrays.

    The compact array can be subsequently indexed using the query
    index pointer array, see `queries.query_indptr`.

    query: Query
        The query whose documents should be ranked.

    compact: bool
        Specify to return rankings in compact format.

    n_jobs: int, optional (default is 1)
        The number of working threads that will be spawned to compute
        the ranking scores. If -1, the current number of CPUs will
        be used.
    '''
    # Predict the ranking scores for the documents.
    predictions = self.predict(queries, n_jobs)

    rankings = np.zeros(queries.document_count(), dtype=np.intc)

    ranksort_queries(queries.query_indptr, predictions, rankings)

    if compact or len(queries) == 1:
        return rankings
    else:
        return np.array_split(rankings, queries.query_indptr[1:-1])
Example 31
def predict_rankings(self, queries, compact=False, n_jobs=1):
    '''
    Predict rankings of the documents for the given queries.

    If `compact` is set to True then the output will be one long 1d
    array containing the rankings for all the queries instead of
    a list of 1d arrays.

    The compact array can be subsequently indexed using the query
    index pointer array, see `queries.query_indptr`.

    query: Query
        The query whose documents should be ranked.

    compact: bool
        Specify to return rankings in compact format.

    n_jobs: int, optional (default is 1)
        The number of working threads that will be spawned to compute
        the ranking scores. If -1, the current number of CPUs will
        be used.
    '''
    if self.trained is False:
        raise ValueError('the model has not been trained yet')

    # Predict the ranking scores for the documents.
    predictions = self.predict(queries, n_jobs)

    rankings = np.zeros(queries.document_count(), dtype=np.intc)

    ranksort_queries(queries.query_indptr, predictions, rankings)

    if compact or queries.query_count() == 1:
        return rankings
    else:
        return np.array_split(rankings, queries.query_indptr[1:-1])
Example 32
def compute_scale(self, queries, relevance_scores=None):
    '''
    Return the ideal DCG value for each query. Optionally, external
    relevance assessments can be used instead of the relevances
    present in the queries.

    Parameters
    ----------
    queries: Queries
        The queries for which the ideal DCG should be computed.

    relevance_scores: array of integers, optional, (default is None)
        The relevance scores that should be used instead of the
        relevance scores inside queries. Note, this argument is
        experimental.
    '''
    ideal_values = np.empty(queries.query_count(), dtype=np.float64)

    if relevance_scores is not None:
        if queries.document_count() != relevance_scores.shape[0]:
            raise ValueError('number of documents and relevance scores '
                             'do not match')

        # Need to sort the relevance labels first.
        indices = np.empty(relevance_scores.shape[0], dtype=np.intc)
        relevance_argsort_v1(relevance_scores, indices,
                             relevance_scores.shape[0])
        # Creates a copy.
        relevance_scores = relevance_scores[indices]
    else:
        # Assuming these are sorted.
        relevance_scores = queries.relevance_scores

    self.metric_.evaluate_queries_ideal(queries.query_indptr,
                                        relevance_scores, ideal_values)

    return ideal_values
Example 33
def evaluate(self, ranking=None, labels=None, ranked_labels=None,
             scales=None):
    '''
    Evaluate the NDCG metric on the specified ranked list of document
    relevance scores.

    The function input can be either a ranked list of relevance labels
    (`ranked_labels`), which is the most convenient from the
    computational point of view, or it can be in the form of a ranked
    list of documents (`ranking`) and corresponding relevance scores
    (`labels`), from which the ranked document relevance labels are
    computed.

    Parameters:
    -----------
    ranking: array, shape = (n_documents,)
        Specify the list of ranked documents.

    labels: array, shape = (n_documents,)
        Specify the relevance score for each document.

    ranked_labels: array, shape = (n_documents,)
        Relevance scores of the ranked documents. If not given, then
        `ranking` and `labels` must not be None; `ranked_labels` will
        then be inferred from them.

    scales: float, optional (default is None)
        The ideal DCG value on the given documents. If None is given,
        it will be computed from the document relevance scores.
    '''
    if ranked_labels is not None:
        return self.get_score_from_labels_list(ranked_labels)
    elif ranking is not None and labels is not None:
        if ranking.shape[0] != labels.shape[0]:
            raise ValueError('number of ranked documents != number of '
                             'relevance labels (%d, %d)'
                             % (ranking.shape[0], labels.shape[0]))
        ranked_labels = np.array(
            sorted(labels, key=dict(zip(labels, ranking)).get,
                   reverse=True), dtype=np.intc)
        return self.get_score_from_labels_list(ranked_labels)
Example 34
def _get_partition_indices(start, end, n_jobs):
    '''
    Get boundary indices for ``n_jobs`` number of sub-arrays dividing
    a (contiguous) array of indices starting with ``start`` (inclusive)
    and ending with ``end`` (exclusive) into equal parts.
    '''
    if (end - start) >= n_jobs:
        return np.linspace(start, end, n_jobs + 1).astype(np.intc)
    else:
        return np.arange(end - start + 1, dtype=np.intc)
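A quick worked check of the linspace branch (values chosen here): with start=0, end=10 and n_jobs=3 the boundaries split the index range into near-equal parts.

import numpy as np

# start=0, end=10, n_jobs=3 -> 4 boundary indices
print(np.linspace(0, 10, 4).astype(np.intc))  # [ 0  3  6 10]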
Example 35
def save_as_text(self, filepath, shuffle=False):
    '''
    Save queries into the specified file in svmlight format.

    Parameters:
    -----------
    filepath: string
        The filepath where this object will be saved.

    shuffle: bool
        Specify to shuffle the query document lists prior to writing
        into the file.
    '''
    # Inflate the query_ids array such that each id covers
    # the corresponding feature vectors.
    query_ids = np.fromiter(
        chain(*[[qid] * cnt for qid, cnt in
                zip(self.query_ids, np.diff(self.query_indptr))]),
        dtype=int)
    relevance_scores = self.relevance_scores
    feature_vectors = self.feature_vectors

    if shuffle:
        shuffle_indices = np.random.permutation(self.document_count())
        reshuffle_indices = np.argsort(query_ids[shuffle_indices])
        document_shuffle_indices = np.arange(
            self.document_count(),
            dtype=np.intc)[shuffle_indices[reshuffle_indices]]

        query_ids = query_ids[document_shuffle_indices]
        relevance_scores = relevance_scores[document_shuffle_indices]
        feature_vectors = feature_vectors[document_shuffle_indices]

    with open(filepath, 'w') as ofile:
        for score, qid, feature_vector in zip(relevance_scores,
                                              query_ids,
                                              feature_vectors):
            ofile.write('%d' % score)
            ofile.write(' qid:%d' % qid)
            for feature in zip(self.feature_indices, feature_vector):
                output = ' %d:%.12f' % feature
                ofile.write(output.rstrip('0').rstrip('.'))
            ofile.write('\n')
Example 36
def get_idxs_thread(comm, npoints):
    """
    Get indices for processor using Scatterv

    Note:
    -----
    Uppercase mpi4py functions require everything to be in
    C-compatible types or they will return garbage!
    """
    size = comm.Get_size()
    rank = comm.Get_rank()

    npoints_thread = np.zeros(size, dtype=np.intc)
    offsets_thread = np.zeros(size, dtype=np.intc)

    for idx in range(size):
        npoints_thread[idx] = npoints // size
        offsets_thread[idx] = sum(npoints_thread[:idx])

    for idx in range(npoints % size):
        npoints_thread[idx] += 1
        offsets_thread[idx + 1:] += 1

    npoints_thread = tuple(npoints_thread)
    offsets_thread = tuple(offsets_thread)

    idxs_thread = np.zeros(npoints_thread[rank], dtype=np.intc)
    idxs = np.arange(npoints, dtype=np.intc)

    comm.Scatterv((idxs, npoints_thread, offsets_thread, MPI.INT),
                  idxs_thread, root=0)

    return idxs_thread, npoints_thread, offsets_thread
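A hypothetical driver for the function above (assuming it lives in a script run under MPI, e.g. `mpiexec -n 4 python script.py`):

from mpi4py import MPI

comm = MPI.COMM_WORLD
idxs_thread, npoints_thread, offsets_thread = get_idxs_thread(comm,
                                                              npoints=10)
print(comm.Get_rank(), idxs_thread)  # each rank prints its share of indices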
Example 37
def get_ravel_offsets(npoints_thread, natoms):
    """
    Get lengths and offsets for gathering trajectory fragments
    """
    size = len(npoints_thread)
    ravel_lengths = np.zeros(size, dtype=np.intc)
    ravel_offsets = np.zeros(size, dtype=np.intc)

    for i in range(size):
        ravel_lengths[i] = npoints_thread[i] * 3 * natoms
        ravel_offsets[i] = sum(ravel_lengths[:i])

    ravel_lengths = tuple(ravel_lengths)
    ravel_offsets = tuple(ravel_offsets)

    return ravel_lengths, ravel_offsets
Example 38
def _count_vocab(self, raw_documents, fixed_vocab):
    """Create sparse feature matrix, and vocabulary where
    fixed_vocab=False
    """
    if fixed_vocab:
        vocabulary = self.vocabulary_
    else:
        # Add a new value when a new vocabulary item is seen
        vocabulary = defaultdict()
        vocabulary.default_factory = vocabulary.__len__

    analyze = self.build_analyzer()
    j_indices = _make_int_array()
    indptr = _make_int_array()
    indptr.append(0)
    for doc in raw_documents:
        for feature in analyze(doc):
            try:
                j_indices.append(vocabulary[feature])
            except KeyError:
                # Ignore out-of-vocabulary items for fixed_vocab=True
                continue
        indptr.append(len(j_indices))

    if not fixed_vocab:
        # disable defaultdict behaviour
        vocabulary = dict(vocabulary)
        if not vocabulary:
            raise ValueError("empty vocabulary; perhaps the documents only"
                             " contain stop words")

    j_indices = frombuffer_empty(j_indices, dtype=np.intc)
    indptr = np.frombuffer(indptr, dtype=np.intc)
    values = np.ones(len(j_indices))

    X = sp.csr_matrix((values, j_indices, indptr),
                      shape=(len(indptr) - 1, len(vocabulary)),
                      dtype=self.dtype)
    X.sum_duplicates()
    return vocabulary, X
Example 39
def _count_vocab(self, raw_documents, fixed_vocab):
    """Create sparse feature matrix, and vocabulary where
    fixed_vocab=False
    """
    if fixed_vocab:
        vocabulary = self.vocabulary_
    else:
        # Add a new value when a new vocabulary item is seen
        vocabulary = defaultdict()
        vocabulary.default_factory = vocabulary.__len__

    analyze = self.build_analyzer()
    j_indices = []
    indptr = _make_int_array()
    values = _make_int_array()
    indptr.append(0)
    for doc in raw_documents:
        feature_counter = {}
        for feature in analyze(doc):
            try:
                feature_idx = vocabulary[feature]
                if feature_idx not in feature_counter:
                    feature_counter[feature_idx] = 1
                else:
                    feature_counter[feature_idx] += 1
            except KeyError:
                # Ignore out-of-vocabulary items for fixed_vocab=True
                continue

        j_indices.extend(feature_counter.keys())
        values.extend(feature_counter.values())
        indptr.append(len(j_indices))

    if not fixed_vocab:
        # disable defaultdict behaviour
        vocabulary = dict(vocabulary)
        if not vocabulary:
            raise ValueError("empty vocabulary; perhaps the documents only"
                             " contain stop words")

    j_indices = np.asarray(j_indices, dtype=np.intc)
    indptr = np.frombuffer(indptr, dtype=np.intc)
    values = frombuffer_empty(values, dtype=np.intc)

    X = sp.csr_matrix((values, j_indices, indptr),
                      shape=(len(indptr) - 1, len(vocabulary)),
                      dtype=self.dtype)
    X.sort_indices()
    return vocabulary, X
Example 40
def _count_vocab_2(self, raw_documents, fixed_vocab):
    """Create sparse feature matrix, and vocabulary where
    fixed_vocab=False
    """
    if fixed_vocab:
        vocabulary = self.vocabulary_
    else:
        # Add a new value when a new vocabulary item is seen
        vocabulary = defaultdict()
        vocabulary.default_factory = vocabulary.__len__

    analyze = self.build_analyzer()
    j_indices = []
    indptr = _make_int_array()
    # values = _make_int_array()
    values = array.array(str("f"))
    indptr.append(0)
    for doc in raw_documents:
        feature_counter = {}
        for feature in analyze(doc):
            try:
                feature_idx = vocabulary[feature]
                if feature_idx not in feature_counter:
                    feature_counter[feature_idx] = 1
                else:
                    feature_counter[feature_idx] += 1
            except KeyError:
                # Ignore out-of-vocabulary items for fixed_vocab=True
                continue

        j_indices.extend(feature_counter.keys())
        values.extend([i * 1.0 / sum(feature_counter.values())
                       for i in feature_counter.values()])
        indptr.append(len(j_indices))

    if not fixed_vocab:
        # disable defaultdict behaviour
        vocabulary = dict(vocabulary)
        if not vocabulary:
            raise ValueError("empty vocabulary; perhaps the documents only"
                             " contain stop words")

    j_indices = np.asarray(j_indices, dtype=np.intc)
    indptr = np.frombuffer(indptr, dtype=np.intc)
    values = frombuffer_empty(values, dtype=np.float32)

    X = sp.csr_matrix((values, j_indices, indptr),
                      shape=(len(indptr) - 1, len(vocabulary)))
    X.sort_indices()
    return vocabulary, X