The following are code examples showing how to use numpy.argsort(). They are extracted from open source Python projects.
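All of the examples below are variations on the same idea: numpy.argsort returns the indices that would sort an array, and those indices (or their reverse) are used to reorder, rank, or pick the top-k elements of related arrays. A minimal sketch of that pattern (not taken from any of the projects below):

import numpy as np

scores = np.array([0.2, 0.9, 0.5])
ascending = np.argsort(scores)          # indices that sort scores: array([0, 2, 1])
descending = np.argsort(scores)[::-1]   # indices in descending order: array([1, 2, 0])
top_two = scores[descending[:2]]        # the two largest values: array([0.9, 0.5])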
Example 1
def PCA(data, num_components=None):
    # mean center the data
    data -= data.mean(axis=0)
    # calculate the covariance matrix
    R = np.cov(data, rowvar=False)
    # calculate eigenvectors & eigenvalues of the covariance matrix
    # use 'eigh' rather than 'eig' since R is symmetric,
    # the performance gain is substantial
    V, E = np.linalg.eigh(R)
    # sort eigenvalues in decreasing order
    idx = np.argsort(V)[::-1]
    E = E[:, idx]  # sort eigenvectors according to same index
    V = V[idx]
    # select the first n eigenvectors (n is desired dimension
    # of rescaled data array, or dims_rescaled_data)
    E = E[:, :num_components]
    # carry out the transformation on the data using eigenvectors
    # and return the re-scaled data, eigenvalues, and eigenvectors
    return np.dot(E.T, data.T).T, V, E
Example 2
def __SubDoWavelets(self, waveforms):
    scales = 4
    dimensions = 10
    nspk, ls = waveforms.shape
    cc = pywt.wavedec(waveforms, "haar", mode="symmetric", level=scales, axis=-1)
    cc = np.hstack(cc)
    sd = list()
    for i in range(ls):
        test_data = cc[:, i]
        thr_dist = np.std(test_data, ddof=1) * 3
        thr_dist_min = np.mean(test_data) - thr_dist
        thr_dist_max = np.mean(test_data) + thr_dist
        aux = test_data[(test_data > thr_dist_min) & (test_data < thr_dist_max)]
        if aux.size > 10:
            sd.append(self.__test_ks(aux))
        else:
            sd.append(0)
    ind = np.argsort(sd)
    ind = ind[::-1]
    coeff = ind[:dimensions]
    waveletspk = cc[:, coeff]
    return waveletspk
Example 3
def update_sort_idcs(self):
    # The selected points are sorted before all the other points -- an easy
    # way to achieve this is to add the maximum score to their score
    if self.current_order == 0:
        score = self.score_x
    elif self.current_order == 1:
        score = self.score_y
    elif self.current_order == 2:
        score = self.score_z
    else:
        raise AssertionError(self.current_order)
    score = score.copy()
    if len(self.selected_points):
        score[np.array(sorted(self.selected_points))] += score.max()
    self.sort_idcs = np.argsort(score)
Example 4
def removeTopPCs(X, numRemovePCs):
    t0 = time.time()
    X_mean = X.mean(axis=0)
    X -= X_mean
    XXT = symmetrize(blas.dsyrk(1.0, X, lower=0))
    s, U = la.eigh(XXT)
    if (np.min(s) < -1e-4):
        raise Exception('Negative eigenvalues found')
    s[s < 0] = 0
    ind = np.argsort(s)[::-1]
    U = U[:, ind]
    s = s[ind]
    s = np.sqrt(s)

    # remove null PCs
    ind = (s > 1e-6)
    U = U[:, ind]
    s = s[ind]

    V = X.T.dot(U / s)
    # print 'max diff:', np.max(((U*s).dot(V.T) - X)**2)
    X = (U[:, numRemovePCs:] * s[numRemovePCs:]).dot((V.T)[numRemovePCs:, :])
    X += X_mean
    return X
Example 5
def _translate(seq, f_init, f_next, trg_eos_idx, src_sel, trg_sel, k,
               cond_init_trg, normalize, n_best, **kwargs):
    sample, score = gen_sample(
        f_init, f_next, x=numpy.array(seq).reshape([len(seq), 1]),
        eos_idx=trg_eos_idx, src_selector=src_sel, trg_selector=trg_sel,
        k=k, maxlen=3*len(seq), stochastic=False, argmax=False,
        cond_init_trg=cond_init_trg, **kwargs)
    if normalize:
        lengths = numpy.array([len(s) for s in sample])
        score = score / lengths
    if n_best == 1:
        sidx = numpy.argmin(score)
    elif n_best > 1:
        sidx = numpy.argsort(score)[:n_best]
    else:
        raise ValueError('n_best cannot be negative!')
    return sample[sidx], score[sidx]
Example 6
def closestNeighbor(query, embedding_array, normed=False, top_k=1):
    '''Gets the index of the closest neighbor of embedding_array to the query point.
    Distance metric is cosine.

    SLOW. DO NOT USE THIS FOR RAPID COMPUTATION.
    '''
    embedding_array = numpy.array(embedding_array)
    if not normed:
        embedding_array = numpy.array([
            (embedding_array[i] / numpy.linalg.norm(embedding_array[i]))
            for i in range(embedding_array.shape[0])
        ])
    ## assuming embeddings are unit-normed by this point;
    ## norm(query) is a constant factor, so we can ignore it
    dists = numpy.array([
        numpy.dot(query, embedding_array[i])
        for i in range(embedding_array.shape[0])
    ])
    sorted_ixes = numpy.argsort(-1 * dists)
    return sorted_ixes[:top_k]
Example 7
def testMerge(self, dtype=dtype):
    testarray1 = range(1, 101)
    testarray2 = range(5, 106)
    a = numpy.empty((100, 2), dtype=dtype)
    b = numpy.empty((100, 2), dtype=dtype)
    merged = numpy.empty((200, 2), dtype=dtype)
    incompatible1 = numpy.empty((200, 3), dtype=dtype)
    incompatible2 = numpy.empty(200, dtype=dtype)
    a[:, 0] = numpy.arange(1, 101)
    a[:, 1] = numpy.arange(2, 102)
    b[:, 0] = numpy.arange(5, 105)
    b[:, 1] = numpy.arange(6, 106)
    ref = numpy.concatenate([a, b])
    ref = ref[numpy.argsort(ref[:, 0])]
    self.assertEqual(mapped_struct.index_merge(a, b, merged), 200)
    self.assertTrue((merged == ref).all())
    self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible1)
    self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible1, merged)
    self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible2)
    self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible2, merged)
Example 8
def __init__(self, pos, color, mode=None):
    """
    ===============     ==============================================================
    **Arguments:**
    pos                 Array of positions where each color is defined
    color               Array of RGBA colors.
                        Integer data types are interpreted as 0-255; float data types
                        are interpreted as 0.0-1.0
    mode                Array of color modes (ColorMap.RGB, HSV_POS, or HSV_NEG)
                        indicating the color space that should be used when
                        interpolating between stops. Note that the last mode value is
                        ignored. By default, the mode is entirely RGB.
    ===============     ==============================================================
    """
    self.pos = np.array(pos)
    order = np.argsort(self.pos)
    self.pos = self.pos[order]
    self.color = np.array(color)[order]
    if mode is None:
        mode = np.ones(len(pos))
    self.mode = mode
    self.stopsCache = {}
Example 9
def __loadChnTimeWave(self, f, selectChan):
    times = list()
    waveforms = list()
    spk_startswith = "spike_{0}".format(selectChan)
    for chn_unit in f["spikes"].keys():
        if chn_unit.startswith(spk_startswith):
            time = f["spikes"][chn_unit]["times"].value
            waveform = f["spikes"][chn_unit]["waveforms"].value
            times.append(time)
            waveforms.append(waveform)
    if times:
        times = np.hstack(times)
        waveforms = np.vstack(waveforms)
        sort_index = np.argsort(times)
        waveforms = waveforms[sort_index]
        times = times[sort_index]
        return times, waveforms
    else:
        return None, None
Example 10
def __load_waveforms(self, selectChan, file_name):
    spk_startswith = "spike_{0}".format(selectChan)
    with hp.File(file_name, "r") as f:
        times = list()
        waveforms = list()
        for chn_unit in f["spikes"].keys():
            if chn_unit.startswith(spk_startswith):
                tep_time = f["spikes"][chn_unit]["times"].value
                waveform = f["spikes"][chn_unit]["waveforms"].value
                times.append(tep_time)
                waveforms.append(waveform)
        if times:
            times = np.hstack(times)
            waveforms = np.vstack(waveforms)
            sort_index = np.argsort(times)
            waveforms = waveforms[sort_index]
            return waveforms
        else:
            return None
Example 11
def process_each_row_get_lable(row, vocabulary_index2word_label, vocabulary_word2index_label, result_list):
    """
    :param row: it is a list. length is number of labels. e.g. 2002
    :param vocabulary_index2word_label
    :param result_list
    :return: a label
    """
    label_list = list(np.argsort(row))
    label_list.reverse()
    #print("label_list:", label_list)  # a list, length is number of labels.
    for i, index in enumerate(label_list):
        # if index does not exist yet in result_list, and is not _PAD or _END, then it is the label we want.
        #print(i, "index:", index)
        flag1 = vocabulary_index2word_label[index] not in result_list
        flag2 = index != vocabulary_word2index_label[_PAD]
        flag3 = index != vocabulary_word2index_label[_END]
        if flag1 and flag2 and flag3:
            #print("going to return ")
            return vocabulary_index2word_label[index]

# write question id and labels to file system.
Example 12
def get_label_using_logits_batch(question_id_sublist, logits_batch, vocabulary_index2word_label, f, top_number=5):
    print("get_label_using_logits.shape:", np.array(logits_batch).shape)  # (1, 128, 2002)
    for i, logits in enumerate(logits_batch):
        index_list = np.argsort(logits)[-top_number:]
        #print("index_list:", index_list)
        index_list = index_list[::-1]
        label_list = []
        for index in index_list:
            #print("index:", index)
            label = vocabulary_index2word_label[index]
            label_list.append(label)
        # ('get_label_using_logits.label_list:', [u'-3423450385060590478', u'2838091149470021485', u'-3174907002942471215', u'-1812694399780494968', u'6815248286057533876'])
        # print("get_label_using_logits.label_list", label_list)
        write_question_id_with_labels(question_id_sublist[i], label_list, f)
    f.flush()

# get label using logits
Example 13
def filter_sort_unique(self, max_objval=float('Inf')):
    # filter
    if max_objval < float('inf'):
        good_idx = self.objvals <= max_objval
        self.objvals = self.objvals[good_idx]
        self.solutions = self.solutions[good_idx]

    if len(self.objvals) > 0:
        sort_idx = np.argsort(self.objvals)
        self.objvals = self.objvals[sort_idx]
        self.solutions = self.solutions[sort_idx]

        # unique
        b = np.ascontiguousarray(self.solutions).view(
            np.dtype((np.void, self.solutions.dtype.itemsize * self.P)))
        _, unique_idx = np.unique(b, return_index=True)
        self.objvals = self.objvals[unique_idx]
        self.solutions = self.solutions[unique_idx]
Example 14
def round_solution_pool(pool, constraints):
    pool.distinct().sort()
    P = pool.P
    L0_reg_ind = np.isnan(constraints['coef_set'].C_0j)
    L0_max = constraints['L0_max']
    rounded_pool = SolutionPool(P)

    for solution in pool.solutions:
        # sort from largest to smallest coefficients
        feature_order = np.argsort([-abs(x) for x in solution])
        rounded_solution = np.zeros(shape=(1, P))
        l0_norm_count = 0
        for k in range(0, P):
            j = feature_order[k]
            if not L0_reg_ind[j]:
                rounded_solution[0, j] = np.round(solution[j], 0)
            elif l0_norm_count < L0_max:
                rounded_solution[0, j] = np.round(solution[j], 0)
                l0_norm_count += L0_reg_ind[j]
        rounded_pool.add(objvals=np.nan, solutions=rounded_solution)

    rounded_pool.distinct().sort()
    return rounded_pool
Example 15
def listen(self, results):
    score_out = results['score_out']
    y_gt = results['y_gt']
    sort_idx = np.argsort(score_out, axis=-1)
    idx_gt = np.argmax(y_gt, axis=-1)
    correct = 0
    count = 0
    for kk, ii in enumerate(idx_gt):
        sort_idx_ = sort_idx[kk][::-1]
        for jj in sort_idx_[:self.top_k]:
            if ii == jj:
                correct += 1
                break
        count += 1
    # self.log.info('Correct {}/{}'.format(correct, count))
    self.correct += correct
    self.count += count
    self.step = int(results['step'])
    # self.log.info('Step {}'.format(self.step))
    pass
Example 16
def nn(model, text, vectors, query, k=5):
    """
    Return the nearest neighbour sentences to query
    text: list of sentences
    vectors: the corresponding representations for text
    query: a string to search
    """
    qf = encode(model, [query])
    qf /= norm(qf)
    scores = numpy.dot(qf, vectors.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]
    sentences = [text[a] for a in sorted_args[:k]]
    print('QUERY: ' + query)
    print('NEAREST: ')
    for i, s in enumerate(sentences):
        print(s, sorted_args[i])
Example 17
def _spatial_sort(glyph):
    from scipy.spatial.distance import cdist
    from numpy import argsort
    from numpy import argmin

    curr = argmin(glyph[:, 0])
    visited = set([curr])
    order = [curr]

    dd = cdist(glyph, glyph)

    while len(visited) < len(glyph):
        row = dd[curr, :]
        for i in argsort(row):
            if row[i] <= 0.0 or i == curr or i in visited:
                continue
            order.append(i)
            visited.add(i)
            break
    glyph[:, :] = glyph[order, :]
Example 18
def label_ranking_reciprocal_rank(label,   # [sent_num]
                                  preds):  # [sent_num]
    """
    Calculating the reciprocal rank according to definition,
    """
    rank = np.argsort(preds)[::-1]
    #pos_rank = np.take(rank, np.where(label == 1)[0])
    #return np.mean(1.0 / pos_rank)
    if_find = False
    pos = 0
    for r in rank:
        pos += 1
        if label[r] == 1:
            first_pos_r = pos
            if_find = True
            break
    assert(if_find)
    return 1.0 / first_pos_r
Example 19
def _matrix_inverse(self, matrix):
    """
    Computes inverse of a matrix.
    """
    matrix = np.array(matrix)
    n_features = matrix.shape[0]
    rank = np.linalg.matrix_rank(matrix)

    if rank == n_features:
        return np.linalg.inv(matrix)
    else:
        # Matrix is not full rank, so use Hadi's technique to compute inverse
        # Reference: Ali S. Hadi (1992) "Identifying Multiple Outliers in Multivariate Data" eq. 2.3, 2.4
        eigenValues, eigenVectors = np.linalg.eig(matrix)
        eigenValues = np.abs(eigenValues)  # to deal with -0 values
        idx = eigenValues.argsort()[::-1]
        eigenValues = eigenValues[idx]
        eigenVectors = eigenVectors[:, idx]

        s = eigenValues[eigenValues != 0].min()
        w = [1 / max(e, s) for e in eigenValues]
        W = w * np.eye(n_features)

        return eigenVectors.dot(W).dot(eigenVectors.T)
Example 20
def get_1000G_snps(sumstats, out_file):
    sf = np.loadtxt(sumstats, dtype=str, skiprows=1)
    h5f = h5py.File('ref/Misc/1000G_SNP_info.h5', 'r')
    rf = h5f['snp_chr'][:]
    h5f.close()
    ind1 = np.in1d(sf[:, 1], rf[:, 2])
    ind2 = np.in1d(rf[:, 2], sf[:, 1])
    sf1 = sf[ind1]
    rf1 = rf[ind2]
    ### check order ###
    if sum(sf1[:, 1] == rf1[:, 2]) == len(rf1[:, 2]):
        print 'Good!'
    else:
        print 'Shit happens, sorting sf1 to have the same order as rf1'
        O1 = np.argsort(sf1[:, 1])
        O2 = np.argsort(rf1[:, 2])
        O3 = np.argsort(O2)
        sf1 = sf1[O1][O3]
    out = ['hg19chrc snpid a1 a2 bp or p'+'\n']
    for i in range(len(sf1[:, 1])):
        out.append(sf1[:, 0][i]+' '+sf1[:, 1][i]+' '+sf1[:, 2][i]+' '+sf1[:, 3][i]+' '+rf1[:, 1][i]+' '+sf1[:, 5][i]+' '+sf1[:, 6][i]+'\n')
    ff = open(out_file, "w")
    ff.writelines(out)
    ff.close()
Example 21
def plot_heatmaps(data, mis, column_label, cont, topk=30, prefix=''):
    cmap = sns.cubehelix_palette(as_cmap=True, light=.9)
    m, nv = mis.shape
    for j in range(m):
        inds = np.argsort(- mis[j, :])[:topk]
        if len(inds) >= 2:
            plt.clf()
            order = np.argsort(cont[:, j])
            subdata = data[:, inds][order].T
            subdata -= np.nanmean(subdata, axis=1, keepdims=True)
            subdata /= np.nanstd(subdata, axis=1, keepdims=True)
            columns = [column_label[i] for i in inds]
            sns.heatmap(subdata, vmin=-3, vmax=3, cmap=cmap, yticklabels=columns,
                        xticklabels=False, mask=np.isnan(subdata))
            filename = '{}/heatmaps/group_num={}.png'.format(prefix, j)
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            plt.title("Latent factor {}".format(j))
            plt.yticks(rotation=0)
            plt.savefig(filename, bbox_inches='tight')
            plt.close('all')
            #plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=cont[:, j],
            #          outfile=prefix + '/relationships/group_num=' + str(j), latent=labels[:, j], alpha=0.1)
Example 22
def plot_top_relationships(data, corex, labels, column_label, topk=5, prefix=''):
    dual = (corex.moments['X_i Y_j'] * corex.moments['X_i Z_j']).T
    alpha = dual > 0.04
    cy = corex.moments['ry']
    m, nv = alpha.shape
    for j in range(m):
        inds = np.where(alpha[j] > 0)[0]
        inds = inds[np.argsort(- dual[j][inds])][:topk]
        if len(inds) >= 2:
            if dual[j, inds[0]] > 0.1:
                factor = labels[:, j]
                title = '$Y_{%d}$' % j
            else:
                k = np.argmax(np.abs(cy[j]))
                if k == j:
                    k = np.argsort(-np.abs(cy[j]))[1]
                factor = corex.moments['X_i Z_j'][inds[0], j] * labels[:, j] + corex.moments['X_i Z_j'][inds[0], k] * labels[:, k]
                title = '$Y_{%d} + Y_{%d}$' % (j, k)
            plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=factor,
                      outfile=prefix + '/relationships/group_num=' + str(j), title=title)
Example 23
def trim(g, max_parents=False, max_children=False):
    for node in g:
        if max_parents:
            parents = list(g.successors(node))
            weights = [g.edge[node][parent]['weight'] for parent in parents]
            for weak_parent in np.argsort(weights)[:-max_parents]:
                g.remove_edge(node, parents[weak_parent])
        if max_children:
            children = g.predecessors(node)
            weights = [g.edge[child][node]['weight'] for child in children]
            for weak_child in np.argsort(weights)[:-max_children]:
                g.remove_edge(children[weak_child], node)
    return g

# Misc. utilities
Example 24
def deviation_plot(rp, variable_name, slope_cutoff=1, average_cutoff=2.):
    average_panel = rp.value_panel(variable_name, types=['average'])
    average_panel = (average_panel.T - np.median(average_panel, axis=1)).T
    average_panel.sort()
    average_ranges = np.max(average_panel, axis=1) - np.min(average_panel, axis=1)
    average_panel = average_panel[np.argsort(average_ranges)][::-1]

    slope_panel = rp.value_panel(variable_name, types=['slope'])
    slope_panel = (slope_panel.T - np.median(slope_panel, axis=1)).T
    slope_panel.sort()
    slope_ranges = np.max(slope_panel, axis=1) - np.min(slope_panel, axis=1)
    slope_panel = slope_panel[np.argsort(slope_ranges)][::-1]

    return _multiplot(rp.dataset, variable_name, slope_panel, average_panel,
                      left_vmin=-1.0*slope_cutoff, left_vmax=slope_cutoff,
                      right_vmin=-1.0*average_cutoff, right_vmax=average_cutoff)
Example 25
def get_local_words(preds, vocab, NEs=[], k=50):
    """
    given the word probabilities over many coordinates,
    first normalize the probability of each word in different
    locations to get a probability distribution, then compute
    the entropy of the word's distribution over all coordinates
    and return the words that are low entropy and are not
    named entities.
    """
    # normalize the probabilities of each vocab word using entropy
    normalized_preds = normalize(preds, norm='l1', axis=0)
    entropies = stats.entropy(normalized_preds)
    sorted_indices = np.argsort(entropies)
    sorted_local_words = np.array(vocab)[sorted_indices].tolist()

    filtered_local_words = []
    NEset = set(NEs)
    for word in sorted_local_words:
        if word in NEset:
            continue
        filtered_local_words.append(word)
    return filtered_local_words[0:k]
Example 26
def get_feature_importance(list_of_features):
    n_estimators = 10000
    random_state = 0
    n_jobs = 4
    x_train = data_frame[list_of_features]
    y_train = data_frame.iloc[:, -1]
    feat_labels = data_frame.columns[1:]
    forest = BaggingRegressor(n_estimators=n_estimators, random_state=random_state, n_jobs=n_jobs)
    forest.fit(x_train, y_train)
    importances = forest.feature_importances_
    indices = np.argsort(importances)[::-1]
    for f in range(x_train.shape[1]):
        print("%2d) %-*s %f" % (f + 1, 30, feat_labels[indices[f]], importances[indices[f]]))
    plt.title("Feature Importance")
    plt.bar(range(x_train.shape[1]), importances[indices], color='lightblue', align='center')
    plt.xticks(range(x_train.shape[1]), feat_labels[indices], rotation=90)
    plt.xlim([-1, x_train.shape[1]])
    plt.tight_layout()
    plt.show()
Example 27
def cr(self):
    # Composite Reliability
    composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

    for i in range(self.lenlatent):
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == self.latent[i]]]
        p = len(block.columns)

        if (p != 1):
            cor_mat = np.cov(block.T)
            evals, evecs = np.linalg.eig(cor_mat)
            U, S, V = np.linalg.svd(cor_mat, full_matrices=False)

            indices = np.argsort(evals)
            indices = indices[::-1]
            evecs = evecs[:, indices]
            evals = evals[indices]

            loadings = V[0, :] * np.sqrt(evals[0])

            numerador = np.sum(abs(loadings))**2
            denominador = numerador + (p - np.sum(loadings ** 2))
            cr = numerador / denominador
            composite[self.latent[i]] = cr
        else:
            composite[self.latent[i]] = 1

    composite = composite.T
    return(composite)
Example 28
def _get_sorted_channels_(self, all_keys, pattern):
    sub_list = [f for f in all_keys if pattern in f]
    all_channels = [int(f.split(pattern)[1]) for f in sub_list]
    idx = numpy.argsort(all_channels)
    return sub_list, idx
Example 29
def set_streams(self, stream_mode):
    if stream_mode == 'single-file':
        sources = []
        to_write = []
        count = 0
        params = self.get_description()
        my_file = h5py.File(self.file_name)
        all_matches = [re.findall('\d+', u) for u in my_file.keys()]
        all_streams = []
        for m in all_matches:
            if len(m) > 0:
                all_streams += [int(m[0])]

        idx = numpy.argsort(all_streams)

        for i in xrange(len(all_streams)):
            params['h5_key'] = my_file.keys()[idx[i]]
            new_data = type(self)(self.file_name, params)
            sources += [new_data]
            to_write += ['We found the datafile %s with t_start %d and duration %d'
                         % (new_data.file_name, new_data.t_start, new_data.duration)]

        print_and_log(to_write, 'debug', logger)
        return sources
    elif stream_mode == 'multi-files':
        return H5File.set_streams(stream_mode)
Example 30
def set_streams(self, stream_mode):
    if stream_mode == 'single-file':
        sources = []
        to_write = []
        count = 0
        params = self.get_description()
        my_file = h5py.File(self.file_name)
        all_matches = my_file.get('recordings').keys()
        all_streams = []
        for m in all_matches:
            all_streams += [int(m)]

        idx = numpy.argsort(all_streams)

        for count in xrange(len(all_streams)):
            params['recording_number'] = all_streams[idx[count]]
            new_data = type(self)(self.file_name, params)
            sources += [new_data]
            to_write += ['We found the datafile %s with t_start %d and duration %d'
                         % (new_data.file_name, new_data.t_start, new_data.duration)]

        print_and_log(to_write, 'debug', logger)
        return sources
    elif stream_mode == 'multi-files':
        return H5File.set_streams(stream_mode)
Example 31
def rho_estimation(data, update=None, compute_rho=True, mratio=0.01):
    N = len(data)
    rho = numpy.zeros(N, dtype=numpy.float32)

    if update is None:
        dist = distancematrix(data)
        didx = lambda i, j: i*N + j - i*(i+1)//2 - i - 1
        nb_selec = max(5, int(mratio*N))
        sdist = {}

        if compute_rho:
            for i in xrange(N):
                indices = numpy.concatenate((didx(i, numpy.arange(i+1, N)), didx(numpy.arange(0, i-1), i)))
                tmp = numpy.argsort(numpy.take(dist, indices))[:nb_selec]
                sdist[i] = numpy.take(dist, numpy.take(indices, tmp))
                rho[i] = numpy.mean(sdist[i])
    else:
        M = len(update[0])
        nb_selec = max(5, int(mratio*M))
        sdist = {}

        for i in xrange(N):
            dist = distancematrix(data[i].reshape(1, len(data[i])), update[0]).ravel()
            all_dist = numpy.concatenate((dist, update[1][i]))
            idx = numpy.argsort(all_dist)[:nb_selec]
            sdist[i] = numpy.take(all_dist, idx)
            rho[i] = numpy.mean(sdist[i])

    return rho, dist, sdist, nb_selec
Example 32
def update_data_plot(self):
    reverse_sort = np.argsort(self.sort_idcs)

    if len(self.inspect_points):
        inspect = reverse_sort[np.array(sorted(self.inspect_points))]
        data = numpy.vstack((np.ones(len(inspect))*(2*self.raw_lags[-1]-self.raw_lags[-2]),
                             inspect+0.5)).T
        self.inspect_markers.set_offsets(data)
        self.inspect_markers.set_color(self.inspect_colors)
    else:
        self.inspect_markers.set_offsets([])
        self.inspect_markers.set_color([])

    self.ui.data_overview.draw_idle()
Example 33
def eigenDecompose(self, X, K, normalize=True):
    if (X.shape[1] >= X.shape[0]):
        s, U = la.eigh(K)
    else:
        U, s, _ = la.svd(X, check_finite=False, full_matrices=False)
        if (s.shape[0] < U.shape[1]):
            s = np.concatenate((s, np.zeros(U.shape[1]-s.shape[0])))  # note: can use low-rank formulas here
        s = s**2
        if normalize:
            s /= float(X.shape[1])
    if (np.min(s) < -1e-10):
        raise Exception('Negative eigenvalues found')
    s[s < 0] = 0
    ind = np.argsort(s)[::-1]
    U = U[:, ind]
    s = s[ind]
    return s, U
Example 34
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")
    """
    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])
    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]
    so_far = [0, 0]
    j = 0
    for i in reversed(range(len(y_sorted))):
        so_far[y_sorted[i]] += 1
        if so_far[1] >= int(np.floor(recall * n_positive)):
            j = i
            break
    so_far = [0, 0]
    if false_positive_margin == 0:
        return y_pred_sorted[j]
    k = 0
    for i in reversed(range(j)):
        so_far[y_sorted[i]] += 1
        if so_far[0] >= false_positive_margin * n_negative:
            k = i
            break
    return y_pred_sorted[k]
Example 35
def relabel_by_size(labels):
    """ Relabel clusters so they are sorted by number of members, descending.
    Args:
        labels (np.array(int)): 1-based cluster labels
    """
    order = np.argsort(np.argsort(-np.bincount(labels)))
    return 1 + order[labels]
Example 36
def adjust_pvalue_bh(p):
    """ Multiple testing correction of p-values using the Benjamini-Hochberg procedure """
    descending = np.argsort(p)[::-1]
    # q = p * N / k where p = p-value, N = # tests, k = p-value rank
    scale = float(len(p)) / np.arange(len(p), 0, -1)
    q = np.minimum(1, np.minimum.accumulate(scale * p[descending]))

    # Return to original order
    return q[np.argsort(descending)]
Example 37
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
        reads (np.array(int)) - Read pairs for each UMI
        subsample_rate (float) - Subsample reads to this fraction.
        Returns threshold (int) - The RPPU threshold in the subsampled space '''

    if len(np.unique(reads)) < 2:
        print 'Skipping RPPU threshold calculation.'
        return 1

    print 'RPPU subsample rate: %0.4f' % subsample_rate

    reads = np.random.binomial(reads, subsample_rate)
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print 'Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.'
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)

    print 'New N50: %d:' % new_n50

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1, 1)))
    kmeans.predict(log_reads.reshape((-1, 1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]

    print 'RPPU component means: ' + str(list(iter(np.exp(kmeans.cluster_centers_))))
    print 'RPPU component members: ' + str(np.bincount(kmeans.labels_))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold
Example 38
def rebalance(self):
    """ Rebalances the binary heap. Takes O(n log n) time to run.
    Avoid using, when possible. """
    # Sort array by priority
    sorted_indices_by_priority = np.argsort(-self.pq_array[:, 0])
    self.pq_array = self.pq_array[sorted_indices_by_priority]
    pq_indices = range(self.size)

    # Create hash tables
    self.pq_hash = dict(zip(pq_indices, self.pq_array[:, 1]))
    self.exp_hash = dict(zip(self.pq_array[:, 1], pq_indices))
Example 39
def rank_output(self):
    self.output_ranks = np.argsort(self.output_raw, axis=1, kind='mergesort').ravel()[::-1].astype(np.int32)
Example 40
def overlay_emojiface(probs):
    if max(probs) > 0.8:
        emotion = emotions[np.argmax(probs)]
        return 'emoji/{}-{}.png'.format(emotion, emotion)
    else:
        index1, index2 = np.argsort(probs)[::-1][:2]
        emotion1 = emotions[index1]
        emotion2 = emotions[index2]
        return 'emoji/{}-{}.png'.format(emotion1, emotion2)
Example 41
def __call__(self, words, weights, vocabulary_max):
    if len(words) < vocabulary_max * self.trigger_ratio:
        return words, weights

    if not isinstance(words, numpy.ndarray):
        words = numpy.array(words)

    # Tail optimization does not help with very large vocabularies
    if len(words) > vocabulary_max * 2:
        indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
        indices = indices[-vocabulary_max:]
        words = words[indices]
        weights = weights[indices]
        return words, weights

    # Vocabulary typically consists of these three parts:
    # 1) the core - we found its border - `core_end` - 15%
    # 2) the body - 70%
    # 3) the minor tail - 15%
    # (1) and (3) are roughly the same size
    # (3) can be safely discarded, (2) can be discarded with care,
    # (1) shall never be discarded.
    sorter = numpy.argsort(weights)[::-1]
    weights = weights[sorter]
    trend_start = int(len(weights) * 0.2)
    trend_finish = int(len(weights) * 0.8)
    z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
                      numpy.log(weights[trend_start:trend_finish]),
                      1)
    exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
    avg_error = numpy.abs(weights[trend_start:trend_finish] -
                          exp_z[trend_start:trend_finish]).mean()
    tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
    weights = weights[:-tail_size][:vocabulary_max]
    words = words[sorter[:-tail_size]][:vocabulary_max]

    return words, weights