Python numpy.argsort() Usage Examples

The following code examples show how to use numpy.argsort(). They are extracted from open source Python projects.
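
As a quick reference for the examples below (this primer is not taken from any of the projects): numpy.argsort() returns the indices that would sort an array in ascending order. Most of the snippets on this page build on that by reversing the result, negating the input, or slicing it to obtain a descending order or a top-k selection. A minimal sketch:

import numpy as np

scores = np.array([0.3, 0.9, 0.1, 0.6])

asc = np.argsort(scores)            # ascending indices -> [2, 0, 3, 1]
desc = np.argsort(scores)[::-1]     # descending order by reversing -> [1, 3, 0, 2]
desc_alt = np.argsort(-scores)      # descending order by negating the values
top2 = desc[:2]                     # indices of the two largest scores -> [1, 3]

print(scores[asc])                  # values sorted ascending: [0.1 0.3 0.6 0.9]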

Example 1

def PCA(data, num_components=None):
    # mean center the data
    data -= data.mean(axis=0)
    # calculate the covariance matrix
    R = np.cov(data, rowvar=False)
    # calculate eigenvectors & eigenvalues of the covariance matrix
    # use 'eigh' rather than 'eig' since R is symmetric,
    # the performance gain is substantial
    V, E = np.linalg.eigh(R)
    # sort eigenvalues in decreasing order
    idx = np.argsort(V)[::-1]
    # sort eigenvectors according to the same index
    E = E[:,idx]
    V = V[idx]
    # select the first n eigenvectors (n is desired dimension
    # of rescaled data array, or dims_rescaled_data)
    E = E[:, :num_components]
    # carry out the transformation on the data using eigenvectors
    # and return the re-scaled data, eigenvalues, and eigenvectors
    return np.dot(E.T, data.T).T, V, E 
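
A recurring pattern in Example 1: a single argsort over the eigenvalues is reused to reorder both the eigenvalue vector and the eigenvector columns, so the pairs stay aligned. A standalone sketch of that idea (illustrative only, not part of the project code above):

import numpy as np

R = np.cov(np.random.rand(50, 3), rowvar=False)
vals, vecs = np.linalg.eigh(R)        # eigh returns eigenvalues in ascending order
idx = np.argsort(vals)[::-1]          # one index array, descending by eigenvalue
vals = vals[idx]                      # reorder the eigenvalues...
vecs = vecs[:, idx]                   # ...and the matching eigenvector columns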

Example 2

def __SubDoWavelets(self,waveforms):
        scales = 4
        dimensions = 10
        nspk,ls = waveforms.shape
        cc = pywt.wavedec(waveforms,"haar",mode="symmetric",level=scales,axis=-1)
        cc = np.hstack(cc)

        sd = list()
        for i in range(ls):
            test_data = cc[:,i]
            thr_dist = np.std(test_data,ddof=1)*3
            thr_dist_min = np.mean(test_data)-thr_dist
            thr_dist_max = np.mean(test_data)+thr_dist
            aux = test_data[(test_data>thr_dist_min)&(test_data<thr_dist_max)]
            if aux.size > 10:
                sd.append(self.__test_ks(aux))
            else:
                sd.append(0)
        ind = np.argsort(sd)
        ind = ind[::-1]
        coeff = ind[:dimensions]
        
        waveletspk = cc[:,coeff]
        return waveletspk 

Example 3

def update_sort_idcs(self):
        # The selected points are sorted before all the other points -- an easy
        # way to achieve this is to add the maximum score to their score
        if self.current_order == 0:
            score = self.score_x
        elif self.current_order == 1:
            score = self.score_y
        elif self.current_order == 2:
            score = self.score_z
        else:
            raise AssertionError(self.current_order)
        score = score.copy()
        if len(self.selected_points):
            score[np.array(sorted(self.selected_points))] += score.max()

        self.sort_idcs = np.argsort(score) 

Example 4

def removeTopPCs(X, numRemovePCs):	
	t0 = time.time()
	X_mean = X.mean(axis=0)
	X -= X_mean
	XXT = symmetrize(blas.dsyrk(1.0, X, lower=0))
	s,U = la.eigh(XXT)
	if (np.min(s) < -1e-4): raise Exception('Negative eigenvalues found')
	s[s<0]=0
	ind = np.argsort(s)[::-1]
	U = U[:, ind]
	s = s[ind]
	s = np.sqrt(s)
		
	#remove null PCs
	ind = (s>1e-6)
	U = U[:, ind]
	s = s[ind]
	
	V = X.T.dot(U/s)	
	#print 'max diff:', np.max(((U*s).dot(V.T) - X)**2)
	X = (U[:, numRemovePCs:]*s[numRemovePCs:]).dot((V.T)[numRemovePCs:, :])
	X += X_mean
	
	return X 

Example 5

def _translate(seq, f_init, f_next, trg_eos_idx, src_sel, trg_sel,
               k, cond_init_trg, normalize, n_best, **kwargs):
    sample, score = gen_sample(
        f_init, f_next, x=numpy.array(seq).reshape([len(seq), 1]),
        eos_idx=trg_eos_idx, src_selector=src_sel, trg_selector=trg_sel,
        k=k, maxlen=3*len(seq), stochastic=False, argmax=False,
        cond_init_trg=cond_init_trg, **kwargs)
    if normalize:
        lengths = numpy.array([len(s) for s in sample])
        score = score / lengths
    if n_best == 1:
        sidx = numpy.argmin(score)
    elif n_best > 1:
        sidx = numpy.argsort(score)[:n_best]
    else:
        raise ValueError('n_best cannot be negative!')
    return sample[sidx], score[sidx] 

Example 6

def closestNeighbor(query, embedding_array, normed=False, top_k=1):
    '''Gets the index of the closest neighbor of embedding_array
    to the query point.  Distance metric is cosine.

    SLOW. DO NOT USE THIS FOR RAPID COMPUTATION.
    '''
    embedding_array = numpy.array(embedding_array)
    if not normed:
        embedding_array = numpy.array([
            (embedding_array[i] / numpy.linalg.norm(embedding_array[i]))
                for i in range(embedding_array.shape[0])
        ])

    ## assuming embeddings are unit-normed by this point;
    ## norm(query) is a constant factor, so we can ignore it
    dists = numpy.array([
        numpy.dot(query, embedding_array[i])
            for i in range(embedding_array.shape[0])
    ])
    sorted_ixes = numpy.argsort(-1 * dists)
    return sorted_ixes[:top_k] 

Example 7

def testMerge(self, dtype=dtype):
            testarray1 = range(1,101)
            testarray2 = range(5,106)
            a = numpy.empty((100,2), dtype=dtype)
            b = numpy.empty((100,2), dtype=dtype)
            merged = numpy.empty((200,2), dtype=dtype)
            incompatible1 = numpy.empty((200,3), dtype=dtype)
            incompatible2 = numpy.empty(200, dtype=dtype)
            a[:,0] = numpy.arange(1,101)
            a[:,1] = numpy.arange(2,102)
            b[:,0] = numpy.arange(5,105)
            b[:,1] = numpy.arange(6,106)
            ref = numpy.concatenate([a,b])
            ref = ref[numpy.argsort(ref[:,0])]
            self.assertEqual(mapped_struct.index_merge(a, b, merged), 200)
            self.assertTrue((merged == ref).all())
            self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible1)
            self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible1, merged)
            self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible2)
            self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible2, merged) 

Example 8

def __init__(self, pos, color, mode=None):
        """
        ===============     ==============================================================
        **Arguments:**
        pos                 Array of positions where each color is defined
        color               Array of RGBA colors.
                            Integer data types are interpreted as 0-255; float data types
                            are interpreted as 0.0-1.0
        mode                Array of color modes (ColorMap.RGB, HSV_POS, or HSV_NEG)
                            indicating the color space that should be used when
                            interpolating between stops. Note that the last mode value is
                            ignored. By default, the mode is entirely RGB.
        ===============     ==============================================================
        """
        self.pos = np.array(pos)
        order = np.argsort(self.pos)
        self.pos = self.pos[order]
        self.color = np.array(color)[order]
        if mode is None:
            mode = np.ones(len(pos))
        self.mode = mode
        self.stopsCache = {} 

Example 9

def __loadChnTimeWave(self,f,selectChan):
        times = list()
        waveforms = list()
        spk_startswith = "spike_{0}".format(selectChan)
        for chn_unit in f["spikes"].keys():
            if chn_unit.startswith(spk_startswith):
                time = f["spikes"][chn_unit]["times"].value
                waveform = f["spikes"][chn_unit]["waveforms"].value
                times.append(time)
                waveforms.append(waveform)
        if times:
            times = np.hstack(times)
            waveforms = np.vstack(waveforms)
            sort_index = np.argsort(times)
            waveforms = waveforms[sort_index]
            times = times[sort_index]
            return times,waveforms
        else:
            return None,None 

Example 10

def __load_waveforms(self,selectChan,file_name):
        spk_startswith = "spike_{0}".format(selectChan)
        with hp.File(file_name,"r") as f:
            times = list()
            waveforms = list()
            for chn_unit in f["spikes"].keys():
                if chn_unit.startswith(spk_startswith):
                    tep_time = f["spikes"][chn_unit]["times"].value
                    waveform = f["spikes"][chn_unit]["waveforms"].value
                    times.append(tep_time)
                    waveforms.append(waveform)
            if times:
                times = np.hstack(times)
                waveforms = np.vstack(waveforms)
                sort_index = np.argsort(times)
                waveforms = waveforms[sort_index]
                return waveforms
            else:
                return None 

Example 11

def process_each_row_get_lable(row,vocabulary_index2word_label,vocabulary_word2index_label,result_list):
    """
    :param row: a list whose length is the number of labels, e.g. 2002
    :param vocabulary_index2word_label
    :param vocabulary_word2index_label
    :param result_list
    :return: a label
    """
    label_list=list(np.argsort(row))
    label_list.reverse()
    #print("label_list:",label_list) # a list,length is number of labels.
    for i,index in enumerate(label_list): # if the index is not already in result_list and is not _PAD or _END, it is the label we want.
        #print(i,"index:",index)
        flag1=vocabulary_index2word_label[index] not in result_list
        flag2=index!=vocabulary_word2index_label[_PAD]
        flag3=index!=vocabulary_word2index_label[_END]
        if flag1 and flag2 and flag3:
            #print("going to return ")
            return vocabulary_index2word_label[index]

# write question id and labels to file system. 

Example 12

def get_label_using_logits_batch(question_id_sublist, logits_batch, vocabulary_index2word_label, f, top_number=5):
    print("get_label_using_logits.shape:", np.array(logits_batch).shape) # (1, 128, 2002))
    for i, logits in enumerate(logits_batch):
        index_list = np.argsort(logits)[-top_number:]
        #print("index_list:",index_list)
        index_list = index_list[::-1]
        label_list = []
        for index in index_list:
            #print("index:",index)
            label = vocabulary_index2word_label[index]
            label_list.append(
                label)  # ('get_label_using_logits.label_list:', [u'-3423450385060590478', u'2838091149470021485', u'-3174907002942471215', u'-1812694399780494968', u'6815248286057533876'])
        # print("get_label_using_logits.label_list",label_list)
        write_question_id_with_labels(question_id_sublist[i], label_list, f)
    f.flush()

# get label using logits 

Example 13

def filter_sort_unique(self, max_objval=float('Inf')):
        # filter
        if max_objval < float('inf'):
            good_idx = self.objvals <= max_objval
            self.objvals = self.objvals[good_idx]
            self.solutions = self.solutions[good_idx]

        if len(self.objvals) > 0:
            sort_idx = np.argsort(self.objvals)
            self.objvals = self.objvals[sort_idx]
            self.solutions = self.solutions[sort_idx]

            # unique
            b = np.ascontiguousarray(self.solutions).view(
                np.dtype((np.void, self.solutions.dtype.itemsize * self.P)))
            _, unique_idx = np.unique(b, return_index=True)
            self.objvals = self.objvals[unique_idx]
            self.solutions = self.solutions[unique_idx] 

Example 14

def round_solution_pool(pool, constraints):

    pool.distinct().sort()
    P = pool.P
    L0_reg_ind = np.isnan(constraints['coef_set'].C_0j)
    L0_max = constraints['L0_max']
    rounded_pool = SolutionPool(P)

    for solution in pool.solutions:
        # sort from largest to smallest coefficients
        feature_order = np.argsort([-abs(x) for x in solution])
        rounded_solution = np.zeros(shape=(1, P))
        l0_norm_count = 0
        for k in range(0, P):
            j = feature_order[k]
            if not L0_reg_ind[j]:
                rounded_solution[0, j] = np.round(solution[j], 0)
            elif l0_norm_count < L0_max:
                rounded_solution[0, j] = np.round(solution[j], 0)
                l0_norm_count += L0_reg_ind[j]

        rounded_pool.add(objvals=np.nan, solutions=rounded_solution)

    rounded_pool.distinct().sort()
    return rounded_pool 

Example 15

def listen(self, results):
        score_out = results['score_out']
        y_gt = results['y_gt']
        sort_idx = np.argsort(score_out, axis=-1)
        idx_gt = np.argmax(y_gt, axis=-1)
        correct = 0
        count = 0
        for kk, ii in enumerate(idx_gt):
            sort_idx_ = sort_idx[kk][::-1]
            for jj in sort_idx_[:self.top_k]:
                if ii == jj:
                    correct += 1
                    break
            count += 1
        # self.log.info('Correct {}/{}'.format(correct, count))
        self.correct += correct
        self.count += count
        self.step = int(results['step'])
        # self.log.info('Step {}'.format(self.step))
        pass 
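
In the example above, np.argsort(score_out, axis=-1) sorts each row of the score matrix independently, so reversing a row of sort_idx yields that sample's class indices from highest to lowest score. A small illustration of the axis argument (made-up data, for illustration only):

import numpy as np

scores = np.array([[0.1, 0.7, 0.2],
                   [0.5, 0.3, 0.9]])
row_order = np.argsort(scores, axis=-1)   # each row sorted independently
# row_order -> [[0, 2, 1],
#               [1, 0, 2]]
top1 = row_order[:, -1]                   # index of the largest entry per row -> [1, 2]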

Example 16

def nn(model, text, vectors, query, k=5):
	"""
	Return the nearest neighbour sentences to query
	text: list of sentences
	vectors: the corresponding representations for text
	query: a string to search
	"""
	qf = encode(model, [query])
	qf /= norm(qf)
	scores = numpy.dot(qf, vectors.T).flatten()
	sorted_args = numpy.argsort(scores)[::-1]
	sentences = [text[a] for a in sorted_args[:k]]
	print('QUERY: ' + query)
	print('NEAREST: ')
	for i, s in enumerate(sentences):
		print(s, sorted_args[i]) 

Example 17

def _spatial_sort(glyph):
  from scipy.spatial.distance import cdist
  from numpy import argsort
  from numpy import argmin

  curr = argmin(glyph[:,0])
  visited = set([curr])
  order = [curr]

  dd = cdist(glyph, glyph)

  while len(visited)<len(glyph):
    row = dd[curr,:]

    for i in argsort(row):
      if row[i]<=0.0 or i==curr or i in visited:
        continue
      order.append(i)
      visited.add(i)
      break
  glyph[:,:] = glyph[order,:] 

Example 18

def label_ranking_reciprocal_rank(label,  # [sent_num]
                                  preds): # [sent_num]
  """ Calcualting the reciprocal rank according to definition,
  """
  rank = np.argsort(preds)[::-1]

  #pos_rank = np.take(rank, np.where(label == 1)[0])
  #return np.mean(1.0 / pos_rank)
  
  if_find = False 
  pos = 0
  for r in rank:
      pos += 1
      if label[r] == 1:
          first_pos_r = pos
          if_find = True
          break

  assert(if_find)

  return 1.0 / first_pos_r 

Example 19

def _matrix_inverse(self, matrix):
        """
        Computes inverse of a matrix.
        """
        matrix = np.array(matrix)
        n_features = matrix.shape[0]
        rank = np.linalg.matrix_rank(matrix)

        if rank == n_features:
            return np.linalg.inv(matrix)
        else:
            # Matrix is not full rank, so use Hadi's technique to compute inverse
            # Reference: Ali S. Hadi (1992) "Identifying Multiple Outliers in Multivariate Data" eg. 2.3, 2.4
            eigenValues, eigenVectors = np.linalg.eig(matrix)
            eigenValues = np.abs(eigenValues)  # to deal with -0 values
            idx = eigenValues.argsort()[::-1]
            eigenValues = eigenValues[idx]
            eigenVectors = eigenVectors[:, idx]

            s = eigenValues[eigenValues != 0].min()
            w = [1 / max(e, s) for e in eigenValues]
            W = w * np.eye(n_features)

            return eigenVectors.dot(W).dot(eigenVectors.T) 

Example 20

def get_1000G_snps(sumstats, out_file):
    sf = np.loadtxt(sumstats,dtype=str,skiprows=1)
    h5f = h5py.File('ref/Misc/1000G_SNP_info.h5','r')
    rf = h5f['snp_chr'][:]
    h5f.close()
    ind1 = np.in1d(sf[:,1],rf[:,2])
    ind2 = np.in1d(rf[:,2],sf[:,1])
    sf1 = sf[ind1]
    rf1 = rf[ind2]
    ### check order ###
    if sum(sf1[:,1]==rf1[:,2])==len(rf1[:,2]):
        print 'Good!'
    else:
        print 'Shit happens, sorting sf1 to have the same order as rf1'
        O1 = np.argsort(sf1[:,1])
        O2 = np.argsort(rf1[:,2])
        O3 = np.argsort(O2)
        sf1 = sf1[O1][O3]
    out = ['hg19chrc snpid a1 a2 bp or p'+'\n']
    for i in range(len(sf1[:,1])):
        out.append(sf1[:,0][i]+' '+sf1[:,1][i]+' '+sf1[:,2][i]+' '+sf1[:,3][i]+' '+rf1[:,1][i]+' '+sf1[:,5][i]+' '+sf1[:,6][i]+'\n')
    ff = open(out_file,"w")
    ff.writelines(out)
    ff.close() 

Example 21

def plot_heatmaps(data, mis, column_label, cont, topk=30, prefix=''):
    cmap = sns.cubehelix_palette(as_cmap=True, light=.9)
    m, nv = mis.shape
    for j in range(m):
        inds = np.argsort(- mis[j, :])[:topk]
        if len(inds) >= 2:
            plt.clf()
            order = np.argsort(cont[:,j])
            subdata = data[:, inds][order].T
            subdata -= np.nanmean(subdata, axis=1, keepdims=True)
            subdata /= np.nanstd(subdata, axis=1, keepdims=True)
            columns = [column_label[i] for i in inds]
            sns.heatmap(subdata, vmin=-3, vmax=3, cmap=cmap, yticklabels=columns, xticklabels=False, mask=np.isnan(subdata))
            filename = '{}/heatmaps/group_num={}.png'.format(prefix, j)
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            plt.title("Latent factor {}".format(j))
            plt.yticks(rotation=0)
            plt.savefig(filename, bbox_inches='tight')
            plt.close('all')
            #plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=cont[:, j],
            #          outfile=prefix + '/relationships/group_num=' + str(j), latent=labels[:, j], alpha=0.1) 

Example 22

def plot_top_relationships(data, corex, labels, column_label, topk=5, prefix=''):
    dual = (corex.moments['X_i Y_j'] * corex.moments['X_i Z_j']).T
    alpha = dual > 0.04
    cy = corex.moments['ry']
    m, nv = alpha.shape
    for j in range(m):
        inds = np.where(alpha[j] > 0)[0]
        inds = inds[np.argsort(- dual[j][inds])][:topk]
        if len(inds) >= 2:
            if dual[j, inds[0]] > 0.1:
                factor = labels[:, j]
                title = '$Y_{%d}$' % j
            else:
                k = np.argmax(np.abs(cy[j]))
                if k == j:
                    k = np.argsort(-np.abs(cy[j]))[1]
                factor = corex.moments['X_i Z_j'][inds[0], j] * labels[:, j] + corex.moments['X_i Z_j'][inds[0], k] * labels[:, k]
                title = '$Y_{%d} + Y_{%d}$' % (j, k)
            plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=factor,
                      outfile=prefix + '/relationships/group_num=' + str(j), title=title) 

Example 23

def trim(g, max_parents=False, max_children=False):
    for node in g:
        if max_parents:
            parents = list(g.successors(node))
            weights = [g.edge[node][parent]['weight'] for parent in parents]
            for weak_parent in np.argsort(weights)[:-max_parents]:
                g.remove_edge(node, parents[weak_parent])
        if max_children:
            children = g.predecessors(node)
            weights = [g.edge[child][node]['weight'] for child in children]
            for weak_child in np.argsort(weights)[:-max_children]:
                g.remove_edge(children[weak_child], node)
    return g


# Misc. utilities 

Example 24

def deviation_plot(rp, variable_name, slope_cutoff=1, average_cutoff = 2.):
    average_panel = rp.value_panel(variable_name, types=['average'])
    average_panel = (average_panel.T - np.median(average_panel, axis=1)).T
    average_panel.sort()
    average_ranges = np.max(average_panel, axis=1) - np.min(average_panel, axis=1)
    average_panel = average_panel[np.argsort(average_ranges)][::-1]

    slope_panel = rp.value_panel(variable_name, types=['slope'])
    slope_panel = (slope_panel.T - np.median(slope_panel, axis=1)).T
    slope_panel.sort()
    slope_ranges = np.max(slope_panel, axis=1) - np.min(slope_panel, axis=1)
    slope_panel = slope_panel[np.argsort(slope_ranges)][::-1]

    return _multiplot(rp.dataset, variable_name, slope_panel, average_panel,
                     left_vmin = -1.0*slope_cutoff, left_vmax = slope_cutoff,
                     right_vmin = -1.0*average_cutoff, right_vmax = average_cutoff) 

Example 25

def get_local_words(preds, vocab, NEs=[], k=50):
    """
    given the word probabilities over many coordinates,
    first normalize the probability of each word in different
    locations to get a probability distribution, then compute
    the entropy of the word's distribution over all coordinates
    and return the words that are low entropy and are not
    named entities.
    """
    #normalize the probabilites of each vocab using entropy
    normalized_preds = normalize(preds, norm='l1', axis=0)
    entropies = stats.entropy(normalized_preds)
    sorted_indices = np.argsort(entropies)
    sorted_local_words = np.array(vocab)[sorted_indices].tolist()

    
    filtered_local_words = []
    NEset = set(NEs)
    for word in sorted_local_words:
        if word in NEset: continue
        filtered_local_words.append(word)
    return filtered_local_words[0:k] 

Example 26

def get_feature_importance(list_of_features):
    n_estimators=10000
    random_state=0
    n_jobs=4
    x_train=data_frame[list_of_features]
    y_train=data_frame.iloc[:,-1]
    feat_labels= data_frame.columns[1:]
    forest = BaggingRegressor(n_estimators=n_estimators,random_state=random_state,n_jobs=n_jobs) 
    forest.fit(x_train,y_train) 
    importances=forest.feature_importances_ 
    indices = np.argsort(importances)[::-1]


    for f in range(x_train.shape[1]):
        print("%2d) %-*s %f" % (f+1,30,feat_labels[indices[f]],
                                        importances[indices[f]]))

   
    plt.title("Feature Importance")
    plt.bar(range(x_train.shape[1]),importances[indices],color='lightblue',align='center')
    plt.xticks(range(x_train.shape[1]),feat_labels[indices],rotation=90)
    plt.xlim([-1,x_train.shape[1]])
    plt.tight_layout()
    plt.show() 

Example 27

def cr(self):
        # Composite Reliability
        composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

        for i in range(self.lenlatent):
            block = self.data_[self.Variables['measurement']
                               [self.Variables['latent'] == self.latent[i]]]
            p = len(block.columns)

            if(p != 1):
                cor_mat = np.cov(block.T)
                evals, evecs = np.linalg.eig(cor_mat)
                U, S, V = np.linalg.svd(cor_mat, full_matrices=False)

                indices = np.argsort(evals)
                indices = indices[::-1]
                evecs = evecs[:, indices]
                evals = evals[indices]

                loadings = V[0, :] * np.sqrt(evals[0])

                numerador = np.sum(abs(loadings))**2
                denominador = numerador + (p - np.sum(loadings ** 2))
                cr = numerador / denominador
                composite[self.latent[i]] = cr

            else:
                composite[self.latent[i]] = 1

        composite = composite.T
        return(composite) 

Example 28

def _get_sorted_channels_(self, all_keys, pattern):
        sub_list     = [f for f in all_keys if pattern in f]
        all_channels = [int(f.split(pattern)[1]) for f in sub_list]
        idx          = numpy.argsort(all_channels)
        return sub_list, idx 
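
The helper above returns the argsort result rather than a reordered list, leaving it to the caller to apply the index order. With a plain Python list that takes an explicit comprehension; a sketch with assumed key names (the 'temp_' pattern is only an example):

import numpy

sub_list = ['temp_3', 'temp_11', 'temp_2']
channels = [int(f.split('temp_')[1]) for f in sub_list]
idx = numpy.argsort(channels)             # numeric order: [2, 0, 1]
ordered = [sub_list[i] for i in idx]      # ['temp_2', 'temp_3', 'temp_11']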

Example 29

def set_streams(self, stream_mode):
        
        if stream_mode == 'single-file':
            
            sources     = []
            to_write    = []
            count       = 0
            params      = self.get_description()
            my_file     = h5py.File(self.file_name)
            all_matches = [re.findall('\d+', u) for u in my_file.keys()]
            all_streams = []
            for m in all_matches:
                if len(m) > 0:
                    all_streams += [int(m[0])]

            idx = numpy.argsort(all_streams)

            for i in xrange(len(all_streams)):
                params['h5_key']  = my_file.keys()[idx[i]]
                new_data          = type(self)(self.file_name, params)
                sources          += [new_data]
                to_write         += ['We found the datafile %s with t_start %d and duration %d' %(new_data.file_name, new_data.t_start, new_data.duration)]

            print_and_log(to_write, 'debug', logger)

            return sources

        elif stream_mode == 'multi-files':
            return H5File.set_streams(stream_mode) 

Example 30

def set_streams(self, stream_mode):
        
        if stream_mode == 'single-file':
            
            sources     = []
            to_write    = []
            count       = 0
            params      = self.get_description()
            my_file     = h5py.File(self.file_name)
            all_matches = my_file.get('recordings').keys()
            all_streams = []
            for m in all_matches:
                all_streams += [int(m)]

            idx = numpy.argsort(all_streams)

            for count in xrange(len(all_streams)):
                params['recording_number'] = all_streams[idx[count]]
                new_data   = type(self)(self.file_name, params)
                sources   += [new_data]
                to_write  += ['We found the datafile %s with t_start %d and duration %d' %(new_data.file_name, new_data.t_start, new_data.duration)]

            print_and_log(to_write, 'debug', logger)

            return sources

        elif stream_mode == 'multi-files':
            return H5File.set_streams(stream_mode) 

Example 31

def rho_estimation(data, update=None, compute_rho=True, mratio=0.01):

    N     = len(data)
    rho   = numpy.zeros(N, dtype=numpy.float32)

    if update is None:
        dist = distancematrix(data)
        didx = lambda i,j: i*N + j - i*(i+1)//2 - i - 1
        nb_selec = max(5, int(mratio*N))
        sdist    = {}

        if compute_rho:
            for i in xrange(N):
                indices  = numpy.concatenate((didx(i, numpy.arange(i+1, N)), didx(numpy.arange(0, i-1), i)))
                tmp      = numpy.argsort(numpy.take(dist, indices))[:nb_selec]
                sdist[i] = numpy.take(dist, numpy.take(indices, tmp))
                rho[i]   = numpy.mean(sdist[i])

    else:
        M        = len(update[0])
        nb_selec = max(5, int(mratio*M))
        sdist    = {}

        for i in xrange(N):
            dist     = distancematrix(data[i].reshape(1, len(data[i])), update[0]).ravel()
            all_dist = numpy.concatenate((dist, update[1][i]))
            idx      = numpy.argsort(all_dist)[:nb_selec]
            sdist[i] = numpy.take(all_dist, idx)
            rho[i]   = numpy.mean(sdist[i])
    return rho, dist, sdist, nb_selec 

Example 32

def update_data_plot(self):
        reverse_sort = np.argsort(self.sort_idcs)

        if len(self.inspect_points):
            inspect = reverse_sort[np.array(sorted(self.inspect_points))]
            data = numpy.vstack((np.ones(len(inspect))*(2*self.raw_lags[-1]-self.raw_lags[-2]), inspect+0.5)).T
            self.inspect_markers.set_offsets(data)
            self.inspect_markers.set_color(self.inspect_colors)
        else:
            self.inspect_markers.set_offsets([])
            self.inspect_markers.set_color([])

        self.ui.data_overview.draw_idle() 

Example 33

def eigenDecompose(self, X, K, normalize=True):
		if (X.shape[1] >= X.shape[0]):
			s,U = la.eigh(K)
		else:
			U, s, _ = la.svd(X, check_finite=False, full_matrices=False)
			if (s.shape[0] < U.shape[1]): s = np.concatenate((s, np.zeros(U.shape[1]-s.shape[0])))	#note: can use low-rank formulas here			
			s=s**2
			if normalize: s /= float(X.shape[1])
		if (np.min(s) < -1e-10): raise Exception('Negative eigenvalues found')
		s[s<0]=0	
		ind = np.argsort(s)[::-1]
		U = U[:, ind]
		s = s[ind]	
		
		return s,U 

Example 34

def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")
    """
    n_positive = np.count_nonzero(y)

    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])
    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]
    so_far = [0, 0]
    j = 0
    for i in reversed(range(len(y_sorted))):
        so_far[y_sorted[i]] += 1
        if so_far[1] >= int(np.floor(recall * n_positive)):
            j = i
            break
    so_far = [0, 0]
    if false_positive_margin == 0:
        return y_pred_sorted[j]
    k = 0
    for i in reversed(range(j)):
        so_far[y_sorted[i]] += 1
        if so_far[0] >= false_positive_margin * n_negative:
            k = i
            break
    return y_pred_sorted[k] 

Example 35

def relabel_by_size(labels):
    """ Relabel clusters so they are sorted by number of members, descending.
    Args: labels (np.array(int)): 1-based cluster labels """
    order = np.argsort(np.argsort(-np.bincount(labels)))
    return 1 + order[labels] 
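
The nested call np.argsort(np.argsort(-np.bincount(labels))) is the usual double-argsort trick for turning scores into ranks: the inner argsort orders the clusters by descending size, and the outer argsort inverts that ordering so that order[c] is the rank of cluster c. A quick demonstration with made-up counts:

import numpy as np

counts = np.array([5, 20, 1, 9])          # members per cluster id 0..3
rank = np.argsort(np.argsort(-counts))    # rank of each cluster by size, 0 = largest
# rank -> [2, 0, 3, 1]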

Example 36

def adjust_pvalue_bh(p):
    """ Multiple testing correction of p-values using the Benjamini-Hochberg procedure """
    descending = np.argsort(p)[::-1]
    # q = p * N / k where p = p-value, N = # tests, k = p-value rank
    scale = float(len(p)) / np.arange(len(p), 0, -1)
    q = np.minimum(1, np.minimum.accumulate(scale * p[descending]))

    # Return to original order
    return q[np.argsort(descending)] 
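
The last line relies on the fact that the argsort of a permutation is its inverse: q was computed in `descending` order, and indexing it with np.argsort(descending) puts each corrected value back at its original position. Illustrative sketch with assumed values:

import numpy as np

p = np.array([0.04, 0.30, 0.01])
perm = np.argsort(p)        # [2, 0, 1]: positions in ascending order
inv = np.argsort(perm)      # [1, 2, 0]: inverse permutation
restored = p[perm][inv]     # back to the original order
assert np.array_equal(restored, p)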

Example 37

def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    ''' Compute a threshold above which the UMIs are unlikely to be PCR off-products.
        reads (np.array(int)) - Read pairs for each UMI
        subsample_rate (float) - Subsample reads to this fraction.
        Returns threshold (int) - The RPPU threshold in the subsampled space '''

    if len(np.unique(reads)) < 2:
        print 'Skipping RPPU threshold calculation.'
        return 1

    print 'RPPU subsample rate: %0.4f' % subsample_rate

    reads = np.random.binomial(reads, subsample_rate)
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print 'Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.'
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)

    print 'New N50: %d:' % new_n50

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1,1)))
    kmeans.predict(log_reads.reshape((-1,1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argsort(np.ravel(kmeans.cluster_centers_))[0]

    print 'RPPU component means: ' + str(list(iter(np.exp(kmeans.cluster_centers_))))
    print 'RPPU component members: ' + str(np.bincount(kmeans.labels_))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold 

Example 38

def rebalance(self):
		"""
		Rebalances the binary heap.  Takes O(n log n) time to run.
		Avoid using, when possible.
		"""
		# Sort array by priority
		sorted_indices_by_priority = np.argsort(-self.pq_array[:,0])
		self.pq_array = self.pq_array[sorted_indices_by_priority]
		pq_indices = range(self.size)
		# Create hash tables
		self.pq_hash = dict(zip(pq_indices,self.pq_array[:,1]))
		self.exp_hash = dict(zip(self.pq_array[:,1],pq_indices)) 

Example 39

def rank_output(self):
        self.output_ranks = np.argsort(self.output_raw, axis=1, kind='mergesort').ravel()[::-1].astype(np.int32) 
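
Passing kind='mergesort' requests a stable sort, so entries with equal raw scores keep their original relative order in the returned indices; the default 'quicksort' makes no such guarantee. A minimal illustration with assumed values (kind='stable' is an equivalent spelling in recent NumPy):

import numpy as np

raw = np.array([0.5, 0.2, 0.5, 0.2])
order = np.argsort(raw, kind='mergesort')   # ties keep original order -> [1, 3, 0, 2]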

Example 40

def overlay_emojiface(probs):
    if max(probs) > 0.8:
        emotion = emotions[np.argmax(probs)]
        return 'emoji/{}-{}.png'.format(emotion, emotion)
    else:
        index1, index2 = np.argsort(probs)[::-1][:2]
        emotion1 = emotions[index1]
        emotion2 = emotions[index2]
        return 'emoji/{}-{}.png'.format(emotion1, emotion2) 

Example 41

def __call__(self, words, weights, vocabulary_max):
        if len(words) < vocabulary_max * self.trigger_ratio:
            return words, weights

        if not isinstance(words, numpy.ndarray):
            words = numpy.array(words)

        # Tail optimization does not help with very large vocabularies
        if len(words) > vocabulary_max * 2:
            indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
            indices = indices[-vocabulary_max:]
            words = words[indices]
            weights = weights[indices]
            return words, weights

        # Vocabulary typically consists of these three parts:
        # 1) the core - we found its border - `core_end` - 15%
        # 2) the body - 70%
        # 3) the minor tail - 15%
        # (1) and (3) are roughly the same size
        # (3) can be safely discarded, (2) can be discarded with care,
        # (1) shall never be discarded.

        sorter = numpy.argsort(weights)[::-1]
        weights = weights[sorter]
        trend_start = int(len(weights) * 0.2)
        trend_finish = int(len(weights) * 0.8)
        z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
                          numpy.log(weights[trend_start:trend_finish]),
                          1)
        exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
        avg_error = numpy.abs(weights[trend_start:trend_finish] -
                              exp_z[trend_start:trend_finish]).mean()
        tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
        weights = weights[:-tail_size][:vocabulary_max]
        words = words[sorter[:-tail_size]][:vocabulary_max]

        return words, weights 
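
For very large vocabularies the example above already switches from argsort to numpy.argpartition, which finds the top vocabulary_max entries in roughly linear time without fully ordering them, whereas argsort costs O(n log n) but returns a complete ordering. A small comparison of the two calls (assumed data):

import numpy

weights = numpy.random.rand(100000)
k = 10

top_k_part = numpy.argpartition(weights, len(weights) - k)[-k:]   # top-k indices, unordered
top_k_sort = numpy.argsort(weights)[-k:]                          # top-k indices, ascending by weight

assert set(top_k_part) == set(top_k_sort)                         # same set, possibly different order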