Python numpy.argpartition() 使用实例

2023年7月20日 264次阅读

The following are code examples for showing how to use . They are extracted from open source Python projects. You can vote up the examples you like or vote down the exmaples you don’t like. You can also save this page to your account.

Example 1

def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])

Example 2

def format_lines(video_ids, predictions, labels, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    n_recall = max(int(numpy.sum(labels[video_index])), 1)
    # labels
    label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
    label_predictions = [(class_index, predictions[video_index][class_index]) 
                           for class_index in label_indices]
    label_predictions = sorted(label_predictions, key=lambda p: -p[1])
    label_str = "\t".join(["%d\t%f"%(x,y) for x,y in label_predictions])
    # predictions
    top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    top_k_predictions = [(class_index, predictions[video_index][class_index])
                         for class_index in top_k_indices]
    top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
    top_k_str = "\t".join(["%d\t%f"%(x,y) for x,y in top_k_predictions])
    # compute PERR
    top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
    positives = [labels[video_index][class_index] 
                 for class_index in top_n_indices]
    perr = sum(positives) / float(n_recall)
    # URL
    url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
    yield url + "\t" + str(1-perr) + "\t" + top_k_str + "\t" + label_str + "\n"

Example 3

def argpartition(a, kth, axis=-1):
    """Returns the indices that would partially sort an array.

    Args:
        a (cupy.ndarray): Array to be sorted.
        kth (int or sequence of ints): Element index to partition by. If
            supplied with a sequence of k-th it will partition all elements
            indexed by k-th of them into their sorted position at once.
        axis (int or None): Axis along which to sort. Default is -1, which
            means sort along the last axis. If None is supplied, the array is
            flattened before sorting.

    Returns:
        cupy.ndarray: Array of the same type and shape as ``a``.

    .. note::
        For its implementation reason, `cupy.argpartition` fully sorts the
        given array as `cupy.argsort` does. It also does not support ``kind``
        and ``order`` parameters that ``numpy.argpartition`` supports.

    .. seealso:: :func:`numpy.argpartition`

    """
    return a.argpartition(kth, axis=axis)

Example 4

def CSMToBinary(D, Kappa):
    """
    Turn a cross-similarity matrix into a binary cross-simlarity matrix
    If Kappa = 0, take all neighbors
    If Kappa < 1 it is the fraction of mutual neighbors to consider
    Otherwise Kappa is the number of mutual neighbors to consider
    """
    N = D.shape[0]
    M = D.shape[1]
    if Kappa == 0:
        return np.ones((N, M))
    elif Kappa < 1:
        NNeighbs = int(np.round(Kappa*M))
    else:
        NNeighbs = Kappa
    J = np.argpartition(D, NNeighbs, 1)[:, 0:NNeighbs]
    I = np.tile(np.arange(N)[:, None], (1, NNeighbs))
    V = np.ones(I.size)
    [I, J] = [I.flatten(), J.flatten()]
    ret = sparse.coo_matrix((V, (I, J)), shape=(N, M))
    return ret.toarray()

Example 5

def closest_docs(self, query, k=1):
        """Closest docs by dot product between query and documents
        in tfidf weighted word vector space.
        """
        spvec = self.text2spvec(query)
        res = spvec * self.doc_mat

        if len(res.data) <= k:
            o_sort = np.argsort(-res.data)
        else:
            o = np.argpartition(-res.data, k)[0:k]
            o_sort = o[np.argsort(-res.data[o])]

        doc_scores = res.data[o_sort]
        doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
        return doc_ids, doc_scores

Example 6

def bottom_top_k_along_row(arr, k, ordered=True):
    """ bottom and top k of a 2d np.array, along the rows    
    http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array/18691983
    """    
    assert k>0, "bottom_top_k_along_row/column() requires k>0."
    rows = arr.shape[0]
    if ordered:
        tmp = np.argsort(arr, axis=1)      
        idx_bot = tmp[:, :k]
        idx_top = tmp[:,-k:]
    else:
        idx_bot = np.argpartition(arr, k, axis=1)[:,:k]
        idx_top = np.argpartition(arr, -k, axis=1)[:,-k:]
    
    indices = np.concatenate((idx_bot, idx_top), axis=1)
    vals = arr[np.repeat(np.arange(rows), 2*k), indices.ravel()].reshape(rows,2*k)
    return vals, indices

Example 7

def top_k_recommendations(self, sequence, k=10, exclude=None, **kwargs):
		if exclude is None:
			exclude = []
		
		last_item = int(sequence[-1][0])
		if last_item not in self.previous_recommendations:
			self.get_all_recommendations(last_item)
		
		all_recommendations = deepcopy(self.previous_recommendations[last_item])
		for s in sequence:
			all_recommendations[int(s[0])] = 0
		for i in exclude:
			all_recommendations[i] = 0

		ranking = np.zeros(self.n_items)
		for i, x in enumerate(all_recommendations.most_common(k)):
			ranking[x[0]] = k-i
		return np.argpartition(-ranking, range(k))[:k]

Example 8

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
		''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
		'''

		if exclude is None:
			exclude = []

		last_item = sequence[-1][0]
		output = np.dot(self.V_user_item[user_id, :], self.V_item_user.T) + np.dot(self.V_prev_next[last_item, :], self.V_next_prev.T)

		# Put low similarity to viewed items to exclude them from recommendations
		output[[i[0] for i in sequence]] = -np.inf
		output[exclude] = -np.inf

		# find top k according to output
		return list(np.argpartition(-output, range(k))[:k])

Example 9

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
		''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
		'''

		if exclude is None:
			exclude = []

		user_items = [i[0] for i in sequence]
		output = self.item_score(user_id, user_items)

		# Put low similarity to viewed items to exclude them from recommendations
		output[[i[0] for i in sequence]] = -np.inf
		output[exclude] = -np.inf

		# find top k according to output
		return list(np.argpartition(-output, range(k))[:k])

Example 10

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
		''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
		'''

		if exclude is None:
			exclude = []

		last_item = sequence[-1][0]
		output = self.bias + np.dot(self.V[user_id, :], self.H.T)

		# Put low similarity to viewed items to exclude them from recommendations
		output[[i[0] for i in sequence]] = -np.inf
		output[exclude] = -np.inf

		# find top k according to output
		return list(np.argpartition(-output, range(k))[:k])

Example 11

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None, **kwargs):
		''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
		'''

		# Compile network if needed
		if not hasattr(self, 'predict_function'):
			self._compile_predict_function()

		# Prepare RNN input
		X = np.zeros((1, self._input_size())) # input of the RNN
		X[0, :] = self._one_hot_encoding([i[0] for i in sequence])

		# Run RNN
		output = self.predict_function(X.astype(theano.config.floatX))[0]

		# Put low similarity to viewed items to exclude them from recommendations
		output[[i[0] for i in sequence]] = -np.inf
		output[exclude] = -np.inf

		# find top k according to output
		return list(np.argpartition(-output, range(k))[:k])

Example 12

def _compile_test_function(self):
		''' Compile self.test_function, the deterministic rnn that output the [email protected]
		'''
		print("Compiling test...")
		deterministic_output = lasagne.layers.get_output(self.l_out, deterministic=True)
		if self.interactions_are_unique:
			deterministic_output *= (1 - self.l_in.input_var)
		theano_test_function = theano.function(self.theano_inputs, deterministic_output, allow_input_downcast=True, name="Test_function", on_unused_input='ignore')
		
		def test_function(theano_inputs, k=10):
			output = theano_test_function(*theano_inputs)
			ids = np.argpartition(-output, range(k), axis=-1)[0, :k]
			
			return ids

		self.test_function = test_function

Example 13

def _compile_test_function(self):
		''' Differs from base test function because of the added softmax operation
		'''
		print("Compiling test...")
		deterministic_output = T.nnet.softmax(lasagne.layers.get_output(self.l_out, deterministic=True))
		if self.interactions_are_unique:
			deterministic_output *= (1 - self.exclude)

		theano_test_function = theano.function(self.theano_inputs, deterministic_output, allow_input_downcast=True, name="Test_function", on_unused_input='ignore')
		
		def precision_test_function(theano_inputs, k=10):
			output = theano_test_function(*theano_inputs)
			ids = np.argpartition(-output, range(k), axis=-1)[0, :k]
			
			return ids

		self.test_function = precision_test_function
		print("Compilation done.")

Example 14

def smallest_k(matrix: np.ndarray, k: int,
               only_first_row: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], np.ndarray]:
    """
    Find the smallest elements in a numpy matrix.

    :param matrix: Any matrix.
    :param k: The number of smallest elements to return.
    :param only_first_row: If true the search is constrained to the first row of the matrix.
    :return: The row indices, column indices and values of the k smallest items in matrix.
    """
    if only_first_row:
        flatten = matrix[:1, :].flatten()
    else:
        flatten = matrix.flatten()

    # args are the indices in flatten of the k smallest elements
    args = np.argpartition(flatten, k)[:k]
    # args are the indices in flatten of the sorted k smallest elements
    args = args[np.argsort(flatten[args])]
    # flatten[args] are the values for args
    return np.unravel_index(args, matrix.shape), flatten[args]

Example 15

def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])

Example 16

def probs(self, x):
        dists = np.hstack([self.distFunc(x, cls) for cls in self.trainData])
        indices = np.argpartition(dists, self.k, axis=1)[:,:self.k]

        #start = 0
        #votes = list()
        #for cls in self.trainData:
        #    end = start + cls.shape[0]
        #    votes.append(np.sum(np.logical_and(start <= indices, indices < end), axis=1))
        #    start = end

        ends = np.cumsum([len(cls) for cls in self.trainData])
        starts = ends - np.array([len(cls) for cls in self.trainData])
        votes = [np.sum(np.logical_and(start <= indices, indices < end), axis=1)
                 for start, end in zip(starts, ends)]
        votes = np.vstack(votes).T

        #probs = np.zeros((x.shape[0], self.nCls))
        #probs[np.arange(probs.shape[0]), np.argmax(votes, axis=1)] = 1.0
        ##probs = util.softmax(votes / float(self.k))
        probs = votes / float(self.k)

        return probs

Example 17

def argmaxk_rows_opt1(arr, k=10, sort=False):
    """
    Optimized implementation. When sort=False it is equal to argmaxk_rows_basic. When sort=True and k << arr.shape[1],
    it is should be faster, because we argsort only subarray of k max elements from each row of arr (arr.shape[0] x k) instead of
    the whole array arr (arr.shape[0] x arr.shape[1]).
    """
    best_inds = np.argpartition(arr, kth=-k, axis=1)[:, -k:]  # column indices of k max elements in each row (m x k)
    if not sort:
        return best_inds
    # generate row indices corresponding to best_ids (just current row id in each row) (m x k)
    rows = np.arange(best_inds.shape[0], dtype=np.intp)[:, np.newaxis].repeat(best_inds.shape[1], axis=1)
    best_elems = arr[rows, best_inds]  # select k max elements from each row using advanced indexing (m x k)
    # indices which sort each row of best_elems in descending order (m x k)
    best_elems_inds = np.argsort(best_elems, axis=1)[:, ::-1]
    # reorder best_indices so that arr[i, sorted_best_inds[i,:]] will be sorted in descending order
    sorted_best_inds = best_inds[rows, best_elems_inds]
    return sorted_best_inds

Example 18

def generateCosineNeighborGraph(hin,kNeighbors=10,tf_param={'word':True, 'entity':False, 'we_weight':1}):
        X, newIds, entIds = GraphGenerator.getTFVectorX(hin,param=tf_param)
        cosX = cosine_similarity(X)
        #return sparse.csc_matrix(X.dot(X.transpose())),newIds
        n = cosX.shape[0]
        graph = np.zeros((n,n))
        tic = time.time()
        for i in range(n):
            for j in np.argpartition(-cosX[i],kNeighbors)[:kNeighbors]:
                if j == i:
                    continue
                #graph[i, j] += cosX[i, j]
                #graph[j, i] += cosX[i, j]
                graph[i, j] += 1
                graph[j, i] += 1
        toc = time.time() - tic

        return sparse.csc_matrix(graph), newIds

Example 19

def generateCosineNeighborGraphfromX(X, kNeighbors=10):
        cosX = cosine_similarity(X)
        # return sparse.csc_matrix(X.dot(X.transpose())),newIds
        #print cosX.shape
        n = cosX.shape[0]
        graph = np.zeros((n, n))
        tic = time.time()
        for i in range(n):
            for j in np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]:
                if j == i:
                    continue
                # graph[i, j] += cosX[i, j]
                # graph[j, i] += cosX[i, j]
                graph[i, j] += 1
                graph[j, i] += 1
        toc = time.time() - tic
        #print 'graph generation done in %f seconds.' % toc
        return sparse.csc_matrix(graph)

Example 20

def generate_laplacian_score_scalar(X_ent, X_word, kNeighbors):
    # Generate cosine similarity graph
    n = X_ent.shape[0]
    cosX = cosine_similarity(X_word)
    graph = np.zeros((n, n))
    for i in range(n):
        for j in np.argpartition(cosX[i], -kNeighbors)[-kNeighbors:]:
            if j == i:
                continue
            graph[i, j] = cosX[i, j]
            graph[j, i] = cosX[i, j]

    D = sparse.diags([graph.sum(axis=0)], [0])
    L = D - graph
    f_tilde = X_ent - (float(X_ent.transpose() * D * np.ones((n, 1))) / D.sum().sum()) * np.ones((n, 1))
    score = float(f_tilde.transpose() * L * f_tilde) / float(f_tilde.transpose() * D * f_tilde + 1e-10)
    laplacian_score = score
    return laplacian_score

Example 21

def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])

Example 22

def compute_nearest_neighbors(self, num_neighbors):
        result_list = []
        for key, value in self.im2index.iteritems():
            neighbor_list = [key]
            similarity_scores = self.similarity_mat[value]
            # removes best match as same as key
            ind = np.argpartition(similarity_scores, -(num_neighbors + 1))[-(num_neighbors + 1):-1]
            ind = ind[np.argsort(similarity_scores[ind])]
            neighbors = [self.index2im[x] for x in ind]
            neighbor_list.extend(neighbors)

            result_list.append(neighbor_list)

        # compute neighbor statistics
        NearestNeighbour.compute_neighbor_stats(result_list, num_neighbors)

        # plot the TSNE plot
        self.plot_tsne()

        return result_list

Example 23

def _calculate_topk_ndces(self, k):
        """
        Calculate the indices of the k specialists with highest b-value, 
        including the base classifier regardless of its b-value. 

        Args:
            k: int >= 0, approximately specifying the number of derived specialists to select. 
                Precisely, the best k (by Wilson error bound) are taken, along with the 
                base classifier if it is not already one of the best k. 

        Returns:
            A list containing the indices of the top k classifiers. 
            The list always at least contains the base classifier's index (i.e. 0).
            Therefore, the list is of length k if the base classifier is one of the top k, 
            and length k+1 otherwise. If k is greater than the total number of derived 
            specialists, returns all of them.
        """
        assert self.label_corrs is not None , "Label correlations must be calculated before top k indices."
        if k < len(self.label_corrs):
            topk_ndces = set(np.argpartition(-self.label_corrs, k)[:k])  #Only does a partial sort of b!
        else:
            topk_ndces = set(range(len(self.label_corrs)))
        topk_ndces.add(0)
        return list(topk_ndces & set(self._relevant_ndces))

Example 24

def argsort(x, topn=None, reverse=False):
    """
    Return indices of the `topn` smallest elements in array `x`, in ascending order.

    If reverse is True, return the greatest elements instead, in descending order.

    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order

Example 25

def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])

Example 26

def closest_docs(self, query, k=1):
        """Closest docs by dot product between query and documents
        in tfidf weighted word vector space.
        """
        spvec = self.text2spvec(query)
        res = spvec * self.doc_mat

        if len(res.data) <= k:
            o_sort = np.argsort(-res.data)
        else:
            o = np.argpartition(-res.data, k)[0:k]
            o_sort = o[np.argsort(-res.data[o])]

        doc_scores = res.data[o_sort]
        doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
        return doc_ids, doc_scores

Example 27

def select_next_words(self, next_costs, next_probs, step_num, how_many):
        # Pick only on the first line (for the beginning of sampling)
        # This will avoid duplicate <q> token.
        if step_num == 0:
            flat_next_costs = next_costs[:1, :].flatten()
        else:
            # Set the next cost to infinite for finished utterances (they will be replaced)
            # by other utterances in the beam
            flat_next_costs = next_costs.flatten()
         
        voc_size = next_costs.shape[1]
         
        args = numpy.argpartition(flat_next_costs, how_many)[:how_many]
        args = args[numpy.argsort(flat_next_costs[args])]
        
        return numpy.unravel_index(args, next_costs.shape), flat_next_costs[args]

Example 28

def find_nbest(score, n, threshold=None):
    num_vars = score.shape[1]

    score = score.flatten()
    nbest = np.argpartition(score, n)[:n]

    beam_indices = nbest / num_vars
    var_indices = nbest % num_vars
    nbest_score = score[nbest]

    if threshold:
        best = np.max(nbest_score)
        cond = nbest_score > best + threshold
        nbest_score = nbest_score[cond]
        beam_indices = beam_indices[cond]
        var_indices = var_indices[cond]

    return nbest_score, beam_indices, var_indices

Example 29

def tfidf_retrieval(tfidf_vec, train_contexts_txt, train_responses_txt, output_file):
    print type(tfidf_vec)
    tfidf_vec = tfidf_vec.toarray()
    print tfidf_vec.shape
    prod_mat = np.dot(tfidf_vec, tfidf_vec.T)
    print prod_mat.shape
    prod_mat = prod_mat / mat_vector_2norm_squared(tfidf_vec)
    print prod_mat.shape
    
    response_list = []
    for i in xrange(len(prod_mat)):
        row = prod_mat[i]
        # No idea what's going on here. See the following page:
        # stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
        ind = np.argpartition(row, -2)[-2:]
        ind = ind[np.argsort(row[ind])][0]
        response_list.append(train_responses_txt[ind])
        print train_contexts_txt[i]
        print response_list[i]

    with open(output_file, 'w') as f1:
        for response in response_list:
            f1.write(response)

Example 30

def visualize_frequent_words(vectors_2d: np.ndarray, dataset: DataSet, k: int, ax: plt.Axes = None) -> None:
    word_ids, counts = np.unique(dataset.data, return_counts=True)

    indices = np.argpartition(-counts, k)[:k]
    frequent_word_ids = word_ids[indices]

    if ax is None:
        fig, ax = plt.subplots(figsize=(13, 13))
    else:
        fig = None

    vectors_2d = vectors_2d[frequent_word_ids]

    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1], s=2, alpha=0.25)
    for i, id in enumerate(frequent_word_ids):
        ax.annotate(dataset.vocabulary.to_word(id), (vectors_2d[i, 0], vectors_2d[i, 1]))

    if fig is not None:
        fig.tight_layout()
        fig.show()

Example 31

def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])

Example 32

def GetFeatures(self, data):
                closestPrototypesIndxs = []
		D = self.layers[0] - (np.array(data)*self.stateScale + self.bias)
		D = np.sqrt(sum(D.T**2))    # a bottlenect for sure
		indexes = np.argpartition(D, self.c[0], axis=0)[:self.c[0]]
		
		for i in range(1,len(self.layers)):
			D = np.sum(np.setxor1d(self.layers[i], indexes, True), axis=1)
#			phi = np.zeros(self.prototypeList[i])
#			phi[indexes] = 1
#			D = np.sum(np.logical_xor(self.layers[i], phi), axis=1)
			indexes = np.argpartition(D, self.c[i], axis=0)[:self.c[i]]


				
                return indexes

Example 33

def process_frame_for_game_play(frame):
    """Assumes a grayscale frame"""
    histogram = skimage.exposure.histogram(frame[40:])

    if np.unique(histogram[0]).size < 3:
        return None

    max_indices = np.argpartition(histogram[0], -3)[-3:]

    for index in sorted(max_indices)[:2]:
        frame[frame == index] = 0

    threshold = skimage.filters.threshold_otsu(frame[40:])
    bw_frame = frame > threshold

    return bw_frame

Example 34

def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
                           tgt)
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
                           tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
                        tgt[k])
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],
                         tgt[k])

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])

Example 35

def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"

Example 36

def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision

Example 37

def top_k_triplets(predictions, labels, k=20):
  """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
  (prediction, class) format"""
  m = len(predictions)
  k = min(k, m)
  indices = numpy.argpartition(predictions, -k)[-k:]
  return [(index, predictions[index], labels[index]) for index in indices]

Example 38

def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"

Example 39

def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"

Example 40

def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision

Example 41

def top_k_triplets(predictions, labels, k=20):
  """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
  (prediction, class) format"""
  m = len(predictions)
  k = min(k, m)
  indices = numpy.argpartition(predictions, -k)[-k:]
  return [(index, predictions[index], labels[index]) for index in indices]

Example 42

def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"

Example 43

def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"

Example 44

def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
  #  print("Type - Test :")
  #  print(type(video_ids[video_index]))
  #  print(video_ids[video_index].decode('utf-8'))
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"

Example 45

def calculate_recall_at_n(predictions, actuals, n):
  """Performs a local (numpy) calculation of the [email protected]

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.
    n: scalar of n

  Returns:
    float: The recall at n across the entire batch.
  """
  aggregated_recall = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    top_indices = numpy.argpartition(predictions[row],
                                     -n)[-n:]
    item_recall = 0.0
    for label_index in top_indices:
      if predictions[row][label_index] > 0:
        item_recall += actuals[row][label_index]
    item_recall /= num_labels
    aggregated_recall += item_recall
  aggregated_recall /= num_videos
  return aggregated_recall

Example 46

def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision

Example 47

def top_k_triplets(predictions, labels, k=20):
  """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
  (prediction, class) format"""
  m = len(predictions)
  k = min(k, m)
  indices = numpy.argpartition(predictions, -k)[-k:]
  return [(index, predictions[index], labels[index]) for index in indices]

Example 48

def format_lines(video_ids, predictions, top_k):
  batch_size = len(video_ids)
  for video_index in range(batch_size):
    top_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
    line = [(class_index, predictions[video_index][class_index])
            for class_index in top_indices]
    line = sorted(line, key=lambda p: -p[1])
    yield video_ids[video_index].decode('utf-8') + "," + " ".join("%i %f" % pair
                                                  for pair in line) + "\n"

Example 49

def __call__(self, words, weights, vocabulary_max):
        if len(words) < vocabulary_max * self.trigger_ratio:
            return words, weights

        if not isinstance(words, numpy.ndarray):
            words = numpy.array(words)

        # Tail optimization does not help with very large vocabularies
        if len(words) > vocabulary_max * 2:
            indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
            indices = indices[-vocabulary_max:]
            words = words[indices]
            weights = weights[indices]
            return words, weights

        # Vocabulary typically consists of these three parts:
        # 1) the core - we found it's border - `core_end` - 15%
        # 2) the body - 70%
        # 3) the minor tail - 15%
        # (1) and (3) are roughly the same size
        # (3) can be safely discarded, (2) can be discarded with care,
        # (1) shall never be discarded.

        sorter = numpy.argsort(weights)[::-1]
        weights = weights[sorter]
        trend_start = int(len(weights) * 0.2)
        trend_finish = int(len(weights) * 0.8)
        z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
                          numpy.log(weights[trend_start:trend_finish]),
                          1)
        exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
        avg_error = numpy.abs(weights[trend_start:trend_finish] -
                              exp_z[trend_start:trend_finish]).mean()
        tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
        weights = weights[:-tail_size][:vocabulary_max]
        words = words[sorter[:-tail_size]][:vocabulary_max]

        return words, weights

Example 50

def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision