# Python numpy.argpartition() 使用实例

Example 1

def test_partition_cdtype(self=None):
    """Check ``partition``/``argpartition`` on structured and string arrays.

    Partitioning on every index at once must give the same result as a full
    sort, and partitioning on a single index ``k`` must put the k-th sorted
    element at position ``k``.

    ``self`` is unused; it defaults to ``None`` so the example can also be
    run as a plain function outside of a test class.
    """
    d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                  ('Lancelot', 1.9, 38)],
                 dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])
    order = ['age', 'height']
    every_kth = list(range(d.size))

    tgt = np.sort(d, order=order)
    assert_array_equal(np.partition(d, every_kth, order=order), tgt)
    assert_array_equal(d[np.argpartition(d, every_kth, order=order)], tgt)
    for k in range(d.size):
        assert_equal(np.partition(d, k, order=order)[k], tgt[k])
        assert_equal(d[np.argpartition(d, k, order=order)][k], tgt[k])

    # Same checks on a plain string array (no ``order`` needed).
    d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
    tgt = np.sort(d)
    assert_array_equal(np.partition(d, list(range(d.size))), tgt)
    for k in range(d.size):
        assert_equal(np.partition(d, k)[k], tgt[k])
        assert_equal(d[np.argpartition(d, k)][k], tgt[k])

Example 2

def format_lines(video_ids, predictions, labels, top_k):
    """Yield one tab-separated report line per video.

    Each line has the form
    ``<video id>\\t<1 - PERR>\\t<top-k (class, score) pairs>\\t<label (class, score) pairs>\\n``
    where the pairs are flattened as ``class\\tscore`` and sorted by
    descending score.

    Args:
      video_ids: sequence of video ids (``bytes`` are decoded as UTF-8).
      predictions: per-video score vectors, indexable as
        ``predictions[video_index][class_index]``.
      labels: per-video ground-truth vectors with the same layout.
      top_k: number of highest-scoring classes to report; clamped to the
        number of classes.

    Note: the original snippet emitted an undefined name ``url`` as the
    first column (a ``NameError`` at runtime); the video id is used instead.
    """
    for video_index, video_id in enumerate(video_ids):
        scores = predictions[video_index]
        truth = labels[video_index]
        num_classes = len(scores)
        if isinstance(video_id, bytes):
            video_id = video_id.decode("utf-8")

        def describe(class_indices):
            # "(class, score)" pairs, best score first, flattened with tabs.
            pairs = sorted(((c, scores[c]) for c in class_indices),
                           key=lambda p: -p[1])
            return "\t".join("%d\t%f" % (c, s) for c, s in pairs)

        # At least one label is assumed so that PERR is always defined.
        n_recall = min(max(int(numpy.sum(truth)), 1), num_classes)
        k = min(top_k, num_classes)

        # labels: the n_recall largest ground-truth entries
        label_indices = numpy.argpartition(truth, -n_recall)[-n_recall:]
        label_str = describe(label_indices)

        # predictions: the k highest-scoring classes
        if k > 0:
            top_k_indices = numpy.argpartition(scores, -k)[-k:]
        else:
            top_k_indices = []
        top_k_str = describe(top_k_indices)

        # PERR: fraction of the n_recall best predictions that are true labels
        top_n_indices = numpy.argpartition(scores, -n_recall)[-n_recall:]
        perr = sum(truth[c] for c in top_n_indices) / float(n_recall)

        yield (str(video_id) + "\t" + str(1 - perr) + "\t" + top_k_str
               + "\t" + label_str + "\n")

Example 3

def argpartition(a, kth, axis=-1):
    """Returns the indices that would partially sort an array.

    Args:
        a (cupy.ndarray): Array to be sorted.
        kth (int or sequence of ints): Element index to partition by. If
            supplied with a sequence of k-th it will partition all elements
            indexed by k-th of them into their sorted position at once.
        axis (int or None): Axis along which to sort. Default is -1, which
            means sort along the last axis. If None is supplied, the array is
            flattened before sorting.

    Returns:
        cupy.ndarray: Array of indices that partition ``a`` along ``axis``
        (this is whatever ``a.argpartition`` returns; the call is simply
        delegated to the array's own method).

    .. note::
        For its implementation reason, `cupy.argpartition` fully sorts the
        given array as `cupy.argsort` does. It also does not support ``kind``
        and ``order`` parameters that ``numpy.argpartition`` supports.

    .. seealso:: :func:`numpy.argpartition`

    """
    return a.argpartition(kth, axis=axis)

Example 4

def CSMToBinary(D, Kappa):
    """
    Turn a cross-similarity matrix into a binary cross-similarity matrix.

    If Kappa = 0, take all neighbors.
    If Kappa < 1 it is the fraction of neighbors (columns) to keep per row.
    Otherwise Kappa is the number of neighbors to keep per row.

    :param D: N x M matrix; smaller entries count as "closer"
    :param Kappa: neighbor fraction or count as described above
    :return: dense N x M float array with a 1 at the selected neighbors of
        every row and 0 elsewhere
    """
    N = D.shape[0]
    M = D.shape[1]
    if Kappa == 0:
        return np.ones((N, M))
    if Kappa < 1:
        NNeighbs = int(np.round(Kappa * M))
    else:
        # Cast so that a float count such as 3.0 is usable as an index.
        NNeighbs = int(Kappa)
    if NNeighbs >= M:
        # Every column is a neighbor; argpartition would reject kth == M.
        return np.ones((N, M))

    ret = np.zeros((N, M))
    if NNeighbs > 0:
        # Column indices of the NNeighbs smallest entries of each row.
        J = np.argpartition(D, NNeighbs, 1)[:, 0:NNeighbs]
        ret[np.arange(N)[:, None], J] = 1.0
    return ret

Example 5

def closest_docs(self, query, k=1):
    """Closest docs by dot product between query and documents
    in tfidf weighted word vector space.

    Returns a ``(doc_ids, doc_scores)`` pair holding at most ``k`` entries,
    ordered by descending score.
    """
    spvec = self.text2spvec(query)
    res = spvec * self.doc_mat

    if len(res.data) <= k:
        # Fewer candidates than requested: a full sort is all that is needed.
        o_sort = np.argsort(-res.data)
    else:
        # Select the k best in linear time, then order only those k.
        o = np.argpartition(-res.data, k)[0:k]
        o_sort = o[np.argsort(-res.data[o])]

    doc_scores = res.data[o_sort]
    doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
    return doc_ids, doc_scores

Example 6

def bottom_top_k_along_row(arr, k, ordered=True):
    """ bottom and top k of a 2d np.array, along the rows
    http://stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array/18691983

    Returns ``(vals, indices)``: for every row, the column indices of the k
    smallest entries followed by those of the k largest entries, and the
    matching values. With ``ordered=True`` each group is in ascending value
    order; with ``ordered=False`` the order inside a group is unspecified.
    """
    assert k > 0, "bottom_top_k_along_row/column() requires k>0."
    rows = arr.shape[0]
    if ordered:
        tmp = np.argsort(arr, axis=1)
        idx_bot = tmp[:, :k]
        idx_top = tmp[:, -k:]
    else:
        # Partitioning at k-1 already puts the k smallest first and, unlike
        # kth=k, remains valid when k equals the number of columns.
        idx_bot = np.argpartition(arr, k - 1, axis=1)[:, :k]
        idx_top = np.argpartition(arr, -k, axis=1)[:, -k:]

    indices = np.concatenate((idx_bot, idx_top), axis=1)
    # Pair every row number with that row's selected columns.
    vals = arr[np.arange(rows)[:, None], indices]
    return vals, indices

Example 7

def top_k_recommendations(self, sequence, k=10, exclude=None, **kwargs):
    """Return the ids of (at most) ``k`` recommended items, best first.

    Recommendations are the ``k`` most common entries of the counter stored
    for the last item of ``sequence``; items already in ``sequence`` and
    items in ``exclude`` have their count reset to 0 first.

    ``sequence`` is a sequence of ``(item_id, ...)`` tuples. ``k`` is clamped
    to ``self.n_items`` (a larger value used to make ``argpartition`` raise).
    """
    if exclude is None:
        exclude = []

    last_item = int(sequence[-1][0])
    if last_item not in self.previous_recommendations:
        self.get_all_recommendations(last_item)

    # Work on a copy so the cached counter is not modified.
    all_recommendations = deepcopy(self.previous_recommendations[last_item])
    for s in sequence:
        all_recommendations[int(s[0])] = 0
    for i in exclude:
        all_recommendations[i] = 0

    # Encode the rank as a score: the best item gets k, the next k-1, ...
    ranking = np.zeros(self.n_items)
    for i, x in enumerate(all_recommendations.most_common(k)):
        ranking[x[0]] = k - i

    top = min(k, self.n_items)
    if top <= 0:
        return np.empty(0, dtype=np.intp)
    # Partitioning on every index 0..top-1 sorts the first ``top`` slots.
    return np.argpartition(-ranking, list(range(top)))[:top]

Example 8

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)

    The score of every item is the sum of a user/item term and a
    previous-item/next-item term, both computed as dot products of the
    model's factor matrices. Items in ``sequence`` or ``exclude`` are never
    recommended ahead of other items. ``k`` is clamped to the number of items.
    '''

    if exclude is None:
        exclude = []

    last_item = sequence[-1][0]
    output = (np.dot(self.V_user_item[user_id, :], self.V_item_user.T)
              + np.dot(self.V_prev_next[last_item, :], self.V_next_prev.T))

    # Put low similarity to viewed items to exclude them from recommendations
    output[[i[0] for i in sequence]] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    top = min(k, len(output))
    if top <= 0:
        return []
    return list(np.argpartition(-output, list(range(top)))[:top])

Example 9

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)

    Scores come from ``self.item_score(user_id, user_items)`` where
    ``user_items`` are the ids found in ``sequence``. Items in ``sequence``
    or ``exclude`` are pushed to the bottom of the ranking. ``k`` is clamped
    to the number of scored items.
    '''

    if exclude is None:
        exclude = []

    user_items = [i[0] for i in sequence]
    output = self.item_score(user_id, user_items)

    # Put low similarity to viewed items to exclude them from recommendations
    output[user_items] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    top = min(k, len(output))
    if top <= 0:
        return []
    return list(np.argpartition(-output, list(range(top)))[:top])

Example 10

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)

    Scores are ``self.bias`` plus the dot product of the user's factor row
    ``self.V[user_id]`` with the item factors ``self.H``. Items in
    ``sequence`` or ``exclude`` are pushed to the bottom of the ranking.
    ``k`` is clamped to the number of items.
    '''

    if exclude is None:
        exclude = []

    output = self.bias + np.dot(self.V[user_id, :], self.H.T)

    # Put low similarity to viewed items to exclude them from recommendations
    output[[i[0] for i in sequence]] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    top = min(k, len(output))
    if top <= 0:
        return []
    return list(np.argpartition(-output, list(range(top)))[:top])

Example 11

def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None, **kwargs):
    ''' Receives a sequence of (id, rating), and produces k recommendations (as a list of ids)

    The sequence is one-hot encoded, fed through ``self.predict_function``
    and the ``k`` best-scoring item ids are returned, best first. Items in
    ``sequence`` or ``exclude`` are pushed to the bottom of the ranking.
    ``k`` is clamped to the number of scored items.
    '''

    # Without this, ``output[None] = -inf`` below would address the whole
    # array (None acts as a new axis) and wipe out every score.
    if exclude is None:
        exclude = []

    # Compile network if needed
    if not hasattr(self, 'predict_function'):
        self._compile_predict_function()

    # Prepare RNN input
    X = np.zeros((1, self._input_size()))  # input of the RNN
    X[0, :] = self._one_hot_encoding([i[0] for i in sequence])

    # Run RNN
    output = self.predict_function(X.astype(theano.config.floatX))[0]

    # Put low similarity to viewed items to exclude them from recommendations
    output[[i[0] for i in sequence]] = -np.inf
    output[exclude] = -np.inf

    # find top k according to output
    top = min(k, len(output))
    if top <= 0:
        return []
    return list(np.argpartition(-output, list(range(top)))[:top])

Example 12

def _compile_test_function(self):
    ''' Compile self.test_function, the deterministic rnn that outputs the
    ids of the k best-scoring items.

    The compiled theano function evaluates the output layer with
    ``deterministic=True``. When ``self.interactions_are_unique`` is set the
    output is multiplied by ``1 - input`` before ranking.
    '''
    print("Compiling test...")
    deterministic_output = lasagne.layers.get_output(self.l_out, deterministic=True)
    if self.interactions_are_unique:
        deterministic_output *= (1 - self.l_in.input_var)
    theano_test_function = theano.function(
        self.theano_inputs, deterministic_output,
        allow_input_downcast=True, name="Test_function", on_unused_input='ignore')

    def test_function(theano_inputs, k=10):
        output = theano_test_function(*theano_inputs)
        # Never ask for more positions than the output has columns.
        top = min(k, output.shape[-1])
        # Only the first row of the batch is ranked.
        ids = np.argpartition(-output, list(range(top)), axis=-1)[0, :top]

        return ids

    self.test_function = test_function

Example 13

def _compile_test_function(self):
    ''' Differs from base test function because of the added softmax operation

    Installs ``self.test_function``, which returns the ids of the k
    best-scoring items for the first row of the batch. When
    ``self.interactions_are_unique`` is set the softmax output is multiplied
    by ``1 - self.exclude`` before ranking.
    '''
    print("Compiling test...")
    deterministic_output = T.nnet.softmax(
        lasagne.layers.get_output(self.l_out, deterministic=True))
    if self.interactions_are_unique:
        deterministic_output *= (1 - self.exclude)

    theano_test_function = theano.function(
        self.theano_inputs, deterministic_output,
        allow_input_downcast=True, name="Test_function", on_unused_input='ignore')

    def precision_test_function(theano_inputs, k=10):
        output = theano_test_function(*theano_inputs)
        # Never ask for more positions than the output has columns.
        top = min(k, output.shape[-1])
        ids = np.argpartition(-output, list(range(top)), axis=-1)[0, :top]

        return ids

    self.test_function = precision_test_function
    print("Compilation done.")

Example 14

def smallest_k(matrix: np.ndarray, k: int,
               only_first_row: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], np.ndarray]:
    """
    Find the smallest elements in a numpy matrix.

    :param matrix: Any matrix.
    :param k: The number of smallest elements to return. Values larger than the
              number of searched elements return all of them.
    :param only_first_row: If true the search is constrained to the first row of the matrix.
    :return: The row indices, column indices and values of the k smallest items in matrix,
             ordered by ascending value.
    """
    if only_first_row:
        flatten = matrix[:1, :].flatten()
    else:
        flatten = matrix.flatten()

    if k >= flatten.size:
        # argpartition rejects kth >= size; everything is wanted anyway.
        args = np.argsort(flatten)
    else:
        # args are the indices in flatten of the k smallest elements
        args = np.argpartition(flatten, k)[:k]
        # args are the indices in flatten of the sorted k smallest elements
        args = args[np.argsort(flatten[args])]
    # flatten[args] are the values for args
    return np.unravel_index(args, matrix.shape), flatten[args]

Example 15

Example 16

def probs(self, x):
    """Return, for each row of ``x``, the share of its k nearest training
    points that belongs to each class.

    ``self.trainData`` is a sequence with one array of training points per
    class and ``self.distFunc(x, cls)`` is expected to return the distances
    between every row of ``x`` and every point of ``cls`` (one column per
    training point) -- TODO confirm against ``distFunc``.

    NOTE(review): the original snippet ended with ``return probs`` although
    ``probs`` was never assigned (the votes were computed and dropped).
    Returning ``votes / k`` is the presumed intent; verify against callers.
    """
    dists = np.hstack([self.distFunc(x, cls) for cls in self.trainData])
    # Partitioning at k-1 stays valid when k equals the number of columns.
    k = min(self.k, dists.shape[1])
    indices = np.argpartition(dists, k - 1, axis=1)[:, :k]

    # Columns [start, end) of ``dists`` belong to one class.
    sizes = np.array([len(cls) for cls in self.trainData])
    ends = np.cumsum(sizes)
    starts = ends - sizes
    votes = [np.sum(np.logical_and(start <= indices, indices < end), axis=1)
             for start, end in zip(starts, ends)]

    return np.vstack(votes).T / float(k)

Example 17

def argmaxk_rows_opt1(arr, k=10, sort=False):
    """
    Return the column indices of the k largest elements of each row of arr.

    When sort=False the order of the k indices inside a row is unspecified.
    When sort=True they are ordered so that arr[i, result[i, :]] is
    descending; only the (arr.shape[0] x k) sub-array of selected elements is
    argsorted instead of the whole array, which should be faster when
    k << arr.shape[1].

    k is clamped to the number of columns (the default k=10 used to raise on
    narrower arrays).
    """
    k = min(k, arr.shape[1])
    # column indices of k max elements in each row (m x k)
    best_inds = np.argpartition(arr, kth=-k, axis=1)[:, -k:]
    if not sort:
        return best_inds
    # row ids as a column vector; broadcasting pairs them with best_inds (m x 1)
    rows = np.arange(best_inds.shape[0], dtype=np.intp)[:, np.newaxis]
    # select k max elements from each row using advanced indexing (m x k)
    best_elems = arr[rows, best_inds]
    # indices which sort each row of best_elems in descending order (m x k)
    best_elems_inds = np.argsort(best_elems, axis=1)[:, ::-1]
    # reorder best_inds so that arr[i, sorted_best_inds[i, :]] is descending
    sorted_best_inds = best_inds[rows, best_elems_inds]
    return sorted_best_inds

Example 18

def generateCosineNeighborGraph(hin, kNeighbors=10, tf_param=None):
    """Build a symmetric neighbor-count graph from cosine similarities.

    The TF vectors of ``hin`` are obtained from
    ``GraphGenerator.getTFVectorX``. For every row the ``kNeighbors`` most
    similar rows are selected (the row itself is skipped if selected) and
    the edge counter is incremented in both directions.

    :param hin: passed through to ``GraphGenerator.getTFVectorX``
    :param kNeighbors: number of most-similar rows inspected per row;
        clamped to the number of rows
    :param tf_param: options for ``getTFVectorX``; defaults to
        ``{'word': True, 'entity': False, 'we_weight': 1}``
    :return: ``(graph, newIds)`` with ``graph`` a CSC sparse matrix
    """
    if tf_param is None:
        # Built per call: a dict literal in the signature would be shared.
        tf_param = {'word': True, 'entity': False, 'we_weight': 1}
    X, newIds, _entIds = GraphGenerator.getTFVectorX(hin, param=tf_param)
    cosX = cosine_similarity(X)
    n = cosX.shape[0]
    graph = np.zeros((n, n))
    for i in range(n):
        if kNeighbors < n:
            neighbors = np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]
        else:
            # argpartition rejects kth >= n; every row is a neighbor then.
            neighbors = np.arange(n)
        for j in neighbors:
            if j == i:
                continue
            graph[i, j] += 1
            graph[j, i] += 1

    return sparse.csc_matrix(graph), newIds

Example 19

def generateCosineNeighborGraphfromX(X, kNeighbors=10):
    """Build a symmetric neighbor-count graph from the cosine similarities of X.

    For every row the ``kNeighbors`` most similar rows are selected (the row
    itself is skipped if selected) and the edge counter is incremented in
    both directions, so a mutual selection yields a weight of 2.

    :param X: feature matrix handed to ``cosine_similarity``
    :param kNeighbors: number of most-similar rows inspected per row;
        clamped to the number of rows
    :return: the graph as a CSC sparse matrix
    """
    cosX = cosine_similarity(X)
    n = cosX.shape[0]
    graph = np.zeros((n, n))
    for i in range(n):
        if kNeighbors < n:
            neighbors = np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]
        else:
            # argpartition rejects kth >= n; every row is a neighbor then.
            neighbors = np.arange(n)
        for j in neighbors:
            if j == i:
                continue
            graph[i, j] += 1
            graph[j, i] += 1
    return sparse.csc_matrix(graph)

Example 20

def generate_laplacian_score_scalar(X_ent, X_word, kNeighbors):
    """Compute a Laplacian score of ``X_ent`` on a cosine k-NN graph of ``X_word``.

    A symmetric graph is built by linking every row of ``X_word`` to its
    ``kNeighbors`` most cosine-similar rows (weight = similarity). With ``D``
    the diagonal matrix of column sums and ``L = D - graph`` the returned
    value is ``f^T L f / (f^T D f + 1e-10)`` for the centred feature ``f``.

    NOTE(review): the matrix products rely on ``*`` meaning matrix
    multiplication, so ``X_ent`` presumably is an (n x 1) sparse matrix or
    ``np.matrix`` -- confirm against the caller.

    ``kNeighbors`` is clamped to the number of rows.
    """
    # Generate cosine similarity graph
    n = X_ent.shape[0]
    cosX = cosine_similarity(X_word)
    graph = np.zeros((n, n))
    k = min(kNeighbors, n)
    for i in range(n):
        for j in np.argpartition(cosX[i], -k)[-k:]:
            if j == i:
                continue
            graph[i, j] = cosX[i, j]
            graph[j, i] = cosX[i, j]

    D = sparse.diags([graph.sum(axis=0)], [0])
    L = D - graph
    # Remove the degree-weighted mean of the feature.
    f_tilde = X_ent - (float(X_ent.transpose() * D * np.ones((n, 1))) / D.sum().sum()) * np.ones((n, 1))
    # The small constant guards against a zero denominator.
    score = float(f_tilde.transpose() * L * f_tilde) / float(f_tilde.transpose() * D * f_tilde + 1e-10)
    return score

Example 21

Example 22

def compute_nearest_neighbors(self, num_neighbors):
    """Return, for every image, the image followed by its nearest neighbors.

    Each entry of the result is ``[key, neighbor_1, ..., neighbor_n]`` with
    the neighbors in ascending similarity order. After building the list
    the neighbor statistics are computed and the t-SNE plot is drawn.

    The image itself is removed from its own neighbor list explicitly: the
    original dropped the last element of an unordered partition, which is
    not guaranteed to be the best (self) match.
    """
    result_list = []
    for key, value in self.im2index.items():
        neighbor_list = [key]
        similarity_scores = self.similarity_mat[value]
        # One extra candidate, because the best match is expected to be the
        # image itself; never more than there are scores.
        n_take = min(num_neighbors + 1, len(similarity_scores))
        ind = np.argpartition(similarity_scores, -n_take)[-n_take:]
        ind = ind[np.argsort(similarity_scores[ind])]
        if value in ind:
            ind = ind[ind != value]
        else:
            # Self match missing (e.g. ties): drop the weakest candidate.
            ind = ind[1:]
        neighbors = [self.index2im[x] for x in ind]
        neighbor_list.extend(neighbors)

        result_list.append(neighbor_list)

    # compute neighbor statistics
    NearestNeighbour.compute_neighbor_stats(result_list, num_neighbors)

    # plot the TSNE plot
    self.plot_tsne()

    return result_list

Example 23

```def _calculate_topk_ndces(self, k):
"""
Calculate the indices of the k specialists with highest b-value,
including the base classifier regardless of its b-value.

Args:
k: int >= 0, approximately specifying the number of derived specialists to select.
Precisely, the best k (by Wilson error bound) are taken, along with the
base classifier if it is not already one of the best k.

Returns:
A list containing the indices of the top k classifiers.
The list always at least contains the base classifier's index (i.e. 0).
Therefore, the list is of length k if the base classifier is one of the top k,
and length k+1 otherwise. If k is greater than the total number of derived
specialists, returns all of them.
"""
assert self.label_corrs is not None , "Label correlations must be calculated before top k indices."
if k < len(self.label_corrs):
topk_ndces = set(np.argpartition(-self.label_corrs, k)[:k])  #Only does a partial sort of b!
else:
topk_ndces = set(range(len(self.label_corrs)))
return list(topk_ndces & set(self._relevant_ndces)) ```

Example 24

def argsort(x, topn=None, reverse=False):
    """
    Return indices of the `topn` smallest elements in array `x`, in ascending order.

    If reverse is True, return the greatest elements instead, in descending order.

    `topn=None` means all elements; a non-positive `topn` returns an empty list.

    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        # Smallest of the negated values == greatest of the originals.
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order

Example 25

Example 26

def closest_docs(self, query, k=1):
    """Closest docs by dot product between query and documents
    in tfidf weighted word vector space.

    Returns a ``(doc_ids, doc_scores)`` pair holding at most ``k`` entries,
    ordered by descending score.
    """
    spvec = self.text2spvec(query)
    res = spvec * self.doc_mat

    if len(res.data) <= k:
        # Fewer candidates than requested: a full sort is all that is needed.
        o_sort = np.argsort(-res.data)
    else:
        # Select the k best in linear time, then order only those k.
        o = np.argpartition(-res.data, k)[0:k]
        o_sort = o[np.argsort(-res.data[o])]

    doc_scores = res.data[o_sort]
    doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
    return doc_ids, doc_scores

Example 27

def select_next_words(self, next_costs, next_probs, step_num, how_many):
    """Pick the ``how_many`` cheapest (row, column) entries of ``next_costs``.

    Returns ``((row_indices, column_indices), costs)`` ordered by ascending
    cost. ``next_probs`` is accepted for interface compatibility but not
    used here. ``how_many`` is clamped to the number of candidate entries.
    """
    # Pick only on the first line (for the beginning of sampling)
    # This will avoid duplicate <q> token.
    if step_num == 0:
        flat_next_costs = next_costs[:1, :].flatten()
    else:
        # Later steps consider every row (every hypothesis in the beam).
        flat_next_costs = next_costs.flatten()

    if how_many >= flat_next_costs.size:
        # argpartition rejects kth >= size; everything is wanted anyway.
        args = numpy.argsort(flat_next_costs)
    else:
        args = numpy.argpartition(flat_next_costs, how_many)[:how_many]
        args = args[numpy.argsort(flat_next_costs[args])]

    # First-row flat indices coincide with full-matrix flat indices, so the
    # full shape can be used for unravelling in both cases.
    return numpy.unravel_index(args, next_costs.shape), flat_next_costs[args]

Example 28

def find_nbest(score, n, threshold=None):
    """Select the ``n`` smallest entries of a 2-D score matrix.

    Returns ``(nbest_score, beam_indices, var_indices)`` where
    ``beam_indices``/``var_indices`` are the row/column of every selected
    entry. The order of the selected entries is unspecified.

    If ``threshold`` is given (and non-zero), only entries whose score is
    greater than ``max(selected scores) + threshold`` are kept.

    ``n`` is clamped to the number of entries.
    """
    num_vars = score.shape[1]

    score = score.flatten()
    if n >= score.size:
        # argpartition rejects kth >= size; everything is selected then.
        nbest = np.arange(score.size)
    else:
        nbest = np.argpartition(score, n)[:n]

    # Floor division: "/" yields floats on Python 3, which cannot be used
    # as indices.
    beam_indices = nbest // num_vars
    var_indices = nbest % num_vars
    nbest_score = score[nbest]

    if threshold:
        best = np.max(nbest_score)
        cond = nbest_score > best + threshold
        nbest_score = nbest_score[cond]
        beam_indices = beam_indices[cond]
        var_indices = var_indices[cond]

    return nbest_score, beam_indices, var_indices

Example 29

def tfidf_retrieval(tfidf_vec, train_contexts_txt, train_responses_txt, output_file):
    """Retrieve, for every context, the response of its most similar *other* context.

    The pairwise similarity matrix is the dot product of the TF-IDF rows
    divided by ``mat_vector_2norm_squared(tfidf_vec)``. For each row the two
    highest-scoring columns are selected and the lower of the two is used;
    presumably the highest one is the context itself -- verify. The chosen
    responses are printed next to their context and written to
    ``output_file``.

    At least two contexts are required.
    """
    print(type(tfidf_vec))
    tfidf_vec = tfidf_vec.toarray()
    print(tfidf_vec.shape)
    prod_mat = np.dot(tfidf_vec, tfidf_vec.T)
    print(prod_mat.shape)
    prod_mat = prod_mat / mat_vector_2norm_squared(tfidf_vec)
    print(prod_mat.shape)

    response_list = []
    for i in range(len(prod_mat)):
        row = prod_mat[i]
        # argpartition(row, -2)[-2:] yields the two largest entries in
        # unspecified order; sorting them ascending and taking element 0
        # selects the second-best match. See:
        # stackoverflow.com/questions/6910641/how-to-get-indices-of-n-maximum-values-in-a-numpy-array
        ind = np.argpartition(row, -2)[-2:]
        ind = ind[np.argsort(row[ind])][0]
        response_list.append(train_responses_txt[ind])
        print(train_contexts_txt[i])
        print(response_list[i])

    with open(output_file, 'w') as f1:
        for response in response_list:
            f1.write(response)

Example 30

def visualize_frequent_words(vectors_2d: np.ndarray, dataset: DataSet, k: int, ax: plt.Axes = None) -> None:
    """Scatter-plot and annotate the 2-D vectors of the k most frequent words.

    Word frequencies are counted from ``dataset.data``; the rows of
    ``vectors_2d`` are looked up by word id and labelled with
    ``dataset.vocabulary.to_word``. When no ``ax`` is given a new 13x13
    figure is created and shown. ``k`` is clamped to the number of distinct
    words.
    """
    word_ids, counts = np.unique(dataset.data, return_counts=True)

    if k >= len(counts):
        # argpartition rejects kth >= size; every word is plotted then.
        indices = np.arange(len(counts))
    else:
        indices = np.argpartition(-counts, k)[:k]
    frequent_word_ids = word_ids[indices]

    if ax is None:
        fig, ax = plt.subplots(figsize=(13, 13))
    else:
        fig = None

    vectors_2d = vectors_2d[frequent_word_ids]

    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1], s=2, alpha=0.25)
    for i, word_id in enumerate(frequent_word_ids):
        ax.annotate(dataset.vocabulary.to_word(word_id), (vectors_2d[i, 0], vectors_2d[i, 1]))

    # Only a figure created here is laid out and shown.
    if fig is not None:
        fig.tight_layout()
        fig.show()

Example 31

Example 32

def GetFeatures(self, data):
    """Return the indices of the closest prototypes for ``data``.

    The first layer selects the ``self.c[0]`` prototypes of
    ``self.layers[0]`` with the smallest Euclidean distance to the scaled
    and shifted input. Every further layer ``i`` then selects ``self.c[i]``
    entries based on the indices chosen by the previous layer.
    """
    D = self.layers[0] - (np.array(data) * self.stateScale + self.bias)
    # Same reduction as the builtin ``sum(D.T**2)``, done inside numpy.
    D = np.sqrt(np.sum(D.T ** 2, axis=0))
    k = self.c[0]
    if k >= D.shape[0]:
        # argpartition rejects kth >= size; every prototype is selected.
        indexes = np.arange(D.shape[0])
    else:
        indexes = np.argpartition(D, k, axis=0)[:k]

    for i in range(1, len(self.layers)):
        # NOTE(review): setxor1d returns a 1-D array, so summing over axis=1
        # looks suspicious; a disabled variant compared ``self.layers[i]``
        # with a 0/1 activation vector via ``np.logical_xor``. Left unchanged
        # because the intended layer layout is not visible here.
        D = np.sum(np.setxor1d(self.layers[i], indexes, True), axis=1)
        indexes = np.argpartition(D, self.c[i], axis=0)[:self.c[i]]

    return indexes

Example 33

def process_frame_for_game_play(frame):
    """Assumes a grayscale frame.

    Blanks out two of the three most frequent intensities found below row 40
    (the two with the lowest intensity) and returns a boolean frame obtained
    with Otsu's threshold. Returns None when the histogram has fewer than
    three distinct count values. ``frame`` is modified in place.
    """
    counts, centers = skimage.exposure.histogram(frame[40:])

    if np.unique(counts).size < 3:
        return None

    # Positions (within the histogram) of the three most frequent bins.
    max_indices = np.argpartition(counts, -3)[-3:]

    for index in sorted(max_indices)[:2]:
        # A histogram position is not necessarily the intensity itself (the
        # bins need not start at 0), so translate it via the bin centers.
        frame[frame == centers[index]] = 0

    threshold = skimage.filters.threshold_otsu(frame[40:])
    bw_frame = frame > threshold

    return bw_frame

Example 34

Example 35

def format_lines(video_ids, predictions, top_k):
    """Yield one ``<video id>,<class> <score> <class> <score> ...\\n`` line per video.

    Only the ``top_k`` highest-scoring classes are listed, best first.
    ``top_k`` is clamped to the number of classes and video ids may be
    ``bytes`` (decoded as UTF-8) or ``str``.
    """
    for video_index, raw_id in enumerate(video_ids):
        scores = predictions[video_index]
        k = min(top_k, len(scores))
        if k > 0:
            top_indices = numpy.argpartition(scores, -k)[-k:]
        else:
            # A negated 0 would select every class instead of none.
            top_indices = []
        line = sorted(((class_index, scores[class_index])
                       for class_index in top_indices),
                      key=lambda p: -p[1])
        video_id = raw_id.decode('utf-8') if isinstance(raw_id, bytes) else raw_id
        yield video_id + "," + " ".join("%i %f" % pair for pair in line) + "\n"

Example 36

def calculate_precision_at_equal_recall_rate(predictions, actuals):
    """Performs a local (numpy) calculation of the PERR.

    For every row the ``n`` highest predictions are retrieved, where ``n`` is
    the number of ground-truth labels of that row, and the precision of that
    retrieval is averaged over the batch. Only strictly positive predictions
    count as hits.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.

    Returns:
      float: The average precision at equal recall rate across the entire batch.
        Rows without labels contribute 0; an empty batch yields 0.0.
    """
    num_videos = actuals.shape[0]
    if num_videos == 0:
        # Avoids the division by zero at the end.
        return 0.0

    aggregated_precision = 0.0
    for row in range(num_videos):
        num_labels = int(numpy.sum(actuals[row]))
        if num_labels == 0:
            # Nothing to retrieve; slicing with -0 would select every class.
            continue
        top_indices = numpy.argpartition(predictions[row],
                                         -num_labels)[-num_labels:]
        item_precision = 0.0
        for label_index in top_indices:
            if predictions[row][label_index] > 0:
                item_precision += actuals[row][label_index]
        aggregated_precision += item_precision / top_indices.size
    return aggregated_precision / num_videos

Example 37

def top_k_triplets(predictions, labels, k=20):
    """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
    (class index, prediction, label) format. The order of the tuples is
    unspecified; ``k`` is clamped to the number of predictions."""
    m = len(predictions)
    k = min(k, m)
    if k <= 0:
        # A negated 0 would select every element instead of none.
        return []
    indices = numpy.argpartition(predictions, -k)[-k:]
    return [(index, predictions[index], labels[index]) for index in indices]

Example 38

Example 39

Example 40

Example 41

Example 42

Example 43

Example 44

Example 45

def calculate_recall_at_n(predictions, actuals, n):
    """Performs a local (numpy) calculation of the recall@n.

    For every row the ``n`` highest predictions are retrieved and the share
    of that row's ground-truth labels found among them is averaged over the
    batch. Only strictly positive predictions count as hits.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      n: scalar of n; clamped to the number of classes.

    Returns:
      float: The recall at n across the entire batch. Rows without labels
        contribute 0; an empty batch yields 0.0.
    """
    num_videos = actuals.shape[0]
    if num_videos == 0:
        return 0.0

    aggregated_recall = 0.0
    for row in range(num_videos):
        num_labels = int(numpy.sum(actuals[row]))
        top_n = min(n, len(predictions[row]))
        if num_labels == 0 or top_n <= 0:
            # No labels: recall is undefined (division by zero); count as 0.
            continue
        top_indices = numpy.argpartition(predictions[row], -top_n)[-top_n:]
        item_recall = 0.0
        for label_index in top_indices:
            if predictions[row][label_index] > 0:
                item_recall += actuals[row][label_index]
        aggregated_recall += item_recall / num_labels
    return aggregated_recall / num_videos

Example 46

Example 47

Example 48

def format_lines(video_ids, predictions, top_k):
    """Yield one ``<video id>,<class> <score> <class> <score> ...\\n`` line per video.

    Only the ``top_k`` highest-scoring classes are listed, best first.
    ``top_k`` is clamped to the number of classes and video ids may be
    ``bytes`` (decoded as UTF-8) or ``str``.
    """
    batch_size = len(video_ids)
    for video_index in range(batch_size):
        scores = predictions[video_index]
        k = min(top_k, len(scores))
        # A negated 0 would select every class instead of none.
        top_indices = numpy.argpartition(scores, -k)[-k:] if k > 0 else []
        line = [(class_index, scores[class_index])
                for class_index in top_indices]
        line = sorted(line, key=lambda p: -p[1])
        video_id = video_ids[video_index]
        if isinstance(video_id, bytes):
            video_id = video_id.decode('utf-8')
        yield video_id + "," + " ".join("%i %f" % pair
                                        for pair in line) + "\n"

Example 49

def __call__(self, words, weights, vocabulary_max):
    """Shrink a weighted vocabulary to at most ``vocabulary_max`` entries.

    Nothing is done while the vocabulary is smaller than
    ``vocabulary_max * self.trigger_ratio``. Very large vocabularies (more
    than twice ``vocabulary_max``) are simply cut to the heaviest
    ``vocabulary_max`` words (in unspecified order). Otherwise the words are
    sorted by descending weight, an exponential trend is fitted to the
    middle of the weight curve and the trailing part of the curve that
    deviates from that trend is dropped before truncating.

    Returns the ``(words, weights)`` pair; when pruning happened both are
    numpy arrays.
    """
    if len(words) < vocabulary_max * self.trigger_ratio:
        return words, weights

    if not isinstance(words, numpy.ndarray):
        words = numpy.array(words)
    # Fancy indexing below needs an array, not a list.
    weights = numpy.asarray(weights)

    # Tail optimization does not help with very large vocabularies
    if len(words) > vocabulary_max * 2:
        indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
        indices = indices[-vocabulary_max:]
        words = words[indices]
        weights = weights[indices]
        return words, weights

    # Vocabulary typically consists of these three parts:
    # 1) the core - we found it's border - `core_end` - 15%
    # 2) the body - 70%
    # 3) the minor tail - 15%
    # (1) and (3) are roughly the same size
    # (3) can be safely discarded, (2) can be discarded with care,
    # (1) shall never be discarded.

    sorter = numpy.argsort(weights)[::-1]
    weights = weights[sorter]
    trend_start = int(len(weights) * 0.2)
    trend_finish = int(len(weights) * 0.8)
    # Linear fit of log(weight) over the body == exponential trend.
    z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
                      numpy.log(weights[trend_start:trend_finish]),
                      1)
    exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
    avg_error = numpy.abs(weights[trend_start:trend_finish] -
                          exp_z[trend_start:trend_finish]).mean()
    # Number of trailing entries before the curve first matches the trend
    # again, counted from the end.
    tail_size = int(numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1]))
    # An explicit end index: ``[:-tail_size]`` would return nothing at all
    # when tail_size is 0.
    keep = min(len(weights) - tail_size, vocabulary_max)
    weights = weights[:keep]
    words = words[sorter[:keep]]

    return words, weights

Example 50

