Python numpy.fill_diagonal() 使用实例

2023年4月29日 525次阅读

The following are code examples for showing how to use . They are extracted from open source Python projects. You can vote up the examples you like or vote down the exmaples you don’t like. You can also save this page to your account.

Example 1

def layout_tree(correlation):
    """Layout tree for visualization with e.g. matplotlib.

    Args:
        correlation: A [V, V]-shaped numpy array of latent correlations.

    Returns:
        A [V, 3]-shaped numpy array of spectral positions of vertices.
    """
    assert len(correlation.shape) == 2
    assert correlation.shape[0] == correlation.shape[1]
    assert correlation.dtype == np.float32

    laplacian = -correlation
    np.fill_diagonal(laplacian, 0)
    np.fill_diagonal(laplacian, -laplacian.sum(axis=0))
    evals, evects = scipy.linalg.eigh(laplacian, eigvals=[1, 2, 3])
    assert np.all(evals > 0)
    assert evects.shape[1] == 3
    return evects

Example 2

def find_distance_matrix(self, vector, metric='cosine'):
        '''
        compute distance matrix between topis using cosine or euclidean
        distance (default=cosine distance)
        '''
        if metric == 'cosine':
            distance_matrix = pairwise_distances(vector,
                                                metric='cosine')
            # diagonals should be exactly zero, so remove rounding errors
            numpy.fill_diagonal(distance_matrix, 0)
        if metric == 'euclidean':
            distance_matrix = pairwise_distances(vector,
                                                metric='euclidean')
        return distance_matrix

Example 3

def test_diagonal_mpa(nr_sites, local_dim, _, rgen, dtype):
    randfunc = factory._randfuncs[dtype]
    entries = randfunc((local_dim,), randstate=rgen)

    mpa_mp = factory.diagonal_mpa(entries, nr_sites)
    if nr_sites > 1:
        mpa_np = np.zeros((local_dim,) * nr_sites, dtype=dtype)
        np.fill_diagonal(mpa_np, entries)
    else:
        mpa_np = entries

    assert len(mpa_mp) == nr_sites
    assert mpa_mp.dtype is dtype
    assert_array_almost_equal(mpa_mp.to_array(), mpa_np)
    assert_correct_normalization(mpa_mp, nr_sites - 1, nr_sites)

    if nr_sites > 1:
        assert max(mpa_mp.ranks) == local_dim

Example 4

def __parse_pairs__(self, filepath, delimiter = ',', target_col = 2, column_names = list(), sequence_length = None):
        assert("target" in column_names)
        with open(filepath, "r") as f:
            lines = f.readlines()
            try:
                if sequence_length is None:
                    dataframe = pd.read_csv(filepath, sep = delimiter, skip_blank_lines = True,
                        header = None, names = column_names, index_col = False)
                    sequence_length = np.asarray(dataframe[["i", "j"]]).max()
            except ValueError:
                return None
            data = np.full((sequence_length, sequence_length), np.nan, dtype = np.double)
            np.fill_diagonal(data, Params.DISTANCE_WITH_ITSELF)
            for line in lines:
                elements = line.rstrip("\r\n").split(delimiter)
                i, j, k = int(elements[0]) - 1, int(elements[1]) - 1, float(elements[target_col])
                data[i, j] = data[j, i] = k
            if np.isnan(data).any():
                # sequence_length is wrong or the input file has missing pairs
                warnings.warn("Warning: Pairs of residues are missing from the contacts text file")
                warnings.warn("Number of missing pairs: %i " % np.isnan(data).sum())
            return data

Example 5

def getW(D, K, Mu = 0.5):
    """
    Return affinity matrix
    [1] Wang, Bo, et al. "Similarity network fusion for aggregating data types on a genomic scale." 
        Nature methods 11.3 (2014): 333-337.
    :param D: Self-similarity matrix
    :param K: Number of nearest neighbors
    """
    #W(i, j) = exp(-Dij^2/(mu*epsij))
    DSym = 0.5*(D + D.T)
    np.fill_diagonal(DSym, 0)

    Neighbs = np.partition(DSym, K+1, 1)[:, 0:K+1]
    MeanDist = np.mean(Neighbs, 1)*float(K+1)/float(K) #Need this scaling
    #to exclude diagonal element in mean
    #Equation 1 in SNF paper [1] for estimating local neighborhood radii
    #by looking at k nearest neighbors, not including point itself
    Eps = MeanDist[:, None] + MeanDist[None, :] + DSym
    Eps = Eps/3
    W = np.exp(-DSym**2/(2*(Mu*Eps)**2))
    return W

Example 6

def jaccard_similarity_weighted(F, fill_diagonal=True):
    assert F.format == 'csr'
    if not F.has_sorted_indices:
        F.sort_indices()

    ind = F.indices
    ptr = F.indptr
    dat = F.data.astype(np.float64, copy=False) # dtype needed for jaccard computation

    shift = 1 if fill_diagonal else 0
    data, rows, cols = _jaccard_similarity_weighted_tri(dat, ind, ptr, shift)

    S = sp.sparse.coo_matrix((data, (rows, cols)), shape=(F.shape[0],)*2).tocsc()
    S += S.T # doubles diagonal values if fill_diagonal is False

    if fill_diagonal:
        set_diagonal_values(S, 1)
    else:
        set_diagonal_values(S, np.sign(S.diagonal())) # set to 1, preserve zeros
    return S

Example 7

def select_negtive(self, i_feat, s_feat, sess, topN=50):
	'''
	Select the triplets with the largest losses \n
	return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
	'''
	feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
	i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
	S = np.matmul(i_embed, s_embed.T)
	i_feat_pos = i_feat.repeat(topN, axis=0)
	s_feat_pos = s_feat.repeat(topN, axis=0)
	N = S.shape[0]
	np.fill_diagonal(S, -2*np.ones(N))
	neg_s_idx = S.argsort(axis=1)[:, -topN:]
	neg_i_idx = S.argsort(axis=0)[-topN:, :]
	s_feat_neg = s_feat[neg_s_idx.flatten('C')]
	i_feat_neg = i_feat[neg_i_idx.flatten('F')]
	return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg

Example 8

def select_negtive(self, i_feat, s_feat, sess, topN=50):
	'''
	Select the triplets with the largest losses \n
	return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
	'''
	feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
	i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
	S = np.matmul(i_embed, s_embed.T)
	i_feat_pos = i_feat.repeat(topN, axis=0)
	s_feat_pos = s_feat.repeat(topN, axis=0)
	N = S.shape[0]
	np.fill_diagonal(S, -2*np.ones(N))
	neg_s_idx = S.argsort(axis=1)[:, -topN:]
	neg_i_idx = S.argsort(axis=0)[-topN:, :]
	s_feat_neg = s_feat[neg_s_idx.flatten('C')]
	i_feat_neg = i_feat[neg_i_idx.flatten('F')]
	return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg

Example 9

def compute_sims(inputs: mx.nd.NDArray, normalize: bool) -> mx.nd.NDArray:
    """
    Returns a matrix with pair-wise similarity scores between inputs.
    Similarity score is (normalized) Euclidean distance. 'Similarity with self' is masked
    to large negative value.

    :param inputs: NDArray of inputs.
    :param normalize: Whether to normalize to unit-length.
    :return: NDArray with pairwise similarities of same shape as inputs.
    """
    if normalize:
        logger.info("Normalizing embeddings to unit length")
        inputs = mx.nd.L2Normalization(inputs, mode='instance')
    sims = mx.nd.dot(inputs, inputs, transpose_b=True)
    sims_np = sims.asnumpy()
    np.fill_diagonal(sims_np, -9999999.)
    sims = mx.nd.array(sims_np)
    return sims

Example 10

def sharpenOld(s, kernelFunc, dist=None, scale=None,
            normalize=False, m1=False, *args, **kwargs):
    s = util.colmat(s)

    if dist is None:
        dist = np.arange(s.shape[1])+1.0
        dist = np.abs(dist[None,:]-dist[:,None])

        #dist = np.insert(spsig.triang(s.shape[1]-1, sym=False), 0, 0.0)
        #dist = np.vstack([np.roll(dist, i) for i in xrange(dist.size)])

    if scale is None:
        # minimum off-diagonal distance
        scale = np.min(dist[np.asarray(1.0-np.eye(dist.shape[0]), dtype=np.bool)])

    kernel = kernelFunc(dist.T/scale, *args, **kwargs)

    if m1:
        np.fill_diagonal(kernel, 0.0)

    if normalize:
        kernel = kernel/np.abs(kernel.sum(axis=0))

    return s - s.dot(kernel)

Example 11

def load_data(filename):
	df = pd.read_csv(filename, compression='zip')
	selected = ['Category', 'Descript']
	non_selected = list(set(df.columns) - set(selected))

	df = df.drop(non_selected, axis=1)
	df = df.dropna(axis=0, how='any', subset=selected)
	df = df.reindex(np.random.permutation(df.index))

	labels = sorted(list(set(df[selected[0]].tolist())))
	num_labels = len(labels)
	one_hot = np.zeros((num_labels, num_labels), int)
	np.fill_diagonal(one_hot, 1)
	label_dict = dict(zip(labels, one_hot))

	x_raw= df[selected[1]].apply(lambda x: clean_str(x).split(' ')).tolist()
	y_raw = df[selected[0]].apply(lambda y: label_dict[y]).tolist()

	x_raw = pad_sentences(x_raw)

	vocabulary, vocabulary_inv = build_vocab(x_raw)

	x = np.array([[vocabulary[word] for word in sentence] for sentence in x_raw])
	y = np.array(y_raw)
	return x, y, vocabulary, vocabulary_inv, df, labels

Example 12

def load_test_data(test_file, labels):
	df = pd.read_csv(test_file, sep='|')
	select = ['Descript']

	df = df.dropna(axis=0, how='any', subset=select)
	test_examples = df[select[0]].apply(lambda x: data_helper.clean_str(x).split(' ')).tolist()

	num_labels = len(labels)
	one_hot = np.zeros((num_labels, num_labels), int)
	np.fill_diagonal(one_hot, 1)
	label_dict = dict(zip(labels, one_hot))

	y_ = None
	if 'Category' in df.columns:
		select.append('Category')
		y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()

	not_select = list(set(df.columns) - set(select))
	df = df.drop(not_select, axis=1)
	return test_examples, y_, df

Example 13

def select_negtive(self, i_feat, s_feat, sess, topN=50):
        '''
        Select the triplets with the largest losses \n
        return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
        '''
        feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
        i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
        S = np.matmul(i_embed, s_embed.T)
        i_feat_pos = i_feat.repeat(topN, axis=0)
        s_feat_pos = s_feat.repeat(topN, axis=0)
        N = S.shape[0]
        np.fill_diagonal(S, -2*np.ones(N))
        neg_s_idx = S.argsort(axis=1)[:, -topN:]
        neg_i_idx = S.argsort(axis=0)[-topN:, :]
        s_feat_neg = s_feat[neg_s_idx.flatten('C')]
        i_feat_neg = i_feat[neg_i_idx.flatten('F')]
        return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg

Example 14

def select_negtive(self, i_feat, s_feat, sess, topN=50):
        '''
        Select the triplets with the largest losses \n
        return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg
        '''
        feed_dict = {self.image_feat: i_feat, self.sentence_feat:s_feat}
        i_embed, s_embed = sess.run([self.image_fc2, self.sentence_fc2], feed_dict=feed_dict)
        S = np.matmul(i_embed, s_embed.T)
        i_feat_pos = i_feat.repeat(topN, axis=0)
        s_feat_pos = s_feat.repeat(topN, axis=0)
        N = S.shape[0]
        np.fill_diagonal(S, -2*np.ones(N))
        neg_s_idx = S.argsort(axis=1)[:, -topN:]
        neg_i_idx = S.argsort(axis=0)[-topN:, :]
        s_feat_neg = s_feat[neg_s_idx.flatten('C')]
        i_feat_neg = i_feat[neg_i_idx.flatten('F')]
        return i_feat_pos, s_feat_pos, i_feat_neg, s_feat_neg

Example 15

def get_gcovmat(h2, rg):
    """
    Args: h2: vector with SNP heritabilities
          rg: vector with genetic correlations
    Returns: numpy trait by trait array with h2 on diagonal and genetic covariance on offdiagnoals
    """
    mat = numpy.zeros((len(h2), len(h2)))
    mat[numpy.triu_indices(len(h2), 1)] = rg
    mat = mat + mat.T
    mat = mat * numpy.sqrt(numpy.outer(h2, h2))
    numpy.fill_diagonal(mat, h2)
    return numpy.array(mat)


# When input files are score files, not beta files, mtot may be unknown.
# Here mtot=1e6 is assumed. The absolute value of the expected variances for each trait is irrelevant for the multi-trait weighting, so it doesn't matter too much what this value is, expecially if M > N.

Example 16

def ols_weights(n, h2, rg, mtot=1e6):
    """
    Args: n: vector with sample size for each trait
          h2: vector with SNP heritabilities
          rg: vector with rg for each pair of traits (3 traits: 1,2; 1,3; 2,3)
          mtot: total number of markers (doesn't change result much)
    Returns: ntraits * ntraits array with ols weights. weights in each row are for are for a multi-trait predictor of the trait in this row
    """
    ntraits = len(n)
    gcovmat = get_gcovmat(h2, rg)
    print(gcovmat)
    V = gcovmat / mtot
    numpy.fill_diagonal(V, ols_variances(n, h2, mtot))
    C = gcovmat / mtot

    weights = numpy.zeros([ntraits, ntraits])
    for i in range(ntraits):
        nonzero = V[i,] != 0
        Vi = V[numpy.array(numpy.where(nonzero)[0])[:, None], nonzero]
        Vinv = numpy.linalg.inv(Vi)
        weights[i, nonzero] = numpy.dot(Vinv, C[i, nonzero])
    print(weights)
    return weights

Example 17

def __get_prolongation_matrix(ndofs_coarse, ndofs_fine):
        """Helper routine for the prolongation operator

        Args:
            ndofs_fine (int): number of DOFs on the fine grid
            ndofs_coarse (int): number of DOFs on the coarse grid

        Returns:
            scipy.sparse.csc_matrix: sparse prolongation matrix of size
                `ndofs_fine` x `ndofs_coarse`
        """

        # This is a workaround, since I am not aware of a suitable way to do
        # this directly with sparse matrices.
        P = np.zeros((ndofs_fine, ndofs_coarse))
        np.fill_diagonal(P[1::2, :], 1)
        np.fill_diagonal(P[0::2, :], 1.0/2.0)
        np.fill_diagonal(P[2::2, :], 1.0/2.0)
        return sp.csc_matrix(P)

Example 18

def set_diagonal(G, val=0):
    """

    Generally diagonal is set to 0. This function helps set the diagonal across time.


    **PARAMETERS**

    :G: temporal network (graphlet)
    :val: value to set diagonal to (default 0).

    **OUTPUT**

    :G: Graphlet representation of G with new diagonal

    **HISTORY**

    :Modified: Dec 2016, WHT (documentation)
    :Created: Nov 2016, WHT

    """

    for t in range(0, G.shape[2]):
        np.fill_diagonal(G[:, :, t], val)
    return G

Example 19

def newScore(movie):
    critic_num = len(token_dict[movie["movieTitle"]]["critics"])
    N = len(token_dict[movie["movieTitle"]]["reviews"])
    C = cosine[movie["movieTitle"]][critic_num:, critic_num:]
    R = map(lambda x: x['score'], movie['reviews'])

    print C.shape
    # exclude self similarity
    # np.fill_diagonal(C, 0)
    # normalize
    row_sums = C.sum(axis=1)
    C = C / row_sums[:, np.newaxis]
    # calculate new score
    new_score = np.dot(C, R)

    # update new score
    new_review = movie['reviews']
    map(lambda x, y: x.update({'newScore': y}), new_review, new_score)

    testing = map(lambda x: abs(x['score'] - x['newScore']) < 5, new_review)
    print np.sum(testing)

    return new_review

Example 20

def get_masked(self, percent_hole, diag_off=1):
        """ Construct a random mask.
            Random training set on 20% on Data / debug5 - debug11 -- Unbalanced
        """

        data = self.data
        if type(data) is np.ndarray:
            #self.data_mat = sp.sparse.csr_matrix(data)
            pass
        else:
            raise NotImplementedError('type %s unknow as corpus' % type(data))

        n = int(data.size * percent_hole)
        mask_index = np.unravel_index(np.random.permutation(data.size)[:n], data.shape)
        mask = np.zeros(data.shape, dtype=data.dtype)
        mask[mask_index] = 1

        if self.is_symmetric():
            mask = np.tril(mask) + np.tril(mask, -1).T

        data_ma = ma.array(data, mask=mask)
        if diag_off == 1:
            np.fill_diagonal(data_ma, ma.masked)

        return data_ma

Example 21

def get_masked_zeros(self, diag_off=1):
        ''' Take out all zeros '''
        data = self.data
        if type(data) is np.ndarray:
            #self.data_mat = sp.sparse.csr_matrix(data)
            pass
        else:
            raise NotImplementedError('type %s unknow as corpus' % type(data))

        mask = np.zeros(data.shape, dtype=data.dtype)
        mask[data == 0] = 1

        if self.is_symmetric():
            mask = np.tril(mask) + np.tril(mask, -1).T

        data_ma = ma.array(data, mask=mask)
        if diag_off == 1:
            np.fill_diagonal(data_ma, ma.masked)

        return data_ma

Example 22

def _solve_hessian(G, Y, thY, precon, lambda_min):
    N, T = Y.shape
    # Compute the derivative of the score
    psidY = ne.evaluate('(- thY ** 2 + 1.) / 2.')  # noqa
    # Build the diagonal of the Hessian, a.
    Y_squared = Y ** 2
    if precon == 2:
        a = np.inner(psidY, Y_squared) / float(T)
    elif precon == 1:
        sigma2 = np.mean(Y_squared, axis=1)
        psidY_mean = np.mean(psidY, axis=1)
        a = psidY_mean[:, None] * sigma2[None, :]
        diagonal_term = np.mean(Y_squared * psidY) + 1.
        a[np.diag_indices_from(a)] = diagonal_term
    else:
        raise ValueError('precon should be 1 or 2')
    # Compute the eigenvalues of the Hessian
    eigenvalues = 0.5 * (a + a.T - np.sqrt((a - a.T) ** 2 + 4.))
    # Regularize
    problematic_locs = eigenvalues < lambda_min
    np.fill_diagonal(problematic_locs, False)
    i_pb, j_pb = np.where(problematic_locs)
    a[i_pb, j_pb] += lambda_min - eigenvalues[i_pb, j_pb]
    # Invert the transform
    return (G * a.T - G.T) / (a * a.T - 1.)

Example 23

def grad(self, inp, cost_grad):
        """
        Notes
        -----
        The gradient is currently implemented for matrices only.

        """
        a, val = inp
        grad = cost_grad[0]
        if (a.dtype.startswith('complex')):
            return [None, None]
        elif a.ndim > 2:
            raise NotImplementedError('%s: gradient is currently implemented'
                                      ' for matrices only' %
                                      self.__class__.__name__)
        wr_a = fill_diagonal(grad, 0)  # valid for any number of dimensions
        # diag is only valid for matrices
        wr_val = theano.tensor.nlinalg.diag(grad).sum()
        return [wr_a, wr_val]

Example 24

def test_perform(self):
        x = tensor.matrix()
        y = tensor.scalar()
        f = function([x, y], fill_diagonal(x, y))
        for shp in [(8, 8), (5, 8), (8, 5)]:
            a = numpy.random.rand(*shp).astype(config.floatX)
            val = numpy.cast[config.floatX](numpy.random.rand())
            out = f(a, val)
            # We can't use numpy.fill_diagonal as it is bugged.
            assert numpy.allclose(numpy.diag(out), val)
            assert (out == val).sum() == min(a.shape)

        # test for 3d tensor
        a = numpy.random.rand(3, 3, 3).astype(config.floatX)
        x = tensor.tensor3()
        y = tensor.scalar()
        f = function([x, y], fill_diagonal(x, y))
        val = numpy.cast[config.floatX](numpy.random.rand() + 10)
        out = f(a, val)
        # We can't use numpy.fill_diagonal as it is bugged.
        assert out[0, 0, 0] == val
        assert out[1, 1, 1] == val
        assert out[2, 2, 2] == val
        assert (out == val).sum() == min(a.shape)

Example 25

def test_perform(self):
        x = tensor.matrix()
        y = tensor.scalar()
        z = tensor.iscalar()

        f = function([x, y, z], fill_diagonal_offset(x, y, z))
        for test_offset in (-5, -4, -1, 0, 1, 4, 5):
            for shp in [(8, 8), (5, 8), (8, 5), (5, 5)]:
                a = numpy.random.rand(*shp).astype(config.floatX)
                val = numpy.cast[config.floatX](numpy.random.rand())
                out = f(a, val, test_offset)
                # We can't use numpy.fill_diagonal as it is bugged.
                assert numpy.allclose(numpy.diag(out, test_offset), val)
                if test_offset >= 0:
                    assert (out == val).sum() == min(min(a.shape),
                                                     a.shape[1] - test_offset)
                else:
                    assert (out == val).sum() == min(min(a.shape),
                                                     a.shape[0] + test_offset)

Example 26

def constant(self):
        delta = np.min(self.rho) - 0.01
        cormat = np.full((self.nkdim, self.nkdim), delta)

        epsilon = 0.99 - np.max(self.rho)
        for i in np.arange(self.k):
            cor = np.full((self.nk[i], self.nk[i]), self.rho[i])

            if i == 0:
                cormat[0:self.nk[0], 0:self.nk[0]] = cor
            if i != 0:
                cormat[np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1]),
                np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1])] = cor

        np.fill_diagonal(cormat, 1 - epsilon)

        cormat = self._generate_noise(cormat, self.nkdim, self.M, epsilon)

        return cormat

Example 27

def toepz(self):
        cormat = np.zeros((self.nkdim, self.nkdim))

        epsilon = (1 - np.max(self.rho)) / (1 + np.max(self.rho)) - .01

        for i in np.arange(self.k):
            t = np.insert(np.power(self.rho[i], np.arange(1, self.nk[i])), 0, 1)
            cor = toeplitz(t)
            if i == 0:
                cormat[0:self.nk[0], 0:self.nk[0]] = cor
            if i != 0:
                cormat[np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1]),
                np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1])] = cor

        np.fill_diagonal(cormat, 1 - epsilon)

        cormat = self._generate_noise(cormat, self.nkdim, self.M, epsilon)

        return cormat

Example 28

def hub(self):
        cormat = np.zeros((self.nkdim, self.nkdim))

        for i in np.arange(self.k):
            cor = toeplitz(self._fill_hub_matrix(self.rho[i,0],self.rho[i,1], self.power, self.nk[i]))
            if i == 0:
                cormat[0:self.nk[0], 0:self.nk[0]] = cor
            if i != 0:
                cormat[np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1]),
                np.sum(self.nk[0:i]):np.sum(self.nk[0:i + 1])] = cor
            tau = (np.max(self.rho[i]) - np.min(self.rho[i])) / (self.nk[i] - 2)

        epsilon = 0.08 #(1 - np.min(rho) - 0.75 * np.min(tau)) - 0.01

        np.fill_diagonal(cormat, 1 - epsilon)

        cormat = self._generate_noise(cormat, self.nkdim, self.M, epsilon)

        return cormat

Example 29

def load_data_and_labels():

    articles = np.load('data/bin/all_articles.npy')
    labels = np.load('data/bin/all_labels.npy')

    articles = [clean_str(article) for article in articles]

    # Map the actual labels to one hot labels
    label_list = sorted(list(set(labels)))
    one_hot = np.zeros((len(label_list), len(label_list)), int)
    np.fill_diagonal(one_hot, 1)
    label_dict = dict(zip(label_list, one_hot))

    labels = one_hot_encode(labels, label_dict)

    x_raw = articles
    y_raw = labels
    return x_raw, y_raw, label_list

Example 30

def fill_diagonal_(mat: T.Tensor, val: T.Scalar) -> T.Tensor:
    """
    Fill the diagonal of the matirx with a specified value.

    Note:
        Modifies mat in place.

    Args:
        mat: A tensor.
        val: The value to put along the diagonal.

    Returns:
        None

    """
    numpy.fill_diagonal(mat, val)

Example 31

def make_one_hot(X, onehot_size):
    """
    DESCRIPTION:
        Make a one-hot version of X
    PARAM:
        X: 1d numpy with each value in X representing the class of X
        onehot_size: length of the one hot vector
    RETURN:
        2d numpy tensor, with each row been the onehot vector
    """
    if onehot_size < 450:
        dig_one = np.zeros((onehot_size, onehot_size))
        np.fill_diagonal(dig_one, 1)
        rX = dig_one[np.asarray(X)]
    else:
        # for large onehot size, this is faster
        rX = np.zeros((len(X), onehot_size))
        for i in range(len(X)):
            rX[i, X[i]] = 1
    return rX

Example 32

def test_binary_perplexity_stability():
    # Binary perplexity search should be stable.
    # The binary_search_perplexity had a bug wherein the P array
    # was uninitialized, leading to sporadically failing tests.
    k = 10
    n_samples = 100
    random_state = check_random_state(0)
    distances = random_state.randn(n_samples, 2).astype(np.float32)
    # Distances shouldn't be negative
    distances = np.abs(distances.dot(distances.T))
    np.fill_diagonal(distances, 0.0)
    last_P = None
    neighbors_nn = np.argsort(distances, axis=1)[:, :k].astype(np.int64)
    for _ in range(100):
        P = _binary_search_perplexity(distances.copy(), neighbors_nn.copy(),
                                      3, verbose=0)
        P1 = _joint_probabilities_nn(distances, neighbors_nn, 3, verbose=0)
        if last_P is None:
            last_P = P
            last_P1 = P1
        else:
            assert_array_almost_equal(P, last_P, decimal=4)
            assert_array_almost_equal(P1, last_P1, decimal=4)

Example 33

def test_gradient():
    # Test gradient of Kullback-Leibler divergence.
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features).astype(np.float32)
    distances = distances.dot(distances.T)
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components)

    P = _joint_probabilities(distances, desired_perplexity=25.0,
                             verbose=0)
    fun = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                        n_components)[0]
    grad = lambda params: _kl_divergence(params, P, alpha, n_samples,
                                         n_components)[1]
    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
                        decimal=5)

Example 34

def _check_fix_Q(self, fixed_mu=False):
        """
        Check the main diagonal of Q and fix it in case it does not corresond
        the definition of the rate matrix. Should be run every time when creating
        custom GTR model.
        """
        # fix Q
        self.Pi /= self.Pi.sum() # correct the Pi manually
        # NEEDED TO BREAK RATE MATRIX DEGENERACY AND FORCE NP TO RETURN REAL ORTHONORMAL EIGENVECTORS
        self.W += self.break_degen + self.break_degen.T
        # fix W
        np.fill_diagonal(self.W, 0)
        Wdiag = -(self.Q).sum(axis=0)/self.Pi
        np.fill_diagonal(self.W, Wdiag)
        scale_factor = -np.sum(np.diagonal(self.Q)*self.Pi)
        self.W /= scale_factor
        if not fixed_mu:
            self.mu *= scale_factor
        if (self.Q.sum(axis=0) < 1e-10).sum() <  self.alphabet.shape[0]: # fix failed
            print ("Cannot fix the diagonal of the GTR rate matrix. Should be all zero", self.Q.sum(axis=0))
            import ipdb; ipdb.set_trace()
            raise ArithmeticError("Cannot fix the diagonal of the GTR rate matrix.")

Example 35

def compute_db_index(matrix, kmeans):
    '''
    Compute Davies-Bouldin index, a measure of clustering quality.
    Faster and possibly more reliable than silhouette score.
    '''
    (n, m) = matrix.shape
    k = kmeans.n_clusters

    centers = kmeans.cluster_centers_
    labels = kmeans.labels_

    centroid_dists = sp_dist.squareform(sp_dist.pdist(centers))
    # Avoid divide-by-zero
    centroid_dists[np.abs(centroid_dists) < MIN_CENTROID_DIST] = MIN_CENTROID_DIST

    wss = np.zeros(k)
    counts = np.zeros(k)

    for i in xrange(n):
        label = labels[i]
        # note: this is 2x faster than scipy sqeuclidean
        sqdist = np.square(matrix[i,:] - centers[label,:]).sum()
        wss[label] += sqdist
        counts[label] += 1

    # Handle empty clusters
    counts[counts == 0] = 1

    scatter = np.sqrt(wss / counts)
    mixitude = (scatter + scatter[:, np.newaxis]) / centroid_dists
    np.fill_diagonal(mixitude, 0.0)

    worst_case_mixitude = np.max(mixitude, axis=1)
    db_score = worst_case_mixitude.sum() / k

    return db_score

Example 36

def cor2cov(cor, var=None, sd=None, copy=True):
    sd = np.sqrt(var) if var is not None else sd
    if isinstance(cor, (DiagonalArray, SubdiagonalArray)):
        cor = cor.tonumpyarray()
    cor = npu.tondim2(cor, copy=copy)
    dim = len(var)
    assert dim == np.shape(cor)[0] and dim == np.shape(cor)[1]
    np.fill_diagonal(cor, 1.)
    cor = (sd.T * (sd * cor).T).T
    npu.lowertosymmetric(cor, copy=False)
    return cor

Example 37

def diagonal_mpa(entries, sites):
    """Returns an MPA with ``entries`` on the diagonal and zeros otherwise.

    :param numpy.ndarray entries: one-dimensional array
    :returns: :class:`~mpnum.mparray.MPArray` with rank ``len(entries)``.

    """
    assert sites > 0

    if entries.ndim != 1:
        raise NotImplementedError("Currently only supports diagonal MPA with "
                                  "one leg per site.")

    if sites < 2:
        return mp.MPArray.from_array(entries)

    ldim = len(entries)
    leftmost_ltens = np.eye(ldim).reshape((1, ldim, ldim))
    rightmost_ltens = np.diag(entries).reshape((ldim, ldim, 1))
    center_ltens = np.zeros((ldim,) * 3)
    np.fill_diagonal(center_ltens, 1)
    ltens = it.chain((leftmost_ltens,), it.repeat(center_ltens, sites - 2),
                     (rightmost_ltens,))

    return mp.MPArray(LocalTensors(ltens, cform=(sites - 1, sites)))


#########################
#  More physical stuff  #
#########################

Example 38

def test_ignore_no_data_ints(self):
        arr = np.ones((1, 16, 16), int)
        np.fill_diagonal(arr[0], NO_DATA_INT)
        tile = Tile(arr, 'INT', NO_DATA_INT)

        rdd = BaseTestClass.pysc.parallelize([(self.projected_extent, tile)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

        value_map = {1: 0}

        result = raster_rdd.reclassify(value_map, int, replace_nodata_with=1).to_numpy_rdd().first()[1].cells

        self.assertTrue((result == np.identity(16, int)).all())

Example 39

def test_ignore_no_data_floats(self):
        arr = np.ones((1, 4, 4))
        np.fill_diagonal(arr[0], float('nan'))
        tile = Tile(arr, 'FLOAT', float('nan'))

        rdd = BaseTestClass.pysc.parallelize([(self.projected_extent, tile)])
        raster_rdd = RasterLayer.from_numpy_rdd(LayerType.SPATIAL, rdd)

        value_map = {1.0: 0.0}

        result = raster_rdd.reclassify(value_map, float, replace_nodata_with=1.0).to_numpy_rdd().first()[1].cells

        self.assertTrue((result == np.identity(4)).all())

Example 40

def _update_syn(self, x, eta=0.5):
        """Perform one update of the weights and re-calculate moments in the SYNERGISTIC case."""
        m = self.moments
        H = (1. / m["X_i^2 | Y"] * m["X_i Z_j"].T).dot(m["X_i Z_j"])
        np.fill_diagonal(H, 0)
        R = m["X_i Z_j"].T / m["X_i^2 | Y"]
        S = np.dot(H, self.ws)
        ws = (1. - eta) * self.ws + eta * (R - S)
        m = self._calculate_moments_syn(x, ws)
        return ws, m

Example 41

def get_covariance(self):
        # This uses E(Xi|Y) formula for non-synergistic relationships
        m = self.moments
        if self.discourage_overlap:
            z = m['rhoinvrho'] / (1 + m['Si'])
            cov = np.dot(z.T, z)
            cov /= (1. - self.eps**2)
            np.fill_diagonal(cov, 1)
            return self.theta[1][:, np.newaxis] * self.theta[1] * cov
        else:
            cov = np.einsum('ij,kj->ik', m["X_i Z_j"], m["X_i Y_j"])
            np.fill_diagonal(cov, 1)
            return self.theta[1][:, np.newaxis] * self.theta[1] * cov

Example 42

def seriation(self, A):
        n_components = 2
        eigen_tol = 0.00001
        if sparse.issparse(A):
            A = A.todense()
        np.fill_diagonal(A, 0)
        laplacian, dd = graph_laplacian(A, return_diag=True)
        laplacian *= -1
        lambdas, diffusion_map = eigsh(laplacian, k=n_components, sigma=1.0, which='LM', tol=eigen_tol)
        embedding = diffusion_map.T[n_components::-1]  # * dd
        sort_index = np.argsort(embedding[1])
        return sort_index

Example 43

def fill_diagonal(a, val, wrap=False):
    """Fills the main diagonal of the given array of any dimensionality.

    For an array `a` with ``a.ndim > 2``, the diagonal is the list of
    locations with indices ``a[i, i, ..., i]`` all identical. This function
    modifies the input array in-place, it does not return a value.

    Args:
        a (cupy.ndarray): The array, at least 2-D.
        val (scalar): The value to be written on the diagonal.
            Its type must be compatible with that of the array a.
        wrap (bool): If specified, the diagonal is "wrapped" after N columns.
            This affects only tall matrices.

    Examples
    --------
    >>> a = cupy.zeros((3, 3), int)
    >>> cupy.fill_diagonal(a, 5)
    >>> a
    array([[5, 0, 0],
           [0, 5, 0],
           [0, 0, 5]])

     .. seealso:: :func:`numpy.fill_diagonal`
    """
    # The followings are imported from the original numpy
    if a.ndim < 2:
        raise ValueError('array must be at least 2-d')
    end = None
    if a.ndim == 2:
        step = a.shape[1] + 1
        if not wrap:
            end = a.shape[1] * a.shape[1]
    else:
        if not numpy.alltrue(numpy.diff(a.shape) == 0):
            raise ValueError('All dimensions of input must be of equal length')
        step = 1 + numpy.cumprod(a.shape[:-1]).sum()

    # Since the current cupy does not support a.flat,
    # we use a.ravel() instead of a.flat
    a.ravel()[:end:step] = val

Example 44

def knn_initialize(
        X,
        missing_mask,
        verbose=False,
        min_dist=1e-6,
        max_dist_multiplier=1e6):
    """
    Fill X with NaN values if necessary, construct the n_samples x n_samples
    distance matrix and set the self-distance of each row to infinity.

    Returns contents of X laid out in row-major, the distance matrix,
    and an "effective infinity" which is larger than any entry of the
    distance matrix.
    """
    X_row_major = X.copy("C")
    if missing_mask.sum() != np.isnan(X_row_major).sum():
        # if the missing values have already been zero-filled need
        # to put NaN's back in the data matrix for the distances function
        X_row_major[missing_mask] = np.nan
    D = all_pairs_normalized_distances(X_row_major)
    D_finite_flat = D[np.isfinite(D)]
    if len(D_finite_flat) > 0:
        max_dist = max_dist_multiplier * max(1, D_finite_flat.max())
    else:
        max_dist = max_dist_multiplier
    # set diagonal of distance matrix to a large value since we don't want
    # points considering themselves as neighbors
    np.fill_diagonal(D, max_dist)
    D[D < min_dist] = min_dist  # prevents 0s
    D[D > max_dist] = max_dist  # prevents infinities
    return X_row_major, D, max_dist

Example 45

def getDiffusionMap(SSM, Kappa, t = -1, includeDiag = True, thresh = 5e-4, NEigs = 51):
    """
    :param SSM: Metric between all pairs of points
    :param Kappa: Number in (0, 1) indicating a fraction of nearest neighbors
                used to autotune neighborhood size
    :param t: Diffusion parameter.  If -1, do Autotuning
    :param includeDiag: If true, include recurrence to diagonal in the markov
        chain.  If false, zero out diagonal
    :param thresh: Threshold below which to zero out entries in markov chain in
        the sparse approximation
    :param NEigs: The number of eigenvectors to use in the approximation
    """
    N = SSM.shape[0]
    #Use the letters from the delaPorte paper
    K = getW(SSM, int(Kappa*N))
    if not includeDiag:
        np.fill_diagonal(K, np.zeros(N))
    RowSumSqrt = np.sqrt(np.sum(K, 1))
    DInvSqrt = sparse.diags([1/RowSumSqrt], [0])

    #Symmetric normalized Laplacian
    Pp = (K/RowSumSqrt[None, :])/RowSumSqrt[:, None]
    Pp[Pp < thresh] = 0
    Pp = sparse.csr_matrix(Pp)

    lam, X = sparse.linalg.eigsh(Pp, NEigs, which='LM')
    lam = lam/lam[-1] #In case of numerical instability

    #Check to see if autotuning
    if t > -1:
        lamt = lam**t
    else:
        #Autotuning diffusion time
        lamt = np.array(lam)
        lamt[0:-1] = lam[0:-1]/(1-lam[0:-1])

    #Do eigenvector version
    V = DInvSqrt.dot(X) #Right eigenvectors
    M = V*lamt[None, :]
    return M/RowSumSqrt[:, None] #Put back into orthogonal Euclidean coordinates

Example 46

def coulomb_matrix(self, system):
        """Creates the Coulomb matrix for the given system.
        """
        # Calculate offdiagonals
        q = system.get_initial_charges()
        qiqj = q[None, :]*q[:, None]
        idmat = system.get_inverse_distance_matrix()
        np.fill_diagonal(idmat, 0)
        cmat = qiqj*idmat

        # Set diagonal
        np.fill_diagonal(cmat, 0.5 * q ** 2.4)

        return cmat

Example 47

def sine_matrix(self, system):
        """Creates the Sine matrix for the given system.
        """
        # Cell and inverse cell
        B = system.get_cell()
        B_inv = system.get_cell_inverse()

        # Difference vectors in tensor 3D-tensor-form
        diff_tensor = system.get_displacement_tensor()

        # Calculate phi
        arg_to_sin = np.pi * np.dot(diff_tensor, B_inv)
        phi = np.linalg.norm(np.dot(np.sin(arg_to_sin)**2, B), axis=2)

        with np.errstate(divide='ignore'):
            phi = np.reciprocal(phi)

        # Calculate Z_i*Z_j
        q = system.get_initial_charges()
        qiqj = q[None, :]*q[:, None]
        np.fill_diagonal(phi, 0)

        # Multiply by charges
        smat = qiqj*phi

        # Set diagonal
        np.fill_diagonal(smat, 0.5 * q ** 2.4)

        return smat

Example 48

def _calc_subsystem_energies(self, ewald_matrix):
        """Modify the give matrix that consists of the eral and reciprocal sums
        so that each entry x_ij is the full Ewald sum energy of a system
        consisting of atoms i and j.
        """
        q = self.q

        # Create the self-term array where q1[i,j] is qi**2 + qj**2, except for
        # the diagonal, where it is qi**2
        q1 = q[None, :]**2 + q[:, None]**2
        diag = np.diag(q1)/2
        np.fill_diagonal(q1, diag)
        q1_prefactor = -self.a/self.sqrt_pi

        # Create the charge correction array where q2[i,j] is (qi + qj)**2,
        # except for the diagonal where it is qi**2
        q2 = q[None, :] + q[:, None]
        q2 **= 2
        diag = np.diag(q2)/4
        np.fill_diagonal(q2, diag)
        q2_prefactor = -np.pi/(2*self.volume*self.a_squared)
        correction_matrix = q1_prefactor*q1 + q2_prefactor*q2

        # Add the terms coming from x_ii and x_jj to the off-diagonal along
        # with the corrections
        n_atoms = self.n_atoms
        final_matrix = np.zeros((n_atoms, n_atoms))
        for i in range(n_atoms):
            for j in range(n_atoms):
                if i == j:
                    final_matrix[i, j] = ewald_matrix[i, j]
                else:
                    pair_term = 2*ewald_matrix[i, j]
                    self_term_ii = ewald_matrix[i, i]
                    self_term_jj = ewald_matrix[j, j]
                    energy_total = pair_term + self_term_ii + self_term_jj
                    final_matrix[i, j] = energy_total
        final_matrix += correction_matrix

        return final_matrix

Example 49

def makeT(self,cp):
        # cp: [(k*k*k) x 3] control points
        # T: [((k*k*k)+4) x ((k*k*k)+4)]
        K = cp.shape[0]
        T = np.zeros((K+4, K+4))
        T[:K, 0] = 1; T[:K, 1:4] = cp; T[K, 4:] = 1; T[K+1:, 4:] = cp.T
        R = squareform(pdist(cp, metric='euclidean'))
        R = R * R;R[R == 0] = 1 # a trick to make R ln(R) 0
        R = R * np.log(R)
        np.fill_diagonal(R, 0)
        T[:K, 4:] = R
        return T

Example 50

def _s(n, a, b):
    '''Get all permutations of [a, b, ..., b] of length n.
    len(out) == n.
    '''
    out = numpy.full((n, n), b)
    numpy.fill_diagonal(out, a)
    return out