Example 1
def KMO(data):
    cor_ = pd.DataFrame.corr(data)
    invCor = np.linalg.inv(cor_)
    rows = cor_.shape[0]
    cols = cor_.shape[1]
    A = np.ones((rows, cols))
    for i in range(rows):
        for j in range(i, cols):
            A[i, j] = -(invCor[i, j]) / (np.sqrt(invCor[i, i] * invCor[j, j]))
            A[j, i] = A[i, j]
    num = np.sum(np.sum((cor_)**2)) - np.sum(np.sum(np.diag(cor_**2)))
    den = num + (np.sum(np.sum(A**2)) - np.sum(np.sum(np.diag(A**2))))
    kmo = num / den
    return kmo
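A minimal usage sketch, not part of the original: it assumes numpy and pandas are imported as np and pd (as KMO itself requires) and uses a hypothetical numeric DataFrame df.

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(200, 6), columns=list('abcdef'))
print(KMO(df))  # KMO values above roughly 0.6 are commonly read as adequate sampling for factor analysis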
Example 2
def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters
    Operates in place
    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum, np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side, np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal, np.float32, 0, True)
    for index in lbls:
        area = areas[index - 1] / 255
        side = sides[index - 1]
        diag = diags[index - 1]
        if side < 5 or side > 20 \
                or diag < 15 or diag > 25 \
                or area < 40:
            image[labeled == index] = 0
    return None
Example 3
def evaluate(self, dataset):
    predictions = self.predict(dataset[:, 0])
    confusion_matrix = sklearn_confusion_matrix(dataset[:, 1], predictions, labels=self.__classes)
    precisions = []
    recalls = []
    accuracies = []
    for gender in self.__classes:
        idx = self.__classes_indexes[gender]
        precision = 1
        recall = 1
        if np.sum(confusion_matrix[idx, :]) > 0:
            precision = confusion_matrix[idx][idx] / np.sum(confusion_matrix[idx, :])
        if np.sum(confusion_matrix[:, idx]) > 0:
            recall = confusion_matrix[idx][idx] / np.sum(confusion_matrix[:, idx])
        precisions.append(precision)
        recalls.append(recall)
    precision = np.mean(precisions)
    recall = np.mean(recalls)
    f1 = (2 * (precision * recall)) / float(precision + recall)
    accuracy = np.sum(confusion_matrix.diagonal()) / float(np.sum(confusion_matrix))
    return precision, recall, accuracy, f1
Example 4
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    # print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 5
def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    return (1 / np.sum(f1))

# Main
Example 6
def xloads(self):
    # Xloadings
    A = self.data_.transpose().values
    B = self.fscores.transpose().values
    A_mA = A - A.mean(1)[:, None]
    B_mB = B - B.mean(1)[:, None]
    ssA = (A_mA**2).sum(1)
    ssB = (B_mB**2).sum(1)
    xloads_ = (np.dot(A_mA, B_mB.T) /
               np.sqrt(np.dot(ssA[:, None], ssB[None])))
    xloads = pd.DataFrame(
        xloads_, index=self.manifests, columns=self.latent)
    return xloads
Example 7
def do_work_pso(data, LVcsv, Mcsv, scheme, reg, h, maximo):
    # Note: item, nclusters and population are not parameters here,
    # so they must be available from an enclosing or global scope.
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 8
def do_work_pso(self, item):
    output = pd.DataFrame(self.population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([self.data, output], axis=1)
    f1 = []
    results = []
    for i in range(self.nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                  self.reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 9
def do_work_tabu(self, item):
    output = pd.DataFrame(self.population[item])
    output.columns = ['Split']
    dataSplit = pd.concat([self.data, output], axis=1)
    f1 = []
    results = []
    for i in range(self.nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                  self.reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    cost = (np.sum(f1))
    print(1 / cost)
    return [self.population[item], cost]
Example 10
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    # A different (control flow based) way to control dropout
    if self.training:
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    else:
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    if self.training:
        x = F.dropout(x, training=True)
    x = self.fc2(x)
    # Check for NaNs and infinites
    nans = np.sum(np.isnan(x.data.numpy()))
    infs = np.sum(np.isinf(x.data.numpy()))
    if nans > 0:
        print("There are {} NaNs at the output layer".format(nans))
    if infs > 0:
        print("There are {} infinite values at the output layer".format(infs))
    return F.log_softmax(x)
Example 11
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Example 12
def score_samples(self, X):
    """Return the log-likelihood of each sample

    See "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        The data.

    Returns
    -------
    ll : array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, 'mean_')
    X = check_array(X)
    Xr = X - self.mean_
    n_features = X.shape[1]
    log_like = np.zeros(X.shape[0])
    precision = self.get_precision()
    log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
    log_like -= .5 * (n_features * log(2. * np.pi) - fast_logdet(precision))
    return log_like
Example 13
def main():
    files = tf.gfile.Glob(flags.FLAGS.src_path_1)
    labels_uni = np.zeros([4716, 1])
    labels_matrix = np.zeros([4716, 4716])
    for file in files:
        labels_all = get_video_input_feature(file)
        print(len(labels_all[0][2]), len(labels_all[0][3]), len(labels_all[0][4]), len(labels_all[0][5]))
    # Disabled code kept by the author in a string block:
    """
    for labels in labels_all:
        for i in range(len(labels)):
            labels_uni[labels[i]] += 1
            for j in range(len(labels)):
                labels_matrix[labels[i], labels[j]] += 1
    labels_matrix = labels_matrix / labels_uni
    labels_matrix = labels_matrix / (np.sum(labels_matrix, axis=0) - 1.0)
    for i in range(4716):
        labels_matrix[i, i] = 1.0
    np.savetxt('labels_uni.out', labels_uni, delimiter=',')
    np.savetxt('labels_matrix.out', labels_matrix, delimiter=',')
    """
Example 14
def calculate_gap(predictions, actuals, top_k=20):
    """Performs a local (numpy) calculation of the global average precision.

    Only the top_k predictions are taken for each of the videos.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      top_k: How many predictions to use per video.

    Returns:
      float: The global average precision.
    """
    gap_calculator = ap_calculator.AveragePrecisionCalculator()
    sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
    gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
    return gap_calculator.peek_ap_at_n()
Example 15
def format_lines(video_ids, predictions, labels, top_k):
    batch_size = len(video_ids)
    for video_index in range(batch_size):
        n_recall = max(int(numpy.sum(labels[video_index])), 1)
        # labels
        label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
        label_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in label_indices]
        label_predictions = sorted(label_predictions, key=lambda p: -p[1])
        label_str = "\t".join(["%d\t%f" % (x, y) for x, y in label_predictions])
        # predictions
        top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
        top_k_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in top_k_indices]
        top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
        top_k_str = "\t".join(["%d\t%f" % (x, y) for x, y in top_k_predictions])
        # compute PERR
        top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
        positives = [labels[video_index][class_index] for class_index in top_n_indices]
        perr = sum(positives) / float(n_recall)
        # URL
        url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
        yield url + "\t" + str(1 - perr) + "\t" + top_k_str + "\t" + label_str + "\n"
Example 16
def getTrainKernel(self, params):
    self.checkParams(params)
    if (self.sameParams(params)):
        return self.cache['getTrainKernel']
    ell = np.exp(params[0])
    if (self.K_sq is None):
        K = sq_dist(self.X_scaled.T / ell)  # precompute squared distances
    else:
        K = self.K_sq / ell**2
    self.cache['K_sq_scaled'] = K

    # # # #manual computation (just for sanity checks)
    # # # K1 = np.exp(-K / 2.0)
    # # # K2 = np.zeros((self.X_scaled.shape[0], self.X_scaled.shape[0]))
    # # # for i1 in xrange(self.X_scaled.shape[0]):
    # # #     for i2 in xrange(i1, self.X_scaled.shape[0]):
    # # #         diff = self.X_scaled[i1,:] - self.X_scaled[i2,:]
    # # #         K2[i1, i2] = np.exp(-np.sum(diff**2) / (2*ell))
    # # #         K2[i2, i1] = K2[i1, i2]
    # # # print np.max((K1-K2)**2)
    # # # sys.exit(0)

    K_exp = np.exp(-K / 2.0)
    self.cache['getTrainKernel'] = K_exp
    self.saveParams(params)
    return K_exp
Example 17
def getTrainTestKernel(self, params, Xtest):
    self.checkParams(params)
    ell2 = np.exp(2 * params[0])
    z = Xtest / np.sqrt(Xtest.shape[1])
    S = 1 + self.X_scaled.dot(z.T)
    sz = 1 + np.sum(z**2, axis=1)
    sqrtEll2Psx = np.sqrt(ell2 + self.sx)
    sqrtEll2Psz = np.sqrt(ell2 + sz)
    K = S / np.outer(sqrtEll2Psx, sqrtEll2Psz)
    return np.arcsin(K)
Example 18
def match_matrix(event: Event):
    """Returns a numpy participation matrix for the qualification matches in this event,
    used for calculating OPR.

    Each row in the matrix corresponds to a single alliance in a match, meaning that there
    will be two rows (one for red, one for blue) per match. Each column represents a single
    team, ordered by team number. If a team participated on a certain alliance, the value
    at that row and column would be 1, otherwise it would be 0. For example, an event with
    teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would
    have a match matrix that looks like this (sans labels):

                #1  #2  #3  #4  #5  #6  #7
    qm1_red      1   0   1   0   1   0   0
    qm1_blue     0   1   0   1   0   1   0
    """
    match_list = []
    for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
        match_list.append(matchRow)
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
        match_list.append(matchRow)
    mat = numpy.array(match_list)
    sum_matches = numpy.sum(mat, axis=0)
    avg_team_matches = sum(sum_matches) / float(len(sum_matches))
    return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2]
Example 19
def compute_angle(pt0, pt1, pt2):
    """
    Given 3 points, compute the cosine of the angle from pt0
    :type pt0: numpy.array
    :type pt1: numpy.array
    :type pt2: numpy.array
    :return: cosine of angle
    """
    a = pt0 - pt1
    b = pt0 - pt2
    return (np.sum(a * b)) / (np.linalg.norm(a) * np.linalg.norm(b))
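A minimal usage sketch, not part of the original, with three hypothetical points; since the two edges meeting at pt0 are perpendicular here, the returned cosine is 0.

import numpy as np

pt0 = np.array([0.0, 0.0])
pt1 = np.array([1.0, 0.0])
pt2 = np.array([0.0, 1.0])
print(compute_angle(pt0, pt1, pt2))  # 0.0, i.e. a 90-degree corner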
Example 20
def _zoning(image):
    """
    It works better with DSIZE = 28
    ~0.9967 precision and recall
    :param image:
    :return: #pixels/area ratio of each zone (7x7) as feature vector
    """
    zones = []
    for i in range(0, 28, 7):
        for j in range(0, 28, 7):
            roi = image[i:i + 7, j:j + 7]
            val = (np.sum(roi) / 255) / 49.
            zones.append(val)
    return np.array(zones, np.float32)
Example 21
def getTypeProblem(self, solution_filename):
    '''Get the type of problem directly from the solution file (in case we do not have an info file)'''
    if 'task' not in self.info.keys():
        solution = np.array(data_converter.file_to_array(solution_filename))
        target_num = solution.shape[1]
        self.info['target_num'] = target_num
        if target_num == 1:  # if we have only one column
            solution = np.ravel(solution)  # flatten
            nbr_unique_values = len(np.unique(solution))
            if nbr_unique_values < len(solution) / 8:
                # Classification
                self.info['label_num'] = nbr_unique_values
                if nbr_unique_values == 2:
                    self.info['task'] = 'binary.classification'
                    self.info['target_type'] = 'Binary'
                else:
                    self.info['task'] = 'multiclass.classification'
                    self.info['target_type'] = 'Categorical'
            else:
                # Regression
                self.info['label_num'] = 0
                self.info['task'] = 'regression'
                self.info['target_type'] = 'Numerical'
        else:
            # Multilabel or multiclass
            self.info['label_num'] = target_num
            self.info['target_type'] = 'Binary'
            if any(item > 1 for item in map(np.sum, solution.astype(int))):
                self.info['task'] = 'multilabel.classification'
            else:
                self.info['task'] = 'multiclass.classification'
    return self.info['task']
Example 22
def binarize_predictions(array, task='binary.classification'):
    '''Turn predictions into decisions {0,1} by selecting the class with largest
    score for multiclass problems and thresholding at 0.5 for other cases.'''
    # add a very small random value as tie breaker (a bit bad because this changes the score every time)
    # so to make sure we get the same result every time, we seed it
    # eps = 1e-15
    # np.random.seed(sum(array.shape))
    # array = array + eps*np.random.rand(array.shape[0],array.shape[1])
    bin_array = np.zeros(array.shape)
    if (task != 'multiclass.classification') or (array.shape[1] == 1):
        bin_array[array >= 0.5] = 1
    else:
        sample_num = array.shape[0]
        for i in range(sample_num):
            j = np.argmax(array[i, :])
            bin_array[i, j] = 1
    return bin_array
Example 23
def acc_stat(solution, prediction):
    '''Return accuracy statistics TN, FP, TP, FN
    Assumes that solution and prediction are binary 0/1 vectors.'''
    # This uses floats so the results are floats
    TN = sum(np.multiply((1 - solution), (1 - prediction)))
    FN = sum(np.multiply(solution, (1 - prediction)))
    TP = sum(np.multiply(solution, prediction))
    FP = sum(np.multiply((1 - solution), prediction))
    # print "TN =", TN
    # print "FP =", FP
    # print "TP =", TP
    # print "FN =", FN
    return (TN, FP, TP, FN)
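A minimal usage sketch, not part of the original, with hypothetical binary vectors, showing how the four counts come back in (TN, FP, TP, FN) order.

import numpy as np

solution = np.array([1, 0, 1, 1, 0])
prediction = np.array([1, 0, 0, 1, 1])
print(acc_stat(solution, prediction))  # (1, 1, 2, 1)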
Example 24
def pac_metric(solution, prediction, task='binary.classification'):
    '''Probabilistic Accuracy based on log_loss metric.
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.'''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if (diff) > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Example 25
def auc_metric(solution, prediction, task='binary.classification'):
    '''Normalized Area under ROC curve (AUC).
    Return Gini index = 2*AUC-1 for binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1) "solution" and any discriminant
    values for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).'''
    # auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
    label_num = solution.shape[1]
    auc = np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:, k])
        s_ = solution[:, k]
        if sum(s_) == 0:
            print('WARNING: no positive class example in class {}'.format(k + 1))
        npos = sum(s_ == 1)
        nneg = sum(s_ < 1)
        auc[k] = (sum(r_[s_ == 1]) - npos * (npos + 1) / 2) / (nneg * npos)
    return 2 * mvmean(auc) - 1

### END CLASSIFICATION METRICS

# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks
Example 26
def prior_log_loss(frac_pos, task='binary.classification'):
    '''Baseline log loss.
    For multiple classes or labels, return the values for each column.'''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if (task != 'multiclass.classification'):  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = -frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = -frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:  # multiclass case
        fp = frac_pos_ / sum(frac_pos_)  # Need to renormalize the lines in multiclass case
        # Only ONE label is active per line in the multiclass case
        pos_class_log_loss_ = -frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss

# sklearn implementations for comparison
Example 27
def num_lines(filename):
    '''Count the number of lines of file'''
    return sum(1 for line in open(filename))
Example 28
def tp_filter(X, Y, feat_num=1000, verbose=True):
    '''TP feature selection in the spirit of the winners of the KDD cup 2001.
    Only for binary classification and sparse matrices.'''
    if issparse(X) and len(Y.shape) == 1 and len(set(Y)) == 2 and (sum(Y) / Y.shape[0]) < 0.1:
        if verbose:
            print("========= Filtering features...")
        Posidx = Y > 0
        # npos = sum(Posidx)
        # Negidx = Y <= 0
        # nneg = sum(Negidx)
        nz = X.nonzero()
        mx = X[nz].max()
        if X[nz].min() == mx:  # sparse binary
            if mx != 1:
                X[nz] = 1
            tp = csr_matrix.sum(X[Posidx, :], axis=0)
            # fn = npos - tp
            # fp = csr_matrix.sum(X[Negidx, :], axis=0)
            # tn = nneg - fp
        else:
            tp = np.sum(X[Posidx, :] > 0, axis=0)
            # tn = np.sum(X[Negidx,:] <= 0, axis=0)
            # fn = np.sum(X[Posidx,:] <= 0, axis=0)
            # fp = np.sum(X[Negidx,:] > 0, axis=0)
        tp = np.ravel(tp)
        idx = sorted(range(len(tp)), key=tp.__getitem__, reverse=True)
        return idx[0:feat_num]
    else:
        feat_num = X.shape[1]
        return range(feat_num)
Example 29
def predict(self, X):
    prediction = self.predict_method(X)
    # Calibrate proba
    if self.task != 'regression' and self.postprocessor != None:
        prediction = self.postprocessor.predict_proba(prediction)
    # Keep only the 2nd column; the two columns are redundant (the 1st is one minus the 2nd)
    if self.target_num == 1 and len(prediction.shape) > 1 and prediction.shape[1] > 1:
        prediction = prediction[:, 1]
    # Make sure the normalization is correct
    if self.task == 'multiclass.classification':
        eps = 1e-15
        norma = np.sum(prediction, axis=1)
        for k in range(prediction.shape[0]):
            prediction[k, :] /= sp.maximum(norma[k], eps)
    return prediction
Example 30
def fit(self, X, Y):
    if len(Y.shape) == 1:
        Y = np.array([Y]).transpose()  # Transform vector into column matrix
        # This is NOT what we want: Y = Y.reshape( -1, 1 ), because Y.shape[1] out of range
    self.n_target = Y.shape[1]  # Num target values = num col of Y
    self.n_label = len(set(Y.ravel()))  # Num labels = num classes (categories of categorical var if n_target=1 or n_target if labels are binary)
    # Create the right number of copies of the predictor instance
    if len(self.predictors) != self.n_target:
        predictorInstance = self.predictors[0]
        self.predictors = [predictorInstance]
        for i in range(1, self.n_target):
            self.predictors.append(copy.copy(predictorInstance))
    # Fit all predictors
    for i in range(self.n_target):
        # Update the number of desired predictors
        if hasattr(self.predictors[i], 'n_estimators'):
            self.predictors[i].n_estimators = self.n_estimators
        # Subsample if desired
        if self.balance:
            pos = Y[:, i] > 0
            neg = Y[:, i] <= 0
            if sum(pos) < sum(neg):
                chosen = pos
                not_chosen = neg
            else:
                chosen = neg
                not_chosen = pos
            num = sum(chosen)
            # Indices of the samples in the larger class
            idx = np.array([j for j, flag in enumerate(not_chosen) if flag])
            np.random.shuffle(idx)
            chosen[idx[0:min(num, len(idx))]] = True
            # Train with chosen samples
            self.predictors[i].fit(X[chosen, :], Y[chosen, i])
        else:
            self.predictors[i].fit(X, Y[:, i])
    return
Example 31
def get_batch_loss(self, input_batch, output_batch):
    dynet.renew_cg()

    # Dimension: maxSentLength * minibatch_size
    wids = []
    wids_reversed = []

    # List of lists to store whether an input is
    # present(1)/absent(0) for an example at a time step
    # masks = []  # Dimension: maxSentLength * minibatch_size
    # tot_words = 0

    maxSentLength = max([len(sent) for sent in input_batch])

    for j in range(maxSentLength):
        wids.append([(self.src_vocab[sent[j]].i if len(sent) > j else self.src_vocab.END_TOK.i)
                     for sent in input_batch])
        wids_reversed.append([(self.src_vocab[sent[len(sent) - j - 1]].i if len(sent) > j else self.src_vocab.END_TOK.i)
                              for sent in input_batch])
        # mask = [(1 if len(sent) > j else 0) for sent in input_batch]
        # masks.append(mask)
        # tot_words += sum(mask)

    embedded_batch = self.embed_batch_seq(wids)
    embedded_batch_reverse = self.embed_batch_seq(wids_reversed)
    encoded_batch = self.encode_batch_seq(embedded_batch, embedded_batch_reverse)

    # pass last hidden state of encoder to decoder
    return self.decode_batch(encoded_batch, output_batch)
Example 32
def plotFields(layer, fieldShape=None, channel=None, figOffset=1, cmap=None, padding=0.01):
    # Receptive Fields Summary
    try:
        W = layer.W
    except:
        W = layer
    wp = W.eval().transpose()
    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        fields = np.reshape(wp, list(wp.shape[0:-1]) + fieldShape)
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features * channels, iy, ix])

    perRow = int(math.floor(math.sqrt(fields.shape[0])))
    perColumn = int(math.ceil(fields.shape[0] / float(perRow)))

    fig = mpl.figure(figOffset); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn), axes_pad=padding, cbar_mode='single')
    for i in range(0, np.shape(fields)[0]):
        im = grid[i].imshow(fields[i], cmap=cmap); grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields, np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0, perColumn*perRow, perColumn):
    #     tiled.append(np.hstack(fields2[i:i+perColumn]))
    #
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();

    mpl.figure(figOffset + 1); mpl.clf()
    mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap)
    mpl.title('%s Total Absolute Input Dependency' % layer.name)
    mpl.colorbar()
Example 33
def plotFields(layer, fieldShape=None, channel=None, maxFields=25, figName='ReceptiveFields', cmap=None, padding=0.01):
    # Receptive Fields Summary
    W = layer.W
    wp = W.eval().transpose()
    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        fields = np.reshape(wp, list(wp.shape[0:-1]) + fieldShape)
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features * channels, iy, ix])

    fieldsN = min(fields.shape[0], maxFields)
    perRow = int(math.floor(math.sqrt(fieldsN)))
    perColumn = int(math.ceil(fieldsN / float(perRow)))

    fig = mpl.figure(figName); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn), axes_pad=padding, cbar_mode='single')
    for i in range(0, fieldsN):
        im = grid[i].imshow(fields[i], cmap=cmap); grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields, np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0, perColumn*perRow, perColumn):
    #     tiled.append(np.hstack(fields2[i:i+perColumn]))
    #
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();

    mpl.figure(figName + ' Total'); mpl.clf()
    mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap)
    mpl.title('%s Total Absolute Input Dependency' % layer.name)
    mpl.colorbar()
Example 34
def analytic_convolution_gaussian(mu1, covar1, mu2, covar2):
    """
    The analytic convolution of two Gaussians is simply the Gaussian whose mean is the
    sum of the two mean vectors and whose covariance is the sum of the two covariance matrices

    --- INPUT ---
    mu1     The mean of the first gaussian
    covar1  The covariance matrix of the first gaussian
    mu2     The mean of the second gaussian
    covar2  The covariance matrix of the second gaussian
    """
    muconv = mu1 + mu2
    covarconv = covar1 + covar2
    return muconv, covarconv

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
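A minimal usage sketch, not part of the original, with hypothetical 2-D inputs; it just illustrates that convolving N(mu1, covar1) with N(mu2, covar2) yields N(mu1 + mu2, covar1 + covar2).

import numpy as np

mu1, covar1 = np.array([0.0, 0.0]), np.eye(2)
mu2, covar2 = np.array([1.0, -1.0]), 2.0 * np.eye(2)
mu, covar = analytic_convolution_gaussian(mu1, covar1, mu2, covar2)
print(mu)     # [ 1. -1.]
print(covar)  # 3 times the identity matrix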
Example 35
def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a given size using either the sum, mean or median of the pixels binned.
    Note that the old array dimensions have to be multiples of the new array dimensions

    --- INPUT ---
    array       Array to reshape (combine pixels)
    newsize     New size of array
    pixcombine  The method to combine the pixels with. Choices are sum, mean and median
    """
    sh = newsize[0], array.shape[0] // newsize[0], newsize[1], array.shape[1] // newsize[1]
    if pixcombine == 'sum':
        reshapedarray = array.reshape(sh).sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = array.reshape(sh).mean(-1).mean(1)
    elif pixcombine == 'median':
        # ndarrays have no .median() method, so reduce the same axes with np.median
        reshapedarray = np.median(np.median(array.reshape(sh), axis=-1), axis=1)
    return reshapedarray

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
Example 36
def inner_product_to_infty(self, gf1, gf2):
    "Inner product on non-compact domain"
    factors = [s.get_scale_factor() for s in self.stencils]
    factor = np.prod(factors)
    integrand = (factor * gf1 * self.weights2D * gf2 * self.dRdX)
    integrand[-1] = 0
    integral = np.sum(integrand)
    return integral
Example 37
def get_integration_weights(order, nodes=None):
    """
    Returns the integration weights for Gauss-Lobatto quadrature
    as a function of the order of the polynomial we want to represent.
    See: https://en.wikipedia.org/wiki/Gaussian_quadrature
    See: arXiv:gr-qc/0609020v1
    """
    if np.all(nodes == False):
        nodes = get_quadrature_points(order)
    if poly == polynomial.chebyshev.Chebyshev:
        weights = np.empty((order + 1))
        weights[1:-1] = np.pi / order
        weights[0] = np.pi / (2 * order)
        weights[-1] = weights[0]
        return weights
    elif poly == polynomial.legendre.Legendre:
        interior_weights = 2 / ((order + 1) * order * poly.basis(order)(nodes[1:-1])**2)
        boundary_weights = np.array([1 - 0.5 * np.sum(interior_weights)])
        weights = np.concatenate((boundary_weights, interior_weights, boundary_weights))
        return weights
    else:
        raise ValueError("Not a known polynomial type.")
Example 38
def inner_product(self, gf1, gf2):
    """Calculates the 2D inner product between grid functions
    gf1 and gf2 using the appropriate quadrature rule
    """
    factors = [s.get_scale_factor() for s in self.stencils]
    factor = np.prod(factors)
    integrand = gf1 * self.weights2D * gf2
    integral_unit_cell = np.sum(integrand)
    integral_physical = integral_unit_cell * factor
    return integral_physical
Example 39
def compute_rhs(rhs):
    U_dealiased = work[((3,) + FFT.work_shape(dealias), float, 0)]
    curl_dealiased = work[((3,) + FFT.work_shape(dealias), float, 1)]
    for i in range(3):
        U_dealiased[i] = FFT.ifftn(U_hat[i], U_dealiased[i], dealias)
    curl_dealiased = curl(U_hat, curl_dealiased)
    rhs = cross(U_dealiased, curl_dealiased, rhs)
    P_hat[:] = sum(rhs * K_over_K2, 0, out=P_hat)
    rhs -= P_hat * K
    rhs -= nu * K2 * U_hat
    return rhs

# Initialize a Taylor Green vortex
Example 40
def gof(self):
    r2mean = np.mean(self.r2.T[self.endoexo()[0]].values)
    AVEmean = self.AVE().copy()
    totalblock = 0
    for i in range(self.lenlatent):
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == self.latent[i]]]
        block = len(block.columns.values)
        totalblock += block
        AVEmean[self.latent[i]] = AVEmean[self.latent[i]] * block
    AVEmean = np.sum(AVEmean) / totalblock
    return np.sqrt(AVEmean * r2mean)
Example 41
def cr(self):
    # Composite Reliability
    composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
    for i in range(self.lenlatent):
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == self.latent[i]]]
        p = len(block.columns)
        if (p != 1):
            cor_mat = np.cov(block.T)
            evals, evecs = np.linalg.eig(cor_mat)
            U, S, V = np.linalg.svd(cor_mat, full_matrices=False)
            indices = np.argsort(evals)
            indices = indices[::-1]
            evecs = evecs[:, indices]
            evals = evals[indices]
            loadings = V[0, :] * np.sqrt(evals[0])
            numerador = np.sum(abs(loadings))**2
            denominador = numerador + (p - np.sum(loadings ** 2))
            cr = numerador / denominador
            composite[self.latent[i]] = cr
        else:
            composite[self.latent[i]] = 1
    composite = composite.T
    return(composite)
Example 42
def r2adjusted(self):
    n = len(self.data_)
    r2 = self.r2.values
    r2adjusted = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
    for i in range(self.lenlatent):
        p = sum(self.LVariables['target'] == self.latent[i])
        r2adjusted[self.latent[i]] = r2[i] - \
            (p * (1 - r2[i])) / (n - p - 1)
    return r2adjusted.T
Example 43
def AVE(self):
    # AVE
    return self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
Example 44
def fornell(self):
    cor_ = pd.DataFrame.corr(self.fscores)**2
    AVE = self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
    for i in range(len(cor_)):
        cor_.ix[i, i] = AVE[i]
    return(cor_)
Example 45
def fitness(self, data_, n_clusters, lvmodel, mvmodel, scheme, regression):
    output = pd.DataFrame(self.genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data_, output], axis=1)
    f1 = []
    results = []
    for i in range(n_clusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, lvmodel, mvmodel, scheme,
                                  regression, 0, 50, HOC='true'))
            sumOuterResid = pd.DataFrame.sum(
                pd.DataFrame.sum(results[i].residuals()[1]**2))
            sumInnerResid = pd.DataFrame.sum(
                pd.DataFrame.sum(results[i].residuals()[2]**2))
            f1.append(sumOuterResid + sumInnerResid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 46
def roulettewheel(pop, fit):
    fit = fit - min(fit)
    sumf = sum(fit)
    if (sumf == 0):
        return pop[0]
    prob = [(item + sum(fit[:index])) / sumf for index, item in enumerate(fit)]
    prob_ = uniform(0, 1)
    # print(prob)
    individuo = (int(BinSearch(prob, prob_, 0, len(prob) - 1)))
    return pop[individuo]
Example 47
def xavier_initializer(shape):
    dim_sum = np.sum(shape)
    if len(shape) == 1:
        dim_sum += 1
    bound = np.sqrt(2.0 / dim_sum)
    return tf.random_uniform(shape, minval=-bound, maxval=bound)

# Assigning network variables to target network variables
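A minimal usage sketch, not part of the original, assuming the TensorFlow 1.x API that tf.random_uniform belongs to; the layer sizes below are hypothetical.

import numpy as np
import tensorflow as tf  # TensorFlow 1.x

W = tf.Variable(xavier_initializer([784, 256]), name='W')  # weight matrix
b = tf.Variable(xavier_initializer([256]), name='b')       # bias vector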