Example 1
def KMO(data):
    cor_ = pd.DataFrame.corr(data)
    invCor = np.linalg.inv(cor_)
    rows = cor_.shape[0]
    cols = cor_.shape[1]
    A = np.ones((rows, cols))
    for i in range(rows):
        for j in range(i, cols):
            A[i, j] = -(invCor[i, j]) / (np.sqrt(invCor[i, i] * invCor[j, j]))
            A[j, i] = A[i, j]
    num = np.sum(np.sum((cor_)**2)) - np.sum(np.sum(np.diag(cor_**2)))
    den = num + (np.sum(np.sum(A**2)) - np.sum(np.sum(np.diag(A**2))))
    kmo = num / den
    return kmo
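A minimal usage sketch, not part of the original: it assumes numpy and pandas are imported as np and pd (as KMO itself requires) and uses a hypothetical numeric DataFrame df.

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(200, 6), columns=list('abcdef'))
print(KMO(df))  # KMO values above roughly 0.6 are commonly read as adequate sampling for factor analysis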
Example 2
def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters
    Operates in place
    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum, np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side, np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal, np.float32, 0, True)
    for index in lbls:
        area = areas[index - 1] / 255
        side = sides[index - 1]
        diag = diags[index - 1]
        if side < 5 or side > 20 \
                or diag < 15 or diag > 25 \
                or area < 40:
            image[labeled == index] = 0
    return None
Example 3
def evaluate(self, dataset):
    predictions = self.predict(dataset[:, 0])
    confusion_matrix = sklearn_confusion_matrix(dataset[:, 1], predictions, labels=self.__classes)
    precisions = []
    recalls = []
    accuracies = []
    for gender in self.__classes:
        idx = self.__classes_indexes[gender]
        precision = 1
        recall = 1
        if np.sum(confusion_matrix[idx, :]) > 0:
            precision = confusion_matrix[idx][idx] / np.sum(confusion_matrix[idx, :])
        if np.sum(confusion_matrix[:, idx]) > 0:
            recall = confusion_matrix[idx][idx] / np.sum(confusion_matrix[:, idx])
        precisions.append(precision)
        recalls.append(recall)
    precision = np.mean(precisions)
    recall = np.mean(recalls)
    f1 = (2 * (precision * recall)) / float(precision + recall)
    accuracy = np.sum(confusion_matrix.diagonal()) / float(np.sum(confusion_matrix))
    return precision, recall, accuracy, f1
Example 4
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    # print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 5
def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    output = pd.DataFrame(population[item].genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    return (1 / np.sum(f1))

# Main
Example 6
def xloads(self):
    # Xloadings
    A = self.data_.transpose().values
    B = self.fscores.transpose().values
    A_mA = A - A.mean(1)[:, None]
    B_mB = B - B.mean(1)[:, None]
    ssA = (A_mA**2).sum(1)
    ssB = (B_mB**2).sum(1)
    xloads_ = (np.dot(A_mA, B_mB.T) /
               np.sqrt(np.dot(ssA[:, None], ssB[None])))
    xloads = pd.DataFrame(
        xloads_, index=self.manifests, columns=self.latent)
    return xloads
Example 7
def do_work_pso(data, LVcsv, Mcsv, scheme, reg, h, maximo):
    # Note: item, nclusters and population are not parameters here,
    # so they must be available from an enclosing or global scope.
    output = pd.DataFrame(population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([data, output], axis=1)
    f1 = []
    results = []
    for i in range(nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 8
def do_work_pso(self, item):
    output = pd.DataFrame(self.population[item].position)
    output.columns = ['Split']
    dataSplit = pd.concat([self.data, output], axis=1)
    f1 = []
    results = []
    for i in range(self.nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                  self.reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 9
def do_work_tabu(self, item):
    output = pd.DataFrame(self.population[item])
    output.columns = ['Split']
    dataSplit = pd.concat([self.data, output], axis=1)
    f1 = []
    results = []
    for i in range(self.nclusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
                                  self.reg, 0, 50, HOC='true'))
            resid = results[i].residuals()[3]
            f1.append(resid)
        except:
            f1.append(10000)
    cost = (np.sum(f1))
    print(1 / cost)
    return [self.population[item], cost]
Example 10
def forward(self, x):
    x = F.relu(F.max_pool2d(self.conv1(x), 2))
    # A different (control flow based) way to control dropout
    if self.training:
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
    else:
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    if self.training:
        x = F.dropout(x, training=True)
    x = self.fc2(x)
    # Check for NaNs and infinites
    nans = np.sum(np.isnan(x.data.numpy()))
    infs = np.sum(np.isinf(x.data.numpy()))
    if nans > 0:
        print("There are {} NaNs at the output layer".format(nans))
    if infs > 0:
        print("There are {} infinite values at the output layer".format(infs))
    return F.log_softmax(x)
Example 11
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data[0]  # sum up batch loss
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
Example 12
def score_samples(self, X):
    """Return the log-likelihood of each sample

    See "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        The data.

    Returns
    -------
    ll : array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, 'mean_')
    X = check_array(X)
    Xr = X - self.mean_
    n_features = X.shape[1]
    log_like = np.zeros(X.shape[0])
    precision = self.get_precision()
    log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
    log_like -= .5 * (n_features * log(2. * np.pi) - fast_logdet(precision))
    return log_like
Example 13
def main():
    files = tf.gfile.Glob(flags.FLAGS.src_path_1)
    labels_uni = np.zeros([4716, 1])
    labels_matrix = np.zeros([4716, 4716])
    for file in files:
        labels_all = get_video_input_feature(file)
        print(len(labels_all[0][2]), len(labels_all[0][3]), len(labels_all[0][4]), len(labels_all[0][5]))
    # Disabled code kept by the author in a string block:
    """
    for labels in labels_all:
        for i in range(len(labels)):
            labels_uni[labels[i]] += 1
            for j in range(len(labels)):
                labels_matrix[labels[i], labels[j]] += 1
    labels_matrix = labels_matrix / labels_uni
    labels_matrix = labels_matrix / (np.sum(labels_matrix, axis=0) - 1.0)
    for i in range(4716):
        labels_matrix[i, i] = 1.0
    np.savetxt('labels_uni.out', labels_uni, delimiter=',')
    np.savetxt('labels_matrix.out', labels_matrix, delimiter=',')
    """
Example 14
def calculate_gap(predictions, actuals, top_k=20):
    """Performs a local (numpy) calculation of the global average precision.

    Only the top_k predictions are taken for each of the videos.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      top_k: How many predictions to use per video.

    Returns:
      float: The global average precision.
    """
    gap_calculator = ap_calculator.AveragePrecisionCalculator()
    sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
    gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
    return gap_calculator.peek_ap_at_n()
Example 15
def format_lines(video_ids, predictions, labels, top_k):
    batch_size = len(video_ids)
    for video_index in range(batch_size):
        n_recall = max(int(numpy.sum(labels[video_index])), 1)
        # labels
        label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
        label_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in label_indices]
        label_predictions = sorted(label_predictions, key=lambda p: -p[1])
        label_str = "\t".join(["%d\t%f" % (x, y) for x, y in label_predictions])
        # predictions
        top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
        top_k_predictions = [(class_index, predictions[video_index][class_index])
                             for class_index in top_k_indices]
        top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
        top_k_str = "\t".join(["%d\t%f" % (x, y) for x, y in top_k_predictions])
        # compute PERR
        top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
        positives = [labels[video_index][class_index] for class_index in top_n_indices]
        perr = sum(positives) / float(n_recall)
        # URL
        url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
        yield url + "\t" + str(1 - perr) + "\t" + top_k_str + "\t" + label_str + "\n"
Example 16
def getTrainKernel(self, params):
    self.checkParams(params)
    if (self.sameParams(params)):
        return self.cache['getTrainKernel']
    ell = np.exp(params[0])
    if (self.K_sq is None):
        K = sq_dist(self.X_scaled.T / ell)  # precompute squared distances
    else:
        K = self.K_sq / ell**2
    self.cache['K_sq_scaled'] = K

    # # # #manual computation (just for sanity checks)
    # # # K1 = np.exp(-K / 2.0)
    # # # K2 = np.zeros((self.X_scaled.shape[0], self.X_scaled.shape[0]))
    # # # for i1 in xrange(self.X_scaled.shape[0]):
    # # #     for i2 in xrange(i1, self.X_scaled.shape[0]):
    # # #         diff = self.X_scaled[i1,:] - self.X_scaled[i2,:]
    # # #         K2[i1, i2] = np.exp(-np.sum(diff**2) / (2*ell))
    # # #         K2[i2, i1] = K2[i1, i2]
    # # # print np.max((K1-K2)**2)
    # # # sys.exit(0)

    K_exp = np.exp(-K / 2.0)
    self.cache['getTrainKernel'] = K_exp
    self.saveParams(params)
    return K_exp
Example 17
def getTrainTestKernel(self, params, Xtest):
    self.checkParams(params)
    ell2 = np.exp(2 * params[0])
    z = Xtest / np.sqrt(Xtest.shape[1])
    S = 1 + self.X_scaled.dot(z.T)
    sz = 1 + np.sum(z**2, axis=1)
    sqrtEll2Psx = np.sqrt(ell2 + self.sx)
    sqrtEll2Psz = np.sqrt(ell2 + sz)
    K = S / np.outer(sqrtEll2Psx, sqrtEll2Psz)
    return np.arcsin(K)
Example 18
def match_matrix(event: Event):
    """Returns a numpy participation matrix for the qualification matches in this event,
    used for calculating OPR.

    Each row in the matrix corresponds to a single alliance in a match, meaning that there
    will be two rows (one for red, one for blue) per match. Each column represents a single
    team, ordered by team number. If a team participated on a certain alliance, the value
    at that row and column would be 1, otherwise it would be 0. For example, an event with
    teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would
    have a match matrix that looks like this (sans labels):

                #1  #2  #3  #4  #5  #6  #7
    qm1_red      1   0   1   0   1   0   0
    qm1_blue     0   1   0   1   0   1   0
    """
    match_list = []
    for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
        match_list.append(matchRow)
        matchRow = []
        for team in event.teams:
            matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
        match_list.append(matchRow)
    mat = numpy.array(match_list)
    sum_matches = numpy.sum(mat, axis=0)
    avg_team_matches = sum(sum_matches) / float(len(sum_matches))
    return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2]
Example 19
def compute_angle(pt0, pt1, pt2):
    """
    Given 3 points, compute the cosine of the angle from pt0
    :type pt0: numpy.array
    :type pt1: numpy.array
    :type pt2: numpy.array
    :return: cosine of angle
    """
    a = pt0 - pt1
    b = pt0 - pt2
    return (np.sum(a * b)) / (np.linalg.norm(a) * np.linalg.norm(b))
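A minimal usage sketch, not part of the original, with three hypothetical points; since the two edges meeting at pt0 are perpendicular here, the returned cosine is 0.

import numpy as np

pt0 = np.array([0.0, 0.0])
pt1 = np.array([1.0, 0.0])
pt2 = np.array([0.0, 1.0])
print(compute_angle(pt0, pt1, pt2))  # 0.0, i.e. a 90-degree corner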
Example 20
def _zoning(image):
    """
    It works better with DSIZE = 28
    ~0.9967 precision and recall
    :param image:
    :return: #pixels/area ratio of each zone (7x7) as feature vector
    """
    zones = []
    for i in range(0, 28, 7):
        for j in range(0, 28, 7):
            roi = image[i:i + 7, j:j + 7]
            val = (np.sum(roi) / 255) / 49.
            zones.append(val)
    return np.array(zones, np.float32)
Example 21
def getTypeProblem(self, solution_filename):
    '''Get the type of problem directly from the solution file (in case we do not have an info file)'''
    if 'task' not in self.info.keys():
        solution = np.array(data_converter.file_to_array(solution_filename))
        target_num = solution.shape[1]
        self.info['target_num'] = target_num
        if target_num == 1:  # if we have only one column
            solution = np.ravel(solution)  # flatten
            nbr_unique_values = len(np.unique(solution))
            if nbr_unique_values < len(solution) / 8:
                # Classification
                self.info['label_num'] = nbr_unique_values
                if nbr_unique_values == 2:
                    self.info['task'] = 'binary.classification'
                    self.info['target_type'] = 'Binary'
                else:
                    self.info['task'] = 'multiclass.classification'
                    self.info['target_type'] = 'Categorical'
            else:
                # Regression
                self.info['label_num'] = 0
                self.info['task'] = 'regression'
                self.info['target_type'] = 'Numerical'
        else:
            # Multilabel or multiclass
            self.info['label_num'] = target_num
            self.info['target_type'] = 'Binary'
            if any(item > 1 for item in map(np.sum, solution.astype(int))):
                self.info['task'] = 'multilabel.classification'
            else:
                self.info['task'] = 'multiclass.classification'
    return self.info['task']
Example 22
def binarize_predictions(array, task='binary.classification'):
    '''Turn predictions into decisions {0,1} by selecting the class with largest
    score for multiclass problems and thresholding at 0.5 for other cases.'''
    # add a very small random value as tie breaker (a bit bad because this changes the score every time)
    # so to make sure we get the same result every time, we seed it
    # eps = 1e-15
    # np.random.seed(sum(array.shape))
    # array = array + eps*np.random.rand(array.shape[0],array.shape[1])
    bin_array = np.zeros(array.shape)
    if (task != 'multiclass.classification') or (array.shape[1] == 1):
        bin_array[array >= 0.5] = 1
    else:
        sample_num = array.shape[0]
        for i in range(sample_num):
            j = np.argmax(array[i, :])
            bin_array[i, j] = 1
    return bin_array
Example 23
def acc_stat(solution, prediction):
    '''Return accuracy statistics TN, FP, TP, FN
    Assumes that solution and prediction are binary 0/1 vectors.'''
    # This uses floats so the results are floats
    TN = sum(np.multiply((1 - solution), (1 - prediction)))
    FN = sum(np.multiply(solution, (1 - prediction)))
    TP = sum(np.multiply(solution, prediction))
    FP = sum(np.multiply((1 - solution), prediction))
    # print "TN =", TN
    # print "FP =", FP
    # print "TP =", TP
    # print "FN =", FN
    return (TN, FP, TP, FN)
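A minimal usage sketch, not part of the original, with hypothetical binary vectors, showing how the four counts come back in (TN, FP, TP, FN) order.

import numpy as np

solution = np.array([1, 0, 1, 1, 0])
prediction = np.array([1, 0, 0, 1, 1])
print(acc_stat(solution, prediction))  # (1, 1, 2, 1)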
Example 24
def pac_metric(solution, prediction, task='binary.classification'):
    '''Probabilistic Accuracy based on log_loss metric.
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.'''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * sum(solution)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if (diff) > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Example 25
def auc_metric(solution, prediction, task='binary.classification'):
    '''Normalized Area under ROC curve (AUC).
    Return Gini index = 2*AUC-1 for binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1) "solution" and any discriminant
    values for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).'''
    # auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
    label_num = solution.shape[1]
    auc = np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:, k])
        s_ = solution[:, k]
        if sum(s_) == 0:
            print('WARNING: no positive class example in class {}'.format(k + 1))
        npos = sum(s_ == 1)
        nneg = sum(s_ < 1)
        auc[k] = (sum(r_[s_ == 1]) - npos * (npos + 1) / 2) / (nneg * npos)
    return 2 * mvmean(auc) - 1

### END CLASSIFICATION METRICS

# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks
Example 26
def prior_log_loss(frac_pos, task='binary.classification'):
    '''Baseline log loss.
    For multiple classes or labels, return the values for each column.'''
    eps = 1e-15
    frac_pos_ = sp.maximum(eps, frac_pos)
    if (task != 'multiclass.classification'):  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = sp.maximum(eps, frac_neg)
        pos_class_log_loss_ = -frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = -frac_neg * np.log(frac_neg_)
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
        # base_log_loss = mvmean(base_log_loss)
        # print('binary {}'.format(base_log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum.
        # We return all the scores so we can normalize correctly later on.
    else:  # multiclass case
        fp = frac_pos_ / sum(frac_pos_)  # Need to renormalize the lines in multiclass case
        # Only ONE label is active per line in the multiclass case
        pos_class_log_loss_ = -frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss

# sklearn implementations for comparison
Example 27
def num_lines(filename):
    '''Count the number of lines of file'''
    return sum(1 for line in open(filename))
Example 28
def tp_filter(X, Y, feat_num=1000, verbose=True):
    '''TP feature selection in the spirit of the winners of the KDD cup 2001.
    Only for binary classification and sparse matrices.'''
    if issparse(X) and len(Y.shape) == 1 and len(set(Y)) == 2 and (sum(Y) / Y.shape[0]) < 0.1:
        if verbose:
            print("========= Filtering features...")
        Posidx = Y > 0
        # npos = sum(Posidx)
        # Negidx = Y <= 0
        # nneg = sum(Negidx)
        nz = X.nonzero()
        mx = X[nz].max()
        if X[nz].min() == mx:  # sparse binary
            if mx != 1:
                X[nz] = 1
            tp = csr_matrix.sum(X[Posidx, :], axis=0)
            # fn = npos - tp
            # fp = csr_matrix.sum(X[Negidx, :], axis=0)
            # tn = nneg - fp
        else:
            tp = np.sum(X[Posidx, :] > 0, axis=0)
            # tn = np.sum(X[Negidx,:] <= 0, axis=0)
            # fn = np.sum(X[Posidx,:] <= 0, axis=0)
            # fp = np.sum(X[Negidx,:] > 0, axis=0)
        tp = np.ravel(tp)
        idx = sorted(range(len(tp)), key=tp.__getitem__, reverse=True)
        return idx[0:feat_num]
    else:
        feat_num = X.shape[1]
        return range(feat_num)
Example 29
def predict(self, X):
    prediction = self.predict_method(X)
    # Calibrate proba
    if self.task != 'regression' and self.postprocessor != None:
        prediction = self.postprocessor.predict_proba(prediction)
    # Keep only the 2nd column; the two columns are redundant (the 1st is one minus the 2nd)
    if self.target_num == 1 and len(prediction.shape) > 1 and prediction.shape[1] > 1:
        prediction = prediction[:, 1]
    # Make sure the normalization is correct
    if self.task == 'multiclass.classification':
        eps = 1e-15
        norma = np.sum(prediction, axis=1)
        for k in range(prediction.shape[0]):
            prediction[k, :] /= sp.maximum(norma[k], eps)
    return prediction
Example 30
def fit(self, X, Y):
    if len(Y.shape) == 1:
        Y = np.array([Y]).transpose()  # Transform vector into column matrix
        # This is NOT what we want: Y = Y.reshape( -1, 1 ), because Y.shape[1] out of range
    self.n_target = Y.shape[1]  # Num target values = num col of Y
    self.n_label = len(set(Y.ravel()))  # Num labels = num classes (categories of categorical var if n_target=1 or n_target if labels are binary)
    # Create the right number of copies of the predictor instance
    if len(self.predictors) != self.n_target:
        predictorInstance = self.predictors[0]
        self.predictors = [predictorInstance]
        for i in range(1, self.n_target):
            self.predictors.append(copy.copy(predictorInstance))
    # Fit all predictors
    for i in range(self.n_target):
        # Update the number of desired predictors
        if hasattr(self.predictors[i], 'n_estimators'):
            self.predictors[i].n_estimators = self.n_estimators
        # Subsample if desired
        if self.balance:
            pos = Y[:, i] > 0
            neg = Y[:, i] <= 0
            if sum(pos) < sum(neg):
                chosen = pos
                not_chosen = neg
            else:
                chosen = neg
                not_chosen = pos
            num = sum(chosen)
            # Indices of the samples in the larger class
            idx = np.array([j for j, flag in enumerate(not_chosen) if flag])
            np.random.shuffle(idx)
            chosen[idx[0:min(num, len(idx))]] = True
            # Train with chosen samples
            self.predictors[i].fit(X[chosen, :], Y[chosen, i])
        else:
            self.predictors[i].fit(X, Y[:, i])
    return
Example 31
def get_batch_loss(self, input_batch, output_batch):
    dynet.renew_cg()

    # Dimension: maxSentLength * minibatch_size
    wids = []
    wids_reversed = []

    # List of lists to store whether an input is
    # present(1)/absent(0) for an example at a time step
    # masks = []  # Dimension: maxSentLength * minibatch_size
    # tot_words = 0

    maxSentLength = max([len(sent) for sent in input_batch])

    for j in range(maxSentLength):
        wids.append([(self.src_vocab[sent[j]].i if len(sent) > j else self.src_vocab.END_TOK.i)
                     for sent in input_batch])
        wids_reversed.append([(self.src_vocab[sent[len(sent) - j - 1]].i if len(sent) > j else self.src_vocab.END_TOK.i)
                              for sent in input_batch])
        # mask = [(1 if len(sent) > j else 0) for sent in input_batch]
        # masks.append(mask)
        # tot_words += sum(mask)

    embedded_batch = self.embed_batch_seq(wids)
    embedded_batch_reverse = self.embed_batch_seq(wids_reversed)
    encoded_batch = self.encode_batch_seq(embedded_batch, embedded_batch_reverse)

    # pass last hidden state of encoder to decoder
    return self.decode_batch(encoded_batch, output_batch)
Example 32
def plotFields(layer, fieldShape=None, channel=None, figOffset=1, cmap=None, padding=0.01):
    # Receptive Fields Summary
    try:
        W = layer.W
    except:
        W = layer
    wp = W.eval().transpose()
    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        fields = np.reshape(wp, list(wp.shape[0:-1]) + fieldShape)
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features * channels, iy, ix])

    perRow = int(math.floor(math.sqrt(fields.shape[0])))
    perColumn = int(math.ceil(fields.shape[0] / float(perRow)))

    fig = mpl.figure(figOffset); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn), axes_pad=padding, cbar_mode='single')
    for i in range(0, np.shape(fields)[0]):
        im = grid[i].imshow(fields[i], cmap=cmap); grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields, np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0, perColumn*perRow, perColumn):
    #     tiled.append(np.hstack(fields2[i:i+perColumn]))
    #
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();

    mpl.figure(figOffset + 1); mpl.clf()
    mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap)
    mpl.title('%s Total Absolute Input Dependency' % layer.name)
    mpl.colorbar()
Example 33
def plotFields(layer, fieldShape=None, channel=None, maxFields=25, figName='ReceptiveFields', cmap=None, padding=0.01):
    # Receptive Fields Summary
    W = layer.W
    wp = W.eval().transpose()
    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        fields = np.reshape(wp, list(wp.shape[0:-1]) + fieldShape)
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features * channels, iy, ix])

    fieldsN = min(fields.shape[0], maxFields)
    perRow = int(math.floor(math.sqrt(fieldsN)))
    perColumn = int(math.ceil(fieldsN / float(perRow)))

    fig = mpl.figure(figName); mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn), axes_pad=padding, cbar_mode='single')
    for i in range(0, fieldsN):
        im = grid[i].imshow(fields[i], cmap=cmap); grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    # old way
    # fields2 = np.vstack([fields, np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
    # tiled = []
    # for i in range(0, perColumn*perRow, perColumn):
    #     tiled.append(np.hstack(fields2[i:i+perColumn]))
    #
    # tiled = np.vstack(tiled)
    # mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();

    mpl.figure(figName + ' Total'); mpl.clf()
    mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap)
    mpl.title('%s Total Absolute Input Dependency' % layer.name)
    mpl.colorbar()
Example 34
def analytic_convolution_gaussian(mu1, covar1, mu2, covar2):
    """
    The analytic convolution of two Gaussians is simply the Gaussian whose mean is the
    sum of the two mean vectors and whose covariance is the sum of the two covariance matrices

    --- INPUT ---
    mu1     The mean of the first gaussian
    covar1  The covariance matrix of the first gaussian
    mu2     The mean of the second gaussian
    covar2  The covariance matrix of the second gaussian
    """
    muconv = mu1 + mu2
    covarconv = covar1 + covar2
    return muconv, covarconv

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
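A minimal usage sketch, not part of the original, with hypothetical 2-D inputs; it just illustrates that convolving N(mu1, covar1) with N(mu2, covar2) yields N(mu1 + mu2, covar1 + covar2).

import numpy as np

mu1, covar1 = np.array([0.0, 0.0]), np.eye(2)
mu2, covar2 = np.array([1.0, -1.0]), 2.0 * np.eye(2)
mu, covar = analytic_convolution_gaussian(mu1, covar1, mu2, covar2)
print(mu)     # [ 1. -1.]
print(covar)  # 3 times the identity matrix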
Example 35
def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a given size using either the sum, mean or median of the pixels binned.
    Note that the old array dimensions have to be multiples of the new array dimensions

    --- INPUT ---
    array       Array to reshape (combine pixels)
    newsize     New size of array
    pixcombine  The method to combine the pixels with. Choices are sum, mean and median
    """
    sh = newsize[0], array.shape[0] // newsize[0], newsize[1], array.shape[1] // newsize[1]
    if pixcombine == 'sum':
        reshapedarray = array.reshape(sh).sum(-1).sum(1)
    elif pixcombine == 'mean':
        reshapedarray = array.reshape(sh).mean(-1).mean(1)
    elif pixcombine == 'median':
        # ndarrays have no .median() method, so reduce the same axes with np.median
        reshapedarray = np.median(np.median(array.reshape(sh), axis=-1), axis=1)
    return reshapedarray

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
Example 36
def inner_product_to_infty(self, gf1, gf2):
    "Inner product on non-compact domain"
    factors = [s.get_scale_factor() for s in self.stencils]
    factor = np.prod(factors)
    integrand = (factor * gf1 * self.weights2D * gf2 * self.dRdX)
    integrand[-1] = 0
    integral = np.sum(integrand)
    return integral
Example 37
def get_integration_weights(order, nodes=None):
    """
    Returns the integration weights for Gauss-Lobatto quadrature
    as a function of the order of the polynomial we want to represent.
    See: https://en.wikipedia.org/wiki/Gaussian_quadrature
    See: arXiv:gr-qc/0609020v1
    """
    if np.all(nodes == False):
        nodes = get_quadrature_points(order)
    if poly == polynomial.chebyshev.Chebyshev:
        weights = np.empty((order + 1))
        weights[1:-1] = np.pi / order
        weights[0] = np.pi / (2 * order)
        weights[-1] = weights[0]
        return weights
    elif poly == polynomial.legendre.Legendre:
        interior_weights = 2 / ((order + 1) * order * poly.basis(order)(nodes[1:-1])**2)
        boundary_weights = np.array([1 - 0.5 * np.sum(interior_weights)])
        weights = np.concatenate((boundary_weights, interior_weights, boundary_weights))
        return weights
    else:
        raise ValueError("Not a known polynomial type.")
Example 38
def inner_product(self, gf1, gf2):
    """Calculates the 2D inner product between grid functions
    gf1 and gf2 using the appropriate quadrature rule
    """
    factors = [s.get_scale_factor() for s in self.stencils]
    factor = np.prod(factors)
    integrand = gf1 * self.weights2D * gf2
    integral_unit_cell = np.sum(integrand)
    integral_physical = integral_unit_cell * factor
    return integral_physical
Example 39
def compute_rhs(rhs):
    U_dealiased = work[((3,) + FFT.work_shape(dealias), float, 0)]
    curl_dealiased = work[((3,) + FFT.work_shape(dealias), float, 1)]
    for i in range(3):
        U_dealiased[i] = FFT.ifftn(U_hat[i], U_dealiased[i], dealias)
    curl_dealiased = curl(U_hat, curl_dealiased)
    rhs = cross(U_dealiased, curl_dealiased, rhs)
    P_hat[:] = sum(rhs * K_over_K2, 0, out=P_hat)
    rhs -= P_hat * K
    rhs -= nu * K2 * U_hat
    return rhs

# Initialize a Taylor Green vortex
Example 40
def gof(self):
    r2mean = np.mean(self.r2.T[self.endoexo()[0]].values)
    AVEmean = self.AVE().copy()
    totalblock = 0
    for i in range(self.lenlatent):
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == self.latent[i]]]
        block = len(block.columns.values)
        totalblock += block
        AVEmean[self.latent[i]] = AVEmean[self.latent[i]] * block
    AVEmean = np.sum(AVEmean) / totalblock
    return np.sqrt(AVEmean * r2mean)
Example 41
def cr(self):
    # Composite Reliability
    composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
    for i in range(self.lenlatent):
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == self.latent[i]]]
        p = len(block.columns)
        if (p != 1):
            cor_mat = np.cov(block.T)
            evals, evecs = np.linalg.eig(cor_mat)
            U, S, V = np.linalg.svd(cor_mat, full_matrices=False)
            indices = np.argsort(evals)
            indices = indices[::-1]
            evecs = evecs[:, indices]
            evals = evals[indices]
            loadings = V[0, :] * np.sqrt(evals[0])
            numerador = np.sum(abs(loadings))**2
            denominador = numerador + (p - np.sum(loadings ** 2))
            cr = numerador / denominador
            composite[self.latent[i]] = cr
        else:
            composite[self.latent[i]] = 1
    composite = composite.T
    return(composite)
Example 42
def r2adjusted(self):
    n = len(self.data_)
    r2 = self.r2.values
    r2adjusted = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
    for i in range(self.lenlatent):
        p = sum(self.LVariables['target'] == self.latent[i])
        r2adjusted[self.latent[i]] = r2[i] - \
            (p * (1 - r2[i])) / (n - p - 1)
    return r2adjusted.T
Example 43
def AVE(self):
    # AVE
    return self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
Example 44
def fornell(self):
    cor_ = pd.DataFrame.corr(self.fscores)**2
    AVE = self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
    for i in range(len(cor_)):
        cor_.ix[i, i] = AVE[i]
    return(cor_)
Example 45
def fitness(self, data_, n_clusters, lvmodel, mvmodel, scheme, regression):
    output = pd.DataFrame(self.genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data_, output], axis=1)
    f1 = []
    results = []
    for i in range(n_clusters):
        dataSplited = (dataSplit.loc[dataSplit['Split'] == i]).drop('Split', axis=1)
        dataSplited.index = range(len(dataSplited))
        try:
            results.append(PyLSpm(dataSplited, lvmodel, mvmodel, scheme,
                                  regression, 0, 50, HOC='true'))
            sumOuterResid = pd.DataFrame.sum(
                pd.DataFrame.sum(results[i].residuals()[1]**2))
            sumInnerResid = pd.DataFrame.sum(
                pd.DataFrame.sum(results[i].residuals()[2]**2))
            f1.append(sumOuterResid + sumInnerResid)
        except:
            f1.append(10000)
    print((1 / np.sum(f1)))
    return (1 / np.sum(f1))
Example 46
def roulettewheel(pop, fit):
    fit = fit - min(fit)
    sumf = sum(fit)
    if (sumf == 0):
        return pop[0]
    prob = [(item + sum(fit[:index])) / sumf for index, item in enumerate(fit)]
    prob_ = uniform(0, 1)
    # print(prob)
    individuo = (int(BinSearch(prob, prob_, 0, len(prob) - 1)))
    return pop[individuo]
Example 47
def xavier_initializer(shape):
    dim_sum = np.sum(shape)
    if len(shape) == 1:
        dim_sum += 1
    bound = np.sqrt(2.0 / dim_sum)
    return tf.random_uniform(shape, minval=-bound, maxval=bound)

# Assigning network variables to target network variables
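A minimal usage sketch, not part of the original, assuming the TensorFlow 1.x API that tf.random_uniform belongs to; the layer sizes below are hypothetical.

import numpy as np
import tensorflow as tf  # TensorFlow 1.x

W = tf.Variable(xavier_initializer([784, 256]), name='W')  # weight matrix
b = tf.Variable(xavier_initializer([256]), name='b')       # bias vector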