The following are code examples showing how to use numpy.isnan(). They are extracted from open source Python projects.
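As a quick orientation before the examples (this snippet is not taken from any of the projects below), np.isnan is an elementwise NaN test. An explicit predicate is needed because NaN is the only value that compares unequal to itself:

import numpy as np

a = np.array([1.0, np.nan, np.inf])
print(np.isnan(a))        # [False  True False]
print(np.nan == np.nan)   # False: equality checks cannot detect NaN
print(a[~np.isnan(a)])    # [ 1. inf] -- the masking idiom used in many examples below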
Example 1
def rhoA(self):
    # rhoA
    rhoA = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

    for i in range(self.lenlatent):
        weights = pd.DataFrame(self.outer_weights[self.latent[i]])
        weights = weights[(weights.T != 0).any()]
        result = pd.DataFrame.dot(weights.T, weights)
        result_ = pd.DataFrame.dot(weights, weights.T)

        S = self.data_[self.Variables['measurement'][
            self.Variables['latent'] == self.latent[i]]]
        S = pd.DataFrame.dot(S.T, S) / S.shape[0]
        numerador = (
            np.dot(np.dot(weights.T, (S - np.diag(np.diag(S)))), weights))
        denominador = (
            (np.dot(np.dot(weights.T, (result_ - np.diag(np.diag(result_)))),
                    weights)))
        rhoA_ = ((result)**2) * (numerador / denominador)

        if np.isnan(rhoA_.values):
            rhoA[self.latent[i]] = 1
        else:
            rhoA[self.latent[i]] = rhoA_.values

    return rhoA.T
Example 2
def SMA(Series, N, M=1):
    ret = []
    i = 1
    length = len(Series)
    # skip any leading NaN values in the series
    while i < length:
        if np.isnan(Series[i]):
            i += 1
        else:
            break
    preY = Series[i]  # Y'
    ret.append(preY)
    while i < length:
        Y = (M * Series[i] + (N - M) * preY) / float(N)
        ret.append(Y)
        preY = Y
        i += 1
    return pd.Series(ret)
Example 3
def replace_missing(X):
    # This is ugly, but
    try:
        if X.getformat() == 'csr':
            return X
    except:
        X[np.isnan(X)] = -999.0  # djajetic 05.09.2015
        return X  # djajetic 05.09.2015
    p = len(X)
    nn = len(X[0]) * 2
    XX = np.zeros([p, nn])
    for i in range(len(X)):
        line = X[i]
        line1 = [0 if np.isnan(x) else x for x in line]
        line2 = [1 if np.isnan(x) else 0 for x in line]  # indicator of missingness
        XX[i] = line1 + line2
    return XX
Example 4
def get(self, X):
    X = np.array(X)
    X_nan = np.isnan(X)
    imputed = self.meanImput(X.copy())

    if len(self.estimators_) > 1:
        for i, estimator_ in enumerate(self.estimators_):
            X_s = np.delete(imputed, i, 1)
            y_nan = X_nan[:, i]
            X_unk = X_s[y_nan]

            result_ = []
            if len(X_unk) > 0:
                for unk in X_unk:
                    result_.append(estimator_.predict(unk))
                X[y_nan, i] = result_

    return X
Example 5
def treegauss_remove_row(
        data_row,
        tree_grid,
        latent_row,
        vert_ss,
        edge_ss,
        feat_ss,
):
    # Update sufficient statistics.
    for v in range(latent_row.shape[0]):
        z = latent_row[v, :]
        vert_ss[v, :, :] -= np.outer(z, z)
    for e in range(tree_grid.shape[1]):
        z1 = latent_row[tree_grid[1, e], :]
        z2 = latent_row[tree_grid[2, e], :]
        edge_ss[e, :, :] -= np.outer(z1, z2)
    for v, x in enumerate(data_row):
        if np.isnan(x):
            continue
        z = latent_row[v, :]
        feat_ss[v] -= 1
        feat_ss[v, 1] -= x
        feat_ss[v, 2:] -= x * z  # TODO Use central covariance.
Example 6
def test_train(self):
    model, fetches_ = self._test_pipeline(tf.contrib.learn.ModeKeys.TRAIN)
    predictions_, loss_, _ = fetches_

    target_len = self.sequence_length + 10 + 2
    max_decode_length = model.params["target.max_seq_len"]
    expected_decode_len = np.minimum(target_len, max_decode_length)

    np.testing.assert_array_equal(predictions_["logits"].shape, [
        self.batch_size, expected_decode_len - 1,
        model.target_vocab_info.total_size
    ])
    np.testing.assert_array_equal(predictions_["losses"].shape,
                                  [self.batch_size, expected_decode_len - 1])
    np.testing.assert_array_equal(predictions_["predicted_ids"].shape,
                                  [self.batch_size, expected_decode_len - 1])
    self.assertFalse(np.isnan(loss_))
Example 7
def information_ratio(algorithm_returns, benchmark_returns):
    """
    http://en.wikipedia.org/wiki/Information_ratio

    Args:
        algorithm_returns (np.array-like):
            All returns during algorithm lifetime.
        benchmark_returns (np.array-like):
            All benchmark returns during algo lifetime.

    Returns:
        float. Information ratio.
    """
    relative_returns = algorithm_returns - benchmark_returns

    relative_deviation = relative_returns.std(ddof=1)

    if zp_math.tolerant_equals(relative_deviation, 0) or \
            np.isnan(relative_deviation):
        return 0.0

    return np.mean(relative_returns) / relative_deviation
Example 8
def raw_data_gen(self):
    for dt, series in self.data.iterrows():
        for sid, price in series.iteritems():
            # Skip SIDs that can not be forward filled
            if np.isnan(price) and \
                    sid not in self.started_sids:
                continue
            self.started_sids.add(sid)

            event = {
                'dt': dt,
                'sid': sid,
                'price': price,
                # Just chose something large
                # if no volume available.
                'volume': 1e9,
            }
            yield event
Example 9
def test_nan_filter_dataframe(self):
    dates = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
    df = pd.DataFrame(np.random.randn(2, 2),
                      index=dates,
                      columns=[4, 5])
    # should be filtered
    df.loc[dates[0], 4] = np.nan
    # should not be filtered, should have been ffilled
    df.loc[dates[1], 5] = np.nan
    source = DataFrameSource(df)
    event = next(source)
    self.assertEqual(5, event.sid)
    event = next(source)
    self.assertEqual(4, event.sid)
    event = next(source)
    self.assertEqual(5, event.sid)
    self.assertFalse(np.isnan(event.price))
Example 10
def df_type_to_str(i):
    '''
    Convert into simple datatypes from pandas/numpy types
    '''
    if isinstance(i, np.bool_):
        return bool(i)
    if isinstance(i, np.int_):
        return int(i)
    if isinstance(i, np.float):
        if np.isnan(i):
            return 'NaN'
        elif np.isinf(i):
            return str(i)
        return float(i)
    if isinstance(i, np.uint):
        return int(i)
    if type(i) == bytes:
        return i.decode('UTF-8')
    if isinstance(i, (tuple, list)):
        return str(i)
    if i is pd.NaT:  # not identified as a float null
        return 'NaN'
    return str(i)
Example 11
def calc_reward(self, action=0, state=None, **kw):
    """Calculate the reward for the specified transition."""
    eps1, eps2 = self.eps_values_for_actions[action]
    if state is None:
        state = self.observe()
    if self.logspace:
        T1, T2, T1s, T2s, V, E = 10**state
    else:
        T1, T2, T1s, T2s, V, E = state
    # the reward function penalizes treatment because of side-effects
    reward = -0.1 * V - 2e4 * eps1**2 - 2e3 * eps2**2 + 1e3 * E
    # Constrain reward to be within specified range
    if np.isnan(reward):
        reward = -self.reward_bound
    elif reward > self.reward_bound:
        reward = self.reward_bound
    elif reward < -self.reward_bound:
        reward = -self.reward_bound
    return reward
Example 12
def to_rgb(img):
    """
    Converts the given array into a RGB image. If the number of channels is
    not 3 the array is tiled such that it has 3 channels. Finally, the values
    are rescaled to [0,255)

    :param img: the array to convert [nx, ny, channels]
    :returns img: the rgb image [nx, ny, 3]
    """
    img = np.atleast_3d(img)
    channels = img.shape[2]
    if channels < 3:
        img = np.tile(img, 3)

    img[np.isnan(img)] = 0
    img -= np.amin(img)
    img /= np.amax(img)
    img *= 255
    return img
Example 13
def map(self, data):
    data = data[self.fieldName]

    colors = np.empty((len(data), 4))
    default = np.array(fn.colorTuple(self['Default'])) / 255.
    colors[:] = default

    for v in self.param('Values'):
        mask = data == v.maskValue
        c = np.array(fn.colorTuple(v.value())) / 255.
        colors[mask] = c
    #scaled = np.clip((data-self['Min']) / (self['Max']-self['Min']), 0, 1)
    #cmap = self.value()
    #colors = cmap.map(scaled, mode='float')

    #mask = np.isnan(data) | np.isinf(data)
    #nanColor = self['NaN']
    #nanColor = (nanColor.red()/255., nanColor.green()/255., nanColor.blue()/255., nanColor.alpha()/255.)
    #colors[mask] = nanColor

    return colors
Example 14
def round_solution_pool(pool, constraints):

    pool.distinct().sort()
    P = pool.P
    L0_reg_ind = np.isnan(constraints['coef_set'].C_0j)
    L0_max = constraints['L0_max']
    rounded_pool = SolutionPool(P)

    for solution in pool.solutions:
        # sort from largest to smallest coefficients
        feature_order = np.argsort([-abs(x) for x in solution])
        rounded_solution = np.zeros(shape=(1, P))
        l0_norm_count = 0
        for k in range(0, P):
            j = feature_order[k]
            if not L0_reg_ind[j]:
                rounded_solution[0, j] = np.round(solution[j], 0)
            elif l0_norm_count < L0_max:
                rounded_solution[0, j] = np.round(solution[j], 0)
                l0_norm_count += L0_reg_ind[j]

        rounded_pool.add(objvals=np.nan, solutions=rounded_solution)

    rounded_pool.distinct().sort()
    return rounded_pool
Example 15
def checkFSXvalsAgainstADNIMERGE(tadpoleDF, mriADNI1FileFSX, otherSSvisCodeStr,
                                 ssNameTag, ignoreMissingCols=False):

    nrRows, nrCols = tadpoleDF.shape
    colListOtherSS = list(ssDF.columns.values)
    colListTadpoleDF = list(tadpoleDF.columns.values)

    tadpoleDF[['Hippocampus', 'ST29SV%s' % ssNameTag, 'ST88SV%s' % ssNameTag]] = \
        tadpoleDF[['Hippocampus', 'ST29SV%s' % ssNameTag,
                   'ST88SV%s' % ssNameTag]].apply(pd.to_numeric, errors='coerce')
    tadpoleDF['HIPPOSUM'] = tadpoleDF['ST29SV%s' % ssNameTag] + \
        tadpoleDF['ST88SV%s' % ssNameTag]

    for r in range(nrRows):
        valsNan = np.isnan(tadpoleDF['Hippocampus'][r]) or \
            (np.isnan(tadpoleDF['ST29SV%s' % ssNameTag][r]) and
             np.isnan(tadpoleDF['ST88SV%s' % ssNameTag][r]))
        if valsNan:
            continue
        valsNotEq = tadpoleDF['Hippocampus'][r] != \
            (tadpoleDF['ST29SV%s' % ssNameTag][r] + tadpoleDF['ST88SV%s' % ssNameTag][r])
        if valsNotEq:
            print('entries dont match\n ', tadpoleDF[['RID', 'VISCODE', 'Hippocampus',
                  'ST29SV%s' % ssNameTag, 'ST88SV%s' % ssNameTag, 'HIPPOSUM']].iloc[r])

    # Conclusion: the entries above don't match because UCSFFSX has duplicate
    # entries for the same subject and viscode.
Example 16
def test_hz():
    """Test the hz function."""
    df, _ = readSC()
    for (teff, logg, mass) in df.loc[:, ['teff', 'logg', 'mass']].values:
        lum = (teff / 5777)**4 * (mass / ((10**logg) / (10**4.44)))**2

        assert isinstance(hz(teff, lum, model=2), float)
        assert isinstance(hz(teff, lum, model=4), float)

    teff = 5777
    lum = 1
    invalids = [{teff: lum}, [teff, lum], (teff, lum), "..."]
    for model in range(1, 6):
        assert isinstance(hz(teff, lum, model), float)

    results = [0.75, 0.98, 0.99, 1.71, 1.77]
    for model, result in enumerate(results, start=1):
        assert round(hz(teff, lum, model), 2) == result

    for invalid in invalids:
        assert np.isnan(hz(invalid, lum, model))
        assert np.isnan(hz(teff, invalid, model))

    assert hz(teff, lum, 2) < hz(teff, lum, 4)  # hz1 < hz2
Example 17
def generateWekaFile(X, Y, features, path, name):
    f = open(path + name + '.arff', 'w')
    f.write("@relation '" + name + "'\n\n")

    for feat in features:
        f.write("@attribute " + feat + " numeric\n")
    f.write("@attribute cluster {True,False}\n\n")
    f.write("@data\n\n")

    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if np.isnan(X[i, j]):
                f.write("?,")
            else:
                f.write(str(X[i, j]) + ",")
        if Y[i] == 1.0 or Y[i] == True:
            f.write("True\n")
        else:
            f.write("False\n")
    f.close()
Example 18
def test_posterior_zeros(self):
    p = np.asarray([.5, 0., 0.]).reshape((1, 3))
    posterior = self.eval(self.posterior, p)
    print 'posterior', posterior
    posterior_grad = self.eval(self.posterior_grad, p)
    print 'posterior grad', posterior_grad

    kl = self.eval(self.posterior_kl, p)
    print kl
    self.assertGreater(kl.sum(), 0)
    self.assertFalse(np.isnan(kl).any())
    self.assertTrue(np.isfinite(kl).all())

    grad = self.eval(self.posterior_kl_grad, p)
    print grad
    self.assertFalse(np.isnan(grad).any())
    self.assertTrue(np.isfinite(grad).all())
Example 19
def update_summary(
        var_up,
        var,
        start,
        end,
):
    diff = np.abs(var_up - var)
    reldiff = diff / var

    # filter out nan's
    try:
        reldiff = reldiff[~np.isnan(reldiff)]
    except:
        pass

    return (np.mean(diff), np.std(diff),
            np.mean(reldiff), np.std(reldiff),
            (end - start).microseconds)
Example 20
def test_bootstrap_replicate_1d(data, seed):
    np.random.seed(seed)
    x = dcst.bootstrap_replicate_1d(data, np.mean)
    np.random.seed(seed)
    x_correct = original.bootstrap_replicate_1d(data[~np.isnan(data)], np.mean)
    assert (np.isnan(x) and np.isnan(x_correct)) \
        or np.isclose(x, x_correct, atol=atol, equal_nan=True)

    np.random.seed(seed)
    x = dcst.bootstrap_replicate_1d(data, np.median)
    np.random.seed(seed)
    x_correct = original.bootstrap_replicate_1d(data[~np.isnan(data)], np.median)
    assert (np.isnan(x) and np.isnan(x_correct)) \
        or np.isclose(x, x_correct, atol=atol, equal_nan=True)

    np.random.seed(seed)
    x = dcst.bootstrap_replicate_1d(data, np.std)
    np.random.seed(seed)
    x_correct = original.bootstrap_replicate_1d(data[~np.isnan(data)], np.std)
    assert (np.isnan(x) and np.isnan(x_correct)) \
        or np.isclose(x, x_correct, atol=atol, equal_nan=True)
Example 21
def nan_helper(y):
    """
    Helper to handle indices and logical indices of NaNs.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, logical indices of NaNs
        - index, a function, with signature indices = index(logical_indices),
          to convert logical indices of NaNs to 'equivalent' indices
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x = nan_helper(y)
        >>> y[nans] = NP.interp(x(nans), x(~nans), y[~nans])
    """
    # Source: http://stackoverflow.com/questions/6518811/interpolate-nan-values-in-a-numpy-array
    return NP.isnan(y), lambda z: z.nonzero()[0]
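A self-contained run of the interpolation recipe from the docstring above (illustrative, not part of the original project; the helper references numpy under the NP alias, so we bind it explicitly):

import numpy as np
NP = np  # nan_helper expects numpy under the NP alias

y = np.array([1.0, np.nan, np.nan, 4.0])
nans, x = nan_helper(y)
y[nans] = np.interp(x(nans), x(~nans), y[~nans])
print(y)  # [1. 2. 3. 4.]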
Example 22
def step4():
    key_vec = pickle.loads(open("key_vec.pkl", "rb").read())
    vecs = []
    for ev, vec in enumerate(key_vec.values()):
        x = np.array(vec)
        if np.isnan(x).any():
            # print(vec)
            continue
        vecs.append(x)
    vecs = np.array(vecs)
    kmeans = KMeans(n_clusters=128, init='k-means++', n_init=10, max_iter=300,
                    tol=0.0001, precompute_distances='auto', verbose=0,
                    random_state=None, copy_x=True, n_jobs=1)
    print("now fitting...")
    kmeans.fit(vecs)
    open("kmeans.model", "wb").write(pickle.dumps(kmeans))
    for p in kmeans.predict(vecs):
        print(p)
Example 23
def _step5(arr):
    kmeans = pickle.loads(open("kmeans.model", "rb").read())
    key, lines, tipe = arr
    print(key)
    open("./tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe, key=key), "w").write("\n".join(lines))
    res = os.popen("./fasttext print-sentence-vectors ./models/model.bin < tmp/tmp.{tipe}.{key}.txt".format(tipe=tipe, key=key)).read()
    w = open("tmp/tmp.{tipe}.{key}.json".format(tipe=tipe, key=key), "w")
    for line in res.split("\n"):
        try:
            vec = list(map(float, line.split()[-100:]))
        except:
            print(line)
            print(res)
            continue
        x = np.array(vec)
        if np.isnan(x).any():
            continue
        cluster = kmeans.predict([vec])
        txt = line.split()[:-100]
        obj = {"txt": txt, "cluster": cluster.tolist()}
        data = json.dumps(obj, ensure_ascii=False)
        w.write(data + "\n")
Example 24
def test_lm(self):
    hps = get_test_hparams()

    with tf.variable_scope("model"):
        model = LM(hps)

    with self.test_session() as sess:
        tf.initialize_all_variables().run()
        tf.initialize_local_variables().run()

        loss = 1e5
        for i in range(50):
            x, y, w = simple_data_generator(hps.batch_size, hps.num_steps)
            loss, _ = sess.run([model.loss, model.train_op],
                               {model.x: x, model.y: y, model.w: w})
            print("%d: %.3f %.3f" % (i, loss, np.exp(loss)))
            if np.isnan(loss):
                print("NaN detected")
                break

        self.assertLess(loss, 1.0)
Example 25
def get_series_median_peryear(word_time_series, i_year_words, one_minus=False,
                              start_year=1900, end_year=2000, year_inc=10,
                              exclude_partial_missing=False):
    """
    Return the median array for the values of the words specified per year
    in i_year_words for the specified years.
    """
    medians = []
    r_word_time_series = {}
    if exclude_partial_missing:
        for word, time_series in word_time_series.iteritems():
            if not np.isnan(np.sum(time_series.values())):
                r_word_time_series[word] = time_series
    else:
        r_word_time_series = word_time_series
    for year in xrange(start_year, end_year + 1, year_inc):
        word_array = np.array([r_word_time_series[word][year]
                               for word in i_year_words[year]
                               if word in r_word_time_series
                               and not np.isnan(r_word_time_series[word][year])
                               and not r_word_time_series[word][year] == 0])
        if len(word_array) == 0:
            continue
        if one_minus:
            word_array = 1 - word_array
        medians.append(np.median(word_array))
    return np.array(medians)
Example 26
def get_series_mean_std_peryear(word_time_series, i_year_words, one_minus=False,
                                start_year=1900, end_year=2000, year_inc=1,
                                exclude_partial_missing=False):
    """
    Return the mean and stderr arrays for the values of the words specified
    per year in i_year_words for the specified years.
    """
    means = []
    stderrs = []
    r_word_time_series = {}
    if exclude_partial_missing:
        for word, time_series in word_time_series.iteritems():
            if not np.isnan(np.sum(time_series.values())):
                r_word_time_series[word] = time_series
    else:
        r_word_time_series = word_time_series
    for year in xrange(start_year, end_year + 1, year_inc):
        word_array = np.array([r_word_time_series[word][year]
                               for word in i_year_words[year]
                               if word in r_word_time_series
                               and not np.isnan(r_word_time_series[word][year])
                               and not np.isinf(r_word_time_series[word][year])])
        if len(word_array) == 0:
            continue
        if one_minus:
            word_array = 1 - word_array
        means.append(word_array.mean())
        stderrs.append(word_array.std())
    return np.array(means), np.array(stderrs)
Example 27
def get_series_mean_stderr_peryear(word_time_series, i_year_words, one_minus=False,
                                   start_year=1900, end_year=2000, year_inc=1,
                                   exclude_partial_missing=False):
    """
    Return the mean and stderr arrays for the values of the words specified
    per year in i_year_words for the specified years.
    """
    means = []
    stderrs = []
    r_word_time_series = {}
    if exclude_partial_missing:
        for word, time_series in word_time_series.iteritems():
            time_series = {year: val for year, val in time_series.iteritems()
                           if year >= start_year and year <= end_year}
            if not np.isnan(np.sum(time_series.values())):
                r_word_time_series[word] = time_series
    else:
        r_word_time_series = word_time_series
    for year in xrange(start_year, end_year + 1, year_inc):
        word_array = np.array([r_word_time_series[word][year]
                               for word in i_year_words[year]
                               if word in r_word_time_series
                               and not np.isnan(r_word_time_series[word][year])])
        if one_minus:
            word_array = 1 - word_array
        means.append(word_array.mean())
        stderrs.append(word_array.std() / len(word_array))
    return np.array(means), np.array(stderrs)
Example 28
def get_yearly_set_dev(series, i_year_words, one_minus=False, start_year=1900,
                       end_year=2000, method='diff'):
    """
    Gets the mean relative deviation of the words in words vs. the full series.
    """
    base_mat = _make_series_mat(series, series.keys(), one_minus=one_minus,
                                start_year=start_year, end_year=end_year)
    means = []
    stderrs = []
    r_word_time_series = series
    for year in xrange(start_year, end_year + 1):
        word_array = np.array([r_word_time_series[word][year]
                               for word in i_year_words[year]
                               if word in r_word_time_series
                               and not np.isnan(r_word_time_series[word][year])])
        if one_minus:
            word_array = 1 - word_array
        if method == 'diff':
            word_array = word_array - base_mat.mean(0)[year - start_year]
        elif method == 'ratio':
            word_array = word_array / base_mat.mean(0)[year - start_year]
        else:
            raise RuntimeError("Unknown deviation method. Use diff or ratio.")
        means.append(word_array.mean())
        stderrs.append(word_array.std() / len(word_array))
    return np.array(means), np.array(stderrs)
Example 29
def log_likelihood(self, data):
    nks = np.bincount(self.labels_, minlength=self.n_clusters)  # number of points in each cluster
    n, d = data.shape
    log_likelihood = 0
    covar_matrices = self.covariances(self.labels_,
                                      cluster_centers=self.cluster_centers_,
                                      data=data)
    covar_matrix_det_v = np.linalg.det(covar_matrices)
    self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
    for k, nk in enumerate(nks):
        if self.verbose == 1:
            print('log_likelihood: covar_matrix_det = {}'.format(covar_matrix_det_v[k]))
        term_1 = nk * (np.log(float(nk) / n) - 0.5 * d * np.log(2 * np.pi) -
                       0.5 * np.log(abs(covar_matrix_det_v[k])))
        cdist_result = cdist(data[self.labels_ == k],
                             np.array([self.cluster_centers_[k]]),
                             metric='mahalanobis',
                             VI=self._inv_covar_matrices[k])
        cdist_no_nan = cdist_result[~np.isnan(cdist_result)]  # to deal with nans returned by cdist
        term_2 = -0.5 * np.sum(cdist_no_nan)
        k_sum = term_1 + term_2
        log_likelihood += k_sum

    if np.isnan(log_likelihood) or log_likelihood == float('inf'):
        raise Exception('ll is nan or inf')
    return log_likelihood
Example 30
def test_alpha(self, returns, benchmark, expected):
    observed = self.empyrical.alpha(returns, benchmark)
    assert_almost_equal(
        observed,
        expected,
        DECIMAL_PLACES)

    if len(returns) == len(benchmark):
        # Compare to scipy linregress
        returns_arr = returns.values
        benchmark_arr = benchmark.values
        mask = ~np.isnan(returns_arr) & ~np.isnan(benchmark_arr)
        slope, intercept, _, _, _ = stats.linregress(benchmark_arr[mask],
                                                     returns_arr[mask])

        assert_almost_equal(
            observed,
            intercept * 252,
            DECIMAL_PLACES
        )

# Alpha/beta translation tests.
Example 31
def test_beta(self, returns, benchmark, expected):
    observed = self.empyrical.beta(returns, benchmark)
    assert_almost_equal(
        observed,
        expected,
        DECIMAL_PLACES)

    if len(returns) == len(benchmark):
        # Compare to scipy linregress
        returns_arr = returns.values
        benchmark_arr = benchmark.values
        mask = ~np.isnan(returns_arr) & ~np.isnan(benchmark_arr)
        slope, intercept, _, _, _ = stats.linregress(benchmark_arr[mask],
                                                     returns_arr[mask])

        assert_almost_equal(
            observed,
            slope
        )
Example 32
def strategy(data, params):
    """
    Stack overlapping intervals.

    Assumes that each set has the same horizontal position.
    """
    vjust = params['vjust']

    y = data['y'].copy()
    y[np.isnan(y)] = 0
    heights = np.append(0, y.cumsum())

    if params['fill']:
        heights = heights / np.abs(heights[-1])

    data['ymin'] = np.min([heights[:-1], heights[1:]], axis=0)
    data['ymax'] = np.max([heights[:-1], heights[1:]], axis=0)
    # less intuitive than (ymin + vjust(ymax-ymin)), but
    # this way avoids subtracting numbers of potentially
    # similar precision
    data['y'] = ((1 - vjust) * data['ymin'] + vjust * data['ymax'])
    return data
Example 33
def _find_index(bg_df, start_date, end_date, make_col_bool):
    if make_col_bool:
        # create a column with just the date if make_col_bool is True
        bg_df['date'] = bg_df['created_at'].apply(lambda x: x.date())

    # Find the first date with the start date (first entry) and the last date
    # with the end date (last entry). Since the older dates have higher
    # indices, we use max() for the start and min() for the end dates.
    start_index = bg_df[bg_df['date'] == start_date.date()].index.max()
    end_index = bg_df[bg_df['date'] == end_date.date()].index.min()

    # Raises exception if invalid dates (which are labeled as NaN)
    if np.isnan(start_index):
        raise Exception("Invalid start date: " + str(start_date.date()))
    if np.isnan(end_index):
        raise Exception("Invalid end date: " + str(end_date.date()))

    return bg_df, start_index, end_index

# Function to get the bg data
Example 34
def plot_heatmaps(data, mis, column_label, cont, topk=30, prefix=''):
    cmap = sns.cubehelix_palette(as_cmap=True, light=.9)
    m, nv = mis.shape
    for j in range(m):
        inds = np.argsort(- mis[j, :])[:topk]
        if len(inds) >= 2:
            plt.clf()
            order = np.argsort(cont[:, j])
            subdata = data[:, inds][order].T
            subdata -= np.nanmean(subdata, axis=1, keepdims=True)
            subdata /= np.nanstd(subdata, axis=1, keepdims=True)
            columns = [column_label[i] for i in inds]
            sns.heatmap(subdata, vmin=-3, vmax=3, cmap=cmap,
                        yticklabels=columns, xticklabels=False,
                        mask=np.isnan(subdata))
            filename = '{}/heatmaps/group_num={}.png'.format(prefix, j)
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            plt.title("Latent factor {}".format(j))
            plt.yticks(rotation=0)
            plt.savefig(filename, bbox_inches='tight')
            plt.close('all')
            #plot_rels(data[:, inds], map(lambda q: column_label[q], inds),
            #          colors=cont[:, j],
            #          outfile=prefix + '/relationships/group_num=' + str(j),
            #          latent=labels[:, j], alpha=0.1)
Example 35
def write_data(self, result_dict):
    for key, result in six.iteritems(result_dict):
        if ss.isspmatrix(result):
            if np.isnan(result.data).any():
                raise ValueError("data {} have nan".format(key))
        elif np.isnan(result).any():
            raise ValueError("data {} have nan".format(key))
        with SimpleTimer("Writing generated data {} to hdf5 file"
                         .format(key),
                         end_in_new_line=False):
            if key in self.h5f:
                # self.h5f[key][...] = result
                raise NotImplementedError("Overwriting not supported.")
            else:
                if (isinstance(result, ss.csc_matrix)
                        or isinstance(result, ss.csr_matrix)):
                    # sparse matrix
                    h5sparse.Group(self.h5f).create_dataset(key, data=result)
                else:
                    self.h5f.create_dataset(key, data=result)
    self.h5f.flush()
Example 36
def repeat_until_convergence(labelled_data, labelled_clusters, unlabelled_centroids):
    # find the best-fitting centroids for the labelled_data
    previous_max_difference = 0
    while True:
        unlabelled_old_centroids = unlabelled_centroids
        unlabelled_centroids = move_centroids(labelled_clusters)
        labelled_clusters = form_clusters(labelled_data, unlabelled_centroids)
        differences = list(map(lambda a, b: np.linalg.norm(a - b),
                               unlabelled_old_centroids, unlabelled_centroids))
        max_difference = max(differences)
        if np.isnan(max_difference - previous_max_difference):
            difference_change = np.nan
        else:
            difference_change = abs((max_difference - previous_max_difference) /
                                    np.mean([previous_max_difference, max_difference])) * 100
        previous_max_difference = max_difference
        # difference change is nan once the list of differences is all zeroes
        if np.isnan(difference_change):
            break
    return labelled_clusters, unlabelled_centroids
Example 37
def loadData(self, filename, verbose=True, replace_missing=True):
    ''' Get the data from a text file in one of 3 formats:
        matrix, sparse, binary_sparse '''
    if verbose:
        print("========= Reading " + filename)
    start = time.time()
    if self.use_pickle and os.path.exists(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle")):
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "r") as pickle_file:
            vprint(verbose, "Loading pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            return pickle.load(pickle_file)
    if 'format' not in self.info.keys():
        self.getFormatData(filename)
    if 'feat_num' not in self.info.keys():
        self.getNbrFeatures(filename)

    data_func = {'dense': data_io.data,
                 'sparse': data_io.data_sparse,
                 'sparse_binary': data_io.data_binary_sparse}

    data = data_func[self.info['format']](filename, self.info['feat_num'])

    # IMPORTANT: when we replace missing values we double the number of variables
    if self.info['format'] == 'dense' and replace_missing and np.any(map(np.isnan, data)):
        vprint(verbose, "Replace missing values by 0 (slow, sorry)")
        data = data_converter.replace_missing(data)
    if self.use_pickle:
        with open(os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"), "wb") as pickle_file:
            vprint(verbose, "Saving pickle file : " + os.path.join(self.tmp_dir, os.path.basename(filename) + ".pickle"))
            p = pickle.Pickler(pickle_file)
            p.fast = True
            p.dump(data)
    end = time.time()
    if verbose:
        print("[+] Success in %5.2f sec" % (end - start))
    return data
Example 38
def sanitize_array(array):
    ''' Replace NaN and Inf (there should not be any!) '''
    a = np.ravel(array)
    maxi = np.nanmax(filter(lambda x: x != float('inf'), a))    # Max except NaN and Inf
    mini = np.nanmin(filter(lambda x: x != float('-inf'), a))   # Min except NaN and -Inf
    array[array == float('inf')] = maxi
    array[array == float('-inf')] = mini
    mid = (maxi + mini) / 2
    array[np.isnan(array)] = mid
    return array
Example 39
def htmt(self):

    htmt_ = pd.DataFrame(pd.DataFrame.corr(self.data_),
                         index=self.manifests, columns=self.manifests)

    mean = []
    allBlocks = []
    for i in range(self.lenlatent):
        block_ = self.Variables['measurement'][
            self.Variables['latent'] == self.latent[i]]
        allBlocks.append(list(block_.values))
        block = htmt_.ix[block_, block_]
        mean_ = (block - np.diag(np.diag(block))).values
        mean_[mean_ == 0] = np.nan
        mean.append(np.nanmean(mean_))

    comb = [[k, j] for k in range(self.lenlatent)
            for j in range(self.lenlatent)]

    comb_ = [(np.sqrt(mean[comb[i][1]] * mean[comb[i][0]]))
             for i in range(self.lenlatent ** 2)]

    comb__ = []
    for i in range(self.lenlatent ** 2):
        block = (htmt_.ix[allBlocks[comb[i][1]],
                          allBlocks[comb[i][0]]]).values
        # block[block == 1] = np.nan
        comb__.append(np.nanmean(block))

    htmt__ = np.divide(comb__, comb_)
    where_are_NaNs = np.isnan(htmt__)
    htmt__[where_are_NaNs] = 0

    htmt = pd.DataFrame(np.tril(htmt__.reshape(
        (self.lenlatent, self.lenlatent)), k=-1),
        index=self.latent, columns=self.latent)

    return htmt
Example 40
def get_cubic_root(self):
    # We have the equation x^2 D^2 + (1-x)^4 * C / h_min^2,
    # where x = sqrt(mu).
    # We substitute x, which is sqrt(mu), with x = y + 1.
    # It gives y^3 + py = q,
    # where p = (D^2 h_min^2)/(2*C) and q = -p.
    # We use Vieta's substitution to compute the root.
    # There is only one real solution y (which is in [0, 1]).
    # http://mathworld.wolfram.com/VietasSubstitution.html
    # eps in the numerator is to prevent momentum = 1 in case of zero gradient
    if np.isnan(self._dist_to_opt) or np.isnan(self._h_min) or np.isnan(self._grad_var) \
            or np.isinf(self._dist_to_opt) or np.isinf(self._h_min) or np.isinf(self._grad_var):
        logging.warning("Input to cubic solver has invalid nan/inf value!")
        raise Exception("Input to cubic solver has invalid nan/inf value!")

    p = (self._dist_to_opt + eps)**2 * (self._h_min + eps)**2 / 2 / (self._grad_var + eps)
    w3 = (-math.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
    w = math.copysign(1.0, w3) * math.pow(math.fabs(w3), 1.0 / 3.0)
    y = w - p / 3.0 / (w + eps)
    x = y + 1

    if self._verbose:
        logging.debug("p %f, denominator %f", p, self._grad_var + eps)
        logging.debug("w3 %f ", w3)
        logging.debug("y %f, denominator %f", y, w + eps)
    if np.isnan(x) or np.isinf(x):
        logging.warning("Output from cubic is invalid nan/inf value!")
        raise Exception("Output from cubic is invalid nan/inf value!")

    return x
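The comment block above compresses a derivation; a standalone numeric check of the closed-form root (hypothetical values for D, h_min and C, eps omitted; purely illustrative, not part of the original project):

import math

D, h_min, C = 2.0, 0.5, 1.0          # hypothetical inputs
p = D**2 * h_min**2 / (2.0 * C)      # p = (D^2 h_min^2) / (2 C)
w3 = (-math.sqrt(p**2 + 4.0 / 27.0 * p**3) - p) / 2.0
w = math.copysign(1.0, w3) * math.fabs(w3)**(1.0 / 3.0)
y = w - p / (3.0 * w)
print(abs(y**3 + p * y + p))         # ~0: y solves y^3 + p*y = q with q = -p
print(y + 1)                         # x = sqrt(mu)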
Example 41
def treegauss_add_row(
        data_row,
        tree_grid,
        program,
        latent_row,
        vert_ss,
        edge_ss,
        feat_ss,
):
    # Sample latent state using dynamic programming.
    TODO('https://github.com/posterior/treecat/issues/26')

    # Update sufficient statistics.
    for v in range(latent_row.shape[0]):
        z = latent_row[v, :]
        vert_ss[v, :, :] += np.outer(z, z)
    for e in range(tree_grid.shape[1]):
        z1 = latent_row[tree_grid[1, e], :]
        z2 = latent_row[tree_grid[2, e], :]
        edge_ss[e, :, :] += np.outer(z1, z2)
    for v, x in enumerate(data_row):
        if np.isnan(x):
            continue
        z = latent_row[v, :]
        feat_ss[v] += 1
        feat_ss[v, 1] += x
        feat_ss[v, 2:] += x * z  # TODO Use central covariance.
Example 42
def imputeSNPs(X):
    snpsMean = np.nanmean(X, axis=0)
    isNan = np.isnan(X)
    for i, m in enumerate(snpsMean):
        X[isNan[:, i], i] = m
    return X
Example 43
def __call__(self, *args, **kwargs):
    assert len(args) <= len(self.inputs), "Too many arguments provided"
    feed_dict = {}
    # Update the args
    for inpt, value in zip(self.inputs, args):
        self._feed_input(feed_dict, inpt, value)
    # Update the kwargs
    kwargs_passed_inpt_names = set()
    for inpt in self.inputs[len(args):]:
        inpt_name = inpt.name.split(':')[0]
        inpt_name = inpt_name.split('/')[-1]
        assert inpt_name not in kwargs_passed_inpt_names, \
            "this function has two arguments with the same name \"{}\", so kwargs cannot be used.".format(inpt_name)
        if inpt_name in kwargs:
            kwargs_passed_inpt_names.add(inpt_name)
            self._feed_input(feed_dict, inpt, kwargs.pop(inpt_name))
        else:
            assert inpt in self.givens, "Missing argument " + inpt_name
    assert len(kwargs) == 0, "Function got extra arguments " + str(list(kwargs.keys()))
    # Update feed dict with givens.
    for inpt in self.givens:
        feed_dict[inpt] = feed_dict.get(inpt, self.givens[inpt])
    results = get_session().run(self.outputs_update, feed_dict=feed_dict)[:-1]
    if self.check_nan:
        if any(np.isnan(r).any() for r in results):
            raise RuntimeError("Nan detected")
    return results
Example 44
def nan_to_num(array, fill_value=0.0, copy=True):
    """
    Replace NaNs with another fill value.

    Parameters
    ----------
    array : array_like
        Input data.
    fill_value : float, optional
        NaNs will be replaced by ``fill_value``. Default is 0.0, in
        keeping with ``numpy.nan_to_num``.
    copy : bool, optional
        Whether to create a copy of `array` (True) or to replace values
        in-place (False). The in-place operation only occurs if casting
        to an array does not require a copy.

    Returns
    -------
    out : ndarray
        Array without NaNs. If ``array`` was not of floating or complex
        type, ``array`` is returned unchanged.

    Notes
    -----
    Contrary to ``numpy.nan_to_num``, this function does not handle
    infinite values.

    See Also
    --------
    numpy.nan_to_num : replace NaNs and Infs with zeroes.
    """
    array = np.array(array, subok=True, copy=copy)
    dtype = array.dtype.type

    # Non-inexact types do not have NaNs
    if not np.issubdtype(dtype, np.inexact):
        return array

    iscomplex = np.issubdtype(dtype, np.complexfloating)
    dest = (array.real, array.imag) if iscomplex else (array,)
    for d in dest:
        np.copyto(d, fill_value, where=np.isnan(d))
    return array
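A minimal usage sketch for the function above (illustrative only; it assumes the nan_to_num defined in this example is in scope, not numpy's built-in):

import numpy as np

arr = np.array([1.0, np.nan, 3.0])
# fill NaNs with the mean of the finite values instead of zero
filled = nan_to_num(arr, fill_value=np.nanmean(arr))
print(filled)  # [1. 2. 3.]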
Example 45
def test_gradients(self):
    inputs = tf.random_normal(
        [self.batch_size, self.sequence_length, self.input_depth])
    seq_length = tf.ones(self.batch_size, dtype=tf.int32) * self.sequence_length
    labels = np.random.randint(0, self.vocab_size,
                               [self.batch_size, self.sequence_length])

    helper = decode_helper.TrainingHelper(
        inputs=inputs, sequence_length=seq_length)
    decoder_fn = self.create_decoder(
        helper=helper, mode=tf.contrib.learn.ModeKeys.TRAIN)
    initial_state = decoder_fn.cell.zero_state(
        self.batch_size, dtype=tf.float32)
    decoder_output, _ = decoder_fn(initial_state, helper)

    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=decoder_output.logits, labels=labels)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    grads_and_vars = optimizer.compute_gradients(tf.reduce_mean(losses))

    #pylint: disable=E1101
    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        grads_and_vars_ = sess.run(grads_and_vars)

    for grad, _ in grads_and_vars_:
        self.assertFalse(np.isnan(grad).any())

    return grads_and_vars_
Example 46
def frame_to_series(self, field, frame, columns=None):
    """
    Convert a frame with a DatetimeIndex and sid columns into a series with
    a sid index, using the aggregator defined by the given field.
    """
    if isinstance(frame, pd.DataFrame):
        columns = frame.columns
        frame = frame.values

    if not len(frame):
        return pd.Series(
            data=(0 if field == 'volume' else np.nan),
            index=columns,
        ).values

    if field in ['price', 'close']:
        # shortcircuit for full last row
        vals = frame[-1]
        if np.all(~np.isnan(vals)):
            return vals
        return ffill(frame)[-1]
    elif field == 'open':
        return bfill(frame)[0]
    elif field == 'volume':
        return np.nansum(frame, axis=0)
    elif field == 'high':
        return np.nanmax(frame, axis=0)
    elif field == 'low':
        return np.nanmin(frame, axis=0)
    else:
        raise ValueError("Unknown field {}".format(field))
Example 47
def update_last_known_values(self):
    """
    Store the non-NaN values from our oldest frame in each frequency.
    """
    ffillable = self.ffillable_fields
    if not len(ffillable):
        return

    for frequency in self.unique_frequencies:
        digest_panel = self.digest_panels.get(frequency, None)
        if digest_panel:
            oldest_known_values = digest_panel.oldest_frame(raw=True)
        else:
            oldest_known_values = self.buffer_panel.oldest_frame(raw=True)

        oldest_vals = oldest_known_values
        oldest_columns = self.fields
        for field in ffillable:
            f_idx = oldest_columns.get_loc(field)
            field_vals = oldest_vals[f_idx]
            # isnan would be fast, possible to use?
            non_nan_sids = np.where(pd.notnull(field_vals))
            key = (frequency.freq_str, field)
            key_loc = self.last_known_prior_values.index.get_loc(key)
            self.last_known_prior_values.values[
                key_loc, non_nan_sids
            ] = field_vals[non_nan_sids]
Example 48
def check_entry(key, value):
    if key != 'period_label':
        return np.isnan(value) or np.isinf(value)
    else:
        return False


############################
# Risk Metric Calculations #
############################
Example 49
def _compute_asset_lifetimes(self):
    """
    Compute and cache a recarray of asset lifetimes.
    """
    equities_cols = self.equities.c
    buf = np.array(
        tuple(
            sa.select((
                equities_cols.sid,
                equities_cols.start_date,
                equities_cols.end_date,
            )).execute(),
        ),
        dtype='<f8',  # use doubles so we get NaNs
    )
    lifetimes = np.recarray(
        buf=buf,
        shape=(len(buf),),
        dtype=[
            ('sid', '<f8'),
            ('start', '<f8'),
            ('end', '<f8'),
        ],
    )
    start = lifetimes.start
    end = lifetimes.end
    start[np.isnan(start)] = 0  # convert missing starts to 0
    end[np.isnan(end)] = np.iinfo(int).max  # convert missing end to INTMAX
    # Cast the results back down to int.
    return lifetimes.astype([
        ('sid', '<i8'),
        ('start', '<i8'),
        ('end', '<i8'),
    ])