Example 1
def roll_zeropad(a, shift, axis=None):
    a = np.asanyarray(a)
    if shift == 0:
        return a
    if axis is None:
        n = a.size
        reshape = True
    else:
        n = a.shape[axis]
        reshape = False
    if np.abs(shift) > n:
        res = np.zeros_like(a)
    elif shift < 0:
        shift += n
        zeros = np.zeros_like(a.take(np.arange(n - shift), axis))
        res = np.concatenate((a.take(np.arange(n - shift, n), axis), zeros), axis)
    else:
        zeros = np.zeros_like(a.take(np.arange(n - shift, n), axis))
        res = np.concatenate((zeros, a.take(np.arange(n - shift), axis)), axis)
    if reshape:
        return res.reshape(a.shape)
    else:
        return res
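The semantics are easiest to see next to np.roll: shifted-out values are discarded and zeros are shifted in. A minimal usage sketch, assuming numpy is imported as np:

a = np.arange(1, 6)   # array([1, 2, 3, 4, 5])
roll_zeropad(a, 2)    # array([0, 0, 1, 2, 3])
roll_zeropad(a, -2)   # array([3, 4, 5, 0, 0])
np.roll(a, -2)        # array([3, 4, 5, 1, 2]) -- wraps around instead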
Example 2
def rhoA(self):
    # rhoA
    rhoA = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
    for i in range(self.lenlatent):
        weights = pd.DataFrame(self.outer_weights[self.latent[i]])
        weights = weights[(weights.T != 0).any()]
        result = pd.DataFrame.dot(weights.T, weights)
        result_ = pd.DataFrame.dot(weights, weights.T)
        S = self.data_[self.Variables['measurement'][
            self.Variables['latent'] == self.latent[i]]]
        S = pd.DataFrame.dot(S.T, S) / S.shape[0]
        numerador = (
            np.dot(np.dot(weights.T, (S - np.diag(np.diag(S)))), weights))
        denominador = (
            np.dot(np.dot(weights.T, (result_ - np.diag(np.diag(result_)))), weights))
        rhoA_ = ((result) ** 2) * (numerador / denominador)
        if np.isnan(rhoA_.values):
            rhoA[self.latent[i]] = 1
        else:
            rhoA[self.latent[i]] = rhoA_.values
    return rhoA.T
Example 3
def plot_sent_trajectories(sents, decode_plot):
    font = {'family': 'normal', 'size': 14}
    matplotlib.rc('font', **font)
    i = 0
    l = ["Portuguese", "Catalan"]
    axes = plt.gca()
    #axes.set_xlim([xmin,xmax])
    axes.set_ylim([-1, 1])
    for sent, enc in zip(sents, decode_plot):
        if i == 2:
            continue
        i += 1
        #times = np.arange(len(enc))
        times = np.linspace(0, 1, len(enc))
        plt.plot(times, enc, label=l[i - 1])
    plt.title("Hidden Node Trajectories")
    plt.xlabel('timestep')
    plt.ylabel('trajectories')
    plt.legend(loc='best')
    plt.savefig("final_tests/cr_por_cat_hidden_cell_trajectories", bbox_inches="tight")
    plt.close()
Example 4
def _generate_data():
    """Generate data in which the inputs u(k-1) and y(k-1) determine the output y(k)."""
    # u = np.random.uniform(-1, 1, 200)
    # y = []
    # former_y_value = 0
    # for i in np.arange(0, 200):
    #     y.append(former_y_value)
    #     next_y_value = (29.0 / 40) * np.sin(
    #         (16.0 * u[i] + 8 * former_y_value) /
    #         (3.0 + 4.0 * (u[i] ** 2) + 4 * (former_y_value ** 2))) \
    #         + (2.0 / 10) * u[i] + (2.0 / 10) * former_y_value
    #     former_y_value = next_y_value
    # return u, y
    u1 = np.random.uniform(-np.pi, np.pi, 200)
    u2 = np.random.uniform(-1, 1, 200)
    y = np.zeros(200)
    for i in range(200):
        y[i] = np.sin(u1[i]) + u2[i]
    return u1, u2, y
Example 5
def plot_counts(counts, gene_type):
    """Plot expression counts. Return a Figure object"""
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np

    fig = plt.figure(figsize=((50 + len(counts) * 5) / 25.4, 210 / 25.4))
    matplotlib.rcParams.update({'font.size': 14})
    ax = fig.gca()
    ax.set_title('{} gene usage'.format(gene_type))
    ax.set_xlabel('{} gene'.format(gene_type))
    ax.set_ylabel('Count')
    ax.set_xticks(np.arange(len(counts)) + 0.5)
    ax.set_xticklabels(counts.index, rotation='vertical')
    ax.grid(axis='x')
    ax.set_xlim((-0.25, len(counts)))
    ax.bar(np.arange(len(counts)), counts['count'])
    fig.set_tight_layout(True)
    return fig
Example 6
def _create_figure(predictions_dict):
    """Creates and returns a new figure that visualizes
    attention scores for a single model prediction.
    """
    # Find out how long the predicted sequence is
    target_words = list(predictions_dict["predicted_tokens"])
    prediction_len = _get_prediction_length(predictions_dict)

    # Get source words
    source_len = predictions_dict["features.source_len"]
    source_words = predictions_dict["features.source_tokens"][:source_len]

    # Plot
    fig = plt.figure(figsize=(8, 8))
    plt.imshow(
        X=predictions_dict["attention_scores"][:prediction_len, :source_len],
        interpolation="nearest",
        cmap=plt.cm.Blues)
    plt.xticks(np.arange(source_len), source_words, rotation=45)
    plt.yticks(np.arange(prediction_len), target_words, rotation=-45)
    fig.tight_layout()
    return fig
Example 7
def mypsd(Rates, time_range, bin_w=5., nmax=4000):
    bins = np.arange(0, len(time_range), 1)
    # print(bins)
    a, b = np.histogram(Rates, bins)
    ff = (1. / len(bins)) * abs(np.fft.fft(Rates - np.mean(Rates))) ** 2
    Fs = 1. / (1 * 0.001)
    # the original sliced with len(bins/2), which divides the array elementwise
    # instead of halving the length; len(bins) // 2 is what was intended
    freq2 = np.fft.fftfreq(len(bins))[0:len(bins) // 2 + 1]  # d = dt
    freq = np.fft.fftfreq(len(bins))[:len(ff) // 2 + 1]
    px = ff[0:len(ff) // 2 + 1]
    max_px = np.max(px[1:])
    idx = px == max_px
    # np.flatnonzero replaces the removed pylab find()
    corr_freq = freq[np.flatnonzero(idx)]
    new_px = px
    max_pow = new_px[np.flatnonzero(idx)]
    return new_px, freq, corr_freq[0], freq2, max_pow
Example 8
def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters
    Operates in place
    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum, np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side, np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal, np.float32, 0, True)
    for index in lbls:
        area = areas[index - 1] / 255
        side = sides[index - 1]
        diag = diags[index - 1]
        if side < 5 or side > 20 \
                or diag < 15 or diag > 25 \
                or area < 40:
            image[labeled == index] = 0
    return None
Example 9
def next_batch(self, batch_size, fake_data=False):
    """Return the next `batch_size` examples from this data set."""
    # xrange comes from six.moves in the original module
    if fake_data:
        fake_image = [1.0 for _ in xrange(784)]
        fake_label = 0
        return [fake_image for _ in xrange(batch_size)], [
            fake_label for _ in xrange(batch_size)]
    start = self._index_in_epoch
    self._index_in_epoch += batch_size
    if self._index_in_epoch > self._num_examples:
        # Finished epoch
        self._epochs_completed += 1
        # Shuffle the data
        perm = numpy.arange(self._num_examples)
        numpy.random.shuffle(perm)
        self._images = self._images[perm]
        self._labels = self._labels[perm]
        # Start next epoch
        start = 0
        self._index_in_epoch = batch_size
        assert batch_size <= self._num_examples
    end = self._index_in_epoch
    return self._images[start:end], self._labels[start:end]
Example 10
def split_dataset(dataset, split_ratio, mode):
    if mode == 'SPLIT_CLASSES':
        nrof_classes = len(dataset)
        class_indices = np.arange(nrof_classes)
        np.random.shuffle(class_indices)
        split = int(round(nrof_classes * split_ratio))
        train_set = [dataset[i] for i in class_indices[0:split]]
        # the original used [split:-1], which silently dropped the last element
        test_set = [dataset[i] for i in class_indices[split:]]
    elif mode == 'SPLIT_IMAGES':
        train_set = []
        test_set = []
        min_nrof_images = 2
        for cls in dataset:
            paths = cls.image_paths
            np.random.shuffle(paths)
            split = int(round(len(paths) * split_ratio))
            if split < min_nrof_images:
                continue  # Not enough images for test set. Skip class...
            train_set.append(ImageClass(cls.name, paths[0:split]))
            test_set.append(ImageClass(cls.name, paths[split:]))
    else:
        raise ValueError('Invalid train/test split mode "%s"' % mode)
    return train_set, test_set
Example 11
def quantize_from_probs2(probs, resolution):
    """Quantize multiple non-normalized probs to given resolution.

    Args:
      probs: An [N, M]-shaped numpy array of non-normalized probabilities.
      resolution: The total integer count distributed across each row.

    Returns:
      An [N, M]-shaped array of quantized probabilities such that
      np.all(result.sum(axis=1) == resolution).
    """
    assert len(probs.shape) == 2
    N, M = probs.shape
    probs = probs / probs.sum(axis=1, keepdims=True)
    result = np.zeros(probs.shape, np.int8)
    range_N = np.arange(N, dtype=np.int32)
    for _ in range(resolution):
        sample = probs.argmax(axis=1)
        result[range_N, sample] += 1
        probs[range_N, sample] -= 1.0 / resolution
    return result
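A quick check of the invariant stated in the docstring, assuming numpy as np (the input values are hypothetical):

probs = np.array([[0.1, 0.7, 0.2],
                  [0.5, 0.5, 0.0]])
q = quantize_from_probs2(probs, resolution=10)
# every iteration of the greedy loop adds exactly one unit per row,
# so each row sums exactly to the requested resolution
assert np.all(q.sum(axis=1) == 10)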
Example 12
def get_train_data():
    # define the dataset as two inputs, one output
    DS = SupervisedDataSet(2, 1)
    u1, u2, y = _generate_data()
    # add each data element to the dataset
    for i in np.arange(199):
        DS.addSample([u1[i], u2[i]], [y[i + 1]])
    # you can get your input/output this way
    # X = DS['input']
    # Y = DS['target']
    # split the dataset into train dataset and test dataset
    dataTrain, dataTest = DS.splitWithProportion(0.8)
    return dataTrain, dataTest
Example 13
def read_chunk(self, idx, chunk_size, padding=(0, 0), nodes=None):
    self._open()
    t_start, t_stop = self._get_t_start_t_stop(idx, chunk_size, padding)
    if self.time_axis == 0:
        local_chunk = self.data[t_start:t_stop, :]
    elif self.time_axis == 1:
        local_chunk = self.data[:, t_start:t_stop].T
    self._close()

    if nodes is not None:
        if not numpy.all(nodes == numpy.arange(self.nb_channels)):
            local_chunk = numpy.take(local_chunk, nodes, axis=1)

    return self._scale_data_to_float32(local_chunk)
Example 14
def _get_slice_(self, t_start, t_stop):
    x_beg = numpy.int64(t_start // self.SAMPLES_PER_RECORD)
    r_beg = numpy.mod(t_start, self.SAMPLES_PER_RECORD)
    x_end = numpy.int64(t_stop // self.SAMPLES_PER_RECORD)
    r_end = numpy.mod(t_stop, self.SAMPLES_PER_RECORD)

    if x_beg == x_end:
        g_offset = x_beg * self.bytes_per_block_div + self.block_offset_div
        data_slice = numpy.arange(g_offset + r_beg * self.nb_channels,
                                  g_offset + r_end * self.nb_channels,
                                  dtype=numpy.int64)
        yield data_slice
    else:
        for count, nb_blocks in enumerate(numpy.arange(x_beg, x_end + 1, dtype=numpy.int64)):
            g_offset = nb_blocks * self.bytes_per_block_div + self.block_offset_div
            if count == 0:
                data_slice = numpy.arange(g_offset + r_beg * self.nb_channels,
                                          g_offset + self.block_size_div,
                                          dtype=numpy.int64)
            elif count == (x_end - x_beg):
                data_slice = numpy.arange(g_offset,
                                          g_offset + r_end * self.nb_channels,
                                          dtype=numpy.int64)
            else:
                data_slice = numpy.arange(g_offset,
                                          g_offset + self.block_size_div,
                                          dtype=numpy.int64)
            yield data_slice
Example 15
def _get_slice_(self, t_start, t_stop):
    x_beg = numpy.int64(t_start // self.SAMPLES_PER_RECORD)
    r_beg = numpy.mod(t_start, self.SAMPLES_PER_RECORD)
    x_end = numpy.int64(t_stop // self.SAMPLES_PER_RECORD)
    r_end = numpy.mod(t_stop, self.SAMPLES_PER_RECORD)

    data_slice = []
    if x_beg == x_end:
        g_offset = (x_beg * self.SAMPLES_PER_RECORD
                    + self.OFFSET_PER_BLOCK[0] * (x_beg + 1)
                    + self.OFFSET_PER_BLOCK[1] * x_beg)
        data_slice = numpy.arange(g_offset + r_beg, g_offset + r_end, dtype=numpy.int64)
    else:
        for count, nb_blocks in enumerate(numpy.arange(x_beg, x_end + 1, dtype=numpy.int64)):
            g_offset = (nb_blocks * self.SAMPLES_PER_RECORD
                        + self.OFFSET_PER_BLOCK[0] * (nb_blocks + 1)
                        + self.OFFSET_PER_BLOCK[1] * nb_blocks)
            if count == 0:
                data_slice += numpy.arange(g_offset + r_beg,
                                           g_offset + self.SAMPLES_PER_RECORD,
                                           dtype=numpy.int64).tolist()
            elif count == (x_end - x_beg):
                data_slice += numpy.arange(g_offset, g_offset + r_end,
                                           dtype=numpy.int64).tolist()
            else:
                data_slice += numpy.arange(g_offset,
                                           g_offset + self.SAMPLES_PER_RECORD,
                                           dtype=numpy.int64).tolist()
    return data_slice
Example 16
def read_chunk(self, idx, chunk_size, padding=(0, 0), nodes=None):
    t_start, t_stop = self._get_t_start_t_stop(idx, chunk_size, padding)
    local_shape = t_stop - t_start

    if nodes is None:
        nodes = numpy.arange(self.nb_channels)

    local_chunk = numpy.zeros((local_shape, len(nodes)), dtype=self.data_dtype)
    data_slice = self._get_slice_(t_start, t_stop)

    self._open()
    for count, i in enumerate(nodes):
        local_chunk[:, count] = self.data[i][data_slice]
    self._close()

    return self._scale_data_to_float32(local_chunk)
Example 17
def view_trigger_snippets_bis(trigger_snippets, elec_index, save=None):
    fig = pylab.figure()
    ax = fig.add_subplot(1, 1, 1)
    for n in xrange(0, trigger_snippets.shape[2]):
        y = trigger_snippets[:, elec_index, n]
        # integer division keeps the x range integer-valued under Python 3
        x = numpy.arange(-(y.size - 1) // 2, (y.size - 1) // 2 + 1)
        b = 0.5 + 0.5 * numpy.random.rand()
        ax.plot(x, y, color=(0.0, 0.0, b), linestyle='solid')
    ax.grid(True)
    ax.set_xlim([numpy.amin(x), numpy.amax(x)])
    ax.set_xlabel("time")
    ax.set_ylabel("amplitude")
    if save is None:
        pylab.show()
    else:
        pylab.savefig(save)
        pylab.close(fig)
    return
Example 18
def cost(self, x):
    Rdx = dl.Vector()
    self.Prior.init_vector(Rdx, 0)
    dx = x[PARAMETER] - self.Prior.mean
    self.Prior.R.mult(dx, Rdx)
    reg = .5 * Rdx.inner(dx)

    u = dl.Vector()
    ud = dl.Vector()
    self.Q.init_vector(u, 0)
    self.Q.init_vector(ud, 0)
    misfit = 0
    for t in np.arange(self.t_1, self.t_final + (.5 * self.dt), self.dt):
        x[STATE].retrieve(u, t)
        self.ud.retrieve(ud, t)
        diff = u - ud
        Qdiff = self.Q * diff
        misfit += .5 / self.noise_variance * Qdiff.inner(diff)

    c = misfit + reg
    return [c, reg, misfit]
Example 19
def _flow_index(self, n, batch_size=32, shuffle=False, seed=None):
    # ensure self.batch_index is 0
    self.reset()
    while 1:
        if seed is not None:
            np.random.seed(seed + self.total_batches_seen)
        if self.batch_index == 0:
            index_array = np.arange(n)
            if shuffle:
                index_array = np.random.permutation(n)
        current_index = (self.batch_index * batch_size) % n
        if n >= current_index + batch_size:
            current_batch_size = batch_size
            self.batch_index += 1
        else:
            current_batch_size = n - current_index
            self.batch_index = 0
        self.total_batches_seen += 1
        yield (index_array[current_index: current_index + current_batch_size],
               current_index, current_batch_size)
Example 20
def make_split(X_full, Y_full, split):
    N = X_full.shape[0]
    n = int(N * PROPORTION_TRAIN)
    ind = np.arange(N)
    np.random.seed(split + SEED)
    np.random.shuffle(ind)
    train_ind = ind[:n]
    test_ind = ind[n:]
    X = X_full[train_ind]
    Xs = X_full[test_ind]
    Y = Y_full[train_ind]
    Ys = Y_full[test_ind]
    return X, Y, Xs, Ys
Example 21
def plot_difference_histogram(group, gene_name, bins=np.arange(20.1)):
    """
    Plot a histogram of percentage differences for a specific gene.
    """
    exact_matches = group[group.V_SHM == 0]
    CDR3s_exact = len(set(s for s in exact_matches.CDR3_nt if s))
    Js_exact = len(set(exact_matches.J_gene))

    fig = Figure(figsize=(100 / 25.4, 60 / 25.4))
    ax = fig.gca()
    ax.set_xlabel('Percentage difference')
    ax.set_ylabel('Frequency')
    fig.suptitle('Gene ' + gene_name, y=1.08, fontsize=16)
    ax.set_title('{:,} sequences assigned'.format(len(group)))

    ax.text(0.25, 0.95,
            '{:,} ({:.1%}) exact matches\n {} unique CDR3\n {} unique J'.format(
                len(exact_matches), len(exact_matches) / len(group),
                CDR3s_exact, Js_exact),
            transform=ax.transAxes, fontsize=10,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.5),
            horizontalalignment='left', verticalalignment='top')
    _ = ax.hist(list(group.V_SHM), bins=bins)
    return fig
Example 22
def create_decoder(self, helper, mode):
    attention_fn = AttentionLayerDot(
        params={"num_units": self.attention_dim},
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    attention_values = tf.convert_to_tensor(
        np.random.randn(self.batch_size, self.input_seq_len, 32),
        dtype=tf.float32)
    attention_keys = tf.convert_to_tensor(
        np.random.randn(self.batch_size, self.input_seq_len, 32),
        dtype=tf.float32)
    params = AttentionDecoder.default_params()
    params["max_decode_length"] = self.max_decode_length
    return AttentionDecoder(
        params=params,
        mode=mode,
        vocab_size=self.vocab_size,
        attention_keys=attention_keys,
        attention_values=attention_values,
        attention_values_length=np.arange(self.batch_size) + 1,
        attention_fn=attention_fn)
Example 23
def make_copy(num_examples, min_len, max_len):
    """
    Generates a dataset where the target is equal to the source.
    Sequence lengths are chosen randomly from [min_len, max_len].

    Args:
      num_examples: Number of examples to generate
      min_len: Minimum sequence length
      max_len: Maximum sequence length

    Returns:
      An iterator of (source, target) string tuples.
    """
    for _ in range(num_examples):
        turn_length = np.random.choice(np.arange(min_len, max_len + 1))
        source_tokens = np.random.choice(
            list(VOCABULARY), size=turn_length, replace=True)
        target_tokens = source_tokens
        yield " ".join(source_tokens), " ".join(target_tokens)
Example 24
def make_reverse(num_examples, min_len, max_len):
    """
    Generates a dataset where the target is equal to the source reversed.
    Sequence lengths are chosen randomly from [min_len, max_len].

    Args:
      num_examples: Number of examples to generate
      min_len: Minimum sequence length
      max_len: Maximum sequence length

    Returns:
      An iterator of (source, target) string tuples.
    """
    for _ in range(num_examples):
        turn_length = np.random.choice(np.arange(min_len, max_len + 1))
        source_tokens = np.random.choice(
            list(VOCABULARY), size=turn_length, replace=True)
        target_tokens = source_tokens[::-1]
        yield " ".join(source_tokens), " ".join(target_tokens)
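Driving either generator is straightforward; a usage sketch, where VOCABULARY is a stand-in here for whatever alphabet the module defines:

VOCABULARY = "abcde"  # hypothetical alphabet, just for this sketch
for source, target in make_reverse(num_examples=2, min_len=3, max_len=5):
    print(source, "->", target)  # e.g. "c a d -> d a c"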
Example 25
def update_dividends(self, new_dividends):
    """
    Update our dividend frame with new dividends.  @new_dividends should be
    a DataFrame with columns containing at least the entries in
    zipline.protocol.DIVIDEND_FIELDS.
    """
    # Mark each new dividend with a unique integer id.  This ensures that
    # we can differentiate dividends whose date/sid fields are otherwise
    # identical.
    new_dividends['id'] = np.arange(
        self._dividend_count,
        self._dividend_count + len(new_dividends),
    )
    self._dividend_count += len(new_dividends)

    self.dividend_frame = sort_values(pd.concat(
        [self.dividend_frame, new_dividends]
    ), ['pay_date', 'ex_date']).set_index('id', drop=False)
Example 26
def create_test_panel_ohlc_source(sim_params, env):
    start = sim_params.first_open \
        if sim_params else pd.datetime(1990, 1, 3, 0, 0, 0, 0, pytz.utc)
    end = sim_params.last_close \
        if sim_params else pd.datetime(1990, 1, 8, 0, 0, 0, 0, pytz.utc)
    index = env.days_in_range(start, end)
    price = np.arange(0, len(index)) + 100
    high = price * 1.05
    low = price * 0.95
    open_ = price + .1 * (price % 2 - .5)
    volume = np.ones(len(index)) * 1000
    arbitrary = np.ones(len(index))

    df = pd.DataFrame({'price': price,
                       'high': high,
                       'low': low,
                       'open': open_,
                       'volume': volume,
                       'arbitrary': arbitrary},
                      index=index)
    panel = pd.Panel.from_dict({0: df})
    return DataPanelSource(panel), panel
Example 27
def test_expect_dtypes_with_tuple(self):
    allowed_dtypes = (dtype('datetime64[ns]'), dtype('float'))

    @expect_dtypes(a=allowed_dtypes)
    def foo(a, b):
        return a, b

    for d in allowed_dtypes:
        good_a = arange(3).astype(d)
        good_b = object()
        ret_a, ret_b = foo(good_a, good_b)
        self.assertIs(good_a, ret_a)
        self.assertIs(good_b, ret_b)

    with self.assertRaises(TypeError) as e:
        foo(arange(3, dtype='uint32'), object())

    expected_message = (
        "{qualname}() expected a value with dtype 'datetime64[ns]' "
        "or 'float64' for argument 'a', but got 'uint32' instead."
    ).format(qualname=qualname(foo))
    self.assertEqual(e.exception.args[0], expected_message)
Example 28
def test_bad_input(self):
    data = arange(100).reshape(self.ndates, self.nsids)
    baseline = DataFrame(data, index=self.dates, columns=self.sids)
    loader = DataFrameLoader(
        USEquityPricing.close,
        baseline,
    )

    with self.assertRaises(ValueError):
        # Wrong column.
        loader.load_adjusted_array(
            [USEquityPricing.open], self.dates, self.sids, self.mask
        )

    with self.assertRaises(ValueError):
        # Too many columns.
        loader.load_adjusted_array(
            [USEquityPricing.open, USEquityPricing.close],
            self.dates,
            self.sids,
            self.mask,
        )
Example 29
def test_baseline(self):
    data = arange(100).reshape(self.ndates, self.nsids)
    baseline = DataFrame(data, index=self.dates, columns=self.sids)
    loader = DataFrameLoader(USEquityPricing.close, baseline)

    dates_slice = slice(None, 10, None)
    sids_slice = slice(1, 3, None)
    [adj_array] = loader.load_adjusted_array(
        [USEquityPricing.close],
        self.dates[dates_slice],
        self.sids[sids_slice],
        self.mask[dates_slice, sids_slice],
    ).values()

    for idx, window in enumerate(adj_array.traverse(window_length=3)):
        expected = baseline.values[dates_slice, sids_slice][idx:idx + 3]
        assert_array_equal(window, expected)
Example 30
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
    mat_disp = (mat_var - mat_mean) / np.square(mat_mean)

    quantiles = np.percentile(mat_mean, np.arange(0, 100, 100 / nbins))
    quantiles = np.append(quantiles, mat_mean.max())

    # merge bins with no difference in value
    quantiles = np.unique(quantiles)

    if len(quantiles) <= 1:
        # pathological case: the means are all identical. just return raw dispersion.
        return mat_disp

    # calc median dispersion per bin
    (disp_meds, _, disp_bins) = scipy.stats.binned_statistic(
        mat_mean, mat_disp, statistic='median', bins=quantiles)

    # calc median absolute deviation of dispersion per bin
    disp_meds_arr = disp_meds[disp_bins - 1]  # 0th bin is empty since our quantiles start from 0
    disp_abs_dev = abs(mat_disp - disp_meds_arr)
    (disp_mads, _, disp_bins) = scipy.stats.binned_statistic(
        mat_mean, disp_abs_dev, statistic='median', bins=quantiles)

    # calculate normalized dispersion
    disp_mads_arr = disp_mads[disp_bins - 1]
    disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
    return disp_norm
Example 31
def compute_nearest_neighbors(submatrix, balltree, k, row_start):
    """ Compute k nearest neighbors on a submatrix
    Args: submatrix (np.ndarray): Data submatrix
          balltree: Nearest neighbor index (from sklearn)
          k: number of nearest neighbors to compute
          row_start: row offset into larger matrix
    Returns a COO sparse adjacency matrix of nearest neighbor relations as (i,j,x)"""
    nn_dist, nn_idx = balltree.query(submatrix, k=k + 1)

    # Remove the self-as-neighbors
    nn_idx = nn_idx[:, 1:]
    nn_dist = nn_dist[:, 1:]

    # Construct a COO sparse matrix of edges and distances
    i = np.repeat(row_start + np.arange(nn_idx.shape[0]), k)
    j = nn_idx.ravel().astype(int)
    return (i, j, nn_dist.ravel())
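The balltree argument is whatever sklearn index the caller built; a sketch assuming sklearn.neighbors.BallTree and numpy as np:

from sklearn.neighbors import BallTree

X = np.random.rand(100, 16)  # 100 points in 16 dimensions
i, j, dist = compute_nearest_neighbors(X, BallTree(X), k=5, row_start=0)
# (i, j, dist) are COO triplets: point i[n] has neighbor j[n] at distance dist[n]
assert len(i) == len(j) == len(dist) == 100 * 5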
Example 32
def preprocess_matrix(matrix, num_bcs=None, use_bcs=None, use_genes=None, force_cells=None):
    if force_cells is not None:
        bc_counts = matrix.get_reads_per_bc()
        bc_indices, _, _ = cr_stats.filter_cellular_barcodes_fixed_cutoff(bc_counts, force_cells)
        matrix = matrix.select_barcodes(bc_indices)
    elif use_bcs is not None:
        bc_seqs = cr_utils.load_csv_rownames(use_bcs)
        bc_indices = matrix.bcs_to_ints(bc_seqs)
        matrix = matrix.select_barcodes(bc_indices)
    elif num_bcs is not None and num_bcs < matrix.bcs_dim:
        bc_indices = np.sort(np.random.choice(np.arange(matrix.bcs_dim), size=num_bcs, replace=False))
        matrix = matrix.select_barcodes(bc_indices)

    if use_genes is not None:
        gene_ids = cr_utils.load_csv_rownames(use_genes)
        gene_indices = matrix.gene_ids_to_ints(gene_ids)
        matrix = matrix.select_genes(gene_indices)

    matrix, _, _ = matrix.select_nonzero_axes()
    return matrix
Example 33
def get_depth_info(read_iter, chrom, cstart, cend):
    depths = np.zeros(cend - cstart, np.int32)

    for read in read_iter:
        pos = read.pos
        rstart = max(pos, cstart)
        # Increment to the end of the window or the end of the
        # alignment, whichever comes first
        rend = min(read.aend, cend)
        depths[(rstart - cstart):(rend - cstart)] += 1

    positions = np.arange(cstart, cend, dtype=np.int32)
    depth_df = pd.DataFrame({"chrom": chrom, "pos": positions, "coverage": depths})
    return depth_df
Example 34
def getDataRecorderConfiguration(self):
    nRecorders = self.getNumberOfRecorderTables()
    sourceBufSize = 256
    # ctypes string buffers hold bytes; decode below for Python 3 compatibility
    source = ctypes.create_string_buffer(b'\000', sourceBufSize)
    option = CIntArray(np.zeros(nRecorders, dtype=np.int32))
    table = CIntArray(np.arange(1, nRecorders + 1))
    self._lib.PI_qDRC.argtypes = [c_int, CIntArray, c_char_p,
                                  CIntArray, c_int, c_int]
    self._convertErrorToException(
        self._lib.PI_qDRC(self._id, table, source,
                          option, sourceBufSize, nRecorders))
    sources = [x.strip() for x in source.value.decode().split('\n')]
    cfg = DataRecorderConfiguration()
    for i in range(nRecorders):
        cfg.setTable(table.toNumpyArray()[i], sources[i], option.toNumpyArray()[i])
    return cfg
Example 35
def loadLogoSet(path, rows, cols, test_data_rate=0.15):
    random.seed(612)
    _, imgID = readItems('data.txt')
    y, _ = modelDict(path)
    nPics = len(y)
    faceassset = np.zeros((nPics, rows, cols), dtype=np.uint8)  # gray images
    noImg = []
    for i in range(nPics):
        temp = cv2.imread(path + 'logo/' + imgID[i] + '.jpg', 0)
        if temp is None:  # cv2.imread returns None on failure
            noImg.append(i)
        elif temp.size < 1000:
            noImg.append(i)
        else:
            temp = cv2.resize(temp, (cols, rows), interpolation=cv2.INTER_CUBIC)
            faceassset[i, :, :] = temp
    y = np.delete(y, noImg, 0)
    faceassset = np.delete(faceassset, noImg, 0)
    nPics = len(y)
    # random.sample needs a sequence, so use range() rather than np.arange()
    index = random.sample(range(nPics), int(nPics * test_data_rate))
    x_test = faceassset[index, :, :]
    x_train = np.delete(faceassset, index, 0)
    y_test = y[index]
    y_train = np.delete(y, index, 0)
    return (x_train, y_train), (x_test, y_test)
Example 36
def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.
    """
    data = np.array(data)
    data_size = len(data)
    # ceiling division; the original int(len(data)/batch_size) + 1 yields an
    # empty extra batch whenever data_size is a multiple of batch_size
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]
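A usage sketch, assuming numpy as np: with ten rows and a batch size of four, one epoch yields batches of 4, 4, and 2 rows.

data = [(n, n * 2) for n in range(10)]  # ten (x, y) pairs
for batch in batch_iter(data, batch_size=4, num_epochs=1, shuffle=False):
    print(batch.shape)                  # (4, 2), (4, 2), then (2, 2)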
Example 37
def _gen_centroids():
    a = np.arange(SSIZE / 18, SSIZE, SSIZE / 9)
    x, y = np.meshgrid(a, a)
    return np.dstack((y, x)).reshape((81, 2))
Example 38
def classify(self, image):
    """
    Given a 28x28 image, returns an array representing the 2 most
    probable predictions
    :param image:
    :return: array of 2 highest prob-digit tuples
    """
    if cv2.__version__[0] == '2':
        res = self.model.find_nearest(np.array([self.feature(image)]), k=11)
    else:
        res = self.model.findNearest(np.array([self.feature(image)]), k=11)

    # `normed` was removed from np.histogram; `density` is its replacement
    hist = np.histogram(res[2], bins=9, range=(1, 10), density=True)[0]
    zipped = sorted(zip(hist, np.arange(1, 10)), reverse=True)
    return np.array(zipped[:2])
Example 39
def blend2(x1, x2, y, metric, task, x1valid, x2valid, x1test, x2test):
    try:
        mm = no_transform()
        mbest_score = -2
        for w1 in np.arange(0.2, 1, 0.1):
            w2 = 1 - w1
            x = mm.fit_transform(x1) * w1 + mm.fit_transform(x2) * w2
            # getattr replaces the original exec('score = libscores....'),
            # which does not bind local variables under Python 3
            score = getattr(libscores, metric)(y, x, task)
            try:
                if score <= 0:
                    CVscore_auc = libscores.auc_metric(y, x, task)
                    score += CVscore_auc / 10
            except Exception:
                pass
            if score > mbest_score:
                mbest_score = score
                mbest_w1 = w1
                mbest_x = x
        mbest_w2 = 1 - mbest_w1
        xvalid = mm.fit_transform(x1valid) * mbest_w1 + mm.fit_transform(x2valid) * mbest_w2
        xtest = mm.fit_transform(x1test) * mbest_w1 + mm.fit_transform(x2test) * mbest_w2
        return mbest_score, xvalid, xtest
    except Exception:
        return 0.01, x1valid, x1test
Example 40
def blend3(x1, x2, x3, y, metric, task, x1valid, x2valid, x3valid, x1test, x2test, x3test):
    try:
        mm = no_transform()
        mbest_score = -2
        for w1 in np.arange(0.2, 1, 0.2):
            for w2 in np.arange(0.1, 0.6, 0.2):
                w3 = 1 - w1 - w2
                if w3 > 0:
                    x = (mm.fit_transform(x1) * w1 + mm.fit_transform(x2) * w2
                         + mm.fit_transform(x3) * w3)
                    # getattr replaces the original exec(...) scoring call
                    score = getattr(libscores, metric)(y, x, task)
                    try:
                        if score <= 0:
                            CVscore_auc = libscores.auc_metric(y, x, task)
                            score += CVscore_auc / 10
                    except Exception:
                        pass
                    if score > mbest_score:
                        mbest_score = score
                        mbest_w1 = w1
                        mbest_w2 = w2
        mbest_w3 = 1 - mbest_w1 - mbest_w2
        xvalid = (mm.fit_transform(x1valid) * mbest_w1
                  + mm.fit_transform(x2valid) * mbest_w2
                  + mm.fit_transform(x3valid) * mbest_w3)
        xtest = (mm.fit_transform(x1test) * mbest_w1
                 + mm.fit_transform(x2test) * mbest_w2
                 + mm.fit_transform(x3test) * mbest_w3)
        return mbest_score, xvalid, xtest
    except Exception:
        return 0.01, x1valid, x1test
Example 41
def tiedrank(a):
    '''Return the ranks (with base 1) of a list resolving ties by averaging.
    This works for numpy arrays.'''
    m = len(a)
    # Sort a in ascending order (sa=sorted vals, i=indices)
    i = a.argsort()
    sa = a[i]
    # Find unique values
    uval = np.unique(a)
    # Test whether there are ties
    R = np.arange(m, dtype=float) + 1  # Ranks with base 1
    if len(uval) != m:
        # Average the ranks for the ties
        oldval = sa[0]
        newval = sa[0]
        k0 = 0
        for k in range(1, m):
            newval = sa[k]
            if newval == oldval:
                # moving average
                R[k0:k + 1] = R[k - 1] * (k - k0) / (k - k0 + 1) + R[k] / (k - k0 + 1)
            else:
                k0 = k
                oldval = newval
    # Invert the index
    S = np.empty(m)
    S[i] = R
    return S
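The tie handling is the usual shared-rank convention; a sketch, assuming numpy as np:

a = np.array([10., 20., 20., 30.])
tiedrank(a)  # array([1. , 2.5, 2.5, 4. ]) -- the tied 20s share rank (2+3)/2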
Example 42
def plot_trajectories(src_sent, src_encoding, idx):
    # encoding is (time_steps, hidden_dim)
    #pca = PCA(n_components=1)
    #pca_result = pca.fit_transform(src_encoding)
    times = np.arange(src_encoding.shape[0])
    plt.plot(times, src_encoding)
    plt.title(" ".join(src_sent))
    plt.xlabel('timestep')
    plt.ylabel('trajectories')
    plt.savefig("misc_hidden_cell_trajectories_" + str(idx), bbox_inches="tight")
    plt.close()
Example 43
def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = numpy.arange(num_labels) * num_classes
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot
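A usage sketch (this snippet uses the bare `numpy` import, so the sketch does too):

labels = numpy.array([0, 2, 1])
dense_to_one_hot(labels, num_classes=3)
# array([[1., 0., 0.],
#        [0., 0., 1.],
#        [0., 1., 0.]])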
Example 44
def iterate_minibatches(inputs, targets, batchsize, shuffle=False, augment=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        if augment:
            # as in paper:
            # pad feature arrays with 4 pixels on each side
            # and do random cropping of 32x32
            padded = np.pad(inputs[excerpt], ((0, 0), (0, 0), (4, 4), (4, 4)), mode='constant')
            random_cropped = np.zeros(inputs[excerpt].shape, dtype=np.float32)
            # np.random.random_integers is deprecated; randint's high is exclusive
            crops = np.random.randint(0, high=9, size=(batchsize, 2))
            for r in range(batchsize):
                random_cropped[r, :, :, :] = padded[r, :,
                                                    crops[r, 0]:(crops[r, 0] + 32),
                                                    crops[r, 1]:(crops[r, 1] + 32)]
            inp_exc = random_cropped
        else:
            inp_exc = inputs[excerpt]
        yield inp_exc, targets[excerpt]

# ############################## Main program ################################
Example 45
def __init__(self, env):
    self.env = env
    if isinstance(env.observation_space, Discrete):
        self.state_size = 1
    else:
        self.state_size = numel(env.observation_space.shape)

    if isinstance(self.env.action_space, Discrete):
        self.is_discrete = True
        self.action_size = env.action_space.n
        self.actions = np.arange(self.action_size)
    else:
        self.is_discrete = False
        self.action_size = numel(env.action_space.sample())