Python numpy.compress() 使用实例

2023年2月23日 351次阅读

The following are code examples showing how to use numpy.compress(). They are extracted from open source Python projects. You can vote up the examples you like or vote down the examples you don’t like. You can also save this page to your account.

Example 1

def test_compress(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = arr.compress([0, 1, 0, 1, 0], axis=1)
        assert_equal(out, tgt)

        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=1)
        assert_equal(out, tgt)

        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1])
        assert_equal(out, 1)

Example 2

def test_compress(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = arr.compress([0, 1, 0, 1, 0], axis=1)
        assert_equal(out, tgt)

        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=1)
        assert_equal(out, tgt)

        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1])
        assert_equal(out, 1)

Example 3

def get_cloud_colors(data):
    """Get colors from the cloud.

    ``data.data`` is read as little-endian float32 values and arranged as
    ``data.width * data.height`` rows of 8 floats (``np.resize`` repeats or
    truncates the buffer to fit). Columns 0, 1, 2 and 4 are kept; every row
    that contains a NaN in one of those columns is zeroed. The four bytes of
    the remaining column-4 value are then reinterpreted as ``uint8`` and the
    first three bytes of each row are returned.

    Parameters
    ----------
    data
        Object exposing ``data`` (a bytes-like buffer), ``width`` and
        ``height``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(1, data.width * data.height, 3)``.
    """
    n_points = data.width * data.height
    buf = np.frombuffer(data.data, np.dtype('<f4'))
    buf = np.resize(buf, (n_points, 8))
    buf = np.compress([True, True, True, False, True, False, False,
                       False], buf, axis=1)

    # Rows with any NaN are cleared entirely, including the packed color.
    buf[np.isnan(buf).any(1)] = 0.0

    # Reinterpret the float32 bytes directly instead of round-tripping through
    # ndarray.tostring()/np.fromstring(), which newer NumPy no longer provides.
    packed = np.ascontiguousarray(buf[:, 3])
    rgb = packed.view(np.uint8).reshape(n_points, 4)
    return np.array([rgb[:, :3]])

Example 4

def _getWavesetIntersection(self):
    """Return the merged emissivity waveset clipped to the common range.

    The range starts as the first/last entries of ``refs._default_waveset``
    and is narrowed by the first/last waveset entry of every component after
    the first that has an emissivity. The merged waveset is then restricted
    to values strictly inside that range and strictly inside the range of
    the Vega spectrum's waveset.
    """
    minw = refs._default_waveset[0]
    maxw = refs._default_waveset[-1]

    for component in self.components[1:]:
        # Identity test: ``!= None`` would go through the object's own
        # comparison operator instead of checking for a missing emissivity.
        if component.emissivity is not None:
            wave = component.emissivity.GetWaveSet()

            minw = max(minw, wave[0])
            maxw = min(maxw, wave[-1])

    result = self._mergeEmissivityWavesets()

    result = N.compress(result > minw, result)
    result = N.compress(result < maxw, result)

    # intersection with vega spectrum (why???)
    vegasp = spectrum.TabularSourceSpectrum(locations.VegaFile)
    vegaws = vegasp.GetWaveSet()
    result = N.compress(result > vegaws[0], result)
    result = N.compress(result < vegaws[-1], result)

    return result

Example 5

def test_compress(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = arr.compress([0, 1, 0, 1, 0], axis=1)
        assert_equal(out, tgt)

        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=1)
        assert_equal(out, tgt)

        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1])
        assert_equal(out, 1)

Example 6

def test_compress(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = arr.compress([0, 1, 0, 1, 0], axis=1)
        assert_equal(out, tgt)

        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=1)
        assert_equal(out, tgt)

        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1])
        assert_equal(out, 1)

Example 7

def _min_or_max_axis(X, axis, min_or_max):
    """Reduce a sparse matrix along ``axis`` with ``min_or_max``.

    ``X`` is converted to CSC (``axis == 0``) or CSR (otherwise), reduced per
    major index by ``_minor_reduce``, and lines that hold fewer stored
    entries than the axis length are additionally combined with 0. Only the
    non-zero results are placed into a one-row or one-column COO matrix,
    which is returned as a flat dense array.

    Raises
    ------
    ValueError
        If ``X.shape[axis]`` is 0.
    """
    axis_len = X.shape[axis]
    if axis_len == 0:
        raise ValueError("zero-size array to reduction operation")
    other_len = X.shape[1 - axis]

    if axis == 0:
        mat = X.tocsc()
    else:
        mat = X.tocsr()
    mat.sum_duplicates()

    major_index, value = _minor_reduce(mat, min_or_max)

    # Lines with fewer stored entries than the axis length also contain
    # implicit zeros, so 0 takes part in the reduction for them.
    has_implicit_zero = np.diff(mat.indptr)[major_index] < axis_len
    value[has_implicit_zero] = min_or_max(value[has_implicit_zero], 0)

    nonzero = value != 0
    major_index = np.compress(nonzero, major_index)
    value = np.compress(nonzero, value)

    from scipy.sparse import coo_matrix
    zeros = np.zeros(len(value))
    if axis == 0:
        coords, out_shape = (zeros, major_index), (1, other_len)
    else:
        coords, out_shape = (major_index, zeros), (other_len, 1)
    res = coo_matrix((value, coords), dtype=X.dtype, shape=out_shape)
    return res.toarray().ravel()

Example 8

def test_compress(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = arr.compress([0, 1, 0, 1, 0], axis=1)
        assert_equal(out, tgt)

        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1], axis=1)
        assert_equal(out, tgt)

        arr = np.arange(10).reshape(2, 5)
        out = arr.compress([0, 1])
        assert_equal(out, 1)

Example 9

def computejobcpus(self):
    """ stats for the cores on the nodes that were assigned to the job (if available)

    Returns a ``(results, effectiveresults)`` pair of dicts. ``results``
    holds ``calculate_stats`` output per name in ``self._outnames`` over all
    allowed cores, plus ``results['all'] = {"cnt": <core count>}``.
    ``effectiveresults`` holds the same statistics restricted to cores whose
    metric at row index 1 is below 0.95, plus ``'all'`` with the number of
    such cores. If the allowed-CPU information is missing or flagged with an
    error for any host, both dicts are ``{"error": CPUSET_UNKNOWN}``.
    """

    proc = self._job.getdata('proc')

    if proc is None:
        return {"error": ProcessingError.CPUSET_UNKNOWN}, {"error": ProcessingError.CPUSET_UNKNOWN}

    cpusallowed = proc['cpusallowed']

    ratios = numpy.empty((self._ncpumetrics, self._totalcores), numpy.double)

    coreindex = 0
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for host, last in self._last.items():
        elapsed = last - self._first[host]
        if host in cpusallowed and 'error' not in cpusallowed[host]:
            elapsed = elapsed[:, cpusallowed[host]]
        else:
            return {"error": ProcessingError.CPUSET_UNKNOWN}, {"error": ProcessingError.CPUSET_UNKNOWN}

        coresperhost = len(elapsed[0, :])
        # Each core's metrics are normalised by that core's column total.
        ratios[:, coreindex:(coreindex+coresperhost)] = 1.0 * elapsed / numpy.sum(elapsed, 0)
        coreindex += coresperhost

    allowedcores = numpy.array(ratios[:, :coreindex])

    results = {}
    for i, name in enumerate(self._outnames):
        results[name] = calculate_stats(allowedcores[i, :])

    results['all'] = {"cnt": coreindex}

    effective = numpy.compress(allowedcores[1, :] < 0.95, allowedcores, axis=1)
    # Count the retained columns directly; the previous code indexed with a
    # loop variable left over from the loop above, which is undefined when
    # self._outnames is empty.
    effectiveresults = {
        'all': effective.shape[1]
    }
    if effectiveresults['all'] > 0:
        for i, name in enumerate(self._outnames):
            effectiveresults[name] = calculate_stats(effective[i, :])

    return results, effectiveresults

Example 10

def break_info(self, range=None):
    """
    Return break information for the axis

    The range, major breaks and minor breaks are in transformed space.
    The labels for the major breaks depict data space values.

    When ``range`` is omitted, ``self.dimension()`` is used. Non-finite
    major breaks are dropped before the minor breaks are computed, and
    only major breaks inside the (inclusive) range are kept and labelled.

    Returns a dict with the keys ``'range'``, ``'labels'``, ``'major'``
    and ``'minor'``.
    """
    limits = self.dimension() if range is None else range

    major = self.get_breaks(limits)
    if major is not None and len(major) != 0:
        major = major.compress(np.isfinite(major))
        minor = self.get_minor_breaks(major, limits)
    else:
        major = minor = np.array([])

    inside = (limits[0] <= major) & (major <= limits[1])
    major = major.compress(inside)

    return {'range': limits,
            'labels': self.get_labels(major),
            'major': major,
            'minor': minor}

Example 11

def test_compress(self):
        arr = [[0, 1, 2, 3, 4],
               [5, 6, 7, 8, 9]]
        tgt = [[5, 6, 7, 8, 9]]
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

Example 12

def test_axis(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = np.compress([0, 1, 0, 1, 0], arr, axis=1)
        assert_equal(out, tgt)

Example 13

def test_truncate(self):
        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=1)
        assert_equal(out, tgt)

Example 14

def test_flatten(self):
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr)
        assert_equal(out, 1)

Example 15

def _get_file_list(self, sample0, sample1):
    """Return an ordered list of metadata file names covering a sample range.

    ``sample0`` and ``sample1`` are converted to whole seconds using
    ``self._samples_per_second`` and rounded down to multiples of
    ``self._file_cadence_secs``. For every subdirectory period
    (``self._subdir_cadence_secs``) touched by that range, one path
    ``<subdir>/<file_name>@<timestamp>.h5`` is produced per file timestamp
    that lies inside the range (both ends inclusive). Subdirectories are
    named after their UTC start time as ``%Y-%m-%dT%H-%M-%S``.

    The returned list always starts with ``'dmd_properties.h5'``.
    """
    file_cadence = self._file_cadence_secs
    subdir_cadence = self._subdir_cadence_secs

    start_ts = int(sample0 / self._samples_per_second)
    end_ts = int(sample1 / self._samples_per_second)

    # convert ts to be divisible by self._file_cadence_secs
    start_ts = (start_ts // file_cadence) * file_cadence
    end_ts = (end_ts // file_cadence) * file_cadence

    # get subdirectory start and end ts
    start_sub_ts = (start_ts // subdir_cadence) * subdir_cadence
    end_sub_ts = (end_ts // subdir_cadence) * subdir_cadence

    # ordered list of full file paths to return, always include dmd_properties.h5
    ret_list = ['dmd_properties.h5']

    for sub_ts in range(start_sub_ts, end_sub_ts + subdir_cadence, subdir_cadence):
        sub_datetime = datetime.datetime.utcfromtimestamp(sub_ts)
        subdir = sub_datetime.strftime('%Y-%m-%dT%H-%M-%S')
        # create numpy array of all file TS in subdir
        file_ts_in_subdir = numpy.arange(
            sub_ts, sub_ts + subdir_cadence, file_cadence)
        in_range = numpy.logical_and(file_ts_in_subdir >= start_ts,
                                     file_ts_in_subdir <= end_ts)
        for valid_file_ts in numpy.compress(in_range, file_ts_in_subdir):
            # The format string had been garbled into an invalid
            # '%[...' specifier; the name is "<file_name>@<timestamp>.h5".
            file_basename = '%s@%i.h5' % (self._file_name, valid_file_ts)
            ret_list.append(os.path.join(subdir, file_basename))

    return ret_list

Example 16

def whiskers(self, whis: float = 1.5) -> t.Tuple[float, float]:
    """
    Calculates the upper and the lower whisker for a boxplot.
    I.e. the minimum and the maximum value of the data set
    that lie in the range (Q1 - whis * IQR, Q3 + whis * IQR).
    IQR being the interquartile distance, Q1 the lower and Q3 the upper quartile.

    If no value qualifies for a whisker (or the candidate lies on the wrong
    side of its quartile), the quartile itself is used.

    Adapted from http://stackoverflow.com/a/20096945

    :param whis: multiple of the IQR that the whiskers may extend beyond the quartiles
    :return: tuple (lower whisker, upper whisker)
    """
    q1, _, q3 = self.quartiles()
    iqr = self.iqr()

    # get high extreme: the fence sits above the *upper* quartile
    hi_val = q3 + whis * iqr
    whisk_hi = np.compress(self.array <= hi_val, self.array)
    if len(whisk_hi) == 0 or np.max(whisk_hi) < q3:
        whisk_hi = q3
    else:
        whisk_hi = max(whisk_hi)

    # get low extreme
    lo_val = q1 - whis * iqr
    whisk_lo = np.compress(self.array >= lo_val, self.array)
    if len(whisk_lo) == 0 or np.min(whisk_lo) > q1:
        whisk_lo = q1
    else:
        whisk_lo = min(whisk_lo)
    return whisk_lo, whisk_hi

Example 17

def PCR_preprocess(file_path, log_mode = False, pseudotime_mode = False,
                   pcv_method = 'Rprincurve', anchor_gene = None,
                   exclude_marker_names = None):
    """Load a PCR/RNA-Seq data file, filter its markers and write the result.

    Marker names are read from the first column of ``file_path`` (skipping
    1 header row in pseudotime mode, 2 otherwise). Markers listed in
    ``exclude_marker_names`` are removed, cell stages are optionally
    replaced by ``infer_pseudotime`` output, and markers whose fraction of
    zero entries across cells is 0.8 or more are dropped. The filtered data
    is passed to ``write_preprocessed_data``.

    ``log_mode`` is accepted but not used in this function.

    Returns
    -------
    tuple
        ``(cell_IDs, data, markers, cell_stages.astype(float), data_tag,
        output_directory)``.
    """
    low_gene_fraction_max = 0.8

    data_tag, output_directory = create_output_directory(file_path)

    cell_IDs, cell_stages, data = get_PCR_or_RNASeq_data(file_path, pseudotime_mode)

    with open(file_path, 'r') as f:
        markers = np.loadtxt(f, dtype = str, delimiter = '\t',
            skiprows = 1 if pseudotime_mode else 2, usecols = [0])
    # reshape() returns a new array, so the result has to be kept; this
    # guarantees a 1-D marker array even when the file has a single marker.
    markers = markers.reshape(markers.size)

    if exclude_marker_names:
        indices = np.zeros(0, dtype = int)
        for name in exclude_marker_names:
            indices = np.append(indices, np.where(markers == name)[0])

        data = np.delete(data, indices, axis = 1)
        markers = np.delete(markers, indices)

    if pseudotime_mode:
        cell_stages = infer_pseudotime(data, output_directory, data_tag, pcv_method,
                                       anchor_gene, markers)

    # Keep only markers that are zero in fewer than 80% of the rows.
    condition = np.mean(data == 0, axis = 0) < low_gene_fraction_max
    data = np.compress(condition, data, 1)
    markers = np.compress(condition, markers)

    write_preprocessed_data(output_directory, cell_IDs, cell_stages, data, markers)

    return cell_IDs, data, markers, cell_stages.astype(float), data_tag, output_directory

Example 18

def test_compress(self):
        arr = [[0, 1, 2, 3, 4],
               [5, 6, 7, 8, 9]]
        tgt = [[5, 6, 7, 8, 9]]
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

Example 19

def test_axis(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = np.compress([0, 1, 0, 1, 0], arr, axis=1)
        assert_equal(out, tgt)

Example 20

def test_truncate(self):
        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=1)
        assert_equal(out, tgt)

Example 21

def test_flatten(self):
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr)
        assert_equal(out, 1)

Example 22

def get_cloud_data(data):
    """Get the data out of a cloud as a numpy array.

    ``data.data`` is read as little-endian float32 values, arranged as
    ``data.width * data.height`` rows of 8 floats, and columns 0, 1, 2
    and 4 of every row are returned.
    """
    little_endian_f32 = np.dtype('float32').newbyteorder('<')
    flat = np.frombuffer(data.data, little_endian_f32)
    rows = np.resize(flat, (data.width * data.height, 8))
    keep_columns = [True, True, True, False, True, False, False, False]
    return np.compress(keep_columns, rows, axis=1)

Example 23

def get_cloud_image(self, data):
    """Get an image from the cloud.

    ``data.data`` is read as little-endian float32 values arranged as
    ``data.width * data.height`` rows of 8 floats; the first five columns
    are kept and rows containing a NaN are discarded. For each remaining
    row, the first three values plus a homogeneous 1.0 are multiplied by
    ``self.p_left``; when the third component of the product is non-zero,
    the first two components are divided by it and used as the (column, row)
    pixel position, and the row's fifth value is stored at that pixel.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``(data.height, data.width, 3)`` holding
        the first three bytes of every stored float32 value.
    """
    dta = np.zeros((data.height, data.width), dtype="float32")

    buf = np.frombuffer(data.data, np.dtype('<f4'))
    buf = np.resize(buf, (data.width * data.height, 8))
    buf = np.compress([True, True, True, True, True, False, False, False],
                      buf, axis=1)
    buf = buf[~np.isnan(buf).any(1)]

    for point in buf:
        point[3] = 1.0  # homogeneous coordinate
        src = point[:4].reshape(4, 1)
        dst = np.dot(self.p_left, src)
        pnt_w = dst[2, 0]
        if pnt_w != 0:
            # NumPy requires integer indices, so the projected pixel
            # coordinates are truncated before use.
            img_x = int(dst[0, 0] / pnt_w)
            img_y = int(dst[1, 0] / pnt_w)
            dta[img_y, img_x] = point[4]

    # Reinterpret the float32 bytes directly instead of round-tripping through
    # ndarray.tostring()/np.fromstring(), which newer NumPy no longer provides.
    img = dta.view(np.uint8).reshape(data.height, data.width, 4)
    return img[:, :, :3].copy()

Example 24

def _find_door(self):
    """Find the door, the most distant point in our cloud.

    Fetches the stereo cloud and its per-pixel details, restricts the
    details to the top two thirds of the rows and the middle third of the
    columns, and returns the largest non-NaN value found at index 2 of the
    last axis.
    """
    cloud = self.fc.zarj.eyes.get_stereo_cloud()
    image, details = self.fc.zarj.eyes.get_cloud_image_with_details(cloud)

    # we only want the center of the image
    shape = image.shape
    print(shape)
    # Floor division keeps the slice bounds integral on Python 3 as well.
    row_stop = 2 * shape[0] // 3
    col_start = shape[1] // 3
    col_stop = 2 * shape[1] // 3
    center = details[0:row_stop, col_start:col_stop]
    third_channel = np.compress([False, False, True, False], center, axis=2)
    return np.nanmax(third_channel.flatten())

Example 25

def log10(self, data, ind):
    """Drop rows whose column ``ind`` is not positive and log10 that column.

    Returns a new array; ``data`` itself is not modified.
    """
    positive_rows = data[:, ind] > 0
    kept = np.compress(positive_rows, data, 0)
    kept[:, ind] = np.log10(kept[:, ind])
    return kept

Example 26

def _get_default_locs(self, vmin, vmax):
    """Returns the default locations of ticks.

    The date axis info is computed with ``self.finder`` and cached on
    ``self.plot_obj`` the first time it is needed. The ``'val'`` entries
    flagged by ``'min'`` (minor locator) or ``'maj'`` (otherwise) are
    returned.
    """
    plot = self.plot_obj
    if plot.date_axis_info is None:
        plot.date_axis_info = self.finder(vmin, vmax, self.freq)
    info = plot.date_axis_info

    flag_field = 'min' if self.isminor else 'maj'
    return np.compress(info[flag_field], info['val'])

Example 27

def _set_default_format(self, vmin, vmax):
    """Build, store and return the default tick format mapping.

    The date axis info is computed with ``self.finder`` and cached on
    ``self.plot_obj`` the first time it is needed. Records are selected by
    their ``'maj'`` flag, or for a minor formatter by ``'min'`` set and
    ``'maj'`` unset. The result maps the first field of each selected
    record to its fourth field and is also stored as ``self.formatdict``.
    """
    plot = self.plot_obj
    if plot.date_axis_info is None:
        plot.date_axis_info = self.finder(vmin, vmax, self.freq)
    info = plot.date_axis_info

    if self.isminor:
        wanted = info['min'] & np.logical_not(info['maj'])
    else:
        wanted = info['maj']
    selected = np.compress(wanted, info)

    self.formatdict = {x: f for (x, _, _, f) in selected}
    return self.formatdict

Example 28

def test_small_large(self):
        # test the small and large code paths, current cutoff 400 elements
        for s in [5, 20, 51, 200, 1000]:
            d = np.random.randn(4, s)
            # Randomly set some elements to NaN:
            w = np.random.randint(0, d.size, size=d.size // 5)
            d.ravel()[w] = np.nan
            d[:,0] = 1.  # ensure at least one good value
            # use normal median without nans to compare
            tgt = []
            for x in d:
                nonan = np.compress(~np.isnan(x), x)
                tgt.append(np.median(nonan, overwrite_input=True))

            assert_array_equal(np.nanmedian(d, axis=-1), tgt)

Example 29

def trimSpectrum(sp, minw, maxw):
    """Create a new spectrum with trimmed upper and lower ranges.

    Parameters
    ----------
    sp : `SourceSpectrum`
        Spectrum to trim.

    minw, maxw : number
        Lower and upper limits (inclusive) for the wavelength set
        in the trimmed spectrum.

    Returns
    -------
    result : `TabularSourceSpectrum`
        Trimmed spectrum, with wave and flux units copied from ``sp``.

    """
    wave = sp.GetWaveSet()
    flux = sp(wave)

    # Build one mask from the untrimmed wavelengths and apply it to both
    # arrays. Previously the upper-limit mask for the flux was derived from
    # the already-trimmed wavelength array, so its length no longer matched
    # the flux array and the result was only right for ascending wavelengths.
    keep = N.logical_and(wave >= minw, wave <= maxw)
    new_wave = N.compress(keep, wave)
    new_flux = N.compress(keep, flux)

    result = TabularSourceSpectrum()

    result._wavetable = new_wave
    result._fluxtable = new_flux

    result.waveunits = units.Units(sp.waveunits.name)
    result.fluxunits = units.Units(sp.fluxunits.name)

    return result

Example 30

def test_compress(self):
        arr = [[0, 1, 2, 3, 4],
               [5, 6, 7, 8, 9]]
        tgt = [[5, 6, 7, 8, 9]]
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

Example 31

def test_axis(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = np.compress([0, 1, 0, 1, 0], arr, axis=1)
        assert_equal(out, tgt)

Example 32

def test_truncate(self):
        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=1)
        assert_equal(out, tgt)

Example 33

def test_flatten(self):
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr)
        assert_equal(out, 1)

Example 34

def sort_array(arg0,arg1=None,decimate=True,as_index=False):
    """
    Sort (time, value) data by time.

    Args can be an (N,2) array or a tuple with 2 (times,values) arrays.
    Two arrays of times and values of the same length are combined into one
    (N,2) array before sorting.

    The decimate argument just removes repeated timestamps, not values
    (rows are selected by ``get_array_steps`` applied to the sorted time
    column).

    If as_index is True the sorting (and optionally decimated) row indices
    are returned instead of the sorted data. Otherwise the sorted data is
    returned and the elapsed time is printed.
    """
    import numpy as np
    t0=time.time()
    data = arg0 if arg1 is None else (arg0,arg1)
    if len(data)==2:
        times,values = data
        data = np.array((times,values)).T #Build a new array for sorting
    #Sort the array by row index (much faster than numpy.sort(order))
    time_index = get_col(np.argsort(data,0),0)
    if as_index:
        if not decimate:
            # Previously returned the undefined name ``index`` (NameError).
            return time_index
        return np.compress(get_array_steps(get_col(data,0).take(time_index)),time_index,0)
    sdata = data.take(time_index,0)
    if decimate:
        sdata = np.compress(get_array_steps(get_col(sdata,0)),sdata,0)
    # Parenthesised form prints the same value on Python 2 and Python 3.
    print(time.time()-t0)
    return sdata

Example 35

def test_compress(self):
        arr = [[0, 1, 2, 3, 4],
               [5, 6, 7, 8, 9]]
        tgt = [[5, 6, 7, 8, 9]]
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

Example 36

def test_axis(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = np.compress([0, 1, 0, 1, 0], arr, axis=1)
        assert_equal(out, tgt)

Example 37

def test_truncate(self):
        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=1)
        assert_equal(out, tgt)

Example 38

def test_flatten(self):
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr)
        assert_equal(out, 1)

Example 39

def test_small_large(self):
        # test the small and large code paths, current cutoff 400 elements
        for s in [5, 20, 51, 200, 1000]:
            d = np.random.randn(4, s)
            # Randomly set some elements to NaN:
            w = np.random.randint(0, d.size, size=d.size // 5)
            d.ravel()[w] = np.nan
            d[:,0] = 1.  # ensure at least one good value
            # use normal median without nans to compare
            tgt = []
            for x in d:
                nonan = np.compress(~np.isnan(x), x)
                tgt.append(np.median(nonan, overwrite_input=True))

            assert_array_equal(np.nanmedian(d, axis=-1), tgt)

Example 40

def compress(condition, x, axis=None):
    """
    Return selected slices of an array along given axis.

    The slices of `x` whose position corresponds to a non-zero entry of
    `condition` are kept. The positions are obtained with ``flatnonzero``
    and passed to ``x.take`` together with `axis`.
    Corresponds to numpy.compress

    .. versionadded:: 0.7

    Parameters
    ----------
    condition
         1 dimensional array of non-zero and zero values
         corresponding to indices of slices along a selected axis.
    x
        Input data, tensor variable.
    axis
        Axis handed to ``x.take``; defaults to None.

    Returns
    -------
    object
        `x` with selected slices.

    """
    return x.take(theano.tensor.basic.flatnonzero(condition), axis=axis)

Example 41

def test_op(self):
    """Compare ``self.op`` against ``numpy.compress`` for every configured case."""
    cases = zip(self.axis_list, self.cond_list, self.shape_list)
    for axis, cond, shape in cases:
        cond_var = theano.tensor.ivector()
        data = numpy.random.random(size=shape).astype(theano.config.floatX)
        data_var = theano.tensor.matrix()

        compiled = theano.function(
            [cond_var, data_var],
            self.op(cond_var, data_var, axis=axis),
        )

        reference = numpy.compress(cond, data, axis=axis)
        actual = compiled(cond, data)

        # Shape is checked separately from the values.
        assert actual.shape == reference.shape
        assert numpy.allclose(actual, reference)

Example 42

def subtr_cellmeans(workd,subjslots):
        """
   Subtract all cell means when within-subjects factors are present ...
   i.e., calculate full-model using a D-variable.

   Starts from a float copy of the transposed `workd` and, for every
   index combination produced by incr(), subtracts the outer product of
   the group means and the group's mask from it.

   NOTE(review): Bbetweens, Nfactors, Bscols, Nlevels, makelist, incr and
   amean are not defined in this block -- presumably they come from an
   enclosing scope; confirm before reusing this function elsewhere.

   Returns: the array of residuals (same layout as the transposed workd)
   """
        # Get a list of all dims that are source and between-subj
        sourcedims = makelist(Bbetweens,Nfactors+1)

        # Now, fix this list by mapping the dims from the original source
        # to dims for a between-subjects variable (namely, subjslots)
        transidx = range(len(subjslots.shape))[1:] + [0] # put subj dim at end
        tsubjslots = N.transpose(subjslots,transidx) # get all Ss for this idx
        tworkd = N.transpose(workd) # swap subj. and variable dims
        errors = 1.0 * tworkd

        # No source dims: idx/loopcap are set up for a single one-slot counter.
        if len(sourcedims) == 0:
            idx = [-1]
            loopcap = [0]
        if len(sourcedims) <> 0:
            btwsourcedims = map(Bscols.index,sourcedims)
            idx = [0] * len(btwsourcedims)
            idx[0] = -1 # compensate for pre-increment of 1st slot in incr()

            # Get a list of the maximum values each factor can handle
            loopcap = N.take(N.array(Nlevels),sourcedims)-1

### WHILE STILL MORE GROUPS, CALCULATE GROUP MEAN FOR EACH D-VAR
        while incr(idx,loopcap) <> -1:  # loop through source btw level-combos
            mask = tsubjslots[idx]
            thisgroup = tworkd*mask[N.NewAxis,:]
            groupmns = amean(N.compress(mask,thisgroup),1)

### THEN SUBTRACT THEM FROM APPROPRIATE SUBJECTS
            errors = errors - N.multiply.outer(groupmns,mask)
        return errors

Example 43

def atvar(a,limits=None,inclusive=(1,1)):
    """
   Returns the sample variance of values in an array, (i.e., using N-1),
   ignoring values strictly outside the sequence passed to 'limits'.
   Note: either limit in the sequence, or the value of limits itself,
   can be set to None.  The inclusive list/tuple determines whether the lower
   and upper limiting bounds (respectively) are open/exclusive (0) or
   closed/inclusive (1). ASSUMES A FLAT ARRAY (OR ELSE PREFLATTENS).

   Raises ValueError when the lower limit is above the array maximum or the
   upper limit is below the array minimum.

   Usage:   atvar(a,limits=None,inclusive=(1,1))
   """
    a = a.astype(N.float_)
    # No limits at all: plain variance of the whole array.
    if limits == None or limits == [None,None]:
        return avar(a)
    assert type(limits) in [ListType,TupleType,N.ndarray], "Wrong type for limits in atvar"
    # Pick the comparison functions that implement open vs. closed bounds.
    if inclusive[0]:    lowerfcn = N.greater_equal
    else:               lowerfcn = N.greater
    if inclusive[1]:    upperfcn = N.less_equal
    else:               upperfcn = N.less
    # NOTE(review): this comparison runs before the None checks below, so a
    # None limit is ordered against a number here -- that is only permitted
    # on Python 2; confirm the intended behaviour before porting.
    if limits[0] > N.maximum.reduce(N.ravel(a)) or limits[1] < N.minimum.reduce(N.ravel(a)):
        raise ValueError, "No array values within given limits (atvar)."
    elif limits[0]==None and limits[1]<>None:
        mask = upperfcn(a,limits[1])
    elif limits[0]<>None and limits[1]==None:
        mask = lowerfcn(a,limits[0])
    elif limits[0]<>None and limits[1]<>None:
        # Element-wise product of the two masks acts as a logical AND.
        mask = lowerfcn(a,limits[0])*upperfcn(a,limits[1])

    a = N.compress(mask,a)  # squish out excluded values
    return avar(a)

Example 44

def awilcoxont(x,y):
    """
    Calculates the Wilcoxon T-test for related samples and returns the
    result.  A non-parametric T-test.

    Zero differences are discarded; the remaining absolute differences are
    ranked and the rank sums of the negative and non-negative differences
    are compared. The p-value comes from a normal approximation (zprob).

    Usage:   awilcoxont(x,y)     where x,y are equal-length arrays for 2 conditions
    Returns: t-statistic, two-tailed p-value
    Raises:  ValueError if x and y differ in length
    """
    if len(x) != len(y):
        raise ValueError('Unequal N in awilcoxont.  Aborting.')
    d = x-y
    d = N.compress(N.not_equal(d,0),d) # Keep all non-zero differences
    count = len(d)
    absd = abs(d)
    absranked = arankdata(absd)
    r_plus = 0.0
    r_minus = 0.0
    for i, diff in enumerate(d):
        if diff < 0:
            r_minus = r_minus + absranked[i]
        else:
            r_plus = r_plus + absranked[i]
    wt = min(r_plus, r_minus)
    # Mean and standard error of the rank sum under the null hypothesis.
    mn = count * (count+1) * 0.25
    se =  math.sqrt(count*(count+1)*(2.0*count+1.0)/24.0)
    z = math.fabs(wt-mn) / se
    prob = 2*(1.0 -zprob(abs(z)))
    return wt, prob

Example 45

def _build(self, tree, examples_idx, features_idx, depth=0):
    """Recursively build the subtree for the given examples and features.

    A class node is returned when no features remain, only one distinct
    target value is present, there are fewer examples than
    ``self.min_samples_split``, ``self.max_depth`` is reached, or the
    splitter yields no record or one whose ``info`` is below
    ``self.min_entropy_decrease``. Otherwise a feature node is created and
    one child is attached per split record; empty records get a class node
    built from the current node's item counts.
    """
    items, counts = unique(self.y[examples_idx])

    stop_here = (features_idx.size == 0
                 or items.size == 1
                 or examples_idx.size < self.min_samples_split
                 or depth >= self.max_depth)
    if stop_here:
        return self._class_node(items, counts)

    calc_record = self.splitter.calc(examples_idx, features_idx)
    if calc_record is None or calc_record.info < self.min_entropy_decrease:
        return self._class_node(items, counts)

    split_records = self.splitter.split(examples_idx, calc_record)

    remaining = np.compress(calc_record.alive_features, features_idx)
    if not self.is_repeating:
        # The feature just used may not be picked again further down.
        used = np.where(remaining == calc_record.feature_idx)
        remaining = np.delete(remaining, used)

    root = Node(calc_record.feature_idx,
                is_feature=True,
                details=calc_record,
                item_count=(items, counts))
    for record in split_records:
        if record.size == 0:
            child = self._class_node(items, counts)
        else:
            child = self._build(tree, record.bag, remaining, depth + 1)
        root.add_child(child, record)
    return root

Example 46

def assert_probmatrix_relaxed(mat):  # accepts matrices with all-nan rows (invalid training data for class etc.)
    """Run ``assert_probmatrix`` on ``mat`` after dropping rows that are entirely NaN."""
    all_nan_rows = np.all(np.isnan(mat), axis=1, keepdims=False)
    assert_probmatrix(mat.compress(~all_nan_rows, axis=0))

Example 47

def best_cat_split(self, ind, dep):
        """ Determine the best categorical variable split.

        For every independent variable in ``ind`` a frequency table of the
        dependent values is built per category, every grouping returned by
        ``all_combinations()`` is scored with ``chisquare``, and the best
        qualifying grouping becomes a candidate ``Split``. The best candidate
        across variables is returned; other candidates may be attached to it
        as surrogates.
        """
        split = Split(None, None, None, None, 0)
        all_dep = np.unique(dep.arr)
        for i, ind_var in enumerate(ind):
            ind_var = ind_var.deep_copy()
            unique = np.unique(ind_var.arr)

            # freq[category][dependent value] -> count (or summed weight)
            freq = {}
            if dep.weights is None:
                for col in unique:
                    counts = np.unique(np.compress(ind_var.arr == col, dep.arr), return_counts=True)
                    freq[col] = cl.defaultdict(int)
                    # transpose turns (values, counts) into (value, count) pairs
                    freq[col].update(np.transpose(counts))
            else:
                for col in unique:
                    # NOTE(review): `counts` is not used in this branch.
                    counts = np.unique(np.compress(ind_var.arr == col, dep.arr), return_counts=True)
                    freq[col] = cl.defaultdict(int)
                    for dep_v in all_dep:
                        freq[col][dep_v] = dep.weights[(ind_var.arr == col) * (dep.arr == dep_v)].sum()

            if len(list(ind_var.possible_groupings())) == 0:
                split.invalid_reason = InvalidSplitReason.PURE_NODE

            choice, highest_p_join, split_chi, dof = None, None, None, None
            for comb in ind_var.all_combinations():
                # Merge the per-category tables of each group in this combination.
                freqs = [ sum( [ cl.Counter(freq[key]) for key in c ], cl.Counter()) for c in comb ]
                keys = set(sum([ list(f.keys()) for f in freqs ], []))

                # Contingency table: one row per group, one column per key.
                n_ij = np.array(
                    [ [ col.get(k, 0) for k in keys ] for col in freqs ]
                )

                chi, p_split, dof = chisquare(n_ij, dep.weights is not None)

                # Keep the combination with the smallest p (ties: larger chi),
                # provided every group is large enough and p is below alpha_merge.
                if (choice is None or p_split < highest_p_join or (p_split == highest_p_join and chi > split_chi)) and (n_ij.sum(axis=1) >= self.min_child_node_size).all() and p_split < self.alpha_merge:
                    choice, highest_p_join, split_chi = comb, p_split, chi

            temp_split = Split(i, choice, split_chi, highest_p_join, dof, split_name=ind_var.name)
            # NOTE(review): p_split and chi here hold the values of the last
            # combination evaluated, not necessarily those of `choice` -- confirm
            # that this is intended.
            better_split = (not split.valid() or p_split < split.p or (p_split == split.p and chi > split.score)) and choice is not None
            # After the swap `split` is the best so far and `temp_split` the loser.
            if better_split: split, temp_split = temp_split, split

            if split.valid() and choice is not None:
                chi_threshold = self.split_threshold * split.score

                # A losing split scoring at least the threshold is kept as a
                # surrogate, together with its own qualifying surrogates.
                if temp_split.valid() and temp_split.score >= chi_threshold:
                    for sur in temp_split.surrogates:
                        if sur.column_id != i and sur.score >= chi_threshold:
                            split.surrogates.append(sur)

                    temp_split.surrogates = []
                    split.surrogates.append(temp_split)

                split.sub_split_values(ind[split.column_id].metadata)

        return split

Example 48

def test_compress(self):
        arr = [[0, 1, 2, 3, 4],
               [5, 6, 7, 8, 9]]
        tgt = [[5, 6, 7, 8, 9]]
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

Example 49

def test_axis(self):
        tgt = [[5, 6, 7, 8, 9]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=0)
        assert_equal(out, tgt)

        tgt = [[1, 3], [6, 8]]
        out = np.compress([0, 1, 0, 1, 0], arr, axis=1)
        assert_equal(out, tgt)

Example 50

def test_truncate(self):
        tgt = [[1], [6]]
        arr = np.arange(10).reshape(2, 5)
        out = np.compress([0, 1], arr, axis=1)
        assert_equal(out, tgt)