Here are examples of the Python API numpy.intc taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
101 Examples
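Before diving into the list, a quick refresher (a minimal sketch of my own, not drawn from any project below): np.intc is the NumPy dtype that matches the platform's C int, 32 bits on most modern platforms, and np.iinfo reports its bounds.

import numpy as np

a = np.array([1, 2, 3], dtype=np.intc)  # an array of C ints
print(a.dtype)  # int32 on most platforms
print(np.iinfo(np.intc).min, np.iinfo(np.intc).max)  # e.g. -2147483648 2147483647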
3 votes
Source: test_dtype.py
with GNU General Public License v3.0
from adityaprakash-bobby
def test_shape_invalid(self):
# Check that the shape is valid.
max_int = np.iinfo(np.intc).max
max_intp = np.iinfo(np.intp).max
# Too large values (the datatype is part of this)
assert_raises(ValueError, np.dtype, [('a', 'f4', max_int // 4 + 1)])
assert_raises(ValueError, np.dtype, [('a', 'f4', max_int + 1)])
assert_raises(ValueError, np.dtype, [('a', 'f4', (max_int, 2))])
# Takes a different code path (fails earlier):
assert_raises(ValueError, np.dtype, [('a', 'f4', max_intp + 1)])
# Negative values
assert_raises(ValueError, np.dtype, [('a', 'f4', -1)])
assert_raises(ValueError, np.dtype, [('a', 'f4', (-1, -1))])
def test_alignment(self):
3 votes
Source: _fitpack_impl.py
with GNU General Public License v3.0
from adityaprakash-bobby
def _intc_overflow(x, msg=None):
"""Cast the value to an intc and raise an OverflowError if the value
cannot fit.
"""
if x > iinfo(intc).max:
if msg is None:
msg = '%r cannot fit into an intc' % x
raise OverflowError(msg)
return intc(x)
_iermess = {
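A hypothetical call to the helper above (it assumes iinfo and intc were imported from numpy, as in the original module): values in range are cast, values past iinfo(intc).max raise.

from numpy import iinfo, intc

print(_intc_overflow(7))  # numpy intc scalar 7
big = int(iinfo(intc).max) + 1
# _intc_overflow(big)  ->  OverflowError: ... cannot fit into an intc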
3 votes
Source: test_sputils.py
with GNU General Public License v3.0
from adityaprakash-bobby
def test_upcast(self):
assert_equal(sputils.upcast('intc'), np.intc)
assert_equal(sputils.upcast('int32', 'float32'), np.float64)
assert_equal(sputils.upcast('bool', complex, float), np.complex128)
assert_equal(sputils.upcast('i', 'd'), np.float64)
def test_getdtype(self):
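sputils.upcast is SciPy-internal, but for these particular inputs plain NumPy type promotion gives the same answers, which makes a handy cross-check:

import numpy as np

print(np.promote_types('i', 'd'))  # float64
print(np.promote_types('int32', 'float32'))  # float64
print(np.promote_types(np.bool_, np.complex128))  # complex128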
3 votes
Source: tree.py
with MIT License
from alvarobartt
def _validate_X_predict(self, X, check_input):
"""Validate X whenever one tries to predict, apply, predict_proba"""
if check_input:
X = check_array(X, dtype=DTYPE, accept_sparse="csr")
if issparse(X) and (X.indices.dtype != np.intc or
X.indptr.dtype != np.intc):
raise ValueError("No support for np.int64 index based "
"sparse matrices")
n_features = X.shape[1]
if self.n_features_ != n_features:
raise ValueError("Number of features of the model must "
"match the input. Model n_features is %s and "
"input n_features is %s "
% (self.n_features_, n_features))
return X
def predict(self, X, check_input=True):
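This check is why sparse input for scikit-learn trees must carry np.intc index arrays. A sketch of coercing a CSR matrix whose indices arrived as int64 (hypothetical setup; assigning the attributes directly skips scipy's validation):

import numpy as np
from scipy import sparse

X = sparse.random(5, 4, density=0.5, format="csr")
X.indices = X.indices.astype(np.intc)
X.indptr = X.indptr.astype(np.intc)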
3 votes
Source: onlineLDA.py
with MIT License
from armor-ai
def loglikelihood(self):
"""Calculate complete log likelihood, log p(w,z)
Formula used is log p(w,z) = log p(w|z) + log p(z)
"""
nzw, ndz, nz = self.nzw_, self.ndz_, self.nz_
alpha_m = self.alpha_m
eta_m = self.eta_m
alpha_sum = self.alpha_sum
eta_sum = self.eta_sum
nd = np.sum(ndz, axis=1).astype(np.intc)
return _lda._loglikelihood(nzw, ndz, nz, nd, alpha_m, eta_m, alpha_sum, eta_sum)
def _sample_topics(self, rands):
3 votes
Source: test_connect4.py
with MIT License
from bhansconnect
def test_simple_moves():
board, player, game = init_board_from_moves([4, 5, 4, 3, 0, 6])
expected = np.array(
[[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 1, 0, 0],
[ 1, 0, 0, -1, 1, -1, -1]], dtype=np.intc).tostring()
assert expected == game.stringRepresentation(board)
def test_overfull_column():
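One portability note on the snippet above: ndarray.tostring has been a deprecated alias of tobytes since NumPy 1.19, so newer code would build the same hashable board key with:

import numpy as np

key = np.zeros((6, 7), dtype=np.intc).tobytes()  # identical bytes to tostring()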
3 votes
Source: dmlab_env.py
with Apache License 2.0
from google-research
def _action(*entries):
"""Helper function for defining an action."""
return np.array(entries, dtype=np.intc)
_KEYS_TO_ACTIONS = {
3 votes
Source: dmlab_env.py
with Apache License 2.0
from google-research
def keys_to_action(cls, keys: environment.Keys) -> np.ndarray:
keys = input_utils.apply_default_gamepad_mapping(keys)
keys = input_utils.get_mapped_keys(keys, _BUTTON_MAPPING)
keys = {
**keys,
**input_utils.axes_to_keys(keys, threshold=0.5, mapping=_AXIS_MAPPING)
}
# We combine the action vectors of the pressed keys.
action = np.zeros((7,), dtype=np.intc)
for key in keys:
if key in _KEYS_TO_ACTIONS:
action += _KEYS_TO_ACTIONS[key]
return action
def env(self) -> environment.DMEnv:
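Both DMLab wrappers here build action vectors with dtype=np.intc, presumably because the environment's step() consumes C-int action arrays (an assumption based on these wrappers; verify against your deepmind_lab version). Combining per-key vectors is then plain integer addition:

import numpy as np

# hypothetical per-key action layouts
forward = np.array([0, 0, 0, 1, 0, 0, 0], dtype=np.intc)
strafe_left = np.array([0, 0, -1, 0, 0, 0, 0], dtype=np.intc)
action = forward + strafe_left  # still dtype=intc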
3 votes
Source: _classes.py
with GNU General Public License v3.0
from gustavowillam
def _validate_X_predict(self, X, check_input):
"""Validate the training data on predict (probabilities)."""
if check_input:
X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr",
reset=False)
if issparse(X) and (X.indices.dtype != np.intc or
X.indptr.dtype != np.intc):
raise ValueError("No support for np.int64 index based "
"sparse matrices")
else:
# The number of features is checked regardless of `check_input`
self._check_n_features(X, reset=False)
return X
def predict(self, X, check_input=True):
3 votes
Source: test_wcsprm.py
with BSD 3-Clause "New" or "Revised" License
from holzschu
def test_colax():
w = _wcs.Wcsprm()
assert w.colax.dtype == np.intc
assert_array_equal(w.colax, [0, 0])
w.colax = [42, 54]
assert_array_equal(w.colax, [42, 54])
w.colax[0] = 0
assert_array_equal(w.colax, [0, 54])
with pytest.raises(ValueError):
w.colax = [1, 2, 3]
def test_colnum():
3 votes
Source: wrappers.py
with MIT License
from jsikyoon
def step(self, action):
raw_action = np.array(self._action_set[action], np.intc)
reward = self._env.step(raw_action, num_steps=self._action_repeat)
self._done = not self._env.is_running()
obs = self._get_obs()
return obs, reward, self._done, {}
def render(self, *args, **kwargs):
3 votes
Source: dmlab.py
with MIT License
from jurgisp
def step(self, action):
raw_action = np.array(self.action_set[action], np.intc)
reward = self.env.step(raw_action, num_steps=self.num_action_repeats)
done = not self.env.is_running()
if not done:
observation = self.observation()
else:
# Do not have actual observation in done state, but need to return something
observation = np.zeros(self.observation_space.shape, dtype=self.observation_space.dtype) # type: ignore
return observation, reward, done, {}
3 votes
Source: fitpack.py
with MIT License
from ktraunmueller
def _intc_overflow(x, msg=None):
"""Cast the value to an intc and raise an OverflowError if the value
cannot fit.
"""
if x > iinfo(intc).max:
if msg is None:
msg = '%r cannot fit into an intc' % x
raise OverflowError(msg)
return intc(x)
_iermess = {0:["""\
3 votes
Source: csc.py
with MIT License
from ktraunmueller
def tocsr(self):
M,N = self.shape
indptr = np.empty(M + 1, dtype=np.intc)
indices = np.empty(self.nnz, dtype=np.intc)
data = np.empty(self.nnz, dtype=upcast(self.dtype))
csc_tocsr(M, N,
self.indptr, self.indices, self.data,
indptr, indices, data)
from .csr import csr_matrix
A = csr_matrix((data, indices, indptr), shape=self.shape)
A.has_sorted_indices = True
return A
def __getitem__(self, key):
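In released SciPy this routine sits behind csc_matrix.tocsr(): the intc index arrays are preallocated and the compiled csc_tocsr kernel fills them. The public one-liner:

from scipy import sparse

A = sparse.random(4, 3, density=0.5, format="csc").tocsr()
print(A.indptr.dtype, A.indices.dtype)  # int32 (i.e. intc) for matrices this small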
3 votes
Source: csc.py
with MIT License
from ktraunmueller
def nonzero(self):
# CSC can't use _cs_matrix's .nonzero method because it
# returns the indices sorted for self transposed.
# Get row and col indices, from _cs_matrix.tocoo
major_dim, minor_dim = self._swap(self.shape)
minor_indices = self.indices
major_indices = np.empty(len(minor_indices), dtype=np.intc)
sparsetools.expandptr(major_dim,self.indptr, major_indices)
row, col = self._swap((major_indices, minor_indices))
# Sort them to be in C-style order
ind = np.lexsort((col, row))
row = row[ind]
col = col[ind]
return row, col
nonzero.__doc__ = _cs_matrix.nonzero.__doc__
3 votes
Source: csr.py
with MIT License
from ktraunmueller
def tocsc(self):
indptr = np.empty(self.shape[1] + 1, dtype=np.intc)
indices = np.empty(self.nnz, dtype=np.intc)
data = np.empty(self.nnz, dtype=upcast(self.dtype))
csr_tocsc(self.shape[0], self.shape[1],
self.indptr, self.indices, self.data,
indptr, indices, data)
from .csc import csc_matrix
A = csc_matrix((data, indices, indptr), shape=self.shape)
A.has_sorted_indices = True
return A
def tobsr(self, blocksize=None, copy=True):
3 votes
Source: test_sputils.py
with MIT License
from ktraunmueller
def test_upcast(self):
assert_equal(sputils.upcast('intc'),np.intc)
assert_equal(sputils.upcast('int32','float32'),np.float64)
assert_equal(sputils.upcast('bool',complex,float),np.complex128)
assert_equal(sputils.upcast('i','d'),np.float64)
def test_getdtype(self):
3 votes
Source: __init__.py
with GNU General Public License v3.0
from MDIL-SNU
def _read_params(filename):
params_i = list()
params_d = list()
with open(filename, 'r') as fil:
for line in fil:
tmp = line.split()
params_i += [list(map(int, tmp[:3]))]
params_d += [list(map(float, tmp[3:]))]
params_i = np.asarray(params_i, dtype=np.intc, order='C')
params_d = np.asarray(params_d, dtype=np.float64, order='C')
return params_i, params_d
class Symmetry_function(object):
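A hypothetical round-trip for the reader above, where each line of the file holds three ints followed by floats:

# 'params' file contents (hypothetical): lines like "2 6 0 6.0 0.003214"
params_i, params_d = _read_params('params')
print(params_i.dtype, params_d.dtype)  # int32 (intc) and float64, both C-ordered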
3 votes
Source: evaluate_loo.py
with Apache License 2.0
from openbenchmark
def argmax_top_k(a, top_k=50):
ele_idx = heapq.nlargest(top_k, zip(a, itertools.count()))
return np.array([idx for ele, idx in ele_idx], dtype=np.intc)
def hit(rank, ground_truth):
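A hypothetical call to argmax_top_k above: heapq.nlargest over (value, position) pairs keeps the positions of the top_k largest entries, returned as an intc array.

import numpy as np

a = np.array([0.1, 0.9, 0.4, 0.7])
print(argmax_top_k(a, top_k=2))  # [1 3]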
3 votes
Source: rotation_forest.py
with BSD 2-Clause "Simplified" License
from societe-generale
def _validate_X_predict(self, X, check_input):
"""Validate X whenever one tries to predict, apply, predict_proba"""
if check_input:
X = check_array(X, dtype=DTYPE, accept_sparse="csr")
if issparse(X) and (X.indices.dtype != np.intc or X.indptr.dtype != np.intc):
raise ValueError("No support for np.int64 index based " "sparse matrices")
n_features = X.shape[1]
if self.n_features_ != n_features:
raise ValueError(
"Number of features of the model must "
"match the input. Model n_features is %s and "
"input n_features is %s " % (self.n_features_, n_features)
)
return X
class GroupPCADecisionTreeRegressor(BaseEstimator, RegressorMixin):
0 votes
Source: test_ctypeslib.py
with GNU General Public License v3.0
from adityaprakash-bobby
def test_dtype(self):
dt = np.intc
p = ndpointer(dtype=dt)
assert_(p.from_param(np.array([1], dt)))
dt = '<i4'
p = ndpointer(dtype=dt)
assert_(p.from_param(np.array([1], dt)))
dt = np.dtype('>i4')
p = ndpointer(dtype=dt)
p.from_param(np.array([1], dt))
assert_raises(TypeError, p.from_param,
np.array([1], dt.newbyteorder('swap')))
dtnames = ['x', 'y']
dtformats = [np.intc, np.float64]
dtdescr = {'names': dtnames, 'formats': dtformats}
dt = np.dtype(dtdescr)
p = ndpointer(dtype=dt)
assert_(p.from_param(np.zeros((10,), dt)))
samedt = np.dtype(dtdescr)
p = ndpointer(dtype=samedt)
assert_(p.from_param(np.zeros((10,), dt)))
dt2 = np.dtype(dtdescr, align=True)
if dt.itemsize != dt2.itemsize:
assert_raises(TypeError, p.from_param, np.zeros((10,), dt2))
else:
assert_(p.from_param(np.zeros((10,), dt2)))
def test_ndim(self):
0 votes
Source: linesearch.py
with GNU General Public License v3.0
from adityaprakash-bobby
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
c1=1e-4, c2=0.9,
amax=50, amin=1e-8, xtol=1e-14):
"""
Scalar function search for alpha that satisfies strong Wolfe conditions
alpha > 0 is assumed to be a descent direction.
Parameters
----------
phi : callable phi(alpha)
Function at point `alpha`
derphi : callable dphi(alpha)
Derivative `d phi(alpha)/ds`. Returns a scalar.
phi0 : float, optional
Value of `f` at 0
old_phi0 : float, optional
Value of `f` at the previous point
derphi0 : float, optional
Value `derphi` at 0
c1, c2 : float, optional
Wolfe parameters
amax, amin : float, optional
Maximum and minimum step size
xtol : float, optional
Relative tolerance for an acceptable step.
Returns
-------
alpha : float
Step size, or None if no suitable step was found
phi : float
Value of `phi` at the new point `alpha`
phi0 : float
Value of `phi` at `alpha=0`
Notes
-----
Uses routine DCSRCH from MINPACK.
"""
if phi0 is None:
phi0 = phi(0.)
if derphi0 is None:
derphi0 = derphi(0.)
if old_phi0 is not None and derphi0 != 0:
alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
if alpha1 < 0:
alpha1 = 1.0
else:
alpha1 = 1.0
phi1 = phi0
derphi1 = derphi0
isave = np.zeros((2,), np.intc)
dsave = np.zeros((13,), float)
task = b'START'
maxiter = 100
for i in xrange(maxiter):
stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
c1, c2, xtol, task,
amin, amax, isave, dsave)
if task[:2] == b'FG':
alpha1 = stp
phi1 = phi(stp)
derphi1 = derphi(stp)
else:
break
else:
# maxiter reached, the line search did not converge
stp = None
if task[:5] == b'ERROR' or task[:4] == b'WARN':
stp = None # failed
return stp, phi1, phi0
line_search = line_search_wolfe1
0 votes
Source: dia.py
with GNU General Public License v3.0
from adityaprakash-bobby
def transpose(self, axes=None, copy=False):
if axes is not None:
raise ValueError(("Sparse matrices do not support "
"an 'axes' parameter because swapping "
"dimensions is the only logical permutation."))
num_rows, num_cols = self.shape
max_dim = max(self.shape)
# flip diagonal offsets
offsets = -self.offsets
# re-align the data matrix
r = np.arange(len(offsets), dtype=np.intc)[:, None]
c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
pad_amount = max(0, max_dim-self.data.shape[1])
data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
dtype=self.data.dtype)))
data = data[r, c]
return dia_matrix((data, offsets), shape=(
num_cols, num_rows), copy=copy)
transpose.__doc__ = spmatrix.transpose.__doc__
0 votes
Source: dict_vectorizer.py
with MIT License
from alvarobartt
def _transform(self, X, fitting):
# Sanity check: Python's array has no way of explicitly requesting the
# signed 32-bit integers that scipy.sparse needs, so we use the next
# best thing: typecode "i" (int). However, if that gives larger or
# smaller integers than 32-bit ones, np.frombuffer screws up.
assert array("i").itemsize == 4, (
"sizeof(int) != 4 on your platform; please report this at"
" https://github.com/scikit-learn/scikit-learn/issues and"
" include the output from platform.platform() in your bug report")
dtype = self.dtype
if fitting:
feature_names = []
vocab = {}
else:
feature_names = self.feature_names_
vocab = self.vocabulary_
# Process everything as sparse regardless of setting
X = [X] if isinstance(X, Mapping) else X
indices = array("i")
indptr = array("i", [0])
# XXX we could change values to an array.array as well, but it
# would require (heuristic) conversion of dtype to typecode...
values = []
# collect all the possible feature names and build sparse matrix at
# same time
for x in X:
for f, v in six.iteritems(x):
if isinstance(v, six.string_types):
f = "%s%s%s" % (f, self.separator, v)
v = 1
if f in vocab:
indices.append(vocab[f])
values.append(dtype(v))
else:
if fitting:
feature_names.append(f)
vocab[f] = len(vocab)
indices.append(vocab[f])
values.append(dtype(v))
indptr.append(len(indices))
if len(indptr) == 1:
raise ValueError("Sample sequence X is empty.")
indices = np.frombuffer(indices, dtype=np.intc)
indptr = np.frombuffer(indptr, dtype=np.intc)
shape = (len(indptr) - 1, len(vocab))
result_matrix = sp.csr_matrix((values, indices, indptr),
shape=shape, dtype=dtype)
# Sort everything if asked
if fitting and self.sort:
feature_names.sort()
map_index = np.empty(len(feature_names), dtype=np.int32)
for new_val, f in enumerate(feature_names):
map_index[new_val] = vocab[f]
vocab[f] = new_val
result_matrix = result_matrix[:, map_index]
if self.sparse:
result_matrix.sort_indices()
else:
result_matrix = result_matrix.toarray()
if fitting:
self.feature_names_ = feature_names
self.vocabulary_ = vocab
return result_matrix
def fit_transform(self, X, y=None):
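The array("i") buffers above work because typecode "i" shares its memory layout with np.intc (both are C int), so np.frombuffer can reinterpret them without copying. A minimal sketch:

import numpy as np
from array import array

buf = array("i", [0, 2, 5])
ind = np.frombuffer(buf, dtype=np.intc)  # zero-copy, read-only view
print(ind)  # [0 2 5]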
0 votes
Source: text.py
with MIT License
from alvarobartt
def _count_vocab(self, raw_documents, fixed_vocab):
"""Create sparse feature matrix, and vocabulary where fixed_vocab=False
"""
if fixed_vocab:
vocabulary = self.vocabulary_
else:
# Add a new value when a new vocabulary item is seen
vocabulary = defaultdict()
vocabulary.default_factory = vocabulary.__len__
analyze = self.build_analyzer()
j_indices = []
indptr = _make_int_array()
values = _make_int_array()
indptr.append(0)
for doc in raw_documents:
feature_counter = {}
for feature in analyze(doc):
try:
feature_idx = vocabulary[feature]
if feature_idx not in feature_counter:
feature_counter[feature_idx] = 1
else:
feature_counter[feature_idx] += 1
except KeyError:
# Ignore out-of-vocabulary items for fixed_vocab=True
continue
j_indices.extend(feature_counter.keys())
values.extend(feature_counter.values())
indptr.append(len(j_indices))
if not fixed_vocab:
# disable defaultdict behaviour
vocabulary = dict(vocabulary)
if not vocabulary:
raise ValueError("empty vocabulary; perhaps the documents only"
" contain stop words")
j_indices = np.asarray(j_indices, dtype=np.intc)
indptr = np.frombuffer(indptr, dtype=np.intc)
values = np.frombuffer(values, dtype=np.intc)
X = sp.csr_matrix((values, j_indices, indptr),
shape=(len(indptr) - 1, len(vocabulary)),
dtype=self.dtype)
X.sort_indices()
return vocabulary, X
def fit(self, raw_documents, y=None):
0 votes
Source: tree.py
with MIT License
from alvarobartt
def fit(self, X, y, sample_weight=None, check_input=True,
X_idx_sorted=None):
random_state = check_random_state(self.random_state)
if check_input:
X = check_array(X, dtype=DTYPE, accept_sparse="csc")
y = check_array(y, ensure_2d=False, dtype=None)
if issparse(X):
X.sort_indices()
if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
raise ValueError("No support for np.int64 index based "
"sparse matrices")
# Determine output settings
n_samples, self.n_features_ = X.shape
is_classification = is_classifier(self)
y = np.atleast_1d(y)
expanded_class_weight = None
if y.ndim == 1:
# reshape is necessary to preserve the data contiguity against vs
# [:, np.newaxis] that does not.
y = np.reshape(y, (-1, 1))
self.n_outputs_ = y.shape[1]
if is_classification:
check_classification_targets(y)
y = np.copy(y)
self.classes_ = []
self.n_classes_ = []
if self.class_weight is not None:
y_original = np.copy(y)
y_encoded = np.zeros(y.shape, dtype=np.int)
for k in range(self.n_outputs_):
classes_k, y_encoded[:, k] = np.unique(y[:, k],
return_inverse=True)
self.classes_.append(classes_k)
self.n_classes_.append(classes_k.shape[0])
y = y_encoded
if self.class_weight is not None:
expanded_class_weight = compute_sample_weight(
self.class_weight, y_original)
else:
self.classes_ = [None] * self.n_outputs_
self.n_classes_ = [1] * self.n_outputs_
self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)
if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
y = np.ascontiguousarray(y, dtype=DOUBLE)
# Check parameters
max_depth = ((2 ** 31) - 1 if self.max_depth is None
else self.max_depth)
max_leaf_nodes = (-1 if self.max_leaf_nodes is None
else self.max_leaf_nodes)
if isinstance(self.min_samples_leaf, (numbers.Integral, np.integer)):
if not 1 <= self.min_samples_leaf:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = self.min_samples_leaf
else: # float
if not 0. < self.min_samples_leaf <= 0.5:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))
if isinstance(self.min_samples_split, (numbers.Integral, np.integer)):
if not 2 <= self.min_samples_split:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the integer %s"
% self.min_samples_split)
min_samples_split = self.min_samples_split
else: # float
if not 0. < self.min_samples_split <= 1.:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the float %s"
% self.min_samples_split)
min_samples_split = int(ceil(self.min_samples_split * n_samples))
min_samples_split = max(2, min_samples_split)
min_samples_split = max(min_samples_split, 2 * min_samples_leaf)
if isinstance(self.max_features, six.string_types):
if self.max_features == "auto":
if is_classification:
max_features = max(1, int(np.sqrt(self.n_features_)))
else:
max_features = self.n_features_
elif self.max_features == "sqrt":
max_features = max(1, int(np.sqrt(self.n_features_)))
elif self.max_features == "log2":
max_features = max(1, int(np.log2(self.n_features_)))
else:
raise ValueError(
'Invalid value for max_features. Allowed string '
'values are "auto", "sqrt" or "log2".')
elif self.max_features is None:
max_features = self.n_features_
elif isinstance(self.max_features, (numbers.Integral, np.integer)):
max_features = self.max_features
else: # float
if self.max_features > 0.0:
max_features = max(1,
int(self.max_features * self.n_features_))
else:
max_features = 0
self.max_features_ = max_features
if len(y) != n_samples:
raise ValueError("Number of labels=%d does not match "
"number of samples=%d" % (len(y), n_samples))
if not 0 <= self.min_weight_fraction_leaf <= 0.5:
raise ValueError("min_weight_fraction_leaf must be in [0, 0.5]")
if max_depth <= 0:
raise ValueError("max_depth must be greater than zero. ")
if not (0 < max_features <= self.n_features_):
raise ValueError("max_features must be in (0, n_features]")
if not isinstance(max_leaf_nodes, (numbers.Integral, np.integer)):
raise ValueError("max_leaf_nodes must be integral number but was "
"%r" % max_leaf_nodes)
if -1 < max_leaf_nodes < 2:
raise ValueError(("max_leaf_nodes {0} must be either None "
"or larger than 1").format(max_leaf_nodes))
if sample_weight is not None:
if (getattr(sample_weight, "dtype", None) != DOUBLE or
not sample_weight.flags.contiguous):
sample_weight = np.ascontiguousarray(
sample_weight, dtype=DOUBLE)
if len(sample_weight.shape) > 1:
raise ValueError("Sample weights array has more "
"than one dimension: %d" %
len(sample_weight.shape))
if len(sample_weight) != n_samples:
raise ValueError("Number of weights=%d does not match "
"number of samples=%d" %
(len(sample_weight), n_samples))
if expanded_class_weight is not None:
if sample_weight is not None:
sample_weight = sample_weight * expanded_class_weight
else:
sample_weight = expanded_class_weight
# Set min_weight_leaf from min_weight_fraction_leaf
if sample_weight is None:
min_weight_leaf = (self.min_weight_fraction_leaf *
n_samples)
else:
min_weight_leaf = (self.min_weight_fraction_leaf *
np.sum(sample_weight))
if self.min_impurity_split is not None:
warnings.warn("The min_impurity_split parameter is deprecated and"
" will be removed in version 0.21. "
"Use the min_impurity_decrease parameter instead.",
DeprecationWarning)
min_impurity_split = self.min_impurity_split
else:
min_impurity_split = 1e-7
if min_impurity_split < 0.:
raise ValueError("min_impurity_split must be greater than "
"or equal to 0")
if self.min_impurity_decrease < 0.:
raise ValueError("min_impurity_decrease must be greater than "
"or equal to 0")
presort = self.presort
# Allow presort to be 'auto', which means True if the dataset is dense,
# otherwise it will be False.
if self.presort == 'auto' and issparse(X):
presort = False
elif self.presort == 'auto':
presort = True
if presort is True and issparse(X):
raise ValueError("Presorting is not supported for sparse "
"matrices.")
# If multiple trees are built on the same dataset, we only want to
# presort once. Splitters now can accept presorted indices if desired,
# but do not handle any presorting themselves. Ensemble algorithms
# which desire presorting must do presorting themselves and pass that
# matrix into each tree.
if X_idx_sorted is None and presort:
X_idx_sorted = np.asfortranarray(np.argsort(X, axis=0),
dtype=np.int32)
if presort and X_idx_sorted.shape != X.shape:
raise ValueError("The shape of X (X.shape = {}) doesn't match "
"the shape of X_idx_sorted (X_idx_sorted"
".shape = {})".format(X.shape,
X_idx_sorted.shape))
# Build tree
criterion = self.criterion
if not isinstance(criterion, Criterion):
if is_classification:
criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
self.n_classes_)
else:
criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
n_samples)
SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS
splitter = self.splitter
if not isinstance(self.splitter, Splitter):
splitter = SPLITTERS[self.splitter](criterion,
self.max_features_,
min_samples_leaf,
min_weight_leaf,
random_state,
self.presort)
self.tree_ = Tree(self.n_features_, self.n_classes_, self.n_outputs_)
# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
builder = DepthFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
self.min_impurity_decrease,
min_impurity_split)
else:
builder = BestFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
max_leaf_nodes,
self.min_impurity_decrease,
min_impurity_split)
builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)
if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
self.classes_ = self.classes_[0]
return self
def _validate_X_predict(self, X, check_input):
0 votes
Source: onlineLDA.py
with MIT License
from armor-ai
def _initialize(self, X):
D, W = X.shape
N = int(X.sum())
n_topics = self.n_topics
n_iter = self.n_iter
logger.info("n_documents: {}".format(D))
logger.info("vocab_size: {}".format(W))
logger.info("n_words: {}".format(N))
logger.info("n_topics: {}".format(n_topics))
logger.info("n_iter: {}".format(n_iter))
self.nzw_ = nzw_ = np.zeros((n_topics, W), dtype=np.intc)
self.ndz_ = ndz_ = np.zeros((D, n_topics), dtype=np.intc)
self.nz_ = nz_ = np.zeros(n_topics, dtype=np.intc)
self.WS, self.DS = WS, DS = self.matrix_to_lists(X)
self.ZS = ZS = np.empty_like(self.WS, dtype=np.intc)
np.testing.assert_equal(N, len(WS))
for i in range(N):
w, d = WS[i], DS[i]
z_new = i % n_topics
ZS[i] = z_new
ndz_[d, z_new] += 1
nzw_[z_new, w] += 1
nz_[z_new] += 1
self.loglikelihoods_ = []
def loglikelihood(self):
0 votes
Source: onlineLDA.py
with MIT License
from armor-ai
def lists_to_matrix(self, WS, DS):
"""Convert array of word (or topic) and document indices to doc-term array
Parameters
-----------
(WS, DS) : tuple of two arrays
WS[k] contains the kth word in the corpus
DS[k] contains the document index for the kth word
Returns
-------
doc_word : array (D, V)
document-term array of counts
"""
D = max(DS) + 1
V = max(WS) + 1
doc_word = np.empty((D, V), dtype=np.intc)
for d in range(D):
for v in range(V):
doc_word[d, v] = np.count_nonzero(WS[DS == d] == v)
return doc_word
0 votes
Source: test_connect4.py
with MIT License
from bhansconnect
def test_symmetries():
"""Tests symetric board are produced."""
board, player, game = init_board_from_moves([0, 0, 1, 0, 6])
pi = 0.8
(board1, pi1), (board2, pi2) = game.getSymmetries(board, pi)
assert pi == pi1 and pi == pi2
expected_board1 = np.array(
[[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0],
[-1, 0, 0, 0, 0, 0, 0],
[-1, 0, 0, 0, 0, 0, 0],
[ 1, 1, 0, 0, 0, 0, 1]], dtype=np.intc).tostring()
assert expected_board1 == game.stringRepresentation(board1)
expected_board2 = np.array(
[[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, -1],
[ 0, 0, 0, 0, 0, 0, -1],
[ 1, 0, 0, 0, 0, 1, 1]], dtype=np.intc).tostring()
assert expected_board2 == game.stringRepresentation(board2)
def test_game_ended():
0 votes
Source: test_connect4.py
with MIT License
from bhansconnect
def test_game_ended():
"""Tests game end detection logic based on fixed boards."""
array_end_state_pairs = [
(np.array([[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0]], dtype=np.intc), 1, 0),
(np.array([[0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0]], dtype=np.intc), 1, 1),
(np.array([[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
[0, 1, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0]], dtype=np.intc), -1, -1),
(np.array([[0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0],
[0, 0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 0]], dtype=np.intc), -1, -1),
(np.array([[0, 0, 0, -1],
[0, 0, -1, 0],
[0, -1, 0, 0],
[-1, 0, 0, 0]], dtype=np.intc), 1, -1),
(np.array([[0, 0, 0, 0, 1],
[0, 0, 0, 1, 0],
[0, 0, 1, 0, 0],
[0, 1, 0, 0, 0]], dtype=np.intc), -1, -1),
(np.array([[1, 0, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 0],
[0, 0, 0, 1, 0]], dtype=np.intc), -1, -1),
(np.array([[ 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, -1, 0, 0, 0],
[ 0, 0, 0, -1, 0, 0, 1],
[ 0, 0, 0, 1, 1, -1, -1],
[ 0, 0, 0, -1, 1, 1, 1],
[ 0, -1, 0, -1, 1, -1, 1]], dtype=np.intc), -1, 0),
(np.array([[ 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., -1., 0., 0., 0.],
[ 1., 0., 1., -1., 0., 0., 0.],
[-1., -1., 1., 1., 0., 0., 0.],
[ 1., 1., 1., -1., 0., 0., 0.],
[ 1., -1., 1., -1., 0., -1., 0.]], dtype=np.intc), -1, -1),
(np.array([[ 0., 0., 0., 1., 0., 0., 0.,],
[ 0., 0., 0., 1., 0., 0., 0.,],
[ 0., 0., 0., -1., 0., 0., 0.,],
[ 0., 0., 1., 1., -1., 0., -1.,],
[ 0., 0., -1., 1., 1., 1., 1.,],
[-1., 0., -1., 1., -1., -1., -1.,],], dtype=np.intc), 1, 1),
]
for np_pieces, player, expected_end_state in array_end_state_pairs:
board, player, game = init_board_from_array(np_pieces, player)
end_state = game.getGameEnded(board, player)
assert expected_end_state == end_state, ("expected=%s, actual=%s, board=\n%s" % (expected_end_state, end_state, board))
def test_immutable_move():
0 votes
Source: linesearch.py
with MIT License
from buds-lab
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
c1=1e-4, c2=0.9,
amax=50, amin=1e-8, xtol=1e-14):
"""
Scalar function search for alpha that satisfies strong Wolfe conditions
alpha > 0 is assumed to be a descent direction.
Parameters
----------
phi : callable phi(alpha)
Function at point `alpha`
derphi : callable phi'(alpha)
Objective function derivative. Returns a scalar.
phi0 : float, optional
Value of phi at 0
old_phi0 : float, optional
Value of phi at previous point
derphi0 : float, optional
Value derphi at 0
c1 : float, optional
Parameter for Armijo condition rule.
c2 : float, optional
Parameter for curvature condition rule.
amax, amin : float, optional
Maximum and minimum step size
xtol : float, optional
Relative tolerance for an acceptable step.
Returns
-------
alpha : float
Step size, or None if no suitable step was found
phi : float
Value of `phi` at the new point `alpha`
phi0 : float
Value of `phi` at `alpha=0`
Notes
-----
Uses routine DCSRCH from MINPACK.
"""
if phi0 is None:
phi0 = phi(0.)
if derphi0 is None:
derphi0 = derphi(0.)
if old_phi0 is not None and derphi0 != 0:
alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
if alpha1 < 0:
alpha1 = 1.0
else:
alpha1 = 1.0
phi1 = phi0
derphi1 = derphi0
isave = np.zeros((2,), np.intc)
dsave = np.zeros((13,), float)
task = b'START'
maxiter = 100
for i in xrange(maxiter):
stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
c1, c2, xtol, task,
amin, amax, isave, dsave)
if task[:2] == b'FG':
alpha1 = stp
phi1 = phi(stp)
derphi1 = derphi(stp)
else:
break
else:
# maxiter reached, the line search did not converge
stp = None
if task[:5] == b'ERROR' or task[:4] == b'WARN':
stp = None # failed
return stp, phi1, phi0
line_search = line_search_wolfe1
0 votes
Source: signaltools.py
with MIT License
from buds-lab
def fftconvolve(in1, in2, mode="full", axes=None):
"""Convolve two N-dimensional arrays using FFT.
Convolve `in1` and `in2` using the fast Fourier transform method, with
the output size determined by the `mode` argument.
This is generally much faster than `convolve` for large arrays (n > ~500),
but can be slower when only a few output values are needed, and can only
output float arrays (int or object array inputs will be cast to float).
As of v0.19, `convolve` automatically chooses this method or the direct
method based on an estimation of which is faster.
Parameters
----------
in1 : array_like
First input.
in2 : array_like
Second input. Should have the same number of dimensions as `in1`.
mode : str {'full', 'valid', 'same'}, optional
A string indicating the size of the output:
``full``
The output is the full discrete linear convolution
of the inputs. (Default)
``valid``
The output consists only of those elements that do not
rely on the zero-padding. In 'valid' mode, either `in1` or `in2`
must be at least as large as the other in every dimension.
``same``
The output is the same size as `in1`, centered
with respect to the 'full' output.
axes : int or array_like of ints or None, optional
Axes over which to compute the convolution.
The default is over all axes.
Returns
-------
out : array
An N-dimensional array containing a subset of the discrete linear
convolution of `in1` with `in2`.
Examples
--------
Autocorrelation of white noise is an impulse.
>>> from scipy import signal
>>> sig = np.random.randn(1000)
>>> autocorr = signal.fftconvolve(sig, sig[::-1], mode='full')
>>> import matplotlib.pyplot as plt
>>> fig, (ax_orig, ax_mag) = plt.subplots(2, 1)
>>> ax_orig.plot(sig)
>>> ax_orig.set_title('White noise')
>>> ax_mag.plot(np.arange(-len(sig)+1,len(sig)), autocorr)
>>> ax_mag.set_title('Autocorrelation')
>>> fig.tight_layout()
>>> fig.show()
Gaussian blur implemented using FFT convolution. Notice the dark borders
around the image, due to the zero-padding beyond its boundaries.
The `convolve2d` function allows for other types of image boundaries,
but is far slower.
>>> from scipy import misc
>>> face = misc.face(gray=True)
>>> kernel = np.outer(signal.gaussian(70, 8), signal.gaussian(70, 8))
>>> blurred = signal.fftconvolve(face, kernel, mode='same')
>>> fig, (ax_orig, ax_kernel, ax_blurred) = plt.subplots(3, 1,
... figsize=(6, 15))
>>> ax_orig.imshow(face, cmap='gray')
>>> ax_orig.set_title('Original')
>>> ax_orig.set_axis_off()
>>> ax_kernel.imshow(kernel, cmap='gray')
>>> ax_kernel.set_title('Gaussian kernel')
>>> ax_kernel.set_axis_off()
>>> ax_blurred.imshow(blurred, cmap='gray')
>>> ax_blurred.set_title('Blurred')
>>> ax_blurred.set_axis_off()
>>> fig.show()
"""
in1 = asarray(in1)
in2 = asarray(in2)
noaxes = axes is None
if in1.ndim == in2.ndim == 0: # scalar inputs
return in1 * in2
elif in1.ndim != in2.ndim:
raise ValueError("in1 and in2 should have the same dimensionality")
elif in1.size == 0 or in2.size == 0: # empty arrays
return array([])
_, axes = _init_nd_shape_and_axes_sorted(in1, shape=None, axes=axes)
if not noaxes and not axes.size:
raise ValueError("when provided, axes cannot be empty")
if noaxes:
other_axes = array([], dtype=np.intc)
else:
other_axes = np.setdiff1d(np.arange(in1.ndim), axes)
s1 = array(in1.shape)
s2 = array(in2.shape)
if not np.all((s1[other_axes] == s2[other_axes])
| (s1[other_axes] == 1) | (s2[other_axes] == 1)):
raise ValueError("incompatible shapes for in1 and in2:"
" {0} and {1}".format(in1.shape, in2.shape))
complex_result = (np.issubdtype(in1.dtype, np.complexfloating)
or np.issubdtype(in2.dtype, np.complexfloating))
shape = np.maximum(s1, s2)
shape[axes] = s1[axes] + s2[axes] - 1
# Check that input sizes are compatible with 'valid' mode
if _inputs_swap_needed(mode, s1, s2):
# Convolution is commutative; order doesn't have any effect on output
in1, s1, in2, s2 = in2, s2, in1, s1
# Speed up FFT by padding to optimal size for FFTPACK
fshape = [fftpack.helper.next_fast_len(d) for d in shape[axes]]
fslice = tuple([slice(sz) for sz in shape])
# Pre-1.9 NumPy FFT routines are not threadsafe. For older NumPys, make
# sure we only call rfftn/irfftn from one thread at a time.
if not complex_result and (_rfft_mt_safe or _rfft_lock.acquire(False)):
try:
sp1 = np.fft.rfftn(in1, fshape, axes=axes)
sp2 = np.fft.rfftn(in2, fshape, axes=axes)
ret = np.fft.irfftn(sp1 * sp2, fshape, axes=axes)[fslice].copy()
finally:
if not _rfft_mt_safe:
_rfft_lock.release()
else:
# If we're here, it's either because we need a complex result, or we
# failed to acquire _rfft_lock (meaning rfftn isn't threadsafe and
# is already in use by another thread). In either case, use the
# (threadsafe but slower) SciPy complex-FFT routines instead.
sp1 = fftpack.fftn(in1, fshape, axes=axes)
sp2 = fftpack.fftn(in2, fshape, axes=axes)
ret = fftpack.ifftn(sp1 * sp2, axes=axes)[fslice].copy()
if not complex_result:
ret = ret.real
if mode == "full":
return ret
elif mode == "same":
return _centered(ret, s1)
elif mode == "valid":
shape_valid = shape.copy()
shape_valid[axes] = s1[axes] - s2[axes] + 1
return _centered(ret, shape_valid)
else:
raise ValueError("acceptable mode flags are 'valid',"
" 'same', or 'full'")
def _numeric_arrays(arrays, kinds='buifc'):
0 votes
Source: linesearch.py
with Apache License 2.0
from dashanji
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
c1=1e-4, c2=0.9,
amax=50, amin=1e-8, xtol=1e-14):
"""
Scalar function search for alpha that satisfies strong Wolfe conditions
alpha > 0 is assumed to be a descent direction.
Parameters
----------
phi : callable phi(alpha)
Function at point `alpha`
derphi : callable phi'(alpha)
Objective function derivative. Returns a scalar.
phi0 : float, optional
Value of phi at 0
old_phi0 : float, optional
Value of phi at previous point
derphi0 : float, optional
Value derphi at 0
c1 : float, optional
Parameter for Armijo condition rule.
c2 : float, optional
Parameter for curvature condition rule.
amax, amin : float, optional
Maximum and minimum step size
xtol : float, optional
Relative tolerance for an acceptable step.
Returns
-------
alpha : float
Step size, or None if no suitable step was found
phi : float
Value of `phi` at the new point `alpha`
phi0 : float
Value of `phi` at `alpha=0`
Notes
-----
Uses routine DCSRCH from MINPACK.
"""
if phi0 is None:
phi0 = phi(0.)
if derphi0 is None:
derphi0 = derphi(0.)
if old_phi0 is not None and derphi0 != 0:
alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
if alpha1 < 0:
alpha1 = 1.0
else:
alpha1 = 1.0
phi1 = phi0
derphi1 = derphi0
isave = np.zeros((2,), np.intc)
dsave = np.zeros((13,), float)
task = b'START'
maxiter = 100
for i in range(maxiter):
stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
c1, c2, xtol, task,
amin, amax, isave, dsave)
if task[:2] == b'FG':
alpha1 = stp
phi1 = phi(stp)
derphi1 = derphi(stp)
else:
break
else:
# maxiter reached, the line search did not converge
stp = None
if task[:5] == b'ERROR' or task[:4] == b'WARN':
stp = None # failed
return stp, phi1, phi0
line_search = line_search_wolfe1
0 votes
Source: _dict_vectorizer.py
with Apache License 2.0
from dashanji
def _transform(self, X, fitting):
# Sanity check: Python's array has no way of explicitly requesting the
# signed 32-bit integers that scipy.sparse needs, so we use the next
# best thing: typecode "i" (int). However, if that gives larger or
# smaller integers than 32-bit ones, np.frombuffer screws up.
assert array("i").itemsize == 4, (
"sizeof(int) != 4 on your platform; please report this at"
" https://github.com/scikit-learn/scikit-learn/issues and"
" include the output from platform.platform() in your bug report")
dtype = self.dtype
if fitting:
feature_names = []
vocab = {}
else:
feature_names = self.feature_names_
vocab = self.vocabulary_
# Process everything as sparse regardless of setting
X = [X] if isinstance(X, Mapping) else X
indices = array("i")
indptr = [0]
# XXX we could change values to an array.array as well, but it
# would require (heuristic) conversion of dtype to typecode...
values = []
# collect all the possible feature names and build sparse matrix at
# same time
for x in X:
for f, v in x.items():
if isinstance(v, str):
f = "%s%s%s" % (f, self.separator, v)
v = 1
if f in vocab:
indices.append(vocab[f])
values.append(dtype(v))
else:
if fitting:
feature_names.append(f)
vocab[f] = len(vocab)
indices.append(vocab[f])
values.append(dtype(v))
indptr.append(len(indices))
if len(indptr) == 1:
raise ValueError("Sample sequence X is empty.")
indices = np.frombuffer(indices, dtype=np.intc)
shape = (len(indptr) - 1, len(vocab))
result_matrix = sp.csr_matrix((values, indices, indptr),
shape=shape, dtype=dtype)
# Sort everything if asked
if fitting and self.sort:
feature_names.sort()
map_index = np.empty(len(feature_names), dtype=np.int32)
for new_val, f in enumerate(feature_names):
map_index[new_val] = vocab[f]
vocab[f] = new_val
result_matrix = result_matrix[:, map_index]
if self.sparse:
result_matrix.sort_indices()
else:
result_matrix = result_matrix.toarray()
if fitting:
self.feature_names_ = feature_names
self.vocabulary_ = vocab
return result_matrix
def fit_transform(self, X, y=None):
0 votes
Source: _classes.py
with Apache License 2.0
from dashanji
def fit(self, X, y, sample_weight=None, check_input=True,
X_idx_sorted=None):
random_state = check_random_state(self.random_state)
if self.ccp_alpha < 0.0:
raise ValueError("ccp_alpha must be greater than or equal to 0")
if check_input:
# Need to validate separately here.
# We can't pass multi_output=True because that would allow y to be
# csr.
check_X_params = dict(dtype=DTYPE, accept_sparse="csc")
check_y_params = dict(ensure_2d=False, dtype=None)
X, y = self._validate_data(X, y,
validate_separately=(check_X_params,
check_y_params))
if issparse(X):
X.sort_indices()
if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
raise ValueError("No support for np.int64 index based "
"sparse matrices")
# Determine output settings
n_samples, self.n_features_ = X.shape
is_classification = is_classifier(self)
y = np.atleast_1d(y)
expanded_class_weight = None
if y.ndim == 1:
# reshape is necessary to preserve the data contiguity against vs
# [:, np.newaxis] that does not.
y = np.reshape(y, (-1, 1))
self.n_outputs_ = y.shape[1]
if is_classification:
check_classification_targets(y)
y = np.copy(y)
self.classes_ = []
self.n_classes_ = []
if self.class_weight is not None:
y_original = np.copy(y)
y_encoded = np.zeros(y.shape, dtype=np.int)
for k in range(self.n_outputs_):
classes_k, y_encoded[:, k] = np.unique(y[:, k],
return_inverse=True)
self.classes_.append(classes_k)
self.n_classes_.append(classes_k.shape[0])
y = y_encoded
if self.class_weight is not None:
expanded_class_weight = compute_sample_weight(
self.class_weight, y_original)
self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)
if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
y = np.ascontiguousarray(y, dtype=DOUBLE)
# Check parameters
max_depth = (np.iinfo(np.int32).max if self.max_depth is None
else self.max_depth)
max_leaf_nodes = (-1 if self.max_leaf_nodes is None
else self.max_leaf_nodes)
if isinstance(self.min_samples_leaf, numbers.Integral):
if not 1 <= self.min_samples_leaf:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = self.min_samples_leaf
else: # float
if not 0. < self.min_samples_leaf <= 0.5:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))
if isinstance(self.min_samples_split, numbers.Integral):
if not 2 <= self.min_samples_split:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the integer %s"
% self.min_samples_split)
min_samples_split = self.min_samples_split
else: # float
if not 0. < self.min_samples_split <= 1.:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the float %s"
% self.min_samples_split)
min_samples_split = int(ceil(self.min_samples_split * n_samples))
min_samples_split = max(2, min_samples_split)
min_samples_split = max(min_samples_split, 2 * min_samples_leaf)
if isinstance(self.max_features, str):
if self.max_features == "auto":
if is_classification:
max_features = max(1, int(np.sqrt(self.n_features_)))
else:
max_features = self.n_features_
elif self.max_features == "sqrt":
max_features = max(1, int(np.sqrt(self.n_features_)))
elif self.max_features == "log2":
max_features = max(1, int(np.log2(self.n_features_)))
else:
raise ValueError("Invalid value for max_features. "
"Allowed string values are 'auto', "
"'sqrt' or 'log2'.")
elif self.max_features is None:
max_features = self.n_features_
elif isinstance(self.max_features, numbers.Integral):
max_features = self.max_features
else: # float
if self.max_features > 0.0:
max_features = max(1,
int(self.max_features * self.n_features_))
else:
max_features = 0
self.max_features_ = max_features
if len(y) != n_samples:
raise ValueError("Number of labels=%d does not match "
"number of samples=%d" % (len(y), n_samples))
if not 0 <= self.min_weight_fraction_leaf <= 0.5:
raise ValueError("min_weight_fraction_leaf must be in [0, 0.5]")
if max_depth <= 0:
raise ValueError("max_depth must be greater than zero. ")
if not (0 < max_features <= self.n_features_):
raise ValueError("max_features must be in (0, n_features]")
if not isinstance(max_leaf_nodes, numbers.Integral):
raise ValueError("max_leaf_nodes must be integral number but was "
"%r" % max_leaf_nodes)
if -1 < max_leaf_nodes < 2:
raise ValueError(("max_leaf_nodes {0} must be either None "
"or larger than 1").format(max_leaf_nodes))
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)
if expanded_class_weight is not None:
if sample_weight is not None:
sample_weight = sample_weight * expanded_class_weight
else:
sample_weight = expanded_class_weight
# Set min_weight_leaf from min_weight_fraction_leaf
if sample_weight is None:
min_weight_leaf = (self.min_weight_fraction_leaf *
n_samples)
else:
min_weight_leaf = (self.min_weight_fraction_leaf *
np.sum(sample_weight))
min_impurity_split = self.min_impurity_split
if min_impurity_split is not None:
warnings.warn("The min_impurity_split parameter is deprecated. "
"Its default value has changed from 1e-7 to 0 in "
"version 0.23, and it will be removed in 0.25. "
"Use the min_impurity_decrease parameter instead.",
FutureWarning)
if min_impurity_split < 0.:
raise ValueError("min_impurity_split must be greater than "
"or equal to 0")
else:
min_impurity_split = 0
if self.min_impurity_decrease < 0.:
raise ValueError("min_impurity_decrease must be greater than "
"or equal to 0")
if self.presort != 'deprecated':
warnings.warn("The parameter 'presort' is deprecated and has no "
"effect. It will be removed in v0.24. You can "
"suppress this warning by not passing any value "
"to the 'presort' parameter.",
FutureWarning)
# Build tree
criterion = self.criterion
if not isinstance(criterion, Criterion):
if is_classification:
criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
self.n_classes_)
else:
criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
n_samples)
SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS
splitter = self.splitter
if not isinstance(self.splitter, Splitter):
splitter = SPLITTERS[self.splitter](criterion,
self.max_features_,
min_samples_leaf,
min_weight_leaf,
random_state)
if is_classifier(self):
self.tree_ = Tree(self.n_features_,
self.n_classes_, self.n_outputs_)
else:
self.tree_ = Tree(self.n_features_,
# TODO: tree shouldn't need this in this case
np.array([1] * self.n_outputs_, dtype=np.intp),
self.n_outputs_)
# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
builder = DepthFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
self.min_impurity_decrease,
min_impurity_split)
else:
builder = BestFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
max_leaf_nodes,
self.min_impurity_decrease,
min_impurity_split)
builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)
if self.n_outputs_ == 1 and is_classifier(self):
self.n_classes_ = self.n_classes_[0]
self.classes_ = self.classes_[0]
self._prune_tree()
return self
def _validate_X_predict(self, X, check_input):
0 votes
Source: snippet.py
with Apache License 2.0
from dockerizeme
def _count_vocab(self, raw_documents, fixed_vocab):
"""Create sparse feature matrix, and vocabulary where fixed_vocab=False
"""
if fixed_vocab:
vocabulary = self.vocabulary_
else:
# Add a new value when a new vocabulary item is seen
vocabulary = defaultdict()
vocabulary.default_factory = vocabulary.__len__
analyze = self.build_analyzer()
j_indices = _make_int_array()
indptr = _make_int_array()
indptr.append(0)
for doc in raw_documents:
for feature in analyze(doc):
try:
j_indices.append(vocabulary[feature])
except KeyError:
# Ignore out-of-vocabulary items for fixed_vocab=True
continue
indptr.append(len(j_indices))
if not fixed_vocab:
# disable defaultdict behaviour
vocabulary = dict(vocabulary)
if not vocabulary:
raise ValueError("empty vocabulary; perhaps the documents only"
" contain stop words")
# some Python/Scipy versions won't accept an array.array:
if j_indices:
j_indices = np.frombuffer(j_indices, dtype=np.intc)
else:
j_indices = np.array([], dtype=np.int32)
indptr = np.frombuffer(indptr, dtype=np.intc)
values = np.ones(len(j_indices))
X = sp.csr_matrix((values, j_indices, indptr),
shape=(len(indptr) - 1, len(vocabulary)),
dtype=self.dtype)
X.sum_duplicates()
return vocabulary, X
def fit(self, raw_documents, y=None):
0 votes
Source: conf_gen.py
with MIT License
from duartegroup
def _get_bond_matrix(n_atoms, bonds, fixed_bonds):
"""
Populate a bond matrix with 1 if i, j are bonded, 2 if i, j are bonded and
fixed and 0 otherwise. Can support a partial structure with bonds to atoms
that don't (yet) exist.
---------------------------------------------------------------------------
Arguments:
n_atoms (int):
bonds (list(tuple)):
fixed_bonds (list(tuple)):
Returns:
(np.ndarray): Bond matrix, shape = (n_atoms, n_atoms)
"""
bond_matrix = np.zeros((n_atoms, n_atoms), dtype=np.intc)
for i, j in bonds:
if i < n_atoms and j < n_atoms:
bond_matrix[i, j] = 1
bond_matrix[j, i] = 1
for i, j in fixed_bonds:
if i < n_atoms and j < n_atoms:
bond_matrix[i, j] = 2
bond_matrix[j, i] = 2
return bond_matrix
def _get_coords_energy(coords, bonds, k, c, d0, tol, fixed_bonds,
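A hypothetical call to _get_bond_matrix above, for a three-atom chain with one fixed bond; entries are 1 for plain bonds, 2 for fixed bonds, and the matrix is symmetric by construction:

bm = _get_bond_matrix(n_atoms=3, bonds=[(0, 1), (1, 2)], fixed_bonds=[(0, 1)])
# bm == [[0, 2, 0],
#        [2, 0, 1],
#        [0, 1, 0]]   (dtype=intc)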
0 votes
Source: _classes.py
with GNU General Public License v3.0
from gustavowillam
def fit(self, X, y, sample_weight=None, check_input=True,
X_idx_sorted=None):
random_state = check_random_state(self.random_state)
if self.ccp_alpha < 0.0:
raise ValueError("ccp_alpha must be greater than or equal to 0")
if check_input:
X = check_array(X, dtype=DTYPE, accept_sparse="csc")
y = check_array(y, ensure_2d=False, dtype=None)
if issparse(X):
X.sort_indices()
if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
raise ValueError("No support for np.int64 index based "
"sparse matrices")
# Determine output settings
n_samples, self.n_features_ = X.shape
is_classification = is_classifier(self)
y = np.atleast_1d(y)
expanded_class_weight = None
if y.ndim == 1:
# reshape is necessary to preserve the data contiguity against vs
# [:, np.newaxis] that does not.
y = np.reshape(y, (-1, 1))
self.n_outputs_ = y.shape[1]
if is_classification:
check_classification_targets(y)
y = np.copy(y)
self.classes_ = []
self.n_classes_ = []
if self.class_weight is not None:
y_original = np.copy(y)
y_encoded = np.zeros(y.shape, dtype=np.int)
for k in range(self.n_outputs_):
classes_k, y_encoded[:, k] = np.unique(y[:, k],
return_inverse=True)
self.classes_.append(classes_k)
self.n_classes_.append(classes_k.shape[0])
y = y_encoded
if self.class_weight is not None:
expanded_class_weight = compute_sample_weight(
self.class_weight, y_original)
self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)
if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
y = np.ascontiguousarray(y, dtype=DOUBLE)
# Check parameters
max_depth = (np.iinfo(np.int32).max if self.max_depth is None
else self.max_depth)
max_leaf_nodes = (-1 if self.max_leaf_nodes is None
else self.max_leaf_nodes)
if isinstance(self.min_samples_leaf, numbers.Integral):
if not 1 <= self.min_samples_leaf:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = self.min_samples_leaf
else: # float
if not 0. < self.min_samples_leaf <= 0.5:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))
if isinstance(self.min_samples_split, numbers.Integral):
if not 2 <= self.min_samples_split:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the integer %s"
% self.min_samples_split)
min_samples_split = self.min_samples_split
else: # float
if not 0. < self.min_samples_split <= 1.:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the float %s"
% self.min_samples_split)
min_samples_split = int(ceil(self.min_samples_split * n_samples))
min_samples_split = max(2, min_samples_split)
min_samples_split = max(min_samples_split, 2 * min_samples_leaf)
if isinstance(self.max_features, str):
if self.max_features == "auto":
if is_classification:
max_features = max(1, int(np.sqrt(self.n_features_)))
else:
max_features = self.n_features_
elif self.max_features == "sqrt":
max_features = max(1, int(np.sqrt(self.n_features_)))
elif self.max_features == "log2":
max_features = max(1, int(np.log2(self.n_features_)))
else:
raise ValueError("Invalid value for max_features. "
"Allowed string values are 'auto', "
"'sqrt' or 'log2'.")
elif self.max_features is None:
max_features = self.n_features_
elif isinstance(self.max_features, numbers.Integral):
max_features = self.max_features
else: # float
if self.max_features > 0.0:
max_features = max(1,
int(self.max_features * self.n_features_))
else:
max_features = 0
self.max_features_ = max_features
if len(y) != n_samples:
raise ValueError("Number of labels=%d does not match "
"number of samples=%d" % (len(y), n_samples))
if not 0 <= self.min_weight_fraction_leaf <= 0.5:
raise ValueError("min_weight_fraction_leaf must be in [0, 0.5]")
if max_depth <= 0:
raise ValueError("max_depth must be greater than zero. ")
if not (0 < max_features <= self.n_features_):
raise ValueError("max_features must be in (0, n_features]")
if not isinstance(max_leaf_nodes, numbers.Integral):
raise ValueError("max_leaf_nodes must be integral number but was "
"%r" % max_leaf_nodes)
if -1 < max_leaf_nodes < 2:
raise ValueError(("max_leaf_nodes {0} must be either None "
"or larger than 1").format(max_leaf_nodes))
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)
if expanded_class_weight is not None:
if sample_weight is not None:
sample_weight = sample_weight * expanded_class_weight
else:
sample_weight = expanded_class_weight
# Set min_weight_leaf from min_weight_fraction_leaf
if sample_weight is None:
min_weight_leaf = (self.min_weight_fraction_leaf *
n_samples)
else:
min_weight_leaf = (self.min_weight_fraction_leaf *
np.sum(sample_weight))
if self.min_impurity_split is not None:
warnings.warn("The min_impurity_split parameter is deprecated. "
"Its default value will change from 1e-7 to 0 in "
"version 0.23, and it will be removed in 0.25. "
"Use the min_impurity_decrease parameter instead.",
FutureWarning)
min_impurity_split = self.min_impurity_split
else:
min_impurity_split = 1e-7
if min_impurity_split < 0.:
raise ValueError("min_impurity_split must be greater than "
"or equal to 0")
if self.min_impurity_decrease < 0.:
raise ValueError("min_impurity_decrease must be greater than "
"or equal to 0")
if self.presort != 'deprecated':
warnings.warn("The parameter 'presort' is deprecated and has no "
"effect. It will be removed in v0.24. You can "
"suppress this warning by not passing any value "
"to the 'presort' parameter.",
FutureWarning)
# Build tree
criterion = self.criterion
if not isinstance(criterion, Criterion):
if is_classification:
criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
self.n_classes_)
else:
criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
n_samples)
SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS
splitter = self.splitter
if not isinstance(self.splitter, Splitter):
splitter = SPLITTERS[self.splitter](criterion,
self.max_features_,
min_samples_leaf,
min_weight_leaf,
random_state)
if is_classifier(self):
self.tree_ = Tree(self.n_features_,
self.n_classes_, self.n_outputs_)
else:
self.tree_ = Tree(self.n_features_,
# TODO: the tree shouldn't need this in this case
np.array([1] * self.n_outputs_, dtype=np.intp),
self.n_outputs_)
# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
builder = DepthFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
self.min_impurity_decrease,
min_impurity_split)
else:
builder = BestFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
max_leaf_nodes,
self.min_impurity_decrease,
min_impurity_split)
builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)
if self.n_outputs_ == 1 and is_classifier(self):
self.n_classes_ = self.n_classes_[0]
self.classes_ = self.classes_[0]
self._prune_tree()
return self
def _validate_X_predict(self, X, check_input):
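A quick worked example of the float branch above, with illustrative values: the fraction is converted to a sample count by ceil, then floored at 2 and at twice min_samples_leaf so that every split can still produce two viable leaves.

from math import ceil

n_samples, min_samples_leaf = 1000, 3
min_samples_split = int(ceil(0.0049 * n_samples))                 # ceil(4.9) -> 5
min_samples_split = max(2, min_samples_split)                     # still 5
min_samples_split = max(min_samples_split, 2 * min_samples_leaf)  # -> 6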
0
Source : _dict_vectorizer.py
with GNU General Public License v3.0
from gustavowillam
def _transform(self, X, fitting):
# Sanity check: Python's array has no way of explicitly requesting the
# signed 32-bit integers that scipy.sparse needs, so we use the next
# best thing: typecode "i" (int). However, if that gives larger or
# smaller integers than 32-bit ones, np.frombuffer screws up.
assert array("i").itemsize == 4, (
"sizeof(int) != 4 on your platform; please report this at"
" https://github.com/scikit-learn/scikit-learn/issues and"
" include the output from platform.platform() in your bug report")
dtype = self.dtype
if fitting:
feature_names = []
vocab = {}
else:
feature_names = self.feature_names_
vocab = self.vocabulary_
transforming = True
# Process everything as sparse regardless of setting
X = [X] if isinstance(X, Mapping) else X
indices = array("i")
indptr = [0]
# XXX we could change values to an array.array as well, but it
# would require (heuristic) conversion of dtype to typecode...
values = []
# collect all the possible feature names and build sparse matrix at
# same time
for x in X:
for f, v in x.items():
if isinstance(v, str):
feature_name = "%s%s%s" % (f, self.separator, v)
v = 1
elif isinstance(v, Number) or (v is None):
feature_name = f
elif isinstance(v, Mapping):
raise TypeError(f'Unsupported value Type {type(v)} '
f'for {f}: {v}.\n'
'Mapping objects are not supported.')
elif isinstance(v, Iterable):
feature_name = None
self._add_iterable_element(f, v, feature_names, vocab,
fitting=fitting,
transforming=transforming,
indices=indices, values=values)
if feature_name is not None:
if fitting and feature_name not in vocab:
vocab[feature_name] = len(feature_names)
feature_names.append(feature_name)
if feature_name in vocab:
indices.append(vocab[feature_name])
values.append(self.dtype(v))
indptr.append(len(indices))
if len(indptr) == 1:
raise ValueError("Sample sequence X is empty.")
indices = np.frombuffer(indices, dtype=np.intc)
shape = (len(indptr) - 1, len(vocab))
result_matrix = sp.csr_matrix((values, indices, indptr),
shape=shape, dtype=dtype)
# Sort everything if asked
if fitting and self.sort:
feature_names.sort()
map_index = np.empty(len(feature_names), dtype=np.int32)
for new_val, f in enumerate(feature_names):
map_index[new_val] = vocab[f]
vocab[f] = new_val
result_matrix = result_matrix[:, map_index]
if self.sparse:
result_matrix.sort_indices()
else:
result_matrix = result_matrix.toarray()
if fitting:
self.feature_names_ = feature_names
self.vocabulary_ = vocab
return result_matrix
def fit_transform(self, X, y=None):
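The comment at the top of _transform is the key to the np.intc usage here: Python's array typecode "i" is asserted to be 4 bytes so that np.frombuffer can reinterpret the buffer as np.intc indices without copying. A self-contained sketch of that round trip, with toy feature maps standing in for real input:

from array import array
import numpy as np
import scipy.sparse as sp

indices = array("i")          # C int buffer, appended to incrementally
indptr = [0]
values = []
for sample in [{0: 1.0, 2: 3.0}, {1: 2.0}]:   # toy column -> value maps
    for col, val in sample.items():
        indices.append(col)
        values.append(val)
    indptr.append(len(indices))
idx = np.frombuffer(indices, dtype=np.intc)   # zero-copy reinterpretation
mat = sp.csr_matrix((values, idx, indptr), shape=(2, 3))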
0
Source : _classes.py
with GNU General Public License v3.0
from gustavowillam
def fit(self, X, y, sample_weight=None, check_input=True,
X_idx_sorted="deprecated"):
random_state = check_random_state(self.random_state)
if self.ccp_alpha < 0.0:
raise ValueError("ccp_alpha must be greater than or equal to 0")
if check_input:
# Need to validate separately here.
# We can't pass multi_output=True because that would allow y to be
# csr.
check_X_params = dict(dtype=DTYPE, accept_sparse="csc")
check_y_params = dict(ensure_2d=False, dtype=None)
X, y = self._validate_data(X, y,
validate_separately=(check_X_params,
check_y_params))
if issparse(X):
X.sort_indices()
if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
raise ValueError("No support for np.int64 index based "
"sparse matrices")
if self.criterion == "poisson":
if np.any(y < 0):
raise ValueError("Some value(s) of y are negative which is"
" not allowed for Poisson regression.")
if np.sum(y) <= 0:
raise ValueError("Sum of y is not positive which is "
"necessary for Poisson regression.")
# Determine output settings
n_samples, self.n_features_ = X.shape
self.n_features_in_ = self.n_features_
is_classification = is_classifier(self)
y = np.atleast_1d(y)
expanded_class_weight = None
if y.ndim == 1:
# reshape is necessary to preserve data contiguity, which indexing
# with [:, np.newaxis] does not.
y = np.reshape(y, (-1, 1))
self.n_outputs_ = y.shape[1]
if is_classification:
check_classification_targets(y)
y = np.copy(y)
self.classes_ = []
self.n_classes_ = []
if self.class_weight is not None:
y_original = np.copy(y)
y_encoded = np.zeros(y.shape, dtype=int)
for k in range(self.n_outputs_):
classes_k, y_encoded[:, k] = np.unique(y[:, k],
return_inverse=True)
self.classes_.append(classes_k)
self.n_classes_.append(classes_k.shape[0])
y = y_encoded
if self.class_weight is not None:
expanded_class_weight = compute_sample_weight(
self.class_weight, y_original)
self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)
if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
y = np.ascontiguousarray(y, dtype=DOUBLE)
# Check parameters
max_depth = (np.iinfo(np.int32).max if self.max_depth is None
else self.max_depth)
max_leaf_nodes = (-1 if self.max_leaf_nodes is None
else self.max_leaf_nodes)
if isinstance(self.min_samples_leaf, numbers.Integral):
if not 1 <= self.min_samples_leaf:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = self.min_samples_leaf
else: # float
if not 0. < self.min_samples_leaf <= 0.5:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))
if isinstance(self.min_samples_split, numbers.Integral):
if not 2 <= self.min_samples_split:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the integer %s"
% self.min_samples_split)
min_samples_split = self.min_samples_split
else: # float
if not 0. < self.min_samples_split <= 1.:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the float %s"
% self.min_samples_split)
min_samples_split = int(ceil(self.min_samples_split * n_samples))
min_samples_split = max(2, min_samples_split)
min_samples_split = max(min_samples_split, 2 * min_samples_leaf)
if isinstance(self.max_features, str):
if self.max_features == "auto":
if is_classification:
max_features = max(1, int(np.sqrt(self.n_features_)))
else:
max_features = self.n_features_
elif self.max_features == "sqrt":
max_features = max(1, int(np.sqrt(self.n_features_)))
elif self.max_features == "log2":
max_features = max(1, int(np.log2(self.n_features_)))
else:
raise ValueError("Invalid value for max_features. "
"Allowed string values are 'auto', "
"'sqrt' or 'log2'.")
elif self.max_features is None:
max_features = self.n_features_
elif isinstance(self.max_features, numbers.Integral):
max_features = self.max_features
else: # float
if self.max_features > 0.0:
max_features = max(1,
int(self.max_features * self.n_features_))
else:
max_features = 0
self.max_features_ = max_features
if len(y) != n_samples:
raise ValueError("Number of labels=%d does not match "
"number of samples=%d" % (len(y), n_samples))
if not 0 <= self.min_weight_fraction_leaf <= 0.5:
raise ValueError("min_weight_fraction_leaf must be in [0, 0.5]")
if max_depth <= 0:
raise ValueError("max_depth must be greater than zero.")
if not (0 < max_features <= self.n_features_):
raise ValueError("max_features must be in (0, n_features]")
if not isinstance(max_leaf_nodes, numbers.Integral):
raise ValueError("max_leaf_nodes must be integral number but was "
"%r" % max_leaf_nodes)
if -1 < max_leaf_nodes < 2:
raise ValueError(("max_leaf_nodes {0} must be either None "
"or larger than 1").format(max_leaf_nodes))
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X, DOUBLE)
if expanded_class_weight is not None:
if sample_weight is not None:
sample_weight = sample_weight * expanded_class_weight
else:
sample_weight = expanded_class_weight
# Set min_weight_leaf from min_weight_fraction_leaf
if sample_weight is None:
min_weight_leaf = (self.min_weight_fraction_leaf *
n_samples)
else:
min_weight_leaf = (self.min_weight_fraction_leaf *
np.sum(sample_weight))
min_impurity_split = self.min_impurity_split
if min_impurity_split is not None:
warnings.warn(
"The min_impurity_split parameter is deprecated. Its default "
"value has changed from 1e-7 to 0 in version 0.23, and it "
"will be removed in 1.0 (renaming of 0.25). Use the "
"min_impurity_decrease parameter instead.",
FutureWarning
)
if min_impurity_split < 0.:
raise ValueError("min_impurity_split must be greater than "
"or equal to 0")
else:
min_impurity_split = 0
if self.min_impurity_decrease < 0.:
raise ValueError("min_impurity_decrease must be greater than "
"or equal to 0")
# TODO: Remove in 1.1
if X_idx_sorted != "deprecated":
warnings.warn(
"The parameter 'X_idx_sorted' is deprecated and has no "
"effect. It will be removed in 1.1 (renaming of 0.26). You "
"can suppress this warning by not passing any value to the "
"'X_idx_sorted' parameter.",
FutureWarning
)
# Build tree
criterion = self.criterion
if not isinstance(criterion, Criterion):
if is_classification:
criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
self.n_classes_)
else:
criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
n_samples)
else:
# Make a deepcopy in case the criterion has mutable attributes that
# might be shared and modified concurrently during parallel fitting
criterion = copy.deepcopy(criterion)
SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS
splitter = self.splitter
if not isinstance(self.splitter, Splitter):
splitter = SPLITTERS[self.splitter](criterion,
self.max_features_,
min_samples_leaf,
min_weight_leaf,
random_state)
if is_classifier(self):
self.tree_ = Tree(self.n_features_,
self.n_classes_, self.n_outputs_)
else:
self.tree_ = Tree(self.n_features_,
# TODO: the tree shouldn't need this in this case
np.array([1] * self.n_outputs_, dtype=np.intp),
self.n_outputs_)
# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
builder = DepthFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
self.min_impurity_decrease,
min_impurity_split)
else:
builder = BestFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
max_leaf_nodes,
self.min_impurity_decrease,
min_impurity_split)
builder.build(self.tree_, X, y, sample_weight)
if self.n_outputs_ == 1 and is_classifier(self):
self.n_classes_ = self.n_classes_[0]
self.classes_ = self.classes_[0]
self._prune_tree()
return self
def _validate_X_predict(self, X, check_input):
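Both fit variants above reject sparse matrices whose index arrays are int64, since the underlying Cython tree code is compiled against C int (np.intc) indices. If a large CSR/CSC matrix arrives with int64 indices, one hedged workaround (the helper name is my own, not from the source) is to downcast before fitting, provided the values fit:

import numpy as np

def ensure_intc_indices(X):
    # Downcast int64 index arrays of a CSR/CSC matrix to np.intc in place.
    for attr in ("indices", "indptr"):
        arr = getattr(X, attr)
        if arr.dtype != np.intc:
            if arr.size and arr.max() > np.iinfo(np.intc).max:
                raise OverflowError("sparse index does not fit into an intc")
            setattr(X, attr, arr.astype(np.intc))
    return X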
0
Source : tree.py
with GNU General Public License v3.0
from HHHHhgqcdxhg
def fit(self, X, y, sample_weight=None, check_input=True,
X_idx_sorted=None):
random_state = check_random_state(self.random_state)
if check_input:
X = check_array(X, dtype=DTYPE, accept_sparse="csc")
y = check_array(y, ensure_2d=False, dtype=None)
if issparse(X):
X.sort_indices()
if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
raise ValueError("No support for np.int64 index based "
"sparse matrices")
# Determine output settings
n_samples, self.n_features_ = X.shape
is_classification = is_classifier(self)
y = np.atleast_1d(y)
expanded_class_weight = None
if y.ndim == 1:
# reshape is necessary to preserve data contiguity, which indexing
# with [:, np.newaxis] does not.
y = np.reshape(y, (-1, 1))
self.n_outputs_ = y.shape[1]
if is_classification:
check_classification_targets(y)
y = np.copy(y)
self.classes_ = []
self.n_classes_ = []
if self.class_weight is not None:
y_original = np.copy(y)
y_encoded = np.zeros(y.shape, dtype=np.int)
for k in range(self.n_outputs_):
classes_k, y_encoded[:, k] = np.unique(y[:, k],
return_inverse=True)
self.classes_.append(classes_k)
self.n_classes_.append(classes_k.shape[0])
y = y_encoded
if self.class_weight is not None:
expanded_class_weight = compute_sample_weight(
self.class_weight, y_original)
else:
self.classes_ = [None] * self.n_outputs_
self.n_classes_ = [1] * self.n_outputs_
self.n_classes_ = np.array(self.n_classes_, dtype=np.intp)
if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
y = np.ascontiguousarray(y, dtype=DOUBLE)
# Check parameters
max_depth = ((2 ** 31) - 1 if self.max_depth is None
else self.max_depth)
max_leaf_nodes = (-1 if self.max_leaf_nodes is None
else self.max_leaf_nodes)
if isinstance(self.min_samples_leaf, (numbers.Integral, np.integer)):
if not 1 <= self.min_samples_leaf:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = self.min_samples_leaf
else: # float
if not 0. < self.min_samples_leaf <= 0.5:
raise ValueError("min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s"
% self.min_samples_leaf)
min_samples_leaf = int(ceil(self.min_samples_leaf * n_samples))
if isinstance(self.min_samples_split, (numbers.Integral, np.integer)):
if not 2 <= self.min_samples_split:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the integer %s"
% self.min_samples_split)
min_samples_split = self.min_samples_split
else: # float
if not 0. < self.min_samples_split <= 1.:
raise ValueError("min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the float %s"
% self.min_samples_split)
min_samples_split = int(ceil(self.min_samples_split * n_samples))
min_samples_split = max(2, min_samples_split)
min_samples_split = max(min_samples_split, 2 * min_samples_leaf)
if isinstance(self.max_features, six.string_types):
if self.max_features == "auto":
if is_classification:
max_features = max(1, int(np.sqrt(self.n_features_)))
else:
max_features = self.n_features_
elif self.max_features == "sqrt":
max_features = max(1, int(np.sqrt(self.n_features_)))
elif self.max_features == "log2":
max_features = max(1, int(np.log2(self.n_features_)))
else:
raise ValueError(
'Invalid value for max_features. Allowed string '
'values are "auto", "sqrt" or "log2".')
elif self.max_features is None:
max_features = self.n_features_
elif isinstance(self.max_features, (numbers.Integral, np.integer)):
max_features = self.max_features
else: # float
if self.max_features > 0.0:
max_features = max(1,
int(self.max_features * self.n_features_))
else:
max_features = 0
self.max_features_ = max_features
if len(y) != n_samples:
raise ValueError("Number of labels=%d does not match "
"number of samples=%d" % (len(y), n_samples))
if not 0 <= self.min_weight_fraction_leaf <= 0.5:
raise ValueError("min_weight_fraction_leaf must be in [0, 0.5]")
if max_depth <= 0:
raise ValueError("max_depth must be greater than zero.")
if not (0 < max_features <= self.n_features_):
raise ValueError("max_features must be in (0, n_features]")
if not isinstance(max_leaf_nodes, (numbers.Integral, np.integer)):
raise ValueError("max_leaf_nodes must be integral number but was "
"%r" % max_leaf_nodes)
if -1 < max_leaf_nodes < 2:
raise ValueError(("max_leaf_nodes {0} must be either None "
"or larger than 1").format(max_leaf_nodes))
if sample_weight is not None:
if (getattr(sample_weight, "dtype", None) != DOUBLE or
not sample_weight.flags.contiguous):
sample_weight = np.ascontiguousarray(
sample_weight, dtype=DOUBLE)
if len(sample_weight.shape) > 1:
raise ValueError("Sample weights array has more "
"than one dimension: %d" %
len(sample_weight.shape))
if len(sample_weight) != n_samples:
raise ValueError("Number of weights=%d does not match "
"number of samples=%d" %
(len(sample_weight), n_samples))
if expanded_class_weight is not None:
if sample_weight is not None:
sample_weight = sample_weight * expanded_class_weight
else:
sample_weight = expanded_class_weight
# Set min_weight_leaf from min_weight_fraction_leaf
if sample_weight is None:
min_weight_leaf = (self.min_weight_fraction_leaf *
n_samples)
else:
min_weight_leaf = (self.min_weight_fraction_leaf *
np.sum(sample_weight))
if self.min_impurity_split is not None:
warnings.warn("The min_impurity_split parameter is deprecated. "
"Its default value will change from 1e-7 to 0 in "
"version 0.23, and it will be removed in 0.25. "
"Use the min_impurity_decrease parameter instead.",
DeprecationWarning)
min_impurity_split = self.min_impurity_split
else:
min_impurity_split = 1e-7
if min_impurity_split < 0.:
raise ValueError("min_impurity_split must be greater than "
"or equal to 0")
if self.min_impurity_decrease < 0.:
raise ValueError("min_impurity_decrease must be greater than "
"or equal to 0")
allowed_presort = ('auto', True, False)
if self.presort not in allowed_presort:
raise ValueError("'presort' should be in {}. Got {!r} instead."
.format(allowed_presort, self.presort))
if self.presort is True and issparse(X):
raise ValueError("Presorting is not supported for sparse "
"matrices.")
presort = self.presort
# Allow presort to be 'auto', which means True if the dataset is dense,
# otherwise it will be False.
if self.presort == 'auto':
presort = not issparse(X)
# If multiple trees are built on the same dataset, we only want to
# presort once. Splitters now can accept presorted indices if desired,
# but do not handle any presorting themselves. Ensemble algorithms
# which desire presorting must do presorting themselves and pass that
# matrix into each tree.
if X_idx_sorted is None and presort:
X_idx_sorted = np.asfortranarray(np.argsort(X, axis=0),
dtype=np.int32)
if presort and X_idx_sorted.shape != X.shape:
raise ValueError("The shape of X (X.shape = {}) doesn't match "
"the shape of X_idx_sorted (X_idx_sorted"
".shape = {})".format(X.shape,
X_idx_sorted.shape))
# Build tree
criterion = self.criterion
if not isinstance(criterion, Criterion):
if is_classification:
criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
self.n_classes_)
else:
criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
n_samples)
SPLITTERS = SPARSE_SPLITTERS if issparse(X) else DENSE_SPLITTERS
splitter = self.splitter
if not isinstance(self.splitter, Splitter):
splitter = SPLITTERS[self.splitter](criterion,
self.max_features_,
min_samples_leaf,
min_weight_leaf,
random_state,
self.presort)
self.tree_ = Tree(self.n_features_, self.n_classes_, self.n_outputs_)
# Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
if max_leaf_nodes < 0:
builder = DepthFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
self.min_impurity_decrease,
min_impurity_split)
else:
builder = BestFirstTreeBuilder(splitter, min_samples_split,
min_samples_leaf,
min_weight_leaf,
max_depth,
max_leaf_nodes,
self.min_impurity_decrease,
min_impurity_split)
builder.build(self.tree_, X, y, sample_weight, X_idx_sorted)
if self.n_outputs_ == 1:
self.n_classes_ = self.n_classes_[0]
self.classes_ = self.classes_[0]
return self
def _validate_X_predict(self, X, check_input):
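The presort branch above computes the column-wise argsort of X once, as a Fortran-ordered int32 array, so that ensembles can share it across trees instead of re-sorting per split. A reduced sketch of what that precomputation produces:

import numpy as np

X = np.array([[3., 1.],
              [1., 2.],
              [2., 0.]])
X_idx_sorted = np.asfortranarray(np.argsort(X, axis=0), dtype=np.int32)
# column 0 sorted order: rows 1, 2, 0; column 1: rows 2, 0, 1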
0
Source : formats.py
with BSD 3-Clause "New" or "Revised" License
from holzschu
def set_jds(self, val1, val2):
"""Convert datetime object contained in val1 to jd1, jd2"""
# Iterate through the datetime objects, getting year, month, etc.
iterator = np.nditer([val1, None, None, None, None, None, None],
flags=['refs_ok', 'zerosize_ok'],
op_dtypes=[None] + 5*[np.intc] + [np.double])
for val, iy, im, id, ihr, imin, dsec in iterator:
dt = val.item()
if dt.tzinfo is not None:
dt = (dt - dt.utcoffset()).replace(tzinfo=None)
iy[...] = dt.year
im[...] = dt.month
id[...] = dt.day
ihr[...] = dt.hour
imin[...] = dt.minute
dsec[...] = dt.second + dt.microsecond / 1e6
jd1, jd2 = erfa.dtf2d(self.scale.upper().encode('ascii'),
*iterator.operands[1:])
self.jd1, self.jd2 = day_frac(jd1, jd2)
def to_value(self, timezone=None, parent=None):
0
Source : formats.py
with BSD 3-Clause "New" or "Revised" License
from holzschu
def set_jds(self, val1, val2):
"""Parse the time strings contained in val1 and set jd1, jd2"""
# Select subformats based on current self.in_subfmt
subfmts = self._select_subfmts(self.in_subfmt)
# Be liberal in what we accept: convert bytes to ascii.
# Here .item() is needed for arrays with entries of unequal length,
# to strip trailing 0 bytes.
to_string = (str if val1.dtype.kind == 'U' else
lambda x: str(x.item(), encoding='ascii'))
iterator = np.nditer([val1, None, None, None, None, None, None],
flags=['zerosize_ok'],
op_dtypes=[None] + 5*[np.intc] + [np.double])
for val, iy, im, id, ihr, imin, dsec in iterator:
val = to_string(val)
iy[...], im[...], id[...], ihr[...], imin[...], dsec[...] = (
self.parse_string(val, subfmts))
jd1, jd2 = erfa.dtf2d(self.scale.upper().encode('ascii'),
*iterator.operands[1:])
self.jd1, self.jd2 = day_frac(jd1, jd2)
def str_kwargs(self):
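Both set_jds implementations lean on np.nditer's output allocation: passing None operands together with op_dtypes of np.intc makes the iterator create C-int arrays matching the input's broadcast shape. A reduced, erfa-free sketch of the same pattern:

import numpy as np

vals = np.array([10.25, 11.5, 12.75])
it = np.nditer([vals, None, None],
               flags=['zerosize_ok'],
               op_dtypes=[None, np.intc, np.double])
for v, whole, frac in it:
    whole[...] = int(v)        # integer part lands in an intc slot
    frac[...] = v - int(v)     # remainder stays double precision
whole_arr, frac_arr = it.operands[1:]   # dtypes: intc, float64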
0
Source : test_basic.py
with BSD 3-Clause "New" or "Revised" License
from holzschu
def test_bad_time(self):
iy = np.array([2000], dtype=np.intc)
im = np.array([2000], dtype=np.intc) # bad month
id = np.array([2000], dtype=np.intc) # bad day
with pytest.raises(ValueError): # bad month, fatal error
djm0, djm = erfa.cal2jd(iy, im, id)
iy[0] = -5000
im[0] = 2
with pytest.raises(ValueError): # bad year, fatal error
djm0, djm = erfa.cal2jd(iy, im, id)
iy[0] = 2000
with catch_warnings() as w:
djm0, djm = erfa.cal2jd(iy, im, id)
assert len(w) == 1
assert 'bad day (JD computed)' in str(w[0].message)
assert allclose_jd(djm0, [2400000.5])
assert allclose_jd(djm, [53574.])
class TestCopyReplicate:
0
Source : suite_dmlab.py
with Apache License 2.0
from HorizonRobotics
def action_discretize(action_spec,
look_left_right_pixels_per_frame=(-20, 20),
look_down_up_pixels_per_frame=(-10, 10),
strafe_left_right=(-1, 1),
move_back_forward=(-1, 1),
fire=(),
jump=(1, ),
crouch=(1, ),
**kwargs):
"""Discretize action from action_spec
TODO: action combinations
Maps all valid action values to discrete actions.
original deepmind lab environment action_spec:
.. code-block:: python
[{'max': 512, 'min': -512, 'name': 'LOOK_LEFT_RIGHT_PIXELS_PER_FRAME'},
{'max': 512, 'min': -512, 'name': 'LOOK_DOWN_UP_PIXELS_PER_FRAME'},
{'max': 1, 'min': -1, 'name': 'STRAFE_LEFT_RIGHT'},
{'max': 1, 'min': -1, 'name': 'MOVE_BACK_FORWARD'},
{'max': 1, 'min': 0, 'name': 'FIRE'},
{'max': 1, 'min': 0, 'name': 'JUMP'},
{'max': 1, 'min': 0, 'name': 'CROUCH'}]
and discretized actions:
.. code-block::
0 -> [20,0,0,0,0,0,0] (look left 20 pixels),
1 -> [-20,0,0,0,0,0,0] (look right 20 pixels),
...,
m -> [0,0,0,-1,0,0,0] (move back),
m+1 -> [0,0,0,1,0,0,0] (move forward),
...,
n -> [0,0,0,0,1,1,0] (jump and fire),
...
see `SuiteDMLabTest.test_action_discretize` in `suite_dmlab_test.py` for examples
Args:
action_spec (list(dict)): action spec
look_left_right_pixels_per_frame (iterable|str): look left or look right pixels
look_down_up_pixels_per_frame (iterable|str): look down or look up pixels
strafe_left_right (iterable|str): strafe left or strafe right
move_back_forward (iterable|str): move back or move forward
fire (iterable|str): fire values
jump (iterable|str): jump values
crouch (iterable|str): crouch values
kwargs (dict): other config for actions
Returns:
actions (list[numpy.array]): discrete actions
"""
actions = []
config = dict(
look_left_right_pixels_per_frame=look_left_right_pixels_per_frame,
look_down_up_pixels_per_frame=look_down_up_pixels_per_frame,
strafe_left_right=strafe_left_right,
move_back_forward=move_back_forward,
fire=fire,
jump=jump,
crouch=crouch)
config.update(kwargs)
config = {key.upper(): value for key, value in config.items()}
for i, spec in enumerate(action_spec):
val_min = spec['min']
val_max = spec['max']
values = config.get(spec['name'], None)
if values is None:
values = list(range(val_min, val_max + 1))
elif isinstance(values, str):
values = eval(values)
for value in values:
if value < val_min or value > val_max or value == 0:
continue
action = np.zeros([len(action_spec)], np.intc)
action[i] = value
actions.append(action)
return actions
@alf.configurable
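For reference, each discrete action produced above is a vector of length len(action_spec) and dtype np.intc with a single nonzero entry per single-key binding. A hypothetical call with an abbreviated two-entry spec (not the full seven-entry DeepMind Lab spec):

import numpy as np

spec = [{'max': 512, 'min': -512, 'name': 'LOOK_LEFT_RIGHT_PIXELS_PER_FRAME'},
        {'max': 1, 'min': -1, 'name': 'MOVE_BACK_FORWARD'}]
actions = action_discretize(spec,
                            look_left_right_pixels_per_frame=(-20, 20),
                            move_back_forward=(-1, 1))
# actions == [array([-20, 0]), array([20, 0]), array([0, -1]), array([0, 1])]
assert all(a.dtype == np.intc for a in actions)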
0
Source : _dbscan.py
with Apache License 2.0
from intel
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
ww = make2d(sample_weight) if sample_weight is not None else None
XX = make2d(X)
fpt = getFPType(XX)
alg = daal4py.dbscan(
method='defaultDense',
fptype=fpt,
epsilon=float(eps),
minObservations=int(min_samples),
memorySavingMode=False,
resultsToCompute="computeCoreIndices"
)
daal_res = alg.compute(XX, ww)
assignments = daal_res.assignments.ravel()
if daal_res.coreIndices is not None:
core_ind = daal_res.coreIndices.ravel()
else:
core_ind = np.array([], dtype=np.intc)
return (core_ind, assignments)
class DBSCAN(DBSCAN_original):
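Note the empty-result fallback above: np.array([], dtype=np.intc) keeps the core-index array correctly typed even when DBSCAN finds no core points, so callers can index with it unconditionally. A minimal illustration:

import numpy as np

core_ind = np.array([], dtype=np.intc)     # no core samples found
labels = np.array([-1, -1, -1])
core_mask = np.zeros(labels.shape, dtype=bool)
core_mask[core_ind] = True                 # empty intc index is a valid no-op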
0
Source : env.py
with MIT License
from jkulhanek
def _action(*entries):
return np.array(entries, dtype=np.intc)
ACTION_LIST = [
0
Source : dmlab_environment.py
with MIT License
from jkulhanek
def _action(*entries):
return np.array(entries, dtype=np.intc)
ACTION_LIST = [
0
Source : dmlab_environment.py
with MIT License
from jkulhanek
def _action(*entries):
return np.array(entries, dtype=np.intc)
0
Source : deepmind_lab.py
with MIT License
from jviquerat
def execute(self, action):
"""
Pass an action to the DeepMind Lab environment and advance it by repeat_action frames.
:param action: mapping from action names to values; entries should adhere to
the specification given in DeepMindLabEnvironment.action_spec(level_id) and are
packed into a numpy array of dtype np.intc before stepping
:return: tuple (state, terminal, reward) with the RGB observation, a boolean
indicating whether the episode has ended, and the accumulated reward
"""
adjusted_action = list()
for action_spec in self.level.action_spec():
if action_spec['min'] == -1 and action_spec['max'] == 1:
adjusted_action.append(action[action_spec['name']] - 1)
else:
adjusted_action.append(action[action_spec['name']]) # clip?
action = np.array(adjusted_action, dtype=np.intc)
reward = self.level.step(action=action, num_steps=self.repeat_action)
state = self.level.observations()['RGB_INTERLACED']
terminal = not self.level.is_running()
return state, terminal, reward
def states(self):
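As the execute docstring notes, DeepMind Lab's C API requires the packed action vector to have dtype np.intc; a default int64 array is rejected. A hedged sketch of building one step's action by hand (the index/name pairing follows the action_spec shown earlier; env stands for an already-constructed deepmind_lab.Lab instance):

import numpy as np

action = np.zeros(7, dtype=np.intc)   # one slot per action_spec entry
action[0] = 20                        # LOOK_LEFT_RIGHT_PIXELS_PER_FRAME
action[3] = 1                         # MOVE_BACK_FORWARD
# reward = env.step(action, num_steps=4)   # env: a deepmind_lab.Lab instance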