"""Package teneva, module sample: random sampling for/from the TT-tensor.
This module contains functions for sampling from the TT-tensor and for
generation of random multi-indices and points for learning.
"""
import itertools
import numpy as np
import teneva
[docs]def sample(Y, m=1, seed=None, unsert=1.E-10):
"""Sample multi-indices according to given probability TT-tensor.
Args:
Y (list): TT-tensor, which represents the discrete probability
distribution.
m (int, float): number of samples.
seed (int): random seed. It should be an integer number or a numpy
Generator class instance.
unsert (float): noise parameter.
Returns:
np.ndarray: generated multi-indices for the tensor in the form
of array of the shape [m, d], where d is the dimension of the tensor.
"""
m = int(m)
d = len(Y)
rand = teneva._rand(seed)
phi = [None]*(d+1)
phi[-1] = np.ones(1)
for i in range(d-1, 0, -1):
phi[i] = np.sum(Y[i], axis=1) @ phi[i+1]
p = Y[0] @ phi[1]
p = p.flatten()
p += unsert
p = np.maximum(p, 0)
p = p / p.sum()
ind = rand.choice(Y[0].shape[1], m, p=p)
phi[0] = Y[0][0, ind, :] # ind here is an array even if m=1
res = np.zeros((m, d), dtype=int)
res[:, 0] = ind
for i, c in enumerate(Y[1:], start=1):
p = np.einsum('ma,aib,b->mi', phi[i-1], Y[i], phi[i+1])
p = np.maximum(p, 0)
ind = [rand.choice(c.shape[1], p=pi/pi.sum()) for pi in p]
ind = np.array(ind)
res[:, i] = ind
phi[i] = np.einsum('il,lij->ij', phi[i-1], c[:, ind])
return res
[docs]def sample_lhs(n, m, seed=None):
"""Generate LHS multi-indices for the tensor of the given shape.
Args:
n (list, np.ndarray): tensor size for each dimension (list or
np.ndarray of int/float of the length d, where d is the
dimension of the tensor).
m (int, float): number of samples.
seed (int): random seed. It should be an integer number or a numpy
Generator class instance.
Returns:
np.ndarray: generated multi-indices for the tensor in the form of array
of the shape [m, d], where d is the dimension of the tensor.
"""
n = np.asanyarray(n, dtype=int)
m = int(m)
d = len(n)
rand = teneva._rand(seed)
I = np.empty((m, d), dtype=int)
for i, k in enumerate(n):
I1 = np.repeat(np.arange(k), m // k)
I2 = rand.choice(k, m-len(I1), replace=False)
I[:, i] = np.concatenate([I1, I2])
rand.shuffle(I[:, i])
return I
[docs]def sample_rand(n, m, seed=None):
"""Generate random multi-indices for the tensor of the given shape.
Args:
n (list, np.ndarray): tensor size for each dimension (list or
np.ndarray of int/float of the length d, where d is the
dimension of the tensor).
m (int, float): number of samples.
seed (int): random seed. It should be an integer number or a numpy
Generator class instance.
Returns:
np.ndarray: generated multi-indices for the tensor in the form of array
of the shape [m, d], where d is the dimension of the tensor.
"""
n = np.asanyarray(n, dtype=int)
m = int(m)
d = len(n)
rand = teneva._rand(seed)
I = np.vstack([rand.choice(np.arange(k), m) for k in n]).T
return I
[docs]def sample_rand_poi(a, b, m, seed=None):
"""Generate random multidimensional points from provided limits.
Args:
a (list, np.ndarray): lower grid limits (list or np.ndarray of float
of the length d, where d is the dimension).
b (list, np.ndarray): upper grid limits (list or np.ndarray of float
of the length d, where d is the dimension).
m (int, float): number of samples.
seed (int): random seed. It should be an integer number or a numpy
Generator class instance.
Returns:
np.ndarray: generated random multidimensional points in the form of
array of the shape [m, d], where d is the dimension of the problem.
"""
d = len(a)
rand = teneva._rand(seed)
X = [rand.uniform(a[i], b[i], int(m)) for i in range(d)]
return np.vstack(X).T
[docs]def sample_square(Y, m=1, unique=True, seed=None, m_fact=5, max_rep=100,
float_cf=None):
"""Sample according to given probability TT-tensor (with squaring it).
Args:
Y (list): TT-tensor, which represents the discrete probability
distribution.
m (int, float): number of samples.
unique (bool): if True, then unique multi-indices will be generated.
seed (int): random seed. It should be an integer number or a numpy
Generator class instance.
m_fact (int): scale factor to find enough unique samples.
max_rep (int): number of restarts to find enough unique samples.
float_cf (float): special parameter (TODO: check).
Returns:
np.ndarray: generated multi-indices for the tensor in the form
of array of the shape [m, d], where d is the dimension of the tensor.
"""
m = int(m)
d = len(Y)
rand = teneva._rand(seed)
err_msg = 'Can not generate the required number of samples'
Z, p = teneva.orthogonalize(Y, 0, use_stab=True)
G = Z[0]
r1, n, r2 = G.shape
if float_cf is not None:
n, nold = n * float_cf, n
G = _extend_core(G, n)
Q = G.reshape(n, r2)
m1 = m_fact*m if unique else m
Q, I1 = _sample_core_first(Q, teneva._range(n), m1, rand)
I = np.empty([I1.shape[0], d])
I[:, 0] = I1[:, 0]
for di, G in enumerate(Z[1:], start=1):
r1, n, r2 = G.shape
if float_cf is not None:
n, nold = n * float_cf, n
G = _extend_core(G, n)
Qtens = np.einsum('kr,riq->kiq', Q, G, optimize='optimal')
Q = np.empty([Q.shape[0], r2])
for im, qm, qnew in zip(I, Qtens, Q):
norms = np.sum(qm**2, axis=1)
norms /= norms.sum()
i_cur = im[di] = rand.choice(n, size=1, p=norms)
qnew[:] = qm[i_cur]
if unique:
I = np.unique(I, axis=0)
if I.shape[0] < m:
if max_rep < 0 or m_fact > 1000000:
raise ValueError(err_msg)
return sample_square(Y, m, True, seed, 2*m_fact, max_rep-1,
float_cf=float_cf)
else:
np.random.shuffle(I)
I = I[:m]
if I.shape[0] != m:
raise ValueError(err_msg)
if float_cf is not None:
I = I / float_cf
else:
I = I.astype(int)
return I
[docs]def sample_tt(n, r=4, seed=None):
"""Generate special samples for the tensor of the shape n.
Generate special samples (multi-indices) for the tensor, which are the best
(in many cases) for the subsequent construction of the TT-tensor.
Args:
n (list, np.ndarray): tensor size for each dimension (list or
np.ndarray of int/float of the length d).
r (int): expected TT-rank of the tensor. The number of generated
samples will be selected according to this value.
seed (int): random seed. It should be an integer number or a numpy
Generator class instance.
Returns:
(np.ndarray, np.ndarray, np.ndarray): generated multi-indices for the
tensor in the form of array of the shape [samples, d], starting
poisitions in generated samples for the corresponding dimensions in the
form of array of the shape [d+1] and numbers of points for the right
unfoldings in generated samples in the form of array of the shape [d].
Note:
The resulting number of samples will be chosen adaptively based on the
specified expected TT-rank (r).
"""
def one_mode(sh1, sh2, rng):
res = []
if len(sh2) == 0:
lhs_1 = sample_lhs(sh1, r, seed)
for n in range(rng):
for i in lhs_1:
res.append(np.concatenate([i, [n]]))
len_1, len_2 = len(lhs_1), 1
elif len(sh1) == 0:
lhs_2 = sample_lhs(sh2, r, seed)
for n in range(rng):
for j in lhs_2:
res.append(np.concatenate([[n], j]))
len_1, len_2 = 1, len(lhs_2)
else:
lhs_1 = sample_lhs(sh1, r, seed)
lhs_2 = sample_lhs(sh2, r, seed)
for n in range(rng):
for i, j in itertools.product(lhs_1, lhs_2):
res.append(np.concatenate([i, [n], j]))
len_1, len_2 = len(lhs_1), len(lhs_2)
return res, len_1, len_2
I, idx, idx_many = [], [0], []
for i in range(len(n)):
pnts, len_1, len_2 = one_mode(n[:i], n[i+1:], n[i])
I.append(pnts)
idx.append(idx[-1] + len(pnts))
idx_many.append(len_2)
return np.vstack(I), np.array(idx), np.array(idx_many)
def _extend_core(G, n):
r1, nold, r2 = G.shape
Gn = np.empty([r1, n, r2])
for i1 in range(r1):
for i2 in range(r2):
Gn[i1, :, i2] = np.interp(np.arange(n)*(nold - 1)/(n - 1),
range(nold), G[i1, :, i2])
return Gn
def _sample_core_first(Q, I, m, rand):
n = Q.shape[0]
norms = np.sum(Q**2, axis=1)
norms /= norms.sum()
ind = rand.choice(n, size=m, p=norms, replace=True)
return Q[ind, :], I[ind, :]