from typing import Optional, Tuple, Union
import torch
from pawX import Axis, DistributionFamily
from pawX import count_skop as _count_skop
from pawX import dense_sketch_operator as _dense_sketch_operator
from pawX import gaussian_skop as _gaussian_skop
from pawX import scaled_sign_sketch as _scaled_sign_sketch
from pawX import sjlt_skop as _sjlt_skop
from pawX import sketch_tensor as _sketch_tensor
from pawX import sparse_sketch_operator as _sparse_sketch_operator
from pawX import srht as _srht
[docs]
def scaled_sign_sketch(
    m: int,
    n: int,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Build an ``(m, n)`` scaled sign sketch matrix.

    Every entry of a scaled sign sketch is independently +1 or -1, multiplied
    by a normalization factor. Such matrices serve as random projections in
    randomized linear algebra and sketching algorithms. This function is a
    thin wrapper that forwards its arguments to the ``pawX`` binding.

    Args:
        m (int): Number of rows of the result (sketch dimension).
        n (int): Number of columns of the result (original dimension).
        device (Optional[torch.device], optional): Device on which the tensor
            is created. Defaults to None (uses current device).
        dtype (Optional[torch.dtype], optional): Data type of the returned
            tensor. Defaults to None (uses default dtype).

    Returns:
        torch.Tensor: The scaled sign sketch matrix, of shape (m, n).

    Example:
        >>> import torch
        >>> from panther.sketch import scaled_sign_sketch
        >>> m, n = 32, 128
        >>> S = scaled_sign_sketch(m, n)
        >>> print(S.shape)
        torch.Size([32, 128])
        >>> # Each entry is either +1/sqrt(m) or -1/sqrt(m)
        >>> print(torch.unique(S))
        tensor([-0.1768, 0.1768]) # For m=32, 1/sqrt(32) ≈ 0.1768
    """
    # Placement options are handed to the binding by keyword, unchanged.
    tensor_options = {"device": device, "dtype": dtype}
    return _scaled_sign_sketch(m, n, **tensor_options)
[docs]
def dense_sketch_operator(
    m: int,
    n: int,
    distribution: DistributionFamily,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Build a dense ``(m, n)`` sketch operator with randomly sampled entries.

    Each entry of the returned matrix is drawn independently from
    ``distribution``. Dense sketch operators are used in randomized linear
    algebra, dimensionality reduction and compressed sensing to map
    high-dimensional data to a lower-dimensional space while preserving
    certain geometric properties. This function is a thin wrapper that
    forwards its arguments to the ``pawX`` binding.

    Args:
        m (int): Number of rows of the operator (target dimension after
            sketching).
        n (int): Number of columns of the operator (original dimension of
            the data).
        distribution (DistributionFamily): Distribution family from which
            the entries are sampled.
        device (Optional[torch.device], optional): Device on which the
            tensor is allocated (e.g. 'cpu' or 'cuda'). Defaults to None
            (current device).
        dtype (Optional[torch.dtype], optional): Data type of the returned
            tensor. Defaults to None (default dtype).

    Returns:
        torch.Tensor: A dense tensor of shape ``(m, n)`` whose entries are
        sampled from the requested distribution.

    Example:
        >>> import torch
        >>> from panther.sketch import dense_sketch_operator
        >>> from panther.sketch import DistributionFamily
        >>> m, n = 100, 500
        >>> sketch = dense_sketch_operator(m, n, DistributionFamily.GAUSSIAN)
        >>> print(sketch.shape)
        torch.Size([100, 500])

    Notes:
        - The distribution determines the properties of the operator; a
          standard normal distribution, for instance, yields a
          Johnson-Lindenstrauss transform.
        - For large matrices a sparse sketch operator may be more efficient.
    """
    operator = _dense_sketch_operator(
        m,
        n,
        distribution,
        device=device,
        dtype=dtype,
    )
    return operator
[docs]
def sketch_tensor(
    input: torch.Tensor,
    axis: int,
    new_size: int,
    distribution: Optional[DistributionFamily] = None,
    sketch_matrix: Optional[torch.Tensor] = None,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """
    Sketch ``input`` along ``axis`` with a randomized projection.

    The size of ``input`` along ``axis`` is reduced to ``new_size`` by a
    sketching (random projection) operation. Exactly one of ``distribution``
    and ``sketch_matrix`` must be supplied:

    - ``distribution``: a sketching matrix is generated from the given
      distribution family and the result is the tuple
      ``(sketched_tensor, sketch_matrix)``.
    - ``sketch_matrix``: the given precomputed matrix is used and only the
      sketched tensor is returned.

    The actual computation is delegated to the ``pawX.sketch_tensor``
    binding; this wrapper only validates the two mutually exclusive options.

    Args:
        input (torch.Tensor): Tensor to sketch. May have any shape; only the
            dimension along ``axis`` is reduced.
        axis (int): Axis along which to sketch. Must be a valid axis of
            ``input``.
        new_size (int): Target size of ``axis`` after sketching. Must be less
            than or equal to the original size.
        distribution (Optional[DistributionFamily], optional): Distribution
            family used to generate the sketching matrix. Mutually exclusive
            with ``sketch_matrix``. Defaults to None.
        sketch_matrix (Optional[torch.Tensor], optional): Precomputed
            sketching matrix to apply directly. Mutually exclusive with
            ``distribution``. Defaults to None.
        device (Optional[torch.device], optional): Device on which to compute
            and allocate the sketching matrix. If None, uses the device of
            ``input``.
        dtype (Optional[torch.dtype], optional): Data type of the output
            tensor and sketching matrix. If None, uses the dtype of
            ``input``.

    Returns:
        Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
            ``(sketched_tensor, sketch_matrix)`` when ``distribution`` is
            given; only the sketched tensor when ``sketch_matrix`` is given.

    Raises:
        ValueError: If both ``distribution`` and ``sketch_matrix`` are given,
            or if neither is.

    Example:
        >>> import torch
        >>> from panther.sketch.core import sketch_tensor, DistributionFamily
        >>> x = torch.randn(10, 100)
        >>> # Using a distribution family
        >>> sketched_x, sketch_matrix = sketch_tensor(
        ...     input=x,
        ...     axis=1,
        ...     new_size=20,
        ...     distribution=DistributionFamily.GAUSSIAN,
        ... )
        >>> print(sketched_x.shape)
        torch.Size([10, 20])
        >>> print(sketch_matrix.shape)
        torch.Size([20, 100])
        >>> # Using a precomputed sketching matrix
        >>> from panther.sketch import dense_sketch_operator
        >>> S = dense_sketch_operator(100, 20, DistributionFamily.GAUSSIAN)
        >>> sketched_x = sketch_tensor(
        ...     input=x,
        ...     axis=1,
        ...     new_size=20,
        ...     sketch_matrix=S,
        ... )
        >>> print(sketched_x.shape)
        torch.Size([10, 20])

    Notes:
        - Sketching is commonly used for dimensionality reduction, to speed
          up computations, and to preserve essential structure in large-scale
          machine learning and signal processing tasks.
        - The choice of distribution affects the quality and properties of
          the sketch.
        - The returned sketching matrix can be reused for consistent
          projections or analysis.
        - NOTE(review): the first example reports the generated sketching
          matrix as (20, 100) while the second passes a (100, 20) matrix; the
          orientation the binding expects is not visible here -- confirm.
    """
    has_distribution = distribution is not None
    has_sketch_matrix = sketch_matrix is not None

    # Guard clauses: exactly one of the two options may be supplied.
    if has_distribution and has_sketch_matrix:
        raise ValueError(
            "Cannot specify both `distribution` and `sketch_matrix`, "
            f"got distribution={distribution} and sketch_matrix={sketch_matrix}"
        )
    if not has_distribution and not has_sketch_matrix:
        raise ValueError(
            "Must specify exactly one of `distribution` or `sketch_matrix`, "
            f"got distribution={distribution} and sketch_matrix={sketch_matrix}"
        )

    if has_distribution:
        # Distribution path: the result carries both the sketched tensor and
        # the generated sketching matrix.
        return _sketch_tensor(
            input=input,
            axis=axis,
            new_size=new_size,
            distribution=distribution,
            device=device,
            dtype=dtype,
        )

    # Precomputed-matrix path: the result is the sketched tensor only.
    return _sketch_tensor(
        input=input,
        axis=axis,
        new_size=new_size,
        sketch_matrix=sketch_matrix,
        device=device,
        dtype=dtype,
    )
[docs]
def sparse_sketch_operator(
    m: int,
    n: int,
    vec_nnz: int,
    axis: Axis,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
    seed: Optional[int] = None,
) -> torch.Tensor:
    """
    Build a sparse ``(m, n)`` sketch operator with a fixed number of
    non-zeros per vector.

    Every row or column of the result (which one depends on ``axis``) holds
    exactly ``vec_nnz`` non-zero entries. Sparse sketch operators are used in
    randomized linear algebra and sketching algorithms to map
    high-dimensional data to a lower-dimensional space while preserving
    certain geometric properties. This function is a thin wrapper that
    forwards its arguments to the ``pawX`` binding.

    Args:
        m (int): Number of rows of the operator (target dimension after
            sketching).
        n (int): Number of columns of the operator (original dimension of
            the data).
        vec_nnz (int): Number of non-zero entries per vector (row or column).
        axis (Axis): Axis along which the sparse operator is built; either
            ``Axis.Short`` or ``Axis.Long``.
        device (Optional[torch.device], optional): Device on which the
            tensor is allocated (e.g. 'cpu' or 'cuda'). Defaults to None
            (current device).
        dtype (Optional[torch.dtype], optional): Data type of the returned
            tensor. Defaults to None (default dtype).
        seed (Optional[int], optional): Integer seed for the internal C++
            random number generator that constructs the sparse pattern.
            When ``None`` (default) the seed is drawn from ``torch``'s
            current RNG, so calling ``torch.manual_seed(...)`` beforehand
            gives a reproducible sketch. Pass an explicit integer to bypass
            PyTorch's RNG entirely.

    Returns:
        torch.Tensor: A sparse COO tensor of shape ``(m, n)`` with exactly
        ``vec_nnz`` non-zero entries per vector along the chosen axis.

    Example:
        >>> import torch
        >>> from panther.sketch import sparse_sketch_operator, Axis
        >>> m, n = 100, 500
        >>> vec_nnz = 5
        >>> # Reproducible via torch seed
        >>> torch.manual_seed(42)
        >>> sketch = sparse_sketch_operator(m, n, vec_nnz, Axis.Short)
        >>> print(sketch.shape)
        torch.Size([100, 500])
        >>> print(sketch._nnz()) # Number of non-zero entries in the sparse tensor
    """
    # Optional settings travel to the binding by keyword, unchanged.
    binding_options = {"device": device, "dtype": dtype, "seed": seed}
    return _sparse_sketch_operator(m, n, vec_nnz, axis, **binding_options)
[docs]
def srht(x: torch.Tensor, m: int) -> torch.Tensor:
    """
    Apply a Subsampled Randomized Hadamard Transform (SRHT) to a vector.

    For a 1D input ``x`` of length ``n`` (``n`` must be a power of 2) the
    transform consists of three steps:

    1. Random sign flipping: multiply the input by a random diagonal matrix
       with ±1 entries.
    2. The Fast Walsh-Hadamard Transform (FWHT).
    3. Uniform subsampling of ``m`` rows of the result.

    SRHT is used in randomized numerical linear algebra and compressed
    sensing to reduce dimensionality while approximately preserving
    distances. This function is a thin wrapper around the ``pawX`` binding.

    Parameters
    ----------
    x : torch.Tensor
        1D input tensor of shape ``(n,)`` with ``n`` a power of 2. It must
        reside on the CPU and have a floating point dtype.
    m : int
        Number of rows to subsample from the Hadamard-transformed output.
        Must satisfy ``0 < m <= n``.

    Returns
    -------
    torch.Tensor
        1D tensor of shape ``(m,)`` holding the subsampled rows of the
        Hadamard-transformed vector.

    Raises
    ------
    RuntimeError
        If ``x`` is not on CPU, is not 1-dimensional, or ``n`` is not a
        power of 2. Also if ``m`` is greater than ``n``.

    Example
    -------
    >>> import torch
    >>> from panther.sketch import srht
    >>> x = torch.randn(8)
    >>> y = srht(x, 4)
    >>> print(y.shape)
    torch.Size([4])
    """
    subsampled = _srht(x, m)
    return subsampled
[docs]
def count_skop(
    m: int,
    d: int,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Generates a CountSketch sketching matrix of shape (m, d).

    Each column of the sketching matrix has exactly one non-zero entry,
    which is randomly assigned to a row via a simple hash (uniform random),
    and assigned a value of ±1 with equal probability. The result is a sparse
    sketching operator used to project high-dimensional data into a
    lower-dimensional space while approximately preserving inner products.
    This is commonly used in streaming and randomized linear algebra
    algorithms. This function is a thin wrapper that forwards its arguments
    to the ``pawX`` binding.

    Args:
        m (int): Number of rows in the sketching matrix (target dimension).
        d (int): Number of columns in the sketching matrix (original dimension).
        device (Optional[torch.device], optional): Forwarded unchanged to the
            underlying binding. Defaults to None; presumably the binding then
            falls back to the current device -- confirm.
        dtype (Optional[torch.dtype], optional): Forwarded unchanged to the
            underlying binding. Defaults to None; presumably the binding then
            falls back to the default dtype -- confirm.

    Returns:
        torch.Tensor: A sparse sketching matrix of shape (m, d) with exactly one
        non-zero entry per column (either +1 or -1).

    Example:
        >>> import torch
        >>> from panther.sketch import count_skop
        >>> m, d = 100, 1000
        >>> S = count_skop(m, d)
        >>> X = torch.randn(50, d) # Input data
        >>> X_sketched = X @ S.t()
        >>> X_sketched.shape
        torch.Size([50, 100])
    """
    return _count_skop(m, d, device=device, dtype=dtype)
[docs]
def gaussian_skop(
    m: int,
    d: int,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Build a Gaussian random projection matrix of shape (m, d).

    Every entry of the sketching matrix is an independent sample from the
    normal distribution N(0, 1/m). A projection of this kind approximately
    preserves Euclidean distances and inner products with high probability.
    This function is a thin wrapper that forwards its arguments to the
    ``pawX`` binding.

    Args:
        m (int): Number of rows in the sketching matrix (target dimension).
        d (int): Number of columns in the sketching matrix (original dimension).
        device (Optional[torch.device], optional): Forwarded unchanged to the
            underlying binding. Defaults to None.
        dtype (Optional[torch.dtype], optional): Forwarded unchanged to the
            underlying binding. Defaults to None.

    Returns:
        torch.Tensor: A dense sketching matrix of shape (m, d) with entries ~ N(0, 1/m).

    Example:
        >>> S = gaussian_skop(100, 1000)
        >>> X = torch.randn(64, 1000)
        >>> X_sketched = X @ S.t()
        >>> X_sketched.shape
        torch.Size([64, 100])
    """
    projection = _gaussian_skop(m, d, device=device, dtype=dtype)
    return projection
[docs]
def sjlt_skop(
    m: int,
    d: int,
    sparsity: int = 2,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Build a sparse Johnson-Lindenstrauss Transform (SJLT) sketching matrix.

    Each column of the matrix holds exactly ``sparsity`` non-zero entries,
    placed uniformly at random among the rows, with values ±1. The matrix is
    scaled by 1/√sparsity. This sketch is ideal for fast projections with low
    memory usage. This function is a thin wrapper that forwards its arguments
    to the ``pawX`` binding.

    Args:
        m (int): Number of rows in the sketching matrix (target dimension).
        d (int): Number of columns in the sketching matrix (original dimension).
        sparsity (int, optional): Number of non-zero entries per column. Defaults to 2.
        device (Optional[torch.device], optional): Forwarded unchanged to the
            underlying binding. Defaults to None.
        dtype (Optional[torch.dtype], optional): Forwarded unchanged to the
            underlying binding. Defaults to None.

    Returns:
        torch.Tensor: A sparse sketching matrix of shape (m, d), entries ∈ {±1/√sparsity}.

    Example:
        >>> S = sjlt_skop(200, 1000, sparsity=3)
        >>> X = torch.randn(32, 1000)
        >>> X_sketched = X @ S.t()
        >>> X_sketched.shape
        torch.Size([32, 200])
    """
    # All optional settings are passed to the binding by keyword, unchanged.
    binding_options = {"sparsity": sparsity, "device": device, "dtype": dtype}
    return _sjlt_skop(m, d, **binding_options)