Source code for panther.sketch.core

from typing import Optional, Tuple, Union

import torch

from pawX import Axis, DistributionFamily
from pawX import count_skop as _count_skop
from pawX import dense_sketch_operator as _dense_sketch_operator
from pawX import gaussian_skop as _gaussian_skop
from pawX import scaled_sign_sketch as _scaled_sign_sketch
from pawX import sjlt_skop as _sjlt_skop
from pawX import sketch_tensor as _sketch_tensor
from pawX import sparse_sketch_operator as _sparse_sketch_operator
from pawX import srht as _srht



[docs]
def scaled_sign_sketch(
    m: int,
    n: int,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Build an ``(m, n)`` scaled sign sketch matrix.

    This is a thin Python wrapper: every argument is forwarded unchanged to the
    ``pawX`` binding, which does the actual sampling. Each entry of the result is
    independently +1 or -1 multiplied by a normalization factor (``1/sqrt(m)`` in
    the example below). Such matrices are a standard random projection in
    randomized linear algebra and sketching algorithms.

    Args:
        m (int): Number of rows of the result (the sketch dimension).
        n (int): Number of columns of the result (the original dimension).
        device (Optional[torch.device], optional): Device on which the tensor is
            created. Defaults to None (uses current device).
        dtype (Optional[torch.dtype], optional): Data type of the returned tensor.
            Defaults to None (uses default dtype).

    Returns:
        torch.Tensor: The scaled sign sketch matrix, of shape ``(m, n)``.

    Example:
        >>> import torch
        >>> from panther.sketch import scaled_sign_sketch
        >>> m, n = 32, 128
        >>> S = scaled_sign_sketch(m, n)
        >>> print(S.shape)
        torch.Size([32, 128])
        >>> # Only two distinct values appear: +1/sqrt(m) and -1/sqrt(m)
        >>> print(torch.unique(S))
        tensor([-0.1768,  0.1768])  # For m=32, 1/sqrt(32) ≈ 0.1768

    """
    sign_matrix = _scaled_sign_sketch(m, n, device=device, dtype=dtype)
    return sign_matrix




[docs]
def dense_sketch_operator(
    m: int,
    n: int,
    distribution: DistributionFamily,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Sample a dense ``(m, n)`` sketch operator from a distribution family.

    The work is delegated to the ``pawX`` binding; this wrapper only forwards
    its arguments. Every entry of the result is drawn independently from
    ``distribution``. Dense sketch operators are used in randomized linear
    algebra, dimensionality reduction and compressed sensing to map
    high-dimensional data to a lower-dimensional space while preserving
    certain geometric properties.

    Args:
        m (int): Number of rows of the operator, i.e. the target dimension
            after sketching.
        n (int): Number of columns of the operator, i.e. the original
            dimension of the data.
        distribution (DistributionFamily): Distribution family the entries are
            sampled from (for example a standard normal or a uniform
            distribution, or any other supported family).
        device (Optional[torch.device], optional): Device on which the tensor
            is allocated (e.g. 'cpu' or 'cuda'). If None, defaults to the
            current device.
        dtype (Optional[torch.dtype], optional): Data type of the returned
            tensor. If None, defaults to the default dtype.

    Returns:
        torch.Tensor: Dense tensor of shape ``(m, n)`` whose entries are sampled
        from ``distribution``.

    Example:
        >>> import torch
        >>> from panther.sketch import dense_sketch_operator
        >>> from panther.sketch import DistributionFamily
        >>> m, n = 100, 500
        >>> sketch = dense_sketch_operator(m, n, DistributionFamily.GAUSSIAN)
        >>> print(sketch.shape)
        torch.Size([100, 500])

    Notes:
        - The distribution determines the properties of the operator; a
          standard normal distribution, for instance, yields a
          Johnson-Lindenstrauss transform.
        - For large matrices, a sparse sketch operator may be more efficient.
    """
    # Tensor-factory options are grouped and passed through by keyword.
    factory_kwargs = {"device": device, "dtype": dtype}
    return _dense_sketch_operator(m, n, distribution, **factory_kwargs)




[docs]
def sketch_tensor(
    input: torch.Tensor,
    axis: int,
    new_size: int,
    distribution: Optional[DistributionFamily] = None,
    sketch_matrix: Optional[torch.Tensor] = None,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    """
    Sketch a tensor along one axis with a randomized projection.

    The size of ``input`` along ``axis`` is reduced to ``new_size`` by applying a
    sketching (random projection) operation carried out by the ``pawX`` binding.
    The projection is selected by passing **exactly one** of ``distribution`` or
    ``sketch_matrix``:

    - ``distribution``: a sketch matrix is generated from that distribution
      family, and a tuple ``(sketched_tensor, sketch_matrix)`` is returned.
    - ``sketch_matrix``: the given matrix is used directly, and only the
      sketched tensor is returned.

    Args:
        input (torch.Tensor): Tensor to be sketched. Can be of any shape; only
            the dimension along ``axis`` is reduced.
        axis (int): Axis along which to sketch. Must be a valid axis of
            ``input``.
        new_size (int): Target size of ``axis`` after sketching. Must be less
            than or equal to the original size.
        distribution (Optional[DistributionFamily], optional): Distribution
            family used to generate the sketching matrix (e.g. Gaussian).
            Mutually exclusive with ``sketch_matrix``. Defaults to None.
        sketch_matrix (Optional[torch.Tensor], optional): Precomputed sketching
            matrix to apply directly. Mutually exclusive with ``distribution``.
            Defaults to None.
        device (Optional[torch.device], optional): Device on which to perform
            the computation and allocate the sketching matrix. If None, uses
            the device of ``input``.
        dtype (Optional[torch.dtype], optional): Data type of the output tensor
            and sketching matrix. If None, uses the dtype of ``input``.

    Returns:
        Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
            ``(sketched_tensor, sketch_matrix)`` when ``distribution`` is given;
            only the sketched tensor when ``sketch_matrix`` is given.

    Raises:
        ValueError: If both ``distribution`` and ``sketch_matrix`` are given, or
            if neither is given.

    Example:
        >>> import torch
        >>> from panther.sketch.core import sketch_tensor, DistributionFamily
        >>> x = torch.randn(10, 100)
        >>> # Let the binding draw the sketching matrix from a distribution
        >>> sketched_x, sketch_matrix = sketch_tensor(
        ...     input=x,
        ...     axis=1,
        ...     new_size=20,
        ...     distribution=DistributionFamily.GAUSSIAN,
        ... )
        >>> print(sketched_x.shape)
        torch.Size([10, 20])
        >>> # Reuse the returned matrix for a consistent projection
        >>> sketched_again = sketch_tensor(
        ...     input=x,
        ...     axis=1,
        ...     new_size=20,
        ...     sketch_matrix=sketch_matrix,
        ... )
        >>> print(sketched_again.shape)
        torch.Size([10, 20])

    Notes:
        - Sketching is commonly used for dimensionality reduction, for speeding
          up computations, and for preserving essential structure in
          large-scale machine learning and signal processing tasks.
        - The choice of distribution affects the quality and properties of the
          sketch.
        - The returned sketching matrix can be reused for consistent
          projections or analysis.
        - NOTE(review): the orientation expected for a user-supplied
          ``sketch_matrix`` is defined by the ``pawX`` binding and is not
          visible here; confirm it before passing a matrix that was not
          returned by this function.
    """
    # Enforce exactly one path: reject "both given" first, then "neither given".
    if distribution is not None and sketch_matrix is not None:
        raise ValueError(
            "Cannot specify both `distribution` and `sketch_matrix`, "
            f"got distribution={distribution} and sketch_matrix={sketch_matrix}"
        )
    if distribution is None and sketch_matrix is None:
        raise ValueError(
            "Must specify exactly one of `distribution` or `sketch_matrix`, "
            f"got distribution={distribution} and sketch_matrix={sketch_matrix}"
        )

    # Only the selected argument is handed to the low-level binding; the other
    # one is omitted from the call entirely rather than passed as None.
    if distribution is not None:
        # Distribution path: returns both sketched tensor and sketch matrix.
        return _sketch_tensor(
            input=input,
            axis=axis,
            new_size=new_size,
            distribution=distribution,
            device=device,
            dtype=dtype,
        )

    # Sketch-matrix path: returns only the sketched tensor.
    return _sketch_tensor(
        input=input,
        axis=axis,
        new_size=new_size,
        sketch_matrix=sketch_matrix,
        device=device,
        dtype=dtype,
    )




[docs]
def sparse_sketch_operator(
    m: int,
    n: int,
    vec_nnz: int,
    axis: Axis,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
    seed: Optional[int] = None,
) -> torch.Tensor:
    """
    Build a sparse ``(m, n)`` sketch operator with a fixed number of non-zeros per vector.

    The matrix is produced by the ``pawX`` binding; this wrapper forwards its
    arguments unchanged. Each row or column (which one depends on ``axis``) of
    the result holds exactly ``vec_nnz`` non-zero entries. Sparse operators of
    this kind are used in randomized linear algebra and sketching algorithms to
    project high-dimensional data into a lower-dimensional space while
    preserving certain geometric properties.

    Args:
        m (int): Number of rows of the operator, i.e. the target dimension
            after sketching.
        n (int): Number of columns of the operator, i.e. the original
            dimension of the data.
        vec_nnz (int): Number of non-zero entries per vector (row or column).
        axis (Axis): Axis along which the sparse operator is built; either
            ``Axis.Short`` or ``Axis.Long``.
        device (Optional[torch.device], optional): Device on which the tensor
            is allocated (e.g. 'cpu' or 'cuda'). If None, defaults to the
            current device.
        dtype (Optional[torch.dtype], optional): Data type of the returned
            tensor. If None, defaults to the default dtype.
        seed (Optional[int], optional): Integer seed for the internal C++
            random number generator that constructs the sparse pattern. When
            ``None`` (default) the seed is drawn from ``torch``'s current RNG,
            so calling ``torch.manual_seed(...)`` beforehand gives a
            reproducible sketch. Pass an explicit integer to bypass PyTorch's
            RNG entirely.

    Returns:
        torch.Tensor: Sparse COO tensor of shape ``(m, n)`` with exactly
        ``vec_nnz`` non-zero entries per vector along the chosen axis.

    Example:
        >>> import torch
        >>> from panther.sketch import sparse_sketch_operator, Axis
        >>> m, n = 100, 500
        >>> vec_nnz = 5
        >>> # Reproducible via torch seed
        >>> torch.manual_seed(42)
        >>> sketch = sparse_sketch_operator(m, n, vec_nnz, Axis.Short)
        >>> print(sketch.shape)
        torch.Size([100, 500])
        >>> print(sketch._nnz())  # Number of non-zero entries in the sparse tensor

    """
    sparse_operator = _sparse_sketch_operator(
        m,
        n,
        vec_nnz,
        axis,
        device=device,
        dtype=dtype,
        seed=seed,
    )
    return sparse_operator




[docs]
def srht(x: torch.Tensor, m: int) -> torch.Tensor:
    """
    Apply a Subsampled Randomized Hadamard Transform (SRHT) to a vector.

    The computation is delegated to the ``pawX`` binding. For a 1D input ``x``
    of length ``n`` (``n`` must be a power of 2) the transform consists of:

    1. Random sign flipping: multiply the input by a random diagonal matrix
       with ±1 entries.
    2. The Fast Walsh-Hadamard Transform (FWHT).
    3. Uniform subsampling of ``m`` rows from the result.

    SRHT is commonly used in randomized numerical linear algebra and compressed
    sensing to reduce dimensionality while approximately preserving distances.

    Args:
        x (torch.Tensor): 1D input tensor of shape ``(n,)`` with ``n`` a power
            of 2. Must reside on the CPU and be of floating point type.
        m (int): Number of rows to subsample from the Hadamard-transformed
            output. Must satisfy ``0 < m <= n``.

    Returns:
        torch.Tensor: 1D tensor of shape ``(m,)`` holding the subsampled rows
        of the Hadamard-transformed vector.

    Raises:
        RuntimeError: If ``x`` is not on CPU, is not 1-dimensional, or ``n`` is
            not a power of 2; also if ``m`` is greater than ``n``.

    Example:
        >>> import torch
        >>> from panther.sketch import srht
        >>> x = torch.randn(8)
        >>> y = srht(x, 4)
        >>> print(y.shape)
        torch.Size([4])
    """
    subsampled = _srht(x, m)
    return subsampled




[docs]
def count_skop(
    m: int,
    d: int,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Generates a CountSketch sketching matrix of shape (m, d).

    The matrix is produced by the ``pawX`` binding; this wrapper forwards its
    arguments unchanged. Each column of the sketching matrix has exactly one
    non-zero entry, which is randomly assigned to a row via a simple hash
    (uniform random) and given a value of ±1 with equal probability. The result
    is a sparse sketching operator used to project high-dimensional data into a
    lower-dimensional space while approximately preserving inner products.

    This is commonly used in streaming and randomized linear algebra algorithms.

    Args:
        m (int): Number of rows in the sketching matrix (target dimension).
        d (int): Number of columns in the sketching matrix (original dimension).
        device (Optional[torch.device], optional): Device passed through to the
            binding for the returned tensor. Defaults to None.
        dtype (Optional[torch.dtype], optional): Data type passed through to the
            binding for the returned tensor. Defaults to None.

    Returns:
        torch.Tensor: A sparse sketching matrix of shape (m, d) with exactly one
                      non-zero entry per column (either +1 or -1).

    Example:
        >>> import torch
        >>> from panther.sketch.core import count_skop
        >>> m, d = 100, 1000
        >>> S = count_skop(m, d)
        >>> X = torch.randn(50, d)  # Input data
        >>> X_sketched = X @ S.t()
        >>> X_sketched.shape
        torch.Size([50, 100])
    """
    return _count_skop(m, d, device=device, dtype=dtype)




[docs]
def gaussian_skop(
    m: int,
    d: int,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Build a Gaussian random projection matrix of shape (m, d).

    Sampling is performed by the ``pawX`` binding; this wrapper forwards its
    arguments unchanged. Every entry of the sketching matrix is an independent
    draw from the normal distribution N(0, 1/m). A projection of this type
    approximately preserves Euclidean distances and inner products with high
    probability.

    Args:
        m (int): Number of rows in the sketching matrix (target dimension).
        d (int): Number of columns in the sketching matrix (original dimension).
        device (Optional[torch.device], optional): Device passed through to the
            binding for the returned tensor. Defaults to None.
        dtype (Optional[torch.dtype], optional): Data type passed through to the
            binding for the returned tensor. Defaults to None.

    Returns:
        torch.Tensor: A dense sketching matrix of shape (m, d) with entries ~ N(0, 1/m).

    Example:
        >>> import torch
        >>> from panther.sketch.core import gaussian_skop
        >>> S = gaussian_skop(100, 1000)
        >>> X = torch.randn(64, 1000)
        >>> X_sketched = X @ S.t()
        >>> X_sketched.shape
        torch.Size([64, 100])
    """
    # Tensor-factory options are grouped and passed through by keyword.
    factory_kwargs = {"device": device, "dtype": dtype}
    return _gaussian_skop(m, d, **factory_kwargs)




[docs]
def sjlt_skop(
    m: int,
    d: int,
    sparsity: int = 2,
    device: Optional[torch.device] = None,
    dtype: Optional[torch.dtype] = None,
) -> torch.Tensor:
    """
    Build a sparse Johnson-Lindenstrauss Transform (SJLT) sketching matrix.

    Construction is performed by the ``pawX`` binding; this wrapper forwards its
    arguments unchanged. Every column of the matrix holds exactly ``sparsity``
    non-zero entries, placed uniformly at random among the rows, with values
    ±1, and the matrix is scaled by 1/√sparsity. This sketch suits fast
    projections with low memory usage.

    Args:
        m (int): Number of rows in the sketching matrix (target dimension).
        d (int): Number of columns in the sketching matrix (original dimension).
        sparsity (int, optional): Number of non-zero entries per column. Defaults to 2.
        device (Optional[torch.device], optional): Device passed through to the
            binding for the returned tensor. Defaults to None.
        dtype (Optional[torch.dtype], optional): Data type passed through to the
            binding for the returned tensor. Defaults to None.

    Returns:
        torch.Tensor: A sparse sketching matrix of shape (m, d), entries ∈ {±1/√sparsity}.

    Example:
        >>> import torch
        >>> from panther.sketch.core import sjlt_skop
        >>> S = sjlt_skop(200, 1000, sparsity=3)
        >>> X = torch.randn(32, 1000)
        >>> X_sketched = X @ S.t()
        >>> X_sketched.shape
        torch.Size([32, 200])
    """
    sjlt_matrix = _sjlt_skop(
        m,
        d,
        sparsity=sparsity,
        device=device,
        dtype=dtype,
    )
    return sjlt_matrix