
Sparse Signature

log_signatures_pytorch.sparse_signature.pad_paths_correctly(paths, max_length=None)

Pad variable-length paths by repeating each path's last point.

This is the recommended padding strategy when batching variable-length paths for signatures/log-signatures: repeating the final valid point produces zero increments on the padded tail, so the signature remains unchanged.

When using this padding strategy, you typically do not need to pass a lengths tensor to signature_sparse or sparse log-signature calls, because the padding does not affect the result.
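
A quick sanity check of this invariance, using the functions documented on this page (a sketch; the True result assumes the zero-increment property described above holds):

>>> import torch
>>> from log_signatures_pytorch.sparse_signature import pad_paths_correctly, signature_sparse
>>> paths = [torch.tensor([[0.0], [1.0], [2.0]]), torch.tensor([[0.0], [1.0]])]
>>> padded, _ = pad_paths_correctly(paths)
>>> sig_batch = signature_sparse(padded, depth=2)
>>> sig_short = signature_sparse(paths[1].unsqueeze(0), depth=2)
>>> torch.allclose(sig_batch[1:2], sig_short)
True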

Parameters:

- paths (list[Tensor], required): List of tensors shaped (T_i, D) with possibly different lengths T_i. Each path must have at least one point.
- max_length (int, default None): Target padded length. If None, uses max(T_i).

Returns:

tuple[Tensor, Tensor]
  • padded_paths: Tensor of shape (batch, max_length, D)
  • lengths: Tensor of shape (batch,) containing the original lengths

Examples:

>>> import torch
>>> from log_signatures_pytorch.sparse_signature import pad_paths_correctly, signature_sparse
>>>
>>> paths = [
...     torch.tensor([[0.0], [1.0], [2.0]]),  # length 3
...     torch.tensor([[0.0], [1.0]]),         # length 2
... ]
>>> padded, lengths = pad_paths_correctly(paths)
>>> padded.shape
torch.Size([2, 3, 1])
>>> lengths
tensor([3, 2])
>>> # Padding is "signature-safe" (zero increments), so lengths is optional here:
>>> sig = signature_sparse(padded, depth=2)
Source code in src/log_signatures_pytorch/sparse_signature.py
def pad_paths_correctly(
    paths: list[Tensor], max_length: int | None = None
) -> tuple[Tensor, Tensor]:
    """Pad variable-length paths by repeating each path's last point.

    This is the recommended padding strategy when batching variable-length paths
    for signatures/log-signatures: repeating the final valid point produces zero
    increments on the padded tail, so the signature remains unchanged.

    When using this padding strategy, you typically do **not** need to pass a
    ``lengths`` tensor to :func:`signature_sparse`/sparse log-signature calls,
    because the padding does not affect the result.

    Parameters
    ----------
    paths : list[Tensor]
        List of tensors shaped ``(T_i, D)`` with possibly different lengths
        ``T_i``. Each path must have at least one point.
    max_length : int, optional
        Target padded length. If None, uses ``max(T_i)``.

    Returns
    -------
    tuple[Tensor, Tensor]
        - padded_paths: Tensor of shape ``(batch, max_length, D)``
        - lengths: Tensor of shape ``(batch,)`` containing the original lengths

    Examples
    --------
    >>> import torch
    >>> from log_signatures_pytorch.sparse_signature import pad_paths_correctly, signature_sparse
    >>>
    >>> paths = [
    ...     torch.tensor([[0.0], [1.0], [2.0]]),  # length 3
    ...     torch.tensor([[0.0], [1.0]]),         # length 2
    ... ]
    >>> padded, lengths = pad_paths_correctly(paths)
    >>> padded.shape
    torch.Size([2, 3, 1])
    >>> lengths
    tensor([3, 2])
    >>> # Padding is "signature-safe" (zero increments), so lengths is optional here:
    >>> sig = signature_sparse(padded, depth=2)
    """
    if len(paths) == 0:
        raise ValueError("paths must be a non-empty list of tensors")

    lengths_list = [int(p.shape[0]) for p in paths]
    if any(length <= 0 for length in lengths_list):
        raise ValueError("each path must have at least one point (T_i >= 1)")

    first = paths[0]
    if first.ndim != 2:
        raise ValueError(
            f"each path must have shape (T, D); got {first.shape} for paths[0]"
        )
    d = int(first.shape[1])
    device = first.device
    dtype = first.dtype

    for i, p in enumerate(paths[1:], start=1):
        if p.ndim != 2:
            raise ValueError(f"each path must have shape (T, D); got {p.shape} at {i}")
        if int(p.shape[1]) != d:
            raise ValueError(
                f"all paths must have the same D; got {d} and {int(p.shape[1])} at {i}"
            )
        if p.device != device:
            raise ValueError("all paths must be on the same device")
        if p.dtype != dtype:
            raise ValueError("all paths must have the same dtype")

    if max_length is None:
        max_length = max(lengths_list)
    max_length = int(max_length)
    if max_length <= 0:
        raise ValueError("max_length must be >= 1")
    if max_length < max(lengths_list):
        raise ValueError("max_length must be >= max(path lengths)")

    batch = len(paths)
    padded = torch.empty((batch, max_length, d), device=device, dtype=dtype)
    lengths = torch.tensor(lengths_list, device=device, dtype=torch.long)

    for b, p in enumerate(paths):
        t = int(p.shape[0])
        padded[b, :t] = p
        if t < max_length:
            padded[b, t:] = p[-1].expand(max_length - t, -1)

    return padded, lengths

log_signatures_pytorch.sparse_signature.signature_sparse(path, depth, eps=0.0, lengths=None, return_levels=False, stream=False)

Compute sparse path signature for paths with repeated points.

Uses Chen's identity to combine segment signatures, skipping zero increments (repeated points). For a path with M knots, computes the signature as the ordered tensor product of M-1 segment exponentials.
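
To make the combination step concrete, here is a minimal depth-2 sketch of Chen's identity with zero-increment skipping. It is illustrative only: segment_exp and chen_combine are hypothetical helpers written for this example, not this library's internal code.

import torch

def segment_exp(delta):
    # Depth-2 truncated tensor exponential of one linear segment:
    # level 1 is the increment; level 2 is outer(delta, delta) / 2.
    return delta, torch.outer(delta, delta) / 2

def chen_combine(a, b):
    # Chen's identity truncated at depth 2:
    # levels combine as (a1 + b1, a2 + outer(a1, b1) + b2).
    a1, a2 = a
    b1, b2 = b
    return a1 + b1, a2 + torch.outer(a1, b1) + b2

path = torch.tensor([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [2.0, 0.0]])
deltas = path[1:] - path[:-1]

sig = None
for d in deltas:
    if d.abs().max() == 0:  # repeated point: zero increment, skip it
        continue
    term = segment_exp(d)
    sig = term if sig is None else chen_combine(sig, term)
# sig now holds levels 1 and 2 of the depth-2 signature,
# built from the 2 nonzero segments instead of all 3.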

Parameters:

- path (Tensor, required): Tensor of shape (batch, T, D_aug) representing batched paths. For a single path, pass path.unsqueeze(0) to add a batch dimension.
- depth (int, required): Maximum depth L for truncation (>= 1).
- eps (float, default 0.0): Threshold for change detection.
- lengths (Tensor, default None): Tensor of shape (batch,) with valid lengths in a padded batch.
  Best practice (recommended): pad by repeating the last valid point of each path (see pad_paths_correctly). This padding produces zero increments on the tail, so it does not change the signature and you can usually leave lengths=None.
  If you instead pad with zeros or any other values, you must pass lengths to ignore the padded tail; otherwise padding can introduce spurious increments and change the result (see the zero-padding example after the Examples section).
- return_levels (bool, default False): If True, return a list of level tensors; if False, return the flattened signature.
- stream (bool, default False): If True, return signatures at each step along the path; if False, return only the final signature.

Returns:

Tensor or list[Tensor]

If return_levels=False: Tensor of shape (batch, dim_sig), or (batch, T-1, dim_sig) if stream=True, where dim_sig = D_aug + D_aug^2 + ... + D_aug^depth.

If return_levels=True: a list of tensors, either final signatures or streams depending on the stream argument.

Examples:

>>> import torch
>>> from log_signatures_pytorch.sparse_signature import signature_sparse
>>>
>>> # Path with repeats
>>> path = torch.tensor([
...     [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0], [2.0, 0.0]]
... ])
>>> sig = signature_sparse(path, depth=2)
>>> sig.shape
torch.Size([1, 6])
>>> stream_sig = signature_sparse(path, depth=2, stream=True)
>>> stream_sig.shape
torch.Size([1, 4, 6])
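
If a batch is zero-padded instead of repeat-padded, passing lengths should recover the correct result (continuing the example above; the True output assumes lengths masks the padded tail as documented):

>>> zero_padded = torch.cat([path, torch.zeros(1, 2, 2)], dim=1)  # zero-pad T from 5 to 7
>>> sig_masked = signature_sparse(zero_padded, depth=2, lengths=torch.tensor([5]))
>>> torch.allclose(sig_masked, sig)
True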
Source code in src/log_signatures_pytorch/sparse_signature.py
def signature_sparse(
    path: Tensor,
    depth: int,
    eps: float = 0.0,
    lengths: Tensor | None = None,
    return_levels: bool = False,
    stream: bool = False,
) -> Tensor | list[Tensor]:
    """Compute sparse path signature for paths with repeated points.

    Uses Chen's identity to combine segment signatures, skipping zero
    increments (repeated points). For a path with M knots, computes the
    signature as the ordered tensor product of M-1 segment exponentials.

    Parameters
    ----------
    path : Tensor
        Tensor of shape ``(batch, T, D_aug)`` representing batched paths.
        For a single path, pass ``path.unsqueeze(0)`` to add a batch dimension.
    depth : int
        Maximum depth L for truncation (>=1).
    eps : float, optional
        Threshold for change detection. Default is 0.0.
    lengths : Tensor, optional
        Tensor of shape ``(batch,)`` with valid lengths in a padded batch.

        **Best practice (recommended)**: pad by repeating the last valid point
        of each path (see :func:`pad_paths_correctly`). This padding produces
        zero increments on the tail, so it does not change the signature and
        you can usually leave ``lengths=None``.

        If you instead pad with zeros/any other values, you must pass ``lengths``
        to ignore the padded tail (otherwise padding can introduce spurious
        increments and change the result).

        Default is None.
    return_levels : bool, optional
        If True, return list of level tensors. If False, return flattened
        signature. Default is False.
    stream : bool, optional
        If True, return signatures at each step along the path. If False,
        return only the final signature. Default is False.

    Returns
    -------
    Tensor or list[Tensor]
        If ``return_levels=False``: Tensor of shape
        ``(batch, dim_sig)`` or ``(batch, T-1, dim_sig)`` (if stream=True).

        If ``return_levels=True``: List of tensors, either final signatures
        or streams depending on ``stream`` argument.

    Examples
    --------
    >>> import torch
    >>> from log_signatures_pytorch.sparse_signature import signature_sparse
    >>>
    >>> # Path with repeats
    >>> path = torch.tensor([
    ...     [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0], [2.0, 0.0]]
    ... ])
    >>> sig = signature_sparse(path, depth=2)
    >>> sig.shape
    torch.Size([1, 6])
    >>> stream_sig = signature_sparse(path, depth=2, stream=True)
    >>> stream_sig.shape
    torch.Size([1, 4, 6])
    """
    if path.ndim != 3:
        msg = (
            f"Path must be of shape (batch, T, D_aug); got {path.shape}. "
            "Wrap a single path with path.unsqueeze(0)."
        )
        raise ValueError(msg)

    if depth < 1:
        raise ValueError("depth must be >= 1")

    # Extract sparse increments (padded with zeros)
    # increments: (batch, max_segments, width)
    increments, knot_counts, knots = _sparse_increments_and_knots(
        path, eps=eps, lengths=lengths
    )

    # Construct a compressed path that generates these increments.
    # We prepend a zero starting point.
    batch_size, _, width = increments.shape
    device = path.device
    dtype = path.dtype

    zeros = torch.zeros((batch_size, 1, width), device=device, dtype=dtype)
    compressed_path_increments = torch.cat([zeros, increments], dim=1)
    compressed_path = torch.cumsum(compressed_path_increments, dim=1)

    # Compute signature using the vectorized implementation
    # If stream=True, we need the streaming signature of the compressed path
    # to reconstruct the full stream.
    sig_result = _batch_signature(compressed_path, depth=depth, stream=stream)

    if stream:
        # Map compressed stream back to full path
        seq_len = path.shape[1]

        # sig_result is (batch, max_segments, sig_dim)
        # We need to expand to (batch, seq_len-1, sig_dim)
        sig_result = _expand_stream_signature(
            sig_result, knots, seq_len - 1, knot_counts
        )

    if return_levels:
        if stream:
            return _unflatten_stream_signature(sig_result, width=width, depth=depth)
        return _unflatten_signature(sig_result, width=width, depth=depth)
    else:
        return sig_result