
Sparse Signature

log_signatures_pytorch.sparse_signature.pad_paths_correctly(paths, max_length=None)

Pad variable-length paths by repeating each path's last point.

This is the recommended padding strategy when batching variable-length paths for signatures/log-signatures: repeating the final valid point produces zero increments on the padded tail, so the signature remains unchanged.

When using this padding strategy, you typically do not need to pass a lengths tensor to signature_sparse or sparse log-signature calls, because the padding does not affect the result.
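
A quick sanity check of this invariance, using the functions documented on this page (a sketch; the True result assumes the zero-increment property described above holds):

>>> import torch
>>> from log_signatures_pytorch.sparse_signature import pad_paths_correctly, signature_sparse
>>> paths = [torch.tensor([[0.0], [1.0], [2.0]]), torch.tensor([[0.0], [1.0]])]
>>> padded, _ = pad_paths_correctly(paths)
>>> sig_batch = signature_sparse(padded, depth=2)
>>> sig_short = signature_sparse(paths[1].unsqueeze(0), depth=2)
>>> torch.allclose(sig_batch[1:2], sig_short)
True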

Parameters:

- paths (list[Tensor], required): List of tensors shaped (T_i, D) with possibly different lengths T_i. Each path must have at least one point.
- max_length (int, default None): Target padded length. If None, uses max(T_i).

Returns:

tuple[Tensor, Tensor]
  • padded_paths: Tensor of shape (batch, max_length, D)
  • lengths: Tensor of shape (batch,) containing the original lengths

Examples:

>>> import torch
>>> from log_signatures_pytorch.sparse_signature import pad_paths_correctly, signature_sparse
>>>
>>> paths = [
...     torch.tensor([[0.0], [1.0], [2.0]]),  # length 3
...     torch.tensor([[0.0], [1.0]]),         # length 2
... ]
>>> padded, lengths = pad_paths_correctly(paths)
>>> padded.shape
torch.Size([2, 3, 1])
>>> lengths
tensor([3, 2])
>>> # Padding is "signature-safe" (zero increments), so lengths is optional here:
>>> sig = signature_sparse(padded, depth=2)
Source code in src/log_signatures_pytorch/sparse_signature.py
def pad_paths_correctly(
    paths: list[Tensor], max_length: int | None = None
) -> tuple[Tensor, Tensor]:
    """Pad variable-length paths by repeating each path's last point.

    This is the recommended padding strategy when batching variable-length paths
    for signatures/log-signatures: repeating the final valid point produces zero
    increments on the padded tail, so the signature remains unchanged.

    When using this padding strategy, you typically do **not** need to pass a
    ``lengths`` tensor to :func:`signature_sparse`/sparse log-signature calls,
    because the padding does not affect the result.

    Parameters
    ----------
    paths : list[Tensor]
        List of tensors shaped ``(T_i, D)`` with possibly different lengths
        ``T_i``. Each path must have at least one point.
    max_length : int, optional
        Target padded length. If None, uses ``max(T_i)``.

    Returns
    -------
    tuple[Tensor, Tensor]
        - padded_paths: Tensor of shape ``(batch, max_length, D)``
        - lengths: Tensor of shape ``(batch,)`` containing the original lengths

    Examples
    --------
    >>> import torch
    >>> from log_signatures_pytorch.sparse_signature import pad_paths_correctly, signature_sparse
    >>>
    >>> paths = [
    ...     torch.tensor([[0.0], [1.0], [2.0]]),  # length 3
    ...     torch.tensor([[0.0], [1.0]]),         # length 2
    ... ]
    >>> padded, lengths = pad_paths_correctly(paths)
    >>> padded.shape
    torch.Size([2, 3, 1])
    >>> lengths
    tensor([3, 2])
    >>> # Padding is "signature-safe" (zero increments), so lengths is optional here:
    >>> sig = signature_sparse(padded, depth=2)
    """
    if len(paths) == 0:
        raise ValueError("paths must be a non-empty list of tensors")

    lengths_list = [int(p.shape[0]) for p in paths]
    if any(length <= 0 for length in lengths_list):
        raise ValueError("each path must have at least one point (T_i >= 1)")

    first = paths[0]
    if first.ndim != 2:
        raise ValueError(
            f"each path must have shape (T, D); got {first.shape} for paths[0]"
        )
    d = int(first.shape[1])
    device = first.device
    dtype = first.dtype

    for i, p in enumerate(paths[1:], start=1):
        if p.ndim != 2:
            raise ValueError(f"each path must have shape (T, D); got {p.shape} at {i}")
        if int(p.shape[1]) != d:
            raise ValueError(
                f"all paths must have the same D; got {d} and {int(p.shape[1])} at {i}"
            )
        if p.device != device:
            raise ValueError("all paths must be on the same device")
        if p.dtype != dtype:
            raise ValueError("all paths must have the same dtype")

    if max_length is None:
        max_length = max(lengths_list)
    max_length = int(max_length)
    if max_length <= 0:
        raise ValueError("max_length must be >= 1")
    if max_length < max(lengths_list):
        raise ValueError("max_length must be >= max(path lengths)")

    batch = len(paths)
    padded = torch.empty((batch, max_length, d), device=device, dtype=dtype)
    lengths = torch.tensor(lengths_list, device=device, dtype=torch.long)

    for b, p in enumerate(paths):
        t = int(p.shape[0])
        padded[b, :t] = p
        if t < max_length:
            padded[b, t:] = p[-1].expand(max_length - t, -1)

    return padded, lengths

log_signatures_pytorch.sparse_signature.signature_sparse(path, depth, eps=0.0, lengths=None, return_levels=False, stream=False)

Compute sparse path signature for paths with repeated points.

Uses Chen's identity to combine segment signatures, skipping zero increments (repeated points). For a path with M knots, computes the signature as the ordered tensor product of M-1 segment exponentials.
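
To make the combination step concrete, here is a minimal depth-2 sketch of Chen's identity with zero-increment skipping. It is illustrative only: segment_exp and chen_combine are hypothetical helpers written for this example, not this library's internal code.

import torch

def segment_exp(delta):
    # Depth-2 truncated tensor exponential of one linear segment:
    # level 1 is the increment; level 2 is outer(delta, delta) / 2.
    return delta, torch.outer(delta, delta) / 2

def chen_combine(a, b):
    # Chen's identity truncated at depth 2:
    # levels combine as (a1 + b1, a2 + outer(a1, b1) + b2).
    a1, a2 = a
    b1, b2 = b
    return a1 + b1, a2 + torch.outer(a1, b1) + b2

path = torch.tensor([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [2.0, 0.0]])
deltas = path[1:] - path[:-1]

sig = None
for d in deltas:
    if d.abs().max() == 0:  # repeated point: zero increment, skip it
        continue
    term = segment_exp(d)
    sig = term if sig is None else chen_combine(sig, term)
# sig now holds levels 1 and 2 of the depth-2 signature,
# built from the 2 nonzero segments instead of all 3.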

Parameters:

- path (Tensor, required): Tensor of shape (batch, T, D_aug) representing batched paths. For a single path, pass path.unsqueeze(0) to add a batch dimension.
- depth (int, required): Maximum depth L for truncation (>= 1).
- eps (float, default 0.0): Threshold for change detection.
- lengths (Tensor, default None): Tensor of shape (batch,) with valid lengths in a padded batch.
  Best practice (recommended): pad by repeating the last valid point of each path (see pad_paths_correctly). This padding produces zero increments on the tail, so it does not change the signature and you can usually leave lengths=None.
  If you instead pad with zeros or any other values, you must pass lengths to ignore the padded tail; otherwise padding can introduce spurious increments and change the result (see the zero-padding example after the Examples section).
- return_levels (bool, default False): If True, return a list of level tensors; if False, return the flattened signature.
- stream (bool, default False): If True, return signatures at each step along the path; if False, return only the final signature.

Returns:

Tensor or list[Tensor]

If return_levels=False: Tensor of shape (batch, dim_sig), or (batch, T-1, dim_sig) if stream=True, where dim_sig = D_aug + D_aug^2 + ... + D_aug^depth.

If return_levels=True: a list of tensors, either final signatures or streams depending on the stream argument.

Examples:

>>> import torch
>>> from log_signatures_pytorch.sparse_signature import signature_sparse
>>>
>>> # Path with repeats
>>> path = torch.tensor([
...     [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0], [2.0, 0.0]]
... ])
>>> sig = signature_sparse(path, depth=2)
>>> sig.shape
torch.Size([1, 6])
>>> stream_sig = signature_sparse(path, depth=2, stream=True)
>>> stream_sig.shape
torch.Size([1, 4, 6])
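
If a batch is zero-padded instead of repeat-padded, passing lengths should recover the correct result (continuing the example above; the True output assumes lengths masks the padded tail as documented):

>>> zero_padded = torch.cat([path, torch.zeros(1, 2, 2)], dim=1)  # zero-pad T from 5 to 7
>>> sig_masked = signature_sparse(zero_padded, depth=2, lengths=torch.tensor([5]))
>>> torch.allclose(sig_masked, sig)
True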
Source code in src/log_signatures_pytorch/sparse_signature.py
def signature_sparse(
    path: Tensor,
    depth: int,
    eps: float = 0.0,
    lengths: Tensor | None = None,
    return_levels: bool = False,
    stream: bool = False,
) -> Tensor | list[Tensor]:
    """Compute sparse path signature for paths with repeated points.

    Uses Chen's identity to combine segment signatures, skipping zero
    increments (repeated points). For a path with M knots, computes the
    signature as the ordered tensor product of M-1 segment exponentials.

    Parameters
    ----------
    path : Tensor
        Tensor of shape ``(batch, T, D_aug)`` representing batched paths.
        For a single path, pass ``path.unsqueeze(0)`` to add a batch dimension.
    depth : int
        Maximum depth L for truncation (>=1).
    eps : float, optional
        Threshold for change detection. Default is 0.0.
    lengths : Tensor, optional
        Tensor of shape ``(batch,)`` with valid lengths in a padded batch.

        **Best practice (recommended)**: pad by repeating the last valid point
        of each path (see :func:`pad_paths_correctly`). This padding produces
        zero increments on the tail, so it does not change the signature and
        you can usually leave ``lengths=None``.

        If you instead pad with zeros/any other values, you must pass ``lengths``
        to ignore the padded tail (otherwise padding can introduce spurious
        increments and change the result).

        Default is None.
    return_levels : bool, optional
        If True, return list of level tensors. If False, return flattened
        signature. Default is False.
    stream : bool, optional
        If True, return signatures at each step along the path. If False,
        return only the final signature. Default is False.

    Returns
    -------
    Tensor or list[Tensor]
        If ``return_levels=False``: Tensor of shape
        ``(batch, dim_sig)`` or ``(batch, T-1, dim_sig)`` (if stream=True).

        If ``return_levels=True``: List of tensors, either final signatures
        or streams depending on ``stream`` argument.

    Examples
    --------
    >>> import torch
    >>> from log_signatures_pytorch.sparse_signature import signature_sparse
    >>>
    >>> # Path with repeats
    >>> path = torch.tensor([
    ...     [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0], [2.0, 0.0]]
    ... ])
    >>> sig = signature_sparse(path, depth=2)
    >>> sig.shape
    torch.Size([1, 6])
    >>> stream_sig = signature_sparse(path, depth=2, stream=True)
    >>> stream_sig.shape
    torch.Size([1, 4, 6])
    """
    if path.ndim != 3:
        msg = (
            f"Path must be of shape (batch, T, D_aug); got {path.shape}. "
            "Wrap a single path with path.unsqueeze(0)."
        )
        raise ValueError(msg)

    if depth < 1:
        raise ValueError("depth must be >= 1")

    # Extract sparse increments (padded with zeros)
    # increments: (batch, max_segments, width)
    increments, knot_counts, knots = _sparse_increments_and_knots(
        path, eps=eps, lengths=lengths
    )

    # Construct a compressed path that generates these increments.
    # We prepend a zero starting point.
    batch_size, _, width = increments.shape
    device = path.device
    dtype = path.dtype

    zeros = torch.zeros((batch_size, 1, width), device=device, dtype=dtype)
    compressed_path_increments = torch.cat([zeros, increments], dim=1)
    compressed_path = torch.cumsum(compressed_path_increments, dim=1)

    # Compute signature using the vectorized implementation
    # If stream=True, we need the streaming signature of the compressed path
    # to reconstruct the full stream.
    sig_result = _batch_signature(compressed_path, depth=depth, stream=stream)

    if stream:
        # Map compressed stream back to full path
        seq_len = path.shape[1]

        # sig_result is (batch, max_segments, sig_dim)
        # We need to expand to (batch, seq_len-1, sig_dim)
        sig_result = _expand_stream_signature(
            sig_result, knots, seq_len - 1, knot_counts
        )

    if return_levels:
        if stream:
            return _unflatten_stream_signature(sig_result, width=width, depth=depth)
        return _unflatten_signature(sig_result, width=width, depth=depth)
    else:
        return sig_result