"""A Multi-Layer Perceptron (MLP) with discontinuous layers.
Each hidden layer can either be discontinuous or regular.
"""
import torch
from torch import nn
from scimba_torch.neural_nets.coordinates_based_nets.scimba_module import ScimbaModule
from .activation import Heaviside, activation_function
class DiscontinuousLayer(nn.Module):
r"""Class that encodes a fully connected layer which can be discontinuous or not.
    It computes :math:`y = \sigma(Ax + b) + \epsilon * H(Ax + b)`,
    where :math:`H` is the Heaviside function and :math:`\epsilon` is a learnable
    vector with one entry per output neuron.
Args:
in_size: The input dimension size.
out_size: The output dimension size.
**kwargs: Keyword arguments including:
* `activation_type` (:code:`str`): The activation function type.
Defaults to "tanh".
* `dis` (:code:`bool`): If True, the layer includes the discontinuous term,
otherwise it behaves as a regular layer. Defaults to True.
Example:
>>> layer = DiscontinuousLayer(10, 5, activation_type='relu', dis=True)
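
        A minimal forward-pass sketch (the batch size of 4 is illustrative only):

        >>> out = layer(torch.rand(4, 10))
        >>> out.shape
        torch.Size([4, 5])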
"""
    def __init__(self, in_size: int, out_size: int, **kwargs):
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size
        # Get the keyword arguments
        self.dis = kwargs.get("dis", True)
        self.layer_type = self.dis
        self.activation_type = kwargs.get("activation_type", "tanh")
        # Define the layers
        #: The linear transformation applied to the inputs.
        self.linearlayer = nn.Linear(in_size, out_size)
        #: The parameters which multiply the Heaviside function.
        #: The size is the size of the output of the layer.
        self.eps = nn.Parameter(torch.rand(out_size))
        self.activation = activation_function(
            self.activation_type, in_size=in_size, **kwargs
        )
        # Define the Heaviside function used by the discontinuous term
        self.heaviside = Heaviside(k=100)
def forward(self, inputs: torch.Tensor) -> torch.Tensor:
"""Apply the network to the inputs.
Args:
inputs: Input tensor
Returns:
The result of the network
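
        A hedged sketch of the formula: zeroing ``eps`` removes the discontinuous
        term, so the layer reduces to ``activation(linearlayer(x))``:

        >>> layer = DiscontinuousLayer(3, 2, dis=True)
        >>> with torch.no_grad():
        ...     _ = layer.eps.zero_()
        >>> x = torch.rand(5, 3)
        >>> torch.allclose(layer(x), layer.activation(layer.linearlayer(x)))
        True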
"""
        if self.layer_type:
            # Discontinuous version: sigma(Ax + b) + eps * H(Ax + b)
            x = self.linearlayer(inputs)
            res = self.activation(x) + self.eps[None, :] * self.heaviside(x)
        else:
            # Standard linear layer with activation
            res = self.activation(self.linearlayer(inputs))
        return res
def __str__(self):
"""String representation of the layer.
Returns:
A string describing the layer.
"""
return (
f"Discontinuous Layer, input size: {self.in_size}, "
f"output size: {self.out_size}, layer_type: {self.layer_type}"
)
class DiscontinuousMLP(ScimbaModule):
"""A Multi-Layer Perceptron (MLP) with discontinuous layers.
Each hidden layer can either be discontinuous or regular.
Args:
in_size: Input dimension.
out_size: Output dimension.
**kwargs: Keyword arguments including:
* `activation_type` (:code:`str`): The type of activation function to be
used for hidden layers. Defaults to "tanh".
* `activation_output` (:code:`str`): The type of activation function for
the output layer. Defaults to "id".
* `layer_sizes` (:code:`list[int]`): List of sizes for each hidden layer.
Defaults to :code:`[10, 20, 20, 20, 5]`.
            * `layer_type` (:code:`list[bool]`): List of booleans indicating whether
              each hidden layer should be discontinuous. Defaults to :code:`[False,
              False, True, False, False]`.
            * `last_layer_has_bias` (:code:`bool`): Whether the final output layer
              has a bias term. Defaults to :code:`False`.
Raises:
ValueError: If layer_sizes and layer_type lists have different lengths.
Example:
        >>> model = DiscontinuousMLP(
        ...     10, 5, activation_type="relu", activation_output="tanh",
        ...     layer_sizes=[50, 30, 20], layer_type=[False, True, False]
        ... )
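
        A forward-pass sketch for the model above (the batch size of 8 is
        illustrative only):

        >>> model(torch.rand(8, 10)).shape
        torch.Size([8, 5])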
"""
def __init__(self, in_size: int, out_size: int, **kwargs):
super().__init__(in_size, out_size, **kwargs)
# Default parameter values
self.activation_type = kwargs.get("activation_type", "tanh")
self.activation_output_type = kwargs.get("activation_output", "id")
layer_sizes = kwargs.get("layer_sizes", [10, 20, 20, 20, 5])
layer_type = kwargs.get("layer_type", [False, False, True, False, False])
last_layer_has_bias = kwargs.get("last_layer_has_bias", False)
# Ensure layer_type length matches layer_sizes length
if len(layer_type) != len(layer_sizes):
raise ValueError(
"The length of 'layer_type' must match the length of 'layer_sizes'."
)
# Prepare the network architecture
self.layer_sizes = [in_size] + layer_sizes + [out_size]
# Adding "C" to layer_type list for the output layer
self.layer_type = layer_type + ["C"]
# Hidden layers initialization
#: The list of discontinuous or regular layers in the model.
self.hidden_layers = []
for l1, l2, ltype in zip(
self.layer_sizes[:-2], self.layer_sizes[1:-1], self.layer_type
):
self.hidden_layers.append(
DiscontinuousLayer(
l1, l2, dis=ltype, activation_type=self.activation_type
)
)
self.hidden_layers = nn.ModuleList(self.hidden_layers)
# Output layer
#: The final output layer.
self.output_layer = nn.Linear(
self.layer_sizes[-2], self.layer_sizes[-1], bias=last_layer_has_bias
)
# Output activation function
self.activation_output = activation_function(
self.activation_output_type, in_size=in_size, **kwargs
)
def forward(
self, inputs: torch.Tensor, with_last_layer: bool = True
) -> torch.Tensor:
"""Forward pass through the discontinuous MLP network.
Args:
inputs: Input tensor.
with_last_layer: Whether to apply the final output layer. Defaults to True.
Returns:
Output tensor after processing through the MLP.
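
        A hedged sketch relying on the default ``layer_sizes`` (last hidden width 5):

        >>> net = DiscontinuousMLP(2, 1)
        >>> net(torch.rand(3, 2)).shape
        torch.Size([3, 1])
        >>> net(torch.rand(3, 2), with_last_layer=False).shape
        torch.Size([3, 5])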
"""
for hidden_layer in self.hidden_layers:
inputs = hidden_layer(inputs)
if with_last_layer:
inputs = self.activation_output(self.output_layer(inputs))
return inputs
def __str__(self) -> str:
"""String representation of the model.
Returns:
A string describing the model.
"""
return f"Discontinuous MLP network with layers: {self.layer_sizes}"