"""A Multi-Layer Perceptron (MLP) with discontinuous layers.
Each hidden layer can either be discontinuous or regular.
"""
import torch
from torch import nn
from scimba_torch.neural_nets.coordinates_based_nets.scimba_module import ScimbaModule
from .activation import Heaviside, activation_function
class DiscontinuousLayer(nn.Module):
r"""Class that encodes a fully connected layer which can be discontinuous or not.
    It computes :math:`y = \sigma(Ax + b) + \epsilon * H(Ax + b)`,
    where :math:`H` is the Heaviside function and :math:`\epsilon` is a learnable
    vector with one entry per output neuron.
Args:
in_size: The input dimension size.
out_size: The output dimension size.
**kwargs: Keyword arguments including:
* `activation_type` (:code:`str`): The activation function type.
Defaults to "tanh".
* `dis` (:code:`bool`): If True, the layer includes the discontinuous term,
otherwise it behaves as a regular layer. Defaults to True.
Example:
>>> layer = DiscontinuousLayer(10, 5, activation_type='relu', dis=True)
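
        A minimal forward-pass sketch (the batch size of 4 is illustrative only):

        >>> out = layer(torch.rand(4, 10))
        >>> out.shape
        torch.Size([4, 5])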
"""
    def __init__(self, in_size: int, out_size: int, **kwargs):
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size
        # Get the keyword arguments
        self.dis = kwargs.get("dis", True)
        self.layer_type = self.dis
        self.activation_type = kwargs.get("activation_type", "tanh")
        # Define the layers
        #: The linear transformation applied to the inputs.
        self.linearlayer = nn.Linear(in_size, out_size)
        #: The parameters which multiply the Heaviside function.
        #: The size is the size of the output of the layer.
        self.eps = nn.Parameter(torch.rand(out_size))
        self.activation = activation_function(
            self.activation_type, in_size=in_size, **kwargs
        )
        # Define the Heaviside function used by the discontinuous term
        self.heaviside = Heaviside(k=100)
def forward(self, inputs: torch.Tensor) -> torch.Tensor:
"""Apply the network to the inputs.
Args:
inputs: Input tensor
Returns:
The result of the network
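
        A hedged sketch of the formula: zeroing ``eps`` removes the discontinuous
        term, so the layer reduces to ``activation(linearlayer(x))``:

        >>> layer = DiscontinuousLayer(3, 2, dis=True)
        >>> with torch.no_grad():
        ...     _ = layer.eps.zero_()
        >>> x = torch.rand(5, 3)
        >>> torch.allclose(layer(x), layer.activation(layer.linearlayer(x)))
        True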
"""
        if self.layer_type:
            # Discontinuous version: sigma(Ax + b) + eps * H(Ax + b)
            x = self.linearlayer(inputs)
            res = self.activation(x) + self.eps[None, :] * self.heaviside(x)
        else:
            # Standard linear layer with activation
            res = self.activation(self.linearlayer(inputs))
        return res
def __str__(self):
"""String representation of the layer.
Returns:
A string describing the layer.
"""
return (
f"Discontinuous Layer, input size: {self.in_size}, "
f"output size: {self.out_size}, layer_type: {self.layer_type}"
)
class DiscontinuousMLP(ScimbaModule):
"""A Multi-Layer Perceptron (MLP) with discontinuous layers.
Each hidden layer can either be discontinuous or regular.
Args:
in_size: Input dimension.
out_size: Output dimension.
**kwargs: Keyword arguments including:
* `activation_type` (:code:`str`): The type of activation function to be
used for hidden layers. Defaults to "tanh".
* `activation_output` (:code:`str`): The type of activation function for
the output layer. Defaults to "id".
* `layer_sizes` (:code:`list[int]`): List of sizes for each hidden layer.
Defaults to :code:`[10, 20, 20, 20, 5]`.
            * `layer_type` (:code:`list[bool]`): List of booleans indicating whether
              each hidden layer should be discontinuous. Defaults to :code:`[False,
              False, True, False, False]`.
            * `last_layer_has_bias` (:code:`bool`): Whether the final output layer
              has a bias term. Defaults to :code:`False`.
Raises:
ValueError: If layer_sizes and layer_type lists have different lengths.
Example:
        >>> model = DiscontinuousMLP(
        ...     10, 5, activation_type="relu", activation_output="tanh",
        ...     layer_sizes=[50, 30, 20], layer_type=[False, True, False]
        ... )
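
        A forward-pass sketch for the model above (the batch size of 8 is
        illustrative only):

        >>> model(torch.rand(8, 10)).shape
        torch.Size([8, 5])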
"""
def __init__(self, in_size: int, out_size: int, **kwargs):
super().__init__(in_size, out_size, **kwargs)
# Default parameter values
self.activation_type = kwargs.get("activation_type", "tanh")
self.activation_output_type = kwargs.get("activation_output", "id")
layer_sizes = kwargs.get("layer_sizes", [10, 20, 20, 20, 5])
layer_type = kwargs.get("layer_type", [False, False, True, False, False])
last_layer_has_bias = kwargs.get("last_layer_has_bias", False)
# Ensure layer_type length matches layer_sizes length
if len(layer_type) != len(layer_sizes):
raise ValueError(
"The length of 'layer_type' must match the length of 'layer_sizes'."
)
# Prepare the network architecture
self.layer_sizes = [in_size] + layer_sizes + [out_size]
# Adding "C" to layer_type list for the output layer
self.layer_type = layer_type + ["C"]
# Hidden layers initialization
#: The list of discontinuous or regular layers in the model.
self.hidden_layers = []
for l1, l2, ltype in zip(
self.layer_sizes[:-2], self.layer_sizes[1:-1], self.layer_type
):
self.hidden_layers.append(
DiscontinuousLayer(
l1, l2, dis=ltype, activation_type=self.activation_type
)
)
self.hidden_layers = nn.ModuleList(self.hidden_layers)
# Output layer
#: The final output layer.
self.output_layer = nn.Linear(
self.layer_sizes[-2], self.layer_sizes[-1], bias=last_layer_has_bias
)
# Output activation function
self.activation_output = activation_function(
self.activation_output_type, in_size=in_size, **kwargs
)
def forward(
self, inputs: torch.Tensor, with_last_layer: bool = True
) -> torch.Tensor:
"""Forward pass through the discontinuous MLP network.
Args:
inputs: Input tensor.
with_last_layer: Whether to apply the final output layer. Defaults to True.
Returns:
Output tensor after processing through the MLP.
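
        A hedged sketch relying on the default ``layer_sizes`` (last hidden width 5):

        >>> net = DiscontinuousMLP(2, 1)
        >>> net(torch.rand(3, 2)).shape
        torch.Size([3, 1])
        >>> net(torch.rand(3, 2), with_last_layer=False).shape
        torch.Size([3, 5])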
"""
for hidden_layer in self.hidden_layers:
inputs = hidden_layer(inputs)
if with_last_layer:
inputs = self.activation_output(self.output_layer(inputs))
return inputs
def __str__(self) -> str:
"""String representation of the model.
Returns:
A string describing the model.
"""
return f"Discontinuous MLP network with layers: {self.layer_sizes}"