r"""Solves a 2D diffusion equation with Dirichlet BC.

.. math::

    -\nabla \cdot (a \nabla u) & = f in \Omega \\
    u & = g on \partial \Omega

where :math:`x = (x_1, x_2) \in \Omega = (0, 1) \times (0, 1)`, :math:`a = [[1, \alpha], [\alpha, 1]]` with :math:`-1 < \alpha < 1` a diffusion coefficient, :math:`f` such that :math:`u(x_1, x_2) = \sin(\pi x_1) \sin(\pi x_2)`, and :math:`g = 0`.

Boundary conditions are enforced strongly.

The neural network is a MLP, and the optimization is done using Natural Gradient Descent.

This particular example checks whether natural gradient preconditioning can also work if the diffusion coefficient is not taken into account in the functional operator. We see that, as long as the diffusion coefficient is not too large, the natural gradient preconditioning can still work, even if the functional operator is not correct. However, if the diffusion coefficient is large, then taking it into account in the functional operator becomes crucial for the convergence of the method.
"""

# %%

import matplotlib.pyplot as plt
import torch

from scimba_torch.approximation_space.nn_space import NNxSpace
from scimba_torch.domain.meshless_domain.domain_2d import Square2D
from scimba_torch.integration.monte_carlo import DomainSampler, TensorizedSampler
from scimba_torch.integration.monte_carlo_parameters import UniformParametricSampler
from scimba_torch.neural_nets.coordinates_based_nets.mlp import GenericMLP
from scimba_torch.numerical_solvers.elliptic_pde.pinns import (
    NaturalGradientPinnsElliptic,
)
from scimba_torch.physical_models.elliptic_pde.linear_order_2 import DivAGradUPDE
from scimba_torch.plots.plots_nd import plot_abstract_approx_spaces
from scimba_torch.utils.scimba_tensors import LabelTensor

torch.manual_seed(0)

DOMAIN_X = Square2D([(-1, 1), (-1, 1)], is_main_domain=True)
DOMAIN_MU = [(1, 1)]


def f_rhs(x: LabelTensor, mu: LabelTensor, α: float):
    x1, x2 = x.get_components()
    sin_x1, sin_x2 = torch.sin(torch.pi * x1), torch.sin(torch.pi * x2)
    cos_x1, cos_x2 = torch.cos(torch.pi * x1), torch.cos(torch.pi * x2)
    return 2 * torch.pi**2 * (sin_x1 * sin_x2 - α * cos_x1 * cos_x2)


def exact_sol(x: LabelTensor, mu: LabelTensor):
    x1, x2 = x.get_components()
    return torch.sin(torch.pi * x1) * torch.sin(torch.pi * x2)


def a(var: torch.Tensor, α: float):
    r"""Creates a function that returns a diffusion coefficient matrix for the PDE.

    For 2D problems, it is the matrix :math:`a = [[1, \alpha], [\alpha, 1]]` with :math:`-1 < \alpha < 1`.
    """
    return torch.tensor(
        [[1, α], [α, 1]],
        dtype=torch.get_default_dtype(),
        device=torch.get_default_device(),
    )[None, :, :].repeat([var.shape[0], 1, 1])


def post_processing(inputs: torch.Tensor, x: LabelTensor, mu: LabelTensor):
    x1, x2 = x.get_components()
    return inputs * (x1 + 1) * (1 - x1) * (x2 + 1) * (1 - x2)


def functional_post_processing(u, xy, mu, theta):
    return u(xy, mu, theta) * (xy[0] + 1) * (1 - xy[0]) * (xy[1] + 1) * (1 - xy[1])


class DivAGradUWithWrongFunctionalOperator(DivAGradUPDE):
    def functional_operator(self, func, x, mu, theta):
        """Wrong functional operator, which does not take into account the diffusion coefficient A."""
        grad_u = torch.func.jacrev(func, 0)
        hessian_u = torch.func.jacrev(grad_u, 0, chunk_size=None)(x, mu, theta)
        return -mu[0] * (hessian_u[..., 0, 0] + hessian_u[..., 1, 1])


def create_pinn(α: float, take_a_into_account: bool):
    sampler = TensorizedSampler(
        [DomainSampler(DOMAIN_X), UniformParametricSampler(DOMAIN_MU)]
    )

    space = NNxSpace(
        1,
        1,
        GenericMLP,
        DOMAIN_X,
        sampler,
        layer_sizes=[18] * 3,
        post_processing=post_processing,
    )

    pde_type = (
        DivAGradUPDE if take_a_into_account else DivAGradUWithWrongFunctionalOperator
    )

    pde = pde_type(
        space,
        spatial_dim=2,
        f=lambda x, mu: f_rhs(x, mu, α),
        A=lambda var: a(var, α),
    )

    return NaturalGradientPinnsElliptic(
        pde, bc_type="strong", functional_post_processing=functional_post_processing
    )


def plot_pinns(list_of_pinns):
    plot_abstract_approx_spaces(
        [pinn.space for pinn in list_of_pinns],
        DOMAIN_X,
        DOMAIN_MU,
        loss=[pinn.losses for pinn in list_of_pinns],
        residual=[pinn.pde for pinn in list_of_pinns],
        error=exact_sol,
        draw_contours=True,
        n_drawn_contours=20,
    )

    plt.show()


# %%  Test 1A: small α, taking A into account

pinn1a = create_pinn(α=0.05, take_a_into_account=True)
pinn1a.solve(max_epochs=200, loss_target=5e-9, n_collocation=3000)

# %%  Test 1B: small α, not taking A into account

pinn1b = create_pinn(α=0.05, take_a_into_account=False)
pinn1b.solve(max_epochs=200, loss_target=5e-9, n_collocation=3000)

# %%  Test 1: plots

plot_pinns([pinn1a, pinn1b])

# %%  Test 2A: large α, taking A into account

pinn2a = create_pinn(α=0.5, take_a_into_account=True)
pinn2a.solve(max_epochs=200, loss_target=5e-9, n_collocation=3000)

# %%  Test 2B: large α, not taking A into account

pinn2b = create_pinn(α=0.5, take_a_into_account=False)
pinn2b.solve(max_epochs=400, loss_target=5e-9, n_collocation=3000)

# %%  Test 2: plot

plot_pinns([pinn2a, pinn2b])

# %%