#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
***********************************************************************************
                           tutorial_opencs_dae_8_vector_kernels.py
                DAE Tools: pyOpenCS module, www.daetools.com
                Copyright (C) Dragan Nikolic
***********************************************************************************
DAE Tools is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License version 3 as published by the Free Software
Foundation. DAE Tools is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with the
DAE Tools software; if not, see <http://www.gnu.org/licenses/>.
************************************************************************************
"""
__doc__ = """
In this example the Cahn-Hilliard equation is solved using the finite difference method.
This equation describes the process of phase separation, where two components of a
binary mixture separate and form domains pure in each component.

   dc/dt = Diffusivity * nabla^2(mu)
   mu = c^3 - c - gamma * nabla^2(c)

The mesh is a simple square (0-100)x(0-100).
Input parameters are Diffusivity = 1 and gamma = 1.
For both c an mu insulated boundary conditions are set (no flux on boundaries).
Initial conditions are set to c(0) = 0.0 + c_noise where the noise is specified using
the normal distribution with standard deviation of 0.1.
The system is integrated for 500 seconds and the outputs are taken every 5 seconds.

This version is implemented using auto- and explicitly vectorised kernels.
"""

import os, sys, itertools, numpy, pandas, time
from daetools.solvers.opencs import csModelBuilder_t, csSimulate
from daetools.solvers.opencs import csGroup_t, csKernel_t, csEquation_t
from daetools.solvers.opencs import eAPI_VectorisedSharedLibrary
from daetools.solvers.opencs import eExplicitVectorisation_Default, eExplicitVectorisation_doubleN, eExplicitVectorisation_AVX2_SIMD
from daetools.solvers.opencs import eExplicitVectorisation_AVX512_SIMD, eExplicitVectorisation_AVX2_Intel, eExplicitVectorisation_AVX512_Intel
from daetools.solvers.opencs import eExplicitVectorisation_AVX2_AMD, eExplicitVectorisation_AVX512_AMD
import matplotlib.pyplot as plt
from matplotlib import animation

Diffusivity = 1.0
gamma       = 1.0

class CahnHilliard_Kernels_2D:
    def __init__(self, Nx, Ny, c_flux_bc, mu_flux_bc):
        self.Nx = Nx
        self.Ny = Ny
        self.c_flux_bc  = c_flux_bc
        self.mu_flux_bc = mu_flux_bc
        
        self.x0 = 0.0
        self.x1 = 100.0
        self.y0 = 0.0
        self.y1 = 100.0
        self.dx = (self.x1-self.x0) / (Nx-1)
        self.dy = (self.y1-self.y0) / (Ny-1)

        self.x_domain = []
        self.y_domain = []
        for x in range(self.Nx):
            self.x_domain.append(self.x0 + x * self.dx)
        for y in range(self.Ny):
            self.y_domain.append(self.y0 + y * self.dy)

        self.c_start_index  = 0*Nx*Ny
        self.mu_start_index = 1*Nx*Ny

        self.Nequations = 2*Nx*Ny

        self.group_BCs = csGroup_t ("BoundaryConditions", 1) # the group for boundary conditions (group 1)
        self.kernel_c  = csKernel_t("CahnHilliard_c",     2) # the kernel for c-component (group 2)
        self.kernel_mu = csKernel_t("CahnHilliard_mu",    3) # the kernel for mu-component (group 3)

    def GetInitialConditions(self):
        # Use numpy array so that setting u_0 and v_0 changes the original values
        uv0  = numpy.zeros(self.Nequations)
        c_0  = uv0[self.c_start_index  : self.mu_start_index]
        mu_0 = uv0[self.mu_start_index : self.Nequations]

        numpy.random.seed(124)
        c0     = 0.0
        stddev = 0.1
        def c_with_noise():
            return numpy.random.normal(c0, stddev)

        for ix in range(self.Nx):
            for iy in range(self.Ny):
                index = self.GetIndex(ix,iy)

                c_0[index] = c_with_noise()

        return uv0.tolist()

    def GetVariableNames(self):
        x_y_inds = [(x,y) for x,y in itertools.product(range(self.Nx), range(self.Ny))]
        return ['c(%d,%d)'%(x,y) for x,y in x_y_inds] + ['mu(%d,%d)'%(x,y) for x,y in x_y_inds]

    def CreateEquations(self, y, dydt):
        # y is a list of csNumber_t objects representing model variables
        # dydt is a list of csNumber_t objects representing time derivatives of model variables
        c_values     = y   [self.c_start_index  : self.mu_start_index]
        mu_values    = y   [self.mu_start_index : self.Nequations]
        dcdt_values  = dydt[self.c_start_index  : self.mu_start_index]
        dmudt_values = dydt[self.mu_start_index : self.Nequations]
        
        c_flux_bc  = self.c_flux_bc
        mu_flux_bc = self.mu_flux_bc
        
        dx = self.dx
        dy = self.dy
        Nx = self.Nx
        Ny = self.Ny

        def c(x, y):
            index = self.GetIndex(x, y)
            return c_values[index]
        
        def mu(x, y):
            index = self.GetIndex(x, y)
            return mu_values[index]

        def dc_dt(x, y):
            index = self.GetIndex(x, y)
            return dcdt_values[index]

        def dmu_dt(x, y):
            index = self.GetIndex(x, y)
            return dmudt_values[index]

        # First order partial derivative per x.
        def dc_dx(x, y):
            if(x == 0): # left
                u0 = c(0, y)
                u1 = c(1, y)
                u2 = c(2, y)
                return (-3*u0 + 4*u1 - u2) / (2*dx)
            elif(x == Nx-1): # right
                un  = c(Nx-1,   y)
                un1 = c(Nx-1-1, y)
                un2 = c(Nx-1-2, y)
                return (3*un - 4*un1 + un2) / (2*dx)
            else:
                u1 = c(x+1, y)
                u2 = c(x-1, y)
                return (u1 - u2) / (2*dx)

        def dmu_dx(x, y):
            if(x == 0): # left
                u0 = mu(0, y)
                u1 = mu(1, y)
                u2 = mu(2, y)
                return (-3*u0 + 4*u1 - u2) / (2*dx)
            elif(x == Nx-1): # right
                un  = mu(Nx-1,   y)
                un1 = mu(Nx-1-1, y)
                un2 = mu(Nx-1-2, y)
                return (3*un - 4*un1 + un2) / (2*dx)
            else:
                u1 = mu(x+1, y)
                u2 = mu(x-1, y)
                return (u1 - u2) / (2*dx)

        # First order partial derivative per y.
        def dc_dy(x, y):
            if(y == 0): # bottom
                u0 = c(x, 0)
                u1 = c(x, 1)
                u2 = c(x, 2)
                return (-3*u0 + 4*u1 - u2) / (2*dy)
            elif(y == Ny-1): # top
                un  = c(x, Ny-1  )
                un1 = c(x, Ny-1-1)
                un2 = c(x, Ny-1-2)
                return (3*un - 4*un1 + un2) / (2*dy)
            else:
                ui1 = c(x, y+1)
                ui2 = c(x, y-1)
                return (ui1 - ui2) / (2*dy)

        def dmu_dy(x, y):
            if(y == 0): # bottom
                u0 = mu(x, 0)
                u1 = mu(x, 1)
                u2 = mu(x, 2)
                return (-3*u0 + 4*u1 - u2) / (2*dy)
            elif(y == Ny-1): # top
                un  = mu(x, Ny-1  )
                un1 = mu(x, Ny-1-1)
                un2 = mu(x, Ny-1-2)
                return (3*un - 4*un1 + un2) / (2*dy)
            else:
                ui1 = mu(x, y+1)
                ui2 = mu(x, y-1)
                return (ui1 - ui2) / (2*dy)

        # Second order partial derivative per x.
        def d2c_dx2(x, y):
            if(x == 0 or x == Nx-1):
                raise RuntimeError("d2c_dx2 called at the boundary")

            ui1 = c(x+1, y)
            ui  = c(x,   y)
            ui2 = c(x-1, y)
            return (ui1 - 2*ui + ui2) / (dx*dx)

        def d2mu_dx2(x, y):
            if(x == 0 or x == Nx-1):
                raise RuntimeError("d2mu_dx2 called at the boundary")

            vi1 = mu(x+1, y)
            vi  = mu(x,   y)
            vi2 = mu(x-1, y)
            return (vi1 - 2*vi + vi2) / (dx*dx)

        # Second order partial derivative per y.
        def d2c_dy2(x, y):
            if(y == 0 or y == Ny-1):
                raise RuntimeError("d2c_dy2 called at the boundary")

            ui1 = c(x, y+1)
            ui  = c(x,   y)
            ui2 = c(x, y-1)
            return (ui1 - 2*ui + ui2) / (dy*dy)

        def d2mu_dy2(x, y):
            if(y == 0 or y == Ny-1):
                raise RuntimeError("d2mu_dy2 called at the boundary")

            vi1 = mu(x, y+1)
            vi  = mu(x,   y)
            vi2 = mu(x, y-1)
            return (vi1 - 2*vi + vi2) / (dy*dy)

        equations = []
        kernels   = [self.kernel_c, self.kernel_mu]

        # Set the kernel types (APIs) to be generated.
        apis = [eAPI_VectorisedSharedLibrary]
        self.kernel_c.KernelAPIs  = apis
        self.kernel_mu.KernelAPIs = apis

        # Set the kernel generator options for the VectorisedSharedLibrary kernel generator.
        options_c  = self.kernel_c.GetKernelGeneratorOptions(eAPI_VectorisedSharedLibrary);
        options_mu = self.kernel_mu.GetKernelGeneratorOptions(eAPI_VectorisedSharedLibrary);

        options_c.explicitVectorExtension  = eExplicitVectorisation_AVX2_SIMD;
        options_mu.explicitVectorExtension = eExplicitVectorisation_AVX2_SIMD;
        '''
           Available options:
           - eExplicitVectorisation_Default
             Scalar implementation (uses double and adouble as data types).

           - eExplicitVectorisation_doubleN
             Generic implementation that uses ordinary double and adouble arrays
             (used for testing compiler's capabilities to auto-vectorise the code)
             It requires setting vector width for all kernels, for instance in this example:
                options_u.vectorWidth = 4;
                options_v.vectorWidth = 4;

           - eExplicitVectorisation_AVX2_SIMD and eExplicitVectorisation_AVX512_SIMD
             Generic, vector-extension and platform indenpendent implementation for auto-vectorisation using OpenMP SIMD capabilities.

           - eExplicitVectorisation_AVX2_Intel and eExplicitVectorisation_AVX512_Intel
             Explicit vectorisation using compiler intrinsics and Intel SVML library.
             Kernels must be compiled using the Intel icpx compiler (it is CMake-based).
             I.e. in GNU/Linux execute:
               source /opt/intel/oneapi/setvars.sh
               export CXX=icpx
               export CC=icx-cc
               export FC=icx
               export F77=icx
               export F90=icx
             and then run the examples.
           - eExplicitVectorisation_AVX2_AMD and eExplicitVectorisation_AVX512_AMD
             Explicit vectorisation using compiler intrinsics and AMD libamd_mvec library.
             Kernels must be compiled using the AMD aocc compiler (it is CMake-based).
             I.e. in GNU/Linux execute:
               source ~/setenv_AOCC.sh
               export CXX=clang++
               export CC=clang
               export FC=flang
               export F77=flang
               export F90=flang
             and then run the examples.
        '''

        # Component c:
        for x in range(Nx):
            for y in range(Ny):
                if(x == 0):       # Left BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ c(x,y) ] = dc_dx(x,y) - c_flux_bc
                    equations.append(equation)
                elif(x == Nx-1):  # Right BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ c(x,y) ] = dc_dx(x,y) - c_flux_bc
                    equations.append(equation)
                elif(y == 0):     # Bottom BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ c(x,y) ] = dc_dy(x,y) - c_flux_bc
                    equations.append(equation)
                elif(y == Ny-1):  # Top BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ c(x,y) ] = dc_dy(x,y) - c_flux_bc
                    equations.append(equation)
                else:
                    # Interior points
                    equation_c = csEquation_t(self.kernel_c)
                    equation_c[ c(x,y) ] = dc_dt(x,y) - Diffusivity * (d2mu_dx2(x,y) + d2mu_dy2(x,y))
                    equation_c.SetGridPoint(x, y);
                    self.kernel_c.AddEquation(equation_c);
        
        # Component mu:
        for x in range(Nx):
            for y in range(Ny):
                if(x == 0):       # Left BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ mu(x,y) ] = dmu_dx(x,y) - mu_flux_bc
                    equations.append(equation)
                elif(x == Nx-1):  # Right BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ mu(x,y) ] = dmu_dx(x,y) - mu_flux_bc
                    equations.append(equation)
                elif(y == 0):     # Bottom BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ mu(x,y) ] = dmu_dy(x,y) - mu_flux_bc
                    equations.append(equation)
                elif(y == Ny-1):  # Top BC: Neumann BCs
                    equation = csEquation_t(self.group_BCs)
                    equation[ mu(x,y) ] = dmu_dy(x,y) - mu_flux_bc
                    equations.append(equation)
                else:
                    # Interior points
                    equation_mu = csEquation_t(self.kernel_mu)
                    equation_mu[ mu(x,y) ] = mu(x,y) \
                                             + gamma * (d2c_dx2(x,y) + d2c_dy2(x,y)) \
                                             - (c(x,y)*c(x,y)*c(x,y) - c(x,y))
                    equation_mu.SetGridPoint(x, y);
                    self.kernel_mu.AddEquation(equation_mu);

        return (equations, kernels)
    
    def GetIndex(self, x, y):
        if x < 0 or x >= self.Nx:
            raise RuntimeError("Invalid x index")
        if y < 0 or y >= self.Ny:
            raise RuntimeError("Invalid y index")
        return self.Ny*x + y
    
def run(**kwargs):
    inputFilesDirectory = kwargs.get('inputFilesDirectory', os.path.splitext(os.path.basename(__file__))[0])
    Nx         = kwargs.get('Nx',         100)
    Ny         = kwargs.get('Ny',         100)
    c_flux_bc  = kwargs.get('c_flux_bc',  0.0)
    mu_flux_bc = kwargs.get('mu_flux_bc', 0.0)
    
    # Instantiate the model being simulated.
    model = CahnHilliard_Kernels_2D(Nx, Ny, c_flux_bc, mu_flux_bc)
    
    # 1. Initialise the DAE system with the number of variables and other inputs.
    mb = csModelBuilder_t()
    mb.Initialize_DAE_System(model.Nequations, 0, defaultAbsoluteTolerance = 1e-5)
    
    # 2. Specify the OpenCS model.
    # Create and set model equations using the provided time/variable/timeDerivative/dof objects.
    # The DAE system is defined as:
    #     F(x',x,y,t) = 0
    # where x' are derivatives of state variables, x are state variables,
    # y are fixed variables (degrees of freedom) and t is the current simulation time.
    mb.ModelEquations = model.CreateEquations(mb.Variables, mb.TimeDerivatives)    
    # Set initial conditions.
    mb.VariableValues = model.GetInitialConditions()
    # Set variable names.
    mb.VariableNames  = model.GetVariableNames()
    
    # 3. Generate a model for single CPU simulations.    
    # Set simulation options (specified as a string in JSON format).
    options = mb.SimulationOptions
    options['Simulation']['OutputDirectory']             = 'results'
    options['Simulation']['TimeHorizon']                 = 500.0
    options['Simulation']['ReportingInterval']           =   5.0
    options['Solver']['Parameters']['RelativeTolerance'] =  1e-5
    
    # (1) Sequential evaluator (evaluates groups one by one).
    options['Model']['Evaluators']['Device_0']['Library'] = 'Sequential'
    options['Model']['Evaluators']['Device_0']['Name']    = 'seq'
    options['Model']['Evaluators']['Device_0']['Groups']  = ['BoundaryConditions']
    # (2a) OpenMP evaluator.
    options['Model']['Evaluators']['Device_1']               = {}
    options['Model']['Evaluators']['Device_1']['Parameters'] = {}
    options['Model']['Evaluators']['Device_1']['Library'] = 'VectorisedKernels_OpenMP'
    options['Model']['Evaluators']['Device_1']['Name']    = 'kopenmp'
    options['Model']['Evaluators']['Device_1']['Groups']  = []
    options['Model']['Evaluators']['Device_1']['Kernels'] = ['CahnHilliard_c', 'CahnHilliard_mu']
    options['Model']['Evaluators']['Device_1']['Parameters']['numThreads'] = 0

    # ILU options for Ncpu = 1: k = 3, rho = 1.0, alpha = 1e-1, w = 0.5
    options['LinearSolver']['Preconditioner']['Parameters']['fact: level-of-fill']      =    3
    options['LinearSolver']['Preconditioner']['Parameters']['fact: relax value']        =  0.0
    options['LinearSolver']['Preconditioner']['Parameters']['fact: absolute threshold'] = 1e-1
    options['LinearSolver']['Preconditioner']['Parameters']['fact: relative threshold'] =  1.0
    mb.SimulationOptions = options
    
    # Partition the system to create the OpenCS model for a single CPU simulation.
    # In this case (Npe = 1) the graph partitioner is not required.
    Npe = 1
    graphPartitioner = None
    cs_models = mb.PartitionSystem(Npe, graphPartitioner)
    csModelBuilder_t.ExportModels(cs_models, inputFilesDirectory, mb.SimulationOptions)
    print("Single CPU OpenCS model generated successfully!")

    # 5. Run simulation using the exported model from the specified directory.
    csSimulate(inputFilesDirectory)
    
    try:
        csvFilename = os.path.join(inputFilesDirectory, 'results', 'results-0.csv')
        csv_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), csvFilename)
        df = pandas.read_csv(csv_filepath, sep=';', header=2, skiprows=None, quotechar='"', skipinitialspace=True, dtype=float)

        # The data layout in memory is as (for each time point):
        #   [Time][BoundaryConditions][CahnHilliard_c][CahnHilliard_mu]
        #   [  1 ][  2*(2*Nx+2*Ny-4) ][ (Nx-2)*(Ny-2)][ (Nx-2)*(Ny-2) ]

        # Offset the data by time and the whole BoundaryConditions group.
        c_start   = 1 + 2*(2*Nx + 2*Ny - 4)
        c_end     = c_start + (Nx-2)*(Ny-2)

        mu_start   = c_end
        mu_end     = mu_start + (Nx-2)*(Ny-2)

        # a) For simplicity, plot it without the boundaries.
        def getData_c_no_bc(data):
            c = data[c_start : c_end]
            c = numpy.array(c).reshape(Nx-2,Ny-2)
            return c

        # b) Add both boundary and the interior points.
        def getData_c(data):
            cnt = 1 # The first point is always the Time.
            c = numpy.zeros((Nx,Ny), dtype=float)
            # The equations are sorted by groups so that the boundary points are located
            # at the beggining of the data array.
            # First, iterate over boundary points and set their values
            for x in range(Nx):
                for y in range(Ny):
                    if (x == 0) or (x == Nx-1) or (y == 0) or (y == Ny-1):
                        c[x,y] = data[cnt]
                        cnt += 1
                    else:
                        continue
            # Second, set the interior points (the group CahnHilliard_c is after the BoundaryConditions)
            c[1:Nx-1,1:Ny-1] = numpy.array(data[c_start : c_end]).reshape(Nx-2,Ny-2)
            return c

        Nframes = len(df.values)

        # Plot setup and the first frame
        data = df.values[0]
        t = data[0]
        c = getData_c(data)
        fig = plt.figure()
        im = plt.imshow(c, cmap = 'viridis')
        plt.colorbar()
        plt.title('time %.2f s' % t)

        def animate(i):
            data = df.values[i]
            t = data[0]
            c = getData_c(data)
            im.set_data(c)
            plt.title('time %.2f s' % t)
            return im

        # Save the animation object so it does not go out if scope while animation is running.
        plt.__anim__ = animation.FuncAnimation(fig, animate, frames=Nframes, interval = 500, repeat = False)
        plt.show()
    except Exception as e:
        print(str(e))

if __name__ == "__main__":
    if len(sys.argv) == 1:
        Nx = 100
        Ny = 100
    elif len(sys.argv) == 3:
        Nx = int(sys.argv[1])
        Ny = int(sys.argv[2])
    else:
        print('Usage: python tutorial_opencs_dae_8_vector_kernels.py Nx Ny')
        sys.exit()
        
    c_flux_bc  = 0.0
    mu_flux_bc = 0.0
    inputFilesDirectory = 'tutorial_opencs_dae_8_vector_kernels'
    run(Nx = Nx, 
        Ny = Ny, 
        c_flux_bc = c_flux_bc,
        mu_flux_bc = mu_flux_bc,
        inputFilesDirectory = inputFilesDirectory)