#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
***********************************************************************************
                  tutorial_opencs_dae_8_vector_kernels.py
                DAE Tools: pyOpenCS module, www.daetools.com
                Copyright (C) Dragan Nikolic
***********************************************************************************
DAE Tools is free software; you can redistribute it and/or modify it under the
terms of the GNU General Public License version 3 as published by the Free Software
Foundation. DAE Tools is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with the
DAE Tools software; if not, see <http://www.gnu.org/licenses/>.
************************************************************************************
"""
__doc__ = """
In this example the Cahn-Hilliard equation is solved using the finite difference method.
This equation describes the process of phase separation, where two components of a
binary mixture separate and form domains pure in each component.

    dc/dt = Diffusivity * nabla^2(mu)
    mu = c^3 - c - gamma * nabla^2(c)

The mesh is a simple square (0-100)x(0-100).
Input parameters are Diffusivity = 1 and gamma = 1.
For both c and mu insulated boundary conditions are set (no flux on boundaries).
Initial conditions are set to c(0) = 0.0 + c_noise where the noise is specified using
the normal distribution with standard deviation of 0.1.

The system is integrated for 500 seconds and the outputs are taken every 5 seconds.

This version is implemented using auto- and explicitly vectorised kernels.
"""

import os, sys, itertools, numpy, pandas, time
from daetools.solvers.opencs import csModelBuilder_t, csSimulate
from daetools.solvers.opencs import csGroup_t, csKernel_t, csEquation_t
from daetools.solvers.opencs import eAPI_VectorisedSharedLibrary
from daetools.solvers.opencs import eExplicitVectorisation_Default, eExplicitVectorisation_doubleN, eExplicitVectorisation_AVX2_SIMD
from daetools.solvers.opencs import eExplicitVectorisation_AVX512_SIMD, eExplicitVectorisation_AVX2_Intel, eExplicitVectorisation_AVX512_Intel
from daetools.solvers.opencs import eExplicitVectorisation_AVX2_AMD, eExplicitVectorisation_AVX512_AMD
import matplotlib.pyplot as plt
from matplotlib import animation

Diffusivity = 1.0
gamma = 1.0


class CahnHilliard_Kernels_2D:
    """
    Finite difference model of the 2D Cahn-Hilliard equation on the square
    (0-100)x(0-100) using Nx x Ny grid points.

    The variables are stored as [c values][mu values], each block holding
    Nx*Ny entries ordered by GetIndex(x, y). Boundary equations are placed into
    the group "BoundaryConditions" while the interior equations for c and mu
    are placed into the kernels "CahnHilliard_c" and "CahnHilliard_mu".
    """

    def __init__(self, Nx, Ny, c_flux_bc, mu_flux_bc):
        """
        Arguments:
          Nx, Ny      Number of grid points in x and y directions (each >= 3).
          c_flux_bc   Value subtracted from the first derivative of c in the boundary equations.
          mu_flux_bc  Value subtracted from the first derivative of mu in the boundary equations.

        Raises RuntimeError if the grid is too small for the boundary stencils.
        """
        # The one-sided boundary stencils in CreateEquations access three points
        # per direction (and dx, dy divide by N-1): fail early with a clear message.
        if Nx < 3 or Ny < 3:
            raise RuntimeError("Nx and Ny must be at least 3 (got Nx = %d, Ny = %d)" % (Nx, Ny))

        self.Nx = Nx
        self.Ny = Ny
        self.c_flux_bc  = c_flux_bc
        self.mu_flux_bc = mu_flux_bc

        self.x0 = 0.0
        self.x1 = 100.0
        self.y0 = 0.0
        self.y1 = 100.0
        self.dx = (self.x1-self.x0) / (Nx-1)
        self.dy = (self.y1-self.y0) / (Ny-1)

        self.x_domain = [self.x0 + ix * self.dx for ix in range(self.Nx)]
        self.y_domain = [self.y0 + iy * self.dy for iy in range(self.Ny)]

        self.c_start_index  = 0*Nx*Ny
        self.mu_start_index = 1*Nx*Ny

        self.Nequations = 2*Nx*Ny

        self.group_BCs = csGroup_t ("BoundaryConditions", 1) # the group for boundary conditions (group 1)
        self.kernel_c  = csKernel_t("CahnHilliard_c",     2) # the kernel for c-component  (group 2)
        self.kernel_mu = csKernel_t("CahnHilliard_mu",    3) # the kernel for mu-component (group 3)

    def GetInitialConditions(self):
        """
        Returns a list with Nequations initial values: normally distributed noise
        (mean 0.0, standard deviation 0.1, fixed seed) for c and zeros for mu.
        """
        # Use numpy array so that setting c_0 (a view) changes the original values.
        uv0 = numpy.zeros(self.Nequations)
        c_0 = uv0[self.c_start_index : self.mu_start_index]

        # Fixed seed makes the generated noise reproducible between runs.
        numpy.random.seed(124)

        c0     = 0.0
        stddev = 0.1
        for ix in range(self.Nx):
            for iy in range(self.Ny):
                c_0[self.GetIndex(ix, iy)] = numpy.random.normal(c0, stddev)

        return uv0.tolist()

    def GetVariableNames(self):
        """
        Returns the variable names 'c(x,y)' for all grid points followed by 'mu(x,y)'
        for all grid points (in the same order as produced by GetIndex).
        """
        x_y_inds = list(itertools.product(range(self.Nx), range(self.Ny)))
        c_names  = ['c(%d,%d)'  % (ix, iy) for ix, iy in x_y_inds]
        mu_names = ['mu(%d,%d)' % (ix, iy) for ix, iy in x_y_inds]
        return c_names + mu_names

    def CreateEquations(self, y, dydt):
        """
        Builds the model equations.

        Arguments:
          y     A list of csNumber_t objects representing model variables.
          dydt  A list of csNumber_t objects representing time derivatives of model variables.

        Returns a tuple (equations, kernels) where equations is the list of boundary
        equations (group "BoundaryConditions") and kernels is [kernel_c, kernel_mu]
        holding the interior equations.
        """
        c_values    = y   [self.c_start_index  : self.mu_start_index]
        mu_values   = y   [self.mu_start_index : self.Nequations]
        dcdt_values = dydt[self.c_start_index  : self.mu_start_index]
        # mu is an algebraic variable: its time derivatives are not used.

        c_flux_bc  = self.c_flux_bc
        mu_flux_bc = self.mu_flux_bc

        dx = self.dx
        dy = self.dy
        Nx = self.Nx
        Ny = self.Ny

        # Accessors for the variables at the grid point (ix, iy).
        def c(ix, iy):
            return c_values[self.GetIndex(ix, iy)]

        def mu(ix, iy):
            return mu_values[self.GetIndex(ix, iy)]

        def dc_dt(ix, iy):
            return dcdt_values[self.GetIndex(ix, iy)]

        # The stencils below accept the accessor (c or mu) as the first argument
        # so that the same finite difference formula is shared by both components.

        # First order partial derivative per x.
        def d_dx(f, ix, iy):
            if ix == 0: # left (one-sided, three points)
                return (-3*f(0, iy) + 4*f(1, iy) - f(2, iy)) / (2*dx)
            elif ix == Nx-1: # right (one-sided, three points)
                return (3*f(Nx-1, iy) - 4*f(Nx-1-1, iy) + f(Nx-1-2, iy)) / (2*dx)
            else: # central
                return (f(ix+1, iy) - f(ix-1, iy)) / (2*dx)

        # First order partial derivative per y.
        def d_dy(f, ix, iy):
            if iy == 0: # bottom (one-sided, three points)
                return (-3*f(ix, 0) + 4*f(ix, 1) - f(ix, 2)) / (2*dy)
            elif iy == Ny-1: # top (one-sided, three points)
                return (3*f(ix, Ny-1) - 4*f(ix, Ny-1-1) + f(ix, Ny-1-2)) / (2*dy)
            else: # central
                return (f(ix, iy+1) - f(ix, iy-1)) / (2*dy)

        # Second order partial derivative per x (interior points only).
        def d2_dx2(f, ix, iy):
            if ix == 0 or ix == Nx-1:
                raise RuntimeError("d2%s_dx2 called at the boundary" % f.__name__)
            return (f(ix+1, iy) - 2*f(ix, iy) + f(ix-1, iy)) / (dx*dx)

        # Second order partial derivative per y (interior points only).
        def d2_dy2(f, ix, iy):
            if iy == 0 or iy == Ny-1:
                raise RuntimeError("d2%s_dy2 called at the boundary" % f.__name__)
            return (f(ix, iy+1) - 2*f(ix, iy) + f(ix, iy-1)) / (dy*dy)

        equations = []
        kernels   = [self.kernel_c, self.kernel_mu]

        # Set the kernel types (APIs) to be generated.
        apis = [eAPI_VectorisedSharedLibrary]
        self.kernel_c.KernelAPIs  = apis
        self.kernel_mu.KernelAPIs = apis

        # Set the kernel generator options for the VectorisedSharedLibrary kernel generator.
        options_c  = self.kernel_c.GetKernelGeneratorOptions(eAPI_VectorisedSharedLibrary)
        options_mu = self.kernel_mu.GetKernelGeneratorOptions(eAPI_VectorisedSharedLibrary)
        options_c.explicitVectorExtension  = eExplicitVectorisation_AVX2_SIMD
        options_mu.explicitVectorExtension = eExplicitVectorisation_AVX2_SIMD
        # Available options:
        #  - eExplicitVectorisation_Default
        #      Scalar implementation (uses double and adouble as data types).
        #  - eExplicitVectorisation_doubleN
        #      Generic implementation that uses ordinary double and adouble arrays
        #      (used for testing compiler's capabilities to auto-vectorise the code).
        #      It requires setting vector width for all kernels, for instance in this example:
        #         options_c.vectorWidth  = 4
        #         options_mu.vectorWidth = 4
        #  - eExplicitVectorisation_AVX2_SIMD and eExplicitVectorisation_AVX512_SIMD
        #      Generic, vector-extension and platform independent implementation
        #      for auto-vectorisation using OpenMP SIMD capabilities.
        #  - eExplicitVectorisation_AVX2_Intel and eExplicitVectorisation_AVX512_Intel
        #      Explicit vectorisation using compiler intrinsics and Intel SVML library.
        #      Kernels must be compiled using the Intel icpx compiler (it is CMake-based).
        #      I.e. in GNU/Linux execute:
        #         source /opt/intel/oneapi/setvars.sh
        #         export CXX=icpx
        #         export CC=icx-cc
        #         export FC=icx
        #         export F77=icx
        #         export F90=icx
        #      and then run the examples.
        #  - eExplicitVectorisation_AVX2_AMD and eExplicitVectorisation_AVX512_AMD
        #      Explicit vectorisation using compiler intrinsics and AMD libamd_mvec library.
        #      Kernels must be compiled using the AMD aocc compiler (it is CMake-based).
        #      I.e. in GNU/Linux execute:
        #         source ~/setenv_AOCC.sh
        #         export CXX=clang++
        #         export CC=clang
        #         export FC=flang
        #         export F77=flang
        #         export F90=flang
        #      and then run the examples.

        # Residual of the interior equation for c: dc/dt - Diffusivity * nabla^2(mu) = 0
        def residual_c(ix, iy):
            return dc_dt(ix, iy) - Diffusivity * (d2_dx2(mu, ix, iy) + d2_dy2(mu, ix, iy))

        # Residual of the interior equation for mu: mu - (c^3 - c - gamma * nabla^2(c)) = 0
        def residual_mu(ix, iy):
            return mu(ix, iy) \
                   + gamma * (d2_dx2(c, ix, iy) + d2_dy2(c, ix, iy)) \
                   - (c(ix, iy)*c(ix, iy)*c(ix, iy) - c(ix, iy))

        # Creates the equations for one component: Neumann BCs go to the group
        # BoundaryConditions (left/right edges take precedence at the corners)
        # and the interior points go to the component's kernel.
        def add_component_equations(f, flux_bc, kernel, interior_residual):
            for ix in range(Nx):
                for iy in range(Ny):
                    if ix == 0 or ix == Nx-1:
                        # Left/Right BC: Neumann BCs
                        equation = csEquation_t(self.group_BCs)
                        equation[ f(ix,iy) ] = d_dx(f, ix, iy) - flux_bc
                        equations.append(equation)

                    elif iy == 0 or iy == Ny-1:
                        # Bottom/Top BC: Neumann BCs
                        equation = csEquation_t(self.group_BCs)
                        equation[ f(ix,iy) ] = d_dy(f, ix, iy) - flux_bc
                        equations.append(equation)

                    else:
                        # Interior points
                        equation = csEquation_t(kernel)
                        equation[ f(ix,iy) ] = interior_residual(ix, iy)
                        equation.SetGridPoint(ix, iy)
                        kernel.AddEquation(equation)

        # Component c (all its boundary equations are added first):
        add_component_equations(c, c_flux_bc, self.kernel_c, residual_c)

        # Component mu:
        add_component_equations(mu, mu_flux_bc, self.kernel_mu, residual_mu)

        return (equations, kernels)

    def GetIndex(self, x, y):
        """
        Returns the position of the grid point (x, y) within a component's block
        of variables. Raises RuntimeError for indexes outside of the grid.
        """
        if x < 0 or x >= self.Nx:
            raise RuntimeError("Invalid x index")
        if y < 0 or y >= self.Ny:
            raise RuntimeError("Invalid y index")
        return self.Ny*x + y


def run(**kwargs):
    """
    Generates the OpenCS model, runs the simulation and animates the results.

    Keyword arguments (all optional):
      inputFilesDirectory  Directory for the exported model (default: the script name without extension).
      Nx, Ny               Number of grid points (default: 100 each).
      c_flux_bc            Boundary flux for c (default: 0.0).
      mu_flux_bc           Boundary flux for mu (default: 0.0).
    """
    inputFilesDirectory = kwargs.get('inputFilesDirectory', os.path.splitext(os.path.basename(__file__))[0])
    Nx         = kwargs.get('Nx',         100)
    Ny         = kwargs.get('Ny',         100)
    c_flux_bc  = kwargs.get('c_flux_bc',  0.0)
    mu_flux_bc = kwargs.get('mu_flux_bc', 0.0)

    # Instantiate the model being simulated.
    model = CahnHilliard_Kernels_2D(Nx, Ny, c_flux_bc, mu_flux_bc)

    # 1. Initialise the DAE system with the number of variables and other inputs.
    mb = csModelBuilder_t()
    mb.Initialize_DAE_System(model.Nequations, 0, defaultAbsoluteTolerance = 1e-5)

    # 2. Specify the OpenCS model.
    # Create and set model equations using the provided time/variable/timeDerivative/dof objects.
    # The DAE system is defined as:
    #     F(x',x,y,t) = 0
    # where x' are derivatives of state variables, x are state variables,
    # y are fixed variables (degrees of freedom) and t is the current simulation time.
    mb.ModelEquations = model.CreateEquations(mb.Variables, mb.TimeDerivatives)

    # Set initial conditions.
    mb.VariableValues = model.GetInitialConditions()

    # Set variable names.
    mb.VariableNames = model.GetVariableNames()

    # 3. Generate a model for single CPU simulations.
    # Set simulation options (specified as a string in JSON format).
    options = mb.SimulationOptions
    options['Simulation']['OutputDirectory']   = 'results'
    options['Simulation']['TimeHorizon']       = 500.0
    options['Simulation']['ReportingInterval'] =   5.0
    options['Solver']['Parameters']['RelativeTolerance'] = 1e-5

    # (1) Sequential evaluator (evaluates groups one by one).
    options['Model']['Evaluators']['Device_0']['Library'] = 'Sequential'
    options['Model']['Evaluators']['Device_0']['Name']    = 'seq'
    options['Model']['Evaluators']['Device_0']['Groups']  = ['BoundaryConditions']

    # (2a) OpenMP evaluator.
    options['Model']['Evaluators']['Device_1'] = {}
    options['Model']['Evaluators']['Device_1']['Parameters'] = {}
    options['Model']['Evaluators']['Device_1']['Library'] = 'VectorisedKernels_OpenMP'
    options['Model']['Evaluators']['Device_1']['Name']    = 'kopenmp'
    options['Model']['Evaluators']['Device_1']['Groups']  = []
    options['Model']['Evaluators']['Device_1']['Kernels'] = ['CahnHilliard_c', 'CahnHilliard_mu']
    options['Model']['Evaluators']['Device_1']['Parameters']['numThreads'] = 0

    # ILU options for Ncpu = 1: k = 3, rho = 1.0, alpha = 1e-1, w = 0.5
    options['LinearSolver']['Preconditioner']['Parameters']['fact: level-of-fill']      = 3
    options['LinearSolver']['Preconditioner']['Parameters']['fact: relax value']        = 0.0
    options['LinearSolver']['Preconditioner']['Parameters']['fact: absolute threshold'] = 1e-1
    options['LinearSolver']['Preconditioner']['Parameters']['fact: relative threshold'] = 1.0
    mb.SimulationOptions = options

    # Partition the system to create the OpenCS model for a single CPU simulation.
    # In this case (Npe = 1) the graph partitioner is not required.
    Npe = 1
    graphPartitioner = None
    cs_models = mb.PartitionSystem(Npe, graphPartitioner)
    csModelBuilder_t.ExportModels(cs_models, inputFilesDirectory, mb.SimulationOptions)
    print("Single CPU OpenCS model generated successfully!")

    # 4. Run simulation using the exported model from the specified directory.
    csSimulate(inputFilesDirectory)

    # Plotting is best-effort: a failure here is reported but does not abort the run.
    try:
        # Resolve the results file from the same directory argument that was given to
        # ExportModels/csSimulate above (a relative directory is relative to the
        # current working directory, not to the location of this script).
        csvFilename  = os.path.join(inputFilesDirectory, 'results', 'results-0.csv')
        csv_filepath = os.path.abspath(csvFilename)
        df = pandas.read_csv(csv_filepath, sep=';', header=2, skiprows=None, quotechar='"', skipinitialspace=True, dtype=float)

        # The data layout in memory is as (for each time point):
        #  [Time][BoundaryConditions][CahnHilliard_c][CahnHilliard_mu]
        #  [ 1  ][ 2*(2*Nx+2*Ny-4)  ][ (Nx-2)*(Ny-2)][ (Nx-2)*(Ny-2) ]
        # Offset the data by time and the whole BoundaryConditions group.
        c_start  = 1 + 2*(2*Nx + 2*Ny - 4)
        c_end    = c_start + (Nx-2)*(Ny-2)
        mu_start = c_end
        mu_end   = mu_start + (Nx-2)*(Ny-2)

        # a) For simplicity, plot it without the boundaries.
        def getData_c_no_bc(data):
            c = data[c_start : c_end]
            c = numpy.array(c).reshape(Nx-2,Ny-2)
            return c

        # b) Add both boundary and the interior points.
        def getData_c(data):
            cnt = 1 # The first point is always the Time.
            c = numpy.zeros((Nx,Ny), dtype=float)

            # The equations are sorted by groups so that the boundary points are located
            # at the beginning of the data array.
            # First, iterate over boundary points and set their values
            for x in range(Nx):
                for y in range(Ny):
                    if (x == 0) or (x == Nx-1) or (y == 0) or (y == Ny-1):
                        c[x,y] = data[cnt]
                        cnt += 1

            # Second, set the interior points (the group CahnHilliard_c is after the BoundaryConditions)
            c[1:Nx-1,1:Ny-1] = numpy.array(data[c_start : c_end]).reshape(Nx-2,Ny-2)
            return c

        values  = df.values
        Nframes = len(values)

        # Plot setup and the first frame
        data = values[0]
        t    = data[0]
        c    = getData_c(data)
        fig = plt.figure()
        im = plt.imshow(c, cmap = 'viridis')
        plt.colorbar()
        plt.title('time %.2f s' % t)

        def animate(i):
            data = values[i]
            t    = data[0]
            c    = getData_c(data)
            im.set_data(c)
            plt.title('time %.2f s' % t)
            return im

        # Save the animation object so it does not go out of scope while animation is running.
        plt.__anim__ = animation.FuncAnimation(fig, animate, frames=Nframes, interval = 500, repeat = False)
        plt.show()
    except Exception as e:
        print(str(e))


if __name__ == "__main__":
    if len(sys.argv) == 1:
        Nx = 100
        Ny = 100
    elif len(sys.argv) == 3:
        Nx = int(sys.argv[1])
        Ny = int(sys.argv[2])
    else:
        print('Usage: python tutorial_opencs_dae_8_vector_kernels.py Nx Ny')
        # Wrong number of arguments is an error: report it through the exit status.
        sys.exit(1)

    c_flux_bc  = 0.0
    mu_flux_bc = 0.0

    inputFilesDirectory = 'tutorial_opencs_dae_8_vector_kernels'
    run(Nx = Nx, Ny = Ny, c_flux_bc = c_flux_bc, mu_flux_bc = mu_flux_bc, inputFilesDirectory = inputFilesDirectory)