import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
import seaborn as sns
sns.set_context("paper")
sns.set_style("ticks")

Physics-informed regularization: Solving ODEs#

We learn how to solve ODEs with neural networks. This notebook replicates some of the results of Lagaris et al.

import torch
import torch.nn as nn

# This is useful for taking derivatives:
def grad(outputs, inputs):
    return torch.autograd.grad(outputs, inputs, grad_outputs=torch.ones_like(outputs), create_graph=True)[0]
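
To see how this helper behaves, here is a small standalone check (not part of the original workflow): differentiating \(x^2\) should return \(2x\) at every input point.

# Quick sanity check of the helper: d(x^2)/dx should equal 2x
x_test = torch.linspace(0.0, 1.0, 5).reshape(-1, 1)
x_test.requires_grad = True
y_test = x_test ** 2
print(grad(y_test, x_test))  # expect 2 * x_test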

Example 1: Single ODE#

Consider the ODE:

\[ \frac{d\Psi}{dx} = f(x, \Psi), \]

with \(x \in [0,2]\) and initial condition (IC):

\[ \Psi(0) = A. \]

We write the trial solution as:

\[ \hat{\Psi}(x; \theta) = A + x N(x; \theta), \]

where \(N(x; \theta)\) is a neural network (NN). The trial solution \(\hat{\Psi}(x;\theta)\) automatically satisfies the initial condition by construction. The loss function we minimize to train the NN is:

\[ L(\theta) = \int_0^2 \left[\frac{d\hat{\Psi}(x;\theta)}{dx} - f(x,\hat{\Psi}(x;\theta))\right]^2dx. \]
# N is a Neural Network - This is exactly the network used by Lagaris et al. 1997
N = nn.Sequential(nn.Linear(1, 50), nn.Sigmoid(), nn.Linear(50,1, bias=False))

# Initial condition
A = 0.

# The Psi_t function
Psi_t = lambda x: A + x * N(x)

# The right hand side function
f = lambda x, Psi: torch.exp(-x / 5.0) * torch.cos(x) - Psi / 5.0

# The loss function
def loss(x):
    x.requires_grad = True
    outputs = Psi_t(x)
    Psi_t_x = torch.autograd.grad(outputs, x, grad_outputs=torch.ones_like(outputs),
                                  create_graph=True)[0]
    return torch.mean((Psi_t_x - f(x, outputs)) ** 2)
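
As a quick, illustrative check of the claim that the trial solution satisfies the initial condition by construction, evaluating it at \(x = 0\) returns \(A\) no matter what the network weights are:

# Sanity check (illustrative only): the trial solution satisfies the IC by construction
with torch.no_grad():
    print(Psi_t(torch.zeros(1, 1)))  # prints A = 0 for any network weights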

First, I will use the method from Lagaris et al. Instead of stochastic optimization, they use many collocation points to estimate the loss integral (I am going to use 100 equally spaced points on \([0, 2]\)) and then run a deterministic gradient-based optimizer (I am going to use L-BFGS).
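
As an aside, taking the mean of the squared residual estimates the loss integral only up to the interval length, which does not change the minimizer. If you prefer an explicit quadrature estimate, a trapezoidal version is a small change (a sketch, reusing Psi_t, f, and the grad helper defined above; it assumes the collocation points are sorted):

# Alternative loss (sketch): trapezoidal quadrature of the squared residual
def loss_trapz(x):
    x.requires_grad = True
    Psi = Psi_t(x)
    residual = grad(Psi, x) - f(x, Psi)
    # torch.trapz integrates residual**2 over the (sorted) collocation grid
    return torch.trapz((residual ** 2).squeeze(), x.squeeze())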

# Optimize (same algorithm as in Lagaris)
optimizer = torch.optim.LBFGS(N.parameters())

# The collocation points used by Lagaris
x = torch.Tensor(np.linspace(0, 2, 100)[:, None])

# Run the optimizer
def closure():
    optimizer.zero_grad()
    l = loss(x)
    l.backward()
    return l
    
for i in range(10):
    optimizer.step(closure)

# Let's compare the result to the true solution
xx = np.linspace(0, 2, 100)[:, None]
with torch.no_grad():
    yy = Psi_t(torch.Tensor(xx)).numpy()
yt = np.exp(-xx / 5.0) * np.sin(xx)

fig, ax = plt.subplots(dpi=100)
ax.plot(xx, yt, label='True')
ax.plot(xx, yy, '--', label='Neural network approximation')
ax.set_xlabel('$x$')
ax.set_ylabel(r'$\Psi(x)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
[Figure: true solution vs. neural network approximation of \(\Psi(x)\) on \([0, 2]\), trained with L-BFGS.]

Now, we are going to do the same thing using stochastic gradient descent:

# We need to reinitialize the network
N = nn.Sequential(nn.Linear(1, 50), nn.Sigmoid(), nn.Linear(50,1, bias=False))

# Let's see now if a stochastic optimizer makes a difference
adam = torch.optim.Adam(N.parameters(), lr=0.01)

# The batch size you want to use (how many points to use per iteration)
n_batch = 5

# The maximum number of iterations to do
max_it = 1000

for i in range(max_it):
    # Randomly pick n_batch random x's:
    x = 2 * torch.rand(n_batch, 1)
    # Zero-out the gradient buffers
    adam.zero_grad()
    # Evaluate the loss
    l = loss(x)
    # Calculate the gradients
    l.backward()
    # Update the network
    adam.step()
    # Print the iteration number
    if i % 100 == 99:
        print(i)
99
199
299
399
499
599
699
799
899
999
# Let's compare the result to the true solution
xx = np.linspace(0, 2, 100)[:, None]
with torch.no_grad():
    yy = Psi_t(torch.Tensor(xx)).numpy()
yt = np.exp(-xx / 5.0) * np.sin(xx)

fig, ax = plt.subplots(dpi=100)
ax.plot(xx, yt, label='True')
ax.plot(xx, yy, '--', label='Neural network approximation')
ax.set_xlabel('$x$')
ax.set_ylabel(r'$\Psi(x)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
[Figure: true solution vs. neural network approximation of \(\Psi(x)\), trained with the Adam stochastic optimizer.]

Questions#

In the following questions, use the stochastic gradient approach.

  • Change n_batch to just 1. Does the algorithm work? Did you have to increase the iterations to achieve the same accuracy?

  • Modify the code to solve the problem for \(x\) between 0 and 5. Play with the n_batch and max_it until you get a good solution. A starting sketch for the domain change is given right after this list.
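
Here is a minimal starting sketch for the second question. It only changes the sampling interval; the values of max_it, n_batch, and the learning rate are guesses that you will very likely need to tune:

# Starting sketch for x in [0, 5] (hyperparameter values are guesses)
N = nn.Sequential(nn.Linear(1, 50), nn.Sigmoid(), nn.Linear(50, 1, bias=False))
adam = torch.optim.Adam(N.parameters(), lr=0.01)
n_batch = 10
max_it = 5000
for i in range(max_it):
    x = 5 * torch.rand(n_batch, 1)  # sample uniformly in [0, 5]
    adam.zero_grad()
    l = loss(x)
    l.backward()
    adam.step()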

Solving a dynamical system with neural networks#

Let’s write some code that will allow us to solve any dynamical system:

class DynamicalSystem(object):
    """A class representing an initial value problem.
    
    Arguments
    ---------
    dim               -- The dimensionality of the problem.
    rhs               -- The right hand side of the equation. This must be a function with signature
                         rhs(t, y) where t is time and y is the state of the system.
    init_conditions   -- Initial conditions. Must be a vector of dimension dim.
    net               -- A neural network for representing the solution. This must have
                         one-dimensional input and dim-dimensional output.
    """
    
    def __init__(self, dim, rhs, init_conditions, net):
        assert isinstance(dim, int)
        assert dim > 0
        self._dim = dim
        self._rhs = rhs
        if isinstance(init_conditions, float):
            init_conditions = np.atleast_1d(init_conditions)
        init_conditions = torch.Tensor(init_conditions)
        self._init_conditions = init_conditions
        self._net = net
        self._solution = lambda T: self.init_conditions + T * self.net(T)
        
    @property
    def dim(self):
        return self._dim
    
    @property
    def rhs(self):
        return self._rhs
    
    @property
    def init_conditions(self):
        return self._init_conditions
    
    @property
    def net(self):
        return self._net
    
    @property
    def solution(self):
        """Return the solution function.
        """
        return self._solution
    
    def squared_residual_loss(self, T):
        """Returns the squared residual loss at times T.
        
        Arguments
        ---------
        T       --    Must be a 1D torch tensor.
        """
        T.requires_grad = True
        sol = self.solution(T)
        dsol_dt = torch.empty_like(sol)
        for d in range(self.dim):
            go = torch.zeros_like(sol)
            go[:, d] = 1.0
            dsol_dt[:, d] = torch.autograd.grad(sol, T, grad_outputs=go,
                                                create_graph=True)[0][:, 0]
        return torch.mean((dsol_dt - self.rhs(T, sol)) ** 2.)
    
    def solve_lbfgs(self, T_colloc, max_iter=10):
        """Solve the problem by minimizing the squared residual loss.
        
        Arguments
        ---------
        T_colloc    --  The collocation points used to solve the problem.
        """
        optimizer = torch.optim.LBFGS(self.net.parameters())

        # Run the optimizer
        def closure():
            optimizer.zero_grad()
            l = self.squared_residual_loss(T_colloc)
            l.backward()
            return l
    
        for i in range(max_iter):
            optimizer.step(closure)
    
    def solve_sgd(self, a, b, max_iter=1000, batch_size=10, lr=0.01):
        """Solve the problem using stochastic gradient descent.
        
        Arguments
        ---------
        a          -- The left end point of the interval.
        b          -- The right end point of the interval.
        max_iter   -- The maximum number of iterations to do. Default is 1000.
        batch_size -- The batch size to use. Default is 10.
        lr         -- The learning rate. Default is 0.01.
        """
        optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)

        for i in range(max_iter):
            # Randomly sample batch_size t's uniformly in [a, b]:
            T = (b - a) * torch.rand(batch_size, 1) + a
            # Zero-out the gradient buffers
            optimizer.zero_grad()
            # Evaluate the loss
            l = self.squared_residual_loss(T)
            # Calculate the gradients
            l.backward()
            # Update the network
            optimizer.step()
            # Print the iteration number
            if i % 100 == 99:
                print(f"it = {i}, loss = {l.item()}")
# Let's use this class with Problem 2 of Lagaris
ex2 = DynamicalSystem(1,
                      lambda t, y: torch.exp(-t / 5.0) * torch.cos(t) - y / 5.0,
                      0.0,
                      nn.Sequential(nn.Linear(1, 10), nn.Sigmoid(), nn.Linear(10,1, bias=False))
                      )
T_colloc = torch.Tensor(np.linspace(0, 2, 20)[:, None])
ex2.solve_lbfgs(T_colloc)
# Now we can evaluate the solution anywhere
T = torch.Tensor(np.linspace(0, 2, 100)[:, None])
sol = ex2.solution(T)
y_true = np.exp(-T / 5.0) * np.sin(T)
fig, ax = plt.subplots(dpi=100)
ax.plot(T.numpy(), y_true.numpy(), label='True solution')
ax.plot(T.numpy(), sol.detach().numpy(), '--', label='Net solution')
ax.set_xlabel('$t$')
ax.set_ylabel('$y(t)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
[Figure: true vs. network solution for Problem 2 of Lagaris, solved with the DynamicalSystem class.]
# Here is another problem - Problem 1 of Lagaris
rhs = lambda t, y: t ** 3 + 2 * t + t ** 2 * (1.0 + 3.0 * t ** 2) / (1.0 + t + t ** 3) \
                   -(t + (1.0 + 3.0 * t ** 2) / (1.0 + t + t ** 3)) * y
ex1 = DynamicalSystem(1,
                      rhs,
                      1.0,
                      nn.Sequential(nn.Linear(1, 10), nn.Sigmoid(), nn.Linear(10,1, bias=False))
                      )
T_colloc = torch.Tensor(np.linspace(0, 1, 10)[:, None])
ex1.solve_lbfgs(T_colloc)
T = torch.Tensor(np.linspace(0, 1, 100)[:, None])
sol = ex1.solution(T)
y_true = np.exp( -0.5 * T ** 2) / (1.0 + T + T ** 3) + T ** 2
fig, ax = plt.subplots(dpi=100)
ax.plot(T.numpy(), y_true.numpy(), label='True solution')
ax.plot(T.numpy(), sol.detach().numpy(), '--', label='Net solution')
ax.set_xlabel('$t$')
ax.set_ylabel('$y(t)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
[Figure: true vs. network solution for Problem 1 of Lagaris.]
# Let's do a 2D problem - Problem 4 of Lagaris
def rhs(t, y):
    t = torch.flatten(t)
    res = torch.empty_like(y)
    res[:, 0] = torch.cos(t) + y[:, 0] ** 2 + y[:, 1] - (1.0 + t ** 2 + torch.sin(t) ** 2)
    res[:, 1] = 2.0 * t - (1.0 + t ** 2) * torch.sin(t) + y[:, 0] * y[:, 1]
    return res
ex4 = DynamicalSystem(2,
                      rhs,
                      torch.Tensor([0.0, 1.0]),
                      nn.Sequential(nn.Linear(1, 20), nn.Sigmoid(), nn.Linear(20,2, bias=False))
                      )
#T_colloc = torch.Tensor(np.linspace(0, 3, 10)[:, None])
#ex4.solve_lbfgs(T_colloc, max_iter=20) # Does not work every time. Sometimes the optimization fails.
ex4.solve_sgd(0.0, 3.0, max_iter=10000, batch_size=10, lr=0.01)
it = 99, loss = 0.3463767468929291
it = 199, loss = 0.06628534942865372
it = 299, loss = 0.042410608381032944
it = 399, loss = 0.029202649369835854
it = 499, loss = 0.017537761479616165
it = 599, loss = 0.021197479218244553
it = 699, loss = 0.016754930838942528
it = 799, loss = 0.014986802823841572
it = 899, loss = 0.01844801940023899
it = 999, loss = 0.0070878989063203335
it = 1099, loss = 0.007116688881069422
it = 1199, loss = 0.015013654716312885
it = 1299, loss = 0.006453638430684805
it = 1399, loss = 0.07719659805297852
it = 1499, loss = 0.004118545912206173
it = 1599, loss = 0.007084548473358154
it = 1699, loss = 0.02322331815958023
it = 1799, loss = 0.00993071123957634
it = 1899, loss = 0.008378610946238041
it = 1999, loss = 0.007640938274562359
it = 2099, loss = 0.01079554297029972
it = 2199, loss = 0.002479827031493187
it = 2299, loss = 0.008639696054160595
it = 2399, loss = 0.012458952143788338
it = 2499, loss = 0.058060139417648315
it = 2599, loss = 0.0054211700335145
it = 2699, loss = 0.009485768154263496
it = 2799, loss = 0.005846458487212658
it = 2899, loss = 0.004885559435933828
it = 2999, loss = 0.005111613776534796
it = 3099, loss = 0.002829298609867692
it = 3199, loss = 0.0035969086457043886
it = 3299, loss = 0.0021282562520354986
it = 3399, loss = 0.0019377758726477623
it = 3499, loss = 0.012861981987953186
it = 3599, loss = 0.022117432206869125
it = 3699, loss = 0.0049068862572312355
it = 3799, loss = 0.04736264422535896
it = 3899, loss = 0.003565039485692978
it = 3999, loss = 0.0035635209642350674
it = 4099, loss = 0.002026042202487588
it = 4199, loss = 0.0021670334972441196
it = 4299, loss = 0.002549380762502551
it = 4399, loss = 0.04531533643603325
it = 4499, loss = 0.11380358040332794
it = 4599, loss = 0.002592099830508232
it = 4699, loss = 0.0027779857628047466
it = 4799, loss = 0.0014052388723939657
it = 4899, loss = 0.0021159271709620953
it = 4999, loss = 0.002115819603204727
it = 5099, loss = 0.10842223465442657
it = 5199, loss = 0.04687771201133728
it = 5299, loss = 0.0020898880902677774
it = 5399, loss = 0.0017970474436879158
it = 5499, loss = 0.0011046119034290314
it = 5599, loss = 0.0007937207701615989
it = 5699, loss = 0.008249855600297451
it = 5799, loss = 0.006347476039081812
it = 5899, loss = 0.0010504350066184998
it = 5999, loss = 0.0012247497215867043
it = 6099, loss = 0.0006221215589903295
it = 6199, loss = 0.0009926960337907076
it = 6299, loss = 0.0007292762165889144
it = 6399, loss = 0.0035610590130090714
it = 6499, loss = 0.0031069221440702677
it = 6599, loss = 0.02476700022816658
it = 6699, loss = 0.0014403884997591376
it = 6799, loss = 0.0008173917303793132
it = 6899, loss = 0.00023808248806744814
it = 6999, loss = 0.0016775386175140738
it = 7099, loss = 0.0008319422486238182
it = 7199, loss = 0.0005546907195821404
it = 7299, loss = 0.0002487456367816776
it = 7399, loss = 8.758953481446952e-05
it = 7499, loss = 0.00033935444662347436
it = 7599, loss = 0.0004222230927553028
it = 7699, loss = 0.0005156350089237094
it = 7799, loss = 0.07708616554737091
it = 7899, loss = 0.0013448568060994148
it = 7999, loss = 0.00025449026725254953
it = 8099, loss = 0.26882433891296387
it = 8199, loss = 0.0012748611625283957
it = 8299, loss = 0.00039095402462407947
it = 8399, loss = 0.00028596268384717405
it = 8499, loss = 0.000193130734260194
it = 8599, loss = 0.003904965939000249
it = 8699, loss = 0.00032825476955622435
it = 8799, loss = 0.00019814984989352524
it = 8899, loss = 0.045916058123111725
it = 8999, loss = 0.0035664972383528948
it = 9099, loss = 0.0004057659243699163
it = 9199, loss = 0.00014216727868188173
it = 9299, loss = 0.0002512291248422116
it = 9399, loss = 0.00038475735345855355
it = 9499, loss = 0.0010441048070788383
it = 9599, loss = 0.005958168767392635
it = 9699, loss = 0.00010795214620884508
it = 9799, loss = 0.00018100043234881014
it = 9899, loss = 8.72066302690655e-05
it = 9999, loss = 0.00011468210868770257
T = torch.Tensor(np.linspace(0, 3, 100)[:, None])
sol = ex4.solution(T)
y1_true = np.sin(T)
y2_true = 1.0 + T ** 2
fig, ax = plt.subplots()
ax.plot(T.numpy(), y1_true.numpy(), label='True solution (0)')
ax.plot(T.numpy(), sol.detach().numpy()[:, 0], '--', label='Net solution (0)')
ax.plot(T.numpy(), y2_true.numpy(), ':', label='True solution (1)')
ax.plot(T.numpy(), sol.detach().numpy()[:, 1], '.-', label='Net solution (1)')
ax.set_xlabel('$t$')
ax.set_ylabel('$y(t)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
[Figure: true vs. network solutions for both components of Problem 4 of Lagaris.]
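
As an optional cross-check (a sketch that assumes scipy is installed; it is not used anywhere else in this notebook), we can also compare the network solution of the 2D problem against a standard ODE integrator rather than the analytical solution:

# Optional cross-check against scipy's solve_ivp (assumes scipy is available)
from scipy.integrate import solve_ivp

def rhs_np(t, y):
    # Same right-hand side as above, written for plain numpy scalars
    return [np.cos(t) + y[0] ** 2 + y[1] - (1.0 + t ** 2 + np.sin(t) ** 2),
            2.0 * t - (1.0 + t ** 2) * np.sin(t) + y[0] * y[1]]

t_eval = np.linspace(0, 3, 100)
ivp_sol = solve_ivp(rhs_np, (0.0, 3.0), [0.0, 1.0], t_eval=t_eval)
net_sol = ex4.solution(torch.Tensor(t_eval[:, None])).detach().numpy()
print(np.max(np.abs(net_sol - ivp_sol.y.T)))  # maximum absolute discrepancy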

Question#

  • Try L-BFGS on the above problem. It does not work all the time. Why?