import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
import seaborn as sns
sns.set_context("paper")
sns.set_style("ticks")
Physics-informed regularization: Solving ODEs#
We learn how to solve ordinary differential equations (ODEs) with neural networks. This notebook replicates some of the results of Lagaris et al. (1997).
import torch
import torch.nn as nn
# This is useful for taking derivatives:
def grad(outputs, inputs):
return torch.autograd.grad(outputs, inputs, grad_outputs=torch.ones_like(outputs), create_graph=True)[0]
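As a quick sanity check, here is a minimal example that uses grad to differentiate \(y = x^2\), whose derivative is \(2x\):
# Sanity check of the grad helper on y = x ** 2, whose derivative is 2 * x.
x_test = torch.linspace(0.0, 1.0, 5).reshape(-1, 1).requires_grad_(True)
y_test = x_test ** 2
print(grad(y_test, x_test))  # should be close to 2 * x_test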
Example 1: Single ODE#
Consider the ODE:
\[
\frac{d\Psi}{dx} = e^{-x/5}\cos(x) - \frac{\Psi}{5},
\]
with \(x \in [0, 2]\) and initial condition (IC):
\[
\Psi(0) = A = 0.
\]
We write the trial solution as:
\[
\hat{\Psi}(x;\theta) = A + x\, N(x;\theta),
\]
where \(N(x; \theta)\) is a neural network (NN). By construction, the trial solution \(\hat{\Psi}(x;\theta)\) automatically satisfies the initial condition. The loss function we minimize to train the NN is the mean squared residual of the ODE at a set of collocation points \(x_i\):
\[
L(\theta) = \frac{1}{n}\sum_{i=1}^{n}\left(\frac{d\hat{\Psi}}{dx}(x_i;\theta) - f\big(x_i, \hat{\Psi}(x_i;\theta)\big)\right)^2,
\]
where \(f(x, \Psi) = e^{-x/5}\cos(x) - \Psi/5\) is the right-hand side of the ODE.
# N is a Neural Network - This is exactly the network used by Lagaris et al. 1997
N = nn.Sequential(nn.Linear(1, 50), nn.Sigmoid(), nn.Linear(50,1, bias=False))
# Initial condition
A = 0.
# The Psi_t function
Psi_t = lambda x: A + x * N(x)
# The right hand side function
f = lambda x, Psi: torch.exp(-x / 5.0) * torch.cos(x) - Psi / 5.0
# The loss function
def loss(x):
x.requires_grad = True
outputs = Psi_t(x)
Psi_t_x = torch.autograd.grad(outputs, x, grad_outputs=torch.ones_like(outputs),
create_graph=True)[0]
return torch.mean((Psi_t_x - f(x, outputs)) ** 2)
First, I will use the method found in Lagaris et al. Instead of stochastic optimization, they use many collocation points to estimate the loss integral (I am going to use 100) and then do full-batch gradient-based optimization (I am going to use L-BFGS).
# Optimize (same algorithm as in Lagaris)
optimizer = torch.optim.LBFGS(N.parameters())
# The collocation points used by Lagaris
x = torch.Tensor(np.linspace(0, 2, 100)[:, None])
# Run the optimizer
def closure():
optimizer.zero_grad()
l = loss(x)
l.backward()
return l
for i in range(10):
optimizer.step(closure)
# Let's compare the result to the true solution
xx = np.linspace(0, 2, 100)[:, None]
with torch.no_grad():
yy = Psi_t(torch.Tensor(xx)).numpy()
yt = np.exp(-xx / 5.0) * np.sin(xx)
fig, ax = plt.subplots(dpi=100)
ax.plot(xx, yt, label='True')
ax.plot(xx, yy, '--', label='Neural network approximation')
ax.set_xlabel('$x$')
ax.set_ylabel(r'$\Psi(x)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
Now, we are going to do the same thing using stochastic gradient descent:
# We need to reinitialize the network
N = nn.Sequential(nn.Linear(1, 50), nn.Sigmoid(), nn.Linear(50,1, bias=False))
# Let's see now if a stochastic optimizer makes a difference
adam = torch.optim.Adam(N.parameters(), lr=0.01)
# The batch size you want to use (how many points to use per iteration)
n_batch = 5
# The maximum number of iterations to do
max_it = 1000
for i in range(max_it):
# Randomly pick n_batch random x's:
x = 2 * torch.rand(n_batch, 1)
# Zero-out the gradient buffers
adam.zero_grad()
# Evaluate the loss
l = loss(x)
# Calculate the gradients
l.backward()
# Update the network
adam.step()
# Print the iteration number
if i % 100 == 99:
print(i)
99
199
299
399
499
599
699
799
899
999
# Let's compare the result to the true solution
xx = np.linspace(0, 2, 100)[:, None]
with torch.no_grad():
yy = Psi_t(torch.Tensor(xx)).numpy()
yt = np.exp(-xx / 5.0) * np.sin(xx)
fig, ax = plt.subplots(dpi=100)
ax.plot(xx, yt, label='True')
ax.plot(xx, yy, '--', label='Neural network approximation')
ax.set_xlabel('$x$')
ax.set_ylabel(r'$\Psi(x)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
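The plots look similar for both training approaches. For a quantitative comparison, here is a minimal check (reusing yy and yt from the cell above) that reports the maximum absolute error on the plotting grid:
# Maximum absolute error of the network solution on the plotting grid.
# Reuses yy (network prediction) and yt (exact solution) from the previous cell.
print(f"max abs error on [0, 2]: {np.max(np.abs(yy - yt)):.2e}")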
Questions#
In the following questions, use the stochastic gradient approach.
Change n_batch to just 1. Does the algorithm work? Did you have to increase the number of iterations to achieve the same accuracy?
Modify the code to solve the problem for \(x\) between 0 and 5. Play with n_batch and max_it until you get a good solution (a starting-point sketch follows below).
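Here is a minimal sketch of the kind of change the second question asks for, assuming we reinitialize the network N and reuse the loss function defined above; only the sampling line of the training loop changes, and n_batch and max_it are the knobs left for you to tune:
# Starting point for the x in [0, 5] exercise (values deliberately not tuned).
N = nn.Sequential(nn.Linear(1, 50), nn.Sigmoid(), nn.Linear(50, 1, bias=False))
adam = torch.optim.Adam(N.parameters(), lr=0.01)
n_batch = 5      # tune this
max_it = 1000    # and this
for i in range(max_it):
    # Sample uniformly from [0, 5] instead of [0, 2]:
    x = 5 * torch.rand(n_batch, 1)
    adam.zero_grad()
    l = loss(x)
    l.backward()
    adam.step()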
Solving a dynamical system with neural networks#
Let’s write some code that will allow us to solve any dynamical system:
class DynamicalSystem(object):
"""A class representing an initial value problem.
Arguments
---------
dim -- The dimensionality of the problem.
rhs -- The right hand side of the equation. This must be a function with signature
rhs(t, y) where t is time and y is the state of the system.
init_conditions -- Initial conditions. Must be a vector of dimension dim.
net -- A neural network for representing the solution. This must have
one-dimensional input and dim-dimensional output.
"""
def __init__(self, dim, rhs, init_conditions, net):
assert isinstance(dim, int)
assert dim > 0
self._dim = dim
self._rhs = rhs
if isinstance(init_conditions, float):
init_conditions = np.atleast_1d(init_conditions)
init_conditions = torch.Tensor(init_conditions)
self._init_conditions = init_conditions
self._net = net
self._solution = lambda T: self.init_conditions + T * self.net(T)
@property
def dim(self):
return self._dim
@property
def rhs(self):
return self._rhs
@property
def init_conditions(self):
return self._init_conditions
@property
def net(self):
return self._net
@property
def solution(self):
"""Return the solution function.
"""
return self._solution
def squared_residual_loss(self, T):
"""Returns the squared residual loss at times T.
Arguments
---------
T -- Must be a 1D torch tensor.
"""
T.requires_grad = True
sol = self.solution(T)
dsol_dt = torch.empty_like(sol)
for d in range(self.dim):
go = torch.zeros_like(sol)
go[:, d] = 1.0
dsol_dt[:, d] = torch.autograd.grad(sol, T, grad_outputs=go,
create_graph=True)[0][:, 0]
return torch.mean((dsol_dt - self.rhs(T, sol)) ** 2.)
def solve_lbfgs(self, T_colloc, max_iter=10):
"""Solve the problem by minimizing the squared residual loss.
Arguments
---------
T_colloc -- The collocation points used to solve the problem.
"""
optimizer = torch.optim.LBFGS(self.net.parameters())
# Run the optimizer
def closure():
optimizer.zero_grad()
l = self.squared_residual_loss(T_colloc)
l.backward()
return l
for i in range(max_iter):
optimizer.step(closure)
def solve_sgd(self, a, b, max_iter=1000, batch_size=10, lr=0.01):
"""Solve the problem using stochastic gradient descent.
Arguments
---------
a -- The left end point of the interval.
b -- The right end point of the interval.
max_iter -- The maximum number of iterations to do. Default is 1000.
batch_size -- The batch size to use. Default is 10.
lr -- The learning rate. Default is 0.01.
"""
optimizer = torch.optim.Adam(self.net.parameters(), lr=lr)
for i in range(max_iter):
# Randomly pick n_batch random x's:
T = (b - a) * torch.rand(batch_size, 1) + a
# Zero-out the gradient buffers
optimizer.zero_grad()
# Evaluate the loss
l = self.squared_residual_loss(T)
# Calculate the gradients
l.backward()
# Update the network
optimizer.step()
# Print the iteration number
if i % 100 == 99:
print(f"it = {i}, loss = {l.item()}")
# Let's use this class with Problem 2 of Lagaris
ex2 = DynamicalSystem(1,
lambda t, y: torch.exp(-t / 5.0) * torch.cos(t) - y / 5.0,
0.0,
nn.Sequential(nn.Linear(1, 10), nn.Sigmoid(), nn.Linear(10,1, bias=False))
)
T_colloc = torch.Tensor(np.linspace(0, 2, 20)[:, None])
ex2.solve_lbfgs(T_colloc)
# Now we can evaluate the solution anywhere
T = torch.Tensor(np.linspace(0, 2, 100)[:, None])
sol = ex2.solution(T)
y_true = np.exp(-T / 5.0) * np.sin(T)
fig, ax = plt.subplots(dpi=100)
ax.plot(T.numpy(), y_true.numpy(), label='True solution')
ax.plot(T.numpy(), sol.detach().numpy(), '--', label='Net solution')
ax.set_xlabel('$t$')
ax.set_ylabel('$y(t)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
# Here is another problem - Problem 1 of Lagaris
rhs = lambda t, y: t ** 3 + 2 * t + t ** 2 * (1.0 + 3.0 * t ** 2) / (1.0 + t + t ** 3) \
-(t + (1.0 + 3.0 * t ** 2) / (1.0 + t + t ** 3)) * y
ex1 = DynamicalSystem(1,
rhs,
1.0,
nn.Sequential(nn.Linear(1, 10), nn.Sigmoid(), nn.Linear(10,1, bias=False))
)
T_colloc = torch.Tensor(np.linspace(0, 1, 10)[:, None])
ex1.solve_lbfgs(T_colloc)
T = torch.Tensor(np.linspace(0, 1, 100)[:, None])
sol = ex1.solution(T)
y_true = np.exp( -0.5 * T ** 2) / (1.0 + T + T ** 3) + T ** 2
fig, ax = plt.subplots(dpi=100)
ax.plot(T.numpy(), y_true.numpy(), label='True solution')
ax.plot(T.numpy(), sol.detach().numpy(), '--', label='Net solution')
ax.set_xlabel('$t$')
ax.set_ylabel('$y(t)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
# Let's do a 2D problem - Problem 4 of Lagaris
def rhs(t, y):
t = torch.flatten(t)
res = torch.empty_like(y)
res[:, 0] = torch.cos(t) + y[:, 0] ** 2 + y[:, 1] - (1.0 + t ** 2 + torch.sin(t) ** 2)
res[:, 1] = 2.0 * t - (1.0 + t ** 2) * torch.sin(t) + y[:, 0] * y[:, 1]
return res
ex4 = DynamicalSystem(2,
rhs,
torch.Tensor([0.0, 1.0]),
nn.Sequential(nn.Linear(1, 20), nn.Sigmoid(), nn.Linear(20,2, bias=False))
)
#T_colloc = torch.Tensor(np.linspace(0, 3, 10)[:, None])
#ex4.solve_lbfgs(T_colloc, max_iter=20) # Does not work every time. Sometimes the optimization fails.
ex4.solve_sgd(0.0, 3.0, max_iter=10000, batch_size=10, lr=0.01)
it = 99, loss = 0.3463767468929291
it = 199, loss = 0.06628534942865372
it = 299, loss = 0.042410608381032944
it = 399, loss = 0.029202649369835854
it = 499, loss = 0.017537761479616165
it = 599, loss = 0.021197479218244553
it = 699, loss = 0.016754930838942528
it = 799, loss = 0.014986802823841572
it = 899, loss = 0.01844801940023899
it = 999, loss = 0.0070878989063203335
it = 1099, loss = 0.007116688881069422
it = 1199, loss = 0.015013654716312885
it = 1299, loss = 0.006453638430684805
it = 1399, loss = 0.07719659805297852
it = 1499, loss = 0.004118545912206173
it = 1599, loss = 0.007084548473358154
it = 1699, loss = 0.02322331815958023
it = 1799, loss = 0.00993071123957634
it = 1899, loss = 0.008378610946238041
it = 1999, loss = 0.007640938274562359
it = 2099, loss = 0.01079554297029972
it = 2199, loss = 0.002479827031493187
it = 2299, loss = 0.008639696054160595
it = 2399, loss = 0.012458952143788338
it = 2499, loss = 0.058060139417648315
it = 2599, loss = 0.0054211700335145
it = 2699, loss = 0.009485768154263496
it = 2799, loss = 0.005846458487212658
it = 2899, loss = 0.004885559435933828
it = 2999, loss = 0.005111613776534796
it = 3099, loss = 0.002829298609867692
it = 3199, loss = 0.0035969086457043886
it = 3299, loss = 0.0021282562520354986
it = 3399, loss = 0.0019377758726477623
it = 3499, loss = 0.012861981987953186
it = 3599, loss = 0.022117432206869125
it = 3699, loss = 0.0049068862572312355
it = 3799, loss = 0.04736264422535896
it = 3899, loss = 0.003565039485692978
it = 3999, loss = 0.0035635209642350674
it = 4099, loss = 0.002026042202487588
it = 4199, loss = 0.0021670334972441196
it = 4299, loss = 0.002549380762502551
it = 4399, loss = 0.04531533643603325
it = 4499, loss = 0.11380358040332794
it = 4599, loss = 0.002592099830508232
it = 4699, loss = 0.0027779857628047466
it = 4799, loss = 0.0014052388723939657
it = 4899, loss = 0.0021159271709620953
it = 4999, loss = 0.002115819603204727
it = 5099, loss = 0.10842223465442657
it = 5199, loss = 0.04687771201133728
it = 5299, loss = 0.0020898880902677774
it = 5399, loss = 0.0017970474436879158
it = 5499, loss = 0.0011046119034290314
it = 5599, loss = 0.0007937207701615989
it = 5699, loss = 0.008249855600297451
it = 5799, loss = 0.006347476039081812
it = 5899, loss = 0.0010504350066184998
it = 5999, loss = 0.0012247497215867043
it = 6099, loss = 0.0006221215589903295
it = 6199, loss = 0.0009926960337907076
it = 6299, loss = 0.0007292762165889144
it = 6399, loss = 0.0035610590130090714
it = 6499, loss = 0.0031069221440702677
it = 6599, loss = 0.02476700022816658
it = 6699, loss = 0.0014403884997591376
it = 6799, loss = 0.0008173917303793132
it = 6899, loss = 0.00023808248806744814
it = 6999, loss = 0.0016775386175140738
it = 7099, loss = 0.0008319422486238182
it = 7199, loss = 0.0005546907195821404
it = 7299, loss = 0.0002487456367816776
it = 7399, loss = 8.758953481446952e-05
it = 7499, loss = 0.00033935444662347436
it = 7599, loss = 0.0004222230927553028
it = 7699, loss = 0.0005156350089237094
it = 7799, loss = 0.07708616554737091
it = 7899, loss = 0.0013448568060994148
it = 7999, loss = 0.00025449026725254953
it = 8099, loss = 0.26882433891296387
it = 8199, loss = 0.0012748611625283957
it = 8299, loss = 0.00039095402462407947
it = 8399, loss = 0.00028596268384717405
it = 8499, loss = 0.000193130734260194
it = 8599, loss = 0.003904965939000249
it = 8699, loss = 0.00032825476955622435
it = 8799, loss = 0.00019814984989352524
it = 8899, loss = 0.045916058123111725
it = 8999, loss = 0.0035664972383528948
it = 9099, loss = 0.0004057659243699163
it = 9199, loss = 0.00014216727868188173
it = 9299, loss = 0.0002512291248422116
it = 9399, loss = 0.00038475735345855355
it = 9499, loss = 0.0010441048070788383
it = 9599, loss = 0.005958168767392635
it = 9699, loss = 0.00010795214620884508
it = 9799, loss = 0.00018100043234881014
it = 9899, loss = 8.72066302690655e-05
it = 9999, loss = 0.00011468210868770257
T = torch.Tensor(np.linspace(0, 3, 100)[:, None])
sol = ex4.solution(T)
y1_true = np.sin(T)
y2_true = 1.0 + T ** 2
fig, ax = plt.subplots()
ax.plot(T.numpy(), y1_true.numpy(), label='True solution (0)')
ax.plot(T.numpy(), sol.detach().numpy()[:, 0], '--', label='Net solution (0)')
ax.plot(T.numpy(), y2_true.numpy(), ':', label='True solution (1)')
ax.plot(T.numpy(), sol.detach().numpy()[:, 1], '.-', label='Net solution (1)')
ax.set_xlabel('$t$')
ax.set_ylabel('$y(t)$')
plt.legend(loc='best', frameon=False)
sns.despine(trim=True);
Question#
Try L-BFGS on the above problem (see the commented-out lines in the cell above, and the sketch below). It does not work all the time. Why?
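If you want to experiment, here is a minimal sketch, assuming a freshly initialized network (the ex4 instance above has already been trained with Adam):
# Retry Problem 4 with L-BFGS, starting from a fresh network.
ex4_lbfgs = DynamicalSystem(2,
    rhs,
    torch.Tensor([0.0, 1.0]),
    nn.Sequential(nn.Linear(1, 20), nn.Sigmoid(), nn.Linear(20, 2, bias=False))
)
T_colloc = torch.Tensor(np.linspace(0, 3, 10)[:, None])
ex4_lbfgs.solve_lbfgs(T_colloc, max_iter=20)
# The residual loss is sometimes small and sometimes not, depending on the initialization.
print(ex4_lbfgs.squared_residual_loss(T_colloc).item())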