Coverage for pySDC/tutorial/step_9/A_paradiag_for_linear

1"""

2This script introduces ParaDiag for linear problems.

3It is recommended to view this code side by side with `Gaya's paper on ParaDiag with collocation methods

4<https://arxiv.org/abs/2103.12571>`_ as the code is close to the equations presented there but offers no explanations

5about them.

6"""

8import numpy as np

9import scipy.sparse as sp

10import sys

12from pySDC.implementations.problem_classes.TestEquation_0D import testequation0d as problem_class

13from pySDC.implementations.sweeper_classes.generic_implicit import generic_implicit

14from pySDC.implementations.sweeper_classes.ParaDiagSweepers import QDiagonalization

16# setup output

17out_file = open('data/step_9_A_out.txt', 'w')

20def my_print(*args, **kwargs):

21 for output in [sys.stdout, out_file]:

22 print(*args, **kwargs, file=output)

25# setup parameters

26L = 4 # Number of parallel time steps

27M = 3 # Number of collocation nodes

28N = 2 # Number of spatial degrees of freedom

29alpha = 1e-4 # Circular perturbation parameter

30restol = 1e-10 # Residual tolerance for the composite collocation problem

31dt = 0.1 # step size

33my_print(f'Running ParaDiag test script with {L} time steps, {M} collocation nodes and {N} spatial degrees of freedom')

35# setup pySDC infrastructure for Dahlquist problem and quadrature

36prob = problem_class(lambdas=-1.0 * np.ones(shape=(N)), u0=1.0)

37sweeper_params = params = {'num_nodes': M, 'quad_type': 'RADAU-RIGHT'}

38sweep = generic_implicit(sweeper_params, None)

40# Setup a global NumPy array and insert initial conditions in the first step

41u = np.zeros((L, M, N), dtype=complex)

42u[0, :, :] = prob.u_exact(t=0)

44# setup matrices for composite collocation problem. We note the sizes of the matrices in comments after generating them.

46# Start with identity matrices (I) of various sizes

47I_L = sp.eye(L) # LxL

48I_MN = sp.eye((M) * N) # MNxMN

49I_N = sp.eye(N) # NxN

50I_M = sp.eye(M) # MxM

52# E matrix propagates the solution of the steps to be the initial condition for the next step

53E = sp.diags(

54 [

55 -1.0,

56 ]

57 * (L - 1),

58 offsets=-1,

59) # LxL

61"""

62The H matrix computes the solution at the of an individual step from the solutions at the collocation nodes.

63For the RADAU-RIGHT rule we use here, the right node coincides with the end of the interval, so this is simple.

64We start with building the MxM matrix H_M on the node level and then extend to the spatial dimension with a Kronecker product.

65"""

66H_M = sp.eye(M).tolil() * 0 # MxM

67H_M[:, -1] = 1

68H = sp.kron(H_M, I_N) # MNxMN

70"""

71Set up collocation problem.

72Note that the Kronecker product from Q and A is only possible when there is an A, i.e. when the problem is linear.

73We will discuss non-linear problems in later steps in this tutorial

74"""

75Q = sweep.coll.Qmat[1:, 1:] # MxM

76C_coll = I_MN - dt * sp.kron(Q, prob.A) # MNxMN

78# Set up the composite collocation / all-at-once problem

79C = (sp.kron(I_L, C_coll) + sp.kron(E, H)).tocsc() # LMNxLMN

81"""

82Now that we have the full composite collocation problem as one large matrix, we can just solve it directly to get a reference solution.

83Of course, this is prohibitively expensive for any actual application and we would never want to do this in practice.

84"""

85sol_direct = sp.linalg.spsolve(C, u.flatten()).reshape(u.shape)

87"""

88The normal time-stepping approach is to solve the composite collocation problem with forward substitution

89"""

90sol_stepping = u.copy()

91for l in range(L):

92 """

93 Solve the current step (sol_stepping[l] currently contains the initial conditions at step l)

94 Here, we only solve MNxMN systems rather than LMNxLMN systems. This is still really expensive in practice, which is why there is SDC, for example.

95 """

96 sol_stepping[l, :] = sp.linalg.spsolve(C_coll, sol_stepping[l].flatten()).reshape(sol_stepping[l].shape)

98 # place the solution to the current step as the initial conditions to the next step

99 if l < L - 1:

100 sol_stepping[l + 1, ...] = sol_stepping[l, -1, :]

101

102assert np.allclose(sol_stepping, sol_direct)

103

104

105"""

106So far, so serial and boring. We will now parallelize this using ParaDiag.

107We will solve the composite collocation problem using preconditioned Picard iterations:

108 C_alpha delta = u_0 - Cu^k = < residual of the composite collocation problem >

109 u^{k+1} = u^k + delta

110The trick behind ParaDiag is to choose the preconditioner C_alpha to be a time-periodic approximation to C that can be diagonalized and therefore inverted in parallel.

111What we change in C_alpha compared to C is the E matrix that propagates the solutions between steps, which we amend to feed the solution to the last step back into the first step.

112"""

113E_alpha = sp.diags(

114 [

115 -1.0,

116 ]

117 * (L - 1),

118 offsets=-1,

119).tolil() # LxL

120E_alpha[0, -1] = -alpha # make the problem time-periodic

121

122"""

123In order to diagonalize C_alpha, on the step level, we need to diagonalize I_L and E_alpha simultaneously.

124I_L and E_alpha are alpha-circular matrices which can be simultaneously diagonalized by a weighted Fourier transform.

125We start by setting the weighting matrices for the Fourier transforms and then compute the diagonal entries of the diagonal version D_alpha of E_alpha.

126We refrain from actually setting up the preconditioner because we will not use the expanded version here.

127"""

128gamma = alpha ** (-np.arange(L) / L)

129J = sp.diags(gamma) # LxL

130J_inv = sp.diags(1 / gamma) # LxL

131

132# compute diagonal entries via Fourier transform

133D_alpha_diag_vals = np.fft.fft(1 / gamma * E_alpha[:, 0].toarray().flatten(), norm='backward')

134

135

136"""

137We need some convenience functions for computing matrix vector multiplication and the composite collocation problem residual here

138"""

139

140

141def mat_vec(mat, vec):

142 """

143 Matrix vector product

144

145 Args:

146 mat (np.ndarray or scipy.sparse) : Matrix

147 vec (np.ndarray) : vector

148

149 Returns:

150 np.ndarray: mat @ vec

151 """

152 res = np.zeros_like(vec).astype(complex)

153 for l in range(vec.shape[0]):

154 for k in range(vec.shape[0]):

155 res[l] += mat[l, k] * vec[k]

156 return res

157

158

159def residual(_u, u0):

160 """

161 Compute the residual of the composite collocation problem

162

163 Args:

164 _u (np.ndarray): Current iterate

165 u0 (np.ndarray): Initial conditions

166

167 Returns:

168 np.ndarray: LMN size array with the residual

169 """

170 res = _u * 0j

171 for l in range(L):

172 # build step local residual

173

174 # communicate initial conditions for each step

175 if l == 0:

176 res[l, ...] = u0[l, ...]

177 else:

178 res[l, ...] = _u[l - 1, -1, ...]

179

180 # evaluate and subtract integral over right hand side functions

181 f_evals = np.array([prob.eval_f(_u[l, m], 0) for m in range(M)])

182 Qf = mat_vec(Q, f_evals)

183 for m in range(M):

184 # res[l, m, ...] -= (_u[l] - dt * Qf)[-1]

185 res[l, m, ...] -= (_u[l] - dt * Qf)[m]

186 # res[l, m, ...] -= np.mean((_u[l] - dt * Qf), axis=0)

187

188 return res

189

190

191"""

192We will start with ParaDiag where we parallelize across the L steps but solve the collocation problems directly in serial.

193"""

194sol_ParaDiag_L = u.copy()

195u0 = u.copy()

196niter_ParaDiag_L = 0

197

198res = residual(sol_ParaDiag_L, u0)

199while np.linalg.norm(res) > restol:

200 # compute weighted FFT in time to go to diagonal base of C_alpha

201 x = np.fft.fft(

202 mat_vec(J_inv.tolil(), res),

203 axis=0,

204 norm='ortho',

205 )

206

207 # solve the collocation problems in parallel on the steps

208 y = np.empty_like(x)

209 for l in range(L):

210 # construct local matrix of "collocation problem"

211 local_matrix = (D_alpha_diag_vals[l] * H + C_coll).tocsc()

212

213 # solve local "collocation problem" directly

214 y[l, ...] = sp.linalg.spsolve(local_matrix, x[l, ...].flatten()).reshape(x[l, ...].shape)

215

216 # compute inverse weighted FFT in time to go back from diagonal base of C_alpha

217 sol_ParaDiag_L += mat_vec(J.tolil(), np.fft.ifft(y, axis=0, norm='ortho'))

218

219 # update residual

220 res = residual(sol_ParaDiag_L, u0)

221 niter_ParaDiag_L += 1

222my_print(

223 f'Needed {niter_ParaDiag_L} iterations in parallel across the steps ParaDiag. Stopped at residual {np.linalg.norm(res):.2e}'

224)

225assert np.allclose(sol_ParaDiag_L, sol_direct)

226

227"""

228While we have distributed the work across L tasks, we are still solving perturbed collocation problems directly on a single task here.

229This is very expensive, and we will now additionally diagonalize the quadrature matrix Q in order to distribute the work on LM tasks, where we solve NxN systems each.

230We rearrange the contribution of E_alpha to arrive at a problem (I - dtQG^{-1}A)u = u0.

231After diagonalizing QG^{-1}, we can simply utilize the Euler solves that are implemented in pySDC, but need to keep in mind that complex valued "step sizes" are required.

232

233We start by setting up the G and G^{-1} matrices. Then we will setup pySDC sweepers that solve QG^{-1} with diagonalization.

234Here, we will not use the sweepers, but just the diagonalization computed there in order to make more clear what is going on.

235"""

236G = [(D_alpha_diag_vals[l] * H_M + I_M).tocsc() for l in range(L)] # MxM

237G_inv = [sp.linalg.inv(_G).toarray() for _G in G] # MxM

238sweepers = [QDiagonalization(params={**sweeper_params, 'G_inv': _G_inv}, level=None) for _G_inv in G_inv]

239

240

241sol_ParaDiag = u.copy().astype(complex)

242res = residual(sol_ParaDiag, u0)

243niter = 0

244while np.max(np.abs(residual(sol_ParaDiag, u0))) > restol:

245

246 # weighted FFT in time

247 x = np.fft.fft(

248 mat_vec(J_inv.tolil(), res),

249 axis=0,

250 norm='ortho',

251 )

252

253 # perform local solves of "collocation problems" on the steps in parallel

254 y = np.empty_like(x)

255 for l in range(L):

256

257 # diagonalize QG^-1 matrix

258 w, S, S_inv = sweepers[l].w, sweepers[l].S, sweepers[l].S_inv

259

260 # perform local solves on the collocation nodes in parallel

261 x1 = S_inv @ x[l]

262 x2 = np.empty_like(x1)

263 for m in range(M):

264 x2[m, :] = prob.solve_system(rhs=x1[m], factor=w[m] * dt, u0=x1[m], t=0)

265 z = S @ x2

266 y[l, ...] = G_inv[l] @ z

267

268 # inverse weighted FFT in time

269 sol_ParaDiag += mat_vec(J.tolil(), np.fft.ifft(y, axis=0, norm='ortho'))

270

271 res = residual(sol_ParaDiag, u0)

272 niter += 1

273my_print(

274 f'Needed {niter} iterations in parallel and local paradiag with increment formulation, stopped at residual {np.linalg.norm(res):.2e}'

275)

276assert np.allclose(sol_ParaDiag, sol_direct)

277assert np.allclose(niter, niter_ParaDiag_L)

Coverage for pySDC/tutorial/step_9/A_paradiag_for_linear_problems.py: 100%

111 statements