CoDiPack  3.0.0
A Code Differentiation Package
SciComp TU Kaiserslautern
Loading...
Searching...
No Matches
Example 15 - Preaccumulation of code parts

Goal: Reduce the memory consumption of code regions by storing the Jacobian for that region.

Prerequisite: Tutorial 2 - Reverse mode AD

Function:

// time step ode in a explicit euler sheme
// x'(t) = Ax(t)
// x_n = x_c + dt * Ax(t)
void ode(const Real* start, Real* end, int steps, Real* A, double dt, size_t n) {
Real* cur = new Real[n];
for(size_t i = 0; i < n; ++i) {
end[i] = start[i];
}
for(int curStep = 0; curStep < steps; ++curStep) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = 0.0;
for(size_t j = 0; j < n; ++j) {
end[i] += A[j + i * n] * cur[j];
}
end[i] = cur[i] + dt * end[i];
}
}
// we need to copy the result again if the number of steps is uneven
if(steps % 2 == 1) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = cur[i];
}
}
delete[] cur;
}

Full code:

#include <algorithm>
#include <iostream>
#include <codi.hpp>
using Real = codi::RealReverse;
using Tape = typename Real::Tape;
// time step ode in a explicit euler sheme
// x'(t) = Ax(t)
// x_n = x_c + dt * Ax(t)
void ode(const Real* start, Real* end, int steps, Real* A, double dt, size_t n) {
Real* cur = new Real[n];
for(size_t i = 0; i < n; ++i) {
end[i] = start[i];
}
for(int curStep = 0; curStep < steps; ++curStep) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = 0.0;
for(size_t j = 0; j < n; ++j) {
end[i] += A[j + i * n] * cur[j];
}
end[i] = cur[i] + dt * end[i];
}
}
// we need to copy the result again if the number of steps is uneven
if(steps % 2 == 1) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = cur[i];
}
}
delete[] cur;
}
void compute(bool performPreAcc) {
Real u = 3.0;
Tape& tape = Real::getTape();
tape.setActive();
tape.registerInput(u);
Real A[4] = {u * 1.0, 0.5,
0.0, u * -1.0};
Real start[2] = {u * 10.0, u * 20.0};
Real end[2];
codi::PreaccumulationHelper<Real> ph; // Step 1: Create the helper structure
if(performPreAcc) {
ph.start(start[0], start[1]); // Step 2: Start the preaccumulation region and specify inputs
for(size_t i = 0; i < 4; ++i) {
ph.addInput(A[i]); // Step 3: Add additional inputs (optional)
}
}
ode(start, end, 1000, A, 1.0 / 1000.0, 2); // Step 4: Evaluate the function/region for the accumulation in a normal way
if(performPreAcc) {
ph.addOutput(end[1]); // Step 5: Add additional outputs (optional)
ph.finish(false, end[0]); // Step 6: Finish the preaccumulation region and specify outputs
}
Real w = sqrt(end[0] * end[0] + end[1] * end[1]);
tape.registerOutput(w);
tape.setPassive();
tape.evaluate();
std::cout << "Solution w: " << w << std::endl;
std::cout << "Adjoint u: " << u.getGradient() << std::endl;
tape.printStatistics();
tape.reset();
}
int main(int nargs, char** args) {
std::cout << "Without preaccumulation:" << std::endl;
compute(false);
std::cout << std::endl;
std::cout << "With preaccumulation:" << std::endl;
compute(true);
return 0;
}

The example demonstrates on an ODE how preaccumulation works. Without preaccumulation 165170 Byte are used. If preaccumulation is enabled, then only 261 Byte are required.

Preaccumulation reduces the memory if the code region has only a few inputs and output, but requires a lot of computational effort.