Goal: Reduce the memory consumption of code regions by storing the Jacobian for that region.
Prerequisite: Tutorial 2 - Reverse mode AD
Function:
void ode(const Real* start, Real* end, int steps, Real* A, double dt, size_t n) {
Real* cur = new Real[n];
for(size_t i = 0; i < n; ++i) {
end[i] = start[i];
}
for(int curStep = 0; curStep < steps; ++curStep) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = 0.0;
for(size_t j = 0; j < n; ++j) {
end[i] += A[j + i * n] * cur[j];
}
end[i] = cur[i] + dt * end[i];
}
}
if(steps % 2 == 1) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = cur[i];
}
}
delete[] cur;
}
Full code:
#include <algorithm>
#include <iostream>
#include <codi.hpp>
void ode(const Real* start, Real* end, int steps, Real* A, double dt, size_t n) {
Real* cur = new Real[n];
for(size_t i = 0; i < n; ++i) {
end[i] = start[i];
}
for(int curStep = 0; curStep < steps; ++curStep) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = 0.0;
for(size_t j = 0; j < n; ++j) {
end[i] += A[j + i * n] * cur[j];
}
end[i] = cur[i] + dt * end[i];
}
}
if(steps % 2 == 1) {
std::swap(end, cur);
for(size_t i = 0; i < n; ++i) {
end[i] = cur[i];
}
}
delete[] cur;
}
void compute(bool performPreAcc) {
Real u = 3.0;
tape.setActive();
tape.registerInput(u);
Real A[4] = {u * 1.0, 0.5,
0.0, u * -1.0};
Real start[2] = {u * 10.0, u * 20.0};
Real end[2];
if(performPreAcc) {
ph.
start(start[0], start[1]);
for(size_t i = 0; i < 4; ++i) {
}
}
ode(start, end, 1000, A, 1.0 / 1000.0, 2);
if(performPreAcc) {
}
Real w = sqrt(end[0] * end[0] + end[1] * end[1]);
tape.registerOutput(w);
tape.setPassive();
tape.evaluate();
std::cout << "Solution w: " << w << std::endl;
std::cout <<
"Adjoint u: " << u.
getGradient() << std::endl;
tape.printStatistics();
tape.reset();
}
int main(int nargs, char** args) {
std::cout << "Without preaccumulation:" << std::endl;
compute(false);
std::cout << std::endl;
std::cout << "With preaccumulation:" << std::endl;
compute(true);
return 0;
}
The example demonstrates on an ODE how preaccumulation works. Without preaccumulation 165170 Byte are used. If preaccumulation is enabled, then only 261 Byte are required.
Preaccumulation reduces the memory if the code region has only a few inputs and output, but requires a lot of computational effort.