Goal: Get to know how a simple operator taping approach can be implemented with CoDiPack

Prerequisite: AD reverse mode, see Reverse AD Equation; Identifier management

Full code:

 
#include <codi.hpp>
enum class OperatorCode {
  ADD,
  SUB,
  MUL,
  DIV,
  SIN,
  COS,
  COPY
};
 
struct OperatorCodeLookup {
  public:
    template<typename Op>
    static OperatorCode get();
};
 
template<typename Op>
OperatorCode OperatorCodeLookup::get() {
  std::cerr << "Missing specialization for operator code lookup." << std::endl;
  exit(-1);
}
 
#define SPECIALIZE_LOOKUP(NAME, CODE) \
  template<> \
  OperatorCode OperatorCodeLookup::get<codi:: NAME<double>>() { \
    return OperatorCode:: CODE; \
  }
 
SPECIALIZE_LOOKUP(OperationAdd, ADD)
SPECIALIZE_LOOKUP(OperationSubstract, SUB)
SPECIALIZE_LOOKUP(OperationMultiply, MUL)
SPECIALIZE_LOOKUP(OperationDivide, DIV)
SPECIALIZE_LOOKUP(OperationSin, SIN)
SPECIALIZE_LOOKUP(OperationCos, COS)
 
#undef SPECIALIZE_LOOKUP
 
struct SimpleTape : public codi::ReverseTapeInterface<double, double, int> {
  public:
    using Real = double;
    using Gradient = double;
    using Identifier = int;
    using OperatorData = codi::ChunkedData<codi::Chunk1<OperatorCode>>;
    using IdentifierData = codi::ChunkedData<codi::Chunk1<int>, OperatorData>;
    using PrimalData = codi::ChunkedData<codi::Chunk1<double>, IdentifierData>;
 
    using Position = PrimalData::Position;
 
  private:
    bool active;
    std::vector<double> adjointVec;
 
    int maxIdentifier;
    codi::EmptyData emptyData; // Required for the termination.
    OperatorData operatorData;
    IdentifierData identifierData;
    PrimalData primalData;
 
  public:
 
    SimpleTape() :
      active(false),
      adjointVec(1), // Reserve one for out of bounds gradient access.
      maxIdentifier(0),
      emptyData(),
      operatorData(1024),
      identifierData(1024),
      primalData(1024)
    {
      operatorData.setNested(&emptyData);
      identifierData.setNested(&operatorData);
      primalData.setNested(&identifierData);
    }
 
    /*******************************************************************************/
    // ReverseTapeInterface implementation
    template<typename Lhs> void registerInput(codi::LhsExpressionInterface<Real, Gradient, SimpleTape, Lhs>& value) {
      if (active) {
        value.cast().getIdentifier() = generateIdentifier();
      } else {
        value.cast().getIdentifier() = 0;
      }
    }
 
 
    template<typename Lhs> void registerOutput(codi::LhsExpressionInterface<Real, Gradient, SimpleTape, Lhs>& value) {
      // Do nothing, every identifier is unique.
    }
    void setActive()      {active = true;}
    void setPassive()     {active = false;}
    bool isActive() const {return active;}
    void evaluate() {
      primalData.evaluateReverse(primalData.getPosition(), primalData.getZeroPosition(), SimpleTape::evaluateStack, adjointVec);
    }
    void clearAdjoints() {
      for (double& adj : adjointVec) {
        adj = 0.0;
      }
    }
    void reset(bool resetAdjoints = true) {
      if (resetAdjoints) {
        clearAdjoints();
      }
 
      maxIdentifier = 0;
 
      primalData.reset();
    }
 
    template<typename Stream = std::ostream> void printStatistics(Stream& out = std::cout) const {
      getTapeValues().formatDefault(out);
    }
 
    template<typename Stream = std::ostream> void printTableHeader(Stream& out = std::cout) const {
      getTapeValues().formatHeader(out);
    }
 
    template<typename Stream = std::ostream> void printTableRow(Stream& out = std::cout) const {
      getTapeValues().formatRow(out);
    }
 
    codi::TapeValues getTapeValues() const {
      codi::TapeValues values("Example tape");
 
      values.addSection("Adjoint vector");
      values.addLongEntry("Number of adjoints", (1 + maxIdentifier));
      values.addDoubleEntry("Memory allocated", sizeof(double) * (1 + maxIdentifier),
                            codi::TapeValues::LocalReductionOperation::Sum, true, true);
 
      values.addSection("Index manager");
      values.addLongEntry("Max. live indices", (1 + maxIdentifier));
 
      values.addSection("Primal data");
      primalData.addToTapeValues(values);
      values.addSection("Identifier data");
      identifierData.addToTapeValues(values);
      values.addSection("Operator data");
      operatorData.addToTapeValues(values);
 
      return values;
    }
 
    /*******************************************************************************/
    // InternalStatementRecordingInterface implementation
 
    static bool constexpr AllowJacobianOptimization = false; // If certain operations can be hidden from the tape.
    template<typename Real>
    void initIdentifier(Real& value, Identifier& identifier) {
      identifier = 0; // Initialize with zero we perform an online activity analysis.
    }
 
    template<typename Real>
    void destroyIdentifier(Real& value, Identifier& identifier) {
      // Do nothing: Identifiers are not reused.
    }
    template<typename Lhs, typename Rhs>
    void store(Lhs& lhs, Rhs const& rhs) {
      if (active) {
        storeOperator(rhs, lhs.value(), lhs.getIdentifier(), true);
      } else {
        lhs.value() = rhs.getValue();
        lhs.getIdentifier() = 0;
      }
    }
 
    /*******************************************************************************/
    // GradientAccessTapeInterface implementation
    void setGradient(Identifier const& identifier, Gradient const& grad,
                     codi::AdjointsManagement adjointsManagement = codi::AdjointsManagement::Automatic) {
      gradient(identifier, adjointsManagement) = grad;
    }
    Gradient const& getGradient(Identifier const& identifier) const {
      return gradient(identifier);
    }
 
    Gradient& gradient(Identifier const& identifier,
                       codi::AdjointsManagement adjointsManagement = codi::AdjointsManagement::Automatic) {
      if (codi::AdjointsManagement::Automatic == adjointsManagement) {
        checkAndResizeAdjoints(identifier);
      }
 
      return adjointVec[identifier];
    }
 
    Gradient const& gradient(Identifier const& identifier,
                             codi::AdjointsManagement adjointsManagement = codi::AdjointsManagement::Automatic) const {
      if (codi::AdjointsManagement::Automatic == adjointsManagement && identifier >= (int)adjointVec.size()) {
        return adjointVec[0];
      } else {
        return adjointVec[identifier];
      }
    }
 
  private:
    void checkAndResizeAdjoints(int const& identifier) {
      if (identifier > maxIdentifier) {
        std::cerr << "Error: Tryinig to access an identifier which was not distributed." << std::endl;
      }
      if (identifier >= (int)adjointVec.size()) { // Only resize if necessary.
        adjointVec.resize(maxIdentifier + 1);
      }
    }
 
    int generateIdentifier() {
      maxIdentifier += 1;
 
      return maxIdentifier;
    }
    template<typename Operation, typename = void>
    struct StoreOperator_Impl {
      public:
        template<typename Exp>
        static void store(
            Exp const& exp,
            SimpleTape& tape,
            double& resultValue,
            int& resultIdentifier,
            bool copy);
    };
    template<typename Arg, template<typename> class Op>
    struct StoreOperator_Impl<codi::ComputeExpression<double, Op, Arg>> {
      public:
        static void store(
            codi::ComputeExpression<double, Op, Arg> const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
 
          double argValue;
          int argIdentifier;
 
          tape.storeOperator(exp.template getLink<0>(), argValue, argIdentifier, false);
 
          if (argIdentifier != 0) {
            // Active argument or branch => store the operator.
            tape.operatorData.reserveItems(1);
            tape.identifierData.reserveItems(2);
            tape.primalData.reserveItems(1);
 
            resultIdentifier = tape.generateIdentifier();
 
            tape.operatorData.pushData(OperatorCodeLookup::get<Op<double>>());
            tape.identifierData.pushData(argIdentifier);
            tape.identifierData.pushData(resultIdentifier);
            tape.primalData.pushData(argValue);
          } else {
            // Passive argument or branch => do not store anything.
            resultIdentifier = 0;
          }
 
          resultValue = exp.getValue();
        }
    };
    template<typename Arg1, typename Arg2, template<typename> class Op>
    struct StoreOperator_Impl<codi::ComputeExpression<double, Op, Arg1, Arg2>> {
      public:
        static void store(
            codi::ComputeExpression<double, Op, Arg1, Arg2> const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
 
          double argAValue;
          double argBValue;
          int argAIdentifier;
          int argBIdentifier;
 
          tape.storeOperator(exp.template getLink<0>(), argAValue, argAIdentifier, false);
          tape.storeOperator(exp.template getLink<1>(), argBValue, argBIdentifier, false);
 
          if (argAIdentifier != 0 || argBIdentifier != 0) {
            // Active argument or branch => store the operator.
            tape.operatorData.reserveItems(1);
            tape.identifierData.reserveItems(3);
            tape.primalData.reserveItems(2);
 
            resultIdentifier = tape.generateIdentifier();
 
            tape.operatorData.pushData(OperatorCodeLookup::get<Op<double>>());
            tape.identifierData.pushData(argAIdentifier);
            tape.identifierData.pushData(argBIdentifier);
            tape.identifierData.pushData(resultIdentifier);
            tape.primalData.pushData(argAValue);
            tape.primalData.pushData(argBValue);
 
          } else {
            // Passive argument or branch => do not store anything.
            resultIdentifier = 0;
          }
 
          resultValue = exp.getValue();
        }
    };
 
    template<typename Exp>
    struct StoreOperator_Impl<Exp, codi::ExpressionTraits::EnableIfConstantExpression<Exp>> {
      public:
        static void store(
            codi::ConstantExpression<double> const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
          resultValue = exp.getValue();
          resultIdentifier = 0;
        }
    };
 
    template<typename Exp>
    struct StoreOperator_Impl<Exp, codi::ExpressionTraits::EnableIfLhsExpression<Exp>> {
      public:
        static void store(
            Exp const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
          if (copy && 0 != exp.getIdentifier()) {
            // Active argument and a copy operation => store the operator.
            tape.operatorData.reserveItems(1);
            tape.identifierData.reserveItems(2);
            tape.primalData.reserveItems(1);
 
            resultIdentifier = tape.generateIdentifier();
 
            tape.operatorData.pushData(OperatorCode::COPY);
            tape.identifierData.pushData(exp.getIdentifier());
            tape.identifierData.pushData(resultIdentifier);
            tape.primalData.pushData(exp.getValue());
          } else {
            // No copy operation or passive value => just pass the data.
            resultIdentifier = exp.getIdentifier();
          }
 
          resultValue = exp.getValue();
        }
    };
 
    template<typename Exp>
    void storeOperator(Exp const& exp, double& value, int& identifier, bool copy) {
      StoreOperator_Impl<Exp>::store(exp, *this, value, identifier, copy);
    }
    static void evaluateStack(
        /* data from call */
        std::vector<double>& adjointVector,
        /* primal data */
        size_t& curPrimalPos, size_t const& endPrimalPos, double const* const primalData,
        /* identifier data */
        size_t& curIdentifierPos, size_t const& endIdentifierPos, int const* const identifierData,
        /* operator data */
        size_t& curOperatorPos, size_t const& endOperatorPos, OperatorCode const* const operatorData) {
 
      while (curOperatorPos > endOperatorPos) {
        curOperatorPos -= 1;
 
        int resultIdentifier = 0;
        int arg1Identifier = 0;
        int arg2Identifier = 0;
        double arg1Value = 0.0;
        double arg2Value = 0.0;
 
        // Get the data from the stacks.
        switch (operatorData[curOperatorPos]) {
          // Binary operations.
          case OperatorCode::ADD:
          case OperatorCode::SUB:
          case OperatorCode::MUL:
          case OperatorCode::DIV:
            // Get the data.
            resultIdentifier = identifierData[curIdentifierPos - 1];
            arg2Identifier = identifierData[curIdentifierPos - 2];
            arg1Identifier = identifierData[curIdentifierPos - 3];
            arg2Value = primalData[curPrimalPos - 1];
            arg1Value = primalData[curPrimalPos - 2];
 
            // Adjust positions.
            curIdentifierPos -= 3;
            curPrimalPos -= 2;
            break;
 
          // Unary operations.
          case OperatorCode::COS:
          case OperatorCode::SIN:
          case OperatorCode::COPY:
            // Get the data
            resultIdentifier = identifierData[curIdentifierPos - 1];
            arg1Identifier = identifierData[curIdentifierPos - 2];
            arg1Value = primalData[curPrimalPos - 1];
 
            // Adjust positions.
            curIdentifierPos -= 2;
            curPrimalPos -= 1;
            break;
        default:
            std::cerr << "Error: unhandled pop operator '" << (int)operatorData[curOperatorPos] << "'." << std::endl;
            exit(-1);
          break;
        }
 
        double resultAdjoint = adjointVector[resultIdentifier];
        adjointVector[resultIdentifier] = 0.0; // Perform the reset of the lhs.
        double& arg1Adjoint = adjointVector[arg1Identifier];
        double& arg2Adjoint = adjointVector[arg2Identifier];
 
        switch (operatorData[curOperatorPos]) {
          // Binary operations
          case OperatorCode::ADD:
            arg1Adjoint += resultAdjoint;
            arg2Adjoint += resultAdjoint;
            break;
          case OperatorCode::SUB:
            arg1Adjoint += resultAdjoint;
            arg2Adjoint -= resultAdjoint;
            break;
          case OperatorCode::MUL:
            arg1Adjoint += arg2Value * resultAdjoint;
            arg2Adjoint += arg1Value * resultAdjoint;
            break;
          case OperatorCode::DIV:
            arg1Adjoint += resultAdjoint / arg2Value;
            arg2Adjoint += - arg1Value * resultAdjoint / arg2Value / arg2Value;
            break;
 
          // Unary operations
          case OperatorCode::COS:
            arg1Adjoint +=  -std::sin(arg1Value) * resultAdjoint;
            break;
          case OperatorCode::SIN:
            arg1Adjoint +=  std::cos(arg1Value) * resultAdjoint;
            break;
          case OperatorCode::COPY:
            arg1Adjoint +=  resultAdjoint;
            break;
        default:
            std::cerr << "Error: unhandled adjoint operator '" << (int)operatorData[curOperatorPos] << "'." << std::endl;
            exit(-1);
          break;
        }
      }
    }
};
template<typename Real>
void eval() {
  using Tape = typename Real::Tape;
  Tape& tape = Real::getTape();
 
  Real a = 3.0;
  Real b = 4.0;
 
  tape.setActive();
  tape.registerInput(a);
  tape.registerInput(b);
 
  Real c = sin(a + b) * cos(a - b); // Single statement.
 
  tape.registerOutput(c);
 
  tape.setPassive();
 
  c.gradient() = 1.0;
 
  tape.evaluate();
 
  std::cout << "c = " << c.getValue() << std::endl;
  std::cout << "d c/d a = " << a.getGradient() << std::endl;
  std::cout << "d c/d b = " << b.getGradient() << std::endl;
 
  tape.printStatistics();
 
  tape.reset();
}
 
int main(int nargs, char** args) {
 
  std::cout << "Simple tape:" << std::endl;
  eval<codi::ActiveType<SimpleTape>>();
 
  std::cout << std::endl;
  std::cout << std::endl;
  std::cout << std::endl;
  std::cout << "codi::RealReverse:" << std::endl;
  eval<codi::RealReverse>();
 
 
  return 0;
}

The implementation of CoDiPack tape starts with two things,

deciding on a taping strategy and the data layout as well as
the codi:ReverseTapeInterface.

We start with the discussion of the data layout. In order to keep things simple we want to implement an operator taping approach that stores the primal input values of each operation. The identifiers are not managed, we will simply generate a new one whenever required.

An operator taping approach will break down all statements in the program to their operations for the AD tool. A statement like w = sqrt(a * a + b * b) will be broken down into the single assignment code

t1 = a * a;
t2 = b * b;
t3 = t1 + t2;
w = sqrt(t3);

With this consideration we only need to handle binary and unary operations. For each of these operations we need to store the necessary data in order to be able to evaluate the reverse AD equation. For a binary operation $y = f_{bin}(x, z)$ the required data consists of

the primal values of and ,
the identifiers of , and as well as
an enumerator to identify the operation $f_{bin}$ .

The analogous data is required for a unary operation $y = f_{un}(x)$ where the data items with respect to are omitted, of course. Each type of data in the above list will be stored in a separate data stream (codi::DataInterface), that is, we have one data stream for primal values, one for identifiers and one for the operators.

With the defined data layout it is then possible to evaluate the reverse AD equation for each operation, therefore we can have a look at the codi::ReverseTapeInterface and start the implementation.

ReverseTapeInterface

The codi::ReverseTapeInterface defines the basic functionality for the reverse evaluation of a tape and the management of the taping process. It inherits from the two interfaces codi::GradientAccessTapeInterface and codi::InternalStatementRecordingTapeInterface. The first one defines the basic access to the adjoint variables. The second one defines the functionality which is called by the CoDiPack expressions to signal to the tape that a statement needs to be recorded. Both interfaces are required such that the basic active type (codi::ActiveType) is able to work with the tape implementation.

In the following the implementation will be discussed step by step.

Data definition

Type definitions:

    using OperatorData = codi::ChunkedData<codi::Chunk1<OperatorCode>>;
    using IdentifierData = codi::ChunkedData<codi::Chunk1<int>, OperatorData>;
    using PrimalData = codi::ChunkedData<codi::Chunk1<double>, IdentifierData>;

Member definitions:

    codi::EmptyData emptyData; // Required for the termination.
    OperatorData operatorData;
    IdentifierData identifierData;
    PrimalData primalData;

Member creation:

      emptyData(),
      operatorData(1024),
      identifierData(1024),
      primalData(1024)
    {
      operatorData.setNested(&emptyData);
      identifierData.setNested(&operatorData);
      primalData.setNested(&identifierData);
    }

The three steps from above will create the data management defined at the beginning. In the first step, we define the three data streams. In this example we use the chunked data stream. It allocates chunks of data every time the already allocated memory is used up. The chunk definition tells each stream what kind of data should be stored. In our example each stream consists only of one data item per entry. The OperatorCode type is an enum class and will be covered in more detail later. Note that the type definitions are nested. In order to define the data stream for the identifier data, the data stream for the operator data is provided as the nested stream. The design decision for this nesting will become clear when we discuss the reverse interpretation of the data.

The second step defines the members and the third step initializes them. The important thing here is the initialization of the nested data stream with a call to codi::DataInterface::setNested.

Identifier management and adjoint vector

As defined in the taping strategy, identifiers are not managed. Every time an identifier is required, a new one is generated. Therefore we only keep track of the largest one and define the adjoint vector as a standard vector:

    std::vector<double> adjointVec;
 
    int maxIdentifier;

The basic size of the adjoint vector is 1 so that we can always safely access the first entry:

      active(false),
      adjointVec(1), // Reserve one for out of bounds gradient access.
      maxIdentifier(0),

This allows us to implement the codi::GradientAccessTapeInterface:

    void setGradient(Identifier const& identifier, Gradient const& grad,
                     codi::AdjointsManagement adjointsManagement = codi::AdjointsManagement::Automatic) {
      gradient(identifier, adjointsManagement) = grad;
    }
    Gradient const& getGradient(Identifier const& identifier) const {
      return gradient(identifier);
    }
 
    Gradient& gradient(Identifier const& identifier,
                       codi::AdjointsManagement adjointsManagement = codi::AdjointsManagement::Automatic) {
      if (codi::AdjointsManagement::Automatic == adjointsManagement) {
        checkAndResizeAdjoints(identifier);
      }
 
      return adjointVec[identifier];
    }
 
    Gradient const& gradient(Identifier const& identifier,
                             codi::AdjointsManagement adjointsManagement = codi::AdjointsManagement::Automatic) const {
      if (codi::AdjointsManagement::Automatic == adjointsManagement && identifier >= (int)adjointVec.size()) {
        return adjointVec[0];
      } else {
        return adjointVec[identifier];
      }
    }

The helper function checkAndResizeAdjoints checks the size of the vector and only resizes it if necessary. The generation of a new identifier is also quite simple but standardized in generateIdentifier:

    void checkAndResizeAdjoints(int const& identifier) {
      if (identifier > maxIdentifier) {
        std::cerr << "Error: Tryinig to access an identifier which was not distributed." << std::endl;
      }
      if (identifier >= (int)adjointVec.size()) { // Only resize if necessary.
        adjointVec.resize(maxIdentifier + 1);
      }
    }
 
    int generateIdentifier() {
      maxIdentifier += 1;
 
      return maxIdentifier;
    }

Since we can now generate identifiers, it is possible to implement the functions for registering input and output variables. An input registration will just generate a new identifier and set it as the identifier for the value. We do not need to create a statement on the tape here since the identifiers and statements are not interlocked (removing the left hand side identifier from the data stream in a linear index management scheme would interlock them). In the registerOutput implementation, nothing needs to be done since all identifiers are assigned to a unique value (copy statement optimizations would require to assign a unique identifier here which would store a copy operation).

    template<typename Lhs> void registerInput(codi::LhsExpressionInterface<Real, Gradient, SimpleTape, Lhs>& value) {
      if (active) {
        value.cast().getIdentifier() = generateIdentifier();
      } else {
        value.cast().getIdentifier() = 0;
      }
    }
 
 
    template<typename Lhs> void registerOutput(codi::LhsExpressionInterface<Real, Gradient, SimpleTape, Lhs>& value) {
      // Do nothing, every identifier is unique.
    }

The identifiers are stored in the AD type provided by CoDiPack. The initialization of the identifier in the AD value is done by the function initIdentifier required by the codi::InternalStatementRecordingTapeInterface. We implement an online activity analysis in this tape. Therefore, all identifiers in the AD values can be initialized with zero. The zero identifier is used in our implementation to track passive values. These are values that do not depend on the input values. How this is done is explained in the next section.

    template<typename Real>
    void initIdentifier(Real& value, Identifier& identifier) {
      identifier = 0; // Initialize with zero we perform an online activity analysis.
    }
 
    template<typename Real>
    void destroyIdentifier(Real& value, Identifier& identifier) {
      // Do nothing: Identifiers are not reused.
    }

Storing of expressions/operators

The entry point for the storing of expressions is defined in the codi::InternalStatementRecordingTapeInterface. The method store is called every time a new value is assigned to a CoDiPack type. The implementation in the tape performs the basic activity checking of the tape. If the tape is currently in recording mode, then it forwards the data to the method storeOperator:

    template<typename Lhs, typename Rhs>
    void store(Lhs& lhs, Rhs const& rhs) {
      if (active) {
        storeOperator(rhs, lhs.value(), lhs.getIdentifier(), true);
      } else {
        lhs.value() = rhs.getValue();
        lhs.getIdentifier() = 0;
      }
    }

storeOperator is the entry point for storing the expressions. Since CoDiPack usually employs a statement taping strategy with expressions (Expressions), the storing of pure operators is a little bit more complicated. We need to walk recursively through the expression tree and store all operators in this tree. In order to do that we define a helper class StoreOperator_Impl. This class is then specialized for all the different nodes in a CoDiPack expression. In our case these will be

codi::ComputeExpression with one argument,
codi::ComputeExpression with two arguments,
codi::ConstantExpression,
codi::LhsExpressionInterface.

The detailed process will be explained for the unary operator, the others are similar:

    template<typename Arg, template<typename> class Op>
    struct StoreOperator_Impl<codi::ComputeExpression<double, Op, Arg>> {
      public:
        static void store(
            codi::ComputeExpression<double, Op, Arg> const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
 
          double argValue;
          int argIdentifier;
 
          tape.storeOperator(exp.template getLink<0>(), argValue, argIdentifier, false);
 
          if (argIdentifier != 0) {
            // Active argument or branch => store the operator.
            tape.operatorData.reserveItems(1);
            tape.identifierData.reserveItems(2);
            tape.primalData.reserveItems(1);
 
            resultIdentifier = tape.generateIdentifier();
 
            tape.operatorData.pushData(OperatorCodeLookup::get<Op<double>>());
            tape.identifierData.pushData(argIdentifier);
            tape.identifierData.pushData(resultIdentifier);
            tape.primalData.pushData(argValue);
          } else {
            // Passive argument or branch => do not store anything.
            resultIdentifier = 0;
          }
 
          resultValue = exp.getValue();
        }
    };

The arguments to the function are the expression exp, the tape, the result value reference resultValue, the result identifier reference resultIdentifier and the boolean copy. copy is used to indicate a copy operation only if the first level of the expression is a codi::LhsExpression. This is explained later. tape is just used to access the data streams and to call storeOperator recursively. exp ist the current node in the expression tree we want to store. The two reference arguments resultValue and resultIdentifier define the value and the identifier of the current node. The method has to populate these two values and store everything for the reverse interpretation.

The first step is to call storeOperator on the nested expression. This will populate the arguments argValue and argIdentifier with the result of the nested expression and the identifier for the result. Since this is at least the second level in the expression tree, we do not want to generate copy operations.

Afterwards, it is checked if the result of the nested expression is active. This is the case if at least one argument of the operator is active, that is, it has a nonzero identifier. If one argument is active, we store the operation otherwise the result identifier of this operation is also set to passive (zero).

The storing of the operation starts with the reservation of the data items. All reservations for one operation have to be done in one block and from the most nested stream to the root stream. This ensures that all data is stored in the same evaluation block (see also codi::DataInterface). Afterwards the new identifier for the result of the operator is generated and the data is stored. As described in the introduction we store the primal values of the arguments and the identifiers for the argument as well as the result. The operator entry is retrieved via a lookup function. This function is specialized for each operator node in a CoDiPack expression tree. The definition of the enumeration and the lookup function is:

enum class OperatorCode {
  ADD,
  SUB,
  MUL,
  DIV,
  SIN,
  COS,
  COPY
};
 
struct OperatorCodeLookup {
  public:
    template<typename Op>
    static OperatorCode get();
};
 
template<typename Op>
OperatorCode OperatorCodeLookup::get() {
  std::cerr << "Missing specialization for operator code lookup." << std::endl;
  exit(-1);
}
 
#define SPECIALIZE_LOOKUP(NAME, CODE) \
  template<> \
  OperatorCode OperatorCodeLookup::get<codi:: NAME<double>>() { \
    return OperatorCode:: CODE; \
  }
 
SPECIALIZE_LOOKUP(OperationAdd, ADD)
SPECIALIZE_LOOKUP(OperationSubstract, SUB)
SPECIALIZE_LOOKUP(OperationMultiply, MUL)
SPECIALIZE_LOOKUP(OperationDivide, DIV)
SPECIALIZE_LOOKUP(OperationSin, SIN)
SPECIALIZE_LOOKUP(OperationCos, COS)
 
#undef SPECIALIZE_LOOKUP

The implementation for the other operators is nearly the same:

    template<typename Arg1, typename Arg2, template<typename> class Op>
    struct StoreOperator_Impl<codi::ComputeExpression<double, Op, Arg1, Arg2>> {
      public:
        static void store(
            codi::ComputeExpression<double, Op, Arg1, Arg2> const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
 
          double argAValue;
          double argBValue;
          int argAIdentifier;
          int argBIdentifier;
 
          tape.storeOperator(exp.template getLink<0>(), argAValue, argAIdentifier, false);
          tape.storeOperator(exp.template getLink<1>(), argBValue, argBIdentifier, false);
 
          if (argAIdentifier != 0 || argBIdentifier != 0) {
            // Active argument or branch => store the operator.
            tape.operatorData.reserveItems(1);
            tape.identifierData.reserveItems(3);
            tape.primalData.reserveItems(2);
 
            resultIdentifier = tape.generateIdentifier();
 
            tape.operatorData.pushData(OperatorCodeLookup::get<Op<double>>());
            tape.identifierData.pushData(argAIdentifier);
            tape.identifierData.pushData(argBIdentifier);
            tape.identifierData.pushData(resultIdentifier);
            tape.primalData.pushData(argAValue);
            tape.primalData.pushData(argBValue);
 
          } else {
            // Passive argument or branch => do not store anything.
            resultIdentifier = 0;
          }
 
          resultValue = exp.getValue();
        }
    };
 
    template<typename Exp>
    struct StoreOperator_Impl<Exp, codi::ExpressionTraits::EnableIfConstantExpression<Exp>> {
      public:
        static void store(
            codi::ConstantExpression<double> const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
          resultValue = exp.getValue();
          resultIdentifier = 0;
        }
    };
 
    template<typename Exp>
    struct StoreOperator_Impl<Exp, codi::ExpressionTraits::EnableIfLhsExpression<Exp>> {
      public:
        static void store(
            Exp const& exp, SimpleTape& tape,
            double& resultValue, int& resultIdentifier, bool copy)
        {
          if (copy && 0 != exp.getIdentifier()) {
            // Active argument and a copy operation => store the operator.
            tape.operatorData.reserveItems(1);
            tape.identifierData.reserveItems(2);
            tape.primalData.reserveItems(1);
 
            resultIdentifier = tape.generateIdentifier();
 
            tape.operatorData.pushData(OperatorCode::COPY);
            tape.identifierData.pushData(exp.getIdentifier());
            tape.identifierData.pushData(resultIdentifier);
            tape.primalData.pushData(exp.getValue());
          } else {
            // No copy operation or passive value => just pass the data.
            resultIdentifier = exp.getIdentifier();
          }
 
          resultValue = exp.getValue();
        }
    };

The specialization for the constant expressions returns just the value with a passive identifier. The left hand side expression specialization checks for the copy flag. This will only be true if the method is directly called from the root node of the expression. In this case we have the assignment c = a which needs to be stored. Otherwise it is a node in the expression tree, e.g. a or b in c = a * b. In this case we just provide the identifier and the primal value.

Reverse evaluation

The implementation of the reverse evaluation procedure is done in two functions. The entry point is the evaluate function from the codi::ReverseTapeInterface definition:

    void evaluate() {
      primalData.evaluateReverse(primalData.getPosition(), primalData.getZeroPosition(), SimpleTape::evaluateStack, adjointVec);
    }

Here, we use the evaluateReverse function for the stack evaluation. This function will call the provided function object SimpleTape::evaluateStack for each valid region in the stored data. A valid region is defined as a contiguous set of memory for all data streams that is not interrupted by any chunk boundaries. It is therefore safe to access all memory pointers in the specified ranges. For each data stream, the range and the pointers to the data are added to the argument list, that is, size_t& curPos, size_t endPos, Data1* data1, Data2* data2, etc.. First the data of the root vector is appended, then the one of the nested vector and so on. Additional arguments are added at the beginning of the argument list. The implementation of SimpleTape::evaluateStack shows the final result of the argument aggregation:

    static void evaluateStack(
        /* data from call */
        std::vector<double>& adjointVector,
        /* primal data */
        size_t& curPrimalPos, size_t const& endPrimalPos, double const* const primalData,
        /* identifier data */
        size_t& curIdentifierPos, size_t const& endIdentifierPos, int const* const identifierData,
        /* operator data */
        size_t& curOperatorPos, size_t const& endOperatorPos, OperatorCode const* const operatorData) {
 
      while (curOperatorPos > endOperatorPos) {
        curOperatorPos -= 1;
 
        int resultIdentifier = 0;
        int arg1Identifier = 0;
        int arg2Identifier = 0;
        double arg1Value = 0.0;
        double arg2Value = 0.0;
 
        // Get the data from the stacks.
        switch (operatorData[curOperatorPos]) {
          // Binary operations.
          case OperatorCode::ADD:
          case OperatorCode::SUB:
          case OperatorCode::MUL:
          case OperatorCode::DIV:
            // Get the data.
            resultIdentifier = identifierData[curIdentifierPos - 1];
            arg2Identifier = identifierData[curIdentifierPos - 2];
            arg1Identifier = identifierData[curIdentifierPos - 3];
            arg2Value = primalData[curPrimalPos - 1];
            arg1Value = primalData[curPrimalPos - 2];
 
            // Adjust positions.
            curIdentifierPos -= 3;
            curPrimalPos -= 2;
            break;
 
          // Unary operations.
          case OperatorCode::COS:
          case OperatorCode::SIN:
          case OperatorCode::COPY:
            // Get the data
            resultIdentifier = identifierData[curIdentifierPos - 1];
            arg1Identifier = identifierData[curIdentifierPos - 2];
            arg1Value = primalData[curPrimalPos - 1];
 
            // Adjust positions.
            curIdentifierPos -= 2;
            curPrimalPos -= 1;
            break;
        default:
            std::cerr << "Error: unhandled pop operator '" << (int)operatorData[curOperatorPos] << "'." << std::endl;
            exit(-1);
          break;
        }
 
        double resultAdjoint = adjointVector[resultIdentifier];
        adjointVector[resultIdentifier] = 0.0; // Perform the reset of the lhs.
        double& arg1Adjoint = adjointVector[arg1Identifier];
        double& arg2Adjoint = adjointVector[arg2Identifier];
 
        switch (operatorData[curOperatorPos]) {
          // Binary operations
          case OperatorCode::ADD:
            arg1Adjoint += resultAdjoint;
            arg2Adjoint += resultAdjoint;
            break;
          case OperatorCode::SUB:
            arg1Adjoint += resultAdjoint;
            arg2Adjoint -= resultAdjoint;
            break;
          case OperatorCode::MUL:
            arg1Adjoint += arg2Value * resultAdjoint;
            arg2Adjoint += arg1Value * resultAdjoint;
            break;
          case OperatorCode::DIV:
            arg1Adjoint += resultAdjoint / arg2Value;
            arg2Adjoint += - arg1Value * resultAdjoint / arg2Value / arg2Value;
            break;
 
          // Unary operations
          case OperatorCode::COS:
            arg1Adjoint +=  -std::sin(arg1Value) * resultAdjoint;
            break;
          case OperatorCode::SIN:
            arg1Adjoint +=  std::cos(arg1Value) * resultAdjoint;
            break;
          case OperatorCode::COPY:
            arg1Adjoint +=  resultAdjoint;
            break;
        default:
            std::cerr << "Error: unhandled adjoint operator '" << (int)operatorData[curOperatorPos] << "'." << std::endl;
            exit(-1);
          break;
        }
      }
    }

The current position offsets are proved by reference, it is expected that they are decremented in the function and reach the end offset. CoDiPack performs an assertion on this assumption in order to detect tape corruptions during the evaluation.

The body of the function consists of three major parts:

the while loop goes through all operator codes,
the first switch detects if a unary or binary operation was stored and gets the data from the other two data streams,
the second switch implements for each operator code the associated reverse AD formulation.

Remaining interface functions

The other functions from the codi::ReverseTapeInterface are the activity functions, the reset functions and the statistics function. Most of them do not require a detailed discussion, only the reset of the data stream needs to be explained. It is sufficient to call reset on the root data stream, which will recursively call reset on all nested data streams.

    void setActive()      {active = true;}
    void setPassive()     {active = false;}
    bool isActive() const {return active;}

    void clearAdjoints() {
      for (double& adj : adjointVec) {
        adj = 0.0;
      }
    }
    void reset(bool resetAdjoints = true) {
      if (resetAdjoints) {
        clearAdjoints();
      }
 
      maxIdentifier = 0;
 
      primalData.reset();
    }
 
    template<typename Stream = std::ostream> void printStatistics(Stream& out = std::cout) const {
      getTapeValues().formatDefault(out);
    }
 
    template<typename Stream = std::ostream> void printTableHeader(Stream& out = std::cout) const {
      getTapeValues().formatHeader(out);
    }
 
    template<typename Stream = std::ostream> void printTableRow(Stream& out = std::cout) const {
      getTapeValues().formatRow(out);
    }
 
    codi::TapeValues getTapeValues() const {
      codi::TapeValues values("Example tape");
 
      values.addSection("Adjoint vector");
      values.addLongEntry("Number of adjoints", (1 + maxIdentifier));
      values.addDoubleEntry("Memory allocated", sizeof(double) * (1 + maxIdentifier),
                            codi::TapeValues::LocalReductionOperation::Sum, true, true);
 
      values.addSection("Index manager");
      values.addLongEntry("Max. live indices", (1 + maxIdentifier));
 
      values.addSection("Primal data");
      primalData.addToTapeValues(values);
      values.addSection("Identifier data");
      identifierData.addToTapeValues(values);
      values.addSection("Operator data");
      operatorData.addToTapeValues(values);
 
      return values;
    }

Example and analysis

The tape is now ready to use with the codi::ActiveType. For demonstration purposes the example is once evaluated with the SimpleTape and once with the default codi::RealReverse type. The example itself consists only of a single statement that is differentiated.

template<typename Real>
void eval() {
  using Tape = typename Real::Tape;
  Tape& tape = Real::getTape();
 
  Real a = 3.0;
  Real b = 4.0;
 
  tape.setActive();
  tape.registerInput(a);
  tape.registerInput(b);
 
  Real c = sin(a + b) * cos(a - b); // Single statement.
 
  tape.registerOutput(c);
 
  tape.setPassive();
 
  c.gradient() = 1.0;
 
  tape.evaluate();
 
  std::cout << "c = " << c.getValue() << std::endl;
  std::cout << "d c/d a = " << a.getGradient() << std::endl;
  std::cout << "d c/d b = " << b.getGradient() << std::endl;
 
  tape.printStatistics();
 
  tape.reset();
}
 
int main(int nargs, char** args) {
 
  std::cout << "Simple tape:" << std::endl;
  eval<codi::ActiveType<SimpleTape>>();
 
  std::cout << std::endl;
  std::cout << std::endl;
  std::cout << std::endl;
  std::cout << "codi::RealReverse:" << std::endl;
  eval<codi::RealReverse>();
 
 
  return 0;
}

Since the example is quite simple, it is possible to understand the taping process by steping through it with the debugger.

The output and data consumption of both tapes is:

Simple tape:
c = 0.354971
d c/d a = 0.96017
d c/d b = -0.1455
-------------------------------------
Example tape
-------------------------------------
  Total memory used      :     200.00 B
  Total memory allocated :      16.06 KB
-------------------------------------
Adjoint vector
-------------------------------------
  Number of adjoints     :          8
  Memory allocated       :      64.00 B
-------------------------------------
Index manager
-------------------------------------
  Max. live indices      :          8
-------------------------------------
Primal data
-------------------------------------
  Total number           :          8
  Number of chunks       :          1
  Memory used            :      64.00 B
  Memory allocated       :       8.00 KB
-------------------------------------
Identifier data
-------------------------------------
  Total number           :         13
  Number of chunks       :          1
  Memory used            :      52.00 B
  Memory allocated       :       4.00 KB
-------------------------------------
Operator data
-------------------------------------
  Total number           :          5
  Number of chunks       :          1
  Memory used            :      20.00 B
  Memory allocated       :       4.00 KB
-------------------------------------
 
 
 
codi::RealReverse:
c = 0.354971
d c/d a = 0.96017
d c/d b = -0.1455
-------------------------------------
CoDi Tape Statistics ( JacobianLinearTape )
-------------------------------------
  Total memory used      :       96.00 B
  Total memory allocated :       28.50 MB
-------------------------------------
Adjoint vector
-------------------------------------
  Number of adjoints     :           4
  Memory allocated       :       32.00 B
-------------------------------------
Index manager
-------------------------------------
  Max. live indices      :           4
-------------------------------------
Statement entries
-------------------------------------
  Total number           :           4
  Number of chunks       :           1
  Memory used            :        4.00 B
  Memory allocated       :        2.00 MB
-------------------------------------
Jacobian entries
-------------------------------------
  Total number           :           5
  Number of chunks       :           1
  Memory used            :       60.00 B
  Memory allocated       :       24.00 MB
-------------------------------------
External function entries
-------------------------------------
  Total number           :           0
  Number of chunks       :           1
  Memory used            :        0.00 B
  Memory allocated       :        2.50 MB
-------------------------------------

As expected, they yield the same result, but the required memory is different. The allocated memory does not interest us since this memory is defined by the default chunk size, that is different for the two tapes.The codi::RealReverse tape needs 96 bytes of memory and the simple tape requires 200 bytes of memory. In order to keep it simple, no advanced optimizations have been implemented in the SimpleTape. The data management is also quite simple and does not result in the minimal memory footprint.