perf(GridFire)

More preformance improvmnets

1. Switch to mimalloc which gave a roughly 10% improvment accross the
board
2. Use much faster compososition caching
3. Reusing work vector
This commit is contained in:
2025-12-07 12:34:12 -05:00
parent e48b62f231
commit 8cfa067ad0
23 changed files with 306 additions and 97 deletions

View File

@@ -133,7 +133,8 @@ namespace gridfire::engine {
std::expected<StepDerivatives<double>, EngineStatus> GraphEngine::calculateRHSAndEnergy(
const fourdst::composition::CompositionAbstract &comp,
const double T9,
const double rho
const double rho,
bool trust
) const {
return calculateRHSAndEnergy(comp, T9, rho, m_reactions);
}
@@ -744,6 +745,7 @@ namespace gridfire::engine {
void GraphEngine::setUseReverseReactions(const bool useReverse) {
m_useReverseReactions = useReverse;
syncInternalMaps();
}
size_t GraphEngine::getSpeciesIndex(const fourdst::atomic::Species &species) const {
@@ -1034,6 +1036,7 @@ namespace gridfire::engine {
const double rho,
const SparsityPattern &sparsityPattern
) const {
// --- Compute the intersection of the requested sparsity pattern with the full sparsity pattern ---
SparsityPattern intersectionSparsityPattern;
for (const auto& entry : sparsityPattern) {
if (m_full_sparsity_set.contains(entry)) {
@@ -1044,10 +1047,6 @@ namespace gridfire::engine {
// --- Pack the input variables into a vector for CppAD ---
const size_t numSpecies = m_networkSpecies.size();
std::vector<double> x(numSpecies + 2, 0.0);
// const std::vector<double>& Y_dynamic = comp.getMolarAbundanceVector();
// for (size_t i = 0; i < numSpecies; ++i) {
// x[i] = Y_dynamic[i];
// }
size_t i = 0;
for (const auto& species: m_networkSpecies) {
double Yi = 0.0; // Small floor to avoid issues with zero abundances
@@ -1075,18 +1074,25 @@ namespace gridfire::engine {
const size_t num_cols_jac = numSpecies + 2; // +2 for T9 and rho
CppAD::sparse_rc<std::vector<size_t>> CppAD_sparsity_pattern(num_rows_jac, num_cols_jac, nnz);
std::size_t sparsity_hash = 0;
for (size_t k = 0; k < nnz; ++k) {
size_t local_intersection_hash = utils::hash_combine(intersectionSparsityPattern[k].first, intersectionSparsityPattern[k].second);
sparsity_hash = utils::hash_combine(sparsity_hash, local_intersection_hash);
CppAD_sparsity_pattern.set(k, intersectionSparsityPattern[k].first, intersectionSparsityPattern[k].second);
}
CppAD::sparse_rcv<std::vector<size_t>, std::vector<double>> jac_subset(CppAD_sparsity_pattern);
// PERF: one of *the* most pressing things that needs to be done is remove the need for this call every
// time the jacobian is needed since coloring is expensive and we are throwing away the caching
// power of CppAD by clearing the work vector each time. We do this since we make a new subset every
// time. However, a better solution would be to make the subset stateful so it only changes if the requested
// sparsity pattern changes. This way we could reuse the work vector.
m_jac_work.clear();
// --- Check cache for existing subset ---
if (!m_jacobianSubsetCache.contains(sparsity_hash)) {
m_jacobianSubsetCache.emplace(sparsity_hash, CppAD_sparsity_pattern);
m_jac_work.clear();
} else {
if (m_jacWorkCache.contains(sparsity_hash)) {
m_jac_work.clear();
m_jac_work = m_jacWorkCache.at(sparsity_hash);
}
}
auto& jac_subset = m_jacobianSubsetCache.at(sparsity_hash);
m_rhsADFun.sparse_jac_rev(
x,
jac_subset, // Sparse Jacobian output
@@ -1095,6 +1101,11 @@ namespace gridfire::engine {
m_jac_work // Work vector for CppAD
);
// --- Stash the now populated work vector in the cache if not already present ---
if (!m_jacWorkCache.contains(sparsity_hash)) {
m_jacWorkCache.emplace(sparsity_hash, m_jac_work);
}
Eigen::SparseMatrix<double> jacobianMatrix(numSpecies, numSpecies);
std::vector<Eigen::Triplet<double> > triplets;
for (size_t k = 0; k < nnz; ++k) {
@@ -1391,6 +1402,7 @@ namespace gridfire::engine {
dependentVector.push_back(result.nuclearEnergyGenerationRate);
m_rhsADFun.Dependent(adInput, dependentVector);
m_rhsADFun.optimize();
LOG_TRACE_L1(m_logger, "AD tape recorded successfully for the RHS and Eps calculation. Number of independent variables: {}.", adInput.size());
}
@@ -1400,7 +1412,7 @@ namespace gridfire::engine {
m_atomicReverseRates.reserve(m_reactions.size());
for (const auto& reaction: m_reactions) {
if (reaction->qValue() != 0.0) {
if (reaction->qValue() != 0.0 and m_useReverseReactions) {
m_atomicReverseRates.push_back(std::make_unique<AtomicReverseRate>(*reaction, *this));
} else {
m_atomicReverseRates.push_back(nullptr);

View File

@@ -7,6 +7,7 @@
#include "gridfire/types/types.h"
#include "gridfire/exceptions/error_engine.h"
#include "gridfire/utils/hashing.h"
#include "quill/LogMacros.h"
#include "quill/Logger.h"
@@ -80,7 +81,7 @@ namespace gridfire::engine {
std::expected<StepDerivatives<double>, EngineStatus> AdaptiveEngineView::calculateRHSAndEnergy(
const fourdst::composition::CompositionAbstract &comp,
const double T9,
const double rho
const double rho, bool trust
) const {
LOG_TRACE_L2(m_logger, "Calculating RHS and Energy in AdaptiveEngineView at T9 = {}, rho = {}.", T9, rho);
validateState();
@@ -99,7 +100,14 @@ namespace gridfire::engine {
}
return ss.str();
}());
fourdst::composition::Composition collectedComp = collectComposition(comp, T9, rho);
fourdst::composition::Composition collectedComp;
std::size_t state_hash = utils::hash_state(comp, T9, rho, m_activeReactions);
if (m_collected_composition_cache.contains(state_hash)) {
collectedComp = m_collected_composition_cache.at(state_hash);
} else {
collectedComp = collectComposition(comp, T9, rho);
m_collected_composition_cache[state_hash] = collectedComp;
}
LOG_TRACE_L2(
m_logger,
"Composition Collected prior to passing to base engine. Collected Composition: {}",
@@ -118,7 +126,7 @@ namespace gridfire::engine {
}
return ss.str();
}());
auto result = m_baseEngine.calculateRHSAndEnergy(collectedComp, T9, rho);
auto result = m_baseEngine.calculateRHSAndEnergy(collectedComp, T9, rho, true);
LOG_TRACE_L2(m_logger, "Base engine calculation of RHS and Energy complete.");
if (!result) {

View File

@@ -43,7 +43,7 @@ namespace gridfire::engine {
std::expected<StepDerivatives<double>, EngineStatus> DefinedEngineView::calculateRHSAndEnergy(
const fourdst::composition::CompositionAbstract &comp,
const double T9,
const double rho
const double rho, bool trust
) const {
validateNetworkState();

View File

@@ -216,7 +216,8 @@ namespace gridfire::engine {
std::expected<StepDerivatives<double>, EngineStatus> MultiscalePartitioningEngineView::calculateRHSAndEnergy(
const fourdst::composition::CompositionAbstract &comp,
const double T9,
const double rho
const double rho,
bool trust
) const {
LOG_TRACE_L2(m_logger, "Calculating RHS and Energy in MultiscalePartitioningEngineView at T9 = {}, rho = {}.", T9, rho);
LOG_TRACE_L2(m_logger, "Input composition is {}", [&comp]() -> std::string {
@@ -231,7 +232,8 @@ namespace gridfire::engine {
}
return ss.str();
}());
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
// TODO: Figure out why setting trust -> trust causes issues. The only place I think I am setting that to true is in AdaptiveEngineView which has just called getNormalizedEquilibratedComposition...
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
LOG_TRACE_L2(m_logger, "Equilibrated composition prior to calling base engine is {}", [&qseComposition, &comp]() -> std::string {
std::stringstream ss;
size_t i = 0;
@@ -248,7 +250,7 @@ namespace gridfire::engine {
return ss.str();
}());
const auto result = m_baseEngine.calculateRHSAndEnergy(qseComposition, T9, rho);
const auto result = m_baseEngine.calculateRHSAndEnergy(qseComposition, T9, rho, false);
LOG_TRACE_L2(m_logger, "Base engine calculation of RHS and Energy complete.");
if (!result) {
@@ -271,7 +273,7 @@ namespace gridfire::engine {
const double T9,
const double rho
) const {
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
return m_baseEngine.calculateEpsDerivatives(qseComposition, T9, rho);
}
@@ -280,7 +282,7 @@ namespace gridfire::engine {
const double T9,
const double rho
) const {
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
return m_baseEngine.generateJacobianMatrix(qseComposition, T9, rho, m_dynamic_species);
}
@@ -318,7 +320,7 @@ namespace gridfire::engine {
}
}
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
return m_baseEngine.generateJacobianMatrix(qseComposition, T9, rho, dynamicActiveSpeciesIntersection);
}
@@ -329,7 +331,7 @@ namespace gridfire::engine {
const double rho,
const SparsityPattern &sparsityPattern
) const {
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
return m_baseEngine.generateJacobianMatrix(qseComposition, T9, rho, sparsityPattern);
}
@@ -350,7 +352,7 @@ namespace gridfire::engine {
const double T9,
const double rho
) const {
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
return m_baseEngine.calculateMolarReactionFlow(reaction, qseComposition, T9, rho);
}
@@ -369,7 +371,7 @@ namespace gridfire::engine {
const double T9,
const double rho
) const {
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
const auto result = m_baseEngine.getSpeciesTimescales(qseComposition, T9, rho);
if (!result) {
return std::unexpected{result.error()};
@@ -386,7 +388,7 @@ namespace gridfire::engine {
const double T9,
const double rho
) const {
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
const auto result = m_baseEngine.getSpeciesDestructionTimescales(qseComposition, T9, rho);
if (!result) {
return std::unexpected{result.error()};
@@ -803,7 +805,7 @@ namespace gridfire::engine {
LOG_TRACE_L1(m_logger, "{} QSE solvers created.", m_qse_solvers.size());
LOG_TRACE_L1(m_logger, "Calculating final equilibrated composition...");
fourdst::composition::Composition result = getNormalizedEquilibratedComposition(comp, T9, rho);
fourdst::composition::Composition result = getNormalizedEquilibratedComposition(comp, T9, rho, false);
LOG_TRACE_L1(m_logger, "Final equilibrated composition calculated...");
return result;
@@ -836,7 +838,7 @@ namespace gridfire::engine {
}
}
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho);
const fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(comp, T9, rho, false);
// Calculate reaction flows and find min/max for logarithmic scaling of transparency
std::vector<double> reaction_flows;
reaction_flows.reserve(all_reactions.size());
@@ -1072,8 +1074,12 @@ namespace gridfire::engine {
fourdst::composition::Composition MultiscalePartitioningEngineView::getNormalizedEquilibratedComposition(
const fourdst::composition::CompositionAbstract& comp,
const double T9,
const double rho
const double rho,
const bool trust
) const {
if (trust) {
return fourdst::composition::Composition(comp);
}
// Caching mechanism to avoid redundant QSE solves
const std::array<uint64_t, 3> hashes = {
fourdst::composition::utils::CompositionHash::hash_exact(comp),
@@ -1108,7 +1114,7 @@ namespace gridfire::engine {
) const {
const fourdst::composition::Composition result = m_baseEngine.collectComposition(comp, T9, rho);
fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(result, T9, rho);
fourdst::composition::Composition qseComposition = getNormalizedEquilibratedComposition(result, T9, rho, false);
return qseComposition;
}
@@ -1893,7 +1899,7 @@ namespace gridfire::engine {
scale_data[i] = 1.0 / Y;
}
auto initial_rhs = m_engine.calculateRHSAndEnergy(result, T9, rho);
auto initial_rhs = m_engine.calculateRHSAndEnergy(result, T9, rho, false);
if (!initial_rhs) {
throw std::runtime_error("In QSE solver failed to calculate initial RHS");
}
@@ -2068,7 +2074,7 @@ namespace gridfire::engine {
data->comp.setMolarAbundance(species, y_data[index]);
}
const auto result = data->engine.calculateRHSAndEnergy(data->comp, data->T9, data->rho);
const auto result = data->engine.calculateRHSAndEnergy(data->comp, data->T9, data->rho, false);
if (!result) {
return 1; // Potentially recoverable error