[PATCH] Stop doing the (very expensive) pow() calculation pointlessly
Linus Torvalds
torvalds at linux-foundation.org
Sun Jan 19 12:11:07 UTC 2014
From: Linus Torvalds <torvalds at linux-foundation.org>
Date: Wed, 25 Sep 2013 20:42:19 -0700
Subject: [PATCH] Stop doing the (very expensive) pow() calculation pointlessly
This re-organizes the saturation calculations to be, in my opinion,
clearer: we used to have the "one second" case completely separate from
the "generic interval" case, and this undoes that.
It *does* keep the special static cache for the one-second Buehlmann
factors, and expands that with a *dynamic* cache for each tissue index
that contains the previous value of the Buehlmann factor for a
particular duration.
The point is, usually we end up using some fixed duration, so the cache
hit ratio is quite high. And doing a memory load from a cache is *much*
faster than calculating exponentials.
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
---
The original patch also changed the deco "time_stepsize" from 10 to 60
seconds, which is still likely a good thing to do. But that's an issue
independent of calling "pow()" too damn much.
Tomaz, does this make things snappier for you?
deco.c | 85 ++++++++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 54 insertions(+), 31 deletions(-)
diff --git a/deco.c b/deco.c
index 300904724ca5..6958946392e7 100644
--- a/deco.c
+++ b/deco.c
@@ -129,6 +129,49 @@ static double tissue_tolerance_calc(const struct dive *dive)
return ret_tolerance_limit_ambient_pressure;
}
+/*
+ * Return Buehlmann factor for a particular period and tissue index.
+ *
+ * We cache the last factor, since we commonly call this with the
+ * same values... We have a special "fixed cache" for the one second
+ * case, although I wonder if that's even worth it considering the
+ * more general-purpose cache.
+ */
+struct factor_cache {
+ int last_period; /* period_in_seconds that last_factor was computed for */
+ double last_factor; /* factor computed for last_period */
+};
+
+double n2_factor(int period_in_seconds, int ci) /* N2 factor for this period and tissue index ci; ci is not range-checked */
+{
+ static struct factor_cache cache[16]; /* indexed by ci; static storage, so every entry starts as period 0 / factor 0 */
+
+ if (period_in_seconds == 1) /* one-second case: read the fixed one-second table, bypassing the cache */
+ return buehlmann_N2_factor_expositon_one_second[ci];
+
+ if (period_in_seconds != cache[ci].last_period) { /* miss: recompute and remember for the next call */
+ cache[ci].last_period = period_in_seconds;
+ cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60)); /* half-life scaled by 60 to match seconds; period 0 yields 0 for a nonzero half-life, same as the zeroed cache */
+ }
+
+ return cache[ci].last_factor; /* either the fresh value or the one cached for the same period */
+}
+
+double he_factor(int period_in_seconds, int ci) /* He factor for this period and tissue index ci; ci is not range-checked */
+{
+ static struct factor_cache cache[16]; /* indexed by ci; separate from the N2 cache; static storage, so entries start as period 0 / factor 0 */
+
+ if (period_in_seconds == 1) /* one-second case: read the fixed one-second table, bypassing the cache */
+ return buehlmann_He_factor_expositon_one_second[ci];
+
+ if (period_in_seconds != cache[ci].last_period) { /* miss: recompute and remember for the next call */
+ cache[ci].last_period = period_in_seconds;
+ cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds / (buehlmann_He_t_halflife[ci] * 60)); /* half-life scaled by 60 to match seconds; period 0 yields 0 for a nonzero half-life, same as the zeroed cache */
+ }
+
+ return cache[ci].last_factor; /* either the fresh value or the one cached for the same period */
+}
+
/* add period_in_seconds at the given pressure and gas to the deco calculation */
double add_segment(double pressure, const struct gasmix *gasmix, int period_in_seconds, int ccpo2, const struct dive *dive)
{
@@ -152,37 +195,17 @@ double add_segment(double pressure, const struct gasmix *gasmix, int period_in_s
pphe *= f_dilutent;
}
}
- if (period_in_seconds == 1) { /* one second interval during dive */
- for (ci = 0; ci < 16; ci++) {
- if (ppn2 - tissue_n2_sat[ci] > 0)
- tissue_n2_sat[ci] += buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
- buehlmann_N2_factor_expositon_one_second[ci];
- else
- tissue_n2_sat[ci] += buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
- buehlmann_N2_factor_expositon_one_second[ci];
- if (pphe - tissue_he_sat[ci] > 0)
- tissue_he_sat[ci] += buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
- buehlmann_He_factor_expositon_one_second[ci];
- else
- tissue_he_sat[ci] += buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
- buehlmann_He_factor_expositon_one_second[ci];
- }
- } else { /* all other durations */
- for (ci = 0; ci < 16; ci++)
- {
- if (ppn2 - tissue_n2_sat[ci] > 0)
- tissue_n2_sat[ci] += buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60))));
- else
- tissue_n2_sat[ci] += buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60))));
- if (pphe - tissue_he_sat[ci] > 0)
- tissue_he_sat[ci] += buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_He_t_halflife[ci] * 60))));
- else
- tissue_he_sat[ci] += buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_He_t_halflife[ci] * 60))));
- }
+
+ for (ci = 0; ci < 16; ci++) {
+ double ppn2_oversat = ppn2 - tissue_n2_sat[ci];
+ double pphe_oversat = pphe - tissue_he_sat[ci];
+ double n2_f = n2_factor(period_in_seconds, ci);
+ double he_f = he_factor(period_in_seconds, ci);
+ double n2_satmult = ppn2_oversat > 0 ? buehlmann_config.satmult : buehlmann_config.desatmult;
+ double he_satmult = pphe_oversat > 0 ? buehlmann_config.satmult : buehlmann_config.desatmult;
+
+ tissue_n2_sat[ci] += n2_satmult * ppn2_oversat * n2_f;
+ tissue_he_sat[ci] += he_satmult * pphe_oversat * he_f;
}
return tissue_tolerance_calc(dive);
}
--
1.8.5.1.163.gd7aced9
More information about the subsurface
mailing list