CPU hogging in the current master
Linus Torvalds
torvalds at linux-foundation.org
Wed Sep 25 20:42:19 UTC 2013
Ok, this is my suggested fix for the CPU hogging of the TTS code.
It is two-pronged:
a) the one-liner patch to profile.c just says "we don't bother
calculating TTS at 10s granularity, just do a one-minute one".
b) stop doing the (very expensive) pow() calculation every time.
The two are independent, and it could be two separate commits, but I think
both of these fall under the issue of "don't waste CPU time", so here it
is as one patch.
I think (a) is pretty obvious, no need to expound on that any more.
But (b) is slightly more complicated. It re-organizes the saturation
calculations to be in my opinion clearer: we used to have the "one second"
case completely separate from the "generic interval" case, and this undoes
that.
It *does* keep the special static cache for the one-second buehlmann
factors, and expands that with a *dynamic* cache for each tissue index
that contains the previous value of the buehlmann factor for a particular
duration.
The point is, usually we end up using some fixed duration, so the cache
hit ratio is quite high. And doing a memory load from a cache is *much*
faster than calculating exponentials.
Somebody should double-check that I didn't do anything bad when I
re-organized the math, but quite frankly, I think my code is easier to
read than the old code. Not that that protects us from typos or thinkos.
Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
---
Comments? Please, check my math. It really is a very straightforward
transform, and it seems to give the same profiles, but I still want people
to double-check it.
deco.c | 85 ++++++++++++++++++++++++++++++++++++++++-----------------------
profile.c | 2 +-
2 files changed, 55 insertions(+), 32 deletions(-)
diff --git a/deco.c b/deco.c
index c7ad616607a0..3c500d6765fd 100644
--- a/deco.c
+++ b/deco.c
@@ -131,6 +131,49 @@ static double tissue_tolerance_calc(const struct dive *dive)
return ret_tolerance_limit_ambient_pressure;
}
+/*
+ * Return buehlmann factor for a particular period and tissue index.
+ *
+ * We cache the last factor, since we commonly call this with the
+ * same values... We have a special "fixed cache" for the one second
+ * case, although I wonder if that's even worth it considering the
+ * more general-purpose cache.
+ */
+struct factor_cache {
+ int last_period;
+ double last_factor;
+};
+
+double n2_factor(int period_in_seconds, int ci)
+{
+ static struct factor_cache cache[16];
+
+ if (period_in_seconds == 1)
+ return buehlmann_N2_factor_expositon_one_second[ci];
+
+ if (period_in_seconds != cache[ci].last_period) {
+ cache[ci].last_period = period_in_seconds;
+ cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60));
+ }
+
+ return cache[ci].last_factor;
+}
+
+double he_factor(int period_in_seconds, int ci)
+{
+ static struct factor_cache cache[16];
+
+ if (period_in_seconds == 1)
+ return buehlmann_He_factor_expositon_one_second[ci];
+
+ if (period_in_seconds != cache[ci].last_period) {
+ cache[ci].last_period = period_in_seconds;
+ cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds / (buehlmann_He_t_halflife[ci] * 60));
+ }
+
+ return cache[ci].last_factor;
+}
+
/* add period_in_seconds at the given pressure and gas to the deco calculation */
double add_segment(double pressure, const struct gasmix *gasmix, int period_in_seconds, int ccpo2, const struct dive *dive)
{
@@ -156,37 +199,17 @@ double add_segment(double pressure, const struct gasmix *gasmix, int period_in_s
pphe *= f_dilutent;
}
}
- if (period_in_seconds == 1) { /* one second interval during dive */
- for (ci = 0; ci < 16; ci++) {
- if (ppn2 - tissue_n2_sat[ci] > 0)
- tissue_n2_sat[ci] += buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
- buehlmann_N2_factor_expositon_one_second[ci];
- else
- tissue_n2_sat[ci] += buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
- buehlmann_N2_factor_expositon_one_second[ci];
- if (pphe - tissue_he_sat[ci] > 0)
- tissue_he_sat[ci] += buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
- buehlmann_He_factor_expositon_one_second[ci];
- else
- tissue_he_sat[ci] += buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
- buehlmann_He_factor_expositon_one_second[ci];
- }
- } else { /* all other durations */
- for (ci = 0; ci < 16; ci++)
- {
- if (ppn2 - tissue_n2_sat[ci] > 0)
- tissue_n2_sat[ci] += buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60))));
- else
- tissue_n2_sat[ci] += buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60))));
- if (pphe - tissue_he_sat[ci] > 0)
- tissue_he_sat[ci] += buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_He_t_halflife[ci] * 60))));
- else
- tissue_he_sat[ci] += buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
- (1 - pow(2.0,(- period_in_seconds / (buehlmann_He_t_halflife[ci] * 60))));
- }
+
+ for (ci = 0; ci < 16; ci++) {
+ double ppn2_oversat = ppn2 - tissue_n2_sat[ci];
+ double pphe_oversat = pphe - tissue_he_sat[ci];
+ double n2_f = n2_factor(period_in_seconds, ci);
+ double he_f = he_factor(period_in_seconds, ci);
+ double n2_satmult = ppn2_oversat > 0 ? buehlmann_config.satmult : buehlmann_config.desatmult;
+ double he_satmult = pphe_oversat > 0 ? buehlmann_config.satmult : buehlmann_config.desatmult;
+
+ tissue_n2_sat[ci] += n2_satmult * ppn2_oversat * n2_f;
+ tissue_he_sat[ci] += he_satmult * pphe_oversat * he_f;
}
return tissue_tolerance_calc(dive);
}
diff --git a/profile.c b/profile.c
index a5c5d11204a0..a0c99114d365 100644
--- a/profile.c
+++ b/profile.c
@@ -1067,7 +1067,7 @@ static void calculate_deco_information(struct dive *dive, struct divecomputer *d
const int ascent_s_per_deco_step = 1;
const int ascent_mm_per_deco_step = 16; /* 1 m/min */
/* how long time steps in deco calculations? */
- const int time_stepsize = 10;
+ const int time_stepsize = 60;
const int deco_stepsize = 3000;
/* at what depth is the current deco-step? */
int next_stop = ROUND_UP(deco_allowed_depth(tissue_tolerance, surface_pressure, dive, 1), deco_stepsize);
More information about the subsurface mailing list