[PATCH] Stop doing the (very expensive) pow() calculation pointlessly

Linus Torvalds torvalds at linux-foundation.org
Sun Jan 19 12:11:07 UTC 2014


From: Linus Torvalds <torvalds at linux-foundation.org>
Date: Wed, 25 Sep 2013 20:42:19 -0700
Subject: [PATCH] Stop doing the (very expensive) pow() calculation pointlessly

This re-organizes the saturation calculations to be, in my opinion,
clearer: we used to have the "one second" case completely separate from
the "generic interval" case, and this undoes that.

It *does* keep the special static cache for the one-second buehlmann
factors, and expands that with a *dynamic* cache for each tissue index
that contains the previous value of the buehlmann factor for a
particular duration.

The point is, usually we end up using some fixed duration, so the cache
hit ratio is quite high.  And doing a memory load from a cache is *much*
faster than calculating exponentials.

Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
---

The original patch also changed the deco "time_stepsize" from 10 to 60
seconds, which is still likely a good thing to do. But that's an issue
independent of calling "pow()" too damn much.

Tomaz, does this make things snappier for you?

 deco.c | 85 ++++++++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 54 insertions(+), 31 deletions(-)

diff --git a/deco.c b/deco.c
index 300904724ca5..6958946392e7 100644
--- a/deco.c
+++ b/deco.c
@@ -129,6 +129,49 @@ static double tissue_tolerance_calc(const struct dive *dive)
 	return ret_tolerance_limit_ambient_pressure;
 }
 
+/*
+ * Return Buehlmann factor for a particular period and tissue index.
+ *
+ * We cache the last factor, since we commonly call this with the
+ * same values... We have a special "fixed cache" for the one second
+ * case, although I wonder if that's even worth it considering the
+ * more general-purpose cache.
+ */
+struct factor_cache {
+	int last_period;
+	double last_factor;
+};
+
+double n2_factor(int period_in_seconds, int ci)
+{
+	static struct factor_cache cache[16];
+
+	if (period_in_seconds == 1)
+		return buehlmann_N2_factor_expositon_one_second[ci];
+
+	if (period_in_seconds != cache[ci].last_period) {
+		cache[ci].last_period = period_in_seconds;
+		cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60));
+	}
+
+	return cache[ci].last_factor;
+}
+
+double he_factor(int period_in_seconds, int ci)
+{
+	static struct factor_cache cache[16];
+
+	if (period_in_seconds == 1)
+		return buehlmann_He_factor_expositon_one_second[ci];
+
+	if (period_in_seconds != cache[ci].last_period) {
+		cache[ci].last_period = period_in_seconds;
+		cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds / (buehlmann_He_t_halflife[ci] * 60));
+	}
+
+	return cache[ci].last_factor;
+}
+
 /* add period_in_seconds at the given pressure and gas to the deco calculation */
 double add_segment(double pressure, const struct gasmix *gasmix, int period_in_seconds, int ccpo2, const struct dive *dive)
 {
@@ -152,37 +195,17 @@ double add_segment(double pressure, const struct gasmix *gasmix, int period_in_s
 			pphe *= f_dilutent;
 		}
 	}
-	if (period_in_seconds == 1) { /* one second interval during dive */
-		for (ci = 0; ci < 16; ci++) {
-			if (ppn2 - tissue_n2_sat[ci] > 0)
-				tissue_n2_sat[ci] += buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
-								buehlmann_N2_factor_expositon_one_second[ci];
-			else
-				tissue_n2_sat[ci] += buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
-								buehlmann_N2_factor_expositon_one_second[ci];
-			if (pphe - tissue_he_sat[ci] > 0)
-				tissue_he_sat[ci] += buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
-								buehlmann_He_factor_expositon_one_second[ci];
-			else
-				tissue_he_sat[ci] += buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
-								buehlmann_He_factor_expositon_one_second[ci];
-		}
-	} else { /* all other durations */
-		for (ci = 0; ci < 16; ci++)
-		{
-			if (ppn2 - tissue_n2_sat[ci] > 0)
-				tissue_n2_sat[ci] += buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
-					(1 - pow(2.0,(- period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60))));
-			else
-				tissue_n2_sat[ci] += buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
-					(1 - pow(2.0,(- period_in_seconds / (buehlmann_N2_t_halflife[ci] * 60))));
-			if (pphe - tissue_he_sat[ci] > 0)
-				tissue_he_sat[ci] += buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
-					(1 - pow(2.0,(- period_in_seconds / (buehlmann_He_t_halflife[ci] * 60))));
-			else
-				tissue_he_sat[ci] += buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
-					(1 - pow(2.0,(- period_in_seconds / (buehlmann_He_t_halflife[ci] * 60))));
-		}
+
+	for (ci = 0; ci < 16; ci++) {
+		double ppn2_oversat = ppn2 - tissue_n2_sat[ci];
+		double pphe_oversat = pphe - tissue_he_sat[ci];
+		double n2_f = n2_factor(period_in_seconds, ci);
+		double he_f = he_factor(period_in_seconds, ci);
+		double n2_satmult = ppn2_oversat > 0 ? buehlmann_config.satmult : buehlmann_config.desatmult;
+		double he_satmult = pphe_oversat > 0 ? buehlmann_config.satmult : buehlmann_config.desatmult;
+
+		tissue_n2_sat[ci] += n2_satmult * ppn2_oversat * n2_f;
+		tissue_he_sat[ci] += he_satmult * pphe_oversat * he_f;
 	}
 	return tissue_tolerance_calc(dive);
 }
-- 
1.8.5.1.163.gd7aced9



More information about the subsurface mailing list