[PATCH] Stop doing the (very expensive) pow() calculation pointlessly

Tomaz Canabrava tcanabrava at kde.org
Sun Jan 19 12:37:01 UTC 2014


Linus, it makes things instant. *NOW* we are talking. o/


On Sun, Jan 19, 2014 at 6:34 PM, Tomaz Canabrava <tcanabrava at kde.org> wrote:

> Testing...
>
>
> On Sun, Jan 19, 2014 at 6:11 PM, Linus Torvalds <
> torvalds at linux-foundation.org> wrote:
>
>>
>> From: Linus Torvalds <torvalds at linux-foundation.org>
>> Date: Wed, 25 Sep 2013 20:42:19 -0700
>> Subject: [PATCH] Stop doing the (very expensive) pow() calculation
>> pointlessly
>>
>> This re-organizes the saturation calculations to be in my opinion
>> clearer: we used to have the "one second" case completely separate from
>> the "generic interval" case, and this undoes that.
>>
>> It *does* keep the special static cache for the one-second buehlmann
>> factors, and expands that with a *dynamic* cache for each tissue index
>> that contains the previous value of the buehlmann factor for a
>> particular duration.
>>
>> The point is, usually we end up using some fixed duration, so the cache
>> hit ratio is quite high.  And doing a memory load from a cache is *much*
>> faster than calculating exponentials.
>>
>> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
>> ---
>>
>> The original patch also changed the deco "time_stepsize" from 10 to 60
>> seconds, which is still likely a good thing to do. But that's an
>> issue independent of calling "pow()" too damn much.
>>
>> Tomaz, does this make things snappier for you?
>>
>>  deco.c | 85
>> ++++++++++++++++++++++++++++++++++++++++++------------------------
>>  1 file changed, 54 insertions(+), 31 deletions(-)
>>
>> diff --git a/deco.c b/deco.c
>> index 300904724ca5..6958946392e7 100644
>> --- a/deco.c
>> +++ b/deco.c
>> @@ -129,6 +129,49 @@ static double tissue_tolerance_calc(const struct
>> dive *dive)
>>         return ret_tolerance_limit_ambient_pressure;
>>  }
>>
>> +/*
>> + * Return Buehlmann factor for a particular period and tissue index.
>> + *
>> + * We cache the last factor, since we commonly call this with the
>> + * same values... We have a special "fixed cache" for the one second
>> + * case, although I wonder if that's even worth it considering the
>> + * more general-purpose cache.
>> + */
>> +struct factor_cache {
>> +       int last_period;
>> +       double last_factor;
>> +};
>> +
>> +double n2_factor(int period_in_seconds, int ci)
>> +{
>> +       static struct factor_cache cache[16];
>> +
>> +       if (period_in_seconds == 1)
>> +               return buehlmann_N2_factor_expositon_one_second[ci];
>> +
>> +       if (period_in_seconds != cache[ci].last_period) {
>> +               cache[ci].last_period = period_in_seconds;
>> +               cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds
>> / (buehlmann_N2_t_halflife[ci] * 60));
>> +       }
>> +
>> +       return cache[ci].last_factor;
>> +}
>> +
>> +double he_factor(int period_in_seconds, int ci)
>> +{
>> +       static struct factor_cache cache[16];
>> +
>> +       if (period_in_seconds == 1)
>> +               return buehlmann_He_factor_expositon_one_second[ci];
>> +
>> +       if (period_in_seconds != cache[ci].last_period) {
>> +               cache[ci].last_period = period_in_seconds;
>> +               cache[ci].last_factor = 1 - pow(2.0, - period_in_seconds
>> / (buehlmann_He_t_halflife[ci] * 60));
>> +       }
>> +
>> +       return cache[ci].last_factor;
>> +}
>> +
>>  /* add period_in_seconds at the given pressure and gas to the deco
>> calculation */
>>  double add_segment(double pressure, const struct gasmix *gasmix, int
>> period_in_seconds, int ccpo2, const struct dive *dive)
>>  {
>> @@ -152,37 +195,17 @@ double add_segment(double pressure, const struct
>> gasmix *gasmix, int period_in_s
>>                         pphe *= f_dilutent;
>>                 }
>>         }
>> -       if (period_in_seconds == 1) { /* one second interval during dive
>> */
>> -               for (ci = 0; ci < 16; ci++) {
>> -                       if (ppn2 - tissue_n2_sat[ci] > 0)
>> -                               tissue_n2_sat[ci] +=
>> buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
>> -
>> buehlmann_N2_factor_expositon_one_second[ci];
>> -                       else
>> -                               tissue_n2_sat[ci] +=
>> buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
>> -
>> buehlmann_N2_factor_expositon_one_second[ci];
>> -                       if (pphe - tissue_he_sat[ci] > 0)
>> -                               tissue_he_sat[ci] +=
>> buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
>> -
>> buehlmann_He_factor_expositon_one_second[ci];
>> -                       else
>> -                               tissue_he_sat[ci] +=
>> buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
>> -
>> buehlmann_He_factor_expositon_one_second[ci];
>> -               }
>> -       } else { /* all other durations */
>> -               for (ci = 0; ci < 16; ci++)
>> -               {
>> -                       if (ppn2 - tissue_n2_sat[ci] > 0)
>> -                               tissue_n2_sat[ci] +=
>> buehlmann_config.satmult * (ppn2 - tissue_n2_sat[ci]) *
>> -                                       (1 - pow(2.0,(- period_in_seconds
>> / (buehlmann_N2_t_halflife[ci] * 60))));
>> -                       else
>> -                               tissue_n2_sat[ci] +=
>> buehlmann_config.desatmult * (ppn2 - tissue_n2_sat[ci]) *
>> -                                       (1 - pow(2.0,(- period_in_seconds
>> / (buehlmann_N2_t_halflife[ci] * 60))));
>> -                       if (pphe - tissue_he_sat[ci] > 0)
>> -                               tissue_he_sat[ci] +=
>> buehlmann_config.satmult * (pphe - tissue_he_sat[ci]) *
>> -                                       (1 - pow(2.0,(- period_in_seconds
>> / (buehlmann_He_t_halflife[ci] * 60))));
>> -                       else
>> -                               tissue_he_sat[ci] +=
>> buehlmann_config.desatmult * (pphe - tissue_he_sat[ci]) *
>> -                                       (1 - pow(2.0,(- period_in_seconds
>> / (buehlmann_He_t_halflife[ci] * 60))));
>> -               }
>> +
>> +       for (ci = 0; ci < 16; ci++) {
>> +               double ppn2_oversat = ppn2 - tissue_n2_sat[ci];
>> +               double pphe_oversat = pphe - tissue_he_sat[ci];
>> +               double n2_f = n2_factor(period_in_seconds, ci);
>> +               double he_f = he_factor(period_in_seconds, ci);
>> +               double n2_satmult = ppn2_oversat > 0 ?
>> buehlmann_config.satmult : buehlmann_config.desatmult;
>> +               double he_satmult = pphe_oversat > 0 ?
>> buehlmann_config.satmult : buehlmann_config.desatmult;
>> +
>> +               tissue_n2_sat[ci] += n2_satmult * ppn2_oversat * n2_f;
>> +               tissue_he_sat[ci] += he_satmult * pphe_oversat * he_f;
>>         }
>>         return tissue_tolerance_calc(dive);
>>  }
>> --
>> 1.8.5.1.163.gd7aced9
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.hohndel.org/pipermail/subsurface/attachments/20140119/59677c04/attachment.html>


More information about the subsurface mailing list