[PATCH 5/6] Improve on divecomputer data handling

Sun Nov 25 11:58:36 PST 2012

From: Linus Torvalds <torvalds at linux-foundation.org>
Date: Sun, 25 Nov 2012 11:44:27 -0800
Subject: [PATCH 5/6] Improve on divecomputer data handling

This simplifies the vendor/product fields into just a single "model"
string for the dive computer, since we can't really validly ever use it
any other way anyway.

Also, add 'deviceid' and 'diveid' fields: they are just 32-bit hex
values that are unique for that particular dive computer model.  For
libdivecomputer, they are basically the first word of the SHA1 of the
data that libdivecomputer gives us.

(Trying to expose it in some other way is insane - different dive
computers use different models for the ID, so don't try to do some kind
of serial number or something like that)

For the Uemis Zurich, which doesn't use the libdivecomputer import, we
currently only set the model name.  The computer does have some kind of
device ID string, and we could/should just do the same "SHA1 over the
ID" to give it a unique ID, but the pseudo-xml parsing confuses me, so
I'll let Dirk fix that up.

Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
---

Ok, this actually adds a dive computer device ID and a dive ID that may be 
useful for matching up dives. They're not really used yet, and I'd suggest 
adding at least the Uemis device ID code before starting to really use 
them, but it's a good thing.

NOTE NOTE NOTE! This introduces a dependency on libssl and libcrypto. In 
order to generate sane IDs, we want to use a good hash of the various 
random data we can get from the import, and the natural choice is SHA1.

Judging by git development, libssl/libcrypto dependencies should be 
perfectly portable, but I have neither OS X nor Windows, so this is only 
tested on Linux.

 Makefile          |  2 +-
 dive.h            | 18 +++++++++++++++-
 libdivecomputer.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 libdivecomputer.h |  1 +
 parse-xml.c       | 15 +++++++++++---
 save-xml.c        | 10 +++++----
 uemis.c           | 11 ++++++++++
 7 files changed, 105 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile
index ee74d4b98fe7..a20d32061fe9 100644
--- a/Makefile
+++ b/Makefile
@@ -125,7 +125,7 @@ ifneq ($(strip $(LIBXSLT)),)
 	endif
 endif
 
-LIBS = $(LIBXML2) $(LIBXSLT) $(LIBGTK) $(LIBGCONF2) $(LIBDIVECOMPUTER) $(EXTRALIBS) $(LIBZIP) -lpthread -lm
+LIBS = $(LIBXML2) $(LIBXSLT) $(LIBGTK) $(LIBGCONF2) $(LIBDIVECOMPUTER) $(EXTRALIBS) $(LIBZIP) -lpthread -lm -lssl -lcrypto
 
 MSGLANGS=$(notdir $(wildcard po/*po))
 MSGOBJS=$(addprefix share/locale/,$(MSGLANGS:.po=.UTF-8/LC_MESSAGES/subsurface.mo))
diff --git a/dive.h b/dive.h
index 2e7841b45e0e..44023b7054b5 100644
--- a/dive.h
+++ b/dive.h
@@ -2,11 +2,13 @@
 #define DIVE_H
 
 #include <stdlib.h>
+#include <stdint.h>
 #include <time.h>
 
 #include <glib.h>
 #include <glib/gstdio.h>
 #include <libxml/tree.h>
+#include <openssl/sha.h>
 
 /*
  * Some silly typedefs to make our units very explicit.
@@ -239,9 +241,23 @@ struct event {
 	char name[];
 };
 
+/*
+ * NOTE! The deviceid and diveid are model-specific *hashes* of
+ * whatever device identification that model may have. Different
+ * dive computers will have different identifying data, it could
+ * be a firmware number or a serial ID (in either string or in
+ * numeric format), and we do not care.
+ *
+ * The only thing we care about is that subsurface will hash
+ * that information the same way. So then you can check the ID
+ * of a dive computer by comparing the hashes for equality.
+ *
+ * A deviceid or diveid of zero is assumed to be "no ID".
+ */
 struct divecomputer {
 	timestamp_t when;
-	const char *vendor, *product;
+	const char *model;
+	uint32_t deviceid, diveid;
 	int samples, alloc_samples;
 	struct sample *sample;
 	struct event *events;
diff --git a/libdivecomputer.c b/libdivecomputer.c
index 82b1ba3cd88d..3325c68ffbb6 100644
--- a/libdivecomputer.c
+++ b/libdivecomputer.c
@@ -186,9 +186,7 @@ static inline int match_dc(struct divecomputer *a, struct divecomputer *b)
 {
 	if (a->when != b->when)
 		return 0;
-	if (a->vendor && b->vendor && strcasecmp(a->vendor, b->vendor))
-		return 0;
-	if (a->product && b->product && strcasecmp(a->product, b->product))
+	if (a->model && b->model && strcasecmp(a->model, b->model))
 		return 0;
 	return 1;
 }
@@ -226,6 +224,39 @@ static inline int year(int year)
 	return year;
 }
 
+/*
+ * Like g_strdup_printf(), but without the stupid g_malloc/g_free confusion.
+ * And we limit the string to some arbitrary size.
+ */
+static char *str_printf(const char *fmt, ...)
+{
+	va_list args;
+	char buf[80];
+
+	va_start(args, fmt);
+	vsnprintf(buf, sizeof(buf)-1, fmt, args);
+	va_end(args);
+	buf[sizeof(buf)-1] = 0;
+	return strdup(buf);
+}
+
+/*
+ * The dive ID for libdivecomputer dives is the first word of the
+ * SHA1 of the fingerprint, if it exists.
+ *
+ * NOTE! This is byte-order dependent, and I don't care.
+ */
+static uint32_t calculate_diveid(const unsigned char *fingerprint, unsigned int fsize)
+{
+	uint32_t csum[5];
+
+	if (!fingerprint || !fsize)
+		return 0;
+
+	SHA1(fingerprint, fsize, (unsigned char *)csum);
+	return csum[0];
+}
+
 static int dive_cb(const unsigned char *data, unsigned int size,
 	const unsigned char *fingerprint, unsigned int fsize,
 	void *userdata)
@@ -258,8 +289,9 @@ static int dive_cb(const unsigned char *data, unsigned int size,
 		dc_parser_destroy(parser);
 		return rc;
 	}
-	dive->dc.vendor = strdup(devdata->vendor);
-	dive->dc.product = strdup(devdata->product);
+	dive->dc.model = str_printf("%s %s", devdata->vendor, devdata->product);
+	dive->dc.deviceid = devdata->deviceid;
+	dive->dc.diveid = calculate_diveid(fingerprint, fsize);
 
 	tm.tm_year = dt.year;
 	tm.tm_mon = dt.month-1;
@@ -345,6 +377,25 @@ static dc_status_t import_device_data(dc_device_t *device, device_data_t *device
 	return dc_device_foreach(device, dive_cb, devicedata);
 }
 
+/*
+ * The device ID for libdivecomputer devices is the first 32-bit word
+ * of the SHA1 hash of the model/firmware/serial numbers.
+ *
+ * NOTE! This is byte-order-dependent. And I can't find it in myself to
+ * care.
+ */
+static uint32_t calculate_sha1(unsigned int model, unsigned int firmware, unsigned int serial)
+{
+	SHA_CTX ctx;
+	uint32_t csum[5];
+
+	SHA1_Init(&ctx);
+	SHA1_Update(&ctx, &model, sizeof(model));
+	SHA1_Update(&ctx, &firmware, sizeof(firmware));
+	SHA1_Update(&ctx, &serial, sizeof(serial));
+	SHA1_Final((unsigned char *)csum, &ctx);
+	return csum[0];
+}
 
 static void event_cb(dc_device_t *device, dc_event_type_t event, const void *data, void *userdata)
 {
@@ -367,6 +418,7 @@ static void event_cb(dc_device_t *device, dc_event_type_t event, const void *dat
 			devinfo->model, devinfo->model,
 			devinfo->firmware, devinfo->firmware,
 			devinfo->serial, devinfo->serial);
+		devdata->deviceid = calculate_sha1(devinfo->model, devinfo->firmware, devinfo->serial);
 		break;
 	case DC_EVENT_CLOCK:
 			dev_info(devdata, _("Event: systime=%"PRId64", devtime=%u\n"),
diff --git a/libdivecomputer.h b/libdivecomputer.h
index 2121c27bcdab..81eb78abdf8e 100644
--- a/libdivecomputer.h
+++ b/libdivecomputer.h
@@ -13,6 +13,7 @@
 typedef struct device_data_t {
 	dc_descriptor_t *descriptor;
 	const char *vendor, *product, *devname;
+	unsigned int deviceid, diveid;
 	dc_device_t *device;
 	dc_context_t *context;
 	progressbar_t progress;
diff --git a/parse-xml.c b/parse-xml.c
index 98e7fc9e48c3..5f1dfed82692 100644
--- a/parse-xml.c
+++ b/parse-xml.c
@@ -523,6 +523,13 @@ static void get_index(char *buffer, void *_i)
 	free(buffer);
 }
 
+static void hex_value(char *buffer, void *_i)
+{
+	uint32_t *i = _i;
+	*i = strtol(buffer, NULL, 16);
+	free(buffer);
+}
+
 static void get_tripflag(char *buffer, void *_tf)
 {
 	tripflag_t *tf = _tf;
@@ -715,9 +722,11 @@ static void try_to_fill_dc(struct divecomputer *dc, const char *name, char *buf)
 		return;
 	if (MATCH(".time", divetime, &dc->when))
 		return;
-	if (MATCH(".vendor", utf8_string, &dc->vendor))
+	if (MATCH(".model", utf8_string, &dc->model))
+		return;
+	if (MATCH(".deviceid", hex_value, &dc->deviceid))
 		return;
-	if (MATCH(".product", utf8_string, &dc->product))
+	if (MATCH(".diveid", hex_value, &dc->diveid))
 		return;
 
 	nonmatch("divecomputer", name, buf);
@@ -1333,7 +1342,7 @@ static void divecomputer_start(void)
 		dc = dc->next;
 
 	/* Did we already fill that in? */
-	if (dc->samples || dc->vendor || dc->product || dc->when) {
+	if (dc->samples || dc->model || dc->when) {
 		struct divecomputer *newdc = calloc(1, sizeof(*newdc));
 		if (newdc) {
 			dc->next = newdc;
diff --git a/save-xml.c b/save-xml.c
index af435684088b..58faf6aeb3ab 100644
--- a/save-xml.c
+++ b/save-xml.c
@@ -351,10 +351,12 @@ static void save_dc(FILE *f, struct dive *dive, struct divecomputer *dc)
 	int i;
 
 	fprintf(f, "  <divecomputer");
-	if (dc->vendor)
-		show_utf8(f, dc->vendor, " vendor='", "'", 1);
-	if (dc->product)
-		show_utf8(f, dc->product, " product='", "'", 1);
+	if (dc->model)
+		show_utf8(f, dc->model, " model='", "'", 1);
+	if (dc->deviceid)
+		fprintf(f, " deviceid='%08x'", dc->deviceid);
+	if (dc->diveid)
+		fprintf(f, " diveid='%08x'", dc->diveid);
 	if (dc->when && dc->when != dive->when)
 		show_date(f, dc->when);
 	fprintf(f, ">\n");
diff --git a/uemis.c b/uemis.c
index dd98012a4d8d..88099beb3713 100644
--- a/uemis.c
+++ b/uemis.c
@@ -199,6 +199,17 @@ void uemis_parse_divelog_binary(char *base64, void *datap) {
 	else
 		dive->salinity = 10000; /* grams per 10l fresh water */
 
+	dc->model = strdup("Uemis Zurich");
+
+	/*
+	 * FIXME:
+	 *  - make the device ID the first word of the SHA1 sum of the deviceid string
+	 *  - do we want to have a dive ID? Without one, we'll just use the dive date,
+	 *    which is likely fine.
+	 */
+	dc->deviceid = 0;
+	dc->diveid = 0;
+
 	/* dive template in use:
 	   0 = air
 	   1 = nitrox (B)
-- 
1.8.0.dirty