pcp
[Top] [All Lists]

interesting non-optimization: hash % vs &

To: pcp developers <pcp@xxxxxxxxxxx>
Subject: interesting non-optimization: hash % vs &
From: "Frank Ch. Eigler" <fche@xxxxxxxxxx>
Date: Mon, 7 Sep 2015 22:59:47 -0400
Delivered-to: pcp@xxxxxxxxxxx
User-agent: Mutt/1.4.2.2i
Hi -


While browsing through some detailed profile info, I came across a
case where __pmHashSearch was taking a noticeable amount of time;
regularly 4ish% of the total cpu.  (This workload is my usual
cpu-hungry case of pmwebd scanning many large archives in parallel, to
populate grafana graphs.)

An assembler-level profile suggested looking at the div instruction
coming from the "[key % hcp->hsize]" style computation.  On a lark, I
applied a patch (below) that limits hsize to powers of two, thus
letting us replace the division by a boolean-and: "[key & (hcp->hsize-1)]".
The code appears to work ...  and halves the fraction of CPU time due
to __pmHashSearch ... but annoyingly, overall CPU consumption stays
about the same.  So I'm attaching it just as a curiosity, in case
someone's motivated to think about it more.



diff --git a/src/include/pcp/impl.h b/src/include/pcp/impl.h
index 84dfa4f3c136..bd5f846a6e93 100644
--- a/src/include/pcp/impl.h
+++ b/src/include/pcp/impl.h
@@ -335,7 +335,7 @@ typedef struct __pmHashNode {
 
 typedef struct __pmHashCtl {
     int                        nodes;
-    int                        hsize;
+    int                        hsize; /* a power of 2 */
     __pmHashNode       **hash;
     __pmHashNode       *next;
     unsigned int       index;
diff --git a/src/libpcp/src/hash.c b/src/libpcp/src/hash.c
index 9bdf97fb25ac..3365a0f9f562 100644
--- a/src/libpcp/src/hash.c
+++ b/src/libpcp/src/hash.c
@@ -31,7 +31,7 @@ __pmHashSearch(unsigned int key, __pmHashCtl *hcp)
     if (hcp->hsize == 0)
        return NULL;
 
-    for (hp = hcp->hash[key % hcp->hsize]; hp != NULL; hp = hp->next) {
+    for (hp = hcp->hash[key & (hcp->hsize-1)]; hp != NULL; hp = hp->next) {
        if (hp->key == key)
            return hp;
     }
@@ -59,9 +59,11 @@ __pmHashAdd(unsigned int key, void *data, __pmHashCtl *hcp)
        int             oldsize = hcp->hsize;
 
        hcp->hsize *= 2;
-       if (hcp->hsize % 2) hcp->hsize++;
-       if (hcp->hsize % 3) hcp->hsize += 2;
-       if (hcp->hsize % 5) hcp->hsize += 2;
+       /* Simply double hsize, to keep it a power of 2.
+          if (hcp->hsize % 2) hcp->hsize++;
+          if (hcp->hsize % 3) hcp->hsize += 2;
+          if (hcp->hsize % 5) hcp->hsize += 2;
+        */
        if ((hcp->hash = (__pmHashNode **)calloc(hcp->hsize, 
sizeof(__pmHashNode *))) == NULL) {
            hcp->hsize = oldsize;
            hcp->hash = old;
@@ -74,7 +76,7 @@ __pmHashAdd(unsigned int key, void *data, __pmHashCtl *hcp)
            for (hp = old[--oldsize]; hp != NULL; ) {
                tp = hp;
                hp = hp->next;
-               k = tp->key % hcp->hsize;
+               k = tp->key & (hcp->hsize-1);
                tp->next = hcp->hash[k];
                hcp->hash[k] = tp;
            }
@@ -85,7 +87,7 @@ __pmHashAdd(unsigned int key, void *data, __pmHashCtl *hcp)
     if ((hp = (__pmHashNode *)malloc(sizeof(__pmHashNode))) == NULL)
        return -oserror();
 
-    k = key % hcp->hsize;
+    k = key & (hcp->hsize-1);
     hp->key = key;
     hp->data = data;
     hp->next = hcp->hash[k];
@@ -103,10 +105,10 @@ __pmHashDel(unsigned int key, void *data, __pmHashCtl 
*hcp)
     if (hcp->hsize == 0)
        return 0;
 
-    for (hp = hcp->hash[key % hcp->hsize]; hp != NULL; hp = hp->next) {
+    for (hp = hcp->hash[key & (hcp->hsize-1)]; hp != NULL; hp = hp->next) {
        if (hp->key == key && hp->data == data) {
            if (lhp == NULL)
-               hcp->hash[key % hcp->hsize] = hp->next;
+               hcp->hash[key & (hcp->hsize-1)] = hp->next;
            else
                lhp->next = hp->next;
            free(hp);

<Prev in Thread] Current Thread [Next in Thread>
  • interesting non-optimization: hash % vs &, Frank Ch. Eigler <=