pcp
[Top] [All Lists]

Derived metrics sigsegv on failed fetch

To: kenj <kenj@xxxxxxxxxxxxxxxx>, Paul Cowan <cowan@xxxxxxxxxx>
Subject: Derived metrics sigsegv on failed fetch
From: nathans@xxxxxxxxxx
Date: Wed, 12 May 2010 09:51:03 +1000 (EST)
Cc: pcp <pcp@xxxxxxxxxxx>
In-reply-to: <981329764.80721273621649161.JavaMail.root@xxxxxxxxxxxxxxxxxx>
Sender: nscott@xxxxxxxxxx
Hi Ken,

Paul came across this problem while monitoring our production boxen; it's
reproducible locally with one of the example derived metric configs:
start pmdumptext (or pmval or...) monitoring a derived metric which
has a counter-semantics component, then restart pmcd...

$ cat $PCP_DERIVED_CONFIG
bad_in_pkts = network.interface.in.errors + network.interface.in.drops

$ pmdumptext -t 1 -h localhost bad_in_pkts
Wed May 12 09:27:53     ?       ?       ?                        
Wed May 12 09:27:54     0.000   0.000   0.000                    
Wed May 12 09:27:55     0.000   0.000   0.000                    
Wed May 12 09:27:56     0.000   0.000   0.000                    
Wed May 12 09:27:57     0.000   0.000   0.000                    
Segmentation fault                                               

...

Reading symbols from /usr/bin/pmdumptext...(no debugging symbols found)...done.
(gdb) r -t 1 -h localhost bad_in_pkts                                          
Starting program: /usr/bin/pmdumptext -t 1 -h localhost bad_in_pkts            

[Thread debugging using libthread_db enabled]
Wed May 12 09:29:27     ?       ?       ?    
Wed May 12 09:29:28     0.000   0.000   0.000
Wed May 12 09:29:29     0.000   0.000   0.000

Program received signal SIGSEGV, Segmentation fault.
0xb7fb103e in __dmpostfetch (ctxp=0x807a880, result=0xbffff3a8) at derive_fetch.c:1101
1101            newrp->vset[j]->pmid = rp->vset[j]->pmid;
      
(gdb) bt                                                                        
      
#0  0xb7fb103e in __dmpostfetch (ctxp=0x807a880, result=0xbffff3a8) at 
derive_fetch.c:1101                                                             
                     
#1  0xb7f82cfd in pmFetch (numpmid=3, pmidlist=0x807b600, result=0xbffff3a8) at 
fetch.c:167                                                                     
            
#2  0x08050ad1 in ?? ()                                                         
      
#3  0x0805565e in ?? ()                                                         
      
#4  0x0804e8ce in ?? ()                                                         
      
#5  0xb7aa6775 in __libc_start_main () from /lib/i686/cmov/libc.so.6            
      
#6  0x0804aa91 in ?? ()                                                         
      
(gdb) l                                                                         
      
1096                if ((newrp->vset[j] = (pmValueSet *)malloc(need)) == NULL) {
1097                    __pmNoMem("__dmpostfetch: vset", need, PM_FATAL_ERR);   
      
1098                    /*NOTREACHED*/                                          
      
1099                }                                                           
      
1100            }                                                               
      
1101            newrp->vset[j]->pmid = rp->vset[j]->pmid;                       
      
1102            newrp->vset[j]->numval = numval;                                
      
1103            newrp->vset[j]->valfmt = valfmt;                                
      
1104            if (numval < 0)                                                 
      
1105                continue;
(gdb) p rp                                                                      
      
$2 = (pmResult *) 0x807a4e0                                                     
      
(gdb) p rp->vset                                                                
      
$3 = {0x0}                                                                      
      
(gdb) up 1                                                                      
      
#1  0xb7f82cfd in pmFetch (numpmid=3, pmidlist=0x807b600, result=0xbffff3a8) at 
fetch.c:167                                                                     
            
167                 __dmpostfetch(ctxp, result);                                
      
(gdb) l                                                                         
      
162                 }                                                           
      
163             }                                                               
      
164                                                                             
      
165             /* process derived metrics, if any */                           
      
166             if (have_dm) {                                                  
      
167                 __dmpostfetch(ctxp, result);                                
      
168                 if (newlist != NULL) {                                      
      
169                     free(newlist);                                          
      
170                 }                                                           
      
171             }                                                               
      
(gdb) p n                                                                       
      
$7 = -12366                                                                     
      
(gdb)                                                                           
      


[ insert potential fix (please verify, Ken?) ]

$ git diff .
diff --git a/src/libpcp/src/fetch.c b/src/libpcp/src/fetch.c
index 84deb0c..1b8298f 100644                                 
--- a/src/libpcp/src/fetch.c                                  
+++ b/src/libpcp/src/fetch.c                                  
@@ -163,7 +163,7 @@ pmFetch(int numpmid, pmID pmidlist[], pmResult **result)
        }                                                                   
                                                                            
        /* process derived metrics, if any */                               
-       if (have_dm) {
+       if (n >= 0 && have_dm) {
            __dmpostfetch(ctxp, result);
            if (newlist != NULL) {
                free(newlist);


$ pmdumptext -t 1 -h localhost bad_in_pkts
Wed May 12 09:37:39     ?       ?       ?
Wed May 12 09:37:40     0.000   0.000   0.000
Wed May 12 09:37:41     0.000   0.000   0.000
Wed May 12 09:37:42     0.000   0.000   0.000
Wed May 12 09:37:43     0.000   0.000   0.000
Wed May 12 09:37:44     0.000   0.000   0.000
Wed May 12 09:37:45     ?       ?       ?
Wed May 12 09:37:46     ?       ?       ?
Wed May 12 09:37:47     ?       ?       ?
Wed May 12 09:37:48     ?       ?       ?
Wed May 12 09:37:49     ?       ?       ?
Wed May 12 09:37:50     ?       ?       ?
Wed May 12 09:37:51     0.000   0.000   0.000
Wed May 12 09:37:52     0.000   0.000   0.000
Wed May 12 09:37:53     0.000   0.000   0.000
Wed May 12 09:37:54     0.000   0.000   0.000


cheers.

-- 
Nathan

<Prev in Thread] Current Thread [Next in Thread>