SGI has an ancient bug about pmlogreduce crashing on certain
(possibly corrupted) archives. The signature of this is:
*** glibc detected *** /usr/lib64/pcp/bin/pmlogreduce: free(): invalid pointer:
0x000000000099e020 ***
======= Backtrace: =========
/lib64/libc.so.6(+0x75018)[0x7fa149345018]
/lib64/libc.so.6(cfree+0x6c)[0x7fa149349f6c]
/usr/lib64/libpcp.so.3(__pmFreeResultValues+0x2d5)[0x7fa14963c9f5]
/usr/lib64/libpcp.so.3(pmFreeResult+0x44)[0x7fa14963ca53]
/usr/lib64/libpcp.so.3(+0x2a493)[0x7fa149658493]
/usr/lib64/libpcp.so.3(+0x2b258)[0x7fa149659258]
/usr/lib64/libpcp.so.3(__pmLogFetchInterp+0xdc4)[0x7fa14965a14b]
/usr/lib64/libpcp.so.3(__pmLogFetch+0x66)[0x7fa149655c28]
/usr/lib64/libpcp.so.3(pmFetch+0x116)[0x7fa14963c3a8]
/usr/lib64/pcp/bin/pmlogreduce[0x4029b2]
/lib64/libc.so.6(__libc_start_main+0xe6)[0x7fa1492eebc6]
/usr/lib64/pcp/bin/pmlogreduce[0x4022a9]
....
Also, valgrind complains about memory being leaked, e.g.:
....
==5986== 294,720 bytes in 9,210 blocks are definitely lost in loss record 137
of 140
==5986== at 0x4C261D7: malloc (in
/usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==5986== by 0x404329: rewrite (rewrite.c:40)
==5986== by 0x402AF9: main (pmlogreduce.c:285)
==5986==
==5986== LEAK SUMMARY:
==5986== definitely lost: 294,720 bytes in 9,210 blocks
==5986== indirectly lost: 0 bytes in 0 blocks
==5986== possibly lost: 84,456 bytes in 3,184 blocks
==5986== still reachable: 3,025,502 bytes in 15,285 blocks
==5986== suppressed: 0 bytes in 0 blocks
Jonathan made a patch which addresses the memory leak, and also a
problem where an incorrect index to the metriclist[] could be used
in rewrite_free(). (I tweaked the patch a bit, but it's basically his.)
Anyway, this seems to fix the problem in the one reproducible test case
that's available to me. Look OK?
---
diff --git a/src/pmlogreduce/rewrite.c b/src/pmlogreduce/rewrite.c
index 7aa4f16..a5157d0 100644
--- a/src/pmlogreduce/rewrite.c
+++ b/src/pmlogreduce/rewrite.c
@@ -3,6 +3,7 @@
#include <assert.h>
static pmResult *orp;
+static metric_t **rw_metriclist;
/*
* Must either re-write the pmResult, or report a fatal error and
@@ -14,12 +15,17 @@ rewrite(pmResult *rp)
int i;
int sts;
- if ((orp = (pmResult *)malloc(sizeof(pmResult) +
- (rp->numpmid - 1) * sizeof(pmValueSet *))) == NULL) {
- fprintf(stderr,
- "%s: rewrite: cannot malloc pmResult for %d metrics\n",
- pmProgname, rp->numpmid);
- return NULL;
+ orp = (pmResult *)malloc(sizeof(pmResult) +
+ (rp->numpmid - 1) * sizeof(pmValueSet *));
+ rw_metriclist = (metric_t **) calloc(rp->numpmid, sizeof(metric_t*));
+
+ if (orp == NULL || rw_metriclist == NULL) {
+ fprintf(stderr, "%s: rewrite: cannot malloc pmResult for both %d "
+ "metrics and rw_metriclist\n", pmProgname, rp->numpmid);
+ /* we may have succeeded with one of them */
+ free(orp);
+ free(rw_metriclist);
+ return NULL;
}
orp->numpmid = 0;
orp->timestamp = rp->timestamp; /* struct assignment */
@@ -114,6 +120,7 @@ rewrite(pmResult *rp)
}
}
if (ovsp->numval > 0) {
+ rw_metriclist[orp->numpmid] = mp;
orp->vset[orp->numpmid] = ovsp;
orp->numpmid++;
}
@@ -137,11 +144,8 @@ rewrite_free(void)
for (i = 0; i < orp->numpmid; i++) {
pmValueSet *vsp = orp->vset[i];
+ metric_t *mp = rw_metriclist[i];
int j;
- metric_t *mp;
-
- if (vsp->numval <= 0)
- continue;
for (j = 0; j < numpmid; j++) {
if (vsp->pmid == pmidlist[j])
@@ -153,17 +157,21 @@ rewrite_free(void)
pmProgname, pmIDStr(vsp->pmid));
exit(1);
}
- mp = &metriclist[j];
- if (mp->mode == MODE_REWRITE) {
+ assert((vsp->numval > 0) ? (mp != NULL) : (mp == NULL));
+
+ if ((vsp->numval > 0) && (mp->mode == MODE_REWRITE)) {
for (j = 0; j < vsp->numval; j++) {
free(vsp->vlist[j].value.pval);
}
}
- free(orp->vset[i]);
+ free(vsp);
}
free(orp);
orp = NULL;
+
+ free(rw_metriclist);
+ rw_metriclist = NULL;
}
|