pcp
[Top] [All Lists]

pmrep: add xlsx support

To: pcp developers <pcp@xxxxxxxxxxx>
Subject: pmrep: add xlsx support
From: Marko Myllynen <myllynen@xxxxxxxxxx>
Date: Tue, 15 Dec 2015 15:40:27 +0200
Delivered-to: pcp@xxxxxxxxxxx
Organization: Red Hat
Reply-to: myllynen@xxxxxxxxxx
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.4.0
Hi,

the patch below implements xlsx output support for pmrep, works both
with live / archive inputs. Meaning that it now becomes trivial to pick
up any PCP archive and general a spreadsheet out of it. Compared to the
previously supported CSV format this is much more user friendly, after
opening the file in Excel/LibreOffice one instantly sees the trends
there and can start creating charts (with CSV files some hardcore
spreadsheet skills are needed to get to this point).

I checked the available Python XLSX modules at

http://www.python-excel.org/

and decided to use

https://pypi.python.org/pypi/XlsxWriter

It's way too heavy to be embedded (but it's trivial to install e.g.
with pip anyway). It is actively maintained and has lots of features
available if we want to create something fancier in the future.

We're a bit late in the release cycle but given how well contained this
patch is I think it'd be nice to include it, this is a really nice
feature. (Unrelated to anything else, this is a good example what an
additional output alternative needs on pmrep level so gives a bit
insight if we want to modularize things later.) Since the end result
is completely dependent on xlsxwrite I'm not sure is PCP QA applicable
here.

(In the future we could consider optionally creating different sheets
for different metric categories but at least with reasonable amount
of metrics I'm finding everything in one sheet more convenient,
having one or two metrics per sheet and half a dozen sheets wouldn't
be that helpful.)

---
 src/pmrep/TODO     |   1 -
 src/pmrep/pmrep.1  |  19 +++++---
 src/pmrep/pmrep.py | 125
++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 129 insertions(+), 16 deletions(-)

diff --git a/src/pmrep/TODO b/src/pmrep/TODO
index 4c4259a..4a2e615 100644
--- a/src/pmrep/TODO
+++ b/src/pmrep/TODO
@@ -14,7 +14,6 @@
 -  verify CSV format
 -  add JSON output support
 -  add XML output support (like sadf)
--  add XLS output support (like sar2xls)
 -  look for config in ./, ~/.pcp, ~/, /etc/pcp or so
 -  possibly add more command line switch sanity checking
 -  modularize code to allow creating custom output plugins
diff --git a/src/pmrep/pmrep.1 b/src/pmrep/pmrep.1
index e6911da..29724e9 100644
--- a/src/pmrep/pmrep.1
+++ b/src/pmrep/pmrep.1
@@ -27,7 +27,7 @@
 [\f3\-e\f1 \f2derived\f1]
 [\f3\-E\f1 \f2lines\f1]
 [\f3\-f\f1 \f2format\f1]
-[\f3\-F\f1 \f2archive\f1]
+[\f3\-F\f1 \f2outfile\f1]
 [\f3\-h\f1 \f2host\f1]
 [\f3\-K\f1 \f2spec\f1]
 [\f3\-l\f1 \f2delimiter\f1]
@@ -287,17 +287,21 @@ string for formatting the timestamp.
 The format will be used with
 Python's
 .B datetime.strftime
-method which is mostly the same as that described in
+method (except with xlsx) which is mostly the same as that described in
 .BR strftime (3).
 An empty
 .I format
 string (i.e., '') will remove the timestamps from the output.
-The default is
+The default with stdout is
 .BR %H:%M:%S .
+The default with csv is
+.BR "%Y-%m-%d %H:%M:%S" .
+The default with xlsx is
+.BR "yyyy-mm-dd hh:mm:ss" .
 .TP
 .B \-F
-Specify the output
-.IR archive .
+Specify the output file
+.IR outfile .
 See
 .BR \-o .
 .TP
@@ -379,6 +383,11 @@ Print metrics in CSV format (subject to formatting
options).
 .I stdout
 Print metrics to stdout (format subject to formatting options).
 .TP 2
+.I xlsx
+Record metrics into an XLSX file for office/spreadsheet suites.
+Uses the \fBxlsxwriter\fR module. Requires
+.BR \-F .
+.TP 2
 .I zabbix
 Send metrics to a Zabbix server.
 See
diff --git a/src/pmrep/pmrep.py b/src/pmrep/pmrep.py
index a8d5d28..60952ae 100644
--- a/src/pmrep/pmrep.py
+++ b/src/pmrep/pmrep.py
@@ -52,6 +52,10 @@ try:
     import json
 except:
     import simplejson as json
+try:
+    import xlsxwriter
+except:
+    pass
 import socket
 import struct
 import time
@@ -75,6 +79,7 @@ CSVSEP  = ","
 CSVTIME = "%Y-%m-%d %H:%M:%S"
 OUTSEP  = "  "
 OUTTIME = "%H:%M:%S"
+XLSTIME = "yyyy-mm-dd hh:mm:ss"
 ZBXPORT = 10051
 ZBXPRFX = "pcp."
 NO_VAL  = "N/A"
@@ -85,6 +90,7 @@ VERSION = 1
 OUTPUT_ARCHIVE = "archive"
 OUTPUT_CSV     = "csv"
 OUTPUT_STDOUT  = "stdout"
+OUTPUT_XLS     = "xlsx"
 OUTPUT_ZABBIX  = "zabbix"

 class ZabbixMetric(object):
@@ -186,8 +192,9 @@ class PMReporter(object):
         self.version = VERSION
         self.source = "local:"
         self.output = OUTPUT_STDOUT
-        self.archive = None # output archive
+        self.outfile = None # output file
         self.log = None # pmi handle
+        self.wb = None # xlsx workbook
         self.derived = None
         self.header = 1
         self.unitinfo = 1
@@ -317,8 +324,8 @@ class PMReporter(object):
         opts.pmSetLongOptionSpecLocal()    # -K/--spec-local
         opts.pmSetLongOption("config", 1, "c", "FILE", "config file path")
         opts.pmSetLongOption("check", 0, "C", "", "check config and
metrics and exit")
-        opts.pmSetLongOption("output", 1, "o", "OUTPUT", "output
target: archive, csv, stdout (default), or zabbix")
-        opts.pmSetLongOption("output-archive", 1, "F", "ARCHIVE",
"output archive (with -o archive)")
+        opts.pmSetLongOption("output", 1, "o", "OUTPUT", "output
target: archive, csv, stdout (default), xlsx, zabbix")
+        opts.pmSetLongOption("output-file", 1, "F", "OUTFILE", "output
file (with -o archive/xlsx)")
         opts.pmSetLongOption("derived", 1, "e", "FILE|DFNT", "derived
metrics definitions")
         opts.pmSetLongOptionDebug()        # -D/--debug
         opts.pmSetLongOptionVersion()      # -V/--version
@@ -371,6 +378,8 @@ class PMReporter(object):
                 self.output = OUTPUT_CSV
             elif optarg == OUTPUT_STDOUT:
                 self.output = OUTPUT_STDOUT
+            elif optarg == OUTPUT_XLS:
+                self.output = OUTPUT_XLS
             elif optarg == OUTPUT_ZABBIX:
                 self.output = OUTPUT_ZABBIX
             else:
@@ -380,7 +389,10 @@ class PMReporter(object):
             if os.path.exists(optarg + ".index"):
                 sys.stderr.write("Archive %s already exists.\n" % optarg)
                 sys.exit(1)
-            self.archive = optarg
+            if os.path.exists(optarg):
+                sys.stderr.write("File %s already exists.\n" % optarg)
+                sys.exit(1)
+            self.outfile = optarg
         elif opt == 'e':
             self.derived = optarg
         elif opt == 'H':
@@ -573,8 +585,16 @@ class PMReporter(object):
         if self.context.type == PM_CONTEXT_LOCAL:
             self.source = "@" # PCPIntro(1), RHBZ#1272082

-        if self.output == OUTPUT_ARCHIVE and not self.archive:
-            sys.stderr.write("Archive must be defined with archive
output.\n")
+        if self.output == OUTPUT_ARCHIVE and not self.outfile:
+            sys.stderr.write("Outfile must be defined with archive
output.\n")
+            sys.exit(1)
+
+        if self.output == OUTPUT_XLS and "xlsxwriter" not in sys.modules:
+            sys.stderr.write("Failed to load xlsxwriter module.\n")
+            sys.exit(1)
+
+        if self.output == OUTPUT_XLS and not self.outfile:
+            sys.stderr.write("Outfile must be defined with xlsx output.\n")
             sys.exit(1)

         if self.output == OUTPUT_ZABBIX and (not self.zabbix_server or \
@@ -790,6 +810,8 @@ class PMReporter(object):
         if self.timefmt == None:
             if self.output == OUTPUT_CSV:
                 self.timefmt = CSVTIME
+            elif self.output == OUTPUT_XLS:
+                self.timefmt = XLSTIME
             else:
                 self.timefmt = OUTTIME
         if not self.timefmt:
@@ -954,6 +976,7 @@ class PMReporter(object):

     def report(self, tstamp, values):
         """ Report the metric values """
+        dt = None
         if tstamp != None:
             ts = self.context.pmLocaltime(tstamp.tv_sec)
             us = int(tstamp.tv_usec)
@@ -967,6 +990,8 @@ class PMReporter(object):
             self.write_csv(tstamp, values)
         if self.output == OUTPUT_STDOUT:
             self.write_stdout(tstamp, values)
+        if self.output == OUTPUT_XLS:
+            self.write_xlsx(dt, values)
         if self.output == OUTPUT_ZABBIX:
             self.write_zabbix(tstamp, values)

@@ -1063,16 +1088,17 @@ class PMReporter(object):

     def write_header(self):
         """ Write metrics header """
-        if self.output == OUTPUT_ARCHIVE:
-            sys.stdout.write("Recording archive %s" % self.archive)
+        if self.output == OUTPUT_ARCHIVE or self.output == OUTPUT_XLS:
+            sys.stdout.write("Recording %s" % self.outfile)
             if self.runtime != -1:
                 sys.stdout.write(":\n%s samples(s) with %.1f sec
interval ~ %d sec duration.\n" % (self.samples, float(self.interval),
self.runtime))
             elif self.samples:
                 duration = (self.samples - 1) * int(self.interval)
                 sys.stdout.write(":\n%s samples(s) with %.1f sec
interval ~ %d sec duration.\n" % (self.samples, float(self.interval),
duration))
             else:
+                sys.stdout.write("...")
                 if self.context.type != PM_CONTEXT_ARCHIVE:
-                    sys.stdout.write("... (Ctrl-C to stop)")
+                    sys.stdout.write(" (Ctrl-C to stop)")
                 sys.stdout.write("\n")
             return

@@ -1138,7 +1164,7 @@ class PMReporter(object):

         if self.log == None:
             # Create a new archive
-            self.log = pmi.pmiLogImport(self.archive)
+            self.log = pmi.pmiLogImport(self.outfile)
             if self.context.type == PM_CONTEXT_ARCHIVE:

self.log.pmiSetHostname(self.context.pmGetArchiveLabel().hostname)

self.log.pmiSetTimezone(self.context.pmGetArchiveLabel().tz)
@@ -1287,6 +1313,77 @@ class PMReporter(object):
         nfmt = nfmt[:-l]
         print(nfmt.format(*tuple(line)))

+    def write_xlsx(self, timestamp, values):
+        """ Write results in XLSX format """
+        if timestamp == None and values == None:
+            # Complete and close
+            self.wb.close()
+            self.wb = None
+            return
+
+        # Current row
+        try:
+            self.row += 1
+        except:
+            self.row = 0
+
+        # Create the file and write the header
+        if not self.wb:
+            col = 0
+            self.wb = xlsxwriter.Workbook(self.outfile,
{'constant_memory': True, 'in_memory': False, 'default_date_format':
self.timefmt})
+            self.ws = self.wb.add_worksheet("pmrep data")
+            format = self.wb.add_format({'bold': True})
+            format.set_align('right')
+            self.ws.set_column(col, col, 20)
+            self.ws.write_string(self.row, col, "Time", format)
+            # Labels
+            for i, metric in enumerate(self.metrics):
+                ins = 1 if self.insts[i][0][0] == PM_IN_NULL else
len(self.insts[i][0])
+                for j in range(ins):
+                    col += 1
+                    token = self.metrics[metric][0]
+                    if self.insts[i][1][j]:
+                        token += "~" + str(self.insts[i][1][j])
+                    self.ws.write_string(self.row, col, token, format)
+                    l = len(token) if self.metrics[metric][4] <
len(token) else self.metrics[metric][4]
+                    self.ws.set_column(col, col, l + 5)
+            self.row += 1
+            # Units
+            col = 0
+            for i, metric in enumerate(self.metrics):
+                ins = 1 if self.insts[i][0][0] == PM_IN_NULL else
len(self.insts[i][0])
+                for j in range(ins):
+                    col += 1
+                    unit = self.metrics[metric][2][0]
+                    self.ws.write_string(self.row, col, unit, format)
+            self.row += 1
+            # Add an empty line for readability
+            col = 0
+            format = self.wb.add_format()
+            format.set_top(2)
+            self.ws.write_blank(self.row, col, None, format)
+            for i, metric in enumerate(self.metrics):
+                ins = 1 if self.insts[i][0][0] == PM_IN_NULL else
len(self.insts[i][0])
+                for j in range(ins):
+                    col += 1
+                    self.ws.write_blank(self.row, col, None, format)
+            self.row += 1
+
+        # Add current values
+        col = 0
+        self.ws.write_datetime(self.row, col, timestamp)
+        for i, metric in enumerate(self.metrics):
+            ins = 1 if self.insts[i][0][0] == PM_IN_NULL else
len(self.insts[i][0])
+            for j in range(ins):
+                col += 1
+                if str(list(values[i])[j][2]) != NO_VAL:
+                    if self.descs[i].contents.type == PM_TYPE_STRING:
+                        self.ws.write_string(self.row, col,
str(values[i][j][2]))
+                    else:
+                        self.ws.write_number(self.row, col,
list(values[i])[j][2])
+                else:
+                    self.ws.write_blank(self.row, col, None)
+
     def write_zabbix(self, timestamp, values):
         """ Write (send) metrics to a Zabbix server """
         if timestamp == None and values == None:
@@ -1323,6 +1420,12 @@ class PMReporter(object):
         """ Establish a PMAPI context to archive, host or local, via
args """
         self.context = pmapi.pmContext.fromOptions(self.opts, sys.argv)

+    def finalize(self):
+        """ Finalize and clean up """
+        if self.wb:
+            self.wb.close()
+            self.wb = None
+
 if __name__ == '__main__':
     try:
         P = PMReporter()
@@ -1332,6 +1435,7 @@ if __name__ == '__main__':
         P.validate_config()
         P.validate_metrics()
         P.execute()
+        P.finalize()

     except pmapi.pmErr as error:
         sys.stderr.write('%s: %s\n' % (error.progname(), error.message()))
@@ -1341,3 +1445,4 @@ if __name__ == '__main__':
         sys.stderr.write("%s\n" % str(error))
     except KeyboardInterrupt:
         sys.stdout.write("\n")
+        P.finalize()

Thanks,

-- 
Marko Myllynen

<Prev in Thread] Current Thread [Next in Thread>