pcp
[Top] [All Lists]

python api unicode exception

To: pcp@xxxxxxxxxxx
Subject: python api unicode exception
From: Martins Innus <minnus@xxxxxxxxxxx>
Date: Tue, 22 Sep 2015 14:42:50 -0400
Delivered-to: pcp@xxxxxxxxxxx
User-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:38.0) Gecko/20100101 Thunderbird/38.2.0
Hi,
 We are running into a problem where the Python API throws an exception when it sees non-ASCII characters in an instance name. We hit it when calling pmGetInDom on proc metrics while the "makewhatis" process was running on the system.


Pasted at the end of this email is the instance snippet that causes the error; I'm not sure it will come through properly. Process 8087 has non-ASCII characters in its instance name, in the "if" block following the "while (!done && readline() > 0) {" line.

When we run pmGetInDom on this set of metrics, the error returned is:

  File "/usr/lib64/python2.7/site-packages/pcp/pmapi.py", line 1393, in pmGetInDom
    nameL = list(map(lambda x: str(nameA_p[x].decode()), range(status)))
  File "/usr/lib64/python2.7/site-packages/pcp/pmapi.py", line 1393, in <lambda>
    nameL = list(map(lambda x: str(nameA_p[x].decode()), range(status)))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc9 in position 2247: ordinal not in range(128)



Our simple local fix to get things working is:


https://github.com/ubccr/pcp/tree/pyapi_unicode_exception

or

diff --git a/src/python/pcp/pmapi.py b/src/python/pcp/pmapi.py
index 91fff1c..9b8917c 100644
--- a/src/python/pcp/pmapi.py
+++ b/src/python/pcp/pmapi.py
@@ -1401,7 +1401,7 @@ def pmGetInDom(self, pmdescp):
         if status < 0:
             raise pmErr(status)
         if status > 0:
-            nameL = list(map(lambda x: str(nameA_p[x].decode()), range(status)))
+            nameL = list(map(lambda x: str(nameA_p[x].decode('ascii', 'ignore')), range(status)))
             instL = list(map(lambda x: int(instA_p[x]), range(status)))
             LIBC.free(instA_p)
             LIBC.free(nameA_p)


Maybe someone who understands how Unicode strings are passed between C and Python can suggest a more correct fix?

Thanks

Martins



                 8086 or "008086 find /usr/share/man/man3 -name * -xtype f -size +0 -cnewer /var/cache/man/whatis"
                 8087 or "008087 /bin/awk

            function readline() {
              if (use_zcat || use_bzcat || use_lzcat) {
                result = (pipe_cmd | getline);
                if (result < 0) {
                  print "Pipe error: " pipe_cmd " " ERRNO > "/dev/stderr";
                }
              } else {
                result = (getline < filename);
                if (result < 0) {
                  print "Read file error: " filename " " ERRNO > "/dev/stderr";
                }
              }
              return result;
            }

            function closeline() {
              if (use_zcat || use_bzcat || use_lzcat) {
                return close(pipe_cmd);
              } else {
                return close(filename);
              }
            }

            function do_one() {
              insh = 0; thisjoin = 1; done = 0;
              entire_line = "";

              if (verbose) {
                print "adding " filename > "/dev/stderr"
              }

              use_zcat = match(filename,"\\.Z$") ||
                         match(filename,"\\.z$") || match(filename,"\\.gz$");
              if (!use_zcat)
                use_bzcat = match(filename,"\\.bz2");
              if(!use_bzcat && !use_zcat)
                use_lzcat = match(filename,"\\.lzma");
              if (use_zcat || use_bzcat || use_lzcat ) {
                filename_no_gz = substr(filename, 0, RSTART - 1);
              } else {
                filename_no_gz = filename;
              }
              match(filename_no_gz, "/[^/]+$");
              progname = substr(filename, RSTART + 1, RLENGTH - 1);
              if (match(progname, "\\." section "[A-Za-z]+")) {
                actual_section = substr(progname, RSTART + 1, RLENGTH - 1);
              } else {
                actual_section = section;
              }
              sub(/\..*/, "", progname);
              if (use_zcat || use_bzcat || use_lzcat) {
                if (use_zcat) {
                  pipe_cmd = "zcat \"" filename "\" 2>/dev/null";
                } else if (use_bzcat) {
                  pipe_cmd = "bzcat \"" filename "\" 2>/dev/null";
                } else {
                  pipe_cmd = "lzcat \"" filename "\" 2>/dev/null";
                }
                # Chuck output unless it is utf-8
                pipe_cmd = pipe_cmd " |iconv -f utf-8 -t utf-8 2>/dev/null"
                # try to avoid suspicious stuff
                if (filename ~ /[;&|`$(]/) {
                  print "ignored strange file name " filename " in " curdir > "/dev/stderr";
                  return;
                }
              }

              while (!done && readline() > 0) {
                gsub(/.\b/, "");
                if (($1 ~ /^\.[Ss][Hh]/ &&
                  ($2 ~ /[Nn][Aa][Mm][Ee]/ ||
                   $2 ~ /^JM<C9>NO/ || $2 ~ /^NAVN/ || $2 ~ /^NUME/ ||
                   $2 ~ /^BEZEICHNUNG/ || $2 ~ /^NOMBRE/ ||
                   $2 ~ /^NIMI/ || $2 ~ /^NOM/ || $2 ~ /^IME/ ||
                   $2 ~ /^N[<C9>E]V/ || $2 ~ /^NAMA/ || $2 ~ /^Ì<C1><B0>/ ||
                   $2 ~ /^Ì<BE><CE>/ || $2 ~ /^<C0>Ì<A7>/ || $2 ~ /^NAZWA/ ||
                   $2 ~ /^<EE><E1><FA><F7><E1><EE><E9><E5>/ || $2 ~ /^<C3><FB><B3><C6>/ || $2 ~ /^<A6>W<BA><D9>/ ||
                   $2 ~ /^NOME/ || $2 ~ /^NAAM/ || $2 ~ /^<C8><CC><C5>/)) ||
                  (pages == "cat" && $1 ~ /^NAME/)) {
                    if (!insh) {
                      insh = 1;

<Prev in Thread] Current Thread [Next in Thread>