Hi,
We are running into a problem where the Python API throws an
exception when it sees non-ASCII characters in an instance name.
We hit this when calling pmGetInDom on proc metrics while
the "makewhatis" process was running on the system.
Pasted at the end of this email is the instance snippet that causes
the error. Not sure if it will come through properly. Process 8087
has non-ASCII characters in its instance name, in the awk "if" block
following the "while (!done && readline() > 0) {" code
section.
When trying to run pmGetInDom on this set of metrics, the error
returned is:
File "/usr/lib64/python2.7/site-packages/pcp/pmapi.py", line 1393, in
pmGetInDom
nameL = list(map(lambda x: str(nameA_p[x].decode()), range(status)))
File "/usr/lib64/python2.7/site-packages/pcp/pmapi.py", line 1393, in
<lambda>
nameL = list(map(lambda x: str(nameA_p[x].decode()), range(status)))
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc9 in position
2247: ordinal not in range(128)
Our simple local fix to get things working is:
https://github.com/ubccr/pcp/tree/pyapi_unicode_exception
or
diff --git a/src/python/pcp/pmapi.py b/src/python/pcp/pmapi.py
index 91fff1c..9b8917c 100644
--- a/src/python/pcp/pmapi.py
+++ b/src/python/pcp/pmapi.py
@@ -1401,7 +1401,7 @@ def pmGetInDom(self, pmdescp):
if status < 0:
raise pmErr(status)
if status > 0:
- nameL = list(map(lambda x: str(nameA_p[x].decode()), range(status)))
+ nameL = list(map(lambda x: str(nameA_p[x].decode('ascii', 'ignore')), range(status)))
instL = list(map(lambda x: int(instA_p[x]), range(status)))
LIBC.free(instA_p)
LIBC.free(nameA_p)
Maybe someone who understands passing Unicode strings between C and
Python can suggest a more correct (or different) fix?
Thanks
Martins
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ 8086 or "008086 find /usr/share/man/man3 -name *
-xtype f -size +0 -cnewer /var/cache/man/whatis"
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ 8087 or "008087 /bin/awk
ÂÂÂÂÂÂÂÂÂÂÂ function readline() {
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if (use_zcat || use_bzcat || use_lzcat) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ result = (pipe_cmd | getline);
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (result < 0) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ print "Pipe error: " pipe_cmd " " ERRNO >
"/dev/stderr";
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ result = (getline < filename);
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (result < 0) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ print "Read file error: " filename " " ERRNO >
"/dev/stderr";
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂ return result;
ÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂ
ÂÂÂÂÂÂÂÂÂÂÂ function closeline() {
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if (use_zcat || use_bzcat || use_lzcat) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return close(pipe_cmd);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return close(filename);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂ
ÂÂÂÂÂÂÂÂÂÂÂ function do_one() {
ÂÂÂÂÂÂÂÂÂÂÂÂÂ insh = 0; thisjoin = 1; done = 0;
ÂÂÂÂÂÂÂÂÂÂÂÂÂ entire_line = "";
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if (verbose) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ print "adding " filename > "/dev/stderr"
ÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂ
ÂÂÂÂÂÂÂÂÂÂÂÂÂ use_zcat = match(filename,"\\.Z$") ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ match(filename,"\\.z$") ||
match(filename,"\\.gz$");
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if (!use_zcat)
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ use_bzcat = match(filename,"\\.bz2");
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if(!use_bzcat && !use_zcat)
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ use_lzcat = match(filename,"\\.lzma");
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if (use_zcat || use_bzcat || use_lzcat ) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ filename_no_gz = substr(filename, 0, RSTART - 1);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ filename_no_gz = filename;
ÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂ match(filename_no_gz, "/[^/]+$");
ÂÂÂÂÂÂÂÂÂÂÂÂÂ progname = substr(filename, RSTART + 1, RLENGTH - 1);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if (match(progname, "\\." section "[A-Za-z]+")) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ actual_section = substr(progname, RSTART + 1,
RLENGTH - 1);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ actual_section = section;
ÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂ sub(/\..*/, "", progname);
ÂÂÂÂÂÂÂÂÂÂÂÂÂ if (use_zcat || use_bzcat || use_lzcat) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (use_zcat) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ pipe_cmd = "zcat \"" filename "\" 2>/dev/null";
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ } else if (use_bzcat) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ pipe_cmd = "bzcat \"" filename "\"
2>/dev/null";
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ } else {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ pipe_cmd = "lzcat \"" filename "\"
2>/dev/null";
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ # Chuck output unless it is utf-8
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ pipe_cmd = pipe_cmd " |iconv -f utf-8 -t utf-8
2>/dev/null"
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ # try to avoid suspicious stuff
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (filename ~ /[;&|`$(]/) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ print "ignored strange file name " filename " in "
curdir > "/dev/stderr";
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ return;
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂÂÂ }
ÂÂÂÂÂÂÂÂÂÂÂ
ÂÂÂÂÂÂÂÂÂÂÂÂÂ while (!done && readline() > 0) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ gsub(/.\b/, "");
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (($1 ~ /^\.[Ss][Hh]/ &&
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ ($2 ~ /[Nn][Aa][Mm][Ee]/ ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ $2 ~ /^JM<C9>NO/ || $2 ~ /^NAVN/ || $2 ~
/^NUME/ ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ $2 ~ /^BEZEICHNUNG/ || $2 ~ /^NOMBRE/ ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ $2 ~ /^NIMI/ || $2 ~ /^NOM/ || $2 ~ /^IME/ ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ $2 ~ /^N[<C9>E]V/ || $2 ~ /^NAMA/ || $2 ~
/^Ì<C1><B0>/ ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ $2 ~ /^Ì<BE><CE>/ || $2 ~
/^<C0>Ì<A7>/ || $2 ~ /^NAZWA/ ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ $2 ~
/^<EE><E1><FA><F7><E1><EE><E9><E5>/
|| $2 ~ /^<C3><FB><B3><C6>/ || $2 ~
/^<A6>W<BA><D9>/ ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ $2 ~ /^NOME/ || $2 ~ /^NAAM/ || $2 ~
/^<C8><CC><C5>/)) ||
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ (pages == "cat" && $1 ~ /^NAME/)) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ if (!insh) {
ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ insh = 1;
|
|