
[Bug 1106] New: pmdalinux / pmdaroot container problems

To: pcp@xxxxxxxxxxx
Subject: [Bug 1106] New: pmdalinux / pmdaroot container problems
From: bugzilla-daemon@xxxxxxxxxxx
Date: Sun, 05 Apr 2015 21:11:42 +0000
Auto-submitted: auto-generated
Delivered-to: pcp@xxxxxxxxxxx
Bug ID: 1106
Summary: pmdalinux / pmdaroot container problems
Product: pcp
Version: unspecified
Hardware: All
OS: Linux
Status: NEW
Severity: major
Priority: P5
Component: pcp
Assignee: pcp@oss.sgi.com
Reporter: fche@redhat.com
CC: pcp@oss.sgi.com
Classification: Unclassified

In a realistic test scenario with the pcpfans.git fche/pmmgr branch, the pcp
--container mode support shows problems at least in pmdalinux and pmdaroot.
The gist is that it is not possible to run two concurrent pcp clients such as
pmlogger against the same pmcd, one with and one without --container (whether
specified as a separate option or as part of a pcp:// url).  The symptoms
vary: corrupted data (host data showing up in the container, or vice versa),
or missing data (container-side accesses getting "Error: Operation not
permitted").
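
For reference, the two equivalent ways of naming a container that are used
below (a sketch; SUBSTRING stands for any unique part of the container id
shown by "docker ps"):

% pminfo -f --container=SUBSTRING network.interface.inet_addr
% pminfo -f -h 'local:?container=SUBSTRING' network.interface.inet_addr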

The easiest reproduction is to fire up the aforementioned branch, set up a
container-logging pmmgr (touch /etc/pcp/pmmgr/subtarget-containers), and start
a few docker containers; a rough sketch of these steps follows.  The resulting
/var/log/pcp/pmmgr/$HOST and $HOST--$CONTAINER log files will not be right.
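
A rough sketch of those steps (the service name and the settling delay are
assumptions, not commands copied from the failing setup):

# touch /etc/pcp/pmmgr/subtarget-containers
# systemctl restart pmmgr        (or: service pmmgr restart)
# docker run -d busybox sleep 3600
# docker run -d busybox sleep 3600
... wait a pmmgr polling interval or two, then:
# ls /var/log/pcp/pmmgr/
... expect a $HOST directory plus one $HOST--$CONTAINER directory per
... container; the archives inside will not be right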

A more manual example:

(in another terminal) # docker run -i busybox sh   (and just leave it alone)
# docker ps   # to fetch running container id
# service pmcd restart
% pminfo -f --container=SUBSTRING network.interface.inet_addr
... probably will show a reasonable "172.17.0.*" IP address for the container
... now to mess things up ... generate actual pcp traffic:
% cd /tmp
% pmlogconf -c -r -h 'local:' FOO.conf &
% pmlogconf -c -r -h 'local:?container=SUBSTRING' FOO2.conf &
% wait
... examine the two different FOO*.conf files, as one might expect
% pminfo -f --container=SUBSTRING network.interface.inet_addr
% pminfo -f                       network.interface.inet_addr
... by now these generally fail to show correct results (both return the same
... data, or EPERM)
% pmlogger -h 'local:?container=SUBSTRING' -c FOO2.conf FOO2 &
% pmlogger -h 'local:'                     -c FOO.conf FOO &
% pminfo -f --container=SUBSTRING network.interface.inet_addr
% pminfo -f                       network.interface.inet_addr
... before long, neither shows correct results


In one manifestation of the problem (the EPERM variant), pmdaroot appears to
go mute: over three separate pminfo queries it receives messages but sends
nothing back.

# strace -f -p `pgrep pmdaroot`
select(8, [0 3 6 7], NULL, NULL, NULL)  = 1 (in [0])
read(0, "\0\0\0\20\0\0p\0\0\0\0\1", 12) = 12
read(0, "\377\377\317\231", 4)          = 4
select(8, [0 3 6 7], NULL, NULL, NULL)  = 1 (in [0])
read(0, "\0\0\0\20\0\0p\0\0\0\0\1", 12) = 12
read(0, "\377\377\317\231", 4)          = 4
select(8, [0 3 6 7], NULL, NULL, NULL)  = 1 (in [0])
read(0, "\0\0\0\20\0\0p\0\0\0\0\1", 12) = 12
read(0, "\377\377\317\231", 4)          = 4

# cat /var/log/pcp/pmcd/root.log
Log for pmdaroot on vm-rawhide-64 started Sun Apr  5 15:40:26 2015

[Sun Apr  5 15:40:27] pmdaroot(22640) Error: bad protocol exchange (fd=8)


In another manifestation, the host-side and container-side data come back
perpetually identical, reflecting the container side, even if more containers
are started later and their --container=XXXX ids are passed:

# strace -f -p `pgrep pmdalinux`
Process 11264 attached
read(0, "\0\0\0\26\0\0p\21\0\0\0\4", 12) = 12
read(0, "\0\0\0\01629921\0", 10)        = 10
read(0, "\0\0\0\24\0\0p\21\0\0\0\4", 12) = 12
read(0, "\0\0\0\f100\0", 8)             = 8
read(0, "\0\0\0\24\0\0p\21\0\0\0\4", 12) = 12
read(0, "\0\0\0\v500\0", 8)             = 8
read(0, "\0\0\0\34\0\0p\2\0\0\0\4", 12) = 12
read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16) = 16
read(0, "\0\0\0 \0\0p\3\0\0\0\4", 12)   = 12
read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\17\0\204\0", 20) = 20
ioctl(8, SIOCGIFCONF, {80, {{"lo", {AF_INET, inet_addr("127.0.0.1")}}, {"eth0",
{AF_INET, inet_addr("172.17.0.1")}}}}) = 0
ioctl(8, SIOCGIFADDR, {ifr_name="lo", ifr_addr={AF_INET,
inet_addr("127.0.0.1")}}) = 0
open("/sys/class/net/lo/address", O_RDONLY) = 681
fstat(681, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x7f4b90d08000
read(681, "00:00:00:00:00:00\n", 4096)  = 18
close(681)                              = 0
munmap(0x7f4b90d08000, 4096)            = 0
ioctl(8, SIOCGIFADDR, {ifr_name="eth0", ifr_addr={AF_INET,
inet_addr("172.17.0.1")}}) = 0
open("/sys/class/net/eth0/address", O_RDONLY) = 681
fstat(681, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x7f4b90d08000
read(681, "52:54:00:47:b3:cc\n", 4096)  = 18
close(681)                              = 0
munmap(0x7f4b90d08000, 4096)            = 0
open("/proc/net/if_inet6", O_RDONLY)    = -1 ENOENT (No such file or directory)
write(1,
"\0\0\0T\0\0p\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\17\0\204\0\0\0\0\2"..., 84) =
84
read(0, "\0\0\0\20\0\0p\4\0\0\0\4", 12) = 12
read(0, "\17\0\204\0", 4)               = 4
write(1, "\0\0\0 \0\0p\5\0\0\0\0\17\0\204\0\0\0\0\6\17\0\0\21\0\0\0\3\0\0\0\0",
32) = 32
read(0, "\0\0\0 \0\0p\6\0\0\0\4", 12)   = 12
read(0, "\17\0\0\21\0\0\0\0\0\0\0\0\377\377\377\377\0\0\0\0", 20) = 20
ioctl(8, SIOCGIFCONF, {80, {{"lo", {AF_INET, inet_addr("127.0.0.1")}}, {"eth0",
{AF_INET, inet_addr("172.17.0.1")}}}}) = 0
ioctl(8, SIOCGIFADDR, {ifr_name="lo", ifr_addr={AF_INET,
inet_addr("127.0.0.1")}}) = 0
open("/sys/class/net/lo/address", O_RDONLY) = 681
fstat(681, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x7f4b90d08000
read(681, "00:00:00:00:00:00\n", 4096)  = 18
close(681)                              = 0
munmap(0x7f4b90d08000, 4096)            = 0
ioctl(8, SIOCGIFADDR, {ifr_name="eth0", ifr_addr={AF_INET,
inet_addr("172.17.0.1")}}) = 0
open("/sys/class/net/eth0/address", O_RDONLY) = 681
fstat(681, {st_mode=S_IFREG|0444, st_size=4096, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x7f4b90d08000
read(681, "52:54:00:47:b3:cc\n", 4096)  = 18
close(681)                              = 0
munmap(0x7f4b90d08000, 4096)            = 0
open("/proc/net/if_inet6", O_RDONLY)    = -1 ENOENT (No such file or directory)
write(1, "\0\0\0,\0\0p\7\0\0\0\0\17\0\0\21\0\0\0\2\0\0\0\0\0\0\0\2lo~~"..., 44)
= 44
read(0, "\0\0\0\20\0\0p\0\0\0\0\4", 12) = 12
read(0, "\377\377\317\231", 4)          = 4

It's as though the pmdalinux process has at some point entered the container
namespace and never left it.  Note also the high file descriptor number (681
here), which suggests another file descriptor leak:

# lsof -p `pgrep pmdalinux`
pmdalinux 11264 root    0r  FIFO                0,8      0t0     414788 pipe
pmdalinux 11264 root    1w  FIFO                0,8      0t0     414789 pipe
pmdalinux 11264 root    2w   REG              253,1       64     552572
/var/log/pcp/pmcd/linux.log
pmdalinux 11264 root    3u  unix 0xffff880003c39e00      0t0     414790 socket
pmdalinux 11264 root    4r   REG              253,1     8268   69116911
/var/lib/pcp/pmdas/linux/help.dir
pmdalinux 11264 root    5r   REG              253,1    70277   69159003
/var/lib/pcp/pmdas/linux/help.pag
pmdalinux 11264 root    6r   REG                0,3        0 4026531956 net
pmdalinux 11264 root    7r   REG                0,3        0 4026531956 net
pmdalinux 11264 root    8u  sock                0,6      0t0     414919
protocol: UDP
pmdalinux 11264 root    9r   REG                0,3        0 4026532028
/proc/stat
pmdalinux 11264 root   10r   REG                0,3        0 4026531956 net
pmdalinux 11264 root   11r   REG                0,3        0 4026531956 net
pmdalinux 11264 root   12r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   13r   REG                0,3        0 4026531956 net
pmdalinux 11264 root   14r   REG                0,3        0 4026531956 net
pmdalinux 11264 root   15r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   16r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   17r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   18r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   19r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   20r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   21r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   22r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   23r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   24r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   25r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   26r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   27r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   28r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   29r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   30r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   31r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   32r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   33r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   34r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   35r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   36r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   37r   REG                0,3        0 4026531956 net
pmdalinux 11264 root   38r   REG                0,3        0 4026531956 net
pmdalinux 11264 root   39r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   40r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   41r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   42r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   43r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   44r   REG                0,3        0 4026531840 mnt
pmdalinux 11264 root   45r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   46r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   47r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   48r   REG                0,3        0 4026531838 uts
pmdalinux 11264 root   49r   REG                0,3        0 4026531838 uts
[...etc...]
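
One way to check the stuck-in-namespace theory (a sketch, not from the
original report; the docker inspect format string is just one way to find the
container's init pid):

# readlink /proc/1/ns/net /proc/$(pgrep pmdalinux)/ns/net
# readlink /proc/$(docker inspect -f '{{.State.Pid}}' CONTAINERID)/ns/net
... if pmdalinux's net:[inode] matches the container's rather than pid 1's,
... the process really is still sitting in the container's network namespace.
... The leaked "net"/"uts"/"mnt" descriptors in the lsof output above are
... /proc/<pid>/ns/* handles, presumably opened for setns(2) and never closed.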



This is reproducible with both git master and fche/pmmgr pcp, with the docker
versions shipped on rawhide, rhel7.1, and fedora21.  It is not limited to the
network.interface.inet_addr metric; most other metrics are affected.
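
For example, a quick side-by-side over a few other metrics (the metric names
here are just illustrative pmdalinux choices):

% pminfo -f kernel.uname.nodename network.interface.inet_addr
% pminfo -f --container=SUBSTRING kernel.uname.nodename network.interface.inet_addr
... the two outputs should differ (host vs. container hostname and addresses)
... but instead come back identical, or the container-side query fails with
... "Error: Operation not permitted"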

