pcp
[Top] [All Lists]

Re: Review: PCP & pmlogger take too long to start

To: Nathan Scott <nscott@xxxxxxxxxx>
Subject: Re: Review: PCP & pmlogger take too long to start
From: Michael Newton <kimbrr@xxxxxxx>
Date: Fri, 29 Jun 2007 18:11:24 +1000
Cc: pcp@xxxxxxxxxxx
In-reply-to: <1182996127.15488.102.camel@xxxxxxxxxxxxxx>
References: <Pine.SGI.4.58.0706271012280.2186626@xxxxxxxxxxxxxxxxxxxxxxx> <Pine.SGI.4.58.0706271124250.2186626@xxxxxxxxxxxxxxxxxxxxxxx> <Pine.SGI.4.58.0706271715321.2351218@xxxxxxxxxxxxxxxxxxxxxxx> <1182996127.15488.102.camel@xxxxxxxxxxxxxx>
Sender: pcp-bounce@xxxxxxxxxxx
On Thu, 28 Jun 2007, Nathan Scott wrote:
> On Wed, 2007-06-27 at 18:00 +1000, Michael Newton wrote:
> Repeating this in so many scripts is a bit of a shame, and it'd be
> better if they were faster always (not just if usleep is found).  We

Russell said something similar..

> should implement a "pmsleep" command (like we did for pmhostname)
> if we want this sub-second sleeper on all platforms (which we do)
> ... it's trivial, just use nanosleep(), which is POSIX and is there
> on all supported PCP platforms.

ok here it is:


===========================================================================
mgmt/pcp/man/man1/GNUmakefile
===========================================================================

--- a/mgmt/pcp/man/man1/GNUmakefile     2007-06-29 18:09:45.000000000 +1000
+++ b/mgmt/pcp/man/man1/GNUmakefile     2007-06-29 15:36:13.632867332 +1000
@@ -19,7 +19,7 @@
        pmnsmerge.1 pmpost.1 pmprobe.1 pmsocks.1 pmstat.1 pmstore.1 \
        pmtrace.1 pmval.1 pmdaweblog.1 pmlogsummary.1 pmdashping.1 \
        pmdumptext.1 genpmda.1 pmproxy.1 pmdasummary.1 pmlogreduce.1 \
-       autofsd-probe.1 pmie2col.1 telnet-probe.1
+       autofsd-probe.1 pmie2col.1 telnet-probe.1 pmsleep.1

 MAN_DEST       = $(PCP_MAN_DIR)/man$(MAN_SECTION)
 LSRCFILES      = $(MAN_PAGES)

===========================================================================
mgmt/pcp/man/man1/pmsleep.1
===========================================================================

--- a/mgmt/pcp/man/man1/pmsleep.1       2006-06-17 00:58:24.000000000 +1000
+++ b/mgmt/pcp/man/man1/pmsleep.1       2007-06-29 15:48:28.024750676 +1000
@@ -0,0 +1,41 @@
+'\"macro stdmacro
+.\"
+.\" Copyright (c) 2007 Silicon Graphics, Inc.  All Rights Reserved.
+.\"
+.\" $Id$
+.ie \(.g \{\
+.\" ... groff (hack for khelpcenter, man2html, etc.)
+.TH PMSLEEP 1 "SGI" "Performance Co-Pilot"
+\}
+.el \{\
+.if \nX=0 .ds x} PMSLEEP 1 "SGI" "Performance Co-Pilot"
+.if \nX=1 .ds x} PMSLEEP 1 "Performance Co-Pilot"
+.if \nX=2 .ds x} PMSLEEP 1 "" "\&"
+.if \nX=3 .ds x} PMSLEEP "" "" "\&"
+.TH \*(x}
+.rr X
+\}
+.SH NAME
+\f3pmsleep\f1 \- portable subsecond-capable sleep
+.\" literals use .B or \f3
+.\" arguments use .I or \f2
+.SH SYNOPSIS
+.B $PCP_BINADM_DIR/pmsleep
+.I interval
+.SH DESCRIPTION
+.B pmsleep
+sleeps for
+.IR interval .
+The
+.I interval
+argument follows the syntax described in
+.BR PCPIntro (1)
+for
+.BR \-t ,
+and in the simplest form may be an unsigned integer
+or floating point constant
+(the implied units in this case are seconds).
+
+.PP
+The exit status is 0 for success, or 1 for a malformed command line.
+If the underlying nanosleep(2) call fails, the exit status is the resulting errno value.

===========================================================================
mgmt/pcp/src/GNUmakefile
===========================================================================

--- a/mgmt/pcp/src/GNUmakefile  2007-06-29 18:09:45.000000000 +1000
+++ b/mgmt/pcp/src/GNUmakefile  2007-06-29 14:46:06.336727771 +1000
@@ -21,7 +21,7 @@
          pmdumplog pmlogextract pmstore pmhostname pmgenmap pmlogctl \
          pmlogconf pmlogsummary pmclient pmkstat pcp pmlc dbpmda \
          xconfirm pmtrace pmstat pmsocks pmdas pmafm procmemstat \
-         pmlogreduce genpmda pmproxy telnet-probe
+         pmlogreduce genpmda pmproxy telnet-probe pmsleep

 ifneq ($(TARGET_OS), cygwin)
 SUBDIRS += libpcp_pmc pmdumptext autofsd-probe

===========================================================================
mgmt/pcp/src/pmcd/rc_pcp
===========================================================================

--- a/mgmt/pcp/src/pmcd/rc_pcp  2007-06-29 18:09:45.000000000 +1000
+++ b/mgmt/pcp/src/pmcd/rc_pcp  2007-06-29 16:07:49.625951131 +1000
@@ -100,6 +100,8 @@
        ;;
 esac

+SLEEPCMND="$PCP_BINADM_DIR/pmsleep 0.1"
+
 _pmcd_logfile()
 {
 default=$RUNDIR/pmcd.log
@@ -383,16 +385,25 @@
     fi
     $ECHO $PCP_ECHO_N "Waiting for PMCD to terminate ...""$PCP_ECHO_C"
     gone=0
-    for i in 1 2 3 4 5 6
+    i=0
+    j=0
+    while :
     do
-       sleep 3
        _get_pids_by_name pmcd >$tmp.tmp
        if [ ! -s $tmp.tmp ]
        then
            gone=1
            break
        fi
-       $ECHO $PCP_ECHO_N ".""$PCP_ECHO_C"
+       i=`expr $i + 1`
+       if [ $i -ge 10 ]
+       then
+           i=0
+           [ $j -ge $delay ] && break
+           j=`expr $j + 1`
+           $ECHO $PCP_ECHO_N ".""$PCP_ECHO_C"
+       fi
+       $SLEEPCMND
     done
     if [ $gone != 1 ]  # It just WON'T DIE, give up.
     then

===========================================================================
mgmt/pcp/src/pmcd/src/agent.c
===========================================================================

--- a/mgmt/pcp/src/pmcd/src/agent.c     2007-06-29 18:09:45.000000000 +1000
+++ b/mgmt/pcp/src/pmcd/src/agent.c     2007-06-26 14:28:22.912602167 +1000
@@ -166,7 +166,7 @@
        found = 0;
        for ( i = 0; i < nAgents; i++) {
            ap = &agent[i];
-           if (!ap->status.connected)
+           if (!ap->status.connected || ap->ipcType == AGENT_DSO)
                continue;

            found = 1;

===========================================================================
mgmt/pcp/src/pmie/pmie_check.sh
===========================================================================

--- a/mgmt/pcp/src/pmie/pmie_check.sh   2007-06-29 18:09:45.000000000 +1000
+++ b/mgmt/pcp/src/pmie/pmie_check.sh   2007-06-29 16:05:39.146867878 +1000
@@ -14,6 +14,8 @@

 PMIE=pmie

+SLEEPCMND="$PCP_BINADM_DIR/pmsleep 0.1"
+
 # added to handle problem when /var/log/pcp is a symlink, as first
 # reported by Micah_Altman@xxxxxxxxxxx in Nov 2001
 #
@@ -146,7 +148,8 @@
     #
     fail=true
     rm -f $tmp.stamp
-    for try in 1 2 3 4
+    i=0
+    while :
     do
        if pmlock -v $logfile.lock >$tmp.out
        then
@@ -165,7 +168,9 @@
                rm -f $logfile.lock
            fi
        fi
-       sleep 5
+       [ $i -ge 200 ] && break #tenths of a sec
+       $SLEEPCMND
+       i=`expr $i + 1`
     done

     if $fail
@@ -272,9 +277,9 @@
     #
     delay=`expr $delay + 20 \* $x`
     i=0
-    while [ $i -lt $delay ]
+    j=0
+    while :
     do
-       $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
        if [ -f $logfile ]
        then
            # $logfile was previously removed, if it has appeared again then
@@ -286,7 +291,7 @@
                then
                    :
                else
-                   sleep 5
+                   $SLEEPCMND
                    $VERBOSE && echo " done"
                    return 0
                fi
@@ -313,8 +318,15 @@
                return 1
            fi
        fi
-       sleep 5
-       i=`expr $i + 5`
+       i=`expr $i + 1`
+       if [ $i -ge 10 ]
+       then
+           i=0
+           [ $j -ge $delay ] && break
+           j=`expr $j + 1`
+           $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
+       fi
+       $SLEEPCMND
     done
     $VERBOSE || _message restart
     echo " timed out waiting!"
@@ -630,13 +642,20 @@
     then
        $VERY_VERBOSE && ( echo; $PCP_ECHO_PROG $PCP_ECHO_N "+ $KILL -KILL `cat $tmp.pmies` ...""$PCP_ECHO_C" )
        eval $KILL -KILL $pmielist >/dev/null 2>&1
-       sleep 3         # give them a chance to go
-       if ps -f -p "$pmielist" >$tmp.alive 2>&1
-       then
+       i=0
+       while ps -f -p "$pmielist" >$tmp.alive 2>&1
+       do
+           if [ $i -lt 30 ]
+           then
+               $SLEEPCMND
+               i=`expr $i + 1`
+               continue;
+           fi
            echo "$prog: Error: pmie process(es) will not die"
            cat $tmp.alive
            status=1
-       fi
+           break
+       done
     fi
 fi


===========================================================================
mgmt/pcp/src/pmlogctl/pmlogger_check.sh
===========================================================================

--- a/mgmt/pcp/src/pmlogctl/pmlogger_check.sh   2007-06-29 18:09:45.000000000 +1000
+++ b/mgmt/pcp/src/pmlogctl/pmlogger_check.sh   2007-06-29 16:03:21.068767724 +1000
@@ -51,6 +51,8 @@
     PWDCMND=/bin/pwd
 fi

+SLEEPCMND="$PCP_BINADM_DIR/pmsleep 0.1"
+
 # default location
 #
 logfile=pmlogger.log
@@ -194,9 +196,9 @@
     #
     delay=`expr $delay + 20 \* $x`
     i=0
-    while [ $i -lt $delay ]
+    j=0
+    while :
     do
-       $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
        if [ -f $logfile ]
        then
            # $logfile was previously removed, if it has appeared again
@@ -207,7 +209,7 @@
            then
                :
            else
-               sleep 5
+               $SLEEPCMND
                $VERBOSE && echo " done"
                return 0
            fi
@@ -244,8 +246,15 @@
                return 1
            fi
        fi
-       sleep 5
-       i=`expr $i + 5`
+       i=`expr $i + 1`
+       if [ $i -ge 10 ]
+       then
+           i=0
+           [ $j -ge $delay ] && break
+           j=`expr $j + 1`
+           $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
+       fi
+       $SLEEPCMND
     done
     $VERBOSE || _message restart
     echo " timed out waiting!"
@@ -379,7 +388,8 @@
        #
        fail=true
        rm -f $tmp.stamp
-       for try in 1 2 3 4
+       i=0
+       while :
        do
            if pmlock -v lock >$tmp.out
            then
@@ -407,7 +417,9 @@
                    rm -f lock
                fi
            fi
-           sleep 5
+           [ $i -ge 200 ] && break #tenths of a sec
+           $SLEEPCMND
+           i=`expr $i + 1`
        done

        if $fail

===========================================================================
mgmt/pcp/src/pmsleep/GNUmakefile
===========================================================================

--- a/mgmt/pcp/src/pmsleep/GNUmakefile  2006-06-17 00:58:24.000000000 +1000
+++ b/mgmt/pcp/src/pmsleep/GNUmakefile  2007-06-29 14:33:28.335332331 +1000
@@ -0,0 +1,25 @@
+#!gmake
+#
+# Copyright (c) 2007 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# $Id$
+#
+
+TOPDIR = ../..
+include $(TOPDIR)/src/include/builddefs
+
+LLDLIBS = -lpcp
+CFILES = pmsleep.c
+CMDTARGET = pmsleep$(EXECSUFFIX)
+LDIRT = $(TARGET)
+
+default:       $(CMDTARGET)
+
+include $(BUILDRULES)
+
+install:       $(CMDTARGET)
+       $(INSTALL) -m 755 $(CMDTARGET) $(PCP_BINADM_DIR)/$(CMDTARGET)
+
+default_pcp:   default
+
+install_pcp:   install

===========================================================================
mgmt/pcp/src/pmsleep/pmsleep.c
===========================================================================

--- a/mgmt/pcp/src/pmsleep/pmsleep.c    2006-06-17 00:58:24.000000000 +1000
+++ b/mgmt/pcp/src/pmsleep/pmsleep.c    2007-06-29 14:59:57.087491258 +1000
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2007 Silicon Graphics, Inc.  All Rights Reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <errno.h>
+#include "pmapi.h"
+
+/* Sleep for the interval in argv[1]; exit 0, 1 (bad usage) or errno. */
+int
+main(int argc, char **argv)
+{
+    struct timespec rqt;
+    struct timeval delta;
+    int r = 0;
+    char *msg;
+
+    if (argc == 2) {
+       if (pmParseInterval(argv[1], &delta, &msg) < 0) {
+           fputs(msg, stderr);         /* diagnostic filled in by the parser */
+           free(msg);
+       } else {
+           rqt.tv_sec  = delta.tv_sec;
+           rqt.tv_nsec = delta.tv_usec * 1000; /* microseconds -> nanoseconds */
+           if (0 != nanosleep(&rqt, NULL))
+               r = errno;              /* failed sleep: exit status is errno */
+           exit(r);
+       }
+    }
+    fprintf(stderr, "Usage: pmsleep interval\n");      /* no options are parsed */
+    exit(1);
+    /*NOTREACHED*/
+}

Dr.Michael("Kimba")Newton  kimbrr@xxxxxxx

<Prev in Thread] Current Thread [Next in Thread>