pcp
[Top] [All Lists]

Re: Review: PCP & pmlogger take too long to start

To: pcp@xxxxxxxxxxx
Subject: Re: Review: PCP & pmlogger take too long to start
From: Michael Newton <kimbrr@xxxxxxx>
Date: Wed, 27 Jun 2007 18:00:37 +1000
In-reply-to: <Pine.SGI.4.58.0706271124250.2186626@xxxxxxxxxxxxxxxxxxxxxxx>
References: <Pine.SGI.4.58.0706271012280.2186626@xxxxxxxxxxxxxxxxxxxxxxx> <Pine.SGI.4.58.0706271124250.2186626@xxxxxxxxxxxxxxxxxxxxxxx>
Sender: pcp-bounce@xxxxxxxxxxx
Ready! Review please..

On Wed, 27 Jun 2007, Michael Newton wrote:
> i wrote:
> > This is a review request. PCP takes >12s to start, and pmlogger_check
> > >10s (in the cases where its actually trying to start a pmlogger).
>
> hold up...
>
> its still taking 3s each.. im going to try to do better

In a number of cases I've moved the loop iteration tests into the loop
body. This is because
* it's a good idea to try the target condition before the first sleep
* it's a good idea to try the target condition after the last sleep
This is further complicated by not wanting to print a progress meter
every tenth of a second.

On my test box, pcp stop now takes about 0.3s. By itself, a pmlogger_check
doing an actual launch takes about 1.5s, but immediately following a pcp
restart it's >3s.


===========================================================================
mgmt/pcp/src/pmcd/rc_pcp
===========================================================================

--- a/mgmt/pcp/src/pmcd/rc_pcp  2007-06-27 17:49:39.000000000 +1000
+++ b/mgmt/pcp/src/pmcd/rc_pcp  2007-06-27 17:49:35.029718805 +1000
@@ -100,6 +100,22 @@
        ;;
 esac

+# got usleep ?
+SLEEPCMND=`which usleep 2>/dev/null | $PCP_AWK_PROG '
+BEGIN          { i = 0 }
+/ not in /     { i = 1 }
+/ aliased to /  { i = 1 }
+               { if ( i == 0 ) print }
+'`
+if [ -z "$SLEEPCMND" ]
+then
+    SLEEPCMND="sleep 1"
+    SLEEPINTVL=10      #tenths of a sec
+else
+    SLEEPINTVL=1       #tenths of a sec
+    SLEEPCMND="$SLEEPCMND 100000"
+fi
+
 _pmcd_logfile()
 {
 default=$RUNDIR/pmcd.log
@@ -383,16 +399,25 @@
     fi
     $ECHO $PCP_ECHO_N "Waiting for PMCD to terminate ...""$PCP_ECHO_C"
     gone=0
-    for i in 1 2 3 4 5 6
+    i=0
+    j=0
+    while :
     do
-       sleep 3
        _get_pids_by_name pmcd >$tmp.tmp
        if [ ! -s $tmp.tmp ]
        then
            gone=1
            break
        fi
-       $ECHO $PCP_ECHO_N ".""$PCP_ECHO_C"
+       i=`expr $i + $SLEEPINTVL`
+       if [ $i -ge 10 ]
+       then
+           i=0
+           [ $j -ge $delay ] && break
+           j=`expr $j + 1`
+           $ECHO $PCP_ECHO_N ".""$PCP_ECHO_C"
+       fi
+       $SLEEPCMND
     done
     if [ $gone != 1 ]  # It just WON'T DIE, give up.
     then

===========================================================================
mgmt/pcp/src/pmcd/src/agent.c
===========================================================================

--- a/mgmt/pcp/src/pmcd/src/agent.c     2007-06-27 17:49:39.000000000 +1000
+++ b/mgmt/pcp/src/pmcd/src/agent.c     2007-06-26 14:28:22.912602167 +1000
@@ -166,7 +166,7 @@
        found = 0;
        for ( i = 0; i < nAgents; i++) {
            ap = &agent[i];
-           if (!ap->status.connected)
+           if (!ap->status.connected || ap->ipcType == AGENT_DSO)
                continue;

            found = 1;

===========================================================================
mgmt/pcp/src/pmie/pmie_check.sh
===========================================================================

--- a/mgmt/pcp/src/pmie/pmie_check.sh   2007-06-27 17:49:39.000000000 +1000
+++ b/mgmt/pcp/src/pmie/pmie_check.sh   2007-06-27 17:37:52.712287915 +1000
@@ -14,6 +14,22 @@

 PMIE=pmie

+# got usleep ?
+SLEEPCMND=`which usleep 2>/dev/null | $PCP_AWK_PROG '
+BEGIN          { i = 0 }
+/ not in /     { i = 1 }
+/ aliased to /  { i = 1 }
+               { if ( i == 0 ) print }
+'`
+if [ -z "$SLEEPCMND" ]
+then
+    SLEEPCMND="sleep 1"
+    SLEEPINTVL=10      #tenths of a sec
+else
+    SLEEPINTVL=1       #tenths of a sec
+    SLEEPCMND="$SLEEPCMND 100000"
+fi
+
 # added to handle problem when /var/log/pcp is a symlink, as first
 # reported by Micah_Altman@xxxxxxxxxxx in Nov 2001
 #
@@ -146,7 +162,8 @@
     #
     fail=true
     rm -f $tmp.stamp
-    for try in 1 2 3 4
+    i=0
+    while :
     do
        if pmlock -v $logfile.lock >$tmp.out
        then
@@ -165,7 +182,9 @@
                rm -f $logfile.lock
            fi
        fi
-       sleep 5
+       [ $i -ge 200 ] && break #tenths of a sec
+       $SLEEPCMND
+       i=`expr $i + $SLEEPINTVL`
     done

     if $fail
@@ -272,9 +291,9 @@
     #
     delay=`expr $delay + 20 \* $x`
     i=0
-    while [ $i -lt $delay ]
+    j=0
+    while :
     do
-       $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
        if [ -f $logfile ]
        then
            # $logfile was previously removed, if it has appeared again then
@@ -286,7 +305,7 @@
                then
                    :
                else
-                   sleep 5
+                   $SLEEPCMND
                    $VERBOSE && echo " done"
                    return 0
                fi
@@ -313,8 +332,15 @@
                return 1
            fi
        fi
-       sleep 5
-       i=`expr $i + 5`
+       i=`expr $i + $SLEEPINTVL`
+       if [ $i -ge 10 ]
+       then
+           i=0
+           [ $j -ge $delay ] && break
+           j=`expr $j + 1`
+           $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
+       fi
+       $SLEEPCMND
     done
     $VERBOSE || _message restart
     echo " timed out waiting!"
@@ -630,13 +656,20 @@
     then
        $VERY_VERBOSE && ( echo; $PCP_ECHO_PROG $PCP_ECHO_N "+ $KILL -KILL `cat $tmp.pmies` ...""$PCP_ECHO_C" )
        eval $KILL -KILL $pmielist >/dev/null 2>&1
-       sleep 3         # give them a chance to go
-       if ps -f -p "$pmielist" >$tmp.alive 2>&1
-       then
+       i=0
+       while ps -f -p "$pmielist" >$tmp.alive 2>&1
+       do
+           if [ $i -lt 30 ]
+           then
+               $SLEEPCMND
+               i=`expr $i + $SLEEPINTVL`
+               continue;
+           fi
            echo "$prog: Error: pmie process(es) will not die"
            cat $tmp.alive
            status=1
-       fi
+           break
+       done
     fi
 fi


===========================================================================
mgmt/pcp/src/pmlogctl/pmlogger_check.sh
===========================================================================

--- a/mgmt/pcp/src/pmlogctl/pmlogger_check.sh   2007-06-27 17:49:39.000000000 +1000
+++ b/mgmt/pcp/src/pmlogctl/pmlogger_check.sh   2007-06-27 17:37:24.843964362 +1000
@@ -51,6 +51,22 @@
     PWDCMND=/bin/pwd
 fi

+# got usleep ?
+SLEEPCMND=`which usleep 2>/dev/null | $PCP_AWK_PROG '
+BEGIN          { i = 0 }
+/ not in /     { i = 1 }
+/ aliased to /  { i = 1 }
+               { if ( i == 0 ) print }
+'`
+if [ -z "$SLEEPCMND" ]
+then
+    SLEEPCMND="sleep 1"
+    SLEEPINTVL=10      #tenths of a sec
+else
+    SLEEPINTVL=1       #tenths of a sec
+    SLEEPCMND="$SLEEPCMND 100000"
+fi
+
 # default location
 #
 logfile=pmlogger.log
@@ -194,9 +210,9 @@
     #
     delay=`expr $delay + 20 \* $x`
     i=0
-    while [ $i -lt $delay ]
+    j=0
+    while :
     do
-       $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
        if [ -f $logfile ]
        then
            # $logfile was previously removed, if it has appeared again
@@ -207,7 +223,7 @@
            then
                :
            else
-               sleep 5
+               $SLEEPCMND
                $VERBOSE && echo " done"
                return 0
            fi
@@ -244,8 +260,15 @@
                return 1
            fi
        fi
-       sleep 5
-       i=`expr $i + 5`
+       i=`expr $i + $SLEEPINTVL`
+       if [ $i -ge 10 ]
+       then
+           i=0
+           [ $j -ge $delay ] && break
+           j=`expr $j + 1`
+           $VERBOSE && $PCP_ECHO_PROG $PCP_ECHO_N ".""$PCP_ECHO_C"
+       fi
+       $SLEEPCMND
     done
     $VERBOSE || _message restart
     echo " timed out waiting!"
@@ -379,7 +402,8 @@
        #
        fail=true
        rm -f $tmp.stamp
-       for try in 1 2 3 4
+       i=0
+       while :
        do
            if pmlock -v lock >$tmp.out
            then
@@ -407,7 +431,9 @@
                    rm -f lock
                fi
            fi
-           sleep 5
+           [ $i -ge 200 ] && break #tenths of a sec
+           $SLEEPCMND
+           i=`expr $i + $SLEEPINTVL`
        done

        if $fail

--
Dr.Michael("Kimba")Newton  kimbrr@xxxxxxx

<Prev in Thread] Current Thread [Next in Thread>