#! /bin/sh
#Tag 0x00010D13
#
# Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write the Free Software Foundation, Inc., 59
# Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
# Mountain View, CA 94043, or:
#
# http://www.sgi.com
#
# For further information regarding this notice, see:
#
# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
#
# Example administrative script to check pmlogger instances are alive,
# and restart as required.
#
# Usage: pmlogger_check [-NV] [-c control]
#   -c control  use an alternate control file
#   -N          show-me mode: echo the commands instead of running them
#   -V          verbose; give it twice for very verbose
#
# Exit status is 1 on a usage error, a missing control file, or if
# _error was called for any control file record; otherwise 0.
#

# Get standard environment
. /etc/pcp.env

# error messages should go to stderr, not the GUI notifiers
#
unset PCP_STDERR

# constant setup
#
# NOTE(review): $tmp is a predictable name under /tmp; consider mktemp
# if it is available on every supported platform.
#
tmp=/tmp/$$
status=0
echo >$tmp.lock
# On exit (or HUP/INT/QUIT/TERM) remove the lock file named in $tmp.lock,
# if any, and all our temporary files, then exit with $status.
trap "rm -f \`[ -f $tmp.lock ] && cat $tmp.lock\` $tmp.*; exit \$status" 0 1 2 3 15
prog=`basename $0`

# control file for pmlogger administration ... edit the entries in this
# file to reflect your local configuration
#
CONTROL=$PCP_VAR_DIR/config/pmlogger/control

# determine real name for localhost
_lhnm=`which hostname 1>/dev/null && hostname`
LOCALHOSTNAME=${_lhnm:-localhost}

# default location
#
logfile=pmlogger.log

# option parsing
#
SHOWME=false
MV=mv
VERBOSE=false
VERY_VERBOSE=false
usage="Usage: $prog [-NV] [-c control]"
# the optstring is quoted so that the ? cannot be expanded as a glob
while getopts 'c:NV?' c
do
    case $c
    in
        c)  CONTROL="$OPTARG"
            ;;
        N)  SHOWME=true
            MV="echo + mv"
            ;;
        V)  if $VERBOSE
            then
                VERY_VERBOSE=true
            else
                VERBOSE=true
            fi
            ;;
        ?)  echo "$usage"
            status=1
            exit
            ;;
    esac
done
shift `expr $OPTIND - 1`

if [ $# -ne 0 ]
then
    echo "$usage"
    status=1
    exit
fi

if [ ! -f "$CONTROL" ]
then
    echo "$prog: Error: cannot find control file ($CONTROL)"
    status=1
    exit
fi

# Report an error for the current control file record ($line, $host)
# and create $tmp.err, which is tested at the end of the script to set
# the exit status to 1.
#   $1 - error text
#
_error()
{
    echo "$prog: [$CONTROL:$line]"
    echo "Error: $1"
    echo "... logging for host \"$host\" unchanged"
    touch $tmp.err
}

# Report a warning for the current control file record; does not
# affect the exit status.
#   $1 - warning text
#
_warning()
{
    echo "$prog [$CONTROL:$line]"
    echo "Warning: $1"
}

# Emit a progress message (no trailing newline).
#   $1 - message selector; only "restart" is recognised
#
_message()
{
    case $1
    in
        restart)
            echo -n "Restarting$iam pmlogger for host \"$host\" ..."
            ;;
    esac
}

# Release the lock in the current directory and blank $tmp.lock so the
# exit trap has no lock file left to remove.
#
_unlock()
{
    rm -f lock
    echo >$tmp.lock
}

# Set $logfile from the pmlogger arguments in $args.
#
_get_logfile()
{
    # start from the default for every control file record, otherwise a
    # -l option from an earlier record would still be in effect for a
    # later record that has none
    #
    logfile=pmlogger.log

    # looking for -lLOGFILE or -l LOGFILE in args
    #
    want=false
    for a in $args
    do
        if $want
        then
            logfile="$a"
            want=false
            break
        fi
        case "$a"
        in
            -l)
                want=true
                ;;
            -l*)
                logfile=`echo "$a" | sed -e 's/-l//'`
                break
                ;;
        esac
    done
}

# Diagnostic helper: show the contents of $logfile, or if it does not
# exist, a listing of the directory it was expected in.
#
_check_logfile()
{
    if [ ! -f $logfile ]
    then
        echo "$prog: Error: cannot find pmlogger output file at \"$logfile\""
        logdir=`dirname $logfile`
        echo "Directory (`cd $logdir; pwd`) contents:"
        ls -la $logdir
    else
        echo "Contents of pmlogger output file \"$logfile\" ..."
        cat $logfile
    fi
}

# Wait for a freshly started pmlogger to accept pmlc connections.
#   $1 - process id of the pmlogger
# Returns 0 once pmlc can connect, 1 if the process exited or the wait
# timed out (diagnostics are printed in both failure cases).
#
_check_logger()
{
    $VERBOSE && echo -n " [process $1] "

    # wait until pmlogger process starts, or exits
    #
    delay=5
    [ ! -z "$PMCD_CONNECT_TIMEOUT" ] && delay=$PMCD_CONNECT_TIMEOUT
    x=5
    [ ! -z "$PMCD_REQUEST_TIMEOUT" ] && x=$PMCD_REQUEST_TIMEOUT

    # wait for maximum time of a connection and 20 requests
    #
    delay=`expr $delay + 20 \* $x`
    i=0
    while [ $i -lt $delay ]
    do
        $VERBOSE && echo -n "."
        if [ -f $logfile ]
        then
            # $logfile was previously removed, if it has appeared again
            # then we know pmlogger has started ... if not just sleep and
            # try again
            #
            if echo "connect $1" | pmlc 2>&1 | grep -q "Unable to connect"
            then
                :
            else
                sleep 5
                $VERBOSE && echo " done"
                return 0
            fi

            _plist=`_get_pids_by_name pmlogger`
            _found=false
            for _p in `echo $_plist`
            do
                [ $_p -eq $1 ] && _found=true
            done

            if $_found
            then
                # process still here, just not accepting pmlc connections
                # yet, try again
                :
            else
                $VERBOSE || _message restart
                echo " process exited!"
                echo "$prog: Error: failed to restart pmlogger"
                echo "Current pmlogger processes:"
                # save the full ps listing, print its heading line, then
                # print the saved line for each known pmlogger pid
                ps $PCP_PS_ALL_FLAGS | tee $tmp.tmp | sed -n -e 1p
                for _p in `echo $_plist`
                do
                    sed -n -e "/^[ ]*[^ ]* [ ]*$_p /p" < $tmp.tmp
                done
                echo
                _check_logfile
                return 1
            fi
        fi
        sleep 5
        i=`expr $i + 5`
    done
    $VERBOSE || _message restart
    echo " timed out waiting!"
    sed -e 's/^/ /' $tmp.out
    _check_logfile
    return 1
}

# note on control file format version
#  1.0 was shipped as part of PCPWEB beta, and did not include the
#      socks field [this is the default for backwards compatibility]
#  1.1 is the first production release, and the version is set in
#      the control file with a $version=1.1 line (see below)
#
version=1.0

echo >$tmp.dir
rm -f $tmp.err
line=0
# The loop below is the last stage of a pipeline, so variables set in it
# may not survive the loop; errors are passed out via the $tmp.err file.
cat "$CONTROL" \
| sed -e "s/LOCALHOSTNAME/$LOCALHOSTNAME/g" \
      -e "s;PCP_LOG_DIR;$PCP_LOG_DIR;g" \
| while read host primary socks dir args
do
    line=`expr $line + 1`
    case "$host"
    in
        \#*|'')
            # comment or empty
            continue
            ;;
        \$*)
            # in-line variable assignment
            $SHOWME && echo "# $host $primary $socks $dir $args"
            # Reduce the record to a single "export NAME; NAME=value"
            # command: keep only the quoted value if there is one, strip
            # shell metacharacters after an unquoted value, and print
            # nothing at all unless the record looks like $NAME=...
            cmd=`echo "$host $primary $socks $dir $args" \
                 | sed -n \
                     -e "/='/s/\(='[^']*'\).*/\1/" \
                     -e '/="/s/\(="[^"]*"\).*/\1/' \
                     -e '/=[^"'"'"']/s/[;&<>|].*$//' \
                     -e '/^\\$[A-Za-z][A-Za-z0-9_]*=/{
s/^\\$//
s/^\([A-Za-z][A-Za-z0-9_]*\)=/export \1; \1=/p
}'`
            if [ -z "$cmd" ]
            then
                # in-line command, not a variable assignment
                _warning "in-line command is not a variable assignment, line ignored"
            else
                case "$cmd"
                in
                    'export PATH;'*)
                        _warning "cannot change \$PATH, line ignored"
                        ;;
                    'export IFS;'*)
                        _warning "cannot change \$IFS, line ignored"
                        ;;
                    *)
                        $SHOWME && echo "+ $cmd"
                        eval $cmd
                        ;;
                esac
            fi
            continue
            ;;
    esac

    if [ "$version" = "1.0" ]
    then
        # version 1.0 records have no socks field, shuffle the fields up
        args="$dir $args"
        dir="$socks"
        socks=n
    fi

    if [ -z "$primary" -o -z "$socks" -o -z "$dir" -o -z "$args" ]
    then
        _error "insufficient fields in control file record"
        continue
    fi

    if $VERY_VERBOSE
    then
        pflag=''
        [ $primary = y ] && pflag=' -P'
        echo "Check pmlogger$pflag -h $host ... in $dir ..."
    fi

    # make sure output directory exists
    #
    if [ ! -d $dir ]
    then
        # mkdir output goes to its own scratch file; $tmp.err is reserved
        # as the "an error was reported" flag tested at the end of the
        # script, and the redirection would create it even on success
        #
        mkdir -p $dir >$tmp.mkdir 2>&1
        if [ ! -d $dir ]
        then
            cat $tmp.mkdir
            _error "cannot create directory ($dir) for PCP archive files"
        else
            _warning "creating directory ($dir) for PCP archive files"
        fi
    fi

    [ ! -d $dir ] && continue

    # check for directory duplicate entries
    #
    if [ "`grep $dir $tmp.dir`" = "$dir" ]
    then
        _error "Cannot start more than one pmlogger instance for archive directory \"$dir\""
        continue
    else
        echo "$dir" >>$tmp.dir
    fi

    cd $dir
    dir=`pwd`
    $SHOWME && echo "+ cd $dir"

    if [ ! -w $dir ]
    then
        echo "$prog: Warning: no write access in $dir, skip lock file processing"
    else
        # demand mutual exclusion
        #
        fail=true
        rm -f $tmp.stamp
        for try in 1 2 3 4
        do
            if pmlock -v lock >$tmp.out
            then
                # record the lock so the exit trap can remove it
                echo $dir/lock >$tmp.lock
                fail=false
                break
            else
                if [ ! -f $tmp.stamp ]
                then
                    if uname -r | grep '^5\.3' >/dev/null
                    then
                        # IRIX 5.3 does not support -t for touch(1)
                        #
                        touch `pmdate -30M %m%d%H%M%y` $tmp.stamp
                    else
                        touch -t `pmdate -30M %Y%m%d%H%M` $tmp.stamp
                    fi
                fi
                if [ ! -z "`find lock -newer $tmp.stamp -print 2>/dev/null`" ]
                then
                    :
                else
                    echo "$prog: Warning: removing lock file older than 30 minutes"
                    ls -l $dir/lock
                    rm -f lock
                fi
            fi
            sleep 5
        done

        if $fail
        then
            # failed to gain mutex lock
            #
            if [ -f lock ]
            then
                echo "$prog: Warning: is another PCP cron job running concurrently?"
                ls -l $dir/lock
            else
                echo "$prog: `cat $tmp.out`"
            fi
            _warning "failed to acquire exclusive lock ($dir/lock) ..."
            continue
        fi
    fi

    # look for an already running pmlogger for this record; $pid stays
    # empty if none is found
    #
    pid=''
    if [ "X$primary" = Xy ]
    then
        if [ "X$host" != "X$LOCALHOSTNAME" ]
        then
            _error "\"primary\" only allowed for $LOCALHOSTNAME (localhost, not $host)"
            _unlock
            continue
        fi

        if which chkconfig >/dev/null 2>&1
        then
            IS_ON=chkconfig
        else
            IS_ON=false
        fi

        PMLOGGER_CTL=off
        if [ -f $PCP_SYSCONFIG_DIR/pcp ]
        then
            # Try the Redhat way, VERBOSE from here
            . $PCP_SYSCONFIG_DIR/pcp
        else
            # Try the IRIX way
            if [ -f /etc/config/pmlogger ]
            then
                PMLOGGER_CTL=off
                $IS_ON pmlogger && PMLOGGER_CTL=on
            fi
        fi

        if [ "$PMLOGGER_CTL" = "off" ]
        then
            _error "primary logging disabled for $host"
            _unlock
            continue
        fi

        if [ -L $PCP_TMP_DIR/pmlogger/primary ]
        then
            $VERY_VERBOSE && echo -n "... try $PCP_TMP_DIR/pmlogger/primary: "
            # the last path component of the ls -l output (the symlink
            # target) is taken as the pid
            pid=`ls -l $PCP_TMP_DIR/pmlogger/primary | sed -e 's,.*/,,'`
            if _get_pids_by_name pmlogger | grep -q "^$pid\$"
            then
                $VERY_VERBOSE && echo "pmlogger process $pid identified, OK"
            else
                $VERY_VERBOSE && echo "pmlogger process $pid not running"
                pid=''
            fi
        fi
    else
        fqdn=`pmhostname $host`
        for log in $PCP_TMP_DIR/pmlogger/[0-9]*
        do
            # glob matched nothing
            [ "$log" = "$PCP_TMP_DIR/pmlogger/[0-9]*" ] && continue
            $VERY_VERBOSE && echo -n "... try $log: "
            # match: 0 = different host, 1 = host matches (line 2 of
            # $log), 2 = host and archive directory (line 3, with the
            # trailing /NNNN... component stripped) both match
            #
            # throw away stderr in case $log has been removed by now
            match=`sed -e '3s/\/[0-9][0-9][0-9][0-9][0-9.]*$//' $log 2>/dev/null \
                   | $PCP_AWK_PROG '
BEGIN { m = 0 }
NR == 2 && $1 == "'$fqdn'" { m = 1; next }
NR == 2 && "'$fqdn'" == "'$host'" && ( $1 ~ /^'$host'\./ || $1 ~ /^'$host'$/ ) { m = 1; next }
NR == 3 && m == 1 && $0 == "'$dir'" { m = 2; next }
END { print m }'`
            $VERY_VERBOSE && echo -n "match=$match "
            if [ "$match" = 2 ]
            then
                pid=`echo $log | sed -e 's,.*/,,'`
                if _get_pids_by_name pmlogger | grep -q "^$pid\$"
                then
                    $VERY_VERBOSE && echo "pmlogger process $pid identified, OK"
                    break
                fi
                $VERY_VERBOSE && echo "pmlogger process $pid not running, skip"
                pid=''
            elif [ "$match" = 0 ]
            then
                $VERY_VERBOSE && echo "different host, skip"
            elif [ "$match" = 1 ]
            then
                $VERY_VERBOSE && echo "different directory, skip"
            fi
        done
    fi

    if [ -z "$pid" ]
    then
        # no running pmlogger found for this record, start one
        rm -f Latest

        if [ "X$primary" = Xy ]
        then
            args="-P $args"
            iam=" primary"
            # clean up port-map, just in case
            #
            PM_LOG_PORT_DIR=$PCP_TMP_DIR/pmlogger
            rm -f $PM_LOG_PORT_DIR/primary
        else
            args="-h $host $args"
            iam=""
        fi

        # each new log started is named yyyymmdd.hh.mm
        #
        LOGNAME=`date "+%Y%m%d.%H.%M"`

        # handle duplicates/aliases (happens when pmlogger is restarted
        # within a minute and LOGNAME is the same)
        #
        suff=''
        for file in $LOGNAME.*
        do
            [ "$file" = "$LOGNAME"'.*' ] && continue
            # we have a clash! ... find a new -number suffix for the
            # existing files ... we are going to keep $LOGNAME for the
            # new pmlogger below
            #
            if [ -z "$suff" ]
            then
                for xx in 0 1 2 3 4 5 6 7 8 9
                do
                    for yy in 0 1 2 3 4 5 6 7 8 9
                    do
                        # suffix already in use if the glob expands
                        [ "`echo $LOGNAME-${xx}${yy}.*`" != "$LOGNAME-${xx}${yy}.*" ] && continue
                        suff=${xx}$yy
                        break
                    done
                    [ ! -z "$suff" ] && break
                done
                if [ -z "$suff" ]
                then
                    _error "unable to break duplicate clash for archive basename $LOGNAME"
                fi
                $VERBOSE && echo "Duplicate archive basename ... rename $LOGNAME.* files to $LOGNAME-$suff.*"
            fi
            eval $MV -f $file `echo $file | sed -e "s/$LOGNAME/&-$suff/"`
        done

        $VERBOSE && _message restart

        sock_me=''
        [ "$socks" = y ] && sock_me='pmsocks '

        _get_logfile
        if [ -f $logfile ]
        then
            # keep the old pmlogger output; _check_logger relies on
            # $logfile reappearing once the new pmlogger has started
            $VERBOSE && $SHOWME && echo
            eval $MV -f $logfile $logfile.prior
        fi

        if $SHOWME
        then
            echo
            echo "+ ${sock_me}pmlogger $args $LOGNAME"
            _unlock
            continue
        else
            ${sock_me}pmlogger $args $LOGNAME >$tmp.out 2>&1 &
            pid=$!
        fi

        # wait for pmlogger to get started, and check on its health
        _check_logger $pid

        # the archive folio Latest is for the most recent archive in
        # this directory
        #
        if [ -f $LOGNAME.0 ]
        then
            $VERBOSE && echo "Latest folio created for $LOGNAME"
            mkaf $LOGNAME.0 >Latest
        else
            echo "$prog: Error: archive file $LOGNAME.0 missing"
            logdir=`dirname $LOGNAME`
            echo "Directory (`cd $logdir; pwd`) contents:"
            ls -la $logdir
        fi
    fi

    _unlock
done

# _error leaves $tmp.err behind as the "something failed" flag
[ -f $tmp.err ] && status=1
exit