[BACK]Return to rand_lines.c CVS log [TXT][DIR] Up to [Development] / projects / ltp / tools

File: [Development] / projects / ltp / tools / rand_lines.c (download)

Revision 1.2, Wed Feb 28 17:42:00 2001 UTC (16 years, 7 months ago) by nstraz
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +10 -4 lines

I've applied most of the patch that Urban Widmark <urban@teststation.com>
contributed.  It turns on the "all warnings" compiler options and fixes a lot
of the warnings.

/*
 * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
 *
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA  94043, or:
 *
 * http://www.sgi.com
 *
 * For further information regarding this notice, see:
 *
 * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
 *
 */
/* $Id: rand_lines.c,v 1.2 2001/02/28 17:42:00 nstraz Exp $ */
/**************************************************************
 *
 *    OS Testing - Silicon Graphics, Inc.
 *
 *    TOOL IDENTIFIER   : rand_lines
 *
 *    DESCRIPTION       : prints lines from a file in random order
 *
 *    SYNOPSIS:
 *      rand_lines [-hg][-S seed][-l numlines] file
 *
 *    AUTHOR            : Richard Logan
 *
 *    CO-PILOT(s)       : 
 *
 *    DATE STARTED      : 05/94
 *
 *    INPUT SPECIFICATIONS
 *     This tool will print the lines of a file in random order.
 *     The maximum line length is 2048 characters.
 *     The options supported are:
 *       -h     This option prints an help message then exits.
 *
 *       -g     This option specifies to count the number of lines
 *		in the file before randomizing.  This option overrides
 *		the -l option.  Using this option will give you the best
 *		randomization, but it requires processing
 *		the file an additional time.
 *		       
 *       -l numlines : This option specifies to randomize the file in
 *		chunks of numlines lines.  The default size is 2048.
 *
 *       -S seed     : sets randomization seed to seed. 
 *		The default is time(0).  If seed is zero, time(0) is used.
 *
 *	 file   A readable, seekable filename.  The usage message shows
 *	 	multiple files (each file would be dealt with
 *		separately), but main() accepts exactly one file argument.
 *
 *    DESIGN DESCRIPTION
 *	This tool uses a simple algorithm: the file is read and the
 *	offset of each line is placed at a random position in an
 *	array.  The array is then processed sequentially; for each
 *	array element in use, the line at the stored offset is reread
 *	from infile and printed.  The output is thus infile's lines in
 *	random order.
 *
 *    SPECIAL REQUIREMENTS
 *	None.
 *
 *    UPDATE HISTORY
 *      This should contain the description, author, and date of any
 *      "interesting" modifications (i.e. info should helpful in
 *      maintaining/enhancing this tool).
 *      username     description
 *      ----------------------------------------------------------------
 *	rrl 	    Creation of program
 *
 *    BUGS/LIMITATIONS
 *	This program cannot deal with non-seekable files such as
 *	stdin or a pipe.  If more than one file is specified,
 *	each file is randomized one at a time.  The maximum line
 *	length is 2048 characters.
 *
 **************************************************************/

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <time.h>

/*
 * Forward declarations of the message helpers defined later in this file.
 * Full prototypes are used so that calls are type-checked.
 */
void usage(FILE *stream);
void help(void);

/*
 * Structure used to hold file line offset.
 *
 * One element of the table that rnd_file() allocates and rnd_insert()
 * fills in; the table is walked in index order when printing.
 */
struct offset_t {
	long used;	/* non-zero once rnd_insert() has stored an offset here */
	long offset;	/* line start position as passed to rnd_insert() */
};

#define DEF_SIZE	2048		/* default number of lines per chunk */
#define MAX_LN_SZ	2048		/* max line size */

/*
 * errno comes from <errno.h>, which is included above.  It is not
 * redeclared here because it is allowed to be a macro rather than a
 * plain "int" object.
 */

/*
 * Random number helpers implemented outside this file.
 * NOTE(review): these are old-style declarations without parameter
 * types, so calls are not checked against the real definitions; the
 * project's own header for them should be used if one is available.
 */
extern int random_range();
extern int random_range_seed();
int get_numlines(FILE *infile);
int rnd_file(FILE *infile, int numlines, long seed);
int rnd_insert(struct offset_t offsets[], int offset, int size);

#ifndef SEEK_SET
#define SEEK_SET	0
#endif

char *Progname;		/* argv[0], set by main() and used in messages */

/***********************************************************************
 *  MAIN
 *
 *  Parses the options (-h, -g, -S seed, -l numlines), opens the single
 *  file named on the command line and hands it to rnd_file().
 *
 *  Exit status:
 *    0  success (also after -h)
 *    1  bad option/argument, file could not be opened, or rnd_file()
 *       reported a failure
 *    2  "-" (stdin) was given as the file, which is not supported
 ***********************************************************************/
int
main(int argc, char **argv)
{
    FILE *infile;
    char *fname;
    int c;
    int status;
    long seed = -1;		/* -1 means: use time as seed */
    int lsize = DEF_SIZE;	/* num lines to randomize */
    int getfilelines = 0;	/* if set, count lines first */
    extern int optind;
    extern char *optarg;

    Progname = argv[0];

    /* getopt() reports the end of the options with -1 */
    while ((c = getopt(argc, argv, "hgS:l:")) != -1) {
	switch (c) {
	case 'h':
	    help();
	    exit(0);

	case 'S':	/* seed */
	    if (sscanf(optarg, "%li", &seed) != 1) {
		fprintf(stderr, "%s: -S option argument is invalid\n", Progname);
		exit(1);
	    }
	    break;

	case 'l':	/* number of lines */
	    if (sscanf(optarg, "%i", &lsize) != 1) {
		fprintf(stderr, "%s: -l option argument is invalid\n", Progname);
		exit(1);
	    }
	    break;

	case 'g':
	    getfilelines++;
	    break;

	case '?':
	default:
	    usage(stderr);
	    exit(1);
	}
    }

    /* exactly one non-option argument (the file) is required */
    if (optind + 1 != argc) {
	if (optind >= argc)
	    printf("missing argument\n");
	else
	    printf("too many arguments\n");
	usage(stderr);
	exit(1);
    }

    if (seed == -1) {
	seed = (long)time(NULL);
    }

    fname = argv[optind];

    if (strcmp(fname, "-") == 0) {
	printf("Can not support stdin processing\n");
	exit(2);
    }

    if ((infile = fopen(fname, "r")) == NULL) {
	printf("unable to open file %s\n", fname);
	exit(1);
    }

    if (getfilelines) {
	lsize = get_numlines(infile);
    }

    /* propagate a randomization failure instead of always exiting 0 */
    status = (rnd_file(infile, lsize, seed) == 0) ? 0 : 1;

    fclose(infile);

    exit(status);
}

/***********************************************************************
 * usage - write the one-line synopsis, prefixed with the program name
 * held in Progname, to the given stream.
 *
 *   stream : destination for the message (callers pass stdout or stderr)
 ***********************************************************************/
void usage(FILE *stream)
{
    (void) fprintf(stream, "Usage %s [-hg][-S seed][-l numlines] [files...]\n",
		   Progname);
}

/***********************************************************************
 * help - write the synopsis followed by a description of every option
 * to stdout.  The maximum line length and the default chunk size shown
 * in the text are taken from MAX_LN_SZ and DEF_SIZE.
 ***********************************************************************/
void help(void)
{
    usage(stdout);

    printf("This tool will print lines in random order (max line len %d).\n"
	   "  -h          : print this help and exit\n"
	   "  -g          : count the number of lines in the file before randomizing\n"
	   "\t        This option overrides -l option.\n"
	   "  -l numlines : randoms lines in numlines chuncks (def %d)\n"
	   "  -S seed     : sets seed to seed (def time(0))\n",
	   MAX_LN_SZ, DEF_SIZE);
}

/***********************************************************************
 * get_numlines - count the lines of an already open file.
 *
 * Reads from the current position to the end with fgets(), counting
 * one line per successful read (a read holds at most MAX_LN_SZ-1
 * characters), then seeks back to the start of the file.
 *
 *   infile : open stream; must be seekable (not stdin or a pipe)
 *
 * Returns the number of successful reads.
 ***********************************************************************/
int
get_numlines(FILE *infile)
{
    char chunk[MAX_LN_SZ];		/* scratch space for one read */
    int total;

    for (total = 0; fgets(chunk, MAX_LN_SZ, infile) != NULL; total++)
	;

    /* leave the stream positioned at the beginning for the caller */
    fseek(infile, 0, SEEK_SET);

    return total;
}

/***********************************************************************
 * rnd_file - print the lines of a seekable file in random order.
 *
 * The file is processed in chunks of at most numlines lines.  For each
 * chunk the start offset of every line is stored at a random index of
 * a table of struct offset_t (see rnd_insert()); the table is then
 * walked in index order and each stored line is reread and printed.
 *
 *   infile   : open, fseek()able stream.  Thus, it can not be stdin.
 *   numlines : table size, i.e. lines randomized at a time.  It can be
 *              more or less than the number of lines in the file; a
 *              value <= 0 selects DEF_SIZE.
 *   seed     : value handed to random_range_seed().
 *
 * Returns 0 on success, -1 if the table could not be allocated.
 ***********************************************************************/
int
rnd_file(FILE *infile, int numlines, long seed)
{
    char line[MAX_LN_SZ];		/* max size of a line */
    struct offset_t *offsets;		/* randomization table */
    size_t memsize;			/* size of the table in bytes */
    long coffset = 0;			/* offset where the next chunk starts */
    long loffset;			/* offset of the line about to be read */
    int cnt;

    if (numlines <= 0) {	/* use default */
	numlines = DEF_SIZE;
    }

    /* refuse sizes whose byte count does not fit in a size_t */
    if ((size_t)numlines > (size_t)-1 / sizeof(*offsets)) {
	fprintf(stderr, "Unable to malloc %d table entries\n", numlines);
	return -1;
    }

    /*
     * Malloc space for numlines copies the offset_t structure.
     * This is where the randomization takes place.
     */
    memsize = sizeof(*offsets) * (size_t)numlines;

    offsets = malloc(memsize);
    if (offsets == NULL) {
	fprintf(stderr, "Unable to malloc(%lu): errno:%d\n",
		(unsigned long)memsize, errno);
	return -1;
    }

    random_range_seed(seed);

    while (!feof(infile)) {

	/* printing the previous chunk moved the file position; go back */
	fseek(infile, coffset, SEEK_SET);
	loffset = ftell(infile);
	memset(offsets, 0, memsize);
	cnt = 0;

	/*
	 * read the file in and place offset of each line randomly
	 * into offsets array.  Only numlines line can be randomized
	 * at a time.
	 */
	while (cnt < numlines && fgets(line, MAX_LN_SZ, infile) != NULL) {

	    /*
	     * NOTE: rnd_insert() takes the offset as an int, so offsets
	     * beyond INT_MAX cannot be represented through this call.
	     */
	    rnd_insert(offsets, (int)loffset, numlines);
	    cnt++;

	    coffset = ftell(infile);
	    loffset = coffset;
	}

	/*
	 * Nothing read means end of file or a read error.  Stop in both
	 * cases; looping on feof() alone would never end after an error.
	 */
	if (cnt == 0) {
	    break;
	}

	/*
	 * print out lines based on offset.
	 */
	for (cnt = 0; cnt < numlines; cnt++) {

	    if (!offsets[cnt].used) {
		continue;
	    }

	    fseek(infile, offsets[cnt].offset, SEEK_SET);

	    /* only print what was really read, never stale buffer content */
	    if (fgets(line, MAX_LN_SZ, infile) != NULL) {
		printf("%s", line);
	    }
	}

    }	/* end of file */

    free(offsets);

    return 0;
}

/***********************************************************************
 * rnd_insert - store offset in a randomly chosen free element of the
 * offsets array.
 *
 *   offsets : table of size elements; elements with a non-zero "used"
 *             field are occupied
 *   offset  : value to store
 *   size    : number of elements in offsets
 *
 * Up to 75 random indexes are tried.  If none of them is free, the
 * first free element (lowest index) is used instead.
 *
 * Returns the index that was filled, or -1 if size is not positive or
 * every element is already in use (nothing is stored in that case).
 ***********************************************************************/
int
rnd_insert(struct offset_t offsets[], int offset, int size)
{
    int rand_num;
    int quick;
    int ind;

    if (size <= 0) {
	return -1;
    }

    /*
     * Loop looking for random unused index.
     * It will only be attempted 75 times.
     *
     * random_range() is assumed to treat both bounds as inclusive (as
     * the LTP library version does), so size - 1 is passed as maximum.
     * The explicit range check protects the array in any case,
     * including an error return from random_range().
     */
    for (quick = 0; quick < 75; quick++) {

	rand_num = random_range(0, size - 1, 1, NULL);

	if (rand_num < 0 || rand_num >= size) {
	    continue;
	}

	if (!offsets[rand_num].used) {
	    offsets[rand_num].offset = offset;
	    offsets[rand_num].used++;
	    return rand_num;
	}
    }

    /*
     * a randomly chosen index was not found, find
     * first open index and use it.
     */
    for (ind = 0; ind < size; ind++) {
	if (!offsets[ind].used) {
	    offsets[ind].offset = offset;
	    offsets[ind].used++;
	    return ind;
	}
    }

    /* table is full */
    return -1;
}



/***********************************************************************
 *
 * CODE NOT TESTED AT ALL - it must be tested before it is used.
 *
 * This function was written to allow rand_lines to work on non-seekable
 * file (i.e stdin).
 *
 * Intended design, as far as the code shows: read the input into a
 * malloc'ed buffer of "space" bytes, record the start of each line in
 * the offsets table via rnd_insert(), print the lines in table order,
 * then move a trailing partial line to the front of the buffer and
 * read again.
 *
 * Returns -1 if an allocation fails, otherwise 0.
 *
 * NOTE(review): problems visible in the code as written:
 *   - newbuffer starts at 1 and is never set back to 0, so the
 *     "while ( ! newbuffer )" loop body never runs; the function only
 *     reads input until fread() returns 0.
 *   - fread() is asked for 1 item of sztord bytes, so its return value
 *     is an item count (0 or 1), not the number of bytes read.
 *   - buffer addresses are stored in "int loffset" and passed to
 *     rnd_insert() as an int.
 *   - the offsets table is never cleared before its "used" fields are
 *     tested.
 *   - the copy into line[] has no length limit and adds no terminating
 *     '\0' before line is printed with "%s".
 *   - memcpy() is called with loffset as destination and buffer as
 *     source, the reverse of what the comment above it describes.
 *   - buffer and offsets are never freed.
 *
 ***********************************************************************/
int
rnd_stdin(infile, space, numlines, seed)
FILE *infile;
int space;		/* amount of space to use to read file into memory, */
			/* randomized and print.  randomize in chunks */
int numlines;		/* can be more or less than num lines in file */
			/* most opt randomized when num lines in files */
			/* or just a bit bigger */
long seed;
{

    int c;				/* rnd_insert() result; assigned but never read */
    char line[MAX_LN_SZ];		/* max size of a line */
    int cnt;				/* offset printer counter */
    int loffset;			/* last line address */
    char *buffer;			/* malloc space for file reads */
    char *rdbuff;			/* where to start read */
    long stopaddr;			/* end of read space (address)*/
    int rdsz;				/* amount read */
    int sztord;				/* size passed to fread() */
    char *chr;				/* buffer processing pointer */
    char *ptr;				/* printing processing pointer */
    char *lptr;				/* printing processing pointer */
    int loopcntl = 1;			/* main loop control flag */
    struct offset_t *offsets;		/* pointer to offset space */
    int memsize;			/* amount of offset space to malloc */
    int newbuffer = 1;			/* need new buffer */

    if ( numlines <= 0 ) {	/*use default */
	numlines = DEF_SIZE;
    }

    /*
     * Malloc space for file contents
     */
    if ((buffer=(char *)malloc(space)) == NULL ) {
	fprintf(stderr, "Unable to malloc(%d): errno:%d\n", space, errno);
	return -1;
    }

    /*
     * Malloc space for numlines copies the offset_t structure.
     * This is where the randomization takes place.
     */
    memsize = sizeof(struct offset_t)*numlines;

    /* NOTE(review): buffer is not released on this failure path */
    if ((offsets=(struct offset_t *)malloc(memsize)) == NULL ) {
	fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize, errno);
	return -1;
    }

    random_range_seed(seed);
    rdbuff = buffer;		/* read into start of buffer */
    sztord = space;		/* amount of space left in buffer */

    /*
     *  Loop until read doesn't read anything
     *  If last line does not end in newline, it is not printed
     */
    while ( loopcntl ) {
        /*
         *  read in file up to space size
	 *  only works if used as filter.
	 *  The code will randomize one reads worth at a time.
	 *  If typing in lines, read will read only one line - no randomizing.
         */

        chr = buffer;
        /* fread() returns the number of complete sztord-byte items: 0 or 1 */
        if ((rdsz=fread((void *)rdbuff, sztord, 1, infile)) == 0 ) {
	    fprintf(stderr, "input file is empty, done randomizing\n");
	    loopcntl=0;
	    return 0;
        }

	stopaddr = ((long)buffer + rdsz);

        loffset= (long)buffer;

	/* never entered as written: newbuffer is 1 on every pass */
	while ( ! newbuffer ) {

	    /* advance chr to the next newline or to the end of the data */
            while ( (long)chr < stopaddr && *chr != '\n' )
	        chr++;

	    /* step past the newline */
	    chr++;

	    if ( (long)chr >= stopaddr ) {

	        fprintf(stderr, "end of read in buffer\n");

		/*
		 * print out lines based on offset.
		 */
		for (cnt=0; cnt<numlines; cnt++) {

		    if ( offsets[cnt].used ) {
			ptr = (char *)offsets[cnt].offset;
			/*
			 * copy buffer characters into line for printing
			 * (no bound on the copy, no terminating '\0')
			 */
			lptr = line;
			while ( *ptr != '\n' ) 
			    *lptr++ = *ptr++;
				
			printf("%s\n", line);
		    }
		}

		/*
	         * move start of partially read line to beginning of buffer
		 * and adjust rdbuff to end of partially read line
		 * NOTE(review): the memcpy() below copies from buffer to
		 * loffset, the opposite direction.
		 */
		memcpy((void *)loffset, buffer, (stopaddr - loffset));
		rdbuff = buffer + (stopaddr - loffset);
		sztord = space - (stopaddr - loffset);

	        newbuffer++;
	    }

	    c=rnd_insert(offsets, loffset, numlines);

            loffset = (long)chr;
	}
    }

    return 0;

}