xfs
[Top] [All Lists]

Re: problems with restore

To: Tim Shimmin <tes@xxxxxxx>
Subject: Re: problems with restore
From: Lars Soltau <lars.soltau@xxxxxxx>
Date: Fri, 26 Jul 2002 11:57:25 +0200
Cc: linux-xfs@xxxxxxxxxxx
Organization: sLAB oHG
References: <20020723114554.J2935839@boing.melbourne.sgi.com>
Reply-to: lars.soltau@xxxxxxx
Sender: owner-linux-xfs@xxxxxxxxxxx
User-agent: Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.0.0) Gecko/20020530
Tim Shimmin wrote:

1. I don't believe that xfsdump/restore is too complicated to use.
It has an awful lot of options (I agree) but there are examples
on what ones are needed to get a basic dump and restore happening:
check out the examples sections in: - cmd/xfsdump/doc/README.xfsdump
- xfsdump(8) and xfsrestore(8) man pages

True, my real peeve was getting xfsrestore to read a tape backup from disk.


2. [...] So one is not supposed to copy a dump from tape onto a disk file and then restore it. [...]

Well, that's not very helpful. :-)

I had some old tapes and borrowed a matching streamer to restore them. After the restore directly from tape failed with an assertion failure (see blow), I copied the files to disk because I had to return the streamer before I had a chance to do a full examination of the assertion failure.

I ended up patching drive_minrmt.c to forget about all SCSI commands and open consecutively numbered files on hard disk instead. It would be a nice feature for xfsrestore to be able to open a tape backup from disk, wouldn't it?

3. I don't know why xfsrestore is aborting with an assertion failure for you.

I do, now. I single-stepped into the assertion and found out that one 64bit offset in each record header, the first_mark_offset, had been written highendian instead of littleendian. All the other offsets, for example file_offset that is located directly before first_mark_offset, had been written correctly.


The backup has been written by a version of xfsdump compiled with egcs on Linux. Personally, I suspect egcs's 64bit arithmetic. Anyway, I patched the source to correct the offset on the fly and the problem vanished.

In case anyone is interested I have attached the patched drive_minrmt.c file from xfsdump-2.0.1.

Greetings
Lars Soltau
/*
 * Copyright (c) 2000 Silicon Graphics, Inc.  All Rights Reserved.
 * 
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * 
 * Further, this software is distributed without any warranty that it is
 * free of the rightful claim of any third person regarding infringement
 * or the like.  Any license provided herein, whether implied or
 * otherwise, applies only to this software file.  Patent licenses, if
 * any, provided herein do not apply to combinations of this program with
 * other software, or any other product whatsoever.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write the Free Software Foundation, Inc., 59
 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
 * 
 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
 * Mountain View, CA  94043, or:
 * 
 * http://www.sgi.com 
 * 
 * For further information regarding this notice, see: 
 * 
 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
 */

#include <libxfs.h>
#include <jdm.h>

#include <sys/stat.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/prctl.h>
#include <time.h>
#include <signal.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/sysmacros.h>
#include <sys/mtio.h>
#include <sys/signal.h>
#include <malloc.h>
#include <sched.h>

#include "types.h"
#include "util.h"
#include "qlock.h"
#include "cldmgr.h"
#include "mlog.h"
#include "dlog.h"
#include "global.h"
#include "drive.h"
#include "media.h"
#include "getopt.h"
#include "stream.h"
#include "ring.h"
#include "rec_hdr.h"
#include "arch_xlate.h"


/* drive_minrmt.c - drive strategy for non-SGI rmt tape devices
 * This strategy is derived from the scsitape strategy. It is designed
 * so that tapes written using this strategy can be read with the
 * scsitape strategy and vice versa.
 */

int lgs_fileno;
char lgs_filename[256];
char lgs_filebase[256];
char *lgs_old_pathname;

/* structure definitions used locally ****************************************/

/* remote tape protocol debug
 */
#ifdef OPENMASKED
static intgen_t open_masked_signals( char *path, int oflags );
#else /* OPENMASKED */
#define open_masked_signals(p,f)        open(p,f)
#endif /* OPENMASKED */

#ifdef RMT
#ifdef RMTDBG
#define open(p,f)               dbgrmtopen(p,f)
#define close(fd)               dbgrmtclose(fd)
#define ioctl(fd,op,arg)        dbgrmtioctl(fd,op,arg)
#define read(fd,p,sz)           dbgrmtread(fd,p,sz)
#define write(fd,p,sz)          dbgrmtwrite(fd,p,sz)
#else /* RMTDBG */
#define open rmtopen
#define close rmtclose
#define ioctl rmtioctl
#define read rmtread
#define write rmtwrite
#endif /* RMTDBG */
#endif /* RMT */

/* if the media file header structure changes, this number must be
 * bumped, and STAPE_VERSION_1 must be defined and recognized.
 */
#define STAPE_VERSION   1

/* a bizarre number to help reduce the odds of mistaking random data
 * for a media file or record header
 */
#define STAPE_MAGIC     0x13579bdf02468acell

/* this much of each record is reserved for header info: the user
 * data always begins at this offset from the beginning of each
 * record. be sure global_hdr_t fits.
 */
#define STAPE_HDR_SZ                    PGSZ

/* maximum tape record size. this is the max size of I/O buffers sent to drive.
 * note that for variable block devices this determines the block size as well.
 */
#define STAPE_MAX_RECSZ                 0x200000        /* 2M */

/* this is the smallest maximum block size for any tape device
 * supported by xfsdump/xfsrestore. we use this when it is not possible
 * to ask the driver for block size info.
 */
#define STAPE_MIN_MAX_BLKSZ             0x3c000         /* 240K, 245760 */

/* QIC tapes always use 512 byte blocks
 */
#define QIC_BLKSZ                       512

/* number of record buffers in the I/O ring
 */
#define RINGLEN_MIN                     1
#define RINGLEN_MAX                     10
#define RINGLEN_DEFAULT                 3

/* tape i/o request retry limit
 */
#define MTOP_TRIES_MAX                  10

/* operational mode. can be reading or writing, but not both
 */
typedef enum { OM_NONE, OM_READ, OM_WRITE } om_t;

/* drive_context - control state
 *
 * NOTE: ring used only if not singlethreaded
 */
struct drive_context {
        om_t dc_mode;
                        /* current mode of operation (READ or WRITE)
                         */
        size_t dc_ringlen;
                        /* number of tape_recsz buffers in ring. only used
                         * for displaying ring info
                         */
        bool_t dc_ringpinnedpr;
                        /* are the ring buffers pinned down
                         */
        ring_t *dc_ringp;
                        /* handle to ring
                         */
        ring_msg_t *dc_msgp;
                        /* currently held ring message
                         */
        char *dc_bufp;
                        /* pre-allocated record buffer (only if ring not used)
                         */
        char *dc_recp;
                        /* pointer to current record buffer. once the current
                         * record is completely read or written by client,
                         * set to NULL.
                         */
        char *dc_recendp;
                        /* always set to point to just off the end of the
                         * current record buffer pointed to by dc_recp. valid
                         * only when dc_recp non-NULL.
                         */
        char *dc_dataendp;
                        /* same as dc_recendp, except for first and last
                         * records in media file. the first record is all
                         * pads after the header page. the last record may
                         * have been padded (as indicated by the rec_used
                         * field of the record header). in either case
                         * dc_dataendp points to first padding byte.
                         */
        char *dc_ownedp;
                        /* first byte in current buffer owned by caller.
                         * given to caller by do_read or do_get_write_buf
                         * set to null by do_return_read_buf or do_write.
                         */
        char *dc_nextp;
                        /* next byte available in current buffer to give
                         * to do_get_write_buf for writing or do_read for
                         * reading.
                         */
        off64_t dc_reccnt;
                        /* count of the number of records completely read or
                         * written by client, and therefore not represented
                         * by current dc_recp. valid initially and after
                         * each call to do_return_read_buf or do_write.
                         * NOT valid after a call to do_read or
                         * do_get_write_buf. always bumped regardless of
                         * read or write error status.
                         */
        off64_t dc_iocnt;
                        /* count of the number of records read or written
                         * to media without error. includes media file header
                         * record. this is incremented when the actual I/O is
                         * done. dc_reccnt is different, indicating what has
                         * been seen by client. slave may have read ahead /
                         * written behind.
                         */
        int dc_fd;
                        /* drive file descriptor. -1 when not open
                         */
        bool_t dc_isQICpr;
                        /* fixed 512 byte block size device.
                         */
        bool_t dc_canfsrpr;
                        /* can seek forward records at a time
                         */
        size_t dc_blksz;
                        /* actual tape blksize selected
                         */
        size_t dc_recsz;
                        /* actual tape record size selected
                         */
        off64_t dc_lostrecmax;
                        /* maximum number of records written without error
                         * which may be lost due to a near end-of-tape
                         * experience. a function of drive type and
                         * compression
                         */
        bool_t dc_singlethreadedpr;
                        /* single-threaded operation (no slave)
                         */
        bool_t dc_errorpr;
                        /* TRUE if error encountered during reading or writing.
                         * used to detect attempts to read or write after
                         * error reported.
                         */
        bool_t dc_recchksumpr;
                        /* TRUE if records should be checksumed
                         */
        bool_t dc_unloadokpr;
                        /* ok to issue unload command when do_eject invoked.
                         */
        bool_t dc_overwritepr;
                        /* overwrite tape without checking whats on it
                         */
        bool_t dc_singlemfilepr;
                        /* use only one media file
                         */
        off64_t dc_filesz;
                        /* media file size given as argument
                         */
};

typedef struct drive_context drive_context_t;

/* macros for shortcut references to context. assumes a local variable named
 * 'contextp'.
 */
#define tape_recsz      ( contextp->dc_recsz )
#define tape_blksz      ( contextp->dc_blksz )

/* declarations of externally defined global variables ***********************/

extern void usage( void );
#ifdef DUMP
#ifdef SIZEEST
extern u_int64_t min_recmfilesz;
extern u_int64_t hdr_mfilesz;
#endif /* SIZEEST */
#endif /* DUMP */

/* remote tape protocol declarations (should be a system header file)
 */
#ifdef RMT
extern int rmtopen( char *, int, ... );
extern int rmtclose( int );
extern int rmtfstat( int, struct stat * );
extern int rmtioctl( int, int, ... );
extern int rmtread( int, void*, uint);
extern int rmtwrite( int, const void *, uint);
#endif /* RMT */


/* forward declarations of locally defined static functions ******************/

/* strategy functions
 */
static intgen_t ds_match( int, char *[], drive_t *, bool_t );
static intgen_t ds_instantiate( int, char *[], drive_t *, bool_t );

/* manager operations
 */
static bool_t do_init( drive_t * );
static bool_t do_sync( drive_t * );
static intgen_t do_begin_read( drive_t * );
static char *do_read( drive_t *, size_t , size_t *, intgen_t * );
static void do_return_read_buf( drive_t *, char *, size_t );
static void do_get_mark( drive_t *, drive_mark_t * );
static intgen_t do_seek_mark( drive_t *, drive_mark_t * );
static intgen_t do_next_mark( drive_t * );
static void do_get_mark( drive_t *, drive_mark_t * );
static void do_end_read( drive_t * );
static intgen_t do_begin_write( drive_t * );
static void do_set_mark( drive_t *, drive_mcbfp_t, void *, drive_markrec_t * );
static char * do_get_write_buf( drive_t *, size_t , size_t * );
static intgen_t do_write( drive_t *, char *, size_t );
static size_t do_get_align_cnt( drive_t * );
static intgen_t do_end_write( drive_t *, off64_t * );
static intgen_t do_fsf( drive_t *, intgen_t , intgen_t *);
static intgen_t do_bsf( drive_t *, intgen_t , intgen_t *);
static intgen_t do_rewind( drive_t * );
static intgen_t do_erase( drive_t * );
static intgen_t do_eject_media( drive_t * );
static intgen_t do_get_device_class( drive_t * );
static void do_display_metrics( drive_t *drivep );
static void do_quit( drive_t * );

/* misc. local utility funcs
 */
static intgen_t mt_op(intgen_t , intgen_t , intgen_t );
static intgen_t determine_write_error( int, int );
static intgen_t read_label( drive_t *);
static bool_t tape_rec_checksum_check( drive_context_t *, char * );
static void set_recommended_sizes( drive_t *, int );
static void display_access_failed_message( drive_t *);
static bool_t get_tpcaps( drive_t * );
static intgen_t prepare_drive( drive_t *drivep );
static bool_t Open( drive_t *drivep );
static void Close( drive_t *drivep );
static intgen_t Read( drive_t *drivep,
                      char *bufp,
                      size_t cnt,
                      intgen_t *errnop );
static intgen_t Write( drive_t *drivep,
                       char *bufp,
                       size_t cnt,
                       intgen_t *saved_errnop );
static intgen_t record_hdr_validate( drive_t *drivep,
                                     char *bufp,
                                     bool_t chkoffpr );
static void ring_thread( void *clientctxp,
                         int ( * entryp )( void *ringctxp ),
                         void *ringctxp );
static int ring_read( void *clientctxp, char *bufp );
static int ring_write( void *clientctxp, char *bufp );
static double percent64( off64_t num, off64_t denom );
static intgen_t getrec( drive_t *drivep );
static intgen_t write_record( drive_t *drivep, char *bufp, bool_t chksumpr,
                              bool_t xlatepr );
static ring_msg_t * Ring_get( ring_t *ringp );
static void Ring_reset(  ring_t *ringp, ring_msg_t *msgp );
static void Ring_put(  ring_t *ringp, ring_msg_t *msgp );
static intgen_t validate_media_file_hdr( drive_t *drivep );
static void calc_max_lost( drive_t *drivep );
static void display_ring_metrics( drive_t *drivep, intgen_t mlog_flags );
#ifdef CLRMTAUD
static u_int32_t rewind_and_verify( drive_t *drivep );
static u_int32_t erase_and_verify( drive_t *drivep ); 
static u_int32_t bsf_and_verify( drive_t *drivep );
static u_int32_t fsf_and_verify( drive_t *drivep );
#else /* CLRMTAUD */
static short rewind_and_verify( drive_t *drivep );
static short erase_and_verify( drive_t *drivep ); 
static short bsf_and_verify( drive_t *drivep );
static short fsf_and_verify( drive_t *drivep );
#endif /* CLRMTAUD */
static bool_t set_best_blk_and_rec_sz( drive_t *drivep );
static bool_t isefsdump( drive_t *drivep );
static bool_t isxfsdumperasetape( drive_t *drivep );

/* RMT trace stubs
 */
#ifdef RMT
#ifdef RMTDBG
static int dbgrmtopen( char *, int );
static int dbgrmtclose( int );
static int dbgrmtioctl( int, int, void * );
static int dbgrmtread( int, void *, uint);
static int dbgrmtwrite( int, void *, uint);
#endif /* RMTDBG */
#endif /* RMT */

#define ERASE_MAGIC "$^*@++! This tape was quick erased by SGI xfsdump $^*@++!"

/* definition of locally defined global variables ****************************/

/* rmt drive strategy. referenced by drive.c
 */
drive_strategy_t drive_strategy_rmt = {
        DRIVE_STRATEGY_RMT,                     /* ds_id */
        "minimum scsi tape (drive_minrmt)",     /* ds_description */
        ds_match,                               /* ds_match */
        ds_instantiate,                         /* ds_instantiate */
        0x1000000ll,                            /* ds_recmarksep  16 MB */
        0x10000000ll,                           /* ds_recmfilesz 256 MB */
};


/* definition of locally defined static variables *****************************/

/* drive operators
 */
static drive_ops_t drive_ops = {
        do_init,                /* do_init */
        do_sync,                /* do_sync */
        do_begin_read,          /* do_begin_read */
        do_read,                /* do_read */
        do_return_read_buf,     /* do_return_read_buf */
        do_get_mark,            /* do_get_mark */
        do_seek_mark,           /* do_seek_mark */
        do_next_mark,           /* do_next_mark */
        do_end_read,            /* do_end_read */
        do_begin_write,         /* do_begin_write */
        do_set_mark,            /* do_set_mark */
        do_get_write_buf,       /* do_get_write_buf */
        do_write,               /* do_write */
        do_get_align_cnt,       /* do_get_align_cnt */
        do_end_write,           /* do_end_write */
        do_fsf,                 /* do_fsf */
        do_bsf,                 /* do_bsf */
        do_rewind,              /* do_rewind */
        do_erase,               /* do_erase */
        do_eject_media,         /* do_eject_media */
        do_get_device_class,    /* do_get_device_class */
        do_display_metrics,     /* do_display_metrics */
        do_quit,                /* do_quit */
};

static u_int32_t cmdlineblksize = 0;

/* definition of locally defined global functions ****************************/


/* definition of locally defined static functions ****************************/

/* strategy match - determines if this is the right strategy
 */
/* ARGSUSED */
static intgen_t
ds_match( int argc, char *argv[], drive_t *drivep, bool_t singlethreaded )
{
        intgen_t fd;
        intgen_t c;
        bool_t minrmt = BOOL_FALSE;

        /* heuristics to determine if this is a drive.
         */

        if ( ! strcmp( drivep->d_pathname, "stdio" )) {
                return -10;
        }

        /* Check if the min rmt flag and block size have 
         * been specified.
         * If so , this is a non-SGI drive and this is the right
         * strategy.
         */
        {
                optind = 1;
                opterr = 0;
                while ( ( c = getopt( argc, argv, GETOPT_CMDSTRING )) != EOF ) {
                   switch (c) {
                      case GETOPT_BLOCKSIZE:
                            if ( ! optarg || optarg[ 0 ] == '-' ) {
                                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                                        "-%c argument missing\n",
                                        optopt );
                                return -10;
                            }
                            cmdlineblksize = ( u_int32_t )atoi( optarg );
                            errno = 0;
                            fd = open_masked_signals( drivep->d_pathname, 
O_RDONLY );
                            if ( fd < 0 ) 
                                    return -10;
                            close( fd );
                            break;
                        case GETOPT_MINRMT:
                            minrmt = BOOL_TRUE;
                            break;
                    }
                }

                if (minrmt == BOOL_TRUE) {
                        if (cmdlineblksize != 0)
                                return 20;
                        else 
                                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                                        "Minimal rmt cannot be used without 
specifying blocksize. Use -%c\n",
                                        GETOPT_BLOCKSIZE );
                }
        } 
        /* catch all */
        return -10;
}

/* strategy instantiate - initializes the pre-allocated drive descriptor
 */
/*ARGSUSED*/
static bool_t
ds_instantiate( int argc, char *argv[], drive_t *drivep, bool_t singlethreaded )
{
        drive_context_t *contextp;
        intgen_t c;

        /* opportunity for sanity checking
         */
        ASSERT( sizeof( global_hdr_t ) <= STAPE_HDR_SZ );
        ASSERT( sizeof( rec_hdr_t )
                ==
                sizeofmember( drive_hdr_t, dh_specific ));
        ASSERT( ! ( STAPE_MAX_RECSZ % PGSZ ));

        /* hook up the drive ops
         */
        drivep->d_opsp = &drive_ops;

        /* allocate context for the drive manager
         */
        contextp = ( drive_context_t * )calloc( 1, sizeof( drive_context_t ));
        ASSERT( contextp );
        memset( ( void * )contextp, 0, sizeof( *contextp ));

        /* transfer indication of singlethreadedness to context
         */
        contextp->dc_singlethreadedpr = singlethreaded;

        /* scan the command line for the I/O buffer ring length
         * and record checksum request
         */
        contextp->dc_ringlen = RINGLEN_DEFAULT;
        contextp->dc_ringpinnedpr = BOOL_FALSE;
        contextp->dc_recchksumpr = BOOL_FALSE;
        contextp->dc_unloadokpr = BOOL_FALSE;
        contextp->dc_singlemfilepr = BOOL_FALSE;
        contextp->dc_filesz = 0;
        contextp->dc_isQICpr = BOOL_FALSE;
        optind = 1;
        opterr = 0;
        while ( ( c = getopt( argc, argv, GETOPT_CMDSTRING )) != EOF ) {
                switch ( c ) {
                case GETOPT_RINGLEN:
                        if ( ! optarg || optarg[ 0 ] == '-' ) {
                                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                                      "-%c argument missing\n",
                                      optopt );
                                return BOOL_FALSE;
                        }
                        contextp->dc_ringlen = ( size_t )atoi( optarg );
                        if ( contextp->dc_ringlen < RINGLEN_MIN
                             ||
                             contextp->dc_ringlen > RINGLEN_MAX ) {
                                mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
                                      "-%c argument must be "
                                      "between %u and %u: ignoring %u\n",
                                      optopt,
                                      RINGLEN_MIN,
                                      RINGLEN_MAX,
                                      contextp->dc_ringlen );
                                return BOOL_FALSE;
                        }
                        break;
                case GETOPT_RINGPIN:
                        contextp->dc_ringpinnedpr = BOOL_TRUE;
                        break;
                case GETOPT_RECCHKSUM:
                        contextp->dc_recchksumpr = BOOL_TRUE;
                        break;
                case GETOPT_UNLOAD:
                        contextp->dc_unloadokpr = BOOL_TRUE;
                        break;
                case GETOPT_QIC:
                        contextp->dc_isQICpr = BOOL_TRUE;
                        break;
#ifdef DUMP
                case GETOPT_OVERWRITE:
                        contextp->dc_overwritepr = BOOL_TRUE;
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                                "Overwrite command line option \n" );
                        break;
                case GETOPT_SINGLEMFILE:
                        if (contextp->dc_filesz > 0) {
                                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                                      "-%c and -%c options cannot be used 
together\n",
                                      optopt,
                                      GETOPT_FILESZ );
                                return BOOL_FALSE;
                        }
                        contextp->dc_singlemfilepr = BOOL_TRUE;
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                                "Single media file command line option \n" );
                        break;
                case GETOPT_FILESZ:
                        if (contextp->dc_singlemfilepr == BOOL_TRUE) {
                                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                                      "-%c and -%c options cannot be used 
together\n",
                                      optopt,
                                      GETOPT_SINGLEMFILE );
                                return BOOL_FALSE;
                        }
                        if ( ! optarg || optarg [ 0 ] == '-' ) {
                                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                                      "-%c argument missing\n",
                                      optopt );
                                return BOOL_FALSE;
                        }
                        contextp->dc_filesz = (off64_t)atoi( optarg ) * 1024 * 
1024;
                        if (contextp->dc_filesz <= 0) {
                                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                                      "-%c argument must be a "
                                      "positive number (MB): ignoring %u\n",
                                      optopt,
                                      contextp->dc_filesz );
                                return BOOL_FALSE;
                        }
                        break;
#endif
                }
        }

        /* set drive file descriptor to null value
         */
        contextp->dc_fd = -1;

        /* record location of context descriptor in drive descriptor
         */
        drivep->d_contextp = (void *)contextp;

        /* indicate neither capacity nor rate estimates available
         */
        drivep->d_cap_est  = -1;
        drivep->d_rate_est = -1;

        /* if sproc not allowed, allocate a record buffer. otherwise
         * create a ring, from which buffers will be taken.
         */
        if ( singlethreaded ) {
                contextp->dc_bufp = ( char * )memalign( PGSZ, STAPE_MAX_RECSZ );
                ASSERT( contextp->dc_bufp );
        } else {
                intgen_t rval;
                mlog( (MLOG_NITTY + 1) | MLOG_DRIVE,
                      "ring op: create: ringlen == %u\n",
                      contextp->dc_ringlen );
                contextp->dc_ringp = ring_create( contextp->dc_ringlen,
                                                  STAPE_MAX_RECSZ,
                                                  contextp->dc_ringpinnedpr,
                                                  ring_thread,
                                                  ring_read,
                                                  ring_write,
                                                  ( void * )drivep,
                                                  &rval );
                if ( ! contextp->dc_ringp ) {
                        if ( rval == ENOMEM ) {
                                mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
                                      "unable to allocate memory "
                                      "for I/O buffer ring\n" );
                        } else if ( rval == E2BIG ) {
                                mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
                                      "not enough physical memory "
                                      "to pin down I/O buffer ring\n" );
                        } else if ( rval == EPERM ) {
                                mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
                                      "not allowed "
                                      "to pin down I/O buffer ring\n" );
                        } else {
                                ASSERT( 0 );
                        }
                        return BOOL_FALSE;
                }
        }

        /* several of contextp predicates cannot yet be determined.
         * mark them as unknown for now. however, if this is an RMT
         * access, we know immediately some capabilities are missing.
         */

        /* specify that we are currently neither reading nor writing
         */
        contextp->dc_mode = OM_NONE;

        /* set the capabilities flags advertised in the drive_t d_capabilities
         * field that we know a priori to be true. later additional flags
         * may be set
         */
        drivep->d_capabilities = 0
                                 |
                                 DRIVE_CAP_BSF
                                 |
                                 DRIVE_CAP_FSF
                                 |
                                 DRIVE_CAP_REWIND
                                 |
                                 DRIVE_CAP_FILES
                                 |
                                 DRIVE_CAP_NEXTMARK
                                 |
                                 DRIVE_CAP_READ
                                 |
                                 DRIVE_CAP_REMOVABLE
                                 |
                                 DRIVE_CAP_ERASE
                                 |
                                 DRIVE_CAP_EJECT
                                 ;

        return BOOL_TRUE;
}

/* drive op init - do more time-consuming init/checking here. read and
 * write headers are available now.
 *
 * NOTE:
 *      When using a RMT device, the MTIOCGETBLKINFO, MTCAPABILITY and
 *      MTSPECOP ioctl calls are not supported. This means that we have
 *      to assume that the drive does not support the MTCAN_APPEND capability.
 */
/* ARGSUSED */
static bool_t
do_init( drive_t *drivep )
{
#ifdef DUMP
        drive_hdr_t *dwhdrp = drivep->d_writehdrp;
        media_hdr_t *mwhdrp = ( media_hdr_t * )dwhdrp->dh_upper;
#endif /* DUMP */

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: init\n" );

        sscanf(drivep->d_pathname + (strlen(drivep->d_pathname) - 3), "%d", 
&lgs_fileno);
        mlog(MLOG_DEBUG | MLOG_DRIVE,
             "rmt drive: fileno initialised to %03d\n", lgs_fileno);

#ifdef DUMP
        /* fill in media strategy id: artifact of first version of xfsdump
         */
        mwhdrp->mh_strategyid = MEDIA_STRATEGY_RMVTAPE;
#endif /* DUMP */

        return BOOL_TRUE;
}

/* wait here for slave to complete initialization.
 * set drive capabilities flags. NOTE: currently don't make use of this
 * feature: drive initialization done whenever block/record sizes unknown.
 */
/* ARGSUSED */
static bool_t
do_sync( drive_t *drivep )
{
        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: sync\n" );

        return BOOL_TRUE;
}

/* begin_read
 *      Set up the tape device and read the media file header.
 *      if allowed, begin read-ahead.
 *
 * RETURNS:
 *      0 on success
 *      DRIVE_ERROR_* on failure
 *
 */
static intgen_t
do_begin_read( drive_t *drivep )
{
        drive_context_t *contextp;
        intgen_t rval;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: begin read\n" );

        /* get drive context
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

        /* verify protocol being followed
         */
        ASSERT( drivep->d_capabilities & DRIVE_CAP_READ );
        ASSERT( contextp->dc_mode == OM_NONE );
        ASSERT( ! contextp->dc_recp );

        /* get a record buffer to use during initialization.
         */
        if ( contextp->dc_singlethreadedpr ) {
                contextp->dc_recp = contextp->dc_bufp;
        } else {
                ASSERT( contextp->dc_ringp );
                contextp->dc_msgp = Ring_get( contextp->dc_ringp );
                ASSERT( contextp->dc_msgp->rm_stat == RING_STAT_INIT );
                contextp->dc_recp = contextp->dc_msgp->rm_bufp;
        }

        /* if the tape is not open, open, determine the record size, and
         * read the first record. otherwise read a record using the record
         * size previously determined.
         */
        contextp->dc_iocnt = 0;
        if ( contextp->dc_fd < 0 ) {
                ASSERT( contextp->dc_fd == -1 );
                rval = prepare_drive( drivep );
                if ( rval ) {
                        if ( ! contextp->dc_singlethreadedpr ) {
                            Ring_reset( contextp->dc_ringp, contextp->dc_msgp );
                        }
                        contextp->dc_msgp = 0;
                        contextp->dc_recp = 0;
                        return rval;
                }
        } else {
                rval = read_label( drivep ) ;
                if ( rval ) {
                        if ( ! contextp->dc_singlethreadedpr ) {
                            Ring_reset( contextp->dc_ringp, contextp->dc_msgp );
                        }
                        contextp->dc_msgp = 0;
                        contextp->dc_recp = 0;
                        return rval;
                }
        }
        ASSERT( contextp->dc_iocnt == 1 );
                                        /* set by prepare_drive or read_label */

        /* all is well. adjust context. don't kick off read-aheads just yet;
         * the client may not want this media file.
         */
        if ( ! contextp->dc_singlethreadedpr ) {
                contextp->dc_msgp->rm_op = RING_OP_NOP;
                contextp->dc_msgp->rm_user = 0; /* do diff. use in do_seek */
                Ring_put( contextp->dc_ringp, contextp->dc_msgp );
                contextp->dc_msgp = 0;
        }
        contextp->dc_recp = 0;
        contextp->dc_recendp = 0;
        contextp->dc_dataendp = 0;
        contextp->dc_ownedp = 0;
        contextp->dc_nextp = 0;
        contextp->dc_reccnt = 1;

        /* used to detect attempt to read after an error was reported
         */
        contextp->dc_errorpr = BOOL_FALSE;

        /* successfully entered read mode. must do end_read to get out.
         */
        contextp->dc_mode = OM_READ;

        return 0;
}

/* do_read
 *      Supply the caller with all or a portion of the current buffer,
 *      filled with data from a record.
 *
 * RETURNS:
 *      a pointer to a buffer containing "*actual_bufszp" bytes of data
 *      or 0 on failure with "*rvalp" containing the error (DRIVE_ERROR_...)
 *
 */
static char *
do_read( drive_t *drivep,
         size_t wantedcnt,
         size_t *actualcntp,
         intgen_t *rvalp )
{
        drive_context_t *contextp;
        size_t availcnt;
        size_t actualcnt;
        intgen_t rval;

#if 0
        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: read: wanted %u (0x%x)\n",
              wantedcnt,
              wantedcnt );
#endif

        /* get context ptrs
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

        /* assert protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_READ );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( ! contextp->dc_ownedp );
        ASSERT( wantedcnt > 0 );

        /* clear the return status field
         */
        *rvalp = 0;

        /* read a new record if necessary
         */
        rval = getrec( drivep );
        if ( rval ) {
                mlog( MLOG_NITTY | MLOG_DRIVE,
                      "rmt drive op read returning error rval=%d\n",
                      rval );
                *rvalp = rval;
                return 0;
        }

        /* figure how much data is available, and how much should be supplied
         */
        availcnt = ( size_t )( contextp->dc_dataendp - contextp->dc_nextp );
        actualcnt = min( wantedcnt, availcnt );

        /* adjust the context
         */
        contextp->dc_ownedp = contextp->dc_nextp;
        contextp->dc_nextp += actualcnt;
        ASSERT( contextp->dc_nextp <= contextp->dc_dataendp );

        mlog( MLOG_NITTY | MLOG_DRIVE,
              "rmt drive op read actual == %d (0x%x)\n",
              actualcnt,
              actualcnt );

        *actualcntp = actualcnt;
        return contextp->dc_ownedp;
}

/* do_return_read_buf -
 *      Lets the caller give back the buffer portion obtained from the preceding
 *      call to do_read().
 *
 * RETURNS:
 *      void
 */
/* ARGSUSED */
static void
do_return_read_buf( drive_t *drivep, char *bufp, size_t retcnt )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        /* REFERENCED */
        size_t ownedcnt;

#if 0
        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: return read buf: sz %d (0x%x)\n",
              retcnt,
              retcnt );
#endif

        /* assert protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_READ );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( contextp->dc_ownedp );
        ASSERT( bufp == contextp->dc_ownedp );

        /* calculate how much the caller owns
         */
        ASSERT( contextp->dc_nextp >= contextp->dc_ownedp );
        ownedcnt = ( size_t )( contextp->dc_nextp - contextp->dc_ownedp );
        ASSERT( ownedcnt == retcnt );

        /* take possession of buffer portion
         */
        contextp->dc_ownedp = 0;

        /* if caller is done with this record, take the buffer back
         * and (if ring in use) give buffer to ring for read-ahead.
         */
        if ( contextp->dc_nextp >= contextp->dc_dataendp ) {
                ASSERT( contextp->dc_nextp == contextp->dc_dataendp );
                if ( ! contextp->dc_singlethreadedpr ) {
                        contextp->dc_msgp->rm_op = RING_OP_READ;
                        Ring_put( contextp->dc_ringp, contextp->dc_msgp );
                        contextp->dc_msgp = 0;
                }
                contextp->dc_recp = 0;
                contextp->dc_recendp = 0;
                contextp->dc_dataendp = 0;
                contextp->dc_nextp = 0;
                contextp->dc_reccnt++;
        }
}

/* do_get_mark
 *      Get the current read tape location.
 *
 * RETURNS:
 *      void
 */
static void
do_get_mark( drive_t *drivep, drive_mark_t *markp )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        off64_t offset;

        mlog( MLOG_NITTY | MLOG_DRIVE,
              "rmt drive op: get mark\n" );

        /* assert protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_READ );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( ! contextp->dc_ownedp );

        /* the mark is simply the offset into the media file of the
         * next byte to be read.
         */
        offset = contextp->dc_reccnt * ( off64_t )tape_recsz;
        if ( contextp->dc_recp ) {
                offset += ( off64_t )( contextp->dc_nextp - contextp->dc_recp );
        }

        *markp = ( drive_mark_t )offset;

        return;
}

typedef enum { SEEKMODE_BUF, SEEKMODE_RAW } seekmode_t;

/* do_seek_mark
 *      Advance the tape to the given mark. does dummy reads to
 *      advance tape, as well as FSR if supported.
 *
 * RETURNS:
 *      0 on success
 *      DRIVE_ERROR_* on failure
 *
 */
static intgen_t
do_seek_mark( drive_t *drivep, drive_mark_t *markp )
{
        drive_context_t *contextp;
        off64_t wantedoffset;
        off64_t currentoffset;

        /* get the drive context
         */
        contextp = (drive_context_t *)drivep->d_contextp;

        /* assert protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_READ );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( ! contextp->dc_ownedp );


        /* the desired mark is passed by reference, and is really just an
         * offset into the raw (incl rec hdrs) read stream
         */
        wantedoffset = *( off64_t * )markp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "drive op: seek mark: %lld (0x%llx)\n",
              wantedoffset,
              wantedoffset );

        /* determine the current offset. assert that the wanted offset is
         * not less than the current offset.
         */
        currentoffset = contextp->dc_reccnt * ( off64_t )tape_recsz;
        if ( contextp->dc_recp ) {
                u_int32_t recoff;
#ifdef DEBUG
                rec_hdr_t *rechdrp = ( rec_hdr_t * )contextp->dc_recp;
#endif

                ASSERT( contextp->dc_nextp >= contextp->dc_recp );
                recoff = ( u_int32_t )( contextp->dc_nextp
                                        -
                                        contextp->dc_recp );
                ASSERT( recoff <= tape_recsz );
                ASSERT( rechdrp->rec_used <= tape_recsz );
                ASSERT( recoff >= STAPE_HDR_SZ );
                ASSERT( rechdrp->rec_used >= STAPE_HDR_SZ );
                ASSERT( recoff <= rechdrp->rec_used );
                currentoffset += ( off64_t )recoff;
        }
        ASSERT( wantedoffset >= currentoffset );
        
        /* if we are currently holding a record and the desired offset
         * is not within the current record, eat the current record.
         */
        if ( contextp->dc_recp ) {
                off64_t nextrecoffset;
                rec_hdr_t *rechdrp = ( rec_hdr_t * )contextp->dc_recp;

                nextrecoffset = contextp->dc_reccnt  * ( off64_t )tape_recsz
                                +
                                ( off64_t )rechdrp->rec_used;
                if ( wantedoffset >= nextrecoffset ) {
                        u_int32_t recoff;
                        size_t wantedcnt;
                        char *dummybufp;
                        size_t actualcnt;
                        intgen_t rval;

                        /* if this is the last record, the wanted offset
                         * must be just after it.
                         */
                        if ( rechdrp->rec_used < tape_recsz ) {
                                ASSERT( wantedoffset == nextrecoffset );
                        }

                        /* figure how much to ask for
                         */
                        ASSERT( contextp->dc_nextp >= contextp->dc_recp );
                        recoff = ( u_int32_t )( contextp->dc_nextp
                                                -
                                                contextp->dc_recp );
                        wantedcnt = ( size_t )( rechdrp->rec_used
                                                -
                                                recoff );

                        /* eat that much tape
                         */
                        rval = 0;
                        dummybufp = do_read( drivep,
                                             wantedcnt,
                                             &actualcnt,
                                             &rval );
                        if ( rval ) {
                                return rval;
                        }
                        ASSERT( actualcnt == wantedcnt );
                        do_return_read_buf( drivep, dummybufp, actualcnt );
                        currentoffset += ( off64_t )actualcnt;
                        ASSERT( currentoffset == nextrecoffset );
                        ASSERT( wantedoffset >= currentoffset );
                        ASSERT( ! contextp->dc_recp );
                        ASSERT( currentoffset
                                ==
                                contextp->dc_reccnt * ( off64_t )tape_recsz );
                }
        }

        /* if FSR is supported, while the desired offset is more than a record
         * away, eat records. this is tricky. if read-ahead has already read
         * to the desired point, no need to FSR: fall through to next code block
         * where we get there by eating excess records. if read-ahead has not
         * made it there, suspend read-ahead, eat those readahead records,
         * FSR the remaining, and resume readahead.
         */
        if ( contextp->dc_canfsrpr
             &&
             wantedoffset - currentoffset >= ( off64_t )tape_recsz ) {
                off64_t wantedreccnt;
                seekmode_t seekmode;
                
                ASSERT( ! contextp->dc_recp );
                wantedreccnt = wantedoffset / ( off64_t )tape_recsz;
                if ( contextp->dc_singlethreadedpr ) {
                        seekmode = SEEKMODE_RAW;
                } else {
                        seekmode = SEEKMODE_BUF;
                }
                ASSERT( wantedreccnt != 0 ); /* so NOP below can be
                                              * distinguished from use
                                              * in do_begin_read
                                              */
                while ( contextp->dc_reccnt < wantedreccnt ) {
                        off64_t recskipcnt64;
                        off64_t recskipcnt64remaining;

                        if ( seekmode == SEEKMODE_BUF ) {
                                ring_stat_t rs;
                                ASSERT( ! contextp->dc_msgp );
                                contextp->dc_msgp =
                                                Ring_get( contextp->dc_ringp );
                                rs = contextp->dc_msgp->rm_stat;
                                if ( rs == RING_STAT_ERROR ) {
                                        contextp->dc_errorpr = BOOL_TRUE;
                                        return contextp->dc_msgp->rm_rval;
                                }
                                if ( rs != RING_STAT_OK
                                     &&
                                     rs != RING_STAT_INIT
                                     &&
                                     rs != RING_STAT_NOPACK ) {
                                        ASSERT( 0 );
                                        contextp->dc_errorpr = BOOL_TRUE;
                                        return DRIVE_ERROR_CORE;
                                }
                                if ( rs == RING_STAT_OK ) {
                                        contextp->dc_reccnt++;
                                }
                                if ( rs == RING_STAT_NOPACK
                                     &&
                                     contextp->dc_msgp->rm_user
                                     ==
                                     wantedreccnt ) {
                                        seekmode = SEEKMODE_RAW;
                                }
                                contextp->dc_msgp->rm_op = RING_OP_NOP;
                                contextp->dc_msgp->rm_user = wantedreccnt;
                                Ring_put( contextp->dc_ringp,
                                          contextp->dc_msgp );
                                contextp->dc_msgp = 0;
                                continue;
                        }

                        ASSERT( contextp->dc_reccnt == contextp->dc_iocnt );
                        ASSERT( wantedreccnt > contextp->dc_reccnt );
                        recskipcnt64 = wantedreccnt - contextp->dc_reccnt;
                        recskipcnt64remaining = recskipcnt64;
                        while ( recskipcnt64remaining ) {
                                intgen_t recskipcnt;
                                intgen_t saved_errno;
                                intgen_t rval;

                                ASSERT( recskipcnt64remaining > 0 );
                                if ( recskipcnt64remaining > INTGENMAX ) {
                                        recskipcnt = INTGENMAX;
                                } else {
                                        recskipcnt = ( intgen_t )
                                                     recskipcnt64remaining;
                                }
                                ASSERT( recskipcnt > 0 );
                                rval = mt_op( contextp->dc_fd,
                                              MTFSR,
                                              recskipcnt );
                                saved_errno = errno;
                                if ( rval ) {
                                        mlog( MLOG_NORMAL | MLOG_WARNING | 
MLOG_DRIVE,
                                              "could not forward space %d "
                                              "tape blocks: "
                                              "rval == %d, errno == %d (%s)\n",
                                              rval,
                                              saved_errno,
                                              strerror( saved_errno ));
                                        return DRIVE_ERROR_MEDIA;
                                }
                                recskipcnt64remaining -= ( off64_t )recskipcnt;
                        }
                        contextp->dc_reccnt += recskipcnt64;
                        contextp->dc_iocnt += recskipcnt64;
                        currentoffset = contextp->dc_reccnt
                                        *
                                        ( off64_t )tape_recsz;
                        ASSERT( wantedoffset >= currentoffset );
                        ASSERT( wantedoffset - currentoffset
                                <
                                ( off64_t )tape_recsz );
                }
        }

        /* remove excess records by eating them. won't be any if
         * FSR supported
         */
        while ( wantedoffset - currentoffset >= ( off64_t )tape_recsz ) {
                size_t wantedcnt;
                char *dummybufp;
                size_t actualcnt;
                intgen_t rval;

                ASSERT( ! contextp->dc_recp );

                /* figure how much to ask for. to eat an entire record,
                 * ask for a record sans the header. do_read will eat
                 * the header, we eat the rest.
                 */
                wantedcnt = ( size_t )( tape_recsz - STAPE_HDR_SZ );

                /* eat that much tape
                 */
                rval = 0;
                dummybufp = do_read( drivep, wantedcnt, &actualcnt, &rval );
                if ( rval ) {
                        return rval;
                }
                ASSERT( actualcnt == wantedcnt );
                do_return_read_buf( drivep, dummybufp, actualcnt );
                ASSERT( ! contextp->dc_recp );
                currentoffset += ( off64_t )tape_recsz;
                ASSERT( currentoffset
                        ==
                        contextp->dc_reccnt * ( off64_t )tape_recsz );
        }

        /* eat that portion of the next record leading up to the
         * desired offset.
         */
        if ( wantedoffset != currentoffset ) {
                size_t wantedcnt;
                char *dummybufp;
                size_t actualcnt;

                ASSERT( wantedoffset > currentoffset );
                ASSERT( wantedoffset - currentoffset < ( off64_t )tape_recsz );
                wantedcnt = ( size_t )( wantedoffset - currentoffset );
                if ( contextp->dc_recp ) {
                        u_int32_t recoff;
#ifdef DEBUG
                        rec_hdr_t *rechdrp = ( rec_hdr_t * )contextp->dc_recp;
#endif
                        recoff = ( u_int32_t )( contextp->dc_nextp
                                                -
                                                contextp->dc_recp );
                        ASSERT( recoff <= tape_recsz );
                        ASSERT( rechdrp->rec_used <= tape_recsz );
                        ASSERT( recoff >= STAPE_HDR_SZ );
                        ASSERT( rechdrp->rec_used >= STAPE_HDR_SZ );
                        ASSERT( recoff <= rechdrp->rec_used );
                        ASSERT( recoff + wantedcnt <= rechdrp->rec_used );
                } else {
                        ASSERT( wantedcnt >= STAPE_HDR_SZ );
                        wantedcnt -= STAPE_HDR_SZ;
                }

                /* eat that much tape
                 */
                if ( wantedcnt > 0 ) {
                    intgen_t rval;
                    rval = 0;
                    dummybufp = do_read( drivep, wantedcnt, &actualcnt, &rval );
                    if ( rval ) {
                            return rval;
                    }
                    ASSERT( actualcnt == wantedcnt );
                    do_return_read_buf( drivep, dummybufp, actualcnt );
                }
        }

        /* as a sanity check, refigure the current offset and make sure
         * it is equal to the wanted offset
         */
        currentoffset = contextp->dc_reccnt * ( off64_t )tape_recsz;
        if ( contextp->dc_recp ) {
                u_int32_t recoff;
#ifdef DEBUG
                rec_hdr_t *rechdrp = ( rec_hdr_t * )contextp->dc_recp;
#endif

                ASSERT( contextp->dc_nextp >= contextp->dc_recp );
                recoff = ( u_int32_t )( contextp->dc_nextp
                                        -
                                        contextp->dc_recp );
                ASSERT( recoff <= tape_recsz );
                ASSERT( rechdrp->rec_used <= tape_recsz );
                ASSERT( recoff >= STAPE_HDR_SZ );
                ASSERT( rechdrp->rec_used >= STAPE_HDR_SZ );
                ASSERT( recoff <= rechdrp->rec_used );
                currentoffset += ( off64_t )recoff;
        }
        ASSERT( wantedoffset == currentoffset );

        return 0;
}

/* do_next_mark
 *      Advance the tape position to the next valid mark. if in
 *      error mode, first attempt to move past error by re-reading. if
 *      that fails, try to FSR. also deals with QIC possibility of
 *      reading a block not at a record boundary.
 *
 * RETURNS:
 *      0 on success
 *      DRIVE_ERROR_* on failure
 */
static intgen_t
do_next_mark( drive_t *drivep )
{
        drive_context_t *contextp = (drive_context_t *)drivep->d_contextp;
        rec_hdr_t *rechdrp;
        char *p;
        ix_t trycnt;
        const ix_t maxtrycnt = 5;
        intgen_t nread;
        off64_t markoff;
        intgen_t saved_errno;
        size_t tailsz;
        intgen_t rval;

        /* assert protocol being followed.
         */
        ASSERT( contextp->dc_mode == OM_READ );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( ! contextp->dc_ownedp );

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: next mark\n" );

        trycnt = 0;

        if ( contextp->dc_errorpr ) {
                goto resetring;
        } else {
                goto noerrorsearch;
        }

noerrorsearch:
        for ( ; ; ) {
                rval = getrec( drivep );
                if ( rval == DRIVE_ERROR_CORRUPTION ) {
                        goto resetring;
                } else if ( rval ) {
                        return rval;
                }
                rechdrp = ( rec_hdr_t * )contextp->dc_recp;

                ASSERT( rechdrp->first_mark_offset != 0 );
                if ( rechdrp->first_mark_offset != -1 ) {
                         off64_t markoff = rechdrp->first_mark_offset
                                           -
                                           rechdrp->file_offset;
                         off64_t curoff = ( off64_t )( contextp->dc_nextp
                                                       -
                                                       contextp->dc_recp );

                  if (markoff > (off64_t) tape_recsz)
                    {
                      char *p = (char *) &rechdrp->first_mark_offset;
                      char c;

                      c = p[0]; p[0] = p[7]; p[7] = c;
                      c = p[1]; p[1] = p[6]; p[6] = c;
                      c = p[2]; p[2] = p[5]; p[5] = c;
                      c = p[3]; p[3] = p[4]; p[4] = c;

                      markoff = rechdrp->first_mark_offset - 
rechdrp->file_offset;
                    }

                         ASSERT( markoff > 0 );
                         ASSERT( curoff > 0 );
                         if ( markoff >= curoff ) {
                                break;
                        }
                }

                if ( ! contextp->dc_singlethreadedpr ) {
                        Ring_put( contextp->dc_ringp,
                                  contextp->dc_msgp );
                        contextp->dc_msgp = 0;
                }
                contextp->dc_recp = 0;
                contextp->dc_reccnt++;
        }

        ASSERT( rechdrp->first_mark_offset - rechdrp->file_offset
                <=
                ( off64_t )tape_recsz );
        contextp->dc_nextp = contextp->dc_recp
                             +
                             ( size_t )( rechdrp->first_mark_offset
                                         -
                                         rechdrp->file_offset );
        ASSERT( contextp->dc_nextp <= contextp->dc_dataendp );
        ASSERT( contextp->dc_nextp >= contextp->dc_recp + STAPE_HDR_SZ );
        if ( contextp->dc_nextp == contextp->dc_dataendp ) {
                if ( ! contextp->dc_singlethreadedpr ) {
                        Ring_put( contextp->dc_ringp,
                                  contextp->dc_msgp );
                        contextp->dc_msgp = 0;
                }
                contextp->dc_recp = 0;
                contextp->dc_reccnt++;
        }

        return 0;

resetring:
        if ( ! contextp->dc_singlethreadedpr ) {
                Ring_reset( contextp->dc_ringp, contextp->dc_msgp );
                contextp->dc_msgp = 0;
        }
        contextp->dc_recp = 0;

        /* get a record buffer and cast a record header pointer
         */
        if ( contextp->dc_singlethreadedpr ) {
                contextp->dc_recp = contextp->dc_bufp;
        } else {
                contextp->dc_msgp = Ring_get( contextp->dc_ringp );
                ASSERT( contextp->dc_msgp->rm_stat == RING_STAT_INIT );
                contextp->dc_recp = contextp->dc_msgp->rm_bufp;
        }
        rechdrp = ( rec_hdr_t * )contextp->dc_recp;
        goto readrecord;

readrecord:
        trycnt++;
        if ( trycnt > maxtrycnt ) {
                mlog( MLOG_NORMAL | MLOG_DRIVE,
                      "unable to locate next mark in media file\n" );
                return DRIVE_ERROR_MEDIA;
        }

        nread = Read( drivep, contextp->dc_recp, tape_recsz, &saved_errno );
        goto validateread;

validateread:
        if ( nread == ( intgen_t )tape_recsz ) {
                goto validatehdr;
        }

        if ( nread >= 0 ) {
                ASSERT( ( size_t )nread <= tape_recsz );
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "short read (nread == %d, record size == %d)\n",
                      nread,
                      tape_recsz );
                goto getbeyonderror;
        }

        /* some other error
         */
        mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
              "unexpected error attempting to read record: "
              "read returns %d, errno %s (%s)\n",
              nread,
              errno,
              strerror( errno ));

        goto getbeyonderror;

validatehdr:
        rval = record_hdr_validate( drivep, contextp->dc_recp, BOOL_FALSE );

        if ( rval
             &&
             ( contextp->dc_isQICpr == BOOL_TRUE
               ||
               contextp->dc_isQICpr == BOOL_UNKNOWN )) {
                goto huntQIC;
        }

        if ( rval ) {
                goto readrecord;
        }

        contextp->dc_reccnt = rechdrp->file_offset / ( off64_t )tape_recsz;
        contextp->dc_iocnt = contextp->dc_reccnt + 1;
        if ( rechdrp->first_mark_offset < 0 ) {
                mlog( MLOG_NORMAL | MLOG_DRIVE,
                      "valid record %lld but no mark\n",
                      contextp->dc_iocnt - 1 );
                goto readrecord;
        }

        ASSERT( ! ( rechdrp->file_offset % ( off64_t )tape_recsz ));
        markoff = rechdrp->first_mark_offset - rechdrp->file_offset;
        ASSERT( markoff >= ( off64_t )STAPE_HDR_SZ );
        ASSERT( markoff < ( off64_t )tape_recsz );
        ASSERT( rechdrp->rec_used > STAPE_HDR_SZ );
        ASSERT( rechdrp->rec_used < tape_recsz );

        goto alliswell;

alliswell:
        contextp->dc_nextp = contextp->dc_recp + ( size_t )markoff;
        ASSERT( ! ( rechdrp->file_offset % ( off64_t )tape_recsz ));
        contextp->dc_reccnt = rechdrp->file_offset / ( off64_t )tape_recsz;
        contextp->dc_iocnt = contextp->dc_reccnt + 1;
        contextp->dc_recendp = contextp->dc_recp + tape_recsz;
        contextp->dc_dataendp = contextp->dc_recp + rechdrp->rec_used;
        ASSERT( contextp->dc_dataendp <= contextp->dc_recendp );
        ASSERT( contextp->dc_nextp < contextp->dc_dataendp );
        contextp->dc_errorpr = BOOL_FALSE;

        mlog( MLOG_NORMAL | MLOG_DRIVE,
              "resynchronized at record %lld "
              "offset %u\n",
              contextp->dc_iocnt - 1,
              contextp->dc_nextp
              -
              contextp->dc_recp );
        return 0;

getbeyonderror:
        rval = mt_op( contextp->dc_fd, MTFSR, 1 );
        saved_errno = errno;

        if ( rval ) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "could not forward space one tape block beyond "
                      "read error: rval == %d, errno == %d (%s)\n",
                      rval,
                      saved_errno,
                      strerror( saved_errno ));
                return DRIVE_ERROR_MEDIA;
        }

        goto readrecord;

huntQIC:
        /* we have a full tape_recsz record. look for the magic number at the
         * beginning of each 512 byte block. If we find one, shift that and
         * the following blocks to the head of the record buffer, and try
         * to read the remaining blocks in the record.
         */
        for ( p = contextp->dc_recp + QIC_BLKSZ
              ;
              p < contextp->dc_recendp
              ;
              p += QIC_BLKSZ ) {
                if ( *( u_int64_t * )p == STAPE_MAGIC ) {
                        goto adjustQIC;
                }
        }

        goto readrecord;

adjustQIC:
        tailsz = ( size_t )( contextp->dc_recendp - p );
        memcpy( ( void * )contextp->dc_recp,
                ( void * )p,
                tailsz );
        nread = Read( drivep,
                      contextp->dc_recp + tailsz,
                      tape_recsz - tailsz,
                      &saved_errno );

        goto validateread;
}

/* do_end_read
 *      Discard any buffered reads.
 *      Tell the reader/writer process to wait.
 *
 * RETURNS:
 *      void
 */
static void
do_end_read( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: end read\n" );

        /* assert protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_READ );
        ASSERT( ! contextp->dc_ownedp );

        /* In the scsi version, read_label() does a status command to the
         * drive to then decide if doing a 'fsf' is appropriate.  For minrmt,
         * we don't have the status command so we need to space forward at
         * the moment we know we need to go forward... which is here.
         */
        ( void )fsf_and_verify( drivep );

        if ( ! contextp->dc_singlethreadedpr ) {
                Ring_reset( contextp->dc_ringp, contextp->dc_msgp );
                contextp->dc_msgp = 0;
        }

        contextp->dc_recp = 0;
        contextp->dc_mode = OM_NONE;
}

/* do_begin_write
 *      prepare drive for writing. set up drive context. write a header record.
 *
 * RETURNS:
 *      0 on success
 *      DRIVE_ERROR_... on failure
 */
static intgen_t
do_begin_write( drive_t *drivep )
{
        drive_context_t         *contextp;
        drive_hdr_t             *dwhdrp;
        global_hdr_t            *gwhdrp;
        rec_hdr_t               *tpwhdrp;
        rec_hdr_t               *rechdrp;
        intgen_t                rval;
        media_hdr_t             *mwhdrp;
        content_hdr_t           *ch;
        content_inode_hdr_t     *cih;

        global_hdr_t            *tmpgh;
        drive_hdr_t             *tmpdh;
        media_hdr_t             *tmpmh;
        rec_hdr_t               *tmprh;
        content_hdr_t           *tmpch;
        content_inode_hdr_t     *tmpcih;

        /* get drive context
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: begin write\n" );

        /* verify protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_NONE );
        ASSERT( ! drivep->d_markrecheadp );
        ASSERT( ! contextp->dc_recp );

        /* get pointers into global write header
         */
        gwhdrp = drivep->d_gwritehdrp;
        dwhdrp = drivep->d_writehdrp;
        tpwhdrp = ( rec_hdr_t * )dwhdrp->dh_specific;

        /* must already be open. The only way to open is to do a begin_read.
         * so all interaction with tape requires reading first.
         */
        ASSERT( contextp->dc_fd != -1 );

        /* fill in write header's drive specific info
         */
        tpwhdrp->magic = STAPE_MAGIC;
        tpwhdrp->version = STAPE_VERSION;
        tpwhdrp->blksize = ( int32_t )tape_blksz;
        tpwhdrp->recsize = ( int32_t )tape_recsz;
        tpwhdrp->rec_used = 0;
        tpwhdrp->file_offset = 0;
        tpwhdrp->first_mark_offset= 0;
        tpwhdrp->capability = drivep->d_capabilities;

        /* get a record buffer. will be used for the media file header,
         * and is needed to "prime the pump" for first call to do_write.
         */
        ASSERT( ! contextp->dc_recp );
        if ( contextp->dc_singlethreadedpr ) {
                ASSERT( contextp->dc_bufp );
                contextp->dc_recp = contextp->dc_bufp;
        } else {
                ASSERT( contextp->dc_ringp );
                ASSERT( ! contextp->dc_msgp );
                contextp->dc_msgp = Ring_get( contextp->dc_ringp );
                ASSERT( contextp->dc_msgp->rm_stat == RING_STAT_INIT );
                contextp->dc_recp = contextp->dc_msgp->rm_bufp;
        }

        /* write the record. be sure to prevent a record checksum from
         * being produced!
         */
        contextp->dc_iocnt = 0;
        memset( ( void * )contextp->dc_recp, 0, tape_recsz );

        tmpgh  = (global_hdr_t *)contextp->dc_recp;
        tmpdh  = (drive_hdr_t *)tmpgh->gh_upper;
        tmpmh  = (media_hdr_t *)tmpdh->dh_upper;
        tmprh  = (rec_hdr_t *)tmpdh->dh_specific;
        tmpch  = (content_hdr_t *)tmpmh->mh_upper;
        tmpcih = (content_inode_hdr_t *)tmpch->ch_specific;

        mwhdrp = (media_hdr_t *)dwhdrp->dh_upper;
        ch = (content_hdr_t *)mwhdrp->mh_upper;
        cih = (content_inode_hdr_t *)ch->ch_specific;

        xlate_global_hdr(gwhdrp, tmpgh, 1);
        xlate_drive_hdr(dwhdrp, tmpdh, 1);
        xlate_media_hdr(mwhdrp, tmpmh, 1);
        xlate_content_hdr(ch, tmpch, 1);
        xlate_content_inode_hdr(cih, tmpcih, 1);
        xlate_rec_hdr(tpwhdrp, tmprh, 1);

        /* checksum the global header
         */
        global_hdr_checksum_set( tmpgh );

        rval = write_record( drivep, contextp->dc_recp, BOOL_TRUE, BOOL_FALSE );
        if ( rval ) {
                if ( ! contextp->dc_singlethreadedpr ) {
                        Ring_reset( contextp->dc_ringp, contextp->dc_msgp );
                        contextp->dc_msgp = 0;
                }
                contextp->dc_recp = 0;
                return rval;
        }

        /* prepare the drive context. must have a record buffer ready to
         * go, header initialized.
         */
        ASSERT( ! contextp->dc_ownedp );
        contextp->dc_reccnt = 1; /* count the header record */
        contextp->dc_recendp = contextp->dc_recp + tape_recsz;
        contextp->dc_nextp = contextp->dc_recp + STAPE_HDR_SZ;

        /* intialize header in new record
         */
        rechdrp = (rec_hdr_t*)contextp->dc_recp;        
        rechdrp->magic = STAPE_MAGIC;
        rechdrp->version = STAPE_VERSION;
        rechdrp->file_offset = contextp->dc_reccnt * ( off64_t )tape_recsz;
        rechdrp->blksize = ( int32_t )tape_blksz;
        rechdrp->recsize = ( int32_t )tape_recsz;
        rechdrp->capability = drivep->d_capabilities;
        rechdrp->first_mark_offset = -1LL;
        uuid_copy( rechdrp->dump_uuid, gwhdrp->gh_dumpid );

        /* set mode now so operators will work
         */
        contextp->dc_mode = OM_WRITE;

        contextp->dc_errorpr = BOOL_FALSE;

        return 0;
}

/* do_set_mark - queue a mark request. if first mark set in record, record
 * in record.
 */
static void
do_set_mark( drive_t *drivep,
             drive_mcbfp_t cbfuncp,
             void *cbcontextp,
             drive_markrec_t *markrecp )
{
        drive_context_t *contextp;
        off64_t nextoff;
        rec_hdr_t *rechdrp;

        /* get drive context
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

        /* verify protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_WRITE );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( ! contextp->dc_ownedp );
        ASSERT( contextp->dc_recp );
        ASSERT( contextp->dc_nextp );

        /* calculate and fill in the mark record offset
         */
        ASSERT( contextp->dc_recp );
        nextoff = contextp->dc_reccnt * ( off64_t )tape_recsz
                  +
                  ( off64_t )( contextp->dc_nextp - contextp->dc_recp );
        markrecp->dm_log = ( drive_mark_t )nextoff;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: set mark: %lld (0x%llx)\n",
              nextoff,
              nextoff );

        /* note the location of the first mark in this tape record.
         */
        rechdrp = ( rec_hdr_t * )contextp->dc_recp;
        if ( rechdrp->first_mark_offset == -1LL ) {
                ASSERT( nextoff != -1LL );
                rechdrp->first_mark_offset = nextoff;
        }

        /* put the mark on the tail of the queue.
         */
        markrecp->dm_cbfuncp = cbfuncp;
        markrecp->dm_cbcontextp = cbcontextp;
        markrecp->dm_nextp = 0;
        if ( drivep->d_markrecheadp == 0 ) {
                drivep->d_markrecheadp = markrecp;
                drivep->d_markrectailp = markrecp;
        } else {
                ASSERT( drivep->d_markrectailp );
                drivep->d_markrectailp->dm_nextp = markrecp;
                drivep->d_markrectailp = markrecp;
        }
}

/* do_get_write_buf - supply the caller with some or all of the current record
 * buffer. the supplied buffer must be fully returned (via a single call to
 * do_write) prior to the next call to do_get_write_buf.
 *
 * RETURNS:
 *      the address of a buffer
 *      "actual_bufszp" points to the size of the buffer
 */
static char *
do_get_write_buf( drive_t *drivep, size_t wantedcnt, size_t *actualcntp )
{
        drive_context_t *contextp;
        size_t remainingcnt;
        size_t actualcnt;

        /* get drive context
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

        /* verify protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_WRITE );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( ! contextp->dc_ownedp );
        ASSERT( contextp->dc_recp );
        ASSERT( contextp->dc_nextp );
        ASSERT( contextp->dc_nextp < contextp->dc_recendp );

        /* figure how much is available; supply the min of what is
         * available and what is wanted.
         */
        remainingcnt = ( size_t )( contextp->dc_recendp - contextp->dc_nextp );
        actualcnt = min( remainingcnt, wantedcnt );
        *actualcntp = actualcnt;
        contextp->dc_ownedp = contextp->dc_nextp;
        contextp->dc_nextp += actualcnt;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: get write buf: wanted %u (0x%x) actual %u 
(0x%x)\n",
              wantedcnt,
              wantedcnt,
              actualcnt,
              actualcnt );

        return contextp->dc_ownedp;
}

/* do_write - accept ownership of the portion of the current record buffer
 * being returned by the caller. if returned portion includes end of record
 * buffer, write the buffer and get and prepare a new one in anticipation of
 * the next call to do_get_write_buf. also, process any queued marks which
 * are guaranteed to be committed to media. NOTE: the caller must return
 * everything obtained with the preceeding call to do_get_write_buf.
 *
 * RETURNS:
 *      0 on success
 *      non 0 on error
 */
/* ARGSUSED */
static intgen_t
do_write( drive_t *drivep, char *bufp, size_t retcnt )
{
        drive_context_t *contextp;
        rec_hdr_t *rechdrp;
        global_hdr_t *gwhdrp;
        size_t heldcnt;
        off64_t last_rec_wrtn_wo_err; /* zero-based index */
        intgen_t rval;

        /* get drive context and pointer to global write hdr
         */
        contextp = ( drive_context_t * )drivep->d_contextp;
        gwhdrp = drivep->d_gwritehdrp;

        /* calculate how many bytes we believe caller is holding
         */
        heldcnt = ( size_t )( contextp->dc_nextp - contextp->dc_ownedp );

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: write: retcnt %u (0x%x) heldcnt %u (0x%x)\n",
              retcnt,
              retcnt,
              heldcnt,
              heldcnt );

        /* verify protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_WRITE );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( contextp->dc_ownedp );
        ASSERT( contextp->dc_recp );
        ASSERT( contextp->dc_nextp );
        ASSERT( contextp->dc_nextp <= contextp->dc_recendp );

        /* verify the caller is returning exactly what is held
         */
        ASSERT( bufp == contextp->dc_ownedp );
        ASSERT( retcnt == heldcnt );

        /* take it back
         */
        contextp->dc_ownedp = 0;

        /* if some portion of the record buffer has not yet been
         * held by the client, just return.
         */
        if ( contextp->dc_nextp < contextp->dc_recendp ) {
                return 0;
        }

        /* record in record header that entire record is used
         */
        rechdrp = ( rec_hdr_t * )contextp->dc_recp;
        rechdrp->rec_used = tape_recsz;
        
        /* write out the record buffer and get a new one.
         */
        if ( contextp->dc_singlethreadedpr ) {
                rval = write_record( drivep, contextp->dc_recp, BOOL_TRUE, 
BOOL_TRUE );
                last_rec_wrtn_wo_err = contextp->dc_reccnt; /* conv cnt to ix */
        } else {
                contextp->dc_msgp->rm_op = RING_OP_WRITE;
                contextp->dc_msgp->rm_user = contextp->dc_reccnt;
                Ring_put( contextp->dc_ringp,
                          contextp->dc_msgp );
                contextp->dc_msgp = 0;
                contextp->dc_msgp = Ring_get( contextp->dc_ringp );
                contextp->dc_recp = contextp->dc_msgp->rm_bufp;
                last_rec_wrtn_wo_err = contextp->dc_msgp->rm_user;
                switch( contextp->dc_msgp->rm_stat ) {
                case RING_STAT_OK:
                case RING_STAT_INIT:
                        rval = 0;
                        break;
                case RING_STAT_ERROR:
                        rval = contextp->dc_msgp->rm_rval;
                        break;
                default:
                        ASSERT( 0 );
                        return DRIVE_ERROR_CORE;
                }
        }

        /* check for errors. if none, commit all marks before a safety margin
         * before the no error offset.
         */
        if ( rval ) {
                contextp->dc_errorpr = BOOL_TRUE;
        } else {
                off64_t recs_wrtn_wo_err;
                off64_t recs_committed;
                off64_t bytes_committed;
                recs_wrtn_wo_err = last_rec_wrtn_wo_err + 1;
                recs_committed = recs_wrtn_wo_err - contextp->dc_lostrecmax;
                bytes_committed = recs_committed * ( off64_t )tape_recsz;
                drive_mark_commit( drivep, bytes_committed );
        }

        /* adjust context
         */
        contextp->dc_reccnt++;
        contextp->dc_recendp = contextp->dc_recp + tape_recsz;
        contextp->dc_nextp = contextp->dc_recp
                             +
                             STAPE_HDR_SZ;

        /* intialize header in new record
         */
        rechdrp = ( rec_hdr_t * )contextp->dc_recp;
        rechdrp->magic = STAPE_MAGIC;
        rechdrp->version = STAPE_VERSION;
        rechdrp->file_offset = contextp->dc_reccnt * ( off64_t )tape_recsz;
        rechdrp->blksize = ( int32_t )tape_blksz;
        rechdrp->recsize = ( int32_t )tape_recsz;
        rechdrp->capability = drivep->d_capabilities;
        rechdrp->first_mark_offset = -1LL;
        uuid_copy( rechdrp->dump_uuid, gwhdrp->gh_dumpid );

        return rval;
}

/* do_get_align_cnt -
 *      Returns the number of bytes which must be written to
 *      cause the next call to get_write_buf() to be page-aligned.
 *
 * RETURNS:
 *      the number of bytes to next alignment
 */
static size_t
do_get_align_cnt( drive_t * drivep )
{
        char *next_alignment_point;
        __psint_t next_alignment_off;
        drive_context_t *contextp;

        contextp = ( drive_context_t * )drivep->d_contextp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: get align cnt\n" );

        /* verify protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_WRITE );
        ASSERT( ! contextp->dc_errorpr );
        ASSERT( ! contextp->dc_ownedp );
        ASSERT( contextp->dc_recp );
        ASSERT( contextp->dc_nextp );
        ASSERT( contextp->dc_nextp < contextp->dc_recendp );

        /* calculate the next alignment point at or beyond the current nextp.
         * the following algorithm works because all buffers are page-aligned
         * and a multiple of PGSZ.
         */
        next_alignment_off = ( __psint_t )contextp->dc_nextp;
        next_alignment_off +=  PGMASK;
        next_alignment_off &= ~PGMASK;
        next_alignment_point = ( char * )next_alignment_off;
        ASSERT( next_alignment_point <= contextp->dc_recendp );

        /* return the number of bytes to the next alignment offset
         */
        ASSERT( next_alignment_point >= contextp->dc_nextp );
        return ( size_t )( next_alignment_point - contextp->dc_nextp );
}

/* do_end_write - pad and write pending record if any client data in it.
 * flush all pending writes. write a file mark. figure how many records are
 * guaranteed to be on media, and commit/discard marks accordingly.
 * RETURNS:
 *       0 on success
 *      DRIVE_ERROR_* on failure
 */
static intgen_t
do_end_write( drive_t *drivep, off64_t *ncommittedp )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        off64_t first_rec_w_err; /* zero-based index */
        off64_t recs_wtn_wo_err;
        off64_t recs_guaranteed;
        off64_t bytes_committed;

        intgen_t rval;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: end write\n" );

        /* verify protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_WRITE );
        ASSERT( ! contextp->dc_ownedp );
        ASSERT( contextp->dc_recp );
        ASSERT( contextp->dc_nextp );
        ASSERT( contextp->dc_nextp >= contextp->dc_recp + STAPE_HDR_SZ );
        ASSERT( contextp->dc_nextp < contextp->dc_recendp );

        /* pre-initialize return of count of bytes committed to media
         */
        *ncommittedp = 0;

        /* if in error mode, a write error occured earlier. don't bother
         * to do anymore writes, just cleanup and return 0. don't need to
         * do commits, already done when error occured.
         */
        if ( contextp->dc_errorpr ) {
                if ( ! contextp->dc_singlethreadedpr ) {
                        Ring_reset( contextp->dc_ringp, contextp->dc_msgp );
                        contextp->dc_msgp = 0;
                }
                contextp->dc_mode = OM_NONE;
                drive_mark_discard( drivep );
                *ncommittedp = ( contextp->dc_iocnt - contextp->dc_lostrecmax )
                               *
                               ( off64_t )tape_recsz;
                contextp->dc_recp = 0;
                return 0;
        }

        /* if any user data in current record buffer, send it out.
         */
        if ( contextp->dc_nextp > contextp->dc_recp + STAPE_HDR_SZ ) {
                rec_hdr_t *rechdrp;
                size_t bufusedcnt;

                rechdrp = ( rec_hdr_t * )contextp->dc_recp;
                bufusedcnt = ( size_t )( contextp->dc_nextp
                                         -
                                         contextp->dc_recp );
                rechdrp->rec_used = bufusedcnt;
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "writing padded last record\n" );
                if ( contextp->dc_singlethreadedpr ) {
                        rval = write_record( drivep,
                                             contextp->dc_recp,
                                             BOOL_TRUE, BOOL_TRUE );
                } else {
                        ASSERT( contextp->dc_msgp );
                        contextp->dc_msgp->rm_op = RING_OP_WRITE;
                        contextp->dc_msgp->rm_user = contextp->dc_reccnt;
                        Ring_put( contextp->dc_ringp,
                                  contextp->dc_msgp );
                        contextp->dc_msgp = 0;
                        contextp->dc_msgp = Ring_get( contextp->dc_ringp );
                        switch( contextp->dc_msgp->rm_stat ) {
                        case RING_STAT_OK:
                        case RING_STAT_INIT:
                                rval = 0;
                                break;
                        case RING_STAT_ERROR:
                                rval = contextp->dc_msgp->rm_rval;
                                break;
                        default:
                                ASSERT( 0 );
                                contextp->dc_recp = 0;
                                return DRIVE_ERROR_CORE;
                        }
                }
                contextp->dc_reccnt++;
        } else {
                rval = 0;
        }

        /* now flush the ring until error or tracer bullet seen.
         * note the record index in the first msg received with
         * an error indication. this will be used to calculate
         * the number of records guaranteed to have made it onto
         * media, and that will be used to select which marks
         * to commit and which to discard.
         */
        if ( rval ) {
                first_rec_w_err = contextp->dc_iocnt;
                        /* because dc_iocnt bumped by write_record
                         * only if no error
                         */
        } else {
                first_rec_w_err = -1L;
        }
        if ( ! contextp->dc_singlethreadedpr ) {
                while ( ! rval ) {
                        ASSERT( contextp->dc_msgp );
                        contextp->dc_msgp->rm_op = RING_OP_TRACE;
                        Ring_put( contextp->dc_ringp,
                                  contextp->dc_msgp );
                        contextp->dc_msgp = 0;
                        contextp->dc_msgp = Ring_get( contextp->dc_ringp );
                        if ( contextp->dc_msgp->rm_op == RING_OP_TRACE ) {
                                break;
                        }
                        switch( contextp->dc_msgp->rm_stat ) {
                        case RING_STAT_OK:
                        case RING_STAT_INIT:
                                ASSERT( rval == 0 );
                                break;
                        case RING_STAT_ERROR:
                                rval = contextp->dc_msgp->rm_rval;
                                first_rec_w_err = contextp->dc_msgp->rm_user;
                                break;
                        default:
                                ASSERT( 0 );
                                contextp->dc_recp = 0;
                                return DRIVE_ERROR_CORE;
                        }
                }
        }

        /* the ring is now flushed. reset
         */
        if ( ! contextp->dc_singlethreadedpr ) {
                Ring_reset( contextp->dc_ringp, contextp->dc_msgp );
                contextp->dc_msgp = 0;
        }
        contextp->dc_recp = 0;

        /* if no error so far, write a file mark. this will have the
         * side-effect of flushing the driver/drive of pending writes,
         * exposing any write errors.
         */
        if ( ! rval ) {
                intgen_t weofrval;

                weofrval = mt_op( contextp->dc_fd, MTWEOF, 1 );
                if ( weofrval ) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                              "MTWEOF returned %d: errno == %d (%s)\n",
                              weofrval,
                              errno,
                              strerror( errno ));
                        rval = DRIVE_ERROR_EOM;
                }
        }

        /* if an error occured, first_rec_w_err now contains
         * the count of records written without error, all of which
         * were full records. subtract from this dc_lostrecmax,
         * and we have the number of records guaranteed to have made
         * it to media.
         *
         * if no errors have occured, all I/O has been committed.
         * we can use dc_iocnt, which is the count of records actually
         * written without error.
         *
         * commit marks contained in committed records, discard the rest,
         * and return rval. return by reference the number of bytes committed
         * to tape.
         */
        if ( rval ) {
                ASSERT( first_rec_w_err >= 0 );
                recs_wtn_wo_err = first_rec_w_err;
                recs_guaranteed = recs_wtn_wo_err - contextp->dc_lostrecmax;
        } else {
                ASSERT( first_rec_w_err == -1 );
                recs_wtn_wo_err = contextp->dc_iocnt;
                recs_guaranteed = recs_wtn_wo_err;
        }
        bytes_committed = recs_guaranteed * ( off64_t )tape_recsz;
        drive_mark_commit( drivep, bytes_committed );
        drive_mark_discard( drivep );
        contextp->dc_mode = OM_NONE;
        *ncommittedp = bytes_committed;
        return rval;
}

/* do_fsf
 *      Advance the tape by count files.
 *
 * RETURNS:
 *      number of media files skipped
 *      *statp set to zero or DRIVE_ERROR_...
 */
static intgen_t
do_fsf( drive_t *drivep, intgen_t count, intgen_t *statp )
{
        int             i, done, op_failed, opcount;
        drive_context_t *contextp;

        /* get drive context
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

        /* verify protocol being followed
         */
        ASSERT( contextp->dc_mode == OM_NONE );

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: fsf: count %d\n",
              count );

        ASSERT( count );
        ASSERT( contextp->dc_mode == OM_NONE );

        for ( i = 0 ; i < count; i++ ) {
                done = 0;
                opcount = 2;

                /* the tape may encounter errors will trying to
                 * reach the next file.
                 */
                while ( !done ) {

                        /* advance the tape to the next file mark
                         * NOTE:
                         *      ignore return code
                         */
                        mlog( MLOG_VERBOSE | MLOG_DRIVE,
                              "advancing tape to next media file\n");

                        op_failed = 0;
                        ASSERT( contextp->dc_fd >= 0 );
                        if ( mt_op( contextp->dc_fd, MTFSF, 1 ) ) {
                                op_failed = 1;
                        }


                        /* Check for a file mark to
                         * determine if the fsf command worked.
                         */
                        if  (!op_failed) {
                                done = 1;
                        }

                        /* If the FSF command has been issued multiple
                         * times, and a file mark has not been reached,
                         * return an error.
                         */
                        if ( --opcount < 0 ) {
                                mlog( MLOG_VERBOSE | MLOG_DRIVE,
                                        "FSF tape command failed\n");

                                *statp = DRIVE_ERROR_DEVICE;
                                return i;
                        }
                }
        }

        return count;
}

/* do_bsf
 *      Backup the tape by count files. zero means just back up to the beginning
 *      of the last media file read or written.
 *
 * RETURNS:
 *      number of media files skipped
 *      *statp set to zero or DRIVE_ERROR_...
 */
static intgen_t
do_bsf( drive_t *drivep, intgen_t count, intgen_t *statp )
{
#ifdef DEBUG
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
#endif
        intgen_t skipped;
        intgen_t rval;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: bsf: count %d\n",
              count );

        ASSERT( contextp->dc_mode == OM_NONE );

        *statp = 0;

        /* back space - places us to left of previous file mark
         * if we hit BOT, return
         */
        ASSERT( drivep->d_capabilities & DRIVE_CAP_BSF );
        rval = bsf_and_verify( drivep );
        if (rval) {
                if (errno == ENOSPC/*IRIX*/ || errno == EIO/*Linux*/) {
                        if ( count ) {
                                mlog( MLOG_DEBUG | MLOG_DRIVE,
                                      "rmt drive op: bsf reached BOT "
                                            "unexpectedly (%d files to go)\n",
                                       count);
                                /* set statp - BOT is unexpected */
                        } else {
                                mlog( MLOG_DEBUG | MLOG_DRIVE,
                                      "rmt drive op: bsf reached BOT\n");
                                /* don't set statp, BOT is fine */
                                return 0;
                        }
                }
                *statp = DRIVE_ERROR_DEVICE;
                return 0;
        }

        /* now loop, skipping media files
         */
        for ( skipped = 0 ; skipped < count ; skipped++ ) {

                /* move to the left of the next file mark on the left.
                 * check for BOT.
                 */
                rval = bsf_and_verify( drivep );
                if (rval) {
                        if (errno == ENOSPC/*IRIX*/ || errno == EIO/*Linux*/) {
                                if ( count - skipped - 1 ) {
                                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                                              "rmt drive op: bsf reached BOT "
                                              "unexpectedly (%d files to go)\n",
                                              count - skipped - 1);
                                        /* set statp - BOT is unexpected */
                                        *statp = DRIVE_ERROR_DEVICE;
                                        return skipped + 1;
                                } else {
                                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                                              "rmt drive op: bsf reached 
BOT\n");
                                        /* don't set statp - BOT is fine */
                                        return skipped + 1;
                                }
                                *statp = DRIVE_ERROR_DEVICE;
                                return skipped;
                        }
                }
        }

        /* we're not at BOT, so we need to move to the right side of
         * the file mark
         */
        ( void )fsf_and_verify( drivep );

        /* indicate the number of media files skipped
         */
        return skipped;
}

/* do_rewind
 *      Position the tape at the beginning of the recorded media.
 *
 * RETURNS:
 *      0 on sucess
 *      DRIVE_ERROR_* on failure
 */
static intgen_t
do_rewind( drive_t *drivep )
{
#ifdef DEBUG
        drive_context_t *contextp = (drive_context_t *)drivep->d_contextp;
#endif

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: rewind\n" );

        ASSERT( contextp->dc_mode == OM_NONE );
        ASSERT( contextp->dc_fd >= 0 );

        /* use validating tape rewind util func
         */
        ( void )rewind_and_verify( drivep );
        return 0;
}

/* do_erase
 *      erase media from beginning
 *
 * RETURNS:
 *      0 on sucess
 *      DRIVE_ERROR_* on failure
 */
static intgen_t
do_erase( drive_t *drivep )
{
#ifdef DEBUG
        drive_context_t *contextp = (drive_context_t *)drivep->d_contextp;
#endif

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: erase\n" );

        ASSERT( contextp->dc_mode == OM_NONE );
        ASSERT( contextp->dc_fd >= 0 );

        /* use validating tape rewind util func
         */
        (void )rewind_and_verify( drivep );

        /* use validating tape erase util func
         */
        ( void )erase_and_verify( drivep ); 

        /* rewind again
         */
        ( void )rewind_and_verify( drivep );

        /* close the drive so we start from scratch
         */
        Close( drivep );
        return 0;
}

/* do_eject
 *      pop the tape out - may be a nop on some drives
 *
 * RETURNS:
 *      0 on sucess
 *      DRIVE_ERROR_DEVICE on failure
 */
static intgen_t
do_eject_media( drive_t *drivep )
{
        drive_context_t *contextp = (drive_context_t *)drivep->d_contextp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: eject media\n" );

        /* drive must be open
         */
        ASSERT( contextp->dc_fd >= 0 );
        ASSERT( contextp->dc_mode == OM_NONE );

#if 0
        /* issue tape unload
         */
        if ( contextp->dc_unloadokpr ) {
                ( void )mt_op( contextp->dc_fd, MTUNLOAD, 0 );
        }
#endif

        /* close the device driver
         */
        Close( drivep );

        return 0;
}

/* do_get_device_class
 *      Return the device class
 *
 * RETURNS:
 *      always returns DEVICE_TAPE_REMOVABLE
 */
/* ARGSUSED */
static intgen_t
do_get_device_class( drive_t *drivep)
{
        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: get device class\n" );

        return DEVICE_TAPE_REMOVABLE;
}

/* do_display_metrics - print ring stats if using I/O ring
 */
static void
do_display_metrics( drive_t *drivep )
{
        drive_context_t *contextp = (drive_context_t *)drivep->d_contextp;
        ring_t *ringp = contextp->dc_ringp;

        if ( ringp ) {
                if ( drivecnt > 1 ) {
                        mlog( MLOG_NORMAL | MLOG_BARE | MLOG_NOLOCK | 
MLOG_DRIVE,
                              "drive %u ",
                              drivep->d_index );
                }
                display_ring_metrics( drivep,
                                      MLOG_NORMAL | MLOG_BARE | MLOG_NOLOCK );
        }
}

/* do_quit
 */
static void
do_quit( drive_t *drivep )
{
        drive_context_t *contextp = (drive_context_t *)drivep->d_contextp;
        ring_t *ringp = contextp->dc_ringp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op: quit\n" );

        /* print the ring metrics and kill the ring
         */
        if ( ringp ) {
                display_ring_metrics( drivep, MLOG_VERBOSE );

                /* tell slave to die
                 */
                mlog( (MLOG_NITTY + 1) | MLOG_DRIVE,
                      "ring op: destroy\n" );
                ring_destroy( ringp );
        }

        Close(drivep);

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt drive op quit complete\n" );
}

static double
percent64( off64_t num, off64_t denom )
{
        return ( double )( num * 100 ) / ( double )denom;
}


/* read_label
 *      responsible for reading and validating the first record from a
 *      media file. can assume that prepare_drive has already been run
 *      on this tape. if read fails due to an encounter with a file mark,
 *      end of media, or end of data, position the media to allow an
 *      append.
 *
 * RETURNS:
 *      0 on success
 *      DRIVE_ERROR_* on failure
 */
static intgen_t
read_label( drive_t *drivep )
{
        drive_context_t *contextp;
        intgen_t nread;
        intgen_t saved_errno;
        intgen_t rval;

        /* initialize context ptr
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

        /* read the first record of the media file directly
         */
        nread = Read( drivep,
                      contextp->dc_recp,
                      tape_recsz,
                      &saved_errno );

        /* if a read error, get status
         */
        if ( nread != ( intgen_t )tape_recsz ) {
                ASSERT( nread < ( intgen_t )tape_recsz );
        } 

        /* check for an unexpected errno
         */
        if ( nread < 0 && saved_errno != ENOSPC ) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "could not read from drive: %s (%d)\n",
                      strerror( errno ),
                      errno );
                return DRIVE_ERROR_DEVICE;
        }

        /* check for a blank tape/EOD.
         */
        if (( nread == 0 )  /* takes care of sun */
              ||            /* now handle SGI */
              (nread < 0 && saved_errno == ENOSPC )) { 
                mlog( MLOG_NORMAL | MLOG_DRIVE,
                      "encountered EOD : "
#ifdef DUMP
                      "assuming blank media\n" );
#endif
#ifdef RESTORE
                      "end of data\n" );
#endif
                ( void )rewind_and_verify( drivep );
#ifdef DUMP
                return DRIVE_ERROR_BLANK;
#endif
#ifdef RESTORE
                return DRIVE_ERROR_EOD;
#endif
        }


        /* dc_iocnt is count of number of records read without error
         */
        contextp->dc_iocnt = 1;

        rval = validate_media_file_hdr( drivep );

        return rval;
}

static intgen_t
validate_media_file_hdr( drive_t *drivep )
{
        global_hdr_t            *grhdrp = drivep->d_greadhdrp;
        drive_hdr_t             *drhdrp = drivep->d_readhdrp;
        rec_hdr_t               *tprhdrp = (rec_hdr_t *)drhdrp->dh_specific;
        media_hdr_t             *mrhdrp = (media_hdr_t *)drhdrp->dh_upper;
        content_hdr_t           *ch = (content_hdr_t *)mrhdrp->mh_upper;
        content_inode_hdr_t     *cih = (content_inode_hdr_t *)ch->ch_specific;
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        char tmpbuf[GLOBAL_HDR_SZ];
        global_hdr_t            *tmpgh = (global_hdr_t *)&tmpbuf[0];
        drive_hdr_t             *tmpdh = (drive_hdr_t *)tmpgh->gh_upper;
        media_hdr_t             *tmpmh = (media_hdr_t *)tmpdh->dh_upper;
        rec_hdr_t               *tmprh = (rec_hdr_t *)tmpdh->dh_specific;
        content_hdr_t           *tmpch = (content_hdr_t *)tmpmh->mh_upper;
        content_inode_hdr_t     *tmpcih = (content_inode_hdr_t 
*)tmpch->ch_specific;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "validating media file header\n" );

        memcpy( tmpbuf, contextp->dc_recp, GLOBAL_HDR_SZ );

        /* check the checksum
         */
        if ( ! global_hdr_checksum_check( tmpgh )) {
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "bad media file header checksum\n");
                return DRIVE_ERROR_CORRUPTION;
        }

        if ( ! tape_rec_checksum_check( contextp, contextp->dc_recp )) {
                mlog( MLOG_NORMAL | MLOG_DRIVE,
                      "tape record checksum error\n");
                return DRIVE_ERROR_CORRUPTION;

        }

        xlate_global_hdr(tmpgh, grhdrp, 1);
        xlate_drive_hdr(tmpdh, drhdrp, 1);
        xlate_media_hdr(tmpmh, mrhdrp, 1);
        xlate_content_hdr(tmpch, ch, 1);
        xlate_content_inode_hdr(tmpcih, cih, 1);
        xlate_rec_hdr(tmprh, tprhdrp, 1);

        memcpy( contextp->dc_recp, grhdrp, GLOBAL_HDR_SZ );

        /* check the magic number
         */
        if ( strncmp( grhdrp->gh_magic, GLOBAL_HDR_MAGIC,GLOBAL_HDR_MAGIC_SZ)) {
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "missing magic number in tape label\n");
                return DRIVE_ERROR_FORMAT;
        }

        /* check the version
         */
        if ( global_version_check( grhdrp->gh_version ) != BOOL_TRUE ) {
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "invalid version number (%d) in tape label\n",
                      grhdrp->gh_version );
                return DRIVE_ERROR_VERSION;
        }

        /* check the strategy id
         */
        if ( drhdrp->dh_strategyid != drivep->d_strategyp->ds_id ) {
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                       "unrecognized drive strategy ID (%d)\n",
                       drivep->d_readhdrp->dh_strategyid );
                return DRIVE_ERROR_FORMAT;
        }

        /* check the record magic number
         */
        if ( tprhdrp->magic != STAPE_MAGIC ) {
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "invalid record magic number in tape label\n");
                return DRIVE_ERROR_FORMAT;
        }

        /* check the record version number
         */
        if ( tprhdrp->version != STAPE_VERSION ) {
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "invalid record version number in tape label\n");
                return DRIVE_ERROR_VERSION;
        }

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "media file header valid: "
              "media file ix %d\n",
              mrhdrp->mh_mediafileix );

        return 0;
}


/* get_tpcaps
 *      Get the specific tape drive capabilities. Set the
 *      d_capabilities field of the driver structure.
 *      set the blksz limits and tape type in the context structure.
 *
 * RETURNS:
 *      TRUE on success
 *      FALSE on error
 */
static bool_t
get_tpcaps( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

        ASSERT( contextp->dc_fd >= 0 );

        /* can't ask about blksz, can't set blksz, can't ask about
         * drive types/caps. assume a drive which can overwrite.
         * assume NOT QIC, since fixed blksz devices not supported
         * via RMT.
         */
        drivep->d_capabilities |= DRIVE_CAP_OVERWRITE;
        drivep->d_capabilities |= DRIVE_CAP_BSF;

        set_recommended_sizes( drivep, contextp->dc_isQICpr);

        return BOOL_TRUE;
}

/* set_recommended_sizes
 *      Determine the recommended tape file size and mark separation
 *      based on tape device type.
 *
 * RETURNS:
 *      void
 */
static void
set_recommended_sizes( drive_t *drivep, int isQICpr )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        off64_t fsize = drive_strategy_rmt.ds_recmfilesz;
        off64_t marksep = drive_strategy_rmt.ds_recmarksep;

        if (isQICpr == BOOL_TRUE)
        {
            fsize = 0x3200000ll;            /* 50 MB */
        }

        if ( contextp->dc_singlemfilepr ) /* use only one media file */
        {
                fsize = OFF64MAX;         /* hence set fsize to max */
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                        "Single media file specified. "
                        "Set media file size to 0x%llx bytes\n", 
                        OFF64MAX );
        }
        else if (contextp->dc_filesz > 0) {
                fsize = contextp->dc_filesz;
#ifdef DUMP
#ifdef SIZEEST
                if ( hdr_mfilesz > fsize ) {
                        mlog( MLOG_WARNING,
                              "recomended media file size of %llu Mb less than "
                              "estimated file header size %llu Mb for %s\n",
                              fsize / ( 1024 * 1024 ),
                              hdr_mfilesz / ( 1024 * 1024 ),
                              drivep->d_pathname );
                }
#endif /* SIZEEST */
#endif /* DUMP */
        }
#ifdef DUMP
#ifdef SIZEEST
        else {
                /* override with minimum recommended file size */
                if ( min_recmfilesz > fsize ) {
                        mlog( MLOG_NOTE,
                              "recommended media file size adjusted from "
                              "%llu Mb to %llu Mb for %s",
                              min_recmfilesz / ( 1024 * 1024 ),
                              fsize / ( 1024 * 1024 ),
                              drivep->d_pathname );
                        fsize = min_recmfilesz;
                }
        }

        mlog( MLOG_NITTY,
              "hdr_mfilesz %lluMb, min_recmfilesize %lluMb, "
              "fsize %lluMb, marksep %lluMb\n",
              hdr_mfilesz / ( 1024 * 1024 ),
              min_recmfilesz / ( 1024 * 1024 ),
              fsize / ( 1024 * 1024 ),
              marksep / ( 1024 * 1024 ));
#endif /* SIZEEST */
#endif /* DUMP */

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "recommended tape media file size set to 0x%llx bytes\n",
              fsize );
        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "recommended tape media mark separation set to 0x%llx bytes\n",
              marksep );

        drivep->d_recmfilesz = fsize;
        drivep->d_recmarksep = marksep;

        return;
}



/* mt_op
 *      Issue MTIOCTOP ioctl operation to the tape device.
 *
 * RETURNS:
 *      0 on sucess
 *      -1 on failure
 */
static intgen_t
mt_op(intgen_t fd, intgen_t sub_op, intgen_t param )
{
        struct mtop     mop;
        char *printstr;
        intgen_t rval;

        mop.mt_op       = (short )sub_op;
        mop.mt_count    = param;

        ASSERT( fd >= 0 );

        switch ( sub_op ) {
        case MTSEEK:
                printstr = "seek";
                break;
        case MTBSF:                             /* 2 */
                printstr = "back space file";
                break;
        case MTWEOF:                            /* 0 */
                printstr = "write file mark";
                break;
        case MTFSF:                             /* 1 */
                printstr = "forward space file";
                break;
        case MTREW:                             /* 5 */
                printstr = "rewind";
                break;
        case MTUNLOAD:
                printstr = "unload";
                break;
        case MTEOM:
                printstr = "advance to EOD";
                break;
        case MTFSR:                             /* 3 */
                printstr = "forward space block";
                break;
        case MTERASE:
                printstr = "erase";
                break;
#ifdef CLRMTAUD
#ifdef MTAUD
        case MTAUD:
                printstr = "audio";
                break;
#endif /* MTAUD */
#endif /* CLRMTAUD */
        default:
                printstr = "???";
                break;
        }
        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "tape op: %s %d\n",
              printstr,
              param );

        rval = ioctl( fd, MTIOCTOP, &mop );
        if ( rval < 0 ) {
                /* failure
                 */
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "tape op %s %d returns %d: errno == %d (%s)\n",
                      printstr,
                      param,
                      rval,
                      errno,
                      strerror( errno ));
                return -1;
        }

        /* success
         */
        return 0;
}

/* determine_write_error()
 *      Using the errno and the tape status information, determine the
 *      type of tape write error that has occured.
 *
 * RETURNS:
 *      DRIVE_ERROR_*
 */
static intgen_t
determine_write_error( int nwritten, int saved_errno )
{
        intgen_t        ret = 0;

        if ( saved_errno == EACCES ) {
                mlog(MLOG_NORMAL,
                        "tape is write protected\n");

                ret = DRIVE_ERROR_DEVICE;
        } else if (
                        ( saved_errno == ENOSPC ) 
                                || 
                        ( saved_errno == EIO )
                                ||
                        (( saved_errno == 0 ) && ( nwritten >= 0 ))  /* short
                                                        write indicates EOM */
                ) {
                mlog(MLOG_NORMAL,
                        "tape media error on write operation\n");

                mlog(MLOG_NORMAL,
                        "no more data can be written to this tape\n");
                
                ret = DRIVE_ERROR_EOM;
        } else if ( saved_errno != 0 ) {
                ret = DRIVE_ERROR_CORE;

                mlog( MLOG_DEBUG | MLOG_DRIVE,
                        "tape unknown error on write operation: "
                        "%d, %d\n",
                        nwritten, saved_errno);
        }

        mlog( MLOG_NITTY | MLOG_DRIVE,
                "tape write operation nwritten %d, errno %d\n",
                nwritten,
                saved_errno);

        return ( ret );

}

static void
tape_rec_checksum_set( drive_context_t *contextp, char *bufp )
{
        rec_hdr_t *rechdrp = ( rec_hdr_t * )bufp;
        u_int32_t *beginp = ( u_int32_t * )bufp;
        u_int32_t *endp = ( u_int32_t * )( bufp + tape_recsz );
        u_int32_t *p;
        u_int32_t accum;

        if ( ! contextp->dc_recchksumpr ) {
                return;
        }

        INT_SET(rechdrp->ischecksum, ARCH_CONVERT, 1);
        rechdrp->checksum = 0;
        accum = 0;
        for ( p = beginp ; p < endp ; p++ ) {
                accum += INT_GET(*p, ARCH_CONVERT);
        }
        INT_SET(rechdrp->checksum, ARCH_CONVERT, ( int32_t )( ~accum + 1 ));
}

static bool_t
tape_rec_checksum_check( drive_context_t *contextp, char *bufp )
{
        rec_hdr_t *rechdrp = ( rec_hdr_t * )bufp;
        u_int32_t *beginp = ( u_int32_t * )bufp;
        u_int32_t *endp = ( u_int32_t * )( bufp + tape_recsz );
        u_int32_t *p;
        u_int32_t accum;

        if ( contextp->dc_recchksumpr && INT_GET(rechdrp->ischecksum, 
ARCH_CONVERT) ) {
                accum = 0;
                for ( p = beginp ; p < endp ; p++ ) {
                        accum += INT_GET(*p, ARCH_CONVERT);
                }
                return accum == 0 ? BOOL_TRUE : BOOL_FALSE;
        } else {
                return BOOL_TRUE;
        }
}

/* to trace rmt operations
 */
#ifdef RMT
#ifdef RMTDBG
static int
dbgrmtopen( char *path, int flags )
{
        int rval;
        rval = rmtopen( path, flags );
        mlog( MLOG_NORMAL | MLOG_DRIVE,
              "RMTOPEN( %s, %d ) returns %d: errno=%d (%s)\n",
              path,
              flags,
              rval,
              errno,
              strerror( errno ));
        return rval;
}
static int
dbgrmtclose( int fd )
{
        int rval;
        rval = rmtclose( fd );
        mlog( MLOG_NORMAL | MLOG_DRIVE,
              "RMTCLOSE( %d ) returns %d: errno=%d (%s)\n",
              fd,
              rval,
              errno,
              strerror( errno ));
        return rval;
}
static int
dbgrmtioctl( int fd, int op, void *arg )
{
        int rval;
        rval = rmtioctl( fd, op, arg );
        mlog( MLOG_NORMAL | MLOG_DRIVE,
              "RMTIOCTL( %d, %d, 0x%x ) returns %d: errno=%d (%s)\n",
              fd,
              op,
              arg,
              rval,
              errno,
              strerror( errno ));
        return rval;
}
static int
dbgrmtread( int fd, void *p, uint sz )
{
        int rval;
        rval = rmtread( fd, p, sz );
        mlog( MLOG_NORMAL | MLOG_DRIVE,
              "RMTREAD( %d, 0x%x, %u ) returns %d: errno=%d (%s)\n",
              fd,
              p,
              sz,
              rval,
              errno,
              strerror( errno ));
        return rval;
}
static int
dbgrmtwrite( int fd, void *p, uint sz )
{
        int rval;
        rval = rmtwrite( fd, p, sz );
        mlog( MLOG_NORMAL | MLOG_DRIVE,
              "RMTWRITE( %d, 0x%x, %u ) returns %d: errno=%d (%s)\n",
              fd,
              p,
              sz,
              rval,
              errno,
              strerror( errno ));
        return rval;
}
#endif /* RMTDBG */
#endif /* RMT */

/* display_access_failed_message()
 *      Print tape device open/access failed message.
 *
 * RETURNS:
 *      void
 */
static void
display_access_failed_message( drive_t *drivep )
{
        mlog( MLOG_NORMAL | MLOG_DRIVE,
                "attempt to access/open remote "
                "tape drive %s failed: %d (%s)\n",
                drivep->d_pathname,
                errno,
                strerror( errno ));
        return;
}

/* prepare_drive - called by begin_read if drive device not open.
 * determines record size and sets block size if fixed block device.
 * determines other drive attributes. determines if any previous
 * xfsdumps on media.
 */
static intgen_t
prepare_drive( drive_t *drivep )
{
        drive_context_t *contextp;
        bool_t ok;
        ix_t try;
        ix_t maxtries;
        bool_t changedblkszpr;
        intgen_t rval;

        /* get pointer to drive context
         */
        contextp = ( drive_context_t * )drivep->d_contextp;

/* retry: */
        if ( cldmgr_stop_requested( )) {
                return DRIVE_ERROR_STOP;
        }

        /* shouldn't be here if drive is open
         */
        ASSERT( contextp->dc_fd == -1 );

        mlog( MLOG_VERBOSE | MLOG_DRIVE,
              "preparing drive\n" );

        /* determine if tape is present or write protected. try several times.
         * if not present or write-protected during dump, return.
         */
        maxtries = 15;
        for ( try = 1 ; ; sleep( 10 ), try++ ) {
                if ( cldmgr_stop_requested( )) {
                        return DRIVE_ERROR_STOP;
                }

                /* open the drive
                 */
                ok = Open( drivep );
                if ( ! ok ) {
                        if ( errno != EBUSY ) {
                                display_access_failed_message( drivep );
                                return DRIVE_ERROR_DEVICE;
                        } else {
                                mlog( MLOG_DEBUG | MLOG_DRIVE,
                                      "open drive returns EBUSY\n" );
                                if ( try >= maxtries ) {
                                        mlog( MLOG_TRACE | MLOG_DRIVE,
                                              "giving up waiting for drive "
                                              "to indicate online\n" );
                                        return DRIVE_ERROR_MEDIA;
                                }
                                Close( drivep );
                                continue;
                        }
                } else
                        break;
        }

        /* determine tape capabilities. this will set the drivep->d_capabilities
         * and contextp->dc_{...}blksz and dc_isQICpr, as well as recommended
         * mark separation and media file size.
         */
        ok = get_tpcaps( drivep );
        if ( ! ok ) {
                return DRIVE_ERROR_DEVICE;
        }


        /* establish the initial block and record sizes we will try.
         * if unable to ask drive about fixed block sizes, we will
         * guess.
         */
        tape_blksz = cmdlineblksize;
        if ( tape_blksz > STAPE_MAX_RECSZ ) {
                tape_blksz = STAPE_MAX_RECSZ;
        }
        if ( contextp->dc_isQICpr == BOOL_TRUE )
                tape_recsz = STAPE_MIN_MAX_BLKSZ;
        else
                tape_recsz = tape_blksz;

        /* if the overwrite option was specified , return.
         */
        if ( contextp->dc_overwritepr ) {
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                        "Overwrite option specified.\n" );
                return DRIVE_ERROR_OVERWRITE;
        }

        /* loop trying to read a record. begin with the record size
         * calculated above. if it appears we have selected the wrong
         * block size and if we are able to alter the fixed block size,
         * and the record size we tried initially was not less than
         * the minmax block size, change the block size to minmax and
         * try to read a one block record again.
         */
        maxtries = 5;
        changedblkszpr = BOOL_FALSE;

        for ( try = 1 ; ; try++ ) {
                intgen_t nread;
                intgen_t saved_errno;

                if ( cldmgr_stop_requested( )) {
                        return DRIVE_ERROR_STOP;
                }

                /* bail out if we've tried too many times
                 */
                if ( try > maxtries ) {
                        mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                              "giving up attempt to determining "
                              "tape record size\n" );
                        return DRIVE_ERROR_MEDIA;
                }

                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "determining tape record size: trying %d (0x%x) bytes\n",
                      tape_recsz,
                      tape_recsz );

                /* read a record. use the first ring buffer
                 */
                saved_errno = 0;
                nread = Read( drivep,
                              contextp->dc_recp,
                              tape_recsz,
                              &saved_errno );
                ASSERT( saved_errno == 0 || nread < 0 );

                /* RMT can require a retry
                 */
                if ( saved_errno == EAGAIN ) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                              "read returned EAGAIN: retrying\n" );
                        continue;
                }

                /* block size is bigger than buffer; should never happen
                 */
                if ( saved_errno == EINVAL ) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                              "read returned EINVAL: "
                              "trying new record size\n" );
                        goto largersize;
                }

                /* block size is bigger than buffer; should never happen
                 */
                if ( saved_errno == ENOMEM ) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                              "read returned ENOMEM: "
                              "trying new record size\n" );
                        goto largersize;
                }


                /* I/O error
                 */
                if ( saved_errno == EIO ) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                              "read returned EIO: will reopen, "
                              "and try again\n" );
                        Close( drivep );
                        ok = Open( drivep );
                        if ( ! ok ) {
                                display_access_failed_message( drivep );
                                return DRIVE_ERROR_DEVICE;
                        }
#ifdef RESTORE
                        continue;
#endif
#ifdef DUMP
                        if (3 == try)
                                return DRIVE_ERROR_BLANK; /* sun rmt returns 
EIO for blank media */
                        else
                                continue;
#endif
                }

                /* ENOSPC
                 */
                if ( saved_errno == ENOSPC ) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                              "read returned ENOSPC: will reopen, "
                              "and try again\n" );
                        Close( drivep );
                        ok = Open( drivep );
                        if ( ! ok ) {
                                display_access_failed_message( drivep );
                                return DRIVE_ERROR_DEVICE;
                        }
#ifdef RESTORE
                        continue;
#endif
#ifdef DUMP
                        if (3 == try)
                                /* IRIX rmt returns ENOSPC for blank media */
                                /* It reads to the EOD */
                                return DRIVE_ERROR_BLANK; 
                        else
                                continue;
#endif
                }

                if ( nread == ( intgen_t )tape_recsz ) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                          "nread == selected blocksize "
                          "indicates correct blocksize found\n" );
                        goto checkhdr;
                }

                /* short read */
                if (( saved_errno == 0 ) && ( nread < (intgen_t)tape_recsz)) {
                        mlog( MLOG_DEBUG | MLOG_DRIVE,
                          "nread < selected blocksize "
                          "indicates incorrect blocksize found "
                          "or foreign media\n" );
                        return DRIVE_ERROR_FOREIGN;
                }
                        

                /* if we fell through the seive, code is wrong.
                 * display useful info and abort
                 */
                mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
                      "unexpected tape error: "
                      "errno %d "
                      "nread %d "
                      "blksz %d "
                      "recsz %d "
                      "\n",
                      saved_errno,
                      nread,
                      tape_blksz,
                      tape_recsz,
                      0 );



                /*return DRIVE_ERROR_CORE; */
                return DRIVE_ERROR_MEDIA;

checkhdr:
                rval = validate_media_file_hdr( drivep );
                if ( rval ) {
                        if ( rval == DRIVE_ERROR_VERSION ) {
                                global_hdr_t *grhdrp = drivep->d_greadhdrp;
                                mlog( MLOG_NORMAL | MLOG_DRIVE,
                                      "media file header version (%d) "
                                      "invalid: advancing\n",
                                      grhdrp->gh_version );
                                continue;
                        } else {
                                if ( isefsdump( drivep )) {
                                        mlog( MLOG_NORMAL
                                              |
                                              MLOG_WARNING
                                              |
                                              MLOG_DRIVE,
                                              "may be an EFS dump at BOT\n" );
                                } else
                                /* Check if the tape was erased by us */
                                if  ( isxfsdumperasetape( drivep )) {
                                        mlog( MLOG_NORMAL | MLOG_DRIVE,
                                              "This tape was erased earlier "
                                              "by xfsdump.\n" );
                                        ( void )rewind_and_verify( drivep );
                                        return DRIVE_ERROR_BLANK;
                                } else {
                                        mlog( MLOG_NORMAL | MLOG_DRIVE,
                                              "bad media file header at BOT "
                                              "indicates foreign or "
                                              "corrupted tape\n" );
                                }
                                ( void )rewind_and_verify( drivep );
                                ok = set_best_blk_and_rec_sz( drivep );
                                if ( ! ok ) {
                                        return DRIVE_ERROR_DEVICE;
                                }
                                return DRIVE_ERROR_FOREIGN;
                        } 
                } else {
                        drive_hdr_t *drhdrp;
                        rec_hdr_t *tprhdrp;
                        drhdrp = drivep->d_readhdrp;
                        tprhdrp = ( rec_hdr_t * )drhdrp->dh_specific;
                        ASSERT( tprhdrp->recsize >= 0 );
                        tape_recsz = ( size_t )tprhdrp->recsize;
                        break;
                }

largersize:
                /* we end up here if we want to try a new larger record size
                 * because the last one was not big enough for the tape block 
                 */
                if ( changedblkszpr ) {
                        mlog( MLOG_NORMAL | MLOG_DRIVE,
                              "cannot determine tape block size "
                              "after two tries\n" );
                                mlog( MLOG_NORMAL | MLOG_DRIVE,
                                      "assuming media is corrupt "
                                      "or contains non-xfsdump data\n" );
                                ok = set_best_blk_and_rec_sz( drivep );
                                if ( ! ok ) {
                                        return DRIVE_ERROR_DEVICE;
                                }
                                return DRIVE_ERROR_FOREIGN;
                }
                /* Make it as large as we can go
                 */
                if ( tape_recsz != STAPE_MAX_RECSZ ) {
                        tape_recsz = STAPE_MAX_RECSZ;
                        if ( ! contextp->dc_isQICpr ) {
                                tape_blksz = tape_recsz;;
                        }
                        changedblkszpr = BOOL_TRUE;
                } else {
                        mlog( MLOG_NORMAL | MLOG_DRIVE,
                              "cannot determine tape block size\n" );
                        return DRIVE_ERROR_MEDIA;
                }
                continue;

        }

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "read first record of first media file encountered on media: "
              "recsz == %u\n",
              tape_recsz );

        /* calculate maximum bytes lost without error at end of tape
         */
        calc_max_lost( drivep );

        contextp->dc_iocnt = 1;
        return 0;
}

/* if BOOL_FALSE returned, errno is valid
 */
static bool_t
Open( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        intgen_t oflags;

#ifdef DUMP
        oflags = O_RDWR;
#endif /* DUMP */
#ifdef RESTORE
        oflags = O_RDONLY;
#endif /* RESTORE */

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "tape op: opening drive '%s'\n", drivep->d_pathname);

        ASSERT( contextp->dc_fd == -1 );

        errno = 0;
        contextp->dc_fd = open_masked_signals( drivep->d_pathname, oflags );

        if ( contextp->dc_fd <= 0 ) {
                return BOOL_FALSE;
        }

        return BOOL_TRUE;
}

static void
Close( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "tape op: closing drive\n" );

        ASSERT( contextp->dc_fd >= 0 );

        ( void )close( contextp->dc_fd );

        contextp->dc_fd = -1;
}

static intgen_t
Read( drive_t *drivep, char *bufp, size_t cnt, intgen_t *errnop )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        intgen_t nread;

#if 0
        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "tape op: reading %u bytes\n",
              cnt );
#endif

        ASSERT( contextp->dc_fd >= 0 );
        ASSERT( bufp );
        *errnop = 0;
        errno = 0;
        nread = read( contextp->dc_fd, ( void * )bufp, cnt );
        if ( nread < 0 ) {
                *errnop = errno;
                mlog( MLOG_NITTY | MLOG_DRIVE,
                      "tape op read of %u bytes failed: errno == %d (%s)\n",
                      cnt,
                      errno,
                      strerror( errno ));
        } else if ( nread != ( intgen_t )cnt ) {
                mlog( MLOG_NITTY | MLOG_DRIVE,
                      "tape op read of %u bytes short: nread == %d\n",
                      cnt,
                      nread );
        } else {
                mlog( MLOG_NITTY | MLOG_DRIVE,
                      "tape op read of %u bytes successful\n",
                      cnt );
        }

        return nread;
}

static intgen_t
Write( drive_t *drivep, char *bufp, size_t cnt, intgen_t *errnop )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        intgen_t nwritten;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "tape op: writing %u bytes\n",
              cnt );

        ASSERT( contextp->dc_fd >= 0 );
        ASSERT( bufp );
        *errnop = 0;
        errno = 0;
        nwritten = write( contextp->dc_fd, ( void * )bufp, cnt );
        if ( nwritten < 0 ) {
                *errnop = errno;
                mlog( MLOG_NITTY | MLOG_DRIVE,
                      "tape op write of %u bytes failed: errno == %d (%s)\n",
                      cnt,
                      errno,
                      strerror( errno ));
        } else if ( nwritten != ( intgen_t )cnt ) {
                mlog( MLOG_NITTY | MLOG_DRIVE,
                      "tape op write of %u bytes short: nwritten == %d\n",
                      cnt,
                      nwritten );
        } else {
                mlog( MLOG_NITTY | MLOG_DRIVE,
                      "tape op write of %u bytes successful\n",
                      cnt );
        }

        return nwritten;
}

/* validate a record header, log any anomolies, and return appropriate
 * indication.
 */
static intgen_t
record_hdr_validate( drive_t *drivep, char *bufp, bool_t chkoffpr )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        global_hdr_t *grhdrp = drivep->d_greadhdrp;
        rec_hdr_t rechdr;
        rec_hdr_t *rechdrp = &rechdr;
        rec_hdr_t *tmprh = ( rec_hdr_t * )bufp;

        if ( ! tape_rec_checksum_check( contextp, bufp )) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: bad record checksum\n",
                      contextp->dc_iocnt - 1 );
                return DRIVE_ERROR_CORRUPTION;

        }

        xlate_rec_hdr(tmprh, rechdrp, 1);

        if ( rechdrp->magic != STAPE_MAGIC )  {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: bad magic number\n",
                      contextp->dc_iocnt - 1 );
                return DRIVE_ERROR_CORRUPTION;
        }

        if ( uuid_is_null( rechdrp->dump_uuid )) {

                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: null dump id\n",
                      contextp->dc_iocnt - 1 );
                return DRIVE_ERROR_CORRUPTION;
        }

        if (  uuid_compare( grhdrp->gh_dumpid,
                           rechdrp->dump_uuid ) != 0) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: dump id mismatch\n",
                      contextp->dc_iocnt - 1 );
                return DRIVE_ERROR_CORRUPTION;
        }

        /* can't check size field of record header, since
         * 5.3 version of xfsdump did not set it properly.
         */
#ifdef RECSZCHK
        if ( ( size_t )rechdrp->recsize != tape_recsz ) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: incorrect record size in header\n",
                      contextp->dc_iocnt - 1 );
                return DRIVE_ERROR_CORRUPTION;
        }
#else /* RECSZCHK */
        mlog( (MLOG_NITTY + 1) | MLOG_NOTE | MLOG_DRIVE,
              "record %lld header record size %d (0x%x)\n",
              contextp->dc_iocnt - 1,
              rechdrp->recsize,
              rechdrp->recsize );
#endif /* RECSZCHK */

        if ( rechdrp->file_offset % ( off64_t )tape_recsz ) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: record offset in header "
                      "not a multiple of record size\n",
                      contextp->dc_iocnt - 1 );
                return DRIVE_ERROR_CORRUPTION;
        }

        if ( chkoffpr
             &&
             rechdrp->file_offset
             !=
             ( contextp->dc_iocnt - 1 ) * ( off64_t )tape_recsz ) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: "
                      "incorrect record offset in header (0x%llx)\n",
                      contextp->dc_iocnt - 1,
                      rechdrp->file_offset );
                return DRIVE_ERROR_CORRUPTION;
        }

        if ( rechdrp->rec_used > tape_recsz
             ||
             rechdrp->rec_used < STAPE_HDR_SZ ) {
                mlog( MLOG_NORMAL | MLOG_WARNING | MLOG_DRIVE,
                      "record %lld corrupt: "
                      "incorrect record padding offset in header\n",
                      contextp->dc_iocnt - 1 );
                return DRIVE_ERROR_CORRUPTION;
        }

        memcpy(tmprh, rechdrp, sizeof(*rechdrp));

        return 0;
}

/* do a read, determine DRIVE_ERROR_... if failure, and return failure code.
 * return 0 on success.
 */
static intgen_t
read_record(  drive_t *drivep, char *bufp )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        intgen_t nread;
        intgen_t saved_errno;
        intgen_t rval;

        nread = Read( drivep, bufp, tape_recsz, &saved_errno );

        if ( nread == ( intgen_t )tape_recsz ) {
                contextp->dc_iocnt++;
                rval = record_hdr_validate( drivep, bufp, BOOL_TRUE );
                return rval;
        }

        /* check for a blank tape/EOD.
         */
        if (( nread == 0 )  /* takes care of sun */
                ||            /* now handle SGI */
            (nread < 0 && saved_errno == ENOSPC )) {
                mlog( MLOG_NORMAL | MLOG_DRIVE,
                        "read_record encountered EOD : "
#ifdef DUMP
                        "assuming blank media\n" );
#endif
#ifdef RESTORE
                        "end of data\n" );
#endif
#ifdef DUMP
                return DRIVE_ERROR_BLANK;
#endif
#ifdef RESTORE
                return DRIVE_ERROR_EOD;
#endif
        }

        /* some other error
         */
        if ( saved_errno == EIO ) {
                mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
                      "unexpected EIO error attempting to read record \n");
#ifdef RESTORE
                return DRIVE_ERROR_EOM;
#endif
#ifdef DUMP
                return DRIVE_ERROR_CORRUPTION;
#endif
        }

        /* short read
         */
        if ( nread >= 0 ) {
                ASSERT( nread <= ( intgen_t )tape_recsz );
                mlog( MLOG_DEBUG | MLOG_DRIVE,
                      "short read record %lld (nread == %d)\n",
                      contextp->dc_iocnt,
                      nread );
                return DRIVE_ERROR_CORRUPTION;
        }

        /* some other error
         */
        mlog( MLOG_NORMAL | MLOG_ERROR | MLOG_DRIVE,
              "unexpected error attempting to read record %lld: "
              "read returns %d, errno %d (%s)\n",
              contextp->dc_iocnt,
              nread,
              saved_errno,
              strerror( saved_errno ));
        return DRIVE_ERROR_CORRUPTION;
}

static int
ring_read( void *clientctxp, char *bufp )
{
        return read_record( ( drive_t * )clientctxp, bufp );
}

/* gets another record IF dc_recp is NULL
 */
static intgen_t
getrec( drive_t *drivep )
{
        drive_context_t *contextp;
        contextp = ( drive_context_t * )drivep->d_contextp;

        while ( ! contextp->dc_recp ) {
                rec_hdr_t *rechdrp;
                if ( contextp->dc_singlethreadedpr ) {
                        intgen_t rval;
                        contextp->dc_recp = contextp->dc_bufp;
                        rval = read_record( drivep, contextp->dc_recp );
                        if ( rval ) {
                                contextp->dc_errorpr = BOOL_TRUE;
                                return rval;
                        }
                } else {
                        contextp->dc_msgp = Ring_get( contextp->dc_ringp );
                        switch( contextp->dc_msgp->rm_stat ) {
                        case RING_STAT_OK:
                                contextp->dc_recp = contextp->dc_msgp->rm_bufp;
                                break;
                        case RING_STAT_INIT:
                        case RING_STAT_NOPACK:
                        case RING_STAT_IGNORE:
                                contextp->dc_msgp->rm_op = RING_OP_READ;
                                Ring_put( contextp->dc_ringp,
                                          contextp->dc_msgp );
                                contextp->dc_msgp = 0;
                                continue;
                        case RING_STAT_ERROR:
                                contextp->dc_errorpr = BOOL_TRUE;
                                return contextp->dc_msgp->rm_rval;
                        default:
                                ASSERT( 0 );
                                contextp->dc_errorpr = BOOL_TRUE;
                                return DRIVE_ERROR_CORE;
                        }
                }
                rechdrp = ( rec_hdr_t * )contextp->dc_recp;
                contextp->dc_recendp = contextp->dc_recp + tape_recsz;
                contextp->dc_dataendp = contextp->dc_recp
                                        +
                                        rechdrp->rec_used;
                contextp->dc_nextp = contextp->dc_recp
                                     +
                                     STAPE_HDR_SZ;
                ASSERT( contextp->dc_nextp <= contextp->dc_dataendp );
        }

        return 0;
}

/* do a write, determine DRIVE_ERROR_... if failure, and return failure code.
 * return 0 on success.
 */
/*ARGSUSED*/
static intgen_t
write_record(  drive_t *drivep, char *bufp, bool_t chksumpr, bool_t xlatepr )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        intgen_t nwritten;
        intgen_t saved_errno;
        intgen_t rval;

        if ( xlatepr ) {
                rec_hdr_t rechdr;
                memcpy( &rechdr, bufp, sizeof(rechdr) );
                xlate_rec_hdr( &rechdr, ( rec_hdr_t * )bufp, 1 );
        }

        if ( chksumpr ) {
                tape_rec_checksum_set( contextp, bufp );
        }

        nwritten = Write( drivep, bufp, tape_recsz, &saved_errno );

        if ( nwritten == ( intgen_t )tape_recsz ) {
                contextp->dc_iocnt++;
                return 0;
        }

        rval = determine_write_error( nwritten, saved_errno );
        ASSERT(rval);

        return rval;
}

static int
ring_write( void *clientctxp, char *bufp )
{
        return write_record( ( drive_t * )clientctxp, bufp, BOOL_TRUE, 
BOOL_TRUE );
}

static void
ring_thread( void *clientctxp,
             int ( * entryp )( void *ringctxp ),
             void *ringctxp )
{
        drive_t *drivep = ( drive_t * )clientctxp;
        /* REFERENCED */
        bool_t ok;

        ok = cldmgr_create( entryp,
                            CLONE_VM,
                            drivep->d_index,
                            "slave",
                            ringctxp );
        ASSERT( ok );
}


static ring_msg_t *
Ring_get( ring_t *ringp )
{
        ring_msg_t *msgp;

        mlog( (MLOG_NITTY + 1) | MLOG_DRIVE,
              "ring op: get\n" );
        
        msgp = ring_get( ringp );
        return msgp;
}

static void
Ring_put(  ring_t *ringp, ring_msg_t *msgp )
{
        mlog( (MLOG_NITTY + 1) | MLOG_DRIVE,
              "ring op: put %d\n",
              msgp->rm_op );
        
        ring_put( ringp, msgp );
}

static void
Ring_reset(  ring_t *ringp, ring_msg_t *msgp )
{
        mlog( (MLOG_NITTY + 1) | MLOG_DRIVE,
              "ring op: reset\n" );
        
        ASSERT( ringp );

        ring_reset( ringp, msgp );
}

/* a simple heuristic to calculate the maximum uncertainty
 * of how much data actually was written prior to encountering
 * end of media.
 */
static void
calc_max_lost( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

        if ( contextp->dc_isQICpr ) {
                contextp->dc_lostrecmax = 1;
        } else {
                contextp->dc_lostrecmax = 2;
        }

}

static void
display_ring_metrics( drive_t *drivep, intgen_t mlog_flags )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        ring_t *ringp = contextp->dc_ringp;
        char bufszbuf[ 16 ];
        char *bufszsfxp;
        
        if ( tape_recsz == STAPE_MIN_MAX_BLKSZ ) {
                ASSERT( ! ( STAPE_MIN_MAX_BLKSZ % 0x400 ));
                sprintf( bufszbuf, "%u", STAPE_MIN_MAX_BLKSZ / 0x400 );
                ASSERT( strlen( bufszbuf ) < sizeof( bufszbuf ));
                bufszsfxp = "KB";
        } else if ( tape_recsz == STAPE_MAX_RECSZ ) {
                ASSERT( ! ( STAPE_MAX_RECSZ % 0x100000 ));
                sprintf( bufszbuf, "%u", STAPE_MAX_RECSZ / 0x100000 );
                ASSERT( strlen( bufszbuf ) < sizeof( bufszbuf ));
                bufszsfxp = "MB";
        } else {
                sprintf( bufszbuf, "%u", (unsigned int)(tape_recsz / 0x400) );
                bufszsfxp = "KB";
        }

        mlog( mlog_flags,
              "I/O metrics: "
              "%u by %s%s %sring; "
              "%lld/%lld (%.0lf%%) records streamed; "
              "%.0lfB/s\n",
              contextp->dc_ringlen,
              bufszbuf,
              bufszsfxp,
              contextp->dc_ringpinnedpr ? "pinned " : "",
              ringp->r_slave_msgcnt - ringp->r_slave_blkcnt,
              ringp->r_slave_msgcnt,
              percent64( ringp->r_slave_msgcnt - ringp->r_slave_blkcnt,
                         ringp->r_slave_msgcnt ),
              ( double )( ringp->r_all_io_cnt )
              *
              ( double )tape_recsz
              /
              ( double )( time( 0 ) - ringp->r_first_io_time ));
}

#ifdef CLRMTAUD
static u_int32_t
#else /* CLRMTAUD */
static short
#endif /* CLRMTAUD */
rewind_and_verify( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;
        ix_t try;
        intgen_t rval;

#if 0
        rval = mt_op( contextp->dc_fd, MTREW, 0 );
        for ( try = 1 ; ; try++ ) {
                if ( rval ) {
                        sleep( 1 );
                        rval = mt_op( contextp->dc_fd, MTREW, 0 );
                } else 
                        return 0;

                if ( try >= MTOP_TRIES_MAX ) {
                        return 0;
                }
                sleep( 1 );
        }
        /* NOTREACHED */
#else
#if 0
        Close(drivep);
        ++lgs_fileno;
        sprintf(drivep->d_pathname + (strlen(drivep->d_pathname) - 3), "%03d", 
lgs_fileno);
        Open(drivep);
#endif

        return 0;
#endif
}

#ifdef CLRMTAUD
static u_int32_t
#else /* CLRMTAUD */
static short
#endif /* CLRMTAUD */
erase_and_verify( drive_t *drivep ) 
{
        char *tempbufp;
        intgen_t saved_errno;

        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

        mlog( MLOG_DEBUG | MLOG_DRIVE,
              "rmt : erase_and_verify\n" );

#if 0
        /* Erase is not standard rmt. So we cannot use the line below.

        ( void )mt_op( contextp->dc_fd, MTERASE, 0 ); 

         * So write a record of zeros to fake erase . Poor man's erase!
         * We put the ERASE_MAGIC string at the beginning so we can
         * detect if we have erased the tape.
         */

        tempbufp = (char *) calloc( 1 , ( size_t )tape_recsz );
        strcpy( tempbufp, ERASE_MAGIC );
        Write( drivep, tempbufp, tape_recsz, &saved_errno );
        free(tempbufp);
#endif

        return 0;
}

#ifdef CLRMTAUD
static u_int32_t
#else /* CLRMTAUD */
static short
#endif /* CLRMTAUD */
bsf_and_verify( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

#if 0
        return mt_op( contextp->dc_fd, MTBSF, 1 );
#else
        Close(drivep);
        ASSERT(lgs_fileno > 0);
        --lgs_fileno;
        mlog(MLOG_DEBUG | MLOG_DRIVE, "drive op: bsf %03d\n", lgs_fileno);
        sprintf(drivep->d_pathname + (strlen(drivep->d_pathname) - 3), "%03d", 
lgs_fileno);
        Open(drivep);
        return 0;
#endif
}

#ifdef CLRMTAUD
static u_int32_t
#else /* CLRMTAUD */
static short
#endif /* CLRMTAUD */
fsf_and_verify( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

#if 0
        ( void )mt_op( contextp->dc_fd, MTFSF, 1 );
#else
        Close(drivep);
        ++lgs_fileno;
        mlog(MLOG_DEBUG | MLOG_DRIVE, "drive op: fsf %03d\n", lgs_fileno);
        sprintf(drivep->d_pathname + (strlen(drivep->d_pathname) - 3), "%03d", 
lgs_fileno);
        Open(drivep);
#endif

        return 0;
}

static bool_t
set_best_blk_and_rec_sz( drive_t *drivep )
{
        drive_context_t *contextp = ( drive_context_t * )drivep->d_contextp;

        if ( contextp->dc_isQICpr == BOOL_TRUE )
                tape_recsz = STAPE_MIN_MAX_BLKSZ; 
        else
                tape_recsz = cmdlineblksize; 

        return BOOL_TRUE;
}

static bool_t
isefsdump( drive_t *drivep )
{
        int32_t *efshdrp = ( int32_t * )drivep->d_greadhdrp;
        int32_t efsmagic = efshdrp[ 6 ];
        if ( efsmagic == 60011
             ||
             efsmagic == 60012 ) {
                return BOOL_TRUE;
        } else {
                return BOOL_FALSE;
        }
}

static bool_t
isxfsdumperasetape( drive_t *drivep )
{
        if ( !strcmp( ( char * )drivep->d_greadhdrp, ERASE_MAGIC ) )
                return BOOL_TRUE;
        else 
                return BOOL_FALSE;
}

#ifdef OPENMASKED

static intgen_t
open_masked_signals( char *pathname, int oflags )
{
        bool_t isrmtpr;
        SIG_PF sigalrm_save;
        SIG_PF sigint_save;
        SIG_PF sighup_save;
        SIG_PF sigquit_save;
        SIG_PF sigcld_save;
        intgen_t rval;
        intgen_t saved_errno;

        if ( strchr( pathname, ':') ) {
                isrmtpr = BOOL_TRUE;
        } else {
                isrmtpr = BOOL_FALSE;
        }

        if ( isrmtpr ) {
                sigalrm_save = sigset( SIGALRM, SIG_IGN );
                sigint_save = sigset( SIGINT, SIG_IGN );
                sighup_save = sigset( SIGHUP, SIG_IGN );
                sigquit_save = sigset( SIGQUIT, SIG_IGN );
                sigcld_save = sigset( SIGCLD, SIG_IGN );
        }

        errno = 0;
        rval = open( pathname, oflags );
        saved_errno = errno;

        if ( isrmtpr ) {
                ( void )sigset( SIGCLD, sigcld_save );
                ( void )sigset( SIGQUIT, sigquit_save );
                ( void )sigset( SIGHUP, sighup_save );
                ( void )sigset( SIGINT, sigint_save );
                ( void )sigset( SIGALRM, sigalrm_save );
        }

        errno = saved_errno;
        return rval;
}

#endif /* OPENMASKED */

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature

<Prev in Thread] Current Thread [Next in Thread>