[BACK]Return to md_k.h CVS log [TXT][DIR] Up to [Development] / linux-2.4-xfs / include / linux / raid

File: [Development] / linux-2.4-xfs / include / linux / raid / md_k.h (download)

Revision 1.1, Wed Dec 31 00:54:49 2003 UTC (13 years, 10 months ago) by cattelan
Branch: MAIN
CVS Tags: HEAD

Initial Import 2.4.24pre2

/*
   md_k.h : kernel internal structure of the Linux MD driver
          Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
	  
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.
   
   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  
*/

#ifndef _MD_K_H
#define _MD_K_H

#define MD_RESERVED       0UL
#define LINEAR            1UL
#define RAID0             2UL
#define RAID1             3UL
#define RAID5             4UL
#define TRANSLUCENT       5UL
#define HSM               6UL
#define MULTIPATH         7UL
#define MAX_PERSONALITY   8UL

static inline int pers_to_level (int pers)
{
	switch (pers) {
		case MULTIPATH:		return -4;
		case HSM:		return -3;
		case TRANSLUCENT:	return -2;
		case LINEAR:		return -1;
		case RAID0:		return 0;
		case RAID1:		return 1;
		case RAID5:		return 5;
	}
	BUG();
	return MD_RESERVED;
}

static inline int level_to_pers (int level)
{
	switch (level) {
		case -4: return MULTIPATH;
		case -3: return HSM;
		case -2: return TRANSLUCENT;
		case -1: return LINEAR;
		case 0: return RAID0;
		case 1: return RAID1;
		case 4:
		case 5: return RAID5;
	}
	return MD_RESERVED;
}

typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;

#if (MINORBITS != 8)
#error MD does not handle bigger kdev yet
#endif

#define MAX_MD_DEVS  (1<<MINORBITS)	/* Max number of md dev */

/*
 * Maps a kdev to an mddev/subdev. How 'data' is handled is up to
 * the personality. (eg. HSM uses this to identify individual LVs)
 */
typedef struct dev_mapping_s {
	mddev_t *mddev;
	void *data;
} dev_mapping_t;

extern dev_mapping_t mddev_map [MAX_MD_DEVS];

static inline mddev_t * kdev_to_mddev (kdev_t dev)
{
	if (MAJOR(dev) != MD_MAJOR)
		BUG();
        return mddev_map[MINOR(dev)].mddev;
}

/*
 * options passed in raidrun:
 */

#define MAX_CHUNK_SIZE (4096*1024)

/*
 * default readahead
 */
#define MD_READAHEAD	vm_max_readahead

static inline int disk_faulty(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_FAULTY);
}

static inline int disk_active(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_ACTIVE);
}

static inline int disk_sync(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_SYNC);
}

static inline int disk_spare(mdp_disk_t * d)
{
	return !disk_sync(d) && !disk_active(d) && !disk_faulty(d);
}

static inline int disk_removed(mdp_disk_t * d)
{
	return d->state & (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_faulty(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_FAULTY);
}

static inline void mark_disk_active(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_sync(mdp_disk_t * d)
{
	d->state |= (1 << MD_DISK_SYNC);
}

static inline void mark_disk_spare(mdp_disk_t * d)
{
	d->state = 0;
}

static inline void mark_disk_removed(mdp_disk_t * d)
{
	d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);
}

static inline void mark_disk_inactive(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_ACTIVE);
}

static inline void mark_disk_nonsync(mdp_disk_t * d)
{
	d->state &= ~(1 << MD_DISK_SYNC);
}

/*
 * MD's 'extended' device
 */
struct mdk_rdev_s
{
	struct md_list_head same_set;	/* RAID devices within the same set */
	struct md_list_head all;	/* all RAID devices */
	struct md_list_head pending;	/* undetected RAID devices */

	kdev_t dev;			/* Device number */
	kdev_t old_dev;			/*  "" when it was last imported */
	unsigned long size;		/* Device size (in blocks) */
	mddev_t *mddev;			/* RAID array if running */
	unsigned long last_events;	/* IO event timestamp */

	struct block_device *bdev;	/* block device handle */

	mdp_super_t *sb;
	struct page *sb_page;
	unsigned long sb_offset;

	int alias_device;		/* device alias to the same disk */
	int faulty;			/* if faulty do not issue IO requests */
	int desc_nr;			/* descriptor index in the superblock */
};


/*
 * disk operations in a working array:
 */
#define DISKOP_SPARE_INACTIVE	0
#define DISKOP_SPARE_WRITE	1
#define DISKOP_SPARE_ACTIVE	2
#define DISKOP_HOT_REMOVE_DISK	3
#define DISKOP_HOT_ADD_DISK	4

typedef struct mdk_personality_s mdk_personality_t;

struct mddev_s
{
	void				*private;
	mdk_personality_t		*pers;
	int				__minor;
	mdp_super_t			*sb;
	int				nb_dev;
	struct md_list_head 		disks;
	int				sb_dirty;
	mdu_param_t			param;
	int				ro;
	unsigned long			curr_resync;	/* blocks scheduled */
	unsigned long			resync_mark;	/* a recent timestamp */
	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
	char				*name;
	int				recovery_running;
	struct semaphore		reconfig_sem;
	struct semaphore		recovery_sem;
	struct semaphore		resync_sem;
	atomic_t			active;

	atomic_t			recovery_active; /* blocks scheduled, but not written */
	md_wait_queue_head_t		recovery_wait;

	struct md_list_head		all_mddevs;
};

struct mdk_personality_s
{
	char *name;
	int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh);
	int (*run)(mddev_t *mddev);
	int (*stop)(mddev_t *mddev);
	void (*status)(struct seq_file *seq, mddev_t *mddev);
	int (*error_handler)(mddev_t *mddev, kdev_t dev);

/*
 * Some personalities (RAID-1, RAID-5) can have disks hot-added and
 * hot-removed. Hot removal is different from failure. (failure marks
 * a disk inactive, but the disk is still part of the array) The interface
 * to such operations is the 'pers->diskop()' function, can be NULL.
 *
 * the diskop function can change the pointer pointing to the incoming
 * descriptor, but must do so very carefully. (currently only
 * SPARE_ACTIVE expects such a change)
 */
	int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);

	int (*stop_resync)(mddev_t *mddev);
	int (*restart_resync)(mddev_t *mddev);
	int (*sync_request)(mddev_t *mddev, unsigned long block_nr);
};


/*
 * Currently we index md_array directly, based on the minor
 * number. This will have to change to dynamic allocation
 * once we start supporting partitioning of md devices.
 */
static inline int mdidx (mddev_t * mddev)
{
	return mddev->__minor;
}

static inline kdev_t mddev_to_kdev(mddev_t * mddev)
{
	return MKDEV(MD_MAJOR, mdidx(mddev));
}

extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
extern mdp_disk_t *get_spare(mddev_t *mddev);

/*
 * iterates through some rdev ringlist. It's safe to remove the
 * current 'rdev'. Dont touch 'tmp' though.
 */
#define ITERATE_RDEV_GENERIC(head,field,rdev,tmp)			\
									\
	for (tmp = head.next;						\
		rdev = md_list_entry(tmp, mdk_rdev_t, field),		\
			tmp = tmp->next, tmp->prev != &head		\
		; )
/*
 * iterates through the 'same array disks' ringlist
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)

/*
 * Same as above, but assumes that the device has rdev->desc_nr numbered
 * from 0 to mddev->nb_dev, and iterates through rdevs in ascending order.
 */
#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
	for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++)


/*
 * Iterates through all 'RAID managed disks'
 */
#define ITERATE_RDEV_ALL(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp)

/*
 * Iterates through 'pending RAID disks'
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)

/*
 * iterates through all used mddevs in the system.
 */
#define ITERATE_MDDEV(mddev,tmp)					\
									\
	for (tmp = all_mddevs.next;					\
		mddev = md_list_entry(tmp, mddev_t, all_mddevs),	\
			tmp = tmp->next, tmp->prev != &all_mddevs	\
		; )

static inline int lock_mddev (mddev_t * mddev)
{
	return down_interruptible(&mddev->reconfig_sem);
}

static inline void unlock_mddev (mddev_t * mddev)
{
	up(&mddev->reconfig_sem);
}

#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
				x = y; y = __tmp; } while (0)

typedef struct mdk_thread_s {
	void			(*run) (void *data);
	void			*data;
	md_wait_queue_head_t	wqueue;
	unsigned long           flags;
	struct completion	*event;
	struct task_struct	*tsk;
	const char		*name;
} mdk_thread_t;

#define THREAD_WAKEUP  0

#define MAX_DISKNAME_LEN 64

typedef struct dev_name_s {
	struct md_list_head list;
	kdev_t dev;
	char namebuf [MAX_DISKNAME_LEN];
	char *name;
} dev_name_t;


#define __wait_event_lock_irq(wq, condition, lock) 			\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		spin_unlock_irq(&lock);					\
		run_task_queue(&tq_disk);				\
		schedule();						\
		spin_lock_irq(&lock);					\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_event_lock_irq(wq, condition, lock) 			\
do {									\
	if (condition)	 						\
		break;							\
	__wait_event_lock_irq(wq, condition, lock);			\
} while (0)


#define __wait_disk_event(wq, condition) 				\
do {									\
	wait_queue_t __wait;						\
	init_waitqueue_entry(&__wait, current);				\
									\
	add_wait_queue(&wq, &__wait);					\
	for (;;) {							\
		set_current_state(TASK_UNINTERRUPTIBLE);		\
		if (condition)						\
			break;						\
		run_task_queue(&tq_disk);				\
		schedule();						\
	}								\
	current->state = TASK_RUNNING;					\
	remove_wait_queue(&wq, &__wait);				\
} while (0)

#define wait_disk_event(wq, condition) 					\
do {									\
	if (condition)	 						\
		break;							\
	__wait_disk_event(wq, condition);				\
} while (0)

#endif