Erik Jacobson wrote:
Hi there.
Attached is a fresh PAGG patch for the 2.6.6 kernel.
This patch implements the LKML feedback received so far (with very few
exceptions).
I would be happy to post the inescapable jobs patch (a user of PAGG) as well.
It can be found here (along with pagg):
http://oss.sgi.com/projects/pagg/
Attached is a patch that implements per task initialisation during
registration, detaches the relevant paggs from all tasks during
unregistration of a client, and adds hooks for set real uid/gid and
setting of CPU affinity. This patch is based on the one you've just
posted and takes into account the various changes in technique that it
incorporates.
Enjoy
Peter
--
Dr Peter Williams, Chief Scientist peterw@xxxxxxxxxx
Aurema Pty Limited Tel:+61 2 9698 2322
PO Box 305, Strawberry Hills NSW 2012, Australia Fax:+61 2 9699 9174
79 Myrtle Street, Chippendale NSW 2008, Australia http://www.aurema.com
Index: Linux-2.6.X/Documentation/pagg.txt
diff -c Linux-2.6.X/Documentation/pagg.txt:1.1.6.1
Linux-2.6.X/Documentation/pagg.txt:1.1.6.1.2.1
*** Linux-2.6.X/Documentation/pagg.txt:1.1.6.1 Fri May 28 11:26:10 2004
--- Linux-2.6.X/Documentation/pagg.txt Fri May 28 17:36:14 2004
***************
*** 30,32 ****
--- 30,188 ----
used, for example, by other kernel modules that wish to do advanced CPU
placement on multi-processor systems (just one example).
+ The set_user function has been modified to support an optional callout
+ that can be run when a process in a pagg list changes its real uid.
+
+ The sys_setresgid, sys_setregid and sys_setgid functions have been modified
+ to support optional callouts that can be run when a process in a pagg list changes
+ its real gid.
+
+ The set_cpus_allowed function has been modified to support an optional callout
+ that can be run when a process in a pagg list changes its cpu affinity. It could be
+ used, for example, to implement CPU sets.
+
+ Additional details concerning this implementation of the process aggregates
+ infrastructure are described in the sections that follow.
+
+
+ 2. Kernel Changes
+
+ This section describes the files and data structures that are involved in this
+ implementation of PAGG. Both modified as well as new files and data
+ structures are discussed.
+
+ 2.1. Modified Files
+
+ The following files were modified to implement PAGG:
+
+ - include/linux/init_task.h
+ - include/linux/sched.h
+ - kernel/Makefile
+ - kernel/exit.c
+ - kernel/fork.c
+ - kernel/sched.c
+ - kernel/sys.c
+ - fs/exec.c
+ - init/Kconfig
+
+ 2.2. New Files
+
+ The following files were added to implement PAGG:
+
+ - Documentation/pagg.txt
+ - include/linux/pagg.h
+ - kernel/pagg.c
+
+
+ 2.3. Modified Data Structures
+
+ The following existing data structures were altered to implement PAGG.
+
+ - struct task_struct: (include/linux/sched.h)
+ struct pagg_list pagg_list; /* List of pagg containers */
+
+ This new member in task_struct, pagg_list, points to the list of pagg
+ containers to which the process is currently attached.
+
+ 2.4. New Data Structures
+
+ The following new data structures were introduced to implement PAGG.
+
+ - struct pagg: (include/linux/pagg.h)
+ struct pagg_hook *hook /* Ptr to pagg module entry */
+ void *data; /* Task specific data */
+ struct list_head entry; /* List connection */
+
+ - struct pagg_hook: (include/linux/pagg.h)
+ struct module *module; /* Ptr to PAGG module */
+ char *name; /* PAGG hook name - restricted
+ * to 32 characters. */
+ int (*attach)(struct task_struct *, /* Function to attach */
+ struct pagg *,
+ void *);
+ int (*detach)(struct task_struct *, /* Function to detach */
+ struct pagg *);
+ int (*init)(struct task_struct *, /* Load task init func. */
+ struct pagg *);
+ void *data; /* Module specific data */
+ struct list_head entry; /* List connection */
+ void (*exec)(struct task_struct *, struct pagg *); /* exec func ptr */
+ void (*setruid)(struct task_struct *, struct pagg *); /* setruid func
ptr */
+ void (*setrgid)(struct task_struct *, struct pagg *); /* setrgid func
ptr */
+ void (*setcpuaffinity)(struct task_struct *, struct pagg *); /*
setcpuaffinity func ptr */
+
+ The pagg structure provides the process' reference to the PAGG
+ containers provided by the PAGG modules. The attach function pointer
+ is the function used to notify the referenced PAGG container that the
+ process is being attached. The detach function pointer is used to notify
+ the referenced PAGG container that the process is exiting or otherwise
+ detaching from the container. The exec function pointer is used when a
+ process in the pagg container exec's a new process. This is optional and
+ may be set to NULL if it is not needed by the pagg module. The setruid
+ function pointer is used when a process in the pagg container changes its
+ real uid. This is optional and may be set to NULL if it is not needed by the
+ pagg module. The setrgid function pointer is used when a process in the
+ pagg container changes its real gid. This is optional and may be set to
+ NULL if it is not needed by the pagg module. The setcpuaffinity function
+ pointer is used when a process in the pagg container changes its cpu
+ affinity. This is optional and may be set to NULL if it is not needed by the
+ pagg module.
+
+ The pagg_hook structure provides the reference to the module that
+ implements a type of PAGG container. In addition to the function pointers
+ described concerning pagg, this structure provides an additional
+ function pointer. The init function pointer is currently not used
+ but will be available in the future. Future use of the init function
+ will be optional and will be used to attach currently running processes to
+ a default PAGG container when a PAGG module is loaded on a running system.
+
+
+ 2.5. Modified Functions
+
+ The following functions were changed to implement PAGG:
+
+ - do_fork: (kernel/fork.c)
+ /* execute the following pseudocode before add to run-queue */
+ If parent process pagg list is not empty
+ Call attach_pagg_list function with child task_struct as argument
+ - do_exit: (kernel/exit.c)
+ /* execute the following pseudocode prior to schedule call */
+ If current process pagg list is not empty
+ Call detach_pagg_list function with current task_struct
+ - sys_execve: (fs/exec.c)
+ /* When a process in a pagg exec's, an optional callout can be run. This
+ is implemented with an optional function pointer in the pagg_hook. */
+ - set_user: (kernel/sys.c)
+ /* When a process in a pagg sets its real uid, an optional callout can
be run. This
+ is implemented with an optional function pointer in the pagg_hook. */
+ - sys_setresgid, sys_setregid and sys_setgid: (kernel/sys.c)
+ /* When a process in a pagg sets its real gid, an optional callout can
be run. This
+ is implemented with an optional function pointer in the pagg_hook. */
+ - set_cpus_allowed: (kernel/sched.c)
+ /* When a process in a pagg changes cpu affinity, an optional callout
can be run. This
+ is implemented with an optional function pointer in the pagg_hook. */
+
+ 2.6 New Functions
+
+ The following new functions were added to implement PAGG:
+
+ - int register_pagg_hook(struct pagg_hook *); (kernel/pagg.c)
+ Add module entry into table of pagg modules
+ - int unregister_pagg_hook(struct pagg_hook *); (kernel/pagg.c)
+ Find module entry in list of pagg modules
+ Foreach task
+ If task is attached to this pagg module
+ return error
+ If no tasks are referencing this module
+ remove module entry from list of pagg modules
+ - int attach_pagg_list(struct task_struct *); (kernel/pagg.c)
+ /* Assumed task pagg list pts to paggs that it attaches to */
+ While another pagg container reference
+ Make copy of pagg container reference & insert into new list
+ Attach task to pagg container using new container reference
+ Get next pagg container reference
+ Make task pagg list use the new pagg list
+ - int detach_pagg_list(struct task_struct *); (kernel/pagg.c)
+ While another pagg container reference
+ Detach task from pagg container using reference
+
Index: Linux-2.6.X/include/linux/pagg.h
diff -c Linux-2.6.X/include/linux/pagg.h:1.1.6.1
Linux-2.6.X/include/linux/pagg.h:1.1.6.1.2.1
*** Linux-2.6.X/include/linux/pagg.h:1.1.6.1 Fri May 28 11:26:10 2004
--- Linux-2.6.X/include/linux/pagg.h Fri May 28 17:36:14 2004
***************
*** 105,122 ****
* in the pagg container exec's a new process. This
* is optional and may be set to NULL if it is not
* needed by the pagg module.
* refcnt: Keep track of user count of the pagg hook
*/
struct pagg_hook {
! struct module *module;
! char *name; /* Name Key - restricted to 32 characters */
! void *data; /* Opaque module specific data */
! struct list_head entry; /* List pointers */
! atomic_t refcnt; /* usage counter */
! int (*init)(struct task_struct *, struct pagg *);
! int (*attach)(struct task_struct *, struct pagg *, void*);
! int (*detach)(struct task_struct *, struct pagg *);
! void (*exec)(struct task_struct *, struct pagg *);
};
--- 105,137 ----
* in the pagg container exec's a new process. This
* is optional and may be set to NULL if it is not
* needed by the pagg module.
+ * setruid: Function pointer to function used when a process
+ * in the pagg container changes its real uid. This
+ * is optional and may be set to NULL if it is not
+ * needed by the pagg module.
+ * setrgid: Function pointer to function used when a process
+ * in the pagg container changes its real gid. This
+ * is optional and may be set to NULL if it is not
+ * needed by the pagg module.
+ * setcpuaffinity: Function pointer to function used when a process
+ * in the pagg container changes its cpu affinity. This
+ * is optional and may be set to NULL if it is not
+ * needed by the pagg module.
* refcnt: Keep track of user count of the pagg hook
*/
struct pagg_hook {
! struct module *module;
! char *name; /* Name Key - restricted to 32 characters */
! void *data; /* Opaque module specific data */
! struct list_head entry; /* List pointers */
! atomic_t refcnt; /* usage counter */
! int (*init)(struct task_struct *, struct pagg *);
! int (*attach)(struct task_struct *, struct pagg *, void*);
! int (*detach)(struct task_struct *, struct pagg *);
! void (*exec)(struct task_struct *, struct pagg *);
! void (*setruid)(struct task_struct *, struct pagg *);
! void (*setrgid)(struct task_struct *, struct pagg *);
! void (*setcpuaffinity)(struct task_struct *, struct pagg *);
};
***************
*** 131,136 ****
--- 146,154 ----
struct task_struct *from_task);
extern int __pagg_detach(struct task_struct *task);
extern int __pagg_exec(struct task_struct *task);
+ extern int __pagg_setruid(struct task_struct *task);
+ extern int __pagg_setrgid(struct task_struct *task);
+ extern int __pagg_setcpuaffinity(struct task_struct *task);
/* function used when a child process must inherit attachment to pagg
* containers from the parent.
***************
*** 166,171 ****
--- 184,219 ----
__pagg_exec(task);
}
+ /*
+ * function used when a process setruid's.
+ *
+ */
+ static inline void pagg_setruid(struct task_struct *task)
+ {
+ if (!list_empty(&task->pagg_list))
+ __pagg_setruid(task);
+ }
+
+ /*
+ * function used when a process setrgid's.
+ *
+ */
+ static inline void pagg_setrgid(struct task_struct *task)
+ {
+ if (!list_empty(&task->pagg_list))
+ __pagg_setrgid(task);
+ }
+
+ /*
+ * function used when a process's cpu affinity changes.
+ *
+ */
+ static inline void pagg_setcpuaffinity(struct task_struct *task)
+ {
+ if (!list_empty(&task->pagg_list))
+ __pagg_setcpuaffinity(task);
+ }
+
/*
* Marco Used in INIT_TASK to set the head and sem of pagg_list.
* If CONFIG_PAGG is off, it is defined as an empty macro below.
***************
*** 185,190 ****
--- 233,241 ----
#define pagg_attach(ct, pt) do { } while(0)
#define pagg_detach(t) do { } while(0)
#define pagg_exec(t) do { } while(0)
+ #define pagg_setruid(t) do { } while(0)
+ #define pagg_setrgid(t) do { } while(0)
+ #define pagg_setcpuaffinity(t) do { } while(0)
#endif /* CONFIG_PAGG */
Index: Linux-2.6.X/kernel/pagg.c
diff -c Linux-2.6.X/kernel/pagg.c:1.1.6.1 Linux-2.6.X/kernel/pagg.c:1.1.6.1.2.2
*** Linux-2.6.X/kernel/pagg.c:1.1.6.1 Fri May 28 11:26:10 2004
--- Linux-2.6.X/kernel/pagg.c Fri May 28 19:15:01 2004
***************
*** 157,162 ****
--- 157,203 ----
return NULL;
}
+ /*
+ * remove_named_pagg_from_all_tasks
+ *
+ * Given a pagg hook name key, this function will remove all paggs
+ * associated with that pagg hook from all tasks calling the provided
+ * function on each pagg
+ */
+ static void
+ remove_named_pagg_from_all_tasks(struct pagg_hook *php)
+ {
+ if (php == NULL)
+ return;
+
+ /* Because of internal race conditions we can't guarantee
+ * getting every task in just one pass so we just keep going
+ * until there are no tasks with paggs from this hook attached.
+ * The inefficiency of this should be tempered by the fact that this happens
+ * at most once for each registered client.
+ */
+ while (atomic_read(&php->refcnt) != 0) {
+ struct task_struct *p = NULL;
+
+ read_lock(&tasklist_lock);
+ for_each_process(p) {
+ struct pagg *paggp;
+
+ get_task_struct(p);
+ read_unlock(&tasklist_lock);
+ down_write(&p->pagg_sem);
+ paggp = pagg_get(p, php->name);
+ if (paggp != NULL) {
+ (void)php->detach(p, paggp);
+ pagg_free(paggp);
+ }
+ up_write(&p->pagg_sem);
+ read_lock(&tasklist_lock);
+ put_task_struct(p);
+ }
+ read_unlock(&tasklist_lock);
+ }
+ }
/*
* pagg_hook_register
***************
*** 208,213 ****
--- 249,310 ----
atomic_set(&pagg_hook_new->refcnt, 0);
/* printk("DEBUG - pagg hook register - refcnt now: %d\n",
atomic_read(&pagg_hook_new->refcnt)); */
+
+ /* Now we can call the initialiser function (if present) for each task
*/
+ if (pagg_hook_new->init != NULL) {
+ int num_inited = 0;
+ int malloc_failures = 0;
+ int init_result = 0;
+
+ /* Because of internal race conditions we can't guarantee
+ * getting every task in just one pass so we just keep going
+ * until we don't find any uninitialised tasks. The inefficiency
+ * of this should be tempered by the fact that this happens
+ * at most once for each registered client.
+ */
+ do {
+ struct task_struct *p = NULL;
+
+ num_inited = 0;
+ read_lock(&tasklist_lock);
+ for_each_process(p) {
+ struct pagg *paggp;
+
+ get_task_struct(p);
+ read_unlock(&tasklist_lock);
+ down_write(&p->pagg_sem);
+ paggp = pagg_get(p, pagg_hook_new->name);
+ if (paggp == NULL) {
+ paggp = pagg_alloc(p, pagg_hook_new);
+ if (paggp != NULL)
+ init_result =
pagg_hook_new->init(p, paggp);
+ else
+ malloc_failures++;
+ num_inited++;
+ }
+ up_write(&p->pagg_sem);
+ read_lock(&tasklist_lock);
+ put_task_struct(p);
+ }
+ read_unlock(&tasklist_lock);
+ } while ((init_result == 0) && (malloc_failures == 0) &&
(num_inited > 0));
+
+ /*
+ * if anything went wrong during initialisation abandon the
+ * registration process
+ */
+ if ((init_result != 0) || (malloc_failures != 0)) {
+ remove_named_pagg_from_all_tasks(pagg_hook_new);
+ list_del_init(&pagg_hook_new->entry);
+ up_write(&pagg_hook_list_sem);
+
+ printk(KERN_WARNING "Registering PAGG support for"
+ " (name=%s) failed\n", pagg_hook_new->name);
+
+ return malloc_failures ? -ENOMEM : init_result;
/* success */
+ }
+ }
+
up_write(&pagg_hook_list_sem);
printk(KERN_INFO "Registering PAGG support for (name=%s)\n",
***************
*** 247,258 ****
*/
if (pagg_hook && pagg_hook == pagg_hook_old) {
! /* Is the pagg hook busy? Check if the refcnt is zero */
! if (atomic_read(&pagg_hook->refcnt) != 0) {
! up_write(&pagg_hook_list_sem);
! printk(KERN_INFO "Failed attempt to unregister a PAGG
hook from: %s\n", pagg_hook_old->name);
! return -EBUSY;
! }
list_del_init(&pagg_hook->entry);
up_write(&pagg_hook_list_sem);
--- 344,350 ----
*/
if (pagg_hook && pagg_hook == pagg_hook_old) {
! remove_named_pagg_from_all_tasks(pagg_hook);
list_del_init(&pagg_hook->entry);
up_write(&pagg_hook_list_sem);
***************
*** 378,383 ****
--- 470,541 ----
}
+ /*
+ * __pagg_setruid
+ *
+ * Used to process a task's pagg list when it changes real user id.
+ *
+ */
+ int __pagg_setruid(struct task_struct *task)
+ {
+ struct pagg *pagg;
+
+ down_read(&task->pagg_sem); /* lock the pagg list */
+
+ list_for_each_entry(pagg, &task->pagg_list, entry) {
+ if (pagg->hook->setruid) /* conditional because it's optional */
+ pagg->hook->setruid(task, pagg);
+ }
+
+ up_read(&task->pagg_sem); /* unlock the pagg list */
+ return 0;
+ }
+
+
+ /*
+ * __pagg_setrgid
+ *
+ * Used to process a task's pagg list when it changes real group id.
+ *
+ */
+ int __pagg_setrgid(struct task_struct *task)
+ {
+ struct pagg *pagg;
+
+ down_read(&task->pagg_sem); /* lock the pagg list */
+
+ list_for_each_entry(pagg, &task->pagg_list, entry) {
+ if (pagg->hook->setrgid) /* conditional because it's optional */
+ pagg->hook->setrgid(task, pagg);
+ }
+
+ up_read(&task->pagg_sem); /* unlock the pagg list */
+ return 0;
+ }
+
+
+ /*
+ * __pagg_setcpuaffinity
+ *
+ * Used to process a task's pagg list when it changes its cpu affinity.
+ *
+ */
+ int __pagg_setcpuaffinity(struct task_struct *task)
+ {
+ struct pagg *pagg;
+
+ down_read(&task->pagg_sem); /* lock the pagg list */
+
+ list_for_each_entry(pagg, &task->pagg_list, entry) {
+ if (pagg->hook->setcpuaffinity) /* conditional because it's
optional */
+ pagg->hook->setcpuaffinity(task, pagg);
+ }
+
+ up_read(&task->pagg_sem); /* unlock the pagg list */
+ return 0;
+ }
+
+
EXPORT_SYMBOL(pagg_get);
EXPORT_SYMBOL(pagg_alloc);
EXPORT_SYMBOL(pagg_free);
Index: Linux-2.6.X/kernel/sched.c
diff -c Linux-2.6.X/kernel/sched.c:1.1.1.7
Linux-2.6.X/kernel/sched.c:1.1.1.7.16.1
*** Linux-2.6.X/kernel/sched.c:1.1.1.7 Thu May 6 18:43:49 2004
--- Linux-2.6.X/kernel/sched.c Fri May 28 17:36:14 2004
***************
*** 2722,2735 ****
int set_cpus_allowed(task_t *p, cpumask_t new_mask)
{
unsigned long flags;
- int ret = 0;
migration_req_t req;
runqueue_t *rq;
rq = task_rq_lock(p, &flags);
if (any_online_cpu(new_mask) == NR_CPUS) {
! ret = -EINVAL;
! goto out;
}
if (__set_cpus_allowed(p, new_mask, &req)) {
--- 2722,2734 ----
int set_cpus_allowed(task_t *p, cpumask_t new_mask)
{
unsigned long flags;
migration_req_t req;
runqueue_t *rq;
rq = task_rq_lock(p, &flags);
if (any_online_cpu(new_mask) == NR_CPUS) {
! task_rq_unlock(rq, &flags);
! return -EINVAL;
}
if (__set_cpus_allowed(p, new_mask, &req)) {
***************
*** 2737,2747 ****
task_rq_unlock(rq, &flags);
wake_up_process(rq->migration_thread);
wait_for_completion(&req.done);
return 0;
}
- out:
task_rq_unlock(rq, &flags);
! return ret;
}
EXPORT_SYMBOL_GPL(set_cpus_allowed);
--- 2736,2747 ----
task_rq_unlock(rq, &flags);
wake_up_process(rq->migration_thread);
wait_for_completion(&req.done);
+ pagg_setcpuaffinity(p);
return 0;
}
task_rq_unlock(rq, &flags);
! pagg_setcpuaffinity(p);
! return 0;
}
EXPORT_SYMBOL_GPL(set_cpus_allowed);
Index: Linux-2.6.X/kernel/sys.c
diff -c Linux-2.6.X/kernel/sys.c:1.1.1.6 Linux-2.6.X/kernel/sys.c:1.1.1.6.22.1
*** Linux-2.6.X/kernel/sys.c:1.1.1.6 Thu May 6 18:43:49 2004
--- Linux-2.6.X/kernel/sys.c Fri May 28 17:36:14 2004
***************
*** 23,28 ****
--- 23,29 ----
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
+ #include <linux/pagg.h>
#include <asm/uaccess.h>
#include <asm/io.h>
***************
*** 593,598 ****
--- 594,600 ----
current->fsgid = new_egid;
current->egid = new_egid;
current->gid = new_rgid;
+ pagg_setrgid(current);
return 0;
}
***************
*** 618,623 ****
--- 620,626 ----
wmb();
}
current->gid = current->egid = current->sgid = current->fsgid =
gid;
+ pagg_setrgid(current);
}
else if ((gid == current->gid) || (gid == current->sgid))
{
***************
*** 656,661 ****
--- 659,665 ----
wmb();
}
current->uid = new_ruid;
+ pagg_setruid(current);
return 0;
}
***************
*** 854,861 ****
current->egid = egid;
}
current->fsgid = current->egid;
! if (rgid != (gid_t) -1)
current->gid = rgid;
if (sgid != (gid_t) -1)
current->sgid = sgid;
return 0;
--- 858,867 ----
current->egid = egid;
}
current->fsgid = current->egid;
! if (rgid != (gid_t) -1) {
current->gid = rgid;
+ pagg_setrgid(current);
+ }
if (sgid != (gid_t) -1)
current->sgid = sgid;
return 0;
***************
*** 905,911 ****
}
/*
! * Samma på svenska..
*/
asmlinkage long sys_setfsgid(gid_t gid)
{
--- 911,917 ----
}
/*
! * Samma på svenska..
*/
asmlinkage long sys_setfsgid(gid_t gid)
{
|