To: Tony Luck <tony.luck@xxxxxxxxx>
Subject: Re: [PATCH 6/6] workqueue: reimplement WQ_HIGHPRI using a separate worker_pool
From: Tejun Heo <tj@xxxxxxxxxx>
Date: Thu, 12 Jul 2012 15:32:21 -0700
Cc: Fengguang Wu <fengguang.wu@xxxxxxxxx>, linux-kernel@xxxxxxxxxxxxxxx, torvalds@xxxxxxxxxxxxxxxxxxxx, joshhunt00@xxxxxxxxx, axboe@xxxxxxxxx, rni@xxxxxxxxxx, vgoyal@xxxxxxxxxx, vwadekar@xxxxxxxxxx, herbert@xxxxxxxxxxxxxxxxxxxx, davem@xxxxxxxxxxxxx, linux-crypto@xxxxxxxxxxxxxxx, swhiteho@xxxxxxxxxx, bpm@xxxxxxx, elder@xxxxxxxxxx, xfs@xxxxxxxxxxx, marcel@xxxxxxxxxxxx, gustavo@xxxxxxxxxxx, johan.hedberg@xxxxxxxxx, linux-bluetooth@xxxxxxxxxxxxxxx, martin.petersen@xxxxxxxxxx
In-reply-to: <CA+8MBb+ghRpmtrk=t5-6MqrPMZt+a69UoAWaubyKBeptGdBrWA@xxxxxxxxxxxxxx>
References: <1341859315-17759-7-git-send-email-tj@xxxxxxxxxx> <20120712130648.GA19214@localhost> <20120712170519.GA20167@xxxxxxxxxx> <20120712214514.GD20167@xxxxxxxxxx> <CA+8MBb+ghRpmtrk=t5-6MqrPMZt+a69UoAWaubyKBeptGdBrWA@xxxxxxxxxxxxxx>
Sender: Tejun Heo <htejun@xxxxxxxxx>
User-agent: Mutt/1.5.20 (2009-06-14)
Hello, Tony.

On Thu, Jul 12, 2012 at 03:16:30PM -0700, Tony Luck wrote:
> On Thu, Jul 12, 2012 at 2:45 PM, Tejun Heo <tj@xxxxxxxxxx> wrote:
> > I was wrong and am now dazed and confused.  That's from
> > init_workqueues() where only cpu0 is running.  How the hell did
> > nr_running manage to become non-zero at that point?  Can you please
> > apply the following patch and report the boot log?  Thank you.
> 
> Patch applied on top of next-20120712 (which still has the same problem).

Can you please try the following debug patch instead?  Your failure is
different from Fengguang's.
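
(For reference, the idea behind the check below is simple: if every worker
in a pool is idle, that pool's nr_running counter must be zero, and when it
isn't we dump the counters of all pools once so the bad accounting becomes
visible.  Here is a rough standalone sketch of that warn-once-and-dump
pattern in plain userspace C, with made-up names rather than the actual
workqueue code:

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_POOLS 2

    struct pool {
            int nr_workers;
            int nr_idle;
            int nr_running;         /* stands in for the per-cpu atomic_t */
    };

    static struct pool pools[NR_POOLS];

    static void check_idle_invariant(int cpu, int idx)
    {
            struct pool *pool = &pools[idx];

            /* all workers idle => nothing should be accounted as running */
            if (pool->nr_workers == pool->nr_idle && pool->nr_running) {
                    static bool once;
                    int i;

                    if (once)
                            return;
                    once = true;

                    printf("XXX nr_running mismatch on cpu[%d] pool[%d]\n",
                           cpu, idx);
                    for (i = 0; i < NR_POOLS; i++)
                            printf("XXX pool[%d] nr_workers=%d nr_idle=%d nr_running=%d\n",
                                   i, pools[i].nr_workers, pools[i].nr_idle,
                                   pools[i].nr_running);
            }
    }

    int main(void)
    {
            /* simulate the bad state: the only worker is idle, counter is 1 */
            pools[0] = (struct pool){ .nr_workers = 1, .nr_idle = 1, .nr_running = 1 };
            check_idle_invariant(0, 0);
            check_idle_invariant(0, 0);     /* second hit stays silent */
            return 0;
    }

In the real patch the same pattern is expressed with WARN_ON_ONCE() and
printk(), plus additional WARN_ON_ONCE() checks that a waking or sleeping
worker really belongs to the local CPU's gcwq.)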

Thanks a lot!
---
 kernel/workqueue.c |   40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -699,8 +699,10 @@ void wq_worker_waking_up(struct task_str
 {
        struct worker *worker = kthread_data(task);
 
-       if (!(worker->flags & WORKER_NOT_RUNNING))
+       if (!(worker->flags & WORKER_NOT_RUNNING)) {
+               WARN_ON_ONCE(cpu != worker->pool->gcwq->cpu);
                atomic_inc(get_pool_nr_running(worker->pool));
+       }
 }
 
 /**
@@ -730,6 +732,7 @@ struct task_struct *wq_worker_sleeping(s
 
        /* this can only happen on the local cpu */
        BUG_ON(cpu != raw_smp_processor_id());
+       WARN_ON_ONCE(cpu != worker->pool->gcwq->cpu);
 
        /*
         * The counterpart of the following dec_and_test, implied mb,
@@ -1212,9 +1215,30 @@ static void worker_enter_idle(struct wor
         * between setting %WORKER_ROGUE and zapping nr_running, the
         * warning may trigger spuriously.  Check iff trustee is idle.
         */
-       WARN_ON_ONCE(gcwq->trustee_state == TRUSTEE_DONE &&
-                    pool->nr_workers == pool->nr_idle &&
-                    atomic_read(get_pool_nr_running(pool)));
+       if (WARN_ON_ONCE(gcwq->trustee_state == TRUSTEE_DONE &&
+                        pool->nr_workers == pool->nr_idle &&
+                        atomic_read(get_pool_nr_running(pool)))) {
+               static bool once = false;
+               int cpu;
+
+               if (once)
+                       return;
+               once = true;
+
+               printk("XXX nr_running mismatch on gcwq[%d] pool[%ld]\n",
+                      gcwq->cpu, pool - gcwq->pools);
+
+               for_each_gcwq_cpu(cpu) {
+                       gcwq = get_gcwq(cpu);
+
+                       printk("XXX gcwq[%d] flags=0x%x\n", gcwq->cpu, gcwq->flags);
+                       for_each_worker_pool(pool, gcwq)
+                               printk("XXX gcwq[%d] pool[%ld] nr_workers=%d nr_idle=%d nr_running=%d\n",
+                                      gcwq->cpu, pool - gcwq->pools,
+                                      pool->nr_workers, pool->nr_idle,
+                                      atomic_read(get_pool_nr_running(pool)));
+               }
+       }
 }
 
 /**
@@ -3855,6 +3879,10 @@ static int __init init_workqueues(void)
                for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
                        INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
 
+               if (cpu != WORK_CPU_UNBOUND)
+                       printk("XXX cpu=%d gcwq=%p base=%p\n", cpu, gcwq,
+                              per_cpu_ptr(&pool_nr_running, cpu));
+
                for_each_worker_pool(pool, gcwq) {
                        pool->gcwq = gcwq;
                        INIT_LIST_HEAD(&pool->worklist);
@@ -3868,6 +3896,10 @@ static int __init init_workqueues(void)
                                    (unsigned long)pool);
 
                        ida_init(&pool->worker_ida);
+
+                       printk("XXX cpu=%d nr_running=%d @ %p\n", gcwq->cpu,
+                              atomic_read(get_pool_nr_running(pool)),
+                              get_pool_nr_running(pool));
                }
 
                gcwq->trustee_state = TRUSTEE_DONE;
