xfs
[Top] [All Lists]

[PATCH] xfs: Fix a deadlock in xfs_log_commit_cil() code path

To: XFS mailing list <xfs@xxxxxxxxxxx>
Subject: [PATCH] xfs: Fix a deadlock in xfs_log_commit_cil() code path
From: Chandra Seetharaman <sekharan@xxxxxxxxxx>
Date: Mon, 15 Jul 2013 17:52:34 -0500
Delivered-to: xfs@xxxxxxxxxxx
Organization: IBM
Reply-to: sekharan@xxxxxxxxxx
While testing and rearranging my pquota/gquota code, I stumbled
on an xfs_force_shutdown() during a mount, but the mount just hung.

I debugged and found that there is a deadlock involving
&log->l_cilp->xc_ctx_lock.

It is in a code path where &log->l_cilp->xc_ctx_lock is first
acquired in read mode and, some levels down, the same semaphore
is acquired in write mode, causing a deadlock.

This is the stack:
xfs_log_commit_cil -> acquires &log->l_cilp->xc_ctx_lock in read mode
  xlog_print_tic_res
    xfs_force_shutdown
      xfs_log_force_umount
        xlog_cil_force
          xlog_cil_force_lsn
            xlog_cil_push_foreground
              xlog_cil_push - tries to acquire same semaphore in write mode

This patch fixes the deadlock by not calling xfs_force_shutdown() while
holding the semaphore, instead calling it after dropping the semaphore.

Thanks to Dave for suggesting this solution.

Signed-off-by: Chandra Seetharaman <sekharan@xxxxxxxxxx>

---
 fs/xfs/xfs_log.c      |    6 +++---
 fs/xfs/xfs_log_cil.c  |   10 ++++++----
 fs/xfs/xfs_log_priv.h |    2 +-
 fs/xfs/xfs_trans.c    |    2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d852a2b..b9fa2da 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1837,7 +1837,7 @@ xlog_state_finish_copy(
  * print out info relating to regions written which consume
  * the reservation
  */
-void
+int
 xlog_print_tic_res(
        struct xfs_mount        *mp,
        struct xlog_ticket      *ticket)
@@ -1941,7 +1941,7 @@ xlog_print_tic_res(
 
        xfs_alert_tag(mp, XFS_PTAG_LOGRES,
                "xlog_write: reservation ran out. Need to up reservation");
-       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+       return EFSCORRUPTED;
 }
 
 /*
@@ -2215,7 +2215,7 @@ xlog_write(
                ticket->t_curr_res -= sizeof(xlog_op_header_t);
 
        if (ticket->t_curr_res < 0)
-               xlog_print_tic_res(log->l_mp, ticket);
+               return xlog_print_tic_res(log->l_mp, ticket);
 
        index = 0;
        lv = log_vector;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 02b9cf3..93ba7bd 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -730,10 +730,6 @@ xfs_log_commit_cil(
        /* xlog_cil_insert_items() destroys log_vector list */
        xlog_cil_insert_items(log, log_vector, tp->t_ticket);
 
-       /* check we didn't blow the reservation */
-       if (tp->t_ticket->t_curr_res < 0)
-               xlog_print_tic_res(log->l_mp, tp->t_ticket);
-
        /* attach the transaction to the CIL if it has any busy extents */
        if (!list_empty(&tp->t_busy)) {
                spin_lock(&log->l_cilp->xc_cil_lock);
@@ -742,6 +738,12 @@ xfs_log_commit_cil(
                spin_unlock(&log->l_cilp->xc_cil_lock);
        }
 
+       /* check we didn't blow the reservation */
+       if (tp->t_ticket->t_curr_res < 0) {
+               up_read(&log->l_cilp->xc_ctx_lock);
+               return xlog_print_tic_res(log->l_mp, tp->t_ticket);
+       }
+
        tp->t_commit_lsn = *commit_lsn;
        xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
        xfs_trans_unreserve_and_mod_sb(tp);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index b9ea262..4f2fa6d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -576,7 +576,7 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
        *off += bytes;
 }
 
-void   xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
+int    xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
 int
 xlog_write(
        struct xlog             *log,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 35a2299..d96022f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1547,7 +1547,7 @@ xfs_trans_commit(
        xfs_trans_apply_dquot_deltas(tp);
 
        error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
-       if (error == ENOMEM) {
+       if (error) {
                xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
                error = XFS_ERROR(EIO);
                goto out_unreserve;


<Prev in Thread] Current Thread [Next in Thread>