diff -X ignore -urN linux-2.6-xfs.cvs-orig/fs/xfs/xfs_fsops.c linux-2.6-xfs.shrink/fs/xfs/xfs_fsops.c --- linux-2.6-xfs.cvs-orig/fs/xfs/xfs_fsops.c 2007-06-09 18:56:21.509308225 +0200 +++ linux-2.6-xfs.shrink/fs/xfs/xfs_fsops.c 2007-06-10 18:32:36.074856477 +0200 @@ -112,6 +112,53 @@ return 0; } +static void xfs_update_sb( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_agnumber_t nagimax, + xfs_agnumber_t nagcount) /* new number of a.g. */ +{ + xfs_agnumber_t agno; + xfs_buf_t *bp; + xfs_sb_t *sbp; + int error; + + /* New allocation groups fully initialized, so update mount struct */ + if (nagimax) + mp->m_maxagi = nagimax; + if (mp->m_sb.sb_imax_pct) { + __uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; + do_div(icount, 100); + mp->m_maxicount = icount << mp->m_sb.sb_inopblog; + } else + mp->m_maxicount = 0; + for (agno = 1; agno < nagcount; agno++) { + error = xfs_read_buf(mp, mp->m_ddev_targp, + XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), + XFS_FSS_TO_BB(mp, 1), 0, &bp); + if (error) { + xfs_fs_cmn_err(CE_WARN, mp, + "error %d reading secondary superblock for ag %d", + error, agno); + break; + } + sbp = XFS_BUF_TO_SBP(bp); + xfs_xlatesb(sbp, &mp->m_sb, -1, XFS_SB_ALL_BITS); + /* + * If we get an error writing out the alternate superblocks, + * just issue a warning and continue. The real work is + * already done and committed. + */ + if (!(error = xfs_bwrite(mp, bp))) { + continue; + } else { + xfs_fs_cmn_err(CE_WARN, mp, + "write error %d updating secondary superblock for ag %d", + error, agno); + break; /* no point in continuing */ + } + } +} + static int xfs_growfs_data_private( xfs_mount_t *mp, /* mount point for filesystem */ @@ -135,7 +182,6 @@ xfs_rfsblock_t nfree; xfs_agnumber_t oagcount; int pct; - xfs_sb_t *sbp; xfs_trans_t *tp; nb = in->newblocks; @@ -356,44 +402,228 @@ if (error) { return error; } - /* New allocation groups fully initialized, so update mount struct */ - if (nagimax) - mp->m_maxagi = nagimax; - if (mp->m_sb.sb_imax_pct) { - __uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; - do_div(icount, 100); - mp->m_maxicount = icount << mp->m_sb.sb_inopblog; - } else - mp->m_maxicount = 0; - for (agno = 1; agno < nagcount; agno++) { - error = xfs_read_buf(mp, mp->m_ddev_targp, - XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &bp); + xfs_update_sb(mp, nagimax, nagcount); + return 0; + + error0: + xfs_trans_cancel(tp, XFS_TRANS_ABORT); + return error; +} + +static int +xfs_shrinkfs_data_private( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_growfs_data_t *in) /* growfs data input struct */ +{ + xfs_agf_t *agf; + xfs_agnumber_t agno; + xfs_buf_t *bp; + int dpct; + int error; + xfs_agnumber_t nagcount; /* new AG count */ + xfs_agnumber_t oagcount; /* old AG count */ + xfs_agnumber_t nagimax = 0; + xfs_rfsblock_t nb, nb_mod; + xfs_rfsblock_t dbdelta; /* will be used as a + check that we + shrink the fs by + the correct number + of blocks */ + xfs_rfsblock_t fdbdelta; /* will keep track of + how many ag blocks + we need to + remove */ + int pct; + xfs_trans_t *tp; + + nb = in->newblocks; + pct = in->imaxpct; + if (nb >= mp->m_sb.sb_dblocks || pct < 0 || pct > 100) + return XFS_ERROR(EINVAL); + dpct = pct - mp->m_sb.sb_imax_pct; + error = xfs_read_buf(mp, mp->m_ddev_targp, + XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), + XFS_FSS_TO_BB(mp, 1), 0, &bp); + if (error) + return error; + ASSERT(bp); + /* FIXME: we release the buffer here manually because we are + * outside of a transaction? The other buffers read using the + * functions which take a tp parameter are not released in + * growfs + */ + xfs_buf_relse(bp); + + /* Do basic checks (at the fs level) */ + oagcount = mp->m_sb.sb_agcount; + nagcount = nb; + nb_mod = do_div(nagcount, mp->m_sb.sb_agblocks); + if(nb_mod) { + printk("not shrinking on an AG boundary (diff=%d)\n", nb_mod); + return XFS_ERROR(ENOSPC); + } + if(nagcount < 2) { + printk("refusing to shrink below 2 AGs\n"); + return XFS_ERROR(ENOSPC); + } + if(nagcount >= oagcount) { + printk("number of AGs will not decrease\n"); + return XFS_ERROR(EINVAL); + } + printk("Cur ag=%d, cur blocks=%llu\n", + mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks); + printk("New ag=%d, new blocks=%d\n", nagcount, nb); + + printk("Will resize from %llu to %d, delta is %llu\n", + mp->m_sb.sb_dblocks, nb, mp->m_sb.sb_dblocks - nb); + /* Check to see if we trip over the log section */ + printk("logstart=%llu logblocks=%u\n", + mp->m_sb.sb_logstart, mp->m_sb.sb_logblocks); + if (nb < mp->m_sb.sb_logstart + mp->m_sb.sb_logblocks) + return XFS_ERROR(EINVAL); + /* dbdelta starts at the diff and must become zero */ + dbdelta = mp->m_sb.sb_dblocks - nb; + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); + printk("reserving %d\n", XFS_GROWFS_SPACE_RES(mp) + dbdelta); + if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp) + dbdelta, + XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) { + xfs_trans_cancel(tp, 0); + return error; + } + + fdbdelta = 0; + + /* Per-AG checks */ + /* FIXME: do we need to hold m_peraglock while doing this? */ + /* I think that since we do read and write to the m_perag + * stuff, we should be holding the lock for the entire walk & + * modify of the fs + */ + /* Note that because we hold the lock, on any error+early + * return, we must either release manually and return, or + * jump to error0 + */ + down_write(&mp->m_peraglock); + for(agno = oagcount - 1; agno >= nagcount; agno--) { + xfs_extlen_t usedblks; /* total used blocks in this a.g. */ + xfs_extlen_t freeblks; /* free blocks in this a.g. */ + xfs_agblock_t aglen; /* this ag's len */ + struct xfs_perag *pag; /* the m_perag structure */ + + printk("doing agno=%d\n", agno); + + pag = &mp->m_perag[agno]; + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &bp); if (error) { - xfs_fs_cmn_err(CE_WARN, mp, - "error %d reading secondary superblock for ag %d", - error, agno); - break; + goto error0; } - sbp = XFS_BUF_TO_SBP(bp); - xfs_xlatesb(sbp, &mp->m_sb, -1, XFS_SB_ALL_BITS); + ASSERT(bp); + agf = XFS_BUF_TO_AGF(bp); + aglen = INT_GET(agf->agf_length, ARCH_CONVERT); + + /* read the pagf/pagi if not already initialized */ + /* agf should be initialized because of the ablove read_agf */ + ASSERT(pag->pagf_init); + if (!pag->pagi_init) { + if ((error = xfs_ialloc_read_agi(mp, tp, agno, &bp))) + goto error0; + ASSERT(pag->pagi_init); + } + /* - * If we get an error writing out the alternate superblocks, - * just issue a warning and continue. The real work is - * already done and committed. + * Check the inodes: as long as we have pagi_count == + * pagi_freecount == 0, then: a) we don't have to + * update any global inode counters, and b) there are + * no extra blocks in inode btrees */ - if (!(error = xfs_bwrite(mp, bp))) { - continue; - } else { - xfs_fs_cmn_err(CE_WARN, mp, - "write error %d updating secondary superblock for ag %d", - error, agno); - break; /* no point in continuing */ + if(pag->pagi_count > 0 || + pag->pagi_freecount > 0) { + printk("agi %d has %d inodes in total and %d free\n", + agno, pag->pagi_count, pag->pagi_freecount); + error = XFS_ERROR(ENOSPC); + goto error0; + } + + /* Check the AGF: if levels[] == 1, then there should + * be no extra blocks in the btrees beyond the ones + * at the beggining of the AG + */ + if(pag->pagf_levels[XFS_BTNUM_BNOi] > 1 || + pag->pagf_levels[XFS_BTNUM_CNTi] > 1) { + printk("agf %d has level %d bt and %d cnt\n", + agno, + pag->pagf_levels[XFS_BTNUM_BNOi], + pag->pagf_levels[XFS_BTNUM_CNTi]); + error = XFS_ERROR(ENOSPC); + goto error0; } + + freeblks = pag->pagf_freeblks; + printk("Usage: %d prealloc, %d flcount\n", + XFS_PREALLOC_BLOCKS(mp), pag->pagf_flcount); + + /* Done gathering data, check sizes */ + usedblks = XFS_PREALLOC_BLOCKS(mp) + pag->pagf_flcount; + printk("agno=%d agf_length=%d computed used=%d" + " known free=%d\n", agno, aglen, usedblks, freeblks); + + if(usedblks + freeblks != aglen) { + printk("agno %d is not free (%d blocks allocated)\n", + agno, aglen-usedblks-freeblks); + error = XFS_ERROR(ENOSPC); + goto error0; + } + dbdelta -= aglen; + printk("will lower with %d\n", + aglen - XFS_PREALLOC_BLOCKS(mp)); + fdbdelta += aglen - XFS_PREALLOC_BLOCKS(mp); + } + /* + * Check that we removed all blocks + */ + ASSERT(!dbdelta); + ASSERT(nagcount < oagcount); + + printk("to free: %d, oagcount=%d, nagcount=%d\n", + fdbdelta, oagcount, nagcount); + + xfs_trans_agblocks_delta(tp, -((long)fdbdelta)); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, nb - mp->m_sb.sb_dblocks); + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, -((int64_t)fdbdelta)); + + if (dpct) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); + error = xfs_trans_commit(tp, 0); + if (error) { + up_write(&mp->m_peraglock); + return error; } + /* Free memory as the number of AG has changed */ + for (agno = nagcount; agno < oagcount; agno++) + if (mp->m_perag[agno].pagb_list) + kmem_free(mp->m_perag[agno].pagb_list, + sizeof(xfs_perag_busy_t) * + XFS_PAGB_NUM_SLOTS); + + mp->m_perag = kmem_realloc(mp->m_perag, + sizeof(xfs_perag_t) * nagcount, + sizeof(xfs_perag_t) * oagcount, + KM_SLEEP); + /* FIXME: here we could instead just lower + * nagimax to nagcount; is it better this way? + */ + /* FIXME: why is this flag unconditionally set in growfs? */ + mp->m_flags |= XFS_MOUNT_32BITINODES; + nagimax = xfs_initialize_perag(XFS_MTOVFS(mp), mp, nagcount); + up_write(&mp->m_peraglock); + + xfs_update_sb(mp, nagimax, nagcount); return 0; error0: + up_write(&mp->m_peraglock); xfs_trans_cancel(tp, XFS_TRANS_ABORT); return error; } @@ -435,7 +665,10 @@ int error; if (!cpsema(&mp->m_growlock)) return XFS_ERROR(EWOULDBLOCK); - error = xfs_growfs_data_private(mp, in); + if(in->newblocks < mp->m_sb.sb_dblocks) + error = xfs_shrinkfs_data_private(mp, in); + else + error = xfs_growfs_data_private(mp, in); vsema(&mp->m_growlock); return error; } @@ -633,7 +866,7 @@ xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); thaw_bdev(sb->s_bdev, sb); } - + break; } case XFS_FSOP_GOING_FLAGS_LOGFLUSH: diff -X ignore -urN linux-2.6-xfs.cvs-orig/fs/xfs/xfs_trans.c linux-2.6-xfs.shrink/fs/xfs/xfs_trans.c --- linux-2.6-xfs.cvs-orig/fs/xfs/xfs_trans.c 2007-06-05 17:40:51.000000000 +0200 +++ linux-2.6-xfs.shrink/fs/xfs/xfs_trans.c 2007-06-07 23:01:03.000000000 +0200 @@ -503,11 +503,9 @@ tp->t_res_frextents_delta += delta; break; case XFS_TRANS_SB_DBLOCKS: - ASSERT(delta > 0); tp->t_dblocks_delta += delta; break; case XFS_TRANS_SB_AGCOUNT: - ASSERT(delta > 0); tp->t_agcount_delta += delta; break; case XFS_TRANS_SB_IMAXPCT: