From: Abhijith Das <adas@redhat.com> Date: Mon, 24 Nov 2008 14:22:41 -0600 Subject: [GFS2] recovery stuck Message-id: 492B0D11.50503@redhat.com O-Subject: [RHEL5.3 PATCH][GFS2] - bz 465856 - GFS2: recovery stuck Bugzilla: 465856 RH-Acked-by: Steven Whitehouse <swhiteho@redhat.com> This patch cleans up the quotad code and gives it the added responsibility of checking an recovering partially truncated inodes. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com> Signed-off-by: Abhi Das <adas@redhat.com> diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index 95cfc44..db5f61c 100644 --- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c @@ -22,7 +22,6 @@ #include "daemon.h" #include "glock.h" #include "log.h" -#include "quota.h" #include "recovery.h" #include "super.h" #include "util.h" @@ -131,55 +130,3 @@ int gfs2_logd(void *data) return 0; } -/** - * gfs2_quotad - Write cached quota changes into the quota file - * @sdp: Pointer to GFS2 superblock - * - */ - -int gfs2_quotad(void *data) -{ - struct gfs2_sbd *sdp = data; - unsigned long t; - int error; - - while (!kthread_should_stop()) { - /* Update the master statfs file */ - - t = sdp->sd_statfs_sync_time + - gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; - - if (time_after_eq(jiffies, t)) { - error = gfs2_statfs_sync(sdp); - if (error && - error != -EROFS && - !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - fs_err(sdp, "quotad: (1) error=%d\n", error); - sdp->sd_statfs_sync_time = jiffies; - } - - /* Update quota file */ - - t = sdp->sd_quota_sync_time + - gfs2_tune_get(sdp, gt_quota_quantum) * HZ; - - if (time_after_eq(jiffies, t)) { - error = gfs2_quota_sync(sdp); - if (error && - error != -EROFS && - !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - fs_err(sdp, "quotad: (2) error=%d\n", error); - sdp->sd_quota_sync_time = jiffies; - } - - gfs2_quota_scan(sdp); - - t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ; - if (freezing(current)) - refrigerator(); - schedule_timeout_interruptible(t); - } - - return 0; -} - diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index dfde16f..9dc9d7e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -42,6 +42,7 @@ #include "quota.h" #include "super.h" #include "util.h" +#include "bmap.h" struct gfs2_gl_hash_bucket { struct hlist_head hb_list; @@ -321,7 +322,8 @@ static void gfs2_holder_wake(struct gfs2_holder *gh) * do_promote - promote as many requests as possible on the current queue * @gl: The glock * - * Returns: true if there is a blocked holder at the head of the list + * Returns: 1 if there is a blocked holder at the head of the list, or 2 + * if a type specific operation is underway. */ static int do_promote(struct gfs2_glock *gl) @@ -342,6 +344,8 @@ restart: ret = glops->go_lock(gh); spin_lock(&gl->gl_spin); if (ret) { + if (ret == 1) + return 2; gh->gh_error = ret; list_del_init(&gh->gh_list); gfs2_holder_wake(gh); @@ -446,6 +450,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh; unsigned state = ret & LM_OUT_ST_MASK; + int rv; spin_lock(&gl->gl_spin); state_change(gl, state); @@ -500,7 +505,6 @@ retry: gfs2_demote_wake(gl); if (state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { - int rv; spin_unlock(&gl->gl_spin); rv = glops->go_xmote_bh(gl, gh); if (rv == -EAGAIN) @@ -511,10 +515,13 @@ retry: goto out; } } - do_promote(gl); + rv = do_promote(gl); + if (rv == 2) + goto out_locked; } out: clear_bit(GLF_LOCK, &gl->gl_flags); +out_locked: spin_unlock(&gl->gl_spin); gfs2_glock_put(gl); } @@ -606,6 +613,7 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) static void run_queue(struct gfs2_glock *gl, const int nonblock) { struct gfs2_holder *gh = NULL; + int ret; if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) return; @@ -624,8 +632,11 @@ static void run_queue(struct gfs2_glock *gl, const int nonblock) } else { if (test_bit(GLF_DEMOTE, &gl->gl_flags)) gfs2_demote_wake(gl); - if (do_promote(gl) == 0) + ret = do_promote(gl); + if (ret == 0) goto out; + if (ret == 2) + return; gh = find_first_waiter(gl); gl->gl_target = gh->gh_state; if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) @@ -1567,6 +1578,20 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) } } +void gfs2_glock_finish_truncate(struct gfs2_inode *ip) +{ + struct gfs2_glock *gl = ip->i_gl; + int ret; + + ret = gfs2_truncatei_resume(ip); + gfs2_assert_withdraw(gl->gl_sbd, ret == 0); + + spin_lock(&gl->gl_spin); + clear_bit(GLF_LOCK, &gl->gl_flags); + run_queue(gl, 1); + spin_unlock(&gl->gl_spin); +} + static const char *state2str(unsigned state) { switch(state) { diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 8941728..bd5f4f2 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -130,6 +130,7 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); void gfs2_reclaim_glock(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); +void gfs2_glock_finish_truncate(struct gfs2_inode *ip); int __init gfs2_glock_init(void); void gfs2_glock_exit(void); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index e3dcdd8..40096f8 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -256,6 +256,7 @@ static int inode_go_demote_ok(struct gfs2_glock *gl) static int inode_go_lock(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = gl->gl_object; int error = 0; @@ -270,8 +271,14 @@ static int inode_go_lock(struct gfs2_holder *gh) if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && (gl->gl_state == LM_ST_EXCLUSIVE) && - (gh->gh_state == LM_ST_EXCLUSIVE)) - error = gfs2_truncatei_resume(ip); + (gh->gh_state == LM_ST_EXCLUSIVE)) { + spin_lock(&sdp->sd_trunc_lock); + if (list_empty(&ip->i_trunc_list)) + list_add(&sdp->sd_trunc_list, &ip->i_trunc_list); + spin_unlock(&sdp->sd_trunc_lock); + wake_up(&sdp->sd_quota_wait); + return 1; + } return error; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index d0f974c..99ac86c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -259,6 +259,7 @@ struct gfs2_inode { struct gfs2_alloc *i_alloc; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; + struct list_head i_trunc_list; u8 i_height; }; @@ -421,7 +422,6 @@ struct gfs2_tune { unsigned int gt_recoverd_secs; unsigned int gt_logd_secs; - unsigned int gt_quotad_secs; unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ @@ -528,7 +528,6 @@ struct gfs2_sbd { spinlock_t sd_statfs_spin; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; - unsigned long sd_statfs_sync_time; /* Resource group stuff */ @@ -571,13 +570,15 @@ struct gfs2_sbd { atomic_t sd_quota_count; spinlock_t sd_quota_spin; struct mutex sd_quota_mutex; + wait_queue_head_t sd_quota_wait; + struct list_head sd_trunc_list; + spinlock_t sd_trunc_lock; unsigned int sd_quota_slots; unsigned int sd_quota_chunks; unsigned char **sd_quota_bitmap; u64 sd_quota_sync_gen; - unsigned long sd_quota_sync_time; /* Log stuff */ diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 77e6302..4380eb9 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -31,6 +31,7 @@ static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&ip->i_inode); init_rwsem(&ip->i_rw_mutex); + INIT_LIST_HEAD(&ip->i_trunc_list); ip->i_alloc = NULL; ip->i_gh.gh_gl = NULL; } @@ -98,6 +99,12 @@ static int __init init_gfs2_fs(void) if (!gfs2_rgrpd_cachep) goto fail; + gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad", + sizeof(struct gfs2_quota_data), + 0, 0, NULL, NULL); + if (!gfs2_quotad_cachep) + goto fail; + error = register_filesystem(&gfs2_fs_type); if (error) goto fail; @@ -117,6 +124,9 @@ fail_unregister: fail: gfs2_glock_exit(); + if (gfs2_quotad_cachep) + kmem_cache_destroy(gfs2_quotad_cachep); + if (gfs2_rgrpd_cachep) kmem_cache_destroy(gfs2_rgrpd_cachep); @@ -145,6 +155,7 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); + kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); kmem_cache_destroy(gfs2_bufdata_cachep); kmem_cache_destroy(gfs2_inode_cachep); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3a716b3..a956c27 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -66,7 +66,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_jindex_refresh_secs = 60; gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; - gt->gt_quotad_secs = 5; gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; @@ -115,6 +114,9 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_quota_list); spin_lock_init(&sdp->sd_quota_spin); mutex_init(&sdp->sd_quota_mutex); + init_waitqueue_head(&sdp->sd_quota_wait); + INIT_LIST_HEAD(&sdp->sd_trunc_list); + spin_lock_init(&sdp->sd_trunc_lock); spin_lock_init(&sdp->sd_log_lock); @@ -969,9 +971,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo) } sdp->sd_logd_process = p; - sdp->sd_statfs_sync_time = jiffies; - sdp->sd_quota_sync_time = jiffies; - p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); error = IS_ERR(p); if (error) { diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b6b96fc..feec55c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -46,6 +46,7 @@ #include <linux/bio.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> +#include <linux/kthread.h> #include "gfs2.h" #include "incore.h" @@ -95,7 +96,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, struct gfs2_quota_data *qd; int error; - qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL); + qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); if (!qd) return -ENOMEM; @@ -120,7 +121,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, return 0; fail: - kfree(qd); + kmem_cache_free(gfs2_quotad_cachep, qd); return error; } @@ -159,7 +160,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, if (qd || !create) { if (new_qd) { gfs2_lvb_unhold(new_qd->qd_gl); - kfree(new_qd); + kmem_cache_free(gfs2_quotad_cachep, new_qd); } *qdp = qd; return 0; @@ -1226,7 +1227,7 @@ void gfs2_quota_scan(struct gfs2_sbd *sdp) gfs2_assert_warn(sdp, !qd->qd_bh_count); gfs2_lvb_unhold(qd->qd_gl); - kfree(qd); + kmem_cache_free(gfs2_quotad_cachep, qd); } } @@ -1261,7 +1262,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) gfs2_assert_warn(sdp, !qd->qd_bh_count); gfs2_lvb_unhold(qd->qd_gl); - kfree(qd); + kmem_cache_free(gfs2_quotad_cachep, qd); spin_lock(&sdp->sd_quota_spin); } @@ -1276,3 +1277,94 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) } } +static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) +{ + if (error == 0 || error == -EROFS) + return; + if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); +} + +static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, + int (*fxn)(struct gfs2_sbd *sdp), + unsigned long t, unsigned long *timeo, + unsigned int *new_timeo) +{ + if (t >= *timeo) { + int error = fxn(sdp); + quotad_error(sdp, msg, error); + *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; + } else { + *timeo -= t; + } +} + +static void quotad_check_trunc_list(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *ip; + + while(1) { + ip = NULL; + spin_lock(&sdp->sd_trunc_lock); + if (!list_empty(&sdp->sd_trunc_list)) { + ip = list_entry(sdp->sd_trunc_list.next, + struct gfs2_inode, i_trunc_list); + list_del_init(&ip->i_trunc_list); + } + spin_unlock(&sdp->sd_trunc_lock); + if (ip == NULL) + return; + gfs2_glock_finish_truncate(ip); + } +} + +/** + * gfs2_quotad - Write cached quota changes into the quota file + * @sdp: Pointer to GFS2 superblock + * + */ + +int gfs2_quotad(void *data) +{ + struct gfs2_sbd *sdp = data; + struct gfs2_tune *tune = &sdp->sd_tune; + unsigned long statfs_timeo = 0; + unsigned long quotad_timeo = 0; + unsigned long t = 0; + DEFINE_WAIT(wait); + int empty; + + while (!kthread_should_stop()) { + + /* Update the master statfs file */ + quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, + &statfs_timeo, &tune->gt_statfs_quantum); + + /* Update quota file */ + quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, + "ad_timeo, &tune->gt_quota_quantum); + + /* FIXME: This should be turned into a shrinker */ + gfs2_quota_scan(sdp); + + /* Check for & recover partially truncated inodes */ + quotad_check_trunc_list(sdp); + + if (freezing(current)) + refrigerator(); + t = min(quotad_timeo, statfs_timeo); + + prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE); + spin_lock(&sdp->sd_trunc_lock); + empty = list_empty(&sdp->sd_trunc_list); + spin_unlock(&sdp->sd_trunc_lock); + if (empty) + t -= schedule_timeout(t); + else + t = 0; + finish_wait(&sdp->sd_quota_wait, &wait); + } + + return 0; +} + diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 6e6a607..e4faab8 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -429,7 +429,6 @@ TUNE_ATTR(stall_secs, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); TUNE_ATTR_DAEMON(logd_secs, logd_process); -TUNE_ATTR_DAEMON(quotad_secs, quotad_process); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); static struct attribute *tune_attrs[] = { @@ -448,7 +447,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_statfs_quantum.attr, &tune_attr_recoverd_secs.attr, &tune_attr_logd_secs.attr, - &tune_attr_quotad_secs.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, &tune_attr_new_files_directio.attr, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index fe9c28e..42ee47a 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -26,6 +26,7 @@ struct kmem_cache *gfs2_glock_cachep __read_mostly; struct kmem_cache *gfs2_inode_cachep __read_mostly; struct kmem_cache *gfs2_bufdata_cachep __read_mostly; struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; +struct kmem_cache *gfs2_quotad_cachep __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index ac0c567..b75662d 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -148,6 +148,7 @@ extern struct kmem_cache *gfs2_glock_cachep; extern struct kmem_cache *gfs2_inode_cachep; extern struct kmem_cache *gfs2_bufdata_cachep; extern struct kmem_cache *gfs2_rgrpd_cachep; +extern struct kmem_cache *gfs2_quotad_cachep; static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, unsigned int *p)