linux/fs/gfs2/recovery.c
Andreas Gruenbacher 4d927b03a6 gfs2: Rename gfs2_withdrawn to gfs2_withdrawing_or_withdrawn
This function checks whether the filesystem has been been marked to be
withdrawn eventually or has been withdrawn already.  Rename this
function to avoid confusing code like checking for gfs2_withdrawing()
when gfs2_withdrawn() has already returned true.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
2023-12-20 21:29:40 +01:00

583 lines
15 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/ktime.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "super.h"
#include "util.h"
#include "dir.h"
struct workqueue_struct *gfs2_recovery_wq;
int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_glock *gl = ip->i_gl;
u64 dblock;
u32 extlen;
int error;
extlen = 32;
error = gfs2_get_extent(&ip->i_inode, blk, &dblock, &extlen);
if (error)
return error;
if (!dblock) {
gfs2_consist_inode(ip);
return -EIO;
}
*bh = gfs2_meta_ra(gl, dblock, extlen);
return error;
}
int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
{
struct list_head *head = &jd->jd_revoke_list;
struct gfs2_revoke_replay *rr = NULL, *iter;
list_for_each_entry(iter, head, rr_list) {
if (iter->rr_blkno == blkno) {
rr = iter;
break;
}
}
if (rr) {
rr->rr_where = where;
return 0;
}
rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS);
if (!rr)
return -ENOMEM;
rr->rr_blkno = blkno;
rr->rr_where = where;
list_add(&rr->rr_list, head);
return 1;
}
int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where)
{
struct gfs2_revoke_replay *rr = NULL, *iter;
int wrap, a, b, revoke;
list_for_each_entry(iter, &jd->jd_revoke_list, rr_list) {
if (iter->rr_blkno == blkno) {
rr = iter;
break;
}
}
if (!rr)
return 0;
wrap = (rr->rr_where < jd->jd_replay_tail);
a = (jd->jd_replay_tail < where);
b = (where < rr->rr_where);
revoke = (wrap) ? (a || b) : (a && b);
return revoke;
}
void gfs2_revoke_clean(struct gfs2_jdesc *jd)
{
struct list_head *head = &jd->jd_revoke_list;
struct gfs2_revoke_replay *rr;
while (!list_empty(head)) {
rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list);
list_del(&rr->rr_list);
kfree(rr);
}
}
int __get_log_header(struct gfs2_sbd *sdp, const struct gfs2_log_header *lh,
unsigned int blkno, struct gfs2_log_header_host *head)
{
u32 hash, crc;
if (lh->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
lh->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH) ||
(blkno && be32_to_cpu(lh->lh_blkno) != blkno))
return 1;
hash = crc32(~0, lh, LH_V1_SIZE - 4);
hash = ~crc32_le_shift(hash, 4); /* assume lh_hash is zero */
if (be32_to_cpu(lh->lh_hash) != hash)
return 1;
crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
sdp->sd_sb.sb_bsize - LH_V1_SIZE - 4);
if ((lh->lh_crc != 0 && be32_to_cpu(lh->lh_crc) != crc))
return 1;
head->lh_sequence = be64_to_cpu(lh->lh_sequence);
head->lh_flags = be32_to_cpu(lh->lh_flags);
head->lh_tail = be32_to_cpu(lh->lh_tail);
head->lh_blkno = be32_to_cpu(lh->lh_blkno);
head->lh_local_total = be64_to_cpu(lh->lh_local_total);
head->lh_local_free = be64_to_cpu(lh->lh_local_free);
head->lh_local_dinodes = be64_to_cpu(lh->lh_local_dinodes);
return 0;
}
/**
* get_log_header - read the log header for a given segment
* @jd: the journal
* @blk: the block to look at
* @head: the log header to return
*
* Read the log header for a given segement in a given journal. Do a few
* sanity checks on it.
*
* Returns: 0 on success,
* 1 if the header was invalid or incomplete,
* errno on error
*/
static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
struct gfs2_log_header_host *head)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct buffer_head *bh;
int error;
error = gfs2_replay_read_block(jd, blk, &bh);
if (error)
return error;
error = __get_log_header(sdp, (const struct gfs2_log_header *)bh->b_data,
blk, head);
brelse(bh);
return error;
}
/**
* foreach_descriptor - go through the active part of the log
* @jd: the journal
* @start: the first log header in the active region
* @end: the last log header (don't process the contents of this entry))
* @pass: iteration number (foreach_descriptor() is called in a for() loop)
*
* Call a given function once for every log descriptor in the active
* portion of the log.
*
* Returns: errno
*/
static int foreach_descriptor(struct gfs2_jdesc *jd, u32 start,
unsigned int end, int pass)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct buffer_head *bh;
struct gfs2_log_descriptor *ld;
int error = 0;
u32 length;
__be64 *ptr;
unsigned int offset = sizeof(struct gfs2_log_descriptor);
offset += sizeof(__be64) - 1;
offset &= ~(sizeof(__be64) - 1);
while (start != end) {
error = gfs2_replay_read_block(jd, start, &bh);
if (error)
return error;
if (gfs2_meta_check(sdp, bh)) {
brelse(bh);
return -EIO;
}
ld = (struct gfs2_log_descriptor *)bh->b_data;
length = be32_to_cpu(ld->ld_length);
if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
struct gfs2_log_header_host lh;
error = get_log_header(jd, start, &lh);
if (!error) {
gfs2_replay_incr_blk(jd, &start);
brelse(bh);
continue;
}
if (error == 1) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
error = -EIO;
}
brelse(bh);
return error;
} else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
brelse(bh);
return -EIO;
}
ptr = (__be64 *)(bh->b_data + offset);
error = lops_scan_elements(jd, start, ld, ptr, pass);
if (error) {
brelse(bh);
return error;
}
while (length--)
gfs2_replay_incr_blk(jd, &start);
brelse(bh);
}
return 0;
}
/**
* clean_journal - mark a dirty journal as being clean
* @jd: the journal
* @head: the head journal to start from
*
* Returns: errno
*/
static void clean_journal(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
u32 lblock = head->lh_blkno;
gfs2_replay_incr_blk(jd, &lblock);
gfs2_write_log_header(sdp, jd, head->lh_sequence + 1, 0, lblock,
GFS2_LOG_HEAD_UNMOUNT | GFS2_LOG_HEAD_RECOVERY,
REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC);
if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
sdp->sd_log_flush_head = lblock;
gfs2_log_incr_head(sdp);
}
}
static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
unsigned int message)
{
char env_jid[20];
char env_status[20];
char *envp[] = { env_jid, env_status, NULL };
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
ls->ls_recover_jid_done = jid;
ls->ls_recover_jid_status = message;
sprintf(env_jid, "JID=%u", jid);
sprintf(env_status, "RECOVERY=%s",
message == LM_RD_SUCCESS ? "Done" : "Failed");
kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
}
/**
* update_statfs_inode - Update the master statfs inode or zero out the local
* statfs inode for a given journal.
* @jd: The journal
* @head: If NULL, @inode is the local statfs inode and we need to zero it out.
* Otherwise, it @head contains the statfs change info that needs to be
* synced to the master statfs inode (pointed to by @inode).
* @inode: statfs inode to update.
*/
static int update_statfs_inode(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head,
struct inode *inode)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_inode *ip;
struct buffer_head *bh;
struct gfs2_statfs_change_host sc;
int error = 0;
BUG_ON(!inode);
ip = GFS2_I(inode);
error = gfs2_meta_inode_buffer(ip, &bh);
if (error)
goto out;
spin_lock(&sdp->sd_statfs_spin);
if (head) { /* Update the master statfs inode */
gfs2_statfs_change_in(&sc, bh->b_data + sizeof(struct gfs2_dinode));
sc.sc_total += head->lh_local_total;
sc.sc_free += head->lh_local_free;
sc.sc_dinodes += head->lh_local_dinodes;
gfs2_statfs_change_out(&sc, bh->b_data + sizeof(struct gfs2_dinode));
fs_info(sdp, "jid=%u: Updated master statfs Total:%lld, "
"Free:%lld, Dinodes:%lld after change "
"[%+lld,%+lld,%+lld]\n", jd->jd_jid, sc.sc_total,
sc.sc_free, sc.sc_dinodes, head->lh_local_total,
head->lh_local_free, head->lh_local_dinodes);
} else { /* Zero out the local statfs inode */
memset(bh->b_data + sizeof(struct gfs2_dinode), 0,
sizeof(struct gfs2_statfs_change));
/* If it's our own journal, reset any in-memory changes too */
if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) {
memset(&sdp->sd_statfs_local, 0,
sizeof(struct gfs2_statfs_change_host));
}
}
spin_unlock(&sdp->sd_statfs_spin);
mark_buffer_dirty(bh);
brelse(bh);
gfs2_inode_metasync(ip->i_gl);
out:
return error;
}
/**
* recover_local_statfs - Update the master and local statfs changes for this
* journal.
*
* Previously, statfs updates would be read in from the local statfs inode and
* synced to the master statfs inode during recovery.
*
* We now use the statfs updates in the journal head to update the master statfs
* inode instead of reading in from the local statfs inode. To preserve backward
* compatibility with kernels that can't do this, we still need to keep the
* local statfs inode up to date by writing changes to it. At some point in the
* future, we can do away with the local statfs inodes altogether and keep the
* statfs changes solely in the journal.
*
* @jd: the journal
* @head: the journal head
*
* Returns: errno
*/
static void recover_local_statfs(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head)
{
int error;
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
if (!head->lh_local_total && !head->lh_local_free
&& !head->lh_local_dinodes) /* No change */
goto zero_local;
/* First update the master statfs inode with the changes we
* found in the journal. */
error = update_statfs_inode(jd, head, sdp->sd_statfs_inode);
if (error)
goto out;
zero_local:
/* Zero out the local statfs inode so any changes in there
* are not re-recovered. */
error = update_statfs_inode(jd, NULL,
find_local_statfs_inode(sdp, jd->jd_jid));
out:
return;
}
void gfs2_recover_func(struct work_struct *work)
{
struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work);
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host head;
struct gfs2_holder j_gh, ji_gh;
ktime_t t_start, t_jlck, t_jhd, t_tlck, t_rep;
int ro = 0;
unsigned int pass;
int error = 0;
int jlocked = 0;
if (gfs2_withdrawing_or_withdrawn(sdp)) {
fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n",
jd->jd_jid);
goto fail;
}
t_start = ktime_get();
if (sdp->sd_args.ar_spectator)
goto fail;
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
fs_info(sdp, "jid=%u: Trying to acquire journal glock...\n",
jd->jd_jid);
jlocked = 1;
/* Acquire the journal glock so we can do recovery */
error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
LM_ST_EXCLUSIVE,
LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
&j_gh);
switch (error) {
case 0:
break;
case GLR_TRYFAILED:
fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
error = 0;
goto fail;
default:
goto fail;
}
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh);
if (error)
goto fail_gunlock_j;
} else {
fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
}
t_jlck = ktime_get();
fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
error = gfs2_jdesc_check(jd);
if (error)
goto fail_gunlock_ji;
error = gfs2_find_jhead(jd, &head, true);
if (error)
goto fail_gunlock_ji;
t_jhd = ktime_get();
fs_info(sdp, "jid=%u: Journal head lookup took %lldms\n", jd->jd_jid,
ktime_ms_delta(t_jhd, t_jlck));
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
mutex_lock(&sdp->sd_freeze_mutex);
if (test_bit(SDF_FROZEN, &sdp->sd_flags)) {
mutex_unlock(&sdp->sd_freeze_mutex);
fs_warn(sdp, "jid=%u: Can't replay: filesystem "
"is frozen\n", jd->jd_jid);
goto fail_gunlock_ji;
}
if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
ro = 1;
} else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
ro = 1;
} else {
if (sb_rdonly(sdp->sd_vfs)) {
/* check if device itself is read-only */
ro = bdev_read_only(sdp->sd_vfs->s_bdev);
if (!ro) {
fs_info(sdp, "recovery required on "
"read-only filesystem.\n");
fs_info(sdp, "write access will be "
"enabled during recovery.\n");
}
}
}
if (ro) {
fs_warn(sdp, "jid=%u: Can't replay: read-only block "
"device\n", jd->jd_jid);
error = -EROFS;
goto fail_gunlock_nofreeze;
}
t_tlck = ktime_get();
fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
jd->jd_jid, head.lh_tail, head.lh_blkno);
/* We take the sd_log_flush_lock here primarily to prevent log
* flushes and simultaneous journal replays from stomping on
* each other wrt jd_log_bio. */
down_read(&sdp->sd_log_flush_lock);
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
error = foreach_descriptor(jd, head.lh_tail,
head.lh_blkno, pass);
lops_after_scan(jd, error, pass);
if (error) {
up_read(&sdp->sd_log_flush_lock);
goto fail_gunlock_nofreeze;
}
}
recover_local_statfs(jd, &head);
clean_journal(jd, &head);
up_read(&sdp->sd_log_flush_lock);
mutex_unlock(&sdp->sd_freeze_mutex);
t_rep = ktime_get();
fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
"jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
jd->jd_jid, ktime_ms_delta(t_rep, t_start),
ktime_ms_delta(t_jlck, t_start),
ktime_ms_delta(t_jhd, t_jlck),
ktime_ms_delta(t_tlck, t_jhd),
ktime_ms_delta(t_rep, t_tlck));
}
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
if (jlocked) {
gfs2_glock_dq_uninit(&ji_gh);
gfs2_glock_dq_uninit(&j_gh);
}
fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
goto done;
fail_gunlock_nofreeze:
mutex_unlock(&sdp->sd_freeze_mutex);
fail_gunlock_ji:
if (jlocked) {
gfs2_glock_dq_uninit(&ji_gh);
fail_gunlock_j:
gfs2_glock_dq_uninit(&j_gh);
}
fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
fail:
jd->jd_recover_error = error;
gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
done:
clear_bit(JDF_RECOVERY, &jd->jd_flags);
smp_mb__after_atomic();
wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
}
int gfs2_recover_journal(struct gfs2_jdesc *jd, bool wait)
{
int rv;
if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags))
return -EBUSY;
/* we have JDF_RECOVERY, queue should always succeed */
rv = queue_work(gfs2_recovery_wq, &jd->jd_work);
BUG_ON(!rv);
if (wait)
wait_on_bit(&jd->jd_flags, JDF_RECOVERY,
TASK_UNINTERRUPTIBLE);
return wait ? jd->jd_recover_error : 0;
}