Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7b0d9ad..fa2d05e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -24,13 +24,6 @@
kmem_zone_t *xfs_log_ticket_zone;
/* Local miscellaneous function prototypes */
-STATIC int
-xlog_commit_record(
- struct xlog *log,
- struct xlog_ticket *ticket,
- struct xlog_in_core **iclog,
- xfs_lsn_t *commitlsnp);
-
STATIC struct xlog *
xlog_alloc_log(
struct xfs_mount *mp,
@@ -47,8 +40,7 @@
/* local state machine functions */
STATIC void xlog_state_done_syncing(
- struct xlog_in_core *iclog,
- bool aborted);
+ struct xlog_in_core *iclog);
STATIC int
xlog_state_get_iclog_space(
struct xlog *log,
@@ -57,33 +49,19 @@
struct xlog_ticket *ticket,
int *continued_write,
int *logoffsetp);
-STATIC int
-xlog_state_release_iclog(
- struct xlog *log,
- struct xlog_in_core *iclog);
STATIC void
xlog_state_switch_iclogs(
struct xlog *log,
struct xlog_in_core *iclog,
int eventual_size);
STATIC void
-xlog_state_want_sync(
- struct xlog *log,
- struct xlog_in_core *iclog);
-
-STATIC void
xlog_grant_push_ail(
struct xlog *log,
int need_bytes);
STATIC void
-xlog_regrant_reserve_log_space(
+xlog_sync(
struct xlog *log,
- struct xlog_ticket *ticket);
-STATIC void
-xlog_ungrant_log_space(
- struct xlog *log,
- struct xlog_ticket *ticket);
-
+ struct xlog_in_core *iclog);
#if defined(DEBUG)
STATIC void
xlog_verify_dest_ptr(
@@ -455,7 +433,7 @@
XFS_STATS_INC(mp, xs_try_logspace);
ASSERT(*ticp == NULL);
- tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 0);
+ tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent);
*ticp = tic;
xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
@@ -485,86 +463,69 @@
return error;
}
-
-/*
- * NOTES:
- *
- * 1. currblock field gets updated at startup and after in-core logs
- * marked as with WANT_SYNC.
- */
-
-/*
- * This routine is called when a user of a log manager ticket is done with
- * the reservation. If the ticket was ever used, then a commit record for
- * the associated transaction is written out as a log operation header with
- * no data. The flag XLOG_TIC_INITED is set when the first write occurs with
- * a given ticket. If the ticket was one with a permanent reservation, then
- * a few operations are done differently. Permanent reservation tickets by
- * default don't release the reservation. They just commit the current
- * transaction with the belief that the reservation is still needed. A flag
- * must be passed in before permanent reservations are actually released.
- * When these type of tickets are not released, they need to be set into
- * the inited state again. By doing this, a start record will be written
- * out when the next write occurs.
- */
-xfs_lsn_t
-xfs_log_done(
- struct xfs_mount *mp,
- struct xlog_ticket *ticket,
- struct xlog_in_core **iclog,
- bool regrant)
-{
- struct xlog *log = mp->m_log;
- xfs_lsn_t lsn = 0;
-
- if (XLOG_FORCED_SHUTDOWN(log) ||
- /*
- * If nothing was ever written, don't write out commit record.
- * If we get an error, just continue and give back the log ticket.
- */
- (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
- (xlog_commit_record(log, ticket, iclog, &lsn)))) {
- lsn = (xfs_lsn_t) -1;
- regrant = false;
- }
-
-
- if (!regrant) {
- trace_xfs_log_done_nonperm(log, ticket);
-
- /*
- * Release ticket if not permanent reservation or a specific
- * request has been made to release a permanent reservation.
- */
- xlog_ungrant_log_space(log, ticket);
- } else {
- trace_xfs_log_done_perm(log, ticket);
-
- xlog_regrant_reserve_log_space(log, ticket);
- /* If this ticket was a permanent reservation and we aren't
- * trying to release it, reset the inited flags; so next time
- * we write, a start record will be written out.
- */
- ticket->t_flags |= XLOG_TIC_INITED;
- }
-
- xfs_log_ticket_put(ticket);
- return lsn;
-}
-
-int
-xfs_log_release_iclog(
- struct xfs_mount *mp,
+static bool
+__xlog_state_release_iclog(
+ struct xlog *log,
struct xlog_in_core *iclog)
{
- if (xlog_state_release_iclog(mp->m_log, iclog)) {
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+ lockdep_assert_held(&log->l_icloglock);
+
+ if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+ /* update tail before writing to iclog */
+ xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+ iclog->ic_state = XLOG_STATE_SYNCING;
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog, tail_lsn);
+ /* cycle incremented when incrementing curr_block */
+ return true;
+ }
+
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+ return false;
+}
+
+/*
+ * Flush iclog to disk if this is the last reference to the given iclog and the
+ * it is in the WANT_SYNC state.
+ */
+static int
+xlog_state_release_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
+{
+ lockdep_assert_held(&log->l_icloglock);
+
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
return -EIO;
+
+ if (atomic_dec_and_test(&iclog->ic_refcnt) &&
+ __xlog_state_release_iclog(log, iclog)) {
+ spin_unlock(&log->l_icloglock);
+ xlog_sync(log, iclog);
+ spin_lock(&log->l_icloglock);
}
return 0;
}
+void
+xfs_log_release_iclog(
+ struct xlog_in_core *iclog)
+{
+ struct xlog *log = iclog->ic_log;
+ bool sync = false;
+
+ if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) {
+ if (iclog->ic_state != XLOG_STATE_IOERROR)
+ sync = __xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ }
+
+ if (sync)
+ xlog_sync(log, iclog);
+}
+
/*
* Mount a log filesystem
*
@@ -801,32 +762,69 @@
}
/*
- * Final log writes as part of unmount.
- *
- * Mark the filesystem clean as unmount happens. Note that during relocation
- * this routine needs to be executed as part of source-bag while the
- * deallocation must not be done until source-end.
+ * Wait for the iclog to be written disk, or return an error if the log has been
+ * shut down.
*/
-
-/* Actually write the unmount record to disk. */
-static void
-xfs_log_write_unmount_record(
- struct xfs_mount *mp)
+static int
+xlog_wait_on_iclog(
+ struct xlog_in_core *iclog)
+ __releases(iclog->ic_log->l_icloglock)
{
- /* the data section must be 32 bit size aligned */
- struct xfs_unmount_log_format magic = {
+ struct xlog *log = iclog->ic_log;
+
+ if (!XLOG_FORCED_SHUTDOWN(log) &&
+ iclog->ic_state != XLOG_STATE_ACTIVE &&
+ iclog->ic_state != XLOG_STATE_DIRTY) {
+ XFS_STATS_INC(log->l_mp, xs_log_force_sleep);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ } else {
+ spin_unlock(&log->l_icloglock);
+ }
+
+ if (XLOG_FORCED_SHUTDOWN(log))
+ return -EIO;
+ return 0;
+}
+
+/*
+ * Write out an unmount record using the ticket provided. We have to account for
+ * the data space used in the unmount ticket as this write is not done from a
+ * transaction context that has already done the accounting for us.
+ */
+static int
+xlog_write_unmount_record(
+ struct xlog *log,
+ struct xlog_ticket *ticket,
+ xfs_lsn_t *lsn,
+ uint flags)
+{
+ struct xfs_unmount_log_format ulf = {
.magic = XLOG_UNMOUNT_TYPE,
};
struct xfs_log_iovec reg = {
- .i_addr = &magic,
- .i_len = sizeof(magic),
+ .i_addr = &ulf,
+ .i_len = sizeof(ulf),
.i_type = XLOG_REG_TYPE_UNMOUNT,
};
struct xfs_log_vec vec = {
.lv_niovecs = 1,
.lv_iovecp = ®,
};
- struct xlog *log = mp->m_log;
+
+ /* account for space used by record data */
+ ticket->t_curr_res -= sizeof(ulf);
+ return xlog_write(log, &vec, ticket, lsn, NULL, flags, false);
+}
+
+/*
+ * Mark the filesystem clean by writing an unmount record to the head of the
+ * log.
+ */
+static void
+xlog_unmount_write(
+ struct xlog *log)
+{
+ struct xfs_mount *mp = log->l_mp;
struct xlog_in_core *iclog;
struct xlog_ticket *tic = NULL;
xfs_lsn_t lsn;
@@ -837,23 +835,7 @@
if (error)
goto out_err;
- /*
- * If we think the summary counters are bad, clear the unmount header
- * flag in the unmount record so that the summary counters will be
- * recalculated during log recovery at next mount. Refer to
- * xlog_check_unmount_rec for more details.
- */
- if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
- XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
- xfs_alert(mp, "%s: will fix summary counters at next mount",
- __func__);
- flags &= ~XLOG_UNMOUNT_TRANS;
- }
-
- /* remove inited flag, and account for space used */
- tic->t_flags = 0;
- tic->t_curr_res -= sizeof(magic);
- error = xlog_write(log, &vec, tic, &lsn, NULL, flags);
+ error = xlog_write_unmount_record(log, tic, &lsn, flags);
/*
* At this point, we're umounting anyway, so there's no point in
* transitioning log state to IOERROR. Just continue...
@@ -865,31 +847,32 @@
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
atomic_inc(&iclog->ic_refcnt);
- xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
+ if (iclog->ic_state == XLOG_STATE_ACTIVE)
+ xlog_state_switch_iclogs(log, iclog, 0);
+ else
+ ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_state == XLOG_STATE_IOERROR);
error = xlog_state_release_iclog(log, iclog);
-
- spin_lock(&log->l_icloglock);
- switch (iclog->ic_state) {
- default:
- if (!XLOG_FORCED_SHUTDOWN(log)) {
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- break;
- }
- /* fall through */
- case XLOG_STATE_ACTIVE:
- case XLOG_STATE_DIRTY:
- spin_unlock(&log->l_icloglock);
- break;
- }
+ xlog_wait_on_iclog(iclog);
if (tic) {
trace_xfs_log_umount_write(log, tic);
- xlog_ungrant_log_space(log, tic);
- xfs_log_ticket_put(tic);
+ xfs_log_ticket_ungrant(log, tic);
}
}
+static void
+xfs_log_unmount_verify_iclog(
+ struct xlog *log)
+{
+ struct xlog_in_core *iclog = log->l_iclog;
+
+ do {
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+ ASSERT(iclog->ic_offset == 0);
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
+}
+
/*
* Unmount record used to have a string "Unmount filesystem--" in the
* data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
@@ -897,16 +880,11 @@
* currently architecture converted and "Unmount" is a bit foo.
* As far as I know, there weren't any dependencies on the old behaviour.
*/
-
-static int
-xfs_log_unmount_write(xfs_mount_t *mp)
+static void
+xfs_log_unmount_write(
+ struct xfs_mount *mp)
{
- struct xlog *log = mp->m_log;
- xlog_in_core_t *iclog;
-#ifdef DEBUG
- xlog_in_core_t *first_iclog;
-#endif
- int error;
+ struct xlog *log = mp->m_log;
/*
* Don't write out unmount record on norecovery mounts or ro devices.
@@ -915,61 +893,30 @@
if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
xfs_readonly_buftarg(log->l_targ)) {
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
- return 0;
+ return;
}
- error = xfs_log_force(mp, XFS_LOG_SYNC);
- ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
+ xfs_log_force(mp, XFS_LOG_SYNC);
-#ifdef DEBUG
- first_iclog = iclog = log->l_iclog;
- do {
- if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
- ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE);
- ASSERT(iclog->ic_offset == 0);
- }
- iclog = iclog->ic_next;
- } while (iclog != first_iclog);
-#endif
- if (! (XLOG_FORCED_SHUTDOWN(log))) {
- xfs_log_write_unmount_record(mp);
- } else {
- /*
- * We're already in forced_shutdown mode, couldn't
- * even attempt to write out the unmount transaction.
- *
- * Go through the motions of sync'ing and releasing
- * the iclog, even though no I/O will actually happen,
- * we need to wait for other log I/Os that may already
- * be in progress. Do this as a separate section of
- * code so we'll know if we ever get stuck here that
- * we're in this odd situation of trying to unmount
- * a file system that went into forced_shutdown as
- * the result of an unmount..
- */
- spin_lock(&log->l_icloglock);
- iclog = log->l_iclog;
- atomic_inc(&iclog->ic_refcnt);
+ if (XLOG_FORCED_SHUTDOWN(log))
+ return;
- xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
- error = xlog_state_release_iclog(log, iclog);
-
- spin_lock(&log->l_icloglock);
-
- if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE
- || iclog->ic_state == XLOG_STATE_DIRTY
- || iclog->ic_state == XLOG_STATE_IOERROR) ) {
-
- xlog_wait(&iclog->ic_force_wait,
- &log->l_icloglock);
- } else {
- spin_unlock(&log->l_icloglock);
- }
+ /*
+ * If we think the summary counters are bad, avoid writing the unmount
+ * record to force log recovery at next mount, after which the summary
+ * counters will be recalculated. Refer to xlog_check_unmount_rec for
+ * more details.
+ */
+ if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
+ XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
+ xfs_alert(mp, "%s: will fix summary counters at next mount",
+ __func__);
+ return;
}
- return error;
-} /* xfs_log_unmount_write */
+ xfs_log_unmount_verify_iclog(log);
+ xlog_unmount_write(log);
+}
/*
* Empty the log for unmount/freeze.
@@ -1232,7 +1179,6 @@
struct xlog_in_core *iclog =
container_of(work, struct xlog_in_core, ic_end_io_work);
struct xlog *log = iclog->ic_log;
- bool aborted = false;
int error;
error = blk_status_to_errno(iclog->ic_bio.bi_status);
@@ -1248,17 +1194,9 @@
if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
xfs_alert(log->l_mp, "log I/O error %d", error);
xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
- /*
- * This flag will be propagated to the trans-committed
- * callback routines to let them know that the log-commit
- * didn't succeed.
- */
- aborted = true;
- } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
- aborted = true;
}
- xlog_state_done_syncing(iclog, aborted);
+ xlog_state_done_syncing(iclog);
bio_uninit(&iclog->ic_bio);
/*
@@ -1479,7 +1417,7 @@
log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
- mp->m_fsname);
+ mp->m_super->s_id);
if (!log->l_ioend_workqueue)
goto out_free_iclog;
@@ -1504,20 +1442,17 @@
return ERR_PTR(error);
} /* xlog_alloc_log */
-
/*
* Write out the commit record of a transaction associated with the given
- * ticket. Return the lsn of the commit record.
+ * ticket to close off a running log write. Return the lsn of the commit record.
*/
-STATIC int
+int
xlog_commit_record(
struct xlog *log,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
- xfs_lsn_t *commitlsnp)
+ xfs_lsn_t *lsn)
{
- struct xfs_mount *mp = log->l_mp;
- int error;
struct xfs_log_iovec reg = {
.i_addr = NULL,
.i_len = 0,
@@ -1527,24 +1462,27 @@
.lv_niovecs = 1,
.lv_iovecp = ®,
};
+ int error;
- ASSERT_ALWAYS(iclog);
- error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
- XLOG_COMMIT_TRANS);
+ if (XLOG_FORCED_SHUTDOWN(log))
+ return -EIO;
+
+ error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS,
+ false);
if (error)
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
return error;
}
/*
- * Push on the buffer cache code if we ever use more than 75% of the on-disk
- * log space. This code pushes on the lsn which would supposedly free up
- * the 25% which we want to leave free. We may need to adopt a policy which
- * pushes on an lsn which is further along in the log once we reach the high
- * water mark. In this manner, we would be creating a low water mark.
+ * Compute the LSN that we'd need to push the log tail towards in order to have
+ * (a) enough on-disk log space to log the number of bytes specified, (b) at
+ * least 25% of the log space free, and (c) at least 256 blocks free. If the
+ * log free space already meets all three thresholds, this function returns
+ * NULLCOMMITLSN.
*/
-STATIC void
-xlog_grant_push_ail(
+xfs_lsn_t
+xlog_grant_push_threshold(
struct xlog *log,
int need_bytes)
{
@@ -1570,7 +1508,7 @@
free_threshold = max(free_threshold, (log->l_logBBsize >> 2));
free_threshold = max(free_threshold, 256);
if (free_blocks >= free_threshold)
- return;
+ return NULLCOMMITLSN;
xlog_crack_atomic_lsn(&log->l_tail_lsn, &threshold_cycle,
&threshold_block);
@@ -1590,13 +1528,33 @@
if (XFS_LSN_CMP(threshold_lsn, last_sync_lsn) > 0)
threshold_lsn = last_sync_lsn;
+ return threshold_lsn;
+}
+
+/*
+ * Push the tail of the log if we need to do so to maintain the free log space
+ * thresholds set out by xlog_grant_push_threshold. We may need to adopt a
+ * policy which pushes on an lsn which is further along in the log once we
+ * reach the high water mark. In this manner, we would be creating a low water
+ * mark.
+ */
+STATIC void
+xlog_grant_push_ail(
+ struct xlog *log,
+ int need_bytes)
+{
+ xfs_lsn_t threshold_lsn;
+
+ threshold_lsn = xlog_grant_push_threshold(log, need_bytes);
+ if (threshold_lsn == NULLCOMMITLSN || XLOG_FORCED_SHUTDOWN(log))
+ return;
+
/*
* Get the transaction layer to kick the dirty buffers out to
* disk asynchronously. No point in trying to do this if
* the filesystem is shutting down.
*/
- if (!XLOG_FORCED_SHUTDOWN(log))
- xfs_ail_push(log->l_ailp, threshold_lsn);
+ xfs_ail_push(log->l_ailp, threshold_lsn);
}
/*
@@ -1666,9 +1624,7 @@
int i;
int xheads;
- xheads = size / XLOG_HEADER_CYCLE_SIZE;
- if (size % XLOG_HEADER_CYCLE_SIZE)
- xheads++;
+ xheads = DIV_ROUND_UP(size, XLOG_HEADER_CYCLE_SIZE);
for (i = 1; i < xheads; i++) {
crc = crc32c(crc, &xhdr[i].hic_xheader,
@@ -1692,7 +1648,7 @@
&iclog->ic_end_io_work);
}
-static void
+static int
xlog_map_iclog_data(
struct bio *bio,
void *data,
@@ -1703,11 +1659,14 @@
unsigned int off = offset_in_page(data);
size_t len = min_t(size_t, count, PAGE_SIZE - off);
- WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
+ if (bio_add_page(bio, page, len, off) != len)
+ return -EIO;
data += len;
count -= len;
} while (count);
+
+ return 0;
}
STATIC void
@@ -1729,7 +1688,7 @@
* across the log IO to archieve that.
*/
down(&iclog->ic_sema);
- if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
+ if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) {
/*
* It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of
@@ -1737,25 +1696,34 @@
* the buffer manually, the code needs to be kept in sync
* with the I/O completion path.
*/
- xlog_state_done_syncing(iclog, XFS_LI_ABORTED);
+ xlog_state_done_syncing(iclog);
up(&iclog->ic_sema);
return;
}
- iclog->ic_io_size = count;
-
bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
iclog->ic_bio.bi_end_io = xlog_bio_end_io;
iclog->ic_bio.bi_private = iclog;
- iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
+
+ /*
+ * We use REQ_SYNC | REQ_IDLE here to tell the block layer the are more
+ * IOs coming immediately after this one. This prevents the block layer
+ * writeback throttle from throttling log writes behind background
+ * metadata writeback and causing priority inversions.
+ */
+ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC |
+ REQ_IDLE | REQ_FUA;
if (need_flush)
iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
- xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
+ if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ return;
+ }
if (is_vmalloc_addr(iclog->ic_data))
- flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
+ flush_kernel_vmap_range(iclog->ic_data, count);
/*
* If this log buffer would straddle the end of the log we will have
@@ -1966,12 +1934,11 @@
log->l_mp->m_log = NULL;
destroy_workqueue(log->l_ioend_workqueue);
kmem_free(log);
-} /* xlog_dealloc_log */
+}
/*
* Update counters atomically now that memcpy is done.
*/
-/* ARGSUSED */
static inline void
xlog_state_finish_copy(
struct xlog *log,
@@ -1979,16 +1946,11 @@
int record_cnt,
int copy_bytes)
{
- spin_lock(&log->l_icloglock);
+ lockdep_assert_held(&log->l_icloglock);
be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
iclog->ic_offset += copy_bytes;
-
- spin_unlock(&log->l_icloglock);
-} /* xlog_state_finish_copy */
-
-
-
+}
/*
* print out info relating to regions written which consume
@@ -2109,23 +2071,21 @@
}
/*
- * Calculate the potential space needed by the log vector. Each region gets
- * its own xlog_op_header_t and may need to be double word aligned.
+ * Calculate the potential space needed by the log vector. We may need a start
+ * record, and each region gets its own struct xlog_op_header and may need to be
+ * double word aligned.
*/
static int
xlog_write_calc_vec_length(
struct xlog_ticket *ticket,
- struct xfs_log_vec *log_vector)
+ struct xfs_log_vec *log_vector,
+ bool need_start_rec)
{
struct xfs_log_vec *lv;
- int headers = 0;
+ int headers = need_start_rec ? 1 : 0;
int len = 0;
int i;
- /* acct for start rec of xact */
- if (ticket->t_flags & XLOG_TIC_INITED)
- headers++;
-
for (lv = log_vector; lv; lv = lv->lv_next) {
/* we don't write ordered log vectors */
if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
@@ -2147,27 +2107,16 @@
return len;
}
-/*
- * If first write for transaction, insert start record We can't be trying to
- * commit if we are inited. We can't have any "partial_copy" if we are inited.
- */
-static int
+static void
xlog_write_start_rec(
struct xlog_op_header *ophdr,
struct xlog_ticket *ticket)
{
- if (!(ticket->t_flags & XLOG_TIC_INITED))
- return 0;
-
ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
ophdr->oh_clientid = ticket->t_clientid;
ophdr->oh_len = 0;
ophdr->oh_flags = XLOG_START_TRANS;
ophdr->oh_res2 = 0;
-
- ticket->t_flags &= ~XLOG_TIC_INITED;
-
- return sizeof(struct xlog_op_header);
}
static xlog_op_header_t *
@@ -2265,15 +2214,18 @@
int log_offset,
struct xlog_in_core **commit_iclog)
{
+ int error;
+
if (*partial_copy) {
/*
* This iclog has already been marked WANT_SYNC by
* xlog_state_get_iclog_space.
*/
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;
- return xlog_state_release_iclog(log, iclog);
+ goto release_iclog;
}
*partial_copy = 0;
@@ -2281,21 +2233,29 @@
if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
/* no more space in this iclog - push it. */
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;
- spin_lock(&log->l_icloglock);
- xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
-
+ if (iclog->ic_state == XLOG_STATE_ACTIVE)
+ xlog_state_switch_iclogs(log, iclog, 0);
+ else
+ ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_state == XLOG_STATE_IOERROR);
if (!commit_iclog)
- return xlog_state_release_iclog(log, iclog);
+ goto release_iclog;
+ spin_unlock(&log->l_icloglock);
ASSERT(flags & XLOG_COMMIT_TRANS);
*commit_iclog = iclog;
}
return 0;
+
+release_iclog:
+ error = xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ return error;
}
/*
@@ -2345,39 +2305,28 @@
struct xlog_ticket *ticket,
xfs_lsn_t *start_lsn,
struct xlog_in_core **commit_iclog,
- uint flags)
+ uint flags,
+ bool need_start_rec)
{
struct xlog_in_core *iclog = NULL;
- struct xfs_log_iovec *vecp;
- struct xfs_log_vec *lv;
+ struct xfs_log_vec *lv = log_vector;
+ struct xfs_log_iovec *vecp = lv->lv_iovecp;
+ int index = 0;
int len;
- int index;
int partial_copy = 0;
int partial_copy_len = 0;
int contwr = 0;
int record_cnt = 0;
int data_cnt = 0;
- int error;
-
- *start_lsn = 0;
-
- len = xlog_write_calc_vec_length(ticket, log_vector);
+ int error = 0;
/*
- * Region headers and bytes are already accounted for.
- * We only need to take into account start records and
- * split regions in this function.
+ * If this is a commit or unmount transaction, we don't need a start
+ * record to be written. We do, however, have to account for the
+ * commit or unmount header that gets written. Hence we always have
+ * to account for an extra xlog_op_header here.
*/
- if (ticket->t_flags & XLOG_TIC_INITED)
- ticket->t_curr_res -= sizeof(xlog_op_header_t);
-
- /*
- * Commit record headers need to be accounted for. These
- * come in as separate writes so are easy to detect.
- */
- if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
- ticket->t_curr_res -= sizeof(xlog_op_header_t);
-
+ ticket->t_curr_res -= sizeof(struct xlog_op_header);
if (ticket->t_curr_res < 0) {
xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
"ctx ticket reservation ran out. Need to up reservation");
@@ -2385,9 +2334,8 @@
xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
}
- index = 0;
- lv = log_vector;
- vecp = lv->lv_iovecp;
+ len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec);
+ *start_lsn = 0;
while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
void *ptr;
int log_offset;
@@ -2411,7 +2359,6 @@
while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
struct xfs_log_iovec *reg;
struct xlog_op_header *ophdr;
- int start_rec_copy;
int copy_len;
int copy_off;
bool ordered = false;
@@ -2427,11 +2374,15 @@
ASSERT(reg->i_len % sizeof(int32_t) == 0);
ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
- start_rec_copy = xlog_write_start_rec(ptr, ticket);
- if (start_rec_copy) {
- record_cnt++;
+ /*
+ * Before we start formatting log vectors, we need to
+ * write a start record. Only do this for the first
+ * iclog we write to.
+ */
+ if (need_start_rec) {
+ xlog_write_start_rec(ptr, ticket);
xlog_write_adv_cnt(&ptr, &len, &log_offset,
- start_rec_copy);
+ sizeof(struct xlog_op_header));
}
ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
@@ -2463,8 +2414,13 @@
xlog_write_adv_cnt(&ptr, &len, &log_offset,
copy_len);
}
- copy_len += start_rec_copy + sizeof(xlog_op_header_t);
+ copy_len += sizeof(struct xlog_op_header);
record_cnt++;
+ if (need_start_rec) {
+ copy_len += sizeof(struct xlog_op_header);
+ record_cnt++;
+ need_start_rec = false;
+ }
data_cnt += contwr ? copy_len : 0;
error = xlog_write_copy_finish(log, iclog, flags,
@@ -2508,128 +2464,119 @@
ASSERT(len == 0);
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
- if (!commit_iclog)
- return xlog_state_release_iclog(log, iclog);
+ if (commit_iclog) {
+ ASSERT(flags & XLOG_COMMIT_TRANS);
+ *commit_iclog = iclog;
+ } else {
+ error = xlog_state_release_iclog(log, iclog);
+ }
+ spin_unlock(&log->l_icloglock);
- ASSERT(flags & XLOG_COMMIT_TRANS);
- *commit_iclog = iclog;
- return 0;
+ return error;
}
+static void
+xlog_state_activate_iclog(
+ struct xlog_in_core *iclog,
+ int *iclogs_changed)
+{
+ ASSERT(list_empty_careful(&iclog->ic_callbacks));
-/*****************************************************************************
- *
- * State Machine functions
- *
- *****************************************************************************
- */
+ /*
+ * If the number of ops in this iclog indicate it just contains the
+ * dummy transaction, we can change state into IDLE (the second time
+ * around). Otherwise we should change the state into NEED a dummy.
+ * We don't need to cover the dummy.
+ */
+ if (*iclogs_changed == 0 &&
+ iclog->ic_header.h_num_logops == cpu_to_be32(XLOG_COVER_OPS)) {
+ *iclogs_changed = 1;
+ } else {
+ /*
+ * We have two dirty iclogs so start over. This could also be
+ * num of ops indicating this is not the dummy going out.
+ */
+ *iclogs_changed = 2;
+ }
+
+ iclog->ic_state = XLOG_STATE_ACTIVE;
+ iclog->ic_offset = 0;
+ iclog->ic_header.h_num_logops = 0;
+ memset(iclog->ic_header.h_cycle_data, 0,
+ sizeof(iclog->ic_header.h_cycle_data));
+ iclog->ic_header.h_lsn = 0;
+}
/*
- * An iclog has just finished IO completion processing, so we need to update
- * the iclog state and propagate that up into the overall log state. Hence we
- * prepare the iclog for cleaning, and then clean all the pending dirty iclogs
- * starting from the head, and then wake up any threads that are waiting for the
- * iclog to be marked clean.
- *
- * The ordering of marking iclogs ACTIVE must be maintained, so an iclog
- * doesn't become ACTIVE beyond one that is SYNCING. This is also required to
- * maintain the notion that we use a ordered wait queue to hold off would be
- * writers to the log when every iclog is trying to sync to disk.
- *
- * Caller must hold the icloglock before calling us.
- *
- * State Change: !IOERROR -> DIRTY -> ACTIVE
+ * Loop through all iclogs and mark all iclogs currently marked DIRTY as
+ * ACTIVE after iclog I/O has completed.
*/
+static void
+xlog_state_activate_iclogs(
+ struct xlog *log,
+ int *iclogs_changed)
+{
+ struct xlog_in_core *iclog = log->l_iclog;
+
+ do {
+ if (iclog->ic_state == XLOG_STATE_DIRTY)
+ xlog_state_activate_iclog(iclog, iclogs_changed);
+ /*
+ * The ordering of marking iclogs ACTIVE must be maintained, so
+ * an iclog doesn't become ACTIVE beyond one that is SYNCING.
+ */
+ else if (iclog->ic_state != XLOG_STATE_ACTIVE)
+ break;
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
+}
+
+static int
+xlog_covered_state(
+ int prev_state,
+ int iclogs_changed)
+{
+ /*
+ * We usually go to NEED. But we go to NEED2 if the changed indicates we
+ * are done writing the dummy record. If we are done with the second
+ * dummy recored (DONE2), then we go to IDLE.
+ */
+ switch (prev_state) {
+ case XLOG_STATE_COVER_IDLE:
+ case XLOG_STATE_COVER_NEED:
+ case XLOG_STATE_COVER_NEED2:
+ break;
+ case XLOG_STATE_COVER_DONE:
+ if (iclogs_changed == 1)
+ return XLOG_STATE_COVER_NEED2;
+ break;
+ case XLOG_STATE_COVER_DONE2:
+ if (iclogs_changed == 1)
+ return XLOG_STATE_COVER_IDLE;
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ return XLOG_STATE_COVER_NEED;
+}
+
STATIC void
xlog_state_clean_iclog(
struct xlog *log,
struct xlog_in_core *dirty_iclog)
{
- struct xlog_in_core *iclog;
- int changed = 0;
+ int iclogs_changed = 0;
- /* Prepare the completed iclog. */
- if (!(dirty_iclog->ic_state & XLOG_STATE_IOERROR))
- dirty_iclog->ic_state = XLOG_STATE_DIRTY;
+ dirty_iclog->ic_state = XLOG_STATE_DIRTY;
- /* Walk all the iclogs to update the ordered active state. */
- iclog = log->l_iclog;
- do {
- if (iclog->ic_state == XLOG_STATE_DIRTY) {
- iclog->ic_state = XLOG_STATE_ACTIVE;
- iclog->ic_offset = 0;
- ASSERT(list_empty_careful(&iclog->ic_callbacks));
- /*
- * If the number of ops in this iclog indicate it just
- * contains the dummy transaction, we can
- * change state into IDLE (the second time around).
- * Otherwise we should change the state into
- * NEED a dummy.
- * We don't need to cover the dummy.
- */
- if (!changed &&
- (be32_to_cpu(iclog->ic_header.h_num_logops) ==
- XLOG_COVER_OPS)) {
- changed = 1;
- } else {
- /*
- * We have two dirty iclogs so start over
- * This could also be num of ops indicates
- * this is not the dummy going out.
- */
- changed = 2;
- }
- iclog->ic_header.h_num_logops = 0;
- memset(iclog->ic_header.h_cycle_data, 0,
- sizeof(iclog->ic_header.h_cycle_data));
- iclog->ic_header.h_lsn = 0;
- } else if (iclog->ic_state == XLOG_STATE_ACTIVE)
- /* do nothing */;
- else
- break; /* stop cleaning */
- iclog = iclog->ic_next;
- } while (iclog != log->l_iclog);
-
-
- /*
- * Wake up threads waiting in xfs_log_force() for the dirty iclog
- * to be cleaned.
- */
+ xlog_state_activate_iclogs(log, &iclogs_changed);
wake_up_all(&dirty_iclog->ic_force_wait);
- /*
- * Change state for the dummy log recording.
- * We usually go to NEED. But we go to NEED2 if the changed indicates
- * we are done writing the dummy record.
- * If we are done with the second dummy recored (DONE2), then
- * we go to IDLE.
- */
- if (changed) {
- switch (log->l_covered_state) {
- case XLOG_STATE_COVER_IDLE:
- case XLOG_STATE_COVER_NEED:
- case XLOG_STATE_COVER_NEED2:
- log->l_covered_state = XLOG_STATE_COVER_NEED;
- break;
-
- case XLOG_STATE_COVER_DONE:
- if (changed == 1)
- log->l_covered_state = XLOG_STATE_COVER_NEED2;
- else
- log->l_covered_state = XLOG_STATE_COVER_NEED;
- break;
-
- case XLOG_STATE_COVER_DONE2:
- if (changed == 1)
- log->l_covered_state = XLOG_STATE_COVER_IDLE;
- else
- log->l_covered_state = XLOG_STATE_COVER_NEED;
- break;
-
- default:
- ASSERT(0);
- }
+ if (iclogs_changed) {
+ log->l_covered_state = xlog_covered_state(log->l_covered_state,
+ iclogs_changed);
}
}
@@ -2641,7 +2588,8 @@
xfs_lsn_t lowest_lsn = 0, lsn;
do {
- if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+ if (iclog->ic_state == XLOG_STATE_ACTIVE ||
+ iclog->ic_state == XLOG_STATE_DIRTY)
continue;
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
@@ -2701,61 +2649,48 @@
xlog_state_iodone_process_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
- struct xlog_in_core *completed_iclog,
bool *ioerror)
{
xfs_lsn_t lowest_lsn;
xfs_lsn_t header_lsn;
- /* Skip all iclogs in the ACTIVE & DIRTY states */
- if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+ switch (iclog->ic_state) {
+ case XLOG_STATE_ACTIVE:
+ case XLOG_STATE_DIRTY:
+ /*
+ * Skip all iclogs in the ACTIVE & DIRTY states:
+ */
return false;
-
- /*
- * Between marking a filesystem SHUTDOWN and stopping the log, we do
- * flush all iclogs to disk (if there wasn't a log I/O error). So, we do
- * want things to go smoothly in case of just a SHUTDOWN w/o a
- * LOG_IO_ERROR.
- */
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
+ case XLOG_STATE_IOERROR:
+ /*
+ * Between marking a filesystem SHUTDOWN and stopping the log,
+ * we do flush all iclogs to disk (if there wasn't a log I/O
+ * error). So, we do want things to go smoothly in case of just
+ * a SHUTDOWN w/o a LOG_IO_ERROR.
+ */
*ioerror = true;
return false;
- }
-
- /*
- * Can only perform callbacks in order. Since this iclog is not in the
- * DONE_SYNC/ DO_CALLBACK state, we skip the rest and just try to clean
- * up. If we set our iclog to DO_CALLBACK, we will not process it when
- * we retry since a previous iclog is in the CALLBACK and the state
- * cannot change since we are holding the l_icloglock.
- */
- if (!(iclog->ic_state &
- (XLOG_STATE_DONE_SYNC | XLOG_STATE_DO_CALLBACK))) {
- if (completed_iclog &&
- (completed_iclog->ic_state == XLOG_STATE_DONE_SYNC)) {
- completed_iclog->ic_state = XLOG_STATE_DO_CALLBACK;
- }
+ case XLOG_STATE_DONE_SYNC:
+ /*
+ * Now that we have an iclog that is in the DONE_SYNC state, do
+ * one more check here to see if we have chased our tail around.
+ * If this is not the lowest lsn iclog, then we will leave it
+ * for another completion to process.
+ */
+ header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ lowest_lsn = xlog_get_lowest_lsn(log);
+ if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
+ return false;
+ xlog_state_set_callback(log, iclog, header_lsn);
+ return false;
+ default:
+ /*
+ * Can only perform callbacks in order. Since this iclog is not
+ * in the DONE_SYNC state, we skip the rest and just try to
+ * clean up.
+ */
return true;
}
-
- /*
- * We now have an iclog that is in either the DO_CALLBACK or DONE_SYNC
- * states. The other states (WANT_SYNC, SYNCING, or CALLBACK were caught
- * by the above if and are going to clean (i.e. we aren't doing their
- * callbacks) see the above if.
- *
- * We will do one more check here to see if we have chased our tail
- * around. If this is not the lowest lsn iclog, then we will leave it
- * for another completion to process.
- */
- header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
- lowest_lsn = xlog_get_lowest_lsn(log);
- if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
- return false;
-
- xlog_state_set_callback(log, iclog, header_lsn);
- return false;
-
}
/*
@@ -2770,8 +2705,9 @@
static void
xlog_state_do_iclog_callbacks(
struct xlog *log,
- struct xlog_in_core *iclog,
- bool aborted)
+ struct xlog_in_core *iclog)
+ __releases(&log->l_icloglock)
+ __acquires(&log->l_icloglock)
{
spin_unlock(&log->l_icloglock);
spin_lock(&iclog->ic_callback_lock);
@@ -2781,7 +2717,7 @@
list_splice_init(&iclog->ic_callbacks, &tmp);
spin_unlock(&iclog->ic_callback_lock);
- xlog_cil_process_committed(&tmp, aborted);
+ xlog_cil_process_committed(&tmp);
spin_lock(&iclog->ic_callback_lock);
}
@@ -2794,57 +2730,12 @@
spin_unlock(&iclog->ic_callback_lock);
}
-#ifdef DEBUG
-/*
- * Make one last gasp attempt to see if iclogs are being left in limbo. If the
- * above loop finds an iclog earlier than the current iclog and in one of the
- * syncing states, the current iclog is put into DO_CALLBACK and the callbacks
- * are deferred to the completion of the earlier iclog. Walk the iclogs in order
- * and make sure that no iclog is in DO_CALLBACK unless an earlier iclog is in
- * one of the syncing states.
- *
- * Note that SYNCING|IOERROR is a valid state so we cannot just check for
- * ic_state == SYNCING.
- */
-static void
-xlog_state_callback_check_state(
- struct xlog *log)
-{
- struct xlog_in_core *first_iclog = log->l_iclog;
- struct xlog_in_core *iclog = first_iclog;
-
- do {
- ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK);
- /*
- * Terminate the loop if iclogs are found in states
- * which will cause other threads to clean up iclogs.
- *
- * SYNCING - i/o completion will go through logs
- * DONE_SYNC - interrupt thread should be waiting for
- * l_icloglock
- * IOERROR - give up hope all ye who enter here
- */
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state & XLOG_STATE_SYNCING ||
- iclog->ic_state == XLOG_STATE_DONE_SYNC ||
- iclog->ic_state == XLOG_STATE_IOERROR )
- break;
- iclog = iclog->ic_next;
- } while (first_iclog != iclog);
-}
-#else
-#define xlog_state_callback_check_state(l) ((void)0)
-#endif
-
STATIC void
xlog_state_do_callback(
- struct xlog *log,
- bool aborted,
- struct xlog_in_core *ciclog)
+ struct xlog *log)
{
struct xlog_in_core *iclog;
struct xlog_in_core *first_iclog;
- bool did_callbacks = false;
bool cycled_icloglock;
bool ioerror;
int flushcnt = 0;
@@ -2868,11 +2759,11 @@
do {
if (xlog_state_iodone_process_iclog(log, iclog,
- ciclog, &ioerror))
+ &ioerror))
break;
- if (!(iclog->ic_state &
- (XLOG_STATE_CALLBACK | XLOG_STATE_IOERROR))) {
+ if (iclog->ic_state != XLOG_STATE_CALLBACK &&
+ iclog->ic_state != XLOG_STATE_IOERROR) {
iclog = iclog->ic_next;
continue;
}
@@ -2882,14 +2773,14 @@
* we'll have to run at least one more complete loop.
*/
cycled_icloglock = true;
- xlog_state_do_iclog_callbacks(log, iclog, aborted);
-
- xlog_state_clean_iclog(log, iclog);
+ xlog_state_do_iclog_callbacks(log, iclog);
+ if (XLOG_FORCED_SHUTDOWN(log))
+ wake_up_all(&iclog->ic_force_wait);
+ else
+ xlog_state_clean_iclog(log, iclog);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
- did_callbacks |= cycled_icloglock;
-
if (repeats > 5000) {
flushcnt += repeats;
repeats = 0;
@@ -2899,10 +2790,8 @@
}
} while (!ioerror && cycled_icloglock);
- if (did_callbacks)
- xlog_state_callback_check_state(log);
-
- if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
+ if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
+ log->l_iclog->ic_state == XLOG_STATE_IOERROR)
wake_up_all(&log->l_flush_wait);
spin_unlock(&log->l_icloglock);
@@ -2924,25 +2813,22 @@
*/
STATIC void
xlog_state_done_syncing(
- struct xlog_in_core *iclog,
- bool aborted)
+ struct xlog_in_core *iclog)
{
struct xlog *log = iclog->ic_log;
spin_lock(&log->l_icloglock);
-
- ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
- iclog->ic_state == XLOG_STATE_IOERROR);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
/*
* If we got an error, either on the first buffer, or in the case of
- * split log writes, on the second, we mark ALL iclogs STATE_IOERROR,
- * and none should ever be attempted to be written to disk
- * again.
+ * split log writes, on the second, we shut down the file system and
+ * no iclogs should ever be attempted to be written to disk again.
*/
- if (iclog->ic_state != XLOG_STATE_IOERROR)
+ if (!XLOG_FORCED_SHUTDOWN(log)) {
+ ASSERT(iclog->ic_state == XLOG_STATE_SYNCING);
iclog->ic_state = XLOG_STATE_DONE_SYNC;
+ }
/*
* Someone could be sleeping prior to writing out the next
@@ -2951,9 +2837,8 @@
*/
wake_up_all(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
- xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
-} /* xlog_state_done_syncing */
-
+ xlog_state_do_callback(log);
+}
/*
* If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
@@ -2985,7 +2870,6 @@
int log_offset;
xlog_rec_header_t *head;
xlog_in_core_t *iclog;
- int error;
restart:
spin_lock(&log->l_icloglock);
@@ -3034,24 +2918,22 @@
* can fit into remaining data section.
*/
if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
+ int error = 0;
+
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
/*
- * If I'm the only one writing to this iclog, sync it to disk.
- * We need to do an atomic compare and decrement here to avoid
- * racing with concurrent atomic_dec_and_lock() calls in
+ * If we are the only one writing to this iclog, sync it to
+ * disk. We need to do an atomic compare and decrement here to
+ * avoid racing with concurrent atomic_dec_and_lock() calls in
* xlog_state_release_iclog() when there is more than one
* reference to the iclog.
*/
- if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) {
- /* we are the only one */
- spin_unlock(&log->l_icloglock);
+ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
error = xlog_state_release_iclog(log, iclog);
- if (error)
- return error;
- } else {
- spin_unlock(&log->l_icloglock);
- }
+ spin_unlock(&log->l_icloglock);
+ if (error)
+ return error;
goto restart;
}
@@ -3075,21 +2957,21 @@
*logoffsetp = log_offset;
return 0;
-} /* xlog_state_get_iclog_space */
+}
-/* The first cnt-1 times through here we don't need to
- * move the grant write head because the permanent
- * reservation has reserved cnt times the unit amount.
- * Release part of current permanent unit reservation and
- * reset current reservation to be one units worth. Also
- * move grant reservation head forward.
+/*
+ * The first cnt-1 times a ticket goes through here we don't need to move the
+ * grant write head because the permanent reservation has reserved cnt times the
+ * unit amount. Release part of current permanent unit reservation and reset
+ * current reservation to be one units worth. Also move grant reservation head
+ * forward.
*/
-STATIC void
-xlog_regrant_reserve_log_space(
+void
+xfs_log_ticket_regrant(
struct xlog *log,
struct xlog_ticket *ticket)
{
- trace_xfs_log_regrant_reserve_enter(log, ticket);
+ trace_xfs_log_ticket_regrant(log, ticket);
if (ticket->t_cnt > 0)
ticket->t_cnt--;
@@ -3101,21 +2983,20 @@
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
- trace_xfs_log_regrant_reserve_sub(log, ticket);
+ trace_xfs_log_ticket_regrant_sub(log, ticket);
/* just return if we still have some of the pre-reserved space */
- if (ticket->t_cnt > 0)
- return;
+ if (!ticket->t_cnt) {
+ xlog_grant_add_space(log, &log->l_reserve_head.grant,
+ ticket->t_unit_res);
+ trace_xfs_log_ticket_regrant_exit(log, ticket);
- xlog_grant_add_space(log, &log->l_reserve_head.grant,
- ticket->t_unit_res);
+ ticket->t_curr_res = ticket->t_unit_res;
+ xlog_tic_reset_res(ticket);
+ }
- trace_xfs_log_regrant_reserve_exit(log, ticket);
-
- ticket->t_curr_res = ticket->t_unit_res;
- xlog_tic_reset_res(ticket);
-} /* xlog_regrant_reserve_log_space */
-
+ xfs_log_ticket_put(ticket);
+}
/*
* Give back the space left from a reservation.
@@ -3131,18 +3012,19 @@
* space, the count will stay at zero and the only space remaining will be
* in the current reservation field.
*/
-STATIC void
-xlog_ungrant_log_space(
+void
+xfs_log_ticket_ungrant(
struct xlog *log,
struct xlog_ticket *ticket)
{
- int bytes;
+ int bytes;
+
+ trace_xfs_log_ticket_ungrant(log, ticket);
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- trace_xfs_log_ungrant_enter(log, ticket);
- trace_xfs_log_ungrant_sub(log, ticket);
+ trace_xfs_log_ticket_ungrant_sub(log, ticket);
/*
* If this is a permanent reservation ticket, we may be able to free
@@ -3157,71 +3039,15 @@
xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
- trace_xfs_log_ungrant_exit(log, ticket);
+ trace_xfs_log_ticket_ungrant_exit(log, ticket);
xfs_log_space_wake(log->l_mp);
+ xfs_log_ticket_put(ticket);
}
/*
- * Flush iclog to disk if this is the last reference to the given iclog and
- * the WANT_SYNC bit is set.
- *
- * When this function is entered, the iclog is not necessarily in the
- * WANT_SYNC state. It may be sitting around waiting to get filled.
- *
- *
- */
-STATIC int
-xlog_state_release_iclog(
- struct xlog *log,
- struct xlog_in_core *iclog)
-{
- int sync = 0; /* do we sync? */
-
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
-
- ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
- if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
- return 0;
-
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_WANT_SYNC);
-
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
- /* update tail before writing to iclog */
- xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
- sync++;
- iclog->ic_state = XLOG_STATE_SYNCING;
- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
- xlog_verify_tail_lsn(log, iclog, tail_lsn);
- /* cycle incremented when incrementing curr_block */
- }
- spin_unlock(&log->l_icloglock);
-
- /*
- * We let the log lock go, so it's possible that we hit a log I/O
- * error or some other SHUTDOWN condition that marks the iclog
- * as XLOG_STATE_IOERROR before the bwrite. However, we know that
- * this iclog has consistent data, so we ignore IOERROR
- * flags after this point.
- */
- if (sync)
- xlog_sync(log, iclog);
- return 0;
-} /* xlog_state_release_iclog */
-
-
-/*
- * This routine will mark the current iclog in the ring as WANT_SYNC
- * and move the current iclog pointer to the next iclog in the ring.
- * When this routine is called from xlog_state_get_iclog_space(), the
- * exact size of the iclog has not yet been determined. All we know is
- * that every data block. We have run out of space in this log record.
+ * This routine will mark the current iclog in the ring as WANT_SYNC and move
+ * the current iclog pointer to the next iclog in the ring.
*/
STATIC void
xlog_state_switch_iclogs(
@@ -3230,6 +3056,8 @@
int eventual_size)
{
ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+ assert_spin_locked(&log->l_icloglock);
+
if (!eventual_size)
eventual_size = iclog->ic_offset;
iclog->ic_state = XLOG_STATE_WANT_SYNC;
@@ -3264,7 +3092,7 @@
}
ASSERT(iclog == log->l_iclog);
log->l_iclog = iclog->ic_next;
-} /* xlog_state_switch_iclogs */
+}
/*
* Write out all data in the in-core log as of this exact moment in time.
@@ -3309,7 +3137,7 @@
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
if (iclog->ic_state == XLOG_STATE_DIRTY ||
@@ -3324,9 +3152,6 @@
* previous iclog and go to sleep.
*/
iclog = iclog->ic_prev;
- if (iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_DIRTY)
- goto out_unlock;
} else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
if (atomic_read(&iclog->ic_refcnt) == 0) {
/*
@@ -3339,14 +3164,10 @@
atomic_inc(&iclog->ic_refcnt);
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
-
if (xlog_state_release_iclog(log, iclog))
- return -EIO;
+ goto out_error;
- spin_lock(&log->l_icloglock);
- if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
- iclog->ic_state == XLOG_STATE_DIRTY)
+ if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
goto out_unlock;
} else {
/*
@@ -3366,17 +3187,8 @@
;
}
- if (!(flags & XFS_LOG_SYNC))
- goto out_unlock;
-
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- goto out_error;
- XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
- return 0;
-
+ if (flags & XFS_LOG_SYNC)
+ return xlog_wait_on_iclog(iclog);
out_unlock:
spin_unlock(&log->l_icloglock);
return 0;
@@ -3398,7 +3210,7 @@
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
@@ -3407,9 +3219,6 @@
goto out_unlock;
}
- if (iclog->ic_state == XLOG_STATE_DIRTY)
- goto out_unlock;
-
if (iclog->ic_state == XLOG_STATE_ACTIVE) {
/*
* We sleep here if we haven't already slept (e.g. this is the
@@ -3427,10 +3236,8 @@
* will go out then.
*/
if (!already_slept &&
- (iclog->ic_prev->ic_state &
- (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
- ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-
+ (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_prev->ic_write_wait,
@@ -3439,27 +3246,14 @@
}
atomic_inc(&iclog->ic_refcnt);
xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
- return -EIO;
+ goto out_error;
if (log_flushed)
*log_flushed = 1;
- spin_lock(&log->l_icloglock);
}
- if (!(flags & XFS_LOG_SYNC) ||
- (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
- goto out_unlock;
-
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- goto out_error;
-
- XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
- return 0;
-
+ if (flags & XFS_LOG_SYNC)
+ return xlog_wait_on_iclog(iclog);
out_unlock:
spin_unlock(&log->l_icloglock);
return 0;
@@ -3506,33 +3300,6 @@
}
/*
- * Called when we want to mark the current iclog as being ready to sync to
- * disk.
- */
-STATIC void
-xlog_state_want_sync(
- struct xlog *log,
- struct xlog_in_core *iclog)
-{
- assert_spin_locked(&log->l_icloglock);
-
- if (iclog->ic_state == XLOG_STATE_ACTIVE) {
- xlog_state_switch_iclogs(log, iclog, 0);
- } else {
- ASSERT(iclog->ic_state &
- (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR));
- }
-}
-
-
-/*****************************************************************************
- *
- * TICKET functions
- *
- *****************************************************************************
- */
-
-/*
* Free a used ticket when its refcount falls to zero.
*/
void
@@ -3541,7 +3308,7 @@
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
if (atomic_dec_and_test(&ticket->t_ref))
- kmem_zone_free(xfs_log_ticket_zone, ticket);
+ kmem_cache_free(xfs_log_ticket_zone, ticket);
}
xlog_ticket_t *
@@ -3659,15 +3426,12 @@
int unit_bytes,
int cnt,
char client,
- bool permanent,
- xfs_km_flags_t alloc_flags)
+ bool permanent)
{
struct xlog_ticket *tic;
int unit_res;
- tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
- if (!tic)
- return NULL;
+ tic = kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL);
unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes);
@@ -3680,7 +3444,6 @@
tic->t_ocnt = cnt;
tic->t_tid = prandom_u32();
tic->t_clientid = client;
- tic->t_flags = XLOG_TIC_INITED;
if (permanent)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
@@ -3689,13 +3452,6 @@
return tic;
}
-
-/******************************************************************************
- *
- * Log debug routines
- *
- ******************************************************************************
- */
#if defined(DEBUG)
/*
* Make sure that the destination ptr is within the valid data region of
@@ -3781,7 +3537,7 @@
if (blocks < BTOBB(iclog->ic_offset) + 1)
xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
}
-} /* xlog_verify_tail_lsn */
+}
/*
* Perform a number of checks on the iclog before writing to disk.
@@ -3884,7 +3640,7 @@
}
ptr += sizeof(xlog_op_header_t) + op_len;
}
-} /* xlog_verify_iclog */
+}
#endif
/*
@@ -3897,7 +3653,7 @@
xlog_in_core_t *iclog, *ic;
iclog = log->l_iclog;
- if (! (iclog->ic_state & XLOG_STATE_IOERROR)) {
+ if (iclog->ic_state != XLOG_STATE_IOERROR) {
/*
* Mark all the incore logs IOERROR.
* From now on, no log flushes will result.
@@ -3957,7 +3713,7 @@
* Somebody could've already done the hard work for us.
* No need to get locks for this.
*/
- if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) {
+ if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) {
ASSERT(XLOG_FORCED_SHUTDOWN(log));
return 1;
}
@@ -4008,21 +3764,8 @@
spin_lock(&log->l_cilp->xc_push_lock);
wake_up_all(&log->l_cilp->xc_commit_wait);
spin_unlock(&log->l_cilp->xc_push_lock);
- xlog_state_do_callback(log, true, NULL);
+ xlog_state_do_callback(log);
-#ifdef XFSERRORDEBUG
- {
- xlog_in_core_t *iclog;
-
- spin_lock(&log->l_icloglock);
- iclog = log->l_iclog;
- do {
- ASSERT(iclog->ic_callback == 0);
- iclog = iclog->ic_next;
- } while (iclog != log->l_iclog);
- spin_unlock(&log->l_icloglock);
- }
-#endif
/* return non-zero if log IOERROR transition had already happened */
return retval;
}