Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2d658b2..eb34d20 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -257,7 +257,7 @@
* requiring writeback.
*
* This number of dirtyable pages is the base value of which the
- * user-configurable dirty ratio is the effictive number of pages that
+ * user-configurable dirty ratio is the effective number of pages that
* are allowed to be actually dirtied. Per individual zone, or
* globally by using the sum of dirtyable pages over all zones.
*
@@ -387,8 +387,7 @@
* Calculate @dtc->thresh and ->bg_thresh considering
* vm_dirty_{bytes|ratio} and dirty_background_{bytes|ratio}. The caller
* must ensure that @dtc->avail is set before calling this function. The
- * dirty limits will be lifted by 1/4 for PF_LESS_THROTTLE (ie. nfsd) and
- * real-time tasks.
+ * dirty limits will be lifted by 1/4 for real-time tasks.
*/
static void domain_dirty_limits(struct dirty_throttle_control *dtc)
{
@@ -436,7 +435,7 @@
if (bg_thresh >= thresh)
bg_thresh = thresh / 2;
tsk = current;
- if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
+ if (rt_task(tsk)) {
bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
}
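Note: the PF_LESS_THROTTLE test is gone because upstream replaced that flag with PF_LOCAL_THROTTLE, which is handled later in balance_dirty_pages() via a per-writeback-context freerun check rather than a blanket limit boost; only real-time tasks keep the boost here. A minimal sketch of how the ratio-based limit and the rt_task() boost combine (hypothetical helper, not kernel code; dirty_limit stands in for global_wb_domain.dirty_limit):

    static unsigned long sketch_dirty_thresh(unsigned long avail_pages,
                                             int dirty_ratio, int rt,
                                             unsigned long dirty_limit)
    {
            unsigned long thresh = avail_pages * dirty_ratio / 100;

            if (rt) /* real-time tasks: 25% extra plus a small fixed share */
                    thresh += thresh / 4 + dirty_limit / 32;
            return thresh;
    }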
@@ -486,7 +485,7 @@
else
dirty = vm_dirty_ratio * node_memory / 100;
- if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
+ if (rt_task(tsk))
dirty += dirty / 4;
return dirty;
@@ -505,15 +504,13 @@
unsigned long nr_pages = 0;
nr_pages += node_page_state(pgdat, NR_FILE_DIRTY);
- nr_pages += node_page_state(pgdat, NR_UNSTABLE_NFS);
nr_pages += node_page_state(pgdat, NR_WRITEBACK);
return nr_pages <= limit;
}
int dirty_background_ratio_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -524,8 +521,7 @@
}
int dirty_background_bytes_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -535,9 +531,8 @@
return ret;
}
-int dirty_ratio_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
int old_ratio = vm_dirty_ratio;
int ret;
@@ -551,8 +546,7 @@
}
int dirty_bytes_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
unsigned long old_bytes = vm_dirty_bytes;
int ret;
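Note: dropping `void __user *` from all of these handlers follows the v5.8 sysctl rework in which the proc core copies data between user and kernel space itself before and after calling ->proc_handler, so handlers now receive a plain kernel buffer. A minimal sketch of a handler in the new style (illustrative names; recompute_derived_state() is hypothetical):

    static int example_tunable_handler(struct ctl_table *table, int write,
                                       void *buffer, size_t *lenp, loff_t *ppos)
    {
            int ret = proc_dointvec(table, write, buffer, lenp, ppos);

            /* no copy_from_user() needed; 'buffer' is kernel memory */
            if (ret == 0 && write)
                    recompute_derived_state();      /* hypothetical */
            return ret;
    }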
@@ -759,14 +753,14 @@
* bounded by the bdi->min_ratio and/or bdi->max_ratio parameters, if set.
*
* Return: @wb's dirty limit in pages. The term "dirty" in the context of
- * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
+ * dirty balancing includes all PG_dirty and PG_writeback pages.
*/
static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
{
struct wb_domain *dom = dtc_dom(dtc);
unsigned long thresh = dtc->thresh;
u64 wb_thresh;
- long numerator, denominator;
+ unsigned long numerator, denominator;
unsigned long wb_min_ratio, wb_max_ratio;
/*
@@ -777,7 +771,7 @@
wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
wb_thresh *= numerator;
- do_div(wb_thresh, denominator);
+ wb_thresh = div64_ul(wb_thresh, denominator);
wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
@@ -1102,7 +1096,7 @@
bw = written - min(written, wb->written_stamp);
bw *= HZ;
if (unlikely(elapsed > period)) {
- do_div(bw, elapsed);
+ bw = div64_ul(bw, elapsed);
avg = bw;
goto out;
}
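Note: both do_div() conversions in this file fix the same pitfall: do_div() takes a 32-bit divisor, so on 64-bit kernels a divisor wider than 32 bits is silently truncated, while div64_ul() divides a u64 by a full unsigned long. The numerator/denominator locals in __wb_calc_thresh() become unsigned long for the same reason. Sketch of the difference:

    #include <linux/math64.h>

    u64 bw = (u64)written_pages * HZ;       /* may exceed 32 bits */

    /* do_div(bw, elapsed) would first truncate 'elapsed' to u32;
     * div64_ul() keeps the full unsigned long divisor: */
    bw = div64_ul(bw, elapsed);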
@@ -1567,7 +1561,7 @@
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
&mdtc_stor : NULL;
struct dirty_throttle_control *sdtc;
- unsigned long nr_reclaimable; /* = file_dirty + unstable_nfs */
+ unsigned long nr_reclaimable; /* = file_dirty */
long period;
long pause;
long max_pause;
@@ -1587,14 +1581,7 @@
unsigned long m_thresh = 0;
unsigned long m_bg_thresh = 0;
- /*
- * Unstable writes are a feature of certain networked
- * filesystems (i.e. NFS) in which data may have been
- * written to the server's write cache, but has not yet
- * been flushed to permanent storage.
- */
- nr_reclaimable = global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS);
+ nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
gdtc->avail = global_dirtyable_memory();
gdtc->dirty = nr_reclaimable + global_node_page_state(NR_WRITEBACK);
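Note: the NR_UNSTABLE_NFS deletions here and in node_dirty_ok()/wb_over_bg_thresh() stem from the v5.8 NFS writeback rework: "unstable" pages (written to the server's cache but not yet committed to stable storage) are now accounted as ordinary NR_WRITEBACK, so the separate counter and its explanatory comment could go. The two figures the throttle works from are now simply, as a sketch:

    unsigned long nr_reclaimable = global_node_page_state(NR_FILE_DIRTY);
    unsigned long nr_dirty       = nr_reclaimable +
                                   global_node_page_state(NR_WRITEBACK);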
@@ -1653,8 +1640,12 @@
if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh) &&
(!mdtc ||
m_dirty <= dirty_freerun_ceiling(m_thresh, m_bg_thresh))) {
- unsigned long intv = dirty_poll_interval(dirty, thresh);
- unsigned long m_intv = ULONG_MAX;
+ unsigned long intv;
+ unsigned long m_intv;
+
+free_running:
+ intv = dirty_poll_interval(dirty, thresh);
+ m_intv = ULONG_MAX;
current->dirty_paused_when = now;
current->nr_dirtied = 0;
@@ -1673,9 +1664,20 @@
* Calculate global domain's pos_ratio and select the
* global dtc by default.
*/
- if (!strictlimit)
+ if (!strictlimit) {
wb_dirty_limits(gdtc);
+ if ((current->flags & PF_LOCAL_THROTTLE) &&
+ gdtc->wb_dirty <
+ dirty_freerun_ceiling(gdtc->wb_thresh,
+ gdtc->wb_bg_thresh))
+ /*
+ * LOCAL_THROTTLE tasks must not be throttled
+ * when below the per-wb freerun ceiling.
+ */
+ goto free_running;
+ }
+
dirty_exceeded = (gdtc->wb_dirty > gdtc->wb_thresh) &&
((gdtc->dirty > gdtc->thresh) || strictlimit);
@@ -1689,9 +1691,20 @@
* both global and memcg domains. Choose the one
* w/ lower pos_ratio.
*/
- if (!strictlimit)
+ if (!strictlimit) {
wb_dirty_limits(mdtc);
+ if ((current->flags & PF_LOCAL_THROTTLE) &&
+ mdtc->wb_dirty <
+ dirty_freerun_ceiling(mdtc->wb_thresh,
+ mdtc->wb_bg_thresh))
+ /*
+ * LOCAL_THROTTLE tasks must not be
+ * throttled when below the per-wb
+ * freerun ceiling.
+ */
+ goto free_running;
+ }
dirty_exceeded |= (mdtc->wb_dirty > mdtc->wb_thresh) &&
((mdtc->dirty > mdtc->thresh) || strictlimit);
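Note: these two hunks are where PF_LESS_THROTTLE's replacement, PF_LOCAL_THROTTLE, takes effect. A flagged task (e.g. nfsd or the loop driver writing through to a local filesystem) is exempted from throttling only while its own writeback context sits below the per-wb freerun ceiling; this prevents a deadlock in which the task that must clean pages is itself blocked, without granting a blanket global exemption. The ceiling is the midpoint of the two thresholds, defined earlier in this file as:

    static unsigned long dirty_freerun_ceiling(unsigned long thresh,
                                               unsigned long bg_thresh)
    {
            return (thresh + bg_thresh) / 2;
    }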
@@ -1869,7 +1882,7 @@
int ratelimit;
int *p;
- if (!bdi_cap_account_dirty(bdi))
+ if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
return;
if (inode_cgwb_enabled(inode))
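Note: bdi_cap_account_dirty() and bdi_cap_account_writeback() disappeared in the v5.10 backing-dev cleanup: the negative capability flags (BDI_CAP_NO_WRITEBACK, BDI_CAP_NO_ACCT_DIRTY, ...) were collapsed into the positive BDI_CAP_WRITEBACK and BDI_CAP_WRITEBACK_ACCT, which callers now test directly. Shape of the new checks (illustrative):

    if (!(bdi->capabilities & BDI_CAP_WRITEBACK))
            return;                         /* device does no writeback */

    if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
            inc_wb_stat(wb, WB_WRITEBACK);  /* stats only where accounted */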
@@ -1938,8 +1951,7 @@
* as we're trying to decide whether to put more under writeback.
*/
gdtc->avail = global_dirtyable_memory();
- gdtc->dirty = global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS);
+ gdtc->dirty = global_node_page_state(NR_FILE_DIRTY);
domain_dirty_limits(gdtc);
if (gdtc->dirty > gdtc->bg_thresh)
@@ -1972,7 +1984,7 @@
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
+ void *buffer, size_t *length, loff_t *ppos)
{
unsigned int old_interval = dirty_writeback_interval;
int ret;
@@ -2064,13 +2076,11 @@
* Called early on to tune the page writeback dirty limits.
*
* We used to scale dirty pages according to how total memory
- * related to pages that could be allocated for buffers (by
- * comparing nr_free_buffer_pages() to vm_total_pages.
+ * related to pages that could be allocated for buffers.
*
* However, that was when we used "dirty_ratio" to scale with
* all memory, and we don't do that any more. "dirty_ratio"
- * is now applied to total non-HIGHPAGE memory (by subtracting
- * totalhigh_pages from vm_total_pages), and as such we can't
+ * is now applied to total non-HIGHPAGE memory, and as such we can't
* get into the old insane situation any more where we had
* large amounts of dirty pages compared to a small amount of
* non-HIGHMEM memory.
@@ -2164,7 +2174,6 @@
int error;
struct pagevec pvec;
int nr_pages;
- pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
@@ -2173,8 +2182,7 @@
pagevec_init(&pvec);
if (wbc->range_cyclic) {
- writeback_index = mapping->writeback_index; /* prev offset */
- index = writeback_index;
+ index = mapping->writeback_index; /* prev offset */
end = -1;
} else {
index = wbc->range_start >> PAGE_SHIFT;
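Note: uninitialized_var() was removed tree-wide in v5.9 because it merely silenced, rather than fixed, "may be used uninitialized" warnings and could hide real bugs; the macro was plain self-assignment:

    /* What the removed macro expanded to (old include/linux/compiler*.h): */
    #define uninitialized_var(x) x = x

Here the temporary goes away entirely and 'index' reads mapping->writeback_index directly.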
@@ -2182,12 +2190,12 @@
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
}
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
- tag = PAGECACHE_TAG_TOWRITE;
- else
- tag = PAGECACHE_TAG_DIRTY;
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
tag_pages_for_writeback(mapping, index, end);
+ tag = PAGECACHE_TAG_TOWRITE;
+ } else {
+ tag = PAGECACHE_TAG_DIRTY;
+ }
done_index = index;
while (!done && (index <= end)) {
int i;
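Note: the reordering merges a duplicated sync_mode test and puts the re-tagging first. For data-integrity writeback, the dirty range is converted from PAGECACHE_TAG_DIRTY to PAGECACHE_TAG_TOWRITE before the walk begins, so pages dirtied while the walk runs are not picked up and the loop cannot livelock. The walk then looks pages up by that tag; roughly (heavily trimmed sketch):

    while (!done && index <= end) {
            nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
                                                end, tag);
            if (nr_pages == 0)
                    break;
            /* lock each page, recheck PageDirty, call ->writepage() ... */
    }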
@@ -2415,7 +2423,7 @@
trace_writeback_dirty_page(page, mapping);
- if (mapping_cap_account_dirty(mapping)) {
+ if (mapping_can_writeback(mapping)) {
struct bdi_writeback *wb;
inode_attach_wb(inode, page);
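Note: mapping_cap_account_dirty() is renamed mapping_can_writeback() throughout, part of the same capability cleanup; it is a thin wrapper over the BDI flag, approximately:

    /* Approximate v5.10 definition (include/linux/backing-dev.h): */
    static inline bool mapping_can_writeback(struct address_space *mapping)
    {
            return inode_to_bdi(mapping->host)->capabilities &
                   BDI_CAP_WRITEBACK;
    }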
@@ -2442,7 +2450,7 @@
void account_page_cleaned(struct page *page, struct address_space *mapping,
struct bdi_writeback *wb)
{
- if (mapping_cap_account_dirty(mapping)) {
+ if (mapping_can_writeback(mapping)) {
dec_lruvec_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE);
@@ -2505,7 +2513,7 @@
{
struct address_space *mapping = page->mapping;
- if (mapping && mapping_cap_account_dirty(mapping)) {
+ if (mapping && mapping_can_writeback(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
struct wb_lock_cookie cookie = {};
@@ -2617,7 +2625,7 @@
{
struct address_space *mapping = page_mapping(page);
- if (mapping_cap_account_dirty(mapping)) {
+ if (mapping_can_writeback(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
struct wb_lock_cookie cookie = {};
@@ -2655,9 +2663,9 @@
struct address_space *mapping = page_mapping(page);
int ret = 0;
- BUG_ON(!PageLocked(page));
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
- if (mapping && mapping_cap_account_dirty(mapping)) {
+ if (mapping && mapping_can_writeback(mapping)) {
struct inode *inode = mapping->host;
struct bdi_writeback *wb;
struct wb_lock_cookie cookie = {};
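Note: BUG_ON becomes VM_BUG_ON_PAGE so the assertion costs nothing on production kernels and reports more on debug ones: it only exists under CONFIG_DEBUG_VM and dumps the offending page before panicking. Approximately (include/linux/mmdebug.h):

    #ifdef CONFIG_DEBUG_VM
    #define VM_BUG_ON_PAGE(cond, page)                                    \
            do {                                                          \
                    if (unlikely(cond)) {                                 \
                            dump_page(page, "VM_BUG_ON_PAGE(" #cond ")"); \
                            BUG();                                        \
                    }                                                     \
            } while (0)
    #else
    #define VM_BUG_ON_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
    #endif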
@@ -2730,7 +2738,7 @@
if (ret) {
__xa_clear_mark(&mapping->i_pages, page_index(page),
PAGECACHE_TAG_WRITEBACK);
- if (bdi_cap_account_writeback(bdi)) {
+ if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
struct bdi_writeback *wb = inode_to_wb(inode);
dec_wb_stat(wb, WB_WRITEBACK);
@@ -2746,12 +2754,6 @@
} else {
ret = TestClearPageWriteback(page);
}
- /*
- * NOTE: Page might be free now! Writeback doesn't hold a page
- * reference on its own, it relies on truncation to wait for
- * the clearing of PG_writeback. The below can only access
- * page state that is static across allocation cycles.
- */
if (ret) {
dec_lruvec_state(lruvec, NR_WRITEBACK);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
@@ -2764,7 +2766,7 @@
int __test_set_page_writeback(struct page *page, bool keep_write)
{
struct address_space *mapping = page_mapping(page);
- int ret;
+ int ret, access_ret;
lock_page_memcg(page);
if (mapping && mapping_use_writeback_tags(mapping)) {
@@ -2783,7 +2785,7 @@
PAGECACHE_TAG_WRITEBACK);
xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
- if (bdi_cap_account_writeback(bdi))
+ if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
/*
@@ -2807,6 +2809,13 @@
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
}
unlock_page_memcg(page);
+ access_ret = arch_make_page_accessible(page);
+ /*
+ * If writeback has been triggered on a page that cannot be made
+ * accessible, it is too late to recover here.
+ */
+ VM_BUG_ON_PAGE(access_ret != 0, page);
+
return ret;
}
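Note: arch_make_page_accessible() exists for s390 protected (secure) guests, where a page must be made accessible to the host before I/O may touch it; hooking the writeback path guarantees every page headed for I/O has been handled. On all other architectures it is a no-op:

    /* Generic fallback (include/linux/mm.h) when the arch does not
     * provide its own implementation: */
    #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
    static inline int arch_make_page_accessible(struct page *page)
    {
            return 0;
    }
    #endif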
@@ -2817,7 +2826,7 @@
*/
void wait_on_page_writeback(struct page *page)
{
- if (PageWriteback(page)) {
+ while (PageWriteback(page)) {
trace_wait_on_page_writeback(page, page_mapping(page));
wait_on_page_bit(page, PG_writeback);
}
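Note: the if-to-while change closes a race. Between end_page_writeback() waking the waiter and the waiter actually running, the page can be redirtied and put under writeback again, so a single check may return with PG_writeback still set; callers such as truncation rely on the bit really being clear. Timeline of the race with a plain if (illustrative):

    /*
     *   waiter                          writer
     *   ------                          ------
     *   if (PageWriteback(page))
     *           wait_on_page_bit(...)
     *                                   end_page_writeback()  -> waiter woken
     *                                   page redirtied, writeback restarts
     *   check not repeated: returns with PG_writeback set again
     *
     * Hence the loop: re-test the bit after every wake-up.
     */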
@@ -2834,7 +2843,8 @@
*/
void wait_for_stable_page(struct page *page)
{
- if (bdi_cap_stable_pages_required(inode_to_bdi(page->mapping->host)))
+ page = thp_head(page);
+ if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
wait_on_page_writeback(page);
}
EXPORT_SYMBOL_GPL(wait_for_stable_page);
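Note: wait_for_stable_page() no longer consults a BDI capability. In the v5.10 rework, "needs stable pages" became a request-queue flag, QUEUE_FLAG_STABLE_WRITES, propagated to the superblock as SB_I_STABLE_WRITES; the new thp_head() call makes the check safe on a tail page of a compound page. Roughly how the flag flows (illustrative, drawn from that rework):

    /* Driver side: the device checksums in-flight data, so pages must
     * not change while under I/O: */
    blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);

    /* Mount side (fs/super.c, approximately): */
    if (blk_queue_stable_writes(bdev->bd_disk->queue))
            sb->s_iflags |= SB_I_STABLE_WRITES;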