diff --git a/mm/vmscan.c b/mm/vmscan.c
index 15e3a29fdb2..7b0573f33a2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -751,7 +751,9 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
 static unsigned long shrink_page_list(struct list_head *page_list,
 				      struct zone *zone,
 				      struct scan_control *sc,
-				      int priority)
+				      int priority,
+				      unsigned long *ret_nr_dirty,
+				      unsigned long *ret_nr_writeback)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
@@ -759,6 +761,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	unsigned long nr_dirty = 0;
 	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
+	unsigned long nr_writeback = 0;
 
 	cond_resched();
 
@@ -795,6 +798,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
 		if (PageWriteback(page)) {
+			nr_writeback++;
 			/*
 			 * Synchronous reclaim cannot queue pages for
 			 * writeback due to the possibility of stack overflow
@@ -1000,6 +1004,8 @@ keep_lumpy:
 
 	list_splice(&ret_pages, page_list);
 	count_vm_events(PGACTIVATE, pgactivate);
+	*ret_nr_dirty += nr_dirty;
+	*ret_nr_writeback += nr_writeback;
 	return nr_reclaimed;
 }
 
@@ -1460,6 +1466,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	unsigned long nr_taken;
 	unsigned long nr_anon;
 	unsigned long nr_file;
+	unsigned long nr_dirty = 0;
+	unsigned long nr_writeback = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
 
 	while (unlikely(too_many_isolated(zone, file, sc))) {
@@ -1512,12 +1520,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority);
+	nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+						&nr_dirty, &nr_writeback);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, zone, sc, priority);
+		nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+					priority, &nr_dirty, &nr_writeback);
 	}
 
 	local_irq_disable();
@@ -1527,6 +1537,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
 
+	/*
+	 * If reclaim is isolating dirty pages under writeback, it implies
+	 * that the long-lived page allocation rate is exceeding the page
+	 * laundering rate. Either the global limits are not being effective
+	 * at throttling processes due to the page distribution throughout
+	 * zones or there is heavy usage of a slow backing device. The
+	 * only option is to throttle from reclaim context which is not ideal
+	 * as there is no guarantee the dirtying process is throttled in the
+	 * same way balance_dirty_pages() manages.
+	 *
+	 * This scales the number of dirty pages that must be under writeback
+	 * before throttling depending on priority. It is a simple backoff
+	 * function that has the most effect in the range DEF_PRIORITY to
+	 * DEF_PRIORITY-2, which is the priority range in which reclaim is
+	 * considered to be in trouble.
+	 *
+	 * DEF_PRIORITY   100% of isolated pages must be PageWriteback to throttle
+	 * DEF_PRIORITY-1 50% must be PageWriteback
+	 * DEF_PRIORITY-2 25% must be PageWriteback, kswapd in trouble
+	 *                ...
+	 * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
+	 *                isolated page is PageWriteback
+	 */
+	if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority)))
+		wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
+
 	trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
 		zone_idx(zone),
 		nr_scanned, nr_reclaimed,
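
For reference, the backoff threshold above can be sanity-checked in isolation. The following is a minimal userspace sketch, not kernel code: DEF_PRIORITY and SWAP_CLUSTER_MAX are redefined locally with their usual kernel values (12 and 32, from include/linux/mmzone.h and include/linux/swap.h), and a printf stands in for the wait_iff_congested() throttle.

/*
 * Standalone sketch of the throttling threshold (not kernel code).
 * Assumes the usual values DEF_PRIORITY == 12 and SWAP_CLUSTER_MAX == 32.
 */
#include <stdio.h>

#define DEF_PRIORITY		12
#define SWAP_CLUSTER_MAX	32

int main(void)
{
	unsigned long nr_taken = SWAP_CLUSTER_MAX;
	int priority;

	for (priority = DEF_PRIORITY; priority >= DEF_PRIORITY - 6; priority--) {
		/* Same shift as the patch: halve the threshold per priority level */
		unsigned long threshold = nr_taken >> (DEF_PRIORITY - priority);

		/*
		 * The patch tests "nr_writeback && nr_writeback >= threshold",
		 * so once the shift reaches zero a single PageWriteback page
		 * is enough to throttle.
		 */
		printf("priority %2d: throttle when nr_writeback >= %lu of %lu\n",
		       priority, threshold ? threshold : 1, nr_taken);
	}
	return 0;
}

For SWAP_CLUSTER_MAX isolated pages this prints thresholds of 32, 16, 8, 4, 2, 1 and 1 for priorities 12 down to 6, matching the 100%/50%/25% table in the comment.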