diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 59ac95a6450..aad9377c982 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, + struct zone *zone); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called(struct task_struct *task) { return false; } + +static inline int +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +{ + return 1; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 457d671029b..6611328460e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -156,6 +156,9 @@ struct mem_cgroup { unsigned long last_oom_jiffies; int obsolete; atomic_t refcnt; + + unsigned int inactive_ratio; + /* * statistics. This must be placed at the end of memcg. */ @@ -431,6 +434,20 @@ long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, return (nr_pages >> priority); } +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +{ + unsigned long active; + unsigned long inactive; + + inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON); + active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON); + + if (inactive * memcg->inactive_ratio < active) + return 1; + + return 0; +} + unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -1360,6 +1377,29 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) return 0; } +/* + * The inactive anon list should be small enough that the VM never has to + * do too much work, but large enough that each inactive page has a chance + * to be referenced again before it is swapped out. + * + * this calculation is straightforward porting from + * page_alloc.c::setup_per_zone_inactive_ratio(). + * it describe more detail. + */ +static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg) +{ + unsigned int gb, ratio; + + gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30; + if (gb) + ratio = int_sqrt(10 * gb); + else + ratio = 1; + + memcg->inactive_ratio = ratio; + +} + static DEFINE_MUTEX(set_limit_mutex); static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, @@ -1398,6 +1438,10 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, GFP_KERNEL, false); if (!progress) retry_count--; } + + if (!ret) + mem_cgroup_set_inactive_ratio(memcg); + return ret; } @@ -1982,7 +2026,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) res_counter_init(&mem->res, NULL); res_counter_init(&mem->memsw, NULL); } - + mem_cgroup_set_inactive_ratio(mem); mem->last_scanned_child = NULL; return &mem->css; diff --git a/mm/vmscan.c b/mm/vmscan.c index e2b31a522a6..b2bc06bffcf 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1310,14 +1310,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, pagevec_release(&pvec); } -/** - * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * - * Returns true if the zone does not have enough inactive anon pages, - * meaning some active anon pages need to be deactivated. - */ -static int inactive_anon_is_low(struct zone *zone) +static int inactive_anon_is_low_global(struct zone *zone) { unsigned long active, inactive; @@ -1330,6 +1323,25 @@ static int inactive_anon_is_low(struct zone *zone) return 0; } +/** + * inactive_anon_is_low - check if anonymous pages need to be deactivated + * @zone: zone to check + * @sc: scan control of this context + * + * Returns true if the zone does not have enough inactive anon pages, + * meaning some active anon pages need to be deactivated. + */ +static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) +{ + int low; + + if (scan_global_lru(sc)) + low = inactive_anon_is_low_global(zone); + else + low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); + return low; +} + static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct zone *zone, struct scan_control *sc, int priority) { @@ -1340,8 +1352,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, return 0; } - if (lru == LRU_ACTIVE_ANON && - (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { + if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; } @@ -1509,9 +1520,7 @@ static void shrink_zone(int priority, struct zone *zone, * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (!scan_global_lru(sc) || inactive_anon_is_low(zone)) - shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); - else if (!scan_global_lru(sc)) + if (inactive_anon_is_low(zone, sc)) shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); throttle_vm_writeout(sc->gfp_mask); @@ -1807,7 +1816,7 @@ loop_again: * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. */ - if (inactive_anon_is_low(zone)) + if (inactive_anon_is_low(zone, &sc)) shrink_active_list(SWAP_CLUSTER_MAX, zone, &sc, priority, 0);