| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 1 | #include <net/tcp.h> | 
|  | 2 | #include <net/tcp_memcontrol.h> | 
|  | 3 | #include <net/sock.h> | 
| Glauber Costa | 3dc43e3 | 2011-12-11 21:47:05 +0000 | [diff] [blame] | 4 | #include <net/ip.h> | 
|  | 5 | #include <linux/nsproxy.h> | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 6 | #include <linux/memcontrol.h> | 
|  | 7 | #include <linux/module.h> | 
|  | 8 |  | 
|  | 9 | static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) | 
|  | 10 | { | 
|  | 11 | return container_of(cg_proto, struct tcp_memcontrol, cg_proto); | 
|  | 12 | } | 
|  | 13 |  | 
|  | 14 | static void memcg_tcp_enter_memory_pressure(struct sock *sk) | 
|  | 15 | { | 
| Dan Carpenter | c48e074 | 2011-12-15 01:05:10 +0000 | [diff] [blame] | 16 | if (sk->sk_cgrp->memory_pressure) | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 17 | *sk->sk_cgrp->memory_pressure = 1; | 
|  | 18 | } | 
|  | 19 | EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); | 
|  | 20 |  | 
| Glauber Costa | 1d62e43 | 2012-04-09 19:36:33 -0300 | [diff] [blame] | 21 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 22 | { | 
|  | 23 | /* | 
|  | 24 | * The root cgroup does not use res_counters, but rather, | 
|  | 25 | * rely on the data already collected by the network | 
|  | 26 | * subsystem | 
|  | 27 | */ | 
|  | 28 | struct res_counter *res_parent = NULL; | 
|  | 29 | struct cg_proto *cg_proto, *parent_cg; | 
|  | 30 | struct tcp_memcontrol *tcp; | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 31 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | 
| Glauber Costa | 3dc43e3 | 2011-12-11 21:47:05 +0000 | [diff] [blame] | 32 | struct net *net = current->nsproxy->net_ns; | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 33 |  | 
|  | 34 | cg_proto = tcp_prot.proto_cgroup(memcg); | 
|  | 35 | if (!cg_proto) | 
| Tejun Heo | 6bc1034 | 2012-04-01 12:09:55 -0700 | [diff] [blame] | 36 | return 0; | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 37 |  | 
|  | 38 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 39 |  | 
| Glauber Costa | 3dc43e3 | 2011-12-11 21:47:05 +0000 | [diff] [blame] | 40 | tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0]; | 
|  | 41 | tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1]; | 
|  | 42 | tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2]; | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 43 | tcp->tcp_memory_pressure = 0; | 
|  | 44 |  | 
|  | 45 | parent_cg = tcp_prot.proto_cgroup(parent); | 
|  | 46 | if (parent_cg) | 
|  | 47 | res_parent = parent_cg->memory_allocated; | 
|  | 48 |  | 
|  | 49 | res_counter_init(&tcp->tcp_memory_allocated, res_parent); | 
|  | 50 | percpu_counter_init(&tcp->tcp_sockets_allocated, 0); | 
|  | 51 |  | 
|  | 52 | cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure; | 
|  | 53 | cg_proto->memory_pressure = &tcp->tcp_memory_pressure; | 
|  | 54 | cg_proto->sysctl_mem = tcp->tcp_prot_mem; | 
|  | 55 | cg_proto->memory_allocated = &tcp->tcp_memory_allocated; | 
|  | 56 | cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; | 
|  | 57 | cg_proto->memcg = memcg; | 
|  | 58 |  | 
| Tejun Heo | 6bc1034 | 2012-04-01 12:09:55 -0700 | [diff] [blame] | 59 | return 0; | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 60 | } | 
|  | 61 | EXPORT_SYMBOL(tcp_init_cgroup); | 
|  | 62 |  | 
| Glauber Costa | 1d62e43 | 2012-04-09 19:36:33 -0300 | [diff] [blame] | 63 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 64 | { | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 65 | struct cg_proto *cg_proto; | 
|  | 66 | struct tcp_memcontrol *tcp; | 
| Glauber Costa | 3aaabe2 | 2011-12-11 21:47:06 +0000 | [diff] [blame] | 67 | u64 val; | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 68 |  | 
|  | 69 | cg_proto = tcp_prot.proto_cgroup(memcg); | 
|  | 70 | if (!cg_proto) | 
|  | 71 | return; | 
|  | 72 |  | 
|  | 73 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 74 | percpu_counter_destroy(&tcp->tcp_sockets_allocated); | 
| Glauber Costa | 3aaabe2 | 2011-12-11 21:47:06 +0000 | [diff] [blame] | 75 |  | 
| Glauber Costa | 1398eee | 2012-01-12 02:16:06 +0000 | [diff] [blame] | 76 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | 
| Glauber Costa | d1a4c0b | 2011-12-11 21:47:04 +0000 | [diff] [blame] | 77 | } | 
|  | 78 | EXPORT_SYMBOL(tcp_destroy_cgroup); | 
| Glauber Costa | 3aaabe2 | 2011-12-11 21:47:06 +0000 | [diff] [blame] | 79 |  | 
|  | 80 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | 
|  | 81 | { | 
|  | 82 | struct net *net = current->nsproxy->net_ns; | 
|  | 83 | struct tcp_memcontrol *tcp; | 
|  | 84 | struct cg_proto *cg_proto; | 
|  | 85 | u64 old_lim; | 
|  | 86 | int i; | 
|  | 87 | int ret; | 
|  | 88 |  | 
|  | 89 | cg_proto = tcp_prot.proto_cgroup(memcg); | 
|  | 90 | if (!cg_proto) | 
|  | 91 | return -EINVAL; | 
|  | 92 |  | 
|  | 93 | if (val > RESOURCE_MAX) | 
|  | 94 | val = RESOURCE_MAX; | 
|  | 95 |  | 
|  | 96 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 97 |  | 
|  | 98 | old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | 
|  | 99 | ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); | 
|  | 100 | if (ret) | 
|  | 101 | return ret; | 
|  | 102 |  | 
|  | 103 | for (i = 0; i < 3; i++) | 
|  | 104 | tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, | 
|  | 105 | net->ipv4.sysctl_tcp_mem[i]); | 
|  | 106 |  | 
| Glauber Costa | 3f13461 | 2012-05-29 15:07:11 -0700 | [diff] [blame] | 107 | if (val == RESOURCE_MAX) | 
|  | 108 | clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | 
|  | 109 | else if (val != RESOURCE_MAX) { | 
|  | 110 | /* | 
|  | 111 | * The active bit needs to be written after the static_key | 
|  | 112 | * update. This is what guarantees that the socket activation | 
|  | 113 | * function is the last one to run. See sock_update_memcg() for | 
|  | 114 | * details, and note that we don't mark any socket as belonging | 
|  | 115 | * to this memcg until that flag is up. | 
|  | 116 | * | 
|  | 117 | * We need to do this, because static_keys will span multiple | 
|  | 118 | * sites, but we can't control their order. If we mark a socket | 
|  | 119 | * as accounted, but the accounting functions are not patched in | 
|  | 120 | * yet, we'll lose accounting. | 
|  | 121 | * | 
|  | 122 | * We never race with the readers in sock_update_memcg(), | 
|  | 123 | * because when this value change, the code to process it is not | 
|  | 124 | * patched in yet. | 
|  | 125 | * | 
|  | 126 | * The activated bit is used to guarantee that no two writers | 
|  | 127 | * will do the update in the same memcg. Without that, we can't | 
|  | 128 | * properly shutdown the static key. | 
|  | 129 | */ | 
|  | 130 | if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags)) | 
|  | 131 | static_key_slow_inc(&memcg_socket_limit_enabled); | 
|  | 132 | set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); | 
|  | 133 | } | 
| Glauber Costa | 3aaabe2 | 2011-12-11 21:47:06 +0000 | [diff] [blame] | 134 |  | 
|  | 135 | return 0; | 
|  | 136 | } | 
|  | 137 |  | 
|  | 138 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | 
|  | 139 | const char *buffer) | 
|  | 140 | { | 
|  | 141 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 
|  | 142 | unsigned long long val; | 
|  | 143 | int ret = 0; | 
|  | 144 |  | 
|  | 145 | switch (cft->private) { | 
|  | 146 | case RES_LIMIT: | 
|  | 147 | /* see memcontrol.c */ | 
|  | 148 | ret = res_counter_memparse_write_strategy(buffer, &val); | 
|  | 149 | if (ret) | 
|  | 150 | break; | 
|  | 151 | ret = tcp_update_limit(memcg, val); | 
|  | 152 | break; | 
|  | 153 | default: | 
|  | 154 | ret = -EINVAL; | 
|  | 155 | break; | 
|  | 156 | } | 
|  | 157 | return ret; | 
|  | 158 | } | 
|  | 159 |  | 
|  | 160 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | 
|  | 161 | { | 
|  | 162 | struct tcp_memcontrol *tcp; | 
|  | 163 | struct cg_proto *cg_proto; | 
|  | 164 |  | 
|  | 165 | cg_proto = tcp_prot.proto_cgroup(memcg); | 
|  | 166 | if (!cg_proto) | 
|  | 167 | return default_val; | 
|  | 168 |  | 
|  | 169 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 170 | return res_counter_read_u64(&tcp->tcp_memory_allocated, type); | 
|  | 171 | } | 
|  | 172 |  | 
| Glauber Costa | 5a6dd34 | 2011-12-11 21:47:07 +0000 | [diff] [blame] | 173 | static u64 tcp_read_usage(struct mem_cgroup *memcg) | 
|  | 174 | { | 
|  | 175 | struct tcp_memcontrol *tcp; | 
|  | 176 | struct cg_proto *cg_proto; | 
|  | 177 |  | 
|  | 178 | cg_proto = tcp_prot.proto_cgroup(memcg); | 
|  | 179 | if (!cg_proto) | 
|  | 180 | return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT; | 
|  | 181 |  | 
|  | 182 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 183 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); | 
|  | 184 | } | 
|  | 185 |  | 
| Glauber Costa | 3aaabe2 | 2011-12-11 21:47:06 +0000 | [diff] [blame] | 186 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) | 
|  | 187 | { | 
|  | 188 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 
|  | 189 | u64 val; | 
|  | 190 |  | 
|  | 191 | switch (cft->private) { | 
|  | 192 | case RES_LIMIT: | 
|  | 193 | val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); | 
|  | 194 | break; | 
| Glauber Costa | 5a6dd34 | 2011-12-11 21:47:07 +0000 | [diff] [blame] | 195 | case RES_USAGE: | 
|  | 196 | val = tcp_read_usage(memcg); | 
|  | 197 | break; | 
| Glauber Costa | ffea59e | 2011-12-11 21:47:08 +0000 | [diff] [blame] | 198 | case RES_FAILCNT: | 
| Glauber Costa | 0850f0f | 2011-12-11 21:47:09 +0000 | [diff] [blame] | 199 | case RES_MAX_USAGE: | 
|  | 200 | val = tcp_read_stat(memcg, cft->private, 0); | 
| Glauber Costa | ffea59e | 2011-12-11 21:47:08 +0000 | [diff] [blame] | 201 | break; | 
| Glauber Costa | 3aaabe2 | 2011-12-11 21:47:06 +0000 | [diff] [blame] | 202 | default: | 
|  | 203 | BUG(); | 
|  | 204 | } | 
|  | 205 | return val; | 
|  | 206 | } | 
|  | 207 |  | 
| Glauber Costa | ffea59e | 2011-12-11 21:47:08 +0000 | [diff] [blame] | 208 | static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event) | 
|  | 209 | { | 
|  | 210 | struct mem_cgroup *memcg; | 
|  | 211 | struct tcp_memcontrol *tcp; | 
|  | 212 | struct cg_proto *cg_proto; | 
|  | 213 |  | 
|  | 214 | memcg = mem_cgroup_from_cont(cont); | 
|  | 215 | cg_proto = tcp_prot.proto_cgroup(memcg); | 
|  | 216 | if (!cg_proto) | 
|  | 217 | return 0; | 
|  | 218 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 219 |  | 
|  | 220 | switch (event) { | 
| Glauber Costa | 0850f0f | 2011-12-11 21:47:09 +0000 | [diff] [blame] | 221 | case RES_MAX_USAGE: | 
|  | 222 | res_counter_reset_max(&tcp->tcp_memory_allocated); | 
|  | 223 | break; | 
| Glauber Costa | ffea59e | 2011-12-11 21:47:08 +0000 | [diff] [blame] | 224 | case RES_FAILCNT: | 
|  | 225 | res_counter_reset_failcnt(&tcp->tcp_memory_allocated); | 
|  | 226 | break; | 
|  | 227 | } | 
|  | 228 |  | 
|  | 229 | return 0; | 
|  | 230 | } | 
|  | 231 |  | 
| Glauber Costa | 3aaabe2 | 2011-12-11 21:47:06 +0000 | [diff] [blame] | 232 | unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) | 
|  | 233 | { | 
|  | 234 | struct tcp_memcontrol *tcp; | 
|  | 235 | struct cg_proto *cg_proto; | 
|  | 236 |  | 
|  | 237 | cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); | 
|  | 238 | if (!cg_proto) | 
|  | 239 | return 0; | 
|  | 240 |  | 
|  | 241 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 242 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | 
|  | 243 | } | 
|  | 244 |  | 
|  | 245 | void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) | 
|  | 246 | { | 
|  | 247 | struct tcp_memcontrol *tcp; | 
|  | 248 | struct cg_proto *cg_proto; | 
|  | 249 |  | 
|  | 250 | cg_proto = tcp_prot.proto_cgroup(memcg); | 
|  | 251 | if (!cg_proto) | 
|  | 252 | return; | 
|  | 253 |  | 
|  | 254 | tcp = tcp_from_cgproto(cg_proto); | 
|  | 255 |  | 
|  | 256 | tcp->tcp_prot_mem[idx] = val; | 
|  | 257 | } | 
| Tejun Heo | 676f7c8 | 2012-04-01 12:09:55 -0700 | [diff] [blame] | 258 |  | 
|  | 259 | static struct cftype tcp_files[] = { | 
|  | 260 | { | 
|  | 261 | .name = "kmem.tcp.limit_in_bytes", | 
|  | 262 | .write_string = tcp_cgroup_write, | 
|  | 263 | .read_u64 = tcp_cgroup_read, | 
|  | 264 | .private = RES_LIMIT, | 
|  | 265 | }, | 
|  | 266 | { | 
|  | 267 | .name = "kmem.tcp.usage_in_bytes", | 
|  | 268 | .read_u64 = tcp_cgroup_read, | 
|  | 269 | .private = RES_USAGE, | 
|  | 270 | }, | 
|  | 271 | { | 
|  | 272 | .name = "kmem.tcp.failcnt", | 
|  | 273 | .private = RES_FAILCNT, | 
|  | 274 | .trigger = tcp_cgroup_reset, | 
|  | 275 | .read_u64 = tcp_cgroup_read, | 
|  | 276 | }, | 
|  | 277 | { | 
|  | 278 | .name = "kmem.tcp.max_usage_in_bytes", | 
|  | 279 | .private = RES_MAX_USAGE, | 
|  | 280 | .trigger = tcp_cgroup_reset, | 
|  | 281 | .read_u64 = tcp_cgroup_read, | 
|  | 282 | }, | 
| Tejun Heo | 6bc1034 | 2012-04-01 12:09:55 -0700 | [diff] [blame] | 283 | { }	/* terminate */ | 
| Tejun Heo | 676f7c8 | 2012-04-01 12:09:55 -0700 | [diff] [blame] | 284 | }; | 
| Tejun Heo | 6bc1034 | 2012-04-01 12:09:55 -0700 | [diff] [blame] | 285 |  | 
|  | 286 | static int __init tcp_memcontrol_init(void) | 
|  | 287 | { | 
|  | 288 | WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files)); | 
|  | 289 | return 0; | 
|  | 290 | } | 
|  | 291 | __initcall(tcp_memcontrol_init); |