/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 */
 | 13 | #include <linux/ioprio.h> | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 14 | #include <linux/seq_file.h> | 
 | 15 | #include <linux/kdev_t.h> | 
| Vivek Goyal | 9d6a986 | 2009-12-04 10:36:41 -0500 | [diff] [blame] | 16 | #include <linux/module.h> | 
| Stephen Rothwell | accee78 | 2009-12-07 19:29:39 +1100 | [diff] [blame] | 17 | #include <linux/err.h> | 
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 18 | #include <linux/blkdev.h> | 
| Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 19 | #include <linux/slab.h> | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 20 | #include "blk-cgroup.h" | 
| Gui Jianfeng | 34d0f17 | 2010-04-13 16:05:49 +0800 | [diff] [blame] | 21 | #include <linux/genhd.h> | 
| Vivek Goyal | 3e25206 | 2009-12-04 10:36:42 -0500 | [diff] [blame] | 22 |  | 
/* Maximum length of a per-device stat key emitted to userspace. */
#define MAX_KEY_LEN 100

/* Protects blkio_list; taken outside blkcg->lock (see blkiocg_weight_write). */
static DEFINE_SPINLOCK(blkio_list_lock);
/* All registered blkio policies. */
static LIST_HEAD(blkio_list);

/* The root cgroup gets twice the default weight. */
struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 | 30 |  | 
/* Forward declarations for the cgroup subsystem callbacks defined below. */
static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
						  struct cgroup *);
static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
			      struct task_struct *, bool);
static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
			   struct cgroup *, struct task_struct *, bool);
static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);

/* The "blkio" cgroup subsystem: lifecycle and task-attach hooks. */
struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.attach = blkiocg_attach,
	.destroy = blkiocg_destroy,
	.populate = blkiocg_populate,
#ifdef CONFIG_BLK_CGROUP
	/* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
	.subsys_id = blkio_subsys_id,
#endif
	/* css ids are used by blkiocg_add_blkio_group()/del() below */
	.use_id = 1,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);
 | 55 |  | 
/*
 * Link a per-device policy node onto the cgroup's policy_list.
 * NOTE(review): the delete/search siblings document "blkcg->lock held";
 * presumably that applies here as well — confirm at call sites.
 */
static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
					    struct blkio_policy_node *pn)
{
	list_add(&pn->node, &blkcg->policy_list);
}
 | 61 |  | 
/* Unlink a per-device policy node. Must be called with blkcg->lock held */
static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
{
	list_del(&pn->node);
}
 | 67 |  | 
/*
 * Find the policy node for device @dev, or NULL if none exists.
 * Must be called with blkcg->lock held.
 */
static struct blkio_policy_node *
blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
{
	struct blkio_policy_node *pn;

	list_for_each_entry(pn, &blkcg->policy_list, node) {
		if (pn->dev == dev)
			return pn;
	}

	return NULL;
}
 | 81 |  | 
/* Map a generic cgroup to its blkio-specific state via the embedded css. */
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 88 |  | 
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 89 | /* | 
 | 90 |  * Add to the appropriate stat variable depending on the request type. | 
 | 91 |  * This should be called with the blkg->stats_lock held. | 
 | 92 |  */ | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 93 | static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction, | 
 | 94 | 				bool sync) | 
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 95 | { | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 96 | 	if (direction) | 
 | 97 | 		stat[BLKIO_STAT_WRITE] += add; | 
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 98 | 	else | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 99 | 		stat[BLKIO_STAT_READ] += add; | 
 | 100 | 	if (sync) | 
 | 101 | 		stat[BLKIO_STAT_SYNC] += add; | 
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 102 | 	else | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 103 | 		stat[BLKIO_STAT_ASYNC] += add; | 
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 104 | } | 
 | 105 |  | 
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 106 | /* | 
 | 107 |  * Decrements the appropriate stat variable if non-zero depending on the | 
 | 108 |  * request type. Panics on value being zero. | 
 | 109 |  * This should be called with the blkg->stats_lock held. | 
 | 110 |  */ | 
 | 111 | static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync) | 
 | 112 | { | 
 | 113 | 	if (direction) { | 
 | 114 | 		BUG_ON(stat[BLKIO_STAT_WRITE] == 0); | 
 | 115 | 		stat[BLKIO_STAT_WRITE]--; | 
 | 116 | 	} else { | 
 | 117 | 		BUG_ON(stat[BLKIO_STAT_READ] == 0); | 
 | 118 | 		stat[BLKIO_STAT_READ]--; | 
 | 119 | 	} | 
 | 120 | 	if (sync) { | 
 | 121 | 		BUG_ON(stat[BLKIO_STAT_SYNC] == 0); | 
 | 122 | 		stat[BLKIO_STAT_SYNC]--; | 
 | 123 | 	} else { | 
 | 124 | 		BUG_ON(stat[BLKIO_STAT_ASYNC] == 0); | 
 | 125 | 		stat[BLKIO_STAT_ASYNC]--; | 
 | 126 | 	} | 
 | 127 | } | 
 | 128 |  | 
 | 129 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 
| Divyesh Shah | 812df48 | 2010-04-08 21:15:35 -0700 | [diff] [blame] | 130 | /* This should be called with the blkg->stats_lock held. */ | 
 | 131 | static void blkio_set_start_group_wait_time(struct blkio_group *blkg, | 
 | 132 | 						struct blkio_group *curr_blkg) | 
 | 133 | { | 
 | 134 | 	if (blkio_blkg_waiting(&blkg->stats)) | 
 | 135 | 		return; | 
 | 136 | 	if (blkg == curr_blkg) | 
 | 137 | 		return; | 
 | 138 | 	blkg->stats.start_group_wait_time = sched_clock(); | 
 | 139 | 	blkio_mark_blkg_waiting(&blkg->stats); | 
 | 140 | } | 
 | 141 |  | 
/*
 * Fold the elapsed group-wait interval into group_wait_time and clear the
 * waiting flag. No-op if the group was not marked waiting.
 * This should be called with the blkg->stats_lock held.
 */
static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_waiting(stats))
		return;

	now = sched_clock();
	/* skip negative intervals — presumably guards against
	 * unsynchronized sched_clock() across cpus; confirm */
	if (time_after64(now, stats->start_group_wait_time))
		stats->group_wait_time += now - stats->start_group_wait_time;
	blkio_clear_blkg_waiting(stats);
}
 | 155 |  | 
 | 156 | /* This should be called with the blkg->stats_lock held. */ | 
 | 157 | static void blkio_end_empty_time(struct blkio_group_stats *stats) | 
 | 158 | { | 
 | 159 | 	unsigned long long now; | 
 | 160 |  | 
 | 161 | 	if (!blkio_blkg_empty(stats)) | 
 | 162 | 		return; | 
 | 163 |  | 
 | 164 | 	now = sched_clock(); | 
 | 165 | 	if (time_after64(now, stats->start_empty_time)) | 
 | 166 | 		stats->empty_time += now - stats->start_empty_time; | 
 | 167 | 	blkio_clear_blkg_empty(stats); | 
 | 168 | } | 
 | 169 |  | 
/* Mark the start of an idle period for @blkg and stamp its start time. */
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	/* starting to idle while already idling indicates a caller bug */
	BUG_ON(blkio_blkg_idling(&blkg->stats));
	blkg->stats.start_idle_time = sched_clock();
	blkio_mark_blkg_idling(&blkg->stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
 | 181 |  | 
/*
 * Close an idle period: accumulate the elapsed idle time and clear the
 * idling flag. No-op if the group was not idling.
 */
void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	unsigned long long now;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (blkio_blkg_idling(stats)) {
		now = sched_clock();
		/* skip intervals that would be negative */
		if (time_after64(now, stats->start_idle_time))
			stats->idle_time += now - stats->start_idle_time;
		blkio_clear_blkg_idling(stats);
	}
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
 | 199 |  | 
/*
 * Take one sample of the current queued-request count (reads + writes)
 * for the average-queue-size statistic, and close any pending group-wait
 * interval while we hold the stats lock.
 */
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
{
	unsigned long flags;
	struct blkio_group_stats *stats;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	stats->avg_queue_size_sum +=
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
	stats->avg_queue_size_samples++;
	blkio_update_group_wait_time(stats);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
 | 215 |  | 
| Vivek Goyal | e5ff082 | 2010-04-26 19:25:11 +0200 | [diff] [blame] | 216 | void blkiocg_set_start_empty_time(struct blkio_group *blkg) | 
| Divyesh Shah | 28baf44 | 2010-04-14 11:22:38 +0200 | [diff] [blame] | 217 | { | 
 | 218 | 	unsigned long flags; | 
 | 219 | 	struct blkio_group_stats *stats; | 
 | 220 |  | 
 | 221 | 	spin_lock_irqsave(&blkg->stats_lock, flags); | 
 | 222 | 	stats = &blkg->stats; | 
 | 223 |  | 
 | 224 | 	if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] || | 
 | 225 | 			stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) { | 
 | 226 | 		spin_unlock_irqrestore(&blkg->stats_lock, flags); | 
 | 227 | 		return; | 
 | 228 | 	} | 
 | 229 |  | 
 | 230 | 	/* | 
| Vivek Goyal | e5ff082 | 2010-04-26 19:25:11 +0200 | [diff] [blame] | 231 | 	 * group is already marked empty. This can happen if cfqq got new | 
 | 232 | 	 * request in parent group and moved to this group while being added | 
 | 233 | 	 * to service tree. Just ignore the event and move on. | 
| Divyesh Shah | 28baf44 | 2010-04-14 11:22:38 +0200 | [diff] [blame] | 234 | 	 */ | 
| Vivek Goyal | e5ff082 | 2010-04-26 19:25:11 +0200 | [diff] [blame] | 235 | 	if(blkio_blkg_empty(stats)) { | 
 | 236 | 		spin_unlock_irqrestore(&blkg->stats_lock, flags); | 
 | 237 | 		return; | 
 | 238 | 	} | 
 | 239 |  | 
| Divyesh Shah | 28baf44 | 2010-04-14 11:22:38 +0200 | [diff] [blame] | 240 | 	stats->start_empty_time = sched_clock(); | 
 | 241 | 	blkio_mark_blkg_empty(stats); | 
 | 242 | 	spin_unlock_irqrestore(&blkg->stats_lock, flags); | 
 | 243 | } | 
 | 244 | EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time); | 
 | 245 |  | 
/*
 * Account @dequeue dequeue events for @blkg (debug statistic).
 * NOTE(review): updated without taking blkg->stats_lock, unlike the other
 * stat updaters in this file — confirm this is intentional.
 */
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
			unsigned long dequeue)
{
	blkg->stats.dequeue += dequeue;
}
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
| Divyesh Shah | 812df48 | 2010-04-08 21:15:35 -0700 | [diff] [blame] | 252 | #else | 
/* !CONFIG_DEBUG_BLK_CGROUP: the debug-only hooks compile away to no-ops. */
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_group *curr_blkg) {}
static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 256 | #endif | 
 | 257 |  | 
/*
 * Account a request being added to @blkg's queue: bump the queued stat
 * and, in debug builds, end any empty interval and start group-wait
 * tracking relative to the currently active group @curr_blkg.
 */
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
			struct blkio_group *curr_blkg, bool direction,
			bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
			sync);
	/* a queued request means the group is no longer empty */
	blkio_end_empty_time(&blkg->stats);
	blkio_set_start_group_wait_time(blkg, curr_blkg);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 272 |  | 
/*
 * Account a request leaving @blkg's queue; BUGs if the queued counter
 * would underflow (see blkio_check_and_dec_stat()).
 */
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
						bool direction, bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
					direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 284 |  | 
/* Add @time to the group's used-time total, under the stats lock. */
void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkg->stats.time += time;
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 294 |  | 
/*
 * Account a dispatched request for @blkg: sector count, number of
 * serviced requests, and serviced byte count.
 */
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				uint64_t bytes, bool direction, bool sync)
{
	struct blkio_group_stats *stats;
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	stats->sectors += bytes >> 9;	/* bytes -> 512-byte sectors */
	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
			sync);
	blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
			direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 311 |  | 
/*
 * Account a completed request. Service time is now - @io_start_time and
 * wait time is @io_start_time - @start_time; timestamps are sched_clock()
 * values, and the time_after64() checks skip intervals that would come
 * out negative.
 */
void blkiocg_update_completion_stats(struct blkio_group *blkg,
	uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
{
	struct blkio_group_stats *stats;
	unsigned long flags;
	unsigned long long now = sched_clock();

	spin_lock_irqsave(&blkg->stats_lock, flags);
	stats = &blkg->stats;
	if (time_after64(now, io_start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
				now - io_start_time, direction, sync);
	if (time_after64(io_start_time, start_time))
		blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
				io_start_time - start_time, direction, sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
| Divyesh Shah | 9195291 | 2010-04-01 15:01:41 -0700 | [diff] [blame] | 330 |  | 
/* Account one request merge for @blkg. */
void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
					bool sync)
{
	unsigned long flags;

	spin_lock_irqsave(&blkg->stats_lock, flags);
	blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
			sync);
	spin_unlock_irqrestore(&blkg->stats_lock, flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 | 342 |  | 
/*
 * Register @blkg with @blkcg: initialize its stats lock, publish the
 * policy-private @key for RCU lookup, record the owning cgroup's css id
 * and hash the group onto the cgroup's list. @dev is stored for stat
 * display.
 */
void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
			struct blkio_group *blkg, void *key, dev_t dev)
{
	unsigned long flags;

	spin_lock_irqsave(&blkcg->lock, flags);
	spin_lock_init(&blkg->stats_lock);
	rcu_assign_pointer(blkg->key, key);
	blkg->blkcg_id = css_id(&blkcg->css);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	spin_unlock_irqrestore(&blkcg->lock, flags);
	/* Need to take css reference ? */
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
	blkg->dev = dev;
}
EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 359 |  | 
/*
 * Unhash @blkg from its cgroup's list. Called under blkcg->lock by
 * blkiocg_del_blkio_group().
 */
static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
{
	hlist_del_init_rcu(&blkg->blkcg_node);
	blkg->blkcg_id = 0;
}
 | 365 |  | 
 | 366 | /* | 
 | 367 |  * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1 | 
 | 368 |  * indicating that blk_group was unhashed by the time we got to it. | 
 | 369 |  */ | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 370 | int blkiocg_del_blkio_group(struct blkio_group *blkg) | 
 | 371 | { | 
| Vivek Goyal | b1c3576 | 2009-12-03 12:59:47 -0500 | [diff] [blame] | 372 | 	struct blkio_cgroup *blkcg; | 
 | 373 | 	unsigned long flags; | 
 | 374 | 	struct cgroup_subsys_state *css; | 
 | 375 | 	int ret = 1; | 
 | 376 |  | 
 | 377 | 	rcu_read_lock(); | 
 | 378 | 	css = css_lookup(&blkio_subsys, blkg->blkcg_id); | 
| Jens Axboe | 0f3942a | 2010-05-03 14:28:55 +0200 | [diff] [blame] | 379 | 	if (css) { | 
 | 380 | 		blkcg = container_of(css, struct blkio_cgroup, css); | 
 | 381 | 		spin_lock_irqsave(&blkcg->lock, flags); | 
 | 382 | 		if (!hlist_unhashed(&blkg->blkcg_node)) { | 
 | 383 | 			__blkiocg_del_blkio_group(blkg); | 
 | 384 | 			ret = 0; | 
 | 385 | 		} | 
 | 386 | 		spin_unlock_irqrestore(&blkcg->lock, flags); | 
| Vivek Goyal | b1c3576 | 2009-12-03 12:59:47 -0500 | [diff] [blame] | 387 | 	} | 
| Jens Axboe | 0f3942a | 2010-05-03 14:28:55 +0200 | [diff] [blame] | 388 |  | 
| Vivek Goyal | b1c3576 | 2009-12-03 12:59:47 -0500 | [diff] [blame] | 389 | 	rcu_read_unlock(); | 
 | 390 | 	return ret; | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 391 | } | 
| Vivek Goyal | 9d6a986 | 2009-12-04 10:36:41 -0500 | [diff] [blame] | 392 | EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 393 |  | 
 | 394 | /* called under rcu_read_lock(). */ | 
 | 395 | struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) | 
 | 396 | { | 
 | 397 | 	struct blkio_group *blkg; | 
 | 398 | 	struct hlist_node *n; | 
 | 399 | 	void *__key; | 
 | 400 |  | 
 | 401 | 	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { | 
 | 402 | 		__key = blkg->key; | 
 | 403 | 		if (__key == key) | 
 | 404 | 			return blkg; | 
 | 405 | 	} | 
 | 406 |  | 
 | 407 | 	return NULL; | 
 | 408 | } | 
| Vivek Goyal | 9d6a986 | 2009-12-04 10:36:41 -0500 | [diff] [blame] | 409 | EXPORT_SYMBOL_GPL(blkiocg_lookup_group); | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 410 |  | 
/*
 * Generate a cgroup-file read handler that returns the named
 * blkio_cgroup scalar field as a u64.
 */
#define SHOW_FUNCTION(__VAR)						\
static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup,		\
				       struct cftype *cftype)		\
{									\
	struct blkio_cgroup *blkcg;					\
									\
	blkcg = cgroup_to_blkio_cgroup(cgroup);				\
	return (u64)blkcg->__VAR;					\
}

SHOW_FUNCTION(weight);
#undef SHOW_FUNCTION
 | 423 |  | 
/*
 * Write handler for the cgroup weight file. Validates @val, stores the
 * new cgroup-wide weight and pushes it to every blkio_group via each
 * registered policy's update hook — except for devices that have an
 * explicit per-device policy node, which are skipped.
 *
 * Lock order: blkio_list_lock outside blkcg->lock.
 */
static int
blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct hlist_node *n;
	struct blkio_policy_type *blkiop;
	struct blkio_policy_node *pn;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	blkcg->weight = (unsigned int)val;

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		pn = blkio_policy_search_node(blkcg, blkg->dev);

		/* a per-device rule exists for this device; leave it alone */
		if (pn)
			continue;

		list_for_each_entry(blkiop, &blkio_list, list)
			blkiop->ops.blkio_update_group_weight_fn(blkg,
					blkcg->weight);
	}
	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}
 | 455 |  | 
/*
 * Reset the stats of every group in the cgroup. The currently-queued
 * request counters are saved and restored across the memset, and in
 * debug builds the idling/waiting/empty flags are re-marked with their
 * start timestamps reset to "now" so in-progress intervals restart
 * cleanly instead of being lost or double-counted.
 */
static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg;
	struct blkio_group *blkg;
	struct blkio_group_stats *stats;
	struct hlist_node *n;
	uint64_t queued[BLKIO_STAT_TOTAL];
	int i;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	bool idling, waiting, empty;
	unsigned long long now = sched_clock();
#endif

	blkcg = cgroup_to_blkio_cgroup(cgroup);
	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		spin_lock(&blkg->stats_lock);
		stats = &blkg->stats;
#ifdef CONFIG_DEBUG_BLK_CGROUP
		idling = blkio_blkg_idling(stats);
		waiting = blkio_blkg_waiting(stats);
		empty = blkio_blkg_empty(stats);
#endif
		/* save the live queued counters before wiping everything */
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
		memset(stats, 0, sizeof(struct blkio_group_stats));
		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
			stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
#ifdef CONFIG_DEBUG_BLK_CGROUP
		if (idling) {
			blkio_mark_blkg_idling(stats);
			stats->start_idle_time = now;
		}
		if (waiting) {
			blkio_mark_blkg_waiting(stats);
			stats->start_group_wait_time = now;
		}
		if (empty) {
			blkio_mark_blkg_empty(stats);
			stats->start_empty_time = now;
		}
#endif
		spin_unlock(&blkg->stats_lock);
	}
	spin_unlock_irq(&blkcg->lock);
	return 0;
}
 | 504 |  | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 505 | static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str, | 
 | 506 | 				int chars_left, bool diskname_only) | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 507 | { | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 508 | 	snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev)); | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 509 | 	chars_left -= strlen(str); | 
 | 510 | 	if (chars_left <= 0) { | 
 | 511 | 		printk(KERN_WARNING | 
 | 512 | 			"Possibly incorrect cgroup stat display format"); | 
 | 513 | 		return; | 
 | 514 | 	} | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 515 | 	if (diskname_only) | 
 | 516 | 		return; | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 517 | 	switch (type) { | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 518 | 	case BLKIO_STAT_READ: | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 519 | 		strlcat(str, " Read", chars_left); | 
 | 520 | 		break; | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 521 | 	case BLKIO_STAT_WRITE: | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 522 | 		strlcat(str, " Write", chars_left); | 
 | 523 | 		break; | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 524 | 	case BLKIO_STAT_SYNC: | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 525 | 		strlcat(str, " Sync", chars_left); | 
 | 526 | 		break; | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 527 | 	case BLKIO_STAT_ASYNC: | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 528 | 		strlcat(str, " Async", chars_left); | 
 | 529 | 		break; | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 530 | 	case BLKIO_STAT_TOTAL: | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 531 | 		strlcat(str, " Total", chars_left); | 
 | 532 | 		break; | 
 | 533 | 	default: | 
 | 534 | 		strlcat(str, " Invalid", chars_left); | 
 | 535 | 	} | 
 | 536 | } | 
 | 537 |  | 
/*
 * Emit one "<major>:<minor>" -> @val entry through the map callback and
 * return @val so callers can use it as the aggregate result.
 */
static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
				struct cgroup_map_cb *cb, dev_t dev)
{
	blkio_get_key_name(0, dev, str, chars_left, true);
	cb->fill(cb, str, val);
	return val;
}
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 545 |  | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 546 | /* This should be called with blkg->stats_lock held */ | 
 | 547 | static uint64_t blkio_get_stat(struct blkio_group *blkg, | 
 | 548 | 		struct cgroup_map_cb *cb, dev_t dev, enum stat_type type) | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 549 | { | 
 | 550 | 	uint64_t disk_total; | 
 | 551 | 	char key_str[MAX_KEY_LEN]; | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 552 | 	enum stat_sub_type sub_type; | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 553 |  | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 554 | 	if (type == BLKIO_STAT_TIME) | 
 | 555 | 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 
 | 556 | 					blkg->stats.time, cb, dev); | 
 | 557 | 	if (type == BLKIO_STAT_SECTORS) | 
 | 558 | 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 
 | 559 | 					blkg->stats.sectors, cb, dev); | 
 | 560 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 561 | 	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { | 
 | 562 | 		uint64_t sum = blkg->stats.avg_queue_size_sum; | 
 | 563 | 		uint64_t samples = blkg->stats.avg_queue_size_samples; | 
 | 564 | 		if (samples) | 
 | 565 | 			do_div(sum, samples); | 
 | 566 | 		else | 
 | 567 | 			sum = 0; | 
 | 568 | 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev); | 
 | 569 | 	} | 
| Divyesh Shah | 812df48 | 2010-04-08 21:15:35 -0700 | [diff] [blame] | 570 | 	if (type == BLKIO_STAT_GROUP_WAIT_TIME) | 
 | 571 | 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 
 | 572 | 					blkg->stats.group_wait_time, cb, dev); | 
 | 573 | 	if (type == BLKIO_STAT_IDLE_TIME) | 
 | 574 | 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 
 | 575 | 					blkg->stats.idle_time, cb, dev); | 
 | 576 | 	if (type == BLKIO_STAT_EMPTY_TIME) | 
 | 577 | 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 
 | 578 | 					blkg->stats.empty_time, cb, dev); | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 579 | 	if (type == BLKIO_STAT_DEQUEUE) | 
 | 580 | 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 
 | 581 | 					blkg->stats.dequeue, cb, dev); | 
 | 582 | #endif | 
 | 583 |  | 
 | 584 | 	for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL; | 
 | 585 | 			sub_type++) { | 
 | 586 | 		blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false); | 
 | 587 | 		cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]); | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 588 | 	} | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 589 | 	disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] + | 
 | 590 | 			blkg->stats.stat_arr[type][BLKIO_STAT_WRITE]; | 
 | 591 | 	blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false); | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 592 | 	cb->fill(cb, key_str, disk_total); | 
 | 593 | 	return disk_total; | 
 | 594 | } | 
 | 595 |  | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 596 | #define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total)		\ | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 597 | static int blkiocg_##__VAR##_read(struct cgroup *cgroup,		\ | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 598 | 		struct cftype *cftype, struct cgroup_map_cb *cb)	\ | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 599 | {									\ | 
 | 600 | 	struct blkio_cgroup *blkcg;					\ | 
 | 601 | 	struct blkio_group *blkg;					\ | 
 | 602 | 	struct hlist_node *n;						\ | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 603 | 	uint64_t cgroup_total = 0;					\ | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 604 | 									\ | 
 | 605 | 	if (!cgroup_lock_live_group(cgroup))				\ | 
 | 606 | 		return -ENODEV;						\ | 
 | 607 | 									\ | 
 | 608 | 	blkcg = cgroup_to_blkio_cgroup(cgroup);				\ | 
 | 609 | 	rcu_read_lock();						\ | 
 | 610 | 	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 611 | 		if (blkg->dev) {					\ | 
 | 612 | 			spin_lock_irq(&blkg->stats_lock);		\ | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 613 | 			cgroup_total += blkio_get_stat(blkg, cb,	\ | 
 | 614 | 						blkg->dev, type);	\ | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 615 | 			spin_unlock_irq(&blkg->stats_lock);		\ | 
 | 616 | 		}							\ | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 617 | 	}								\ | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 618 | 	if (show_total)							\ | 
 | 619 | 		cb->fill(cb, "Total", cgroup_total);			\ | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 620 | 	rcu_read_unlock();						\ | 
 | 621 | 	cgroup_unlock();						\ | 
 | 622 | 	return 0;							\ | 
 | 623 | } | 
 | 624 |  | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 625 | SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0); | 
 | 626 | SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0); | 
 | 627 | SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1); | 
 | 628 | SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1); | 
 | 629 | SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1); | 
 | 630 | SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1); | 
| Divyesh Shah | 812d402 | 2010-04-08 21:14:23 -0700 | [diff] [blame] | 631 | SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1); | 
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 632 | SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1); | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 633 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 634 | SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0); | 
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 635 | SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0); | 
| Divyesh Shah | 812df48 | 2010-04-08 21:15:35 -0700 | [diff] [blame] | 636 | SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0); | 
 | 637 | SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0); | 
 | 638 | SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0); | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 639 | #endif | 
 | 640 | #undef SHOW_FUNCTION_PER_GROUP | 
 | 641 |  | 
| Gui Jianfeng | 34d0f17 | 2010-04-13 16:05:49 +0800 | [diff] [blame] | 642 | static int blkio_check_dev_num(dev_t dev) | 
 | 643 | { | 
 | 644 | 	int part = 0; | 
 | 645 | 	struct gendisk *disk; | 
 | 646 |  | 
 | 647 | 	disk = get_gendisk(dev, &part); | 
 | 648 | 	if (!disk || part) | 
 | 649 | 		return -ENODEV; | 
 | 650 |  | 
 | 651 | 	return 0; | 
 | 652 | } | 
 | 653 |  | 
/*
 * Parse a per-device weight rule of the form "MAJOR:MINOR weight" from
 * @buf into @newpn.  @buf is modified in place by strsep().
 *
 * The device must pass blkio_check_dev_num().  A weight of 0 is
 * accepted — the write handler treats it as "delete the rule for this
 * device" — while any non-zero weight must lie within
 * [BLKIO_WEIGHT_MIN, BLKIO_WEIGHT_MAX].
 *
 * Returns 0 on success, -EINVAL on malformed input or out-of-range
 * weight, or the error from blkio_check_dev_num().
 */
static int blkio_policy_parse_and_set(char *buf,
				      struct blkio_policy_node *newpn)
{
	char *s[4], *p, *major_s = NULL, *minor_s = NULL;
	int ret;
	unsigned long major, minor, temp;
	int i = 0;
	dev_t dev;

	memset(s, 0, sizeof(s));

	/* Split @buf on spaces into at most three tokens */
	while ((p = strsep(&buf, " ")) != NULL) {
		if (!*p)
			continue;

		s[i++] = p;

		/* Stop collecting so over-long input is rejected below */
		if (i == 3)
			break;
	}

	/* Exactly two tokens expected: "MAJOR:MINOR" and "weight" */
	if (i != 2)
		return -EINVAL;

	p = strsep(&s[0], ":");
	if (p != NULL)
		major_s = p;
	else
		return -EINVAL;

	minor_s = s[0];
	if (!minor_s)
		return -EINVAL;

	ret = strict_strtoul(major_s, 10, &major);
	if (ret)
		return -EINVAL;

	ret = strict_strtoul(minor_s, 10, &minor);
	if (ret)
		return -EINVAL;

	dev = MKDEV(major, minor);

	/* Only rules for existing whole disks are accepted */
	ret = blkio_check_dev_num(dev);
	if (ret)
		return ret;

	newpn->dev = dev;

	if (s[1] == NULL)
		return -EINVAL;

	/* weight 0 (rule deletion) is allowed; otherwise enforce range */
	ret = strict_strtoul(s[1], 10, &temp);
	if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
	    temp > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	newpn->weight =  temp;

	return 0;
}
 | 717 |  | 
 | 718 | unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, | 
 | 719 | 			      dev_t dev) | 
 | 720 | { | 
 | 721 | 	struct blkio_policy_node *pn; | 
 | 722 |  | 
 | 723 | 	pn = blkio_policy_search_node(blkcg, dev); | 
 | 724 | 	if (pn) | 
 | 725 | 		return pn->weight; | 
 | 726 | 	else | 
 | 727 | 		return blkcg->weight; | 
 | 728 | } | 
 | 729 | EXPORT_SYMBOL_GPL(blkcg_get_weight); | 
 | 730 |  | 
 | 731 |  | 
 | 732 | static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, | 
 | 733 | 				       const char *buffer) | 
 | 734 | { | 
 | 735 | 	int ret = 0; | 
 | 736 | 	char *buf; | 
 | 737 | 	struct blkio_policy_node *newpn, *pn; | 
 | 738 | 	struct blkio_cgroup *blkcg; | 
 | 739 | 	struct blkio_group *blkg; | 
 | 740 | 	int keep_newpn = 0; | 
 | 741 | 	struct hlist_node *n; | 
 | 742 | 	struct blkio_policy_type *blkiop; | 
 | 743 |  | 
 | 744 | 	buf = kstrdup(buffer, GFP_KERNEL); | 
 | 745 | 	if (!buf) | 
 | 746 | 		return -ENOMEM; | 
 | 747 |  | 
 | 748 | 	newpn = kzalloc(sizeof(*newpn), GFP_KERNEL); | 
 | 749 | 	if (!newpn) { | 
 | 750 | 		ret = -ENOMEM; | 
 | 751 | 		goto free_buf; | 
 | 752 | 	} | 
 | 753 |  | 
 | 754 | 	ret = blkio_policy_parse_and_set(buf, newpn); | 
 | 755 | 	if (ret) | 
 | 756 | 		goto free_newpn; | 
 | 757 |  | 
 | 758 | 	blkcg = cgroup_to_blkio_cgroup(cgrp); | 
 | 759 |  | 
 | 760 | 	spin_lock_irq(&blkcg->lock); | 
 | 761 |  | 
 | 762 | 	pn = blkio_policy_search_node(blkcg, newpn->dev); | 
 | 763 | 	if (!pn) { | 
 | 764 | 		if (newpn->weight != 0) { | 
 | 765 | 			blkio_policy_insert_node(blkcg, newpn); | 
 | 766 | 			keep_newpn = 1; | 
 | 767 | 		} | 
 | 768 | 		spin_unlock_irq(&blkcg->lock); | 
 | 769 | 		goto update_io_group; | 
 | 770 | 	} | 
 | 771 |  | 
 | 772 | 	if (newpn->weight == 0) { | 
 | 773 | 		/* weight == 0 means deleteing a specific weight */ | 
 | 774 | 		blkio_policy_delete_node(pn); | 
 | 775 | 		spin_unlock_irq(&blkcg->lock); | 
 | 776 | 		goto update_io_group; | 
 | 777 | 	} | 
 | 778 | 	spin_unlock_irq(&blkcg->lock); | 
 | 779 |  | 
 | 780 | 	pn->weight = newpn->weight; | 
 | 781 |  | 
 | 782 | update_io_group: | 
 | 783 | 	/* update weight for each cfqg */ | 
 | 784 | 	spin_lock(&blkio_list_lock); | 
 | 785 | 	spin_lock_irq(&blkcg->lock); | 
 | 786 |  | 
 | 787 | 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { | 
 | 788 | 		if (newpn->dev == blkg->dev) { | 
 | 789 | 			list_for_each_entry(blkiop, &blkio_list, list) | 
 | 790 | 				blkiop->ops.blkio_update_group_weight_fn(blkg, | 
 | 791 | 							 newpn->weight ? | 
 | 792 | 							 newpn->weight : | 
 | 793 | 							 blkcg->weight); | 
 | 794 | 		} | 
 | 795 | 	} | 
 | 796 |  | 
 | 797 | 	spin_unlock_irq(&blkcg->lock); | 
 | 798 | 	spin_unlock(&blkio_list_lock); | 
 | 799 |  | 
 | 800 | free_newpn: | 
 | 801 | 	if (!keep_newpn) | 
 | 802 | 		kfree(newpn); | 
 | 803 | free_buf: | 
 | 804 | 	kfree(buf); | 
 | 805 | 	return ret; | 
 | 806 | } | 
 | 807 |  | 
 | 808 | static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft, | 
 | 809 | 				      struct seq_file *m) | 
 | 810 | { | 
 | 811 | 	struct blkio_cgroup *blkcg; | 
 | 812 | 	struct blkio_policy_node *pn; | 
 | 813 |  | 
 | 814 | 	seq_printf(m, "dev\tweight\n"); | 
 | 815 |  | 
 | 816 | 	blkcg = cgroup_to_blkio_cgroup(cgrp); | 
| Jens Axboe | 0f3942a | 2010-05-03 14:28:55 +0200 | [diff] [blame] | 817 | 	if (!list_empty(&blkcg->policy_list)) { | 
 | 818 | 		spin_lock_irq(&blkcg->lock); | 
 | 819 | 		list_for_each_entry(pn, &blkcg->policy_list, node) { | 
 | 820 | 			seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), | 
 | 821 | 				   MINOR(pn->dev), pn->weight); | 
 | 822 | 		} | 
 | 823 | 		spin_unlock_irq(&blkcg->lock); | 
| Gui Jianfeng | 34d0f17 | 2010-04-13 16:05:49 +0800 | [diff] [blame] | 824 | 	} | 
| Gui Jianfeng | 34d0f17 | 2010-04-13 16:05:49 +0800 | [diff] [blame] | 825 |  | 
| Gui Jianfeng | 34d0f17 | 2010-04-13 16:05:49 +0800 | [diff] [blame] | 826 | 	return 0; | 
 | 827 | } | 
 | 828 |  | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 829 | struct cftype blkio_files[] = { | 
 | 830 | 	{ | 
| Gui Jianfeng | 34d0f17 | 2010-04-13 16:05:49 +0800 | [diff] [blame] | 831 | 		.name = "weight_device", | 
 | 832 | 		.read_seq_string = blkiocg_weight_device_read, | 
 | 833 | 		.write_string = blkiocg_weight_device_write, | 
 | 834 | 		.max_write_len = 256, | 
 | 835 | 	}, | 
 | 836 | 	{ | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 837 | 		.name = "weight", | 
 | 838 | 		.read_u64 = blkiocg_weight_read, | 
 | 839 | 		.write_u64 = blkiocg_weight_write, | 
 | 840 | 	}, | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 841 | 	{ | 
 | 842 | 		.name = "time", | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 843 | 		.read_map = blkiocg_time_read, | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 844 | 	}, | 
 | 845 | 	{ | 
 | 846 | 		.name = "sectors", | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 847 | 		.read_map = blkiocg_sectors_read, | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 848 | 	}, | 
 | 849 | 	{ | 
 | 850 | 		.name = "io_service_bytes", | 
 | 851 | 		.read_map = blkiocg_io_service_bytes_read, | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 852 | 	}, | 
 | 853 | 	{ | 
 | 854 | 		.name = "io_serviced", | 
 | 855 | 		.read_map = blkiocg_io_serviced_read, | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 856 | 	}, | 
 | 857 | 	{ | 
 | 858 | 		.name = "io_service_time", | 
 | 859 | 		.read_map = blkiocg_io_service_time_read, | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 860 | 	}, | 
 | 861 | 	{ | 
 | 862 | 		.name = "io_wait_time", | 
 | 863 | 		.read_map = blkiocg_io_wait_time_read, | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 864 | 	}, | 
 | 865 | 	{ | 
| Divyesh Shah | 812d402 | 2010-04-08 21:14:23 -0700 | [diff] [blame] | 866 | 		.name = "io_merged", | 
 | 867 | 		.read_map = blkiocg_io_merged_read, | 
 | 868 | 	}, | 
 | 869 | 	{ | 
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 870 | 		.name = "io_queued", | 
 | 871 | 		.read_map = blkiocg_io_queued_read, | 
 | 872 | 	}, | 
 | 873 | 	{ | 
| Divyesh Shah | 84c124d | 2010-04-09 08:31:19 +0200 | [diff] [blame] | 874 | 		.name = "reset_stats", | 
 | 875 | 		.write_u64 = blkiocg_reset_stats, | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 876 | 	}, | 
 | 877 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 878 | 	{ | 
 | 879 | 		.name = "avg_queue_size", | 
 | 880 | 		.read_map = blkiocg_avg_queue_size_read, | 
 | 881 | 	}, | 
 | 882 | 	{ | 
| Divyesh Shah | 812df48 | 2010-04-08 21:15:35 -0700 | [diff] [blame] | 883 | 		.name = "group_wait_time", | 
 | 884 | 		.read_map = blkiocg_group_wait_time_read, | 
 | 885 | 	}, | 
 | 886 | 	{ | 
 | 887 | 		.name = "idle_time", | 
 | 888 | 		.read_map = blkiocg_idle_time_read, | 
 | 889 | 	}, | 
 | 890 | 	{ | 
 | 891 | 		.name = "empty_time", | 
 | 892 | 		.read_map = blkiocg_empty_time_read, | 
 | 893 | 	}, | 
 | 894 | 	{ | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 895 | 		.name = "dequeue", | 
| Divyesh Shah | 303a3ac | 2010-04-01 15:01:24 -0700 | [diff] [blame] | 896 | 		.read_map = blkiocg_dequeue_read, | 
| Divyesh Shah | cdc1184 | 2010-04-08 21:15:10 -0700 | [diff] [blame] | 897 | 	}, | 
| Vivek Goyal | 2208419 | 2009-12-03 12:59:49 -0500 | [diff] [blame] | 898 | #endif | 
| Vivek Goyal | 31e4c28 | 2009-12-03 12:59:42 -0500 | [diff] [blame] | 899 | }; | 
 | 900 |  | 
/* cgroup callback: create all blkio control files for a new cgroup dir */
static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	return cgroup_add_files(cgroup, subsys, blkio_files,
				ARRAY_SIZE(blkio_files));
}
 | 906 |  | 
/*
 * cgroup removal callback: unlink every blkio_group from the dying
 * cgroup, notify the registered policies, free all per-device weight
 * rules, and finally free the blkio_cgroup itself (unless it is the
 * static root).
 */
static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	unsigned long flags;
	struct blkio_group *blkg;
	void *key;
	struct blkio_policy_type *blkiop;
	struct blkio_policy_node *pn, *pntmp;

	rcu_read_lock();
	/*
	 * Detach one group per iteration: blkcg->lock is dropped before
	 * taking blkio_list_lock because elsewhere (the weight_device
	 * write path) blkio_list_lock is the outer lock — taking it
	 * while holding blkcg->lock would invert that ordering.
	 */
	do {
		spin_lock_irqsave(&blkcg->lock, flags);

		if (hlist_empty(&blkcg->blkg_list)) {
			spin_unlock_irqrestore(&blkcg->lock, flags);
			break;
		}

		blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
					blkcg_node);
		key = rcu_dereference(blkg->key);
		__blkiocg_del_blkio_group(blkg);

		spin_unlock_irqrestore(&blkcg->lock, flags);

		/*
		 * This blkio_group is being unlinked as associated cgroup is
		 * going away. Let all the IO controlling policies know about
		 * this event. Currently this is static call to one io
		 * controlling policy. Once we have more policies in place, we
		 * need some dynamic registration of callback function.
		 */
		spin_lock(&blkio_list_lock);
		list_for_each_entry(blkiop, &blkio_list, list)
			blkiop->ops.blkio_unlink_group_fn(key, blkg);
		spin_unlock(&blkio_list_lock);
	} while (1);

	/* Unlink and free every remaining per-device weight rule */
	list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
		blkio_policy_delete_node(pn);
		kfree(pn);
	}

	free_css_id(&blkio_subsys, &blkcg->css);
	rcu_read_unlock();
	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}
 | 955 |  | 
/*
 * cgroup creation callback: allocate and initialise a blkio_cgroup and
 * return its css.  The root cgroup reuses the statically defined
 * blkio_root_cgroup (whose weight is already set at its definition);
 * children are allocated here with BLKIO_WEIGHT_DEFAULT.
 */
static struct cgroup_subsys_state *
blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	/* No parent == this is the root cgroup; skip allocation */
	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	/* Currently we do not support hierarchy deeper than two level (0,1) */
	if (parent != cgroup->top_cgroup)
		return ERR_PTR(-EINVAL);

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
done:
	/* Common initialisation for both root and child cgroups */
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	INIT_LIST_HEAD(&blkcg->policy_list);
	return &blkcg->css;
}
 | 983 |  | 
 | 984 | /* | 
 | 985 |  * We cannot support shared io contexts, as we have no mean to support | 
 | 986 |  * two tasks with the same ioc in two different groups without major rework | 
 | 987 |  * of the main cic data structures.  For now we allow a task to change | 
 | 988 |  * its cgroup only if it's the only owner of its ioc. | 
 | 989 |  */ | 
 | 990 | static int blkiocg_can_attach(struct cgroup_subsys *subsys, | 
 | 991 | 				struct cgroup *cgroup, struct task_struct *tsk, | 
 | 992 | 				bool threadgroup) | 
 | 993 | { | 
 | 994 | 	struct io_context *ioc; | 
 | 995 | 	int ret = 0; | 
 | 996 |  | 
 | 997 | 	/* task_lock() is needed to avoid races with exit_io_context() */ | 
 | 998 | 	task_lock(tsk); | 
 | 999 | 	ioc = tsk->io_context; | 
 | 1000 | 	if (ioc && atomic_read(&ioc->nr_tasks) > 1) | 
 | 1001 | 		ret = -EINVAL; | 
 | 1002 | 	task_unlock(tsk); | 
 | 1003 |  | 
 | 1004 | 	return ret; | 
 | 1005 | } | 
 | 1006 |  | 
/*
 * cgroup attach callback: mark the task's io_context so that its new
 * cgroup association is noticed (presumably re-evaluated by the IO
 * scheduler on the next IO — confirm against the cgroup_changed
 * consumer).
 */
static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
				struct cgroup *prev, struct task_struct *tsk,
				bool threadgroup)
{
	struct io_context *ioc;

	/* task_lock() protects against concurrent io_context teardown */
	task_lock(tsk);
	ioc = tsk->io_context;
	if (ioc)
		ioc->cgroup_changed = 1;
	task_unlock(tsk);
}
 | 1019 |  | 
| Vivek Goyal | 3e25206 | 2009-12-04 10:36:42 -0500 | [diff] [blame] | 1020 | void blkio_policy_register(struct blkio_policy_type *blkiop) | 
 | 1021 | { | 
 | 1022 | 	spin_lock(&blkio_list_lock); | 
 | 1023 | 	list_add_tail(&blkiop->list, &blkio_list); | 
 | 1024 | 	spin_unlock(&blkio_list_lock); | 
 | 1025 | } | 
 | 1026 | EXPORT_SYMBOL_GPL(blkio_policy_register); | 
 | 1027 |  | 
/* Remove an IO control policy from the global list, under blkio_list_lock */
void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	spin_lock(&blkio_list_lock);
	list_del_init(&blkiop->list);
	spin_unlock(&blkio_list_lock);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
| Ben Blum | 67523c4 | 2010-03-10 15:22:11 -0800 | [diff] [blame] | 1035 |  | 
/* Module init: register the blkio subsystem with the cgroup core */
static int __init init_cgroup_blkio(void)
{
	return cgroup_load_subsys(&blkio_subsys);
}

/* Module exit: unregister the blkio subsystem */
static void __exit exit_cgroup_blkio(void)
{
	cgroup_unload_subsys(&blkio_subsys);
}

module_init(init_cgroup_blkio);
module_exit(exit_cgroup_blkio);
MODULE_LICENSE("GPL");