| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | linear.c : Multiple Devices driver for Linux | 
|  | 3 | Copyright (C) 1994-96 Marc ZYNGIER | 
|  | 4 | <zyngier@ufr-info-p7.ibp.fr> or | 
|  | 5 | <maz@gloups.fdn.fr> | 
|  | 6 |  | 
|  | 7 | Linear mode management functions. | 
|  | 8 |  | 
|  | 9 | This program is free software; you can redistribute it and/or modify | 
|  | 10 | it under the terms of the GNU General Public License as published by | 
|  | 11 | the Free Software Foundation; either version 2, or (at your option) | 
|  | 12 | any later version. | 
|  | 13 |  | 
|  | 14 | You should have received a copy of the GNU General Public License | 
|  | 15 | (for example /usr/src/linux/COPYING); if not, write to the Free | 
|  | 16 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
|  | 17 | */ | 
|  | 18 |  | 
|  | 19 | #include <linux/module.h> | 
|  | 20 |  | 
|  | 21 | #include <linux/raid/md.h> | 
|  | 22 | #include <linux/slab.h> | 
|  | 23 | #include <linux/raid/linear.h> | 
|  | 24 |  | 
/*
 * Build-time identification macros.  None of them is referenced again in
 * this file; presumably they are consumed by the md headers or kept for
 * historical reasons -- TODO confirm before removing.
 */
#define MAJOR_NR	MD_MAJOR
#define MD_DRIVER
#define MD_PERSONALITY
|  | 28 |  | 
|  | 29 | /* | 
|  | 30 | * find which device holds a particular offset | 
|  | 31 | */ | 
|  | 32 | static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) | 
|  | 33 | { | 
|  | 34 | dev_info_t *hash; | 
|  | 35 | linear_conf_t *conf = mddev_to_conf(mddev); | 
|  | 36 | sector_t block = sector >> 1; | 
|  | 37 |  | 
|  | 38 | /* | 
|  | 39 | * sector_div(a,b) returns the remainer and sets a to a/b | 
|  | 40 | */ | 
|  | 41 | (void)sector_div(block, conf->smallest->size); | 
|  | 42 | hash = conf->hash_table[block]; | 
|  | 43 |  | 
|  | 44 | while ((sector>>1) >= (hash->size + hash->offset)) | 
|  | 45 | hash++; | 
|  | 46 | return hash; | 
|  | 47 | } | 
|  | 48 |  | 
|  | 49 | /** | 
|  | 50 | *	linear_mergeable_bvec -- tell bio layer if a two requests can be merged | 
|  | 51 | *	@q: request queue | 
|  | 52 | *	@bio: the buffer head that's been built up so far | 
|  | 53 | *	@biovec: the request that could be merged to it. | 
|  | 54 | * | 
|  | 55 | *	Return amount of bytes we can take at this offset | 
|  | 56 | */ | 
|  | 57 | static int linear_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) | 
|  | 58 | { | 
|  | 59 | mddev_t *mddev = q->queuedata; | 
|  | 60 | dev_info_t *dev0; | 
|  | 61 | unsigned long maxsectors, bio_sectors = bio->bi_size >> 9; | 
|  | 62 | sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); | 
|  | 63 |  | 
|  | 64 | dev0 = which_dev(mddev, sector); | 
|  | 65 | maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); | 
|  | 66 |  | 
|  | 67 | if (maxsectors < bio_sectors) | 
|  | 68 | maxsectors = 0; | 
|  | 69 | else | 
|  | 70 | maxsectors -= bio_sectors; | 
|  | 71 |  | 
|  | 72 | if (maxsectors <= (PAGE_SIZE >> 9 ) && bio_sectors == 0) | 
|  | 73 | return biovec->bv_len; | 
|  | 74 | /* The bytes available at this offset could be really big, | 
|  | 75 | * so we cap at 2^31 to avoid overflow */ | 
|  | 76 | if (maxsectors > (1 << (31-9))) | 
|  | 77 | return 1<<31; | 
|  | 78 | return maxsectors << 9; | 
|  | 79 | } | 
|  | 80 |  | 
|  | 81 | static void linear_unplug(request_queue_t *q) | 
|  | 82 | { | 
|  | 83 | mddev_t *mddev = q->queuedata; | 
|  | 84 | linear_conf_t *conf = mddev_to_conf(mddev); | 
|  | 85 | int i; | 
|  | 86 |  | 
|  | 87 | for (i=0; i < mddev->raid_disks; i++) { | 
|  | 88 | request_queue_t *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev); | 
|  | 89 | if (r_queue->unplug_fn) | 
|  | 90 | r_queue->unplug_fn(r_queue); | 
|  | 91 | } | 
|  | 92 | } | 
|  | 93 |  | 
|  | 94 | static int linear_issue_flush(request_queue_t *q, struct gendisk *disk, | 
|  | 95 | sector_t *error_sector) | 
|  | 96 | { | 
|  | 97 | mddev_t *mddev = q->queuedata; | 
|  | 98 | linear_conf_t *conf = mddev_to_conf(mddev); | 
|  | 99 | int i, ret = 0; | 
|  | 100 |  | 
|  | 101 | for (i=0; i < mddev->raid_disks && ret == 0; i++) { | 
|  | 102 | struct block_device *bdev = conf->disks[i].rdev->bdev; | 
|  | 103 | request_queue_t *r_queue = bdev_get_queue(bdev); | 
|  | 104 |  | 
|  | 105 | if (!r_queue->issue_flush_fn) | 
|  | 106 | ret = -EOPNOTSUPP; | 
|  | 107 | else | 
|  | 108 | ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector); | 
|  | 109 | } | 
|  | 110 | return ret; | 
|  | 111 | } | 
|  | 112 |  | 
|  | 113 | static int linear_run (mddev_t *mddev) | 
|  | 114 | { | 
|  | 115 | linear_conf_t *conf; | 
|  | 116 | dev_info_t **table; | 
|  | 117 | mdk_rdev_t *rdev; | 
|  | 118 | int i, nb_zone, cnt; | 
|  | 119 | sector_t start; | 
|  | 120 | sector_t curr_offset; | 
|  | 121 | struct list_head *tmp; | 
|  | 122 |  | 
|  | 123 | conf = kmalloc (sizeof (*conf) + mddev->raid_disks*sizeof(dev_info_t), | 
|  | 124 | GFP_KERNEL); | 
|  | 125 | if (!conf) | 
|  | 126 | goto out; | 
|  | 127 | memset(conf, 0, sizeof(*conf) + mddev->raid_disks*sizeof(dev_info_t)); | 
|  | 128 | mddev->private = conf; | 
|  | 129 |  | 
|  | 130 | /* | 
|  | 131 | * Find the smallest device. | 
|  | 132 | */ | 
|  | 133 |  | 
|  | 134 | conf->smallest = NULL; | 
|  | 135 | cnt = 0; | 
|  | 136 | mddev->array_size = 0; | 
|  | 137 |  | 
|  | 138 | ITERATE_RDEV(mddev,rdev,tmp) { | 
|  | 139 | int j = rdev->raid_disk; | 
|  | 140 | dev_info_t *disk = conf->disks + j; | 
|  | 141 |  | 
|  | 142 | if (j < 0 || j > mddev->raid_disks || disk->rdev) { | 
|  | 143 | printk("linear: disk numbering problem. Aborting!\n"); | 
|  | 144 | goto out; | 
|  | 145 | } | 
|  | 146 |  | 
|  | 147 | disk->rdev = rdev; | 
|  | 148 |  | 
|  | 149 | blk_queue_stack_limits(mddev->queue, | 
|  | 150 | rdev->bdev->bd_disk->queue); | 
|  | 151 | /* as we don't honour merge_bvec_fn, we must never risk | 
|  | 152 | * violating it, so limit ->max_sector to one PAGE, as | 
|  | 153 | * a one page request is never in violation. | 
|  | 154 | */ | 
|  | 155 | if (rdev->bdev->bd_disk->queue->merge_bvec_fn && | 
|  | 156 | mddev->queue->max_sectors > (PAGE_SIZE>>9)) | 
|  | 157 | blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); | 
|  | 158 |  | 
|  | 159 | disk->size = rdev->size; | 
|  | 160 | mddev->array_size += rdev->size; | 
|  | 161 |  | 
|  | 162 | if (!conf->smallest || (disk->size < conf->smallest->size)) | 
|  | 163 | conf->smallest = disk; | 
|  | 164 | cnt++; | 
|  | 165 | } | 
|  | 166 | if (cnt != mddev->raid_disks) { | 
|  | 167 | printk("linear: not enough drives present. Aborting!\n"); | 
|  | 168 | goto out; | 
|  | 169 | } | 
|  | 170 |  | 
|  | 171 | /* | 
|  | 172 | * This code was restructured to work around a gcc-2.95.3 internal | 
|  | 173 | * compiler error.  Alter it with care. | 
|  | 174 | */ | 
|  | 175 | { | 
|  | 176 | sector_t sz; | 
|  | 177 | unsigned round; | 
|  | 178 | unsigned long base; | 
|  | 179 |  | 
|  | 180 | sz = mddev->array_size; | 
|  | 181 | base = conf->smallest->size; | 
|  | 182 | round = sector_div(sz, base); | 
|  | 183 | nb_zone = conf->nr_zones = sz + (round ? 1 : 0); | 
|  | 184 | } | 
|  | 185 |  | 
|  | 186 | conf->hash_table = kmalloc (sizeof (dev_info_t*) * nb_zone, | 
|  | 187 | GFP_KERNEL); | 
|  | 188 | if (!conf->hash_table) | 
|  | 189 | goto out; | 
|  | 190 |  | 
|  | 191 | /* | 
|  | 192 | * Here we generate the linear hash table | 
|  | 193 | */ | 
|  | 194 | table = conf->hash_table; | 
|  | 195 | start = 0; | 
|  | 196 | curr_offset = 0; | 
|  | 197 | for (i = 0; i < cnt; i++) { | 
|  | 198 | dev_info_t *disk = conf->disks + i; | 
|  | 199 |  | 
|  | 200 | disk->offset = curr_offset; | 
|  | 201 | curr_offset += disk->size; | 
|  | 202 |  | 
|  | 203 | /* 'curr_offset' is the end of this disk | 
|  | 204 | * 'start' is the start of table | 
|  | 205 | */ | 
|  | 206 | while (start < curr_offset) { | 
|  | 207 | *table++ = disk; | 
|  | 208 | start += conf->smallest->size; | 
|  | 209 | } | 
|  | 210 | } | 
|  | 211 | if (table-conf->hash_table != nb_zone) | 
|  | 212 | BUG(); | 
|  | 213 |  | 
|  | 214 | blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); | 
|  | 215 | mddev->queue->unplug_fn = linear_unplug; | 
|  | 216 | mddev->queue->issue_flush_fn = linear_issue_flush; | 
|  | 217 | return 0; | 
|  | 218 |  | 
|  | 219 | out: | 
| Jesper Juhl | 990a8ba | 2005-06-21 17:17:30 -0700 | [diff] [blame] | 220 | kfree(conf); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 221 | return 1; | 
|  | 222 | } | 
|  | 223 |  | 
|  | 224 | static int linear_stop (mddev_t *mddev) | 
|  | 225 | { | 
|  | 226 | linear_conf_t *conf = mddev_to_conf(mddev); | 
|  | 227 |  | 
|  | 228 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 
|  | 229 | kfree(conf->hash_table); | 
|  | 230 | kfree(conf); | 
|  | 231 |  | 
|  | 232 | return 0; | 
|  | 233 | } | 
|  | 234 |  | 
|  | 235 | static int linear_make_request (request_queue_t *q, struct bio *bio) | 
|  | 236 | { | 
|  | 237 | mddev_t *mddev = q->queuedata; | 
|  | 238 | dev_info_t *tmp_dev; | 
|  | 239 | sector_t block; | 
|  | 240 |  | 
|  | 241 | if (bio_data_dir(bio)==WRITE) { | 
|  | 242 | disk_stat_inc(mddev->gendisk, writes); | 
|  | 243 | disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bio)); | 
|  | 244 | } else { | 
|  | 245 | disk_stat_inc(mddev->gendisk, reads); | 
|  | 246 | disk_stat_add(mddev->gendisk, read_sectors, bio_sectors(bio)); | 
|  | 247 | } | 
|  | 248 |  | 
|  | 249 | tmp_dev = which_dev(mddev, bio->bi_sector); | 
|  | 250 | block = bio->bi_sector >> 1; | 
|  | 251 |  | 
|  | 252 | if (unlikely(block >= (tmp_dev->size + tmp_dev->offset) | 
|  | 253 | || block < tmp_dev->offset)) { | 
|  | 254 | char b[BDEVNAME_SIZE]; | 
|  | 255 |  | 
|  | 256 | printk("linear_make_request: Block %llu out of bounds on " | 
|  | 257 | "dev %s size %llu offset %llu\n", | 
|  | 258 | (unsigned long long)block, | 
|  | 259 | bdevname(tmp_dev->rdev->bdev, b), | 
|  | 260 | (unsigned long long)tmp_dev->size, | 
|  | 261 | (unsigned long long)tmp_dev->offset); | 
|  | 262 | bio_io_error(bio, bio->bi_size); | 
|  | 263 | return 0; | 
|  | 264 | } | 
|  | 265 | if (unlikely(bio->bi_sector + (bio->bi_size >> 9) > | 
|  | 266 | (tmp_dev->offset + tmp_dev->size)<<1)) { | 
|  | 267 | /* This bio crosses a device boundary, so we have to | 
|  | 268 | * split it. | 
|  | 269 | */ | 
|  | 270 | struct bio_pair *bp; | 
| NeilBrown | 29ac8e0 | 2005-05-16 21:53:15 -0700 | [diff] [blame] | 271 | bp = bio_split(bio, bio_split_pool, | 
|  | 272 | ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 273 | if (linear_make_request(q, &bp->bio1)) | 
|  | 274 | generic_make_request(&bp->bio1); | 
|  | 275 | if (linear_make_request(q, &bp->bio2)) | 
|  | 276 | generic_make_request(&bp->bio2); | 
|  | 277 | bio_pair_release(bp); | 
|  | 278 | return 0; | 
|  | 279 | } | 
|  | 280 |  | 
|  | 281 | bio->bi_bdev = tmp_dev->rdev->bdev; | 
|  | 282 | bio->bi_sector = bio->bi_sector - (tmp_dev->offset << 1) + tmp_dev->rdev->data_offset; | 
|  | 283 |  | 
|  | 284 | return 1; | 
|  | 285 | } | 
|  | 286 |  | 
|  | 287 | static void linear_status (struct seq_file *seq, mddev_t *mddev) | 
|  | 288 | { | 
|  | 289 |  | 
|  | 290 | #undef MD_DEBUG | 
|  | 291 | #ifdef MD_DEBUG | 
|  | 292 | int j; | 
|  | 293 | linear_conf_t *conf = mddev_to_conf(mddev); | 
|  | 294 | sector_t s = 0; | 
|  | 295 |  | 
|  | 296 | seq_printf(seq, "      "); | 
|  | 297 | for (j = 0; j < conf->nr_zones; j++) | 
|  | 298 | { | 
|  | 299 | char b[BDEVNAME_SIZE]; | 
|  | 300 | s += conf->smallest_size; | 
|  | 301 | seq_printf(seq, "[%s", | 
|  | 302 | bdevname(conf->hash_table[j][0].rdev->bdev,b)); | 
|  | 303 |  | 
|  | 304 | while (s > conf->hash_table[j][0].offset + | 
|  | 305 | conf->hash_table[j][0].size) | 
|  | 306 | seq_printf(seq, "/%s] ", | 
|  | 307 | bdevname(conf->hash_table[j][1].rdev->bdev,b)); | 
|  | 308 | else | 
|  | 309 | seq_printf(seq, "] "); | 
|  | 310 | } | 
|  | 311 | seq_printf(seq, "\n"); | 
|  | 312 | #endif | 
|  | 313 | seq_printf(seq, " %dk rounding", mddev->chunk_size/1024); | 
|  | 314 | } | 
|  | 315 |  | 
|  | 316 |  | 
|  | 317 | static mdk_personality_t linear_personality= | 
|  | 318 | { | 
|  | 319 | .name		= "linear", | 
|  | 320 | .owner		= THIS_MODULE, | 
|  | 321 | .make_request	= linear_make_request, | 
|  | 322 | .run		= linear_run, | 
|  | 323 | .stop		= linear_stop, | 
|  | 324 | .status		= linear_status, | 
|  | 325 | }; | 
|  | 326 |  | 
|  | 327 | static int __init linear_init (void) | 
|  | 328 | { | 
|  | 329 | return register_md_personality (LINEAR, &linear_personality); | 
|  | 330 | } | 
|  | 331 |  | 
|  | 332 | static void linear_exit (void) | 
|  | 333 | { | 
|  | 334 | unregister_md_personality (LINEAR); | 
|  | 335 | } | 
|  | 336 |  | 
|  | 337 |  | 
|  | 338 | module_init(linear_init); | 
|  | 339 | module_exit(linear_exit); | 
|  | 340 | MODULE_LICENSE("GPL"); | 
|  | 341 | MODULE_ALIAS("md-personality-1"); /* LINEAR */ |