dm kcopyd: add WRITE SAME support to dm_kcopyd_zero
Add WRITE SAME support to dm-io and make it accessible to
dm_kcopyd_zero(). dm_kcopyd_zero() provides an asynchronous interface
whereas the blkdev_issue_write_same() interface is synchronous.
WRITE SAME is a SCSI command that can be leveraged for more efficient
zeroing of a specified logical extent of a device which supports it.
Only a single zeroed logical block is transfered to the target for each
WRITE SAME and the target then writes that same block across the
specified extent.
The dm thin target uses this.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 1c46f97..ea49834 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -287,7 +287,8 @@
unsigned num_bvecs;
sector_t remaining = where->count;
struct request_queue *q = bdev_get_queue(where->bdev);
- sector_t discard_sectors;
+ unsigned short logical_block_size = queue_logical_block_size(q);
+ sector_t num_sectors;
/*
* where->count may be zero if rw holds a flush and we need to
@@ -297,7 +298,7 @@
/*
* Allocate a suitably sized-bio.
*/
- if (rw & REQ_DISCARD)
+ if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
num_bvecs = 1;
else
num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
@@ -310,9 +311,21 @@
store_io_and_region_in_bio(bio, io, region);
if (rw & REQ_DISCARD) {
- discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
- bio->bi_size = discard_sectors << SECTOR_SHIFT;
- remaining -= discard_sectors;
+ num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
+ bio->bi_size = num_sectors << SECTOR_SHIFT;
+ remaining -= num_sectors;
+ } else if (rw & REQ_WRITE_SAME) {
+ /*
+ * WRITE SAME only uses a single page.
+ */
+ dp->get_page(dp, &page, &len, &offset);
+ bio_add_page(bio, page, logical_block_size, offset);
+ num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
+ bio->bi_size = num_sectors << SECTOR_SHIFT;
+
+ offset = 0;
+ remaining -= num_sectors;
+ dp->next_page(dp);
} else while (remaining) {
/*
* Try and add as many pages as possible.
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index bed444c..68c0267 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -349,7 +349,7 @@
struct dm_kcopyd_client *kc = job->kc;
if (error) {
- if (job->rw == WRITE)
+ if (job->rw & WRITE)
job->write_err |= error;
else
job->read_err = 1;
@@ -361,7 +361,7 @@
}
}
- if (job->rw == WRITE)
+ if (job->rw & WRITE)
push(&kc->complete_jobs, job);
else {
@@ -432,7 +432,7 @@
if (r < 0) {
/* error this rogue job */
- if (job->rw == WRITE)
+ if (job->rw & WRITE)
job->write_err = (unsigned long) -1L;
else
job->read_err = 1;
@@ -585,6 +585,7 @@
unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
{
struct kcopyd_job *job;
+ int i;
/*
* Allocate an array of jobs consisting of one master job
@@ -611,7 +612,16 @@
memset(&job->source, 0, sizeof job->source);
job->source.count = job->dests[0].count;
job->pages = &zero_page_list;
- job->rw = WRITE;
+
+ /*
+ * Use WRITE SAME to optimize zeroing if all dests support it.
+ */
+ job->rw = WRITE | REQ_WRITE_SAME;
+ for (i = 0; i < job->num_dests; i++)
+ if (!bdev_write_same(job->dests[i].bdev)) {
+ job->rw = WRITE;
+ break;
+ }
}
job->fn = fn;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fba378f..4b94074 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2779,7 +2779,7 @@
static struct target_type thin_target = {
.name = "thin",
- .version = {1, 5, 0},
+ .version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,