Merge with kernel.org:/pub/scm/linux/kernel/git/gregkh/aoe-2.6.git/

for 11 aoe bugfix patches.
diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh
index 6ce7070..ec5a6de 100644
--- a/Documentation/aoe/mkdevs.sh
+++ b/Documentation/aoe/mkdevs.sh
@@ -5,6 +5,7 @@
 
 if test "$#" != "1"; then
 	echo "Usage: sh `basename $0` {dir}" 1>&2
+	echo "       n_partitions=16 sh `basename $0` {dir}" 1>&2
 	exit 1
 fi
 dir=$1
diff --git a/Documentation/aoe/mkshelf.sh b/Documentation/aoe/mkshelf.sh
index 4093283..8bacf9f 100644
--- a/Documentation/aoe/mkshelf.sh
+++ b/Documentation/aoe/mkshelf.sh
@@ -2,6 +2,7 @@
 
 if test "$#" != "2"; then
 	echo "Usage: sh `basename $0` {dir} {shelfaddress}" 1>&2
+	echo "       n_partitions=16 sh `basename $0` {dir} {shelfaddress}" 1>&2
 	exit 1
 fi
 n_partitions=${n_partitions:-16}
diff --git a/Documentation/aoe/todo.txt b/Documentation/aoe/todo.txt
new file mode 100644
index 0000000..7fee1e1
--- /dev/null
+++ b/Documentation/aoe/todo.txt
@@ -0,0 +1,14 @@
+There is a potential for deadlock when allocating a struct sk_buff for
+data that needs to be written out to aoe storage.  If the data is
+being written from a dirty page in order to free that page, and if
+there are no other pages available, then deadlock may occur when a
+free page is needed for the sk_buff allocation.  This situation has
+not been observed, but it would be nice to eliminate any potential for
+deadlock under memory pressure.
+
+Because ATA over Ethernet is not fragmented by the kernel's IP code,
+the destructor member of the struct sk_buff is available to the aoe
+driver.  By using a mempool for allocating all but the first few
+sk_buffs, and by registering a destructor, we should be able to
+efficiently allocate sk_buffs without introducing any potential for
+deadlock.
diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh
index 861a27f..6449911 100644
--- a/Documentation/aoe/udev-install.sh
+++ b/Documentation/aoe/udev-install.sh
@@ -23,4 +23,8 @@
 # /etc/udev/rules.d
 #
 rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
-test "$rules_d" && sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
+if test -z "$rules_d" || test ! -d "$rules_d"; then
+	echo "$me Error: cannot find udev rules directory" 1>&2
+	exit 1
+fi
+sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index db78f82..aa8b547 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,10 +1,15 @@
 /* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
-#define VERSION "5"
+#define VERSION "6"
 #define AOE_MAJOR 152
 #define DEVICE_NAME "aoe"
+
+/* set AOE_PARTITIONS to 1 to use whole-disks only
+ * default is 16, which is 15 partitions plus the whole disk
+ */
 #ifndef AOE_PARTITIONS
 #define AOE_PARTITIONS 16
 #endif
+
 #define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * 10 + (aoeminor))
 #define AOEMAJOR(sysminor) ((sysminor) / 10)
 #define AOEMINOR(sysminor) ((sysminor) % 10)
@@ -34,13 +39,13 @@
 struct aoe_hdr {
 	unsigned char dst[6];
 	unsigned char src[6];
-	unsigned char type[2];
+	__be16 type;
 	unsigned char verfl;
 	unsigned char err;
-	unsigned char major[2];
+	__be16 major;
 	unsigned char minor;
 	unsigned char cmd;
-	unsigned char tag[4];
+	__be32 tag;
 };
 
 struct aoe_atahdr {
@@ -58,8 +63,8 @@
 };
 
 struct aoe_cfghdr {
-	unsigned char bufcnt[2];
-	unsigned char fwver[2];
+	__be16 bufcnt;
+	__be16 fwver;
 	unsigned char res;
 	unsigned char aoeccmd;
 	unsigned char cslen[2];
@@ -85,6 +90,7 @@
 
 struct buf {
 	struct list_head bufs;
+	ulong start_time;	/* for disk stats */
 	ulong flags;
 	ulong nframesout;
 	char *bufaddr;
@@ -125,7 +131,8 @@
 	struct timer_list timer;
 	spinlock_t lock;
 	struct net_device *ifp;	/* interface ed is attached to */
-	struct sk_buff *skblist;/* packets needing to be sent */
+	struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
+	struct sk_buff *sendq_tl;
 	mempool_t *bufpool;	/* for deadlock-free Buf allocation */
 	struct list_head bufq;	/* queue of bios to work on */
 	struct buf *inprocess;	/* the one we're currently working on */
@@ -151,7 +158,7 @@
 
 int aoedev_init(void);
 void aoedev_exit(void);
-struct aoedev *aoedev_bymac(unsigned char *);
+struct aoedev *aoedev_by_aoeaddr(int maj, int min);
 void aoedev_downdev(struct aoedev *d);
 struct aoedev *aoedev_set(ulong, unsigned char *, struct net_device *, ulong);
 int aoedev_busy(void);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 63561b2..4780f79 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -125,6 +125,7 @@
 	}
 	memset(buf, 0, sizeof(*buf));
 	INIT_LIST_HEAD(&buf->bufs);
+	buf->start_time = jiffies;
 	buf->bio = bio;
 	buf->resid = bio->bi_size;
 	buf->sector = bio->bi_sector;
@@ -146,8 +147,8 @@
 	list_add_tail(&buf->bufs, &d->bufq);
 	aoecmd_work(d);
 
-	sl = d->skblist;
-	d->skblist = NULL;
+	sl = d->sendq_hd;
+	d->sendq_hd = d->sendq_tl = NULL;
 
 	spin_unlock_irqrestore(&d->lock, flags);
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index fb6d942..b5be4b7 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -90,19 +90,16 @@
 static int
 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
 {
-	u16 type = __constant_cpu_to_be16(ETH_P_AOE);
-	u16 aoemajor = __cpu_to_be16(d->aoemajor);
 	u32 host_tag = newtag(d);
-	u32 tag = __cpu_to_be32(host_tag);
 
 	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
 	memcpy(h->dst, d->addr, sizeof h->dst);
-	memcpy(h->type, &type, sizeof type);
+	h->type = __constant_cpu_to_be16(ETH_P_AOE);
 	h->verfl = AOE_HVER;
-	memcpy(h->major, &aoemajor, sizeof aoemajor);
+	h->major = cpu_to_be16(d->aoemajor);
 	h->minor = d->aoeminor;
 	h->cmd = AOECMD_ATA;
-	memcpy(h->tag, &tag, sizeof tag);
+	h->tag = cpu_to_be32(host_tag);
 
 	return host_tag;
 }
@@ -181,8 +178,12 @@
 
 	skb = skb_prepare(d, f);
 	if (skb) {
-		skb->next = d->skblist;
-		d->skblist = skb;
+		skb->next = NULL;
+		if (d->sendq_hd)
+			d->sendq_tl->next = skb;
+		else
+			d->sendq_hd = skb;
+		d->sendq_tl = skb;
 	}
 }
 
@@ -215,7 +216,6 @@
 	struct aoe_hdr *h;
 	char buf[128];
 	u32 n;
-	u32 net_tag;
 
 	n = newtag(d);
 
@@ -227,13 +227,16 @@
 
 	h = (struct aoe_hdr *) f->data;
 	f->tag = n;
-	net_tag = __cpu_to_be32(n);
-	memcpy(h->tag, &net_tag, sizeof net_tag);
+	h->tag = cpu_to_be32(n);
 
 	skb = skb_prepare(d, f);
 	if (skb) {
-		skb->next = d->skblist;
-		d->skblist = skb;
+		skb->next = NULL;
+		if (d->sendq_hd)
+			d->sendq_tl->next = skb;
+		else
+			d->sendq_hd = skb;
+		d->sendq_tl = skb;
 	}
 }
 
@@ -285,8 +288,8 @@
 		}
 	}
 
-	sl = d->skblist;
-	d->skblist = NULL;
+	sl = d->sendq_hd;
+	d->sendq_hd = d->sendq_tl = NULL;
 	if (sl) {
 		n = d->rttavg <<= 1;
 		if (n > MAXTIMER)
@@ -308,16 +311,16 @@
 	u16 n;
 
 	/* word 83: command set supported */
-	n = __le16_to_cpu(*((u16 *) &id[83<<1]));
+	n = le16_to_cpup((__le16 *) &id[83<<1]);
 
 	/* word 86: command set/feature enabled */
-	n |= __le16_to_cpu(*((u16 *) &id[86<<1]));
+	n |= le16_to_cpup((__le16 *) &id[86<<1]);
 
 	if (n & (1<<10)) {	/* bit 10: LBA 48 */
 		d->flags |= DEVFL_EXT;
 
 		/* word 100: number lba48 sectors */
-		ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));
+		ssize = le64_to_cpup((__le64 *) &id[100<<1]);
 
 		/* set as in ide-disk.c:init_idedisk_capacity */
 		d->geo.cylinders = ssize;
@@ -328,12 +331,12 @@
 		d->flags &= ~DEVFL_EXT;
 
 		/* number lba28 sectors */
-		ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));
+		ssize = le32_to_cpup((__le32 *) &id[60<<1]);
 
 		/* NOTE: obsolete in ATA 6 */
-		d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
-		d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
-		d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
+		d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
+		d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
+		d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
 	}
 	d->ssize = ssize;
 	d->geo.start = 0;
@@ -380,29 +383,30 @@
 	register long n;
 	ulong flags;
 	char ebuf[128];
-	
+	u16 aoemajor;
+
 	hin = (struct aoe_hdr *) skb->mac.raw;
-	d = aoedev_bymac(hin->src);
+	aoemajor = be16_to_cpu(hin->major);
+	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
 	if (d == NULL) {
 		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
 			"for unknown device %d.%d\n",
-			 __be16_to_cpu(*((u16 *) hin->major)),
-			hin->minor);
+			 aoemajor, hin->minor);
 		aoechr_error(ebuf);
 		return;
 	}
 
 	spin_lock_irqsave(&d->lock, flags);
 
-	f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
+	f = getframe(d, be32_to_cpu(hin->tag));
 	if (f == NULL) {
 		spin_unlock_irqrestore(&d->lock, flags);
 		snprintf(ebuf, sizeof ebuf,
 			"%15s e%d.%d    tag=%08x@%08lx\n",
 			"unexpected rsp",
-			__be16_to_cpu(*((u16 *) hin->major)),
+			be16_to_cpu(hin->major),
 			hin->minor,
-			__be32_to_cpu(*((u32 *) hin->tag)),
+			be32_to_cpu(hin->tag),
 			jiffies);
 		aoechr_error(ebuf);
 		return;
@@ -452,7 +456,7 @@
 			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
 			       "outbound ata command %2.2Xh for %d.%d\n", 
 			       ahout->cmdstat,
-			       __be16_to_cpu(*((u16 *) hin->major)),
+			       be16_to_cpu(hin->major),
 			       hin->minor);
 		}
 	}
@@ -460,6 +464,20 @@
 	if (buf) {
 		buf->nframesout -= 1;
 		if (buf->nframesout == 0 && buf->resid == 0) {
+			unsigned long duration = jiffies - buf->start_time;
+			unsigned long n_sect = buf->bio->bi_size >> 9;
+			struct gendisk *disk = d->gd;
+
+			if (bio_data_dir(buf->bio) == WRITE) {
+				disk_stat_inc(disk, writes);
+				disk_stat_add(disk, write_ticks, duration);
+				disk_stat_add(disk, write_sectors, n_sect);
+			} else {
+				disk_stat_inc(disk, reads);
+				disk_stat_add(disk, read_ticks, duration);
+				disk_stat_add(disk, read_sectors, n_sect);
+			}
+			disk_stat_add(disk, io_ticks, duration);
 			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
 			bio_endio(buf->bio, buf->bio->bi_size, n);
 			mempool_free(buf, d->bufpool);
@@ -471,8 +489,8 @@
 
 	aoecmd_work(d);
 
-	sl = d->skblist;
-	d->skblist = NULL;
+	sl = d->sendq_hd;
+	d->sendq_hd = d->sendq_tl = NULL;
 
 	spin_unlock_irqrestore(&d->lock, flags);
 
@@ -486,8 +504,6 @@
 	struct aoe_cfghdr *ch;
 	struct sk_buff *skb, *sl;
 	struct net_device *ifp;
-	u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
-	u16 net_aoemajor = __cpu_to_be16(aoemajor);
 
 	sl = NULL;
 
@@ -507,9 +523,9 @@
 
 		memset(h->dst, 0xff, sizeof h->dst);
 		memcpy(h->src, ifp->dev_addr, sizeof h->src);
-		memcpy(h->type, &aoe_type, sizeof aoe_type);
+		h->type = __constant_cpu_to_be16(ETH_P_AOE);
 		h->verfl = AOE_HVER;
-		memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
+		h->major = cpu_to_be16(aoemajor);
 		h->minor = aoeminor;
 		h->cmd = AOECMD_CFG;
 
@@ -523,7 +539,7 @@
  
 /*
  * Since we only call this in one place (and it only prepares one frame)
- * we just return the skb.  Usually we'd chain it up to the d->skblist.
+ * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
  */
 static struct sk_buff *
 aoecmd_ata_id(struct aoedev *d)
@@ -575,9 +591,10 @@
 	struct aoedev *d;
 	struct aoe_hdr *h;
 	struct aoe_cfghdr *ch;
-	ulong flags, bufcnt, sysminor, aoemajor;
+	ulong flags, sysminor, aoemajor;
+	u16 bufcnt;
 	struct sk_buff *sl;
-	enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };
+	enum { MAXFRAMES = 8 };
 
 	h = (struct aoe_hdr *) skb->mac.raw;
 	ch = (struct aoe_cfghdr *) (h+1);
@@ -586,7 +603,7 @@
 	 * Enough people have their dip switches set backwards to
 	 * warrant a loud message for this special case.
 	 */
-	aoemajor = __be16_to_cpu(*((u16 *) h->major));
+	aoemajor = be16_to_cpu(h->major);
 	if (aoemajor == 0xfff) {
 		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
 			"address is all ones.  Check shelf dip switches\n");
@@ -594,13 +611,14 @@
 	}
 
 	sysminor = SYSMINOR(aoemajor, h->minor);
-	if (sysminor > MAXSYSMINOR) {
-		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
-			"large\n", sysminor);
+	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
+		printk(KERN_INFO
+			"aoe: e%ld.%d: minor number too large\n", 
+			aoemajor, (int) h->minor);
 		return;
 	}
 
-	bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
+	bufcnt = be16_to_cpu(ch->bufcnt);
 	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
 		bufcnt = MAXFRAMES;
 
@@ -617,7 +635,7 @@
 		return;
 	}
 
-	d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));
+	d->fw_ver = be16_to_cpu(ch->fwver);
 
 	/* we get here only if the device is new */
 	sl = aoecmd_ata_id(d);
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 240abae..ec16c64 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -13,7 +13,7 @@
 static spinlock_t devlist_lock;
 
 struct aoedev *
-aoedev_bymac(unsigned char *macaddr)
+aoedev_by_aoeaddr(int maj, int min)
 {
 	struct aoedev *d;
 	ulong flags;
@@ -21,7 +21,7 @@
 	spin_lock_irqsave(&devlist_lock, flags);
 
 	for (d=devlist; d; d=d->next)
-		if (!memcmp(d->addr, macaddr, 6))
+		if (d->aoemajor == maj && d->aoeminor == min)
 			break;
 
 	spin_unlock_irqrestore(&devlist_lock, flags);
@@ -125,7 +125,6 @@
 	d->ifp = ifp;
 
 	if (d->sysminor != sysminor
-	|| memcmp(d->addr, addr, sizeof d->addr)
 	|| (d->flags & DEVFL_UP) == 0) {
 		aoedev_downdev(d); /* flushes outstanding frames */
 		memcpy(d->addr, addr, sizeof d->addr);
@@ -147,7 +146,8 @@
 		put_disk(d->gd);
 	}
 	kfree(d->frames);
-	mempool_destroy(d->bufpool);
+	if (d->bufpool)
+		mempool_destroy(d->bufpool);
 	kfree(d);
 }
 
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index cc1945b..bc92aac 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -69,7 +69,7 @@
 u64
 mac_addr(char addr[6])
 {
-	u64 n = 0;
+	__be64 n = 0;
 	char *p = (char *) &n;
 
 	memcpy(p + 2, addr, 6);	/* (sizeof addr != 6) */
@@ -108,7 +108,7 @@
 aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt)
 {
 	struct aoe_hdr *h;
-	ulong n;
+	u32 n;
 
 	skb = skb_check(skb);
 	if (!skb)
@@ -121,7 +121,7 @@
 	skb_push(skb, ETH_HLEN);	/* (1) */
 
 	h = (struct aoe_hdr *) skb->mac.raw;
-	n = __be32_to_cpu(*((u32 *) h->tag));
+	n = be32_to_cpu(h->tag);
 	if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31))
 		goto exit;
 
@@ -132,7 +132,7 @@
 		if (net_ratelimit())
 			printk(KERN_ERR "aoe: aoenet_rcv: error packet from %d.%d; "
 			       "ecode=%d '%s'\n",
-			       __be16_to_cpu(*((u16 *) h->major)), h->minor, 
+			       be16_to_cpu(h->major), h->minor, 
 			       h->err, aoe_errlist[n]);
 		goto exit;
 	}