auto import from //depot/cupcake/@135843
diff --git a/libc/arch-x86/string/swab.S b/libc/arch-x86/string/swab.S
new file mode 100644
index 0000000..3055860
--- /dev/null
+++ b/libc/arch-x86/string/swab.S
@@ -0,0 +1,83 @@
+/*	$OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * On the i486, this code is negligibly faster than the code generated
+ * by gcc at about half the size.  If my i386 databook is correct, it
+ * should be considerably faster than the gcc code on an i386.
+ */
+
+/*
+ * void swab(const void *from, void *to, ssize_t len)
+ *
+ * Copy len bytes from `from` to `to`, exchanging each pair of adjacent
+ * bytes.  A trailing odd byte is not copied.  A zero or negative len
+ * copies nothing.
+ *
+ * Syntax: AT&T (GAS), 32-bit x86, arguments on the stack.
+ * In:     12(%esp) = from, 16(%esp) = to, 20(%esp) = len
+ *         (offsets as seen after the two pushes below)
+ * Clobb:  %ax, %ecx, flags (DF is cleared); %esi/%edi are preserved.
+ */
+
+ENTRY(swab)
+	pushl	%esi
+	pushl	%edi
+	movl	12(%esp),%esi		# esi = from (lodsw source)
+	movl	16(%esp),%edi		# edi = to (stosw destination)
+	movl	20(%esp),%ecx		# ecx = len in bytes
+
+	cld				# set direction forward
+
+	testl	%ecx,%ecx		# len is signed: a negative value must
+	jle	L4			# not be treated as a huge count.
+
+	shrl	$1,%ecx			# ecx = number of 16-bit words
+	testl	$7,%ecx			# copy first group of 1 to 7 words
+	jz	L2			# while swapping alternate bytes.
+	.align	2,0x90
+L1:	lodsw
+	rorw	$8,%ax
+	stosw
+	decl	%ecx
+	testl	$7,%ecx
+	jnz	L1
+
+L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
+	jz	L4			# while swapping alternate bytes.
+	.align	2,0x90
+L3:	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	decl	%ecx
+	jnz	L3
+
+L4:	popl	%edi
+	popl	%esi
+	ret