Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6

* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (156 commits)
  [PATCH] x86-64: Export smp_call_function_single
  [PATCH] i386: Clean up smp_tune_scheduling()
  [PATCH] unwinder: move .eh_frame to RODATA
  [PATCH] unwinder: fully support linker generated .eh_frame_hdr section
  [PATCH] x86-64: don't use set_irq_regs()
  [PATCH] x86-64: check vector in setup_ioapic_dest to verify if need setup_IO_APIC_irq
  [PATCH] x86-64: Make ix86 default to HIGHMEM4G instead of NOHIGHMEM
  [PATCH] i386: replace kmalloc+memset with kzalloc
  [PATCH] x86-64: remove remaining pc98 code
  [PATCH] x86-64: remove unused variable
  [PATCH] x86-64: Fix constraints in atomic_add_return()
  [PATCH] x86-64: fix asm constraints in i386 atomic_add_return
  [PATCH] x86-64: Correct documentation for bzImage protocol v2.05
  [PATCH] x86-64: replace kmalloc+memset with kzalloc in MTRR code
  [PATCH] x86-64: Fix numaq build error
  [PATCH] x86-64: include/asm-x86_64/cpufeature.h isn't a userspace header
  [PATCH] unwinder: Add debugging output to the Dwarf2 unwinder
  [PATCH] x86-64: Clarify error message in GART code
  [PATCH] x86-64: Fix interrupt race in idle callback (3rd try)
  [PATCH] x86-64: Remove unwind stack pointer alignment forcing again
  ...

Fixed conflict in include/linux/uaccess.h manually

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index c51314b..9575de3 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -2,7 +2,7 @@
 		     ----------------------------
 
 		    H. Peter Anvin <hpa@zytor.com>
-			Last update 2005-09-02
+			Last update 2006-11-17
 
 On the i386 platform, the Linux kernel uses a rather complicated boot
 convention.  This has evolved partially due to historical aspects, as
@@ -35,6 +35,8 @@
 		initrd address available to the bootloader.
 
 Protocol 2.04:	(Kernel 2.6.14) Extend the syssize field to four bytes.
+Protocol 2.05:	(Kernel 2.6.20) Make protected mode kernel relocatable.
+		Introduce relocatable_kernel and kernel_alignment fields.
 
 
 **** MEMORY LAYOUT
@@ -129,6 +131,8 @@
 0226/2	N/A	pad1		Unused
 0228/4	2.02+	cmd_line_ptr	32-bit pointer to the kernel command line
 022C/4	2.03+	initrd_addr_max	Highest legal initrd address
+0230/4	2.05+	kernel_alignment Physical addr alignment required for kernel
+0234/1	2.05+	relocatable_kernel Whether kernel is relocatable or not
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
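
A bootloader that wants to honor the new 2.05 fields first checks the
protocol version, then reads kernel_alignment and relocatable_kernel at
the offsets listed above. A minimal sketch, not part of this patch
(`image` and the get_le16()/get_le32() little-endian accessors are
hypothetical helpers, not a real API):

	/* Hypothetical sketch: image points at the start of the
	 * real-mode kernel image loaded in memory.
	 */
	static int kernel_is_relocatable(const uint8_t *image, uint32_t *align)
	{
		if (get_le16(image + 0x206) < 0x0205)	/* protocol version */
			return 0;
		*align = get_le32(image + 0x230);	/* kernel_alignment  */
		return image[0x234] != 0;		/* relocatable_kernel */
	}

If this returns nonzero, the kernel may be placed at any address that
is a multiple of *align; otherwise it must still be loaded at 1MB.
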
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6b3c3e3..b79bcdf 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -599,8 +599,6 @@
 
 	hugepages=	[HW,IA-32,IA-64] Maximal number of HugeTLB pages.
 
-	noirqbalance	[IA-32,SMP,KNL] Disable kernel irq balancing
-
 	i8042.direct	[HW] Put keyboard port into non-translated mode
 	i8042.dumbkbd	[HW] Pretend that controller can only read data from
 			     keyboard and cannot control its state
@@ -1065,9 +1063,14 @@
 			in certain environments such as networked servers or
 			real-time systems.
 
+	noirqbalance	[IA-32,SMP,KNL] Disable kernel irq balancing
+
 	noirqdebug	[IA-32] Disables the code which attempts to detect and
 			disable unhandled interrupt sources.
 
+	no_timer_check	[IA-32,X86_64,APIC] Disables the code which tests for
+			broken timer IRQ sources.
+
 	noisapnp	[ISAPNP] Disables ISA PnP code.
 
 	noinitrd	[RAM] Tells the kernel not to load any configured
@@ -1752,6 +1755,9 @@
 	norandmaps	Don't use address space randomization
 			Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space
 
+ 	unwind_debug=N 	N > 0 will enable dwarf2 unwinder debugging
+			This is useful for getting more information on why
+			you got a "dwarf2 unwinder stuck" message
 
 ______________________________________________________________________
 
diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt
index f39c9d7..a2afca3 100644
--- a/Documentation/stable_api_nonsense.txt
+++ b/Documentation/stable_api_nonsense.txt
@@ -62,9 +62,6 @@
       - different structures can contain different fields
       - Some functions may not be implemented at all, (i.e. some locks
 	compile away to nothing for non-SMP builds.)
-      - Parameter passing of variables from function to function can be
-	done in different ways (the CONFIG_REGPARM option controls
-	this.)
       - Memory within the kernel can be aligned in different ways,
 	depending on the build options.
   - Linux runs on a wide range of different processor architectures.
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 0bc7f1e..5922e84 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -27,6 +27,7 @@
 - hotplug
 - java-appletviewer           [ binfmt_java, obsolete ]
 - java-interpreter            [ binfmt_java, obsolete ]
+- kstack_depth_to_print       [ X86 only ]
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/kmod.txt
 - msgmax
@@ -170,6 +171,13 @@
 
 ==============================================================
 
+kstack_depth_to_print: (X86 only)
+
+Controls the number of words to print when dumping the raw
+kernel stack.
+
+==============================================================
+
 osrelease, ostype & version:
 
 # cat osrelease
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index f3c57f4..dbdcaf6 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -52,10 +52,6 @@
 		 apicmaintimer. Useful when your PIT timer is totally
 		 broken.
 
-   disable_8254_timer / enable_8254_timer
-		 Enable interrupt 0 timer routing over the 8254 in addition to over
-	         the IO-APIC. The kernel tries to set a sensible default.
-
 Early Console
 
    syntax: earlyprintk=vga
@@ -183,7 +179,7 @@
 IOMMU
 
  iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge]
-         [,forcesac][,fullflush][,nomerge][,noaperture]
+         [,forcesac][,fullflush][,nomerge][,noaperture][,calgary]
    size  set size of iommu (in bytes)
    noagp don't initialize the AGP driver and use full aperture.
    off   don't use the IOMMU
@@ -204,6 +200,7 @@
 	    buffering.
    nodac    Forbid DMA >4GB
    panic    Always panic when IOMMU overflows
+   calgary  Use the Calgary IOMMU if it is available
 
   swiotlb=pages[,force]
 
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index 756325d..f05288b 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -70,6 +70,7 @@
 #endif
         .text :
 	{
+	_text = .;
 #if defined(CONFIG_ROMKERNEL)
 	*(.int_redirect)
 #endif
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 8ff1c6f..ea70359 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -182,6 +182,17 @@
 
 endchoice
 
+config PARAVIRT
+	bool "Paravirtualization support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  Paravirtualization is a way of running multiple instances of
+	  Linux on the same machine, under a hypervisor.  This option
+	  changes the kernel so it can modify itself when it is run
+	  under a hypervisor, improving performance significantly.
+	  However, when run without a hypervisor the kernel is
+	  theoretically slower.  If in doubt, say N.
+
 config ACPI_SRAT
 	bool
 	default y
@@ -443,7 +454,8 @@
 
 choice
 	prompt "High Memory Support"
-	default NOHIGHMEM
+	default HIGHMEM4G if !X86_NUMAQ
+	default HIGHMEM64G if X86_NUMAQ
 
 config NOHIGHMEM
 	bool "off"
@@ -710,20 +722,6 @@
 	depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI))
 	default y
 
-config REGPARM
-	bool "Use register arguments"
-	default y
-	help
-	Compile the kernel with -mregparm=3. This instructs gcc to use
-	a more efficient function call ABI which passes the first three
-	arguments of a function call via registers, which results in denser
-	and faster code.
-
-	If this option is disabled, then the default ABI of passing
-	arguments via the stack is used.
-
-	If unsure, say Y.
-
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
@@ -773,23 +771,39 @@
           PHYSICAL_START.
 	  For more details see Documentation/kdump/kdump.txt
 
-config PHYSICAL_START
-	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
-
-	default "0x1000000" if CRASH_DUMP
-	default "0x100000"
+config RELOCATABLE
+	bool "Build a relocatable kernel(EXPERIMENTAL)"
+	depends on EXPERIMENTAL
 	help
-	  This gives the physical address where the kernel is loaded. Normally
-	  for regular kernels this value is 0x100000 (1MB). But in the case
-	  of kexec on panic the fail safe kernel needs to run at a different
-	  address than the panic-ed kernel. This option is used to set the load
-	  address for kernels used to capture crash dump on being kexec'ed
-	  after panic. The default value for crash dump kernels is
-	  0x1000000 (16MB). This can also be set based on the "X" value as
-	  specified in the "crashkernel=YM@XM" command line boot parameter
-	  passed to the panic-ed kernel. Typically this parameter is set as
-	  crashkernel=64M@16M. Please take a look at
-	  Documentation/kdump/kdump.txt for more details about crash dumps.
+	  This builds a kernel image that retains relocation information
+	  so it can be loaded someplace besides the default 1MB.
+	  The relocations tend to make the kernel binary about 10% larger,
+	  but are discarded at runtime.
+
+	  One use is for the kexec on panic case where the recovery kernel
+          must live at a different physical address than the primary
+          kernel.
+
+config PHYSICAL_ALIGN
+	hex "Alignment value to which kernel should be aligned"
+	default "0x100000"
+	range 0x2000 0x400000
+	help
+	  This value puts alignment restrictions on the physical address
+	  where the kernel is loaded and run from. The kernel is compiled
+	  for an address which meets the above alignment restriction.
+
+	  If the bootloader loads the kernel at a non-aligned address and
+	  CONFIG_RELOCATABLE is set, the kernel will move itself to the
+	  nearest address aligned to the above value and run from there.
+
+	  If the bootloader loads the kernel at a non-aligned address and
+	  CONFIG_RELOCATABLE is not set, the kernel will ignore the run time
+	  load address and decompress itself to the address it has been
+	  compiled for and run from there. The address for which the kernel
+	  is compiled already meets the above alignment restrictions. Hence
+	  the end result is that the kernel runs from a physical address
+	  meeting the above alignment restrictions.
 
 	  Don't change this unless you know what you are doing.
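
The round-up this help text describes is the usual power-of-two
alignment trick; a minimal sketch of what head.S does with
CONFIG_PHYSICAL_ALIGN (the mask form assumes the alignment is a power
of two):

	static unsigned long align_up(unsigned long addr, unsigned long align)
	{
		return (addr + align - 1) & ~(align - 1);
	}
	/* e.g. align_up(0x123456, 0x100000) == 0x200000 */
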
 
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index fc4f2ab..821fd26 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -103,8 +103,15 @@
 	  Select this for Intel Pentium M (not Pentium-4 M)
 	  notebook chips.
 
+config MCORE2
+	bool "Core 2/newer Xeon"
+	help
+	  Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx)
+	  CPUs. You can distinguish newer from older Xeons by the CPU family
+	  in /proc/cpuinfo. Newer ones have family 6.
+
 config MPENTIUM4
-	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
+	bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon"
 	help
 	  Select this for Intel Pentium 4 chips.  This includes the
 	  Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
@@ -229,7 +236,7 @@
 	default "7" if MPENTIUM4 || X86_GENERIC
 	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-	default "6" if MK7 || MK8 || MPENTIUMM
+	default "6" if MK7 || MK8 || MPENTIUMM || MCORE2
 
 config RWSEM_GENERIC_SPINLOCK
 	bool
@@ -287,17 +294,17 @@
 
 config X86_GOOD_APIC
 	bool
-	depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
+	depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2
 	default y
 
 config X86_INTEL_USERCOPY
 	bool
-	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
+	depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
 	default y
 
 config X86_USE_PPRO_CHECKSUM
 	bool
-	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX
+	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2
 	default y
 
 config X86_USE_3DNOW
@@ -312,5 +319,5 @@
 
 config X86_TSC
 	bool
-	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
+	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ
 	default y
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index b31c080..f68cc6f 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -85,4 +85,14 @@
           option saves about 4k and might cause you much additional grey
           hair.
 
+config DEBUG_PARAVIRT
+	bool "Enable some paravirtualization debugging"
+	default y
+	depends on PARAVIRT && DEBUG_KERNEL
+	help
+	  Currently deliberately clobbers regs which are allowed to be
+	  clobbered in inlined paravirt hooks, even in native mode.
+	  If turning this off solves a problem, then DISABLE_INTERRUPTS() or
+	  ENABLE_INTERRUPTS() is lying about what registers can be clobbered.
+
 endmenu
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index 0677908..f7ac1ae 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -26,10 +26,12 @@
 
 LDFLAGS		:= -m elf_i386
 OBJCOPYFLAGS	:= -O binary -R .note -R .comment -S
-LDFLAGS_vmlinux :=
+ifdef CONFIG_RELOCATABLE
+LDFLAGS_vmlinux := --emit-relocs
+endif
 CHECKFLAGS	+= -D__i386__
 
-CFLAGS += -pipe -msoft-float
+CFLAGS += -pipe -msoft-float -mregparm=3
 
 # prevent gcc from keeping the stack 16 byte aligned
 CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
@@ -37,8 +39,6 @@
 # CPU-specific tuning. Anything which can be shared with UML should go here.
 include $(srctree)/arch/i386/Makefile.cpu
 
-cflags-$(CONFIG_REGPARM) += -mregparm=3
-
 # temporary until string.h is fixed
 cflags-y += -ffreestanding
 
diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu
index a11befb..a32c031 100644
--- a/arch/i386/Makefile.cpu
+++ b/arch/i386/Makefile.cpu
@@ -32,6 +32,7 @@
 cflags-$(CONFIG_MWINCHIP3D)	+= $(call cc-option,-march=winchip2,-march=i586)
 cflags-$(CONFIG_MCYRIXIII)	+= $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
 cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-option,-march=c3-2,-march=i686)
+cflags-$(CONFIG_MCORE2)		+= -march=i686 $(call cc-option,-mtune=core2,$(call cc-option,-mtune=generic,-mtune=i686))
 
 # AMD Elan support
 cflags-$(CONFIG_X86_ELAN)	+= -march=i486
diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile
index 258ea95..a661217 100644
--- a/arch/i386/boot/compressed/Makefile
+++ b/arch/i386/boot/compressed/Makefile
@@ -4,22 +4,42 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o
+targets		:= vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o \
+			vmlinux.bin.all vmlinux.relocs
 EXTRA_AFLAGS	:= -traditional
 
-LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32
+LDFLAGS_vmlinux := -T
+CFLAGS_misc.o += -fPIC
+hostprogs-y	:= relocs
 
-$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
+$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
+quiet_cmd_relocs = RELOCS  $@
+      cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
+$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
+	$(call if_changed,relocs)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
+quiet_cmd_relocbin = BUILD   $@
+      cmd_relocbin = cat $(filter-out FORCE,$^) > $@
+$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
+	$(call if_changed,relocbin)
+
+ifdef CONFIG_RELOCATABLE
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
+	$(call if_changed,gzip)
+else
 $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
 	$(call if_changed,gzip)
+endif
 
 LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
 
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
+$(obj)/piggy.o: $(src)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
 	$(call if_changed,ld)
diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
index b5893e4..f395a4b 100644
--- a/arch/i386/boot/compressed/head.S
+++ b/arch/i386/boot/compressed/head.S
@@ -26,9 +26,11 @@
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page.h>
+#include <asm/boot.h>
 
+.section ".text.head"
 	.globl startup_32
-	
+
 startup_32:
 	cld
 	cli
@@ -37,93 +39,142 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
+	movl %eax,%ss
 
-	lss stack_start,%esp
-	xorl %eax,%eax
-1:	incl %eax		# check that A20 really IS enabled
-	movl %eax,0x000000	# loop forever if it isn't
-	cmpl %eax,0x100000
-	je 1b
+/* Calculate the delta between where we were compiled to run
+ * at and where we were actually loaded at.  This can only be done
+ * with a short local call on x86.  Nothing else will tell us what
+ * address we are running at.  The reserved chunk of the real-mode
+ * data at 0x34-0x3f is used as the stack for this calculation.
+ * Only 4 bytes are needed.
+ */
+	leal 0x40(%esi), %esp
+	call 1f
+1:	popl %ebp
+	subl $1b, %ebp
+
+/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ * contains the address where we should move the kernel image temporarily
+ * for safe in-place decompression.
+ */
+
+#ifdef CONFIG_RELOCATABLE
+	movl 	%ebp, %ebx
+	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebx
+	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx
+#else
+	movl $LOAD_PHYSICAL_ADDR, %ebx
+#endif
+
+	/* Replace the compressed data size with the uncompressed size */
+	subl input_len(%ebp), %ebx
+	movl output_len(%ebp), %eax
+	addl %eax, %ebx
+	/* Add 8 bytes for every 32K input block */
+	shrl $12, %eax
+	addl %eax, %ebx
+	/* Add 32K + 18 bytes of extra slack */
+	addl $(32768 + 18), %ebx
+	/* Align on a 4K boundary */
+	addl $4095, %ebx
+	andl $~4095, %ebx
+
+/* Copy the compressed kernel to the end of our buffer
+ * where decompression in place becomes safe.
+ */
+	pushl %esi
+	leal _end(%ebp), %esi
+	leal _end(%ebx), %edi
+	movl $(_end - startup_32), %ecx
+	std
+	rep
+	movsb
+	cld
+	popl %esi
+
+/* Compute the kernel start address.
+ */
+#ifdef CONFIG_RELOCATABLE
+	addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
+	andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp
+#else
+	movl	$LOAD_PHYSICAL_ADDR, %ebp
+#endif
 
 /*
- * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
+ * Jump to the relocated address.
  */
-	pushl $0
-	popfl
+	leal relocated(%ebx), %eax
+	jmp *%eax
+.section ".text"
+relocated:
+
 /*
  * Clear BSS
  */
 	xorl %eax,%eax
-	movl $_edata,%edi
-	movl $_end,%ecx
+	leal _edata(%ebx),%edi
+	leal _end(%ebx), %ecx
 	subl %edi,%ecx
 	cld
 	rep
 	stosb
+
+/*
+ * Set up the stack for the decompressor
+ */
+	leal stack_end(%ebx), %esp
+
 /*
  * Do the decompression, and jump to the new kernel..
  */
-	subl $16,%esp	# place for structure on the stack
-	movl %esp,%eax
+	movl output_len(%ebx), %eax
+	pushl %eax
+	pushl %ebp	# output address
+	movl input_len(%ebx), %eax
+	pushl %eax	# input_len
+	leal input_data(%ebx), %eax
+	pushl %eax	# input_data
+	leal _end(%ebx), %eax
+	pushl %eax	# end of the image as third argument
 	pushl %esi	# real mode pointer as second arg
-	pushl %eax	# address of structure as first arg
 	call decompress_kernel
-	orl  %eax,%eax 
-	jnz  3f
-	popl %esi	# discard address
-	popl %esi	# real mode pointer
-	xorl %ebx,%ebx
-	ljmp $(__BOOT_CS), $__PHYSICAL_START
+	addl $20, %esp
+	popl %ecx
+
+#ifdef CONFIG_RELOCATABLE
+/* Find the address of the relocations.
+ */
+	movl %ebp, %edi
+	addl %ecx, %edi
+
+/* Calculate the delta between where vmlinux was compiled to run
+ * and where it was actually loaded.
+ */
+	movl %ebp, %ebx
+	subl $LOAD_PHYSICAL_ADDR, %ebx
+	jz   2f		/* Nothing to be done if loaded at compiled addr. */
+/*
+ * Process relocations.
+ */
+
+1:	subl $4, %edi
+	movl 0(%edi), %ecx
+	testl %ecx, %ecx
+	jz 2f
+	addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
+	jmp 1b
+2:
+#endif
 
 /*
- * We come here, if we were loaded high.
- * We need to move the move-in-place routine down to 0x1000
- * and then start it with the buffer addresses in registers,
- * which we got from the stack.
+ * Jump to the decompressed kernel.
  */
-3:
-	movl $move_routine_start,%esi
-	movl $0x1000,%edi
-	movl $move_routine_end,%ecx
-	subl %esi,%ecx
-	addl $3,%ecx
-	shrl $2,%ecx
-	cld
-	rep
-	movsl
-
-	popl %esi	# discard the address
-	popl %ebx	# real mode pointer
-	popl %esi	# low_buffer_start
-	popl %ecx	# lcount
-	popl %edx	# high_buffer_start
-	popl %eax	# hcount
-	movl $__PHYSICAL_START,%edi
-	cli		# make sure we don't get interrupted
-	ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
-
-/*
- * Routine (template) for moving the decompressed kernel in place,
- * if we were high loaded. This _must_ PIC-code !
- */
-move_routine_start:
-	movl %ecx,%ebp
-	shrl $2,%ecx
-	rep
-	movsl
-	movl %ebp,%ecx
-	andl $3,%ecx
-	rep
-	movsb
-	movl %edx,%esi
-	movl %eax,%ecx	# NOTE: rep movsb won't move if %ecx == 0
-	addl $3,%ecx
-	shrl $2,%ecx
-	rep
-	movsl
-	movl %ebx,%esi	# Restore setup pointer
 	xorl %ebx,%ebx
-	ljmp $(__BOOT_CS), $__PHYSICAL_START
-move_routine_end:
+	jmp *%ebp
+
+.bss
+.balign 4
+stack:
+	.fill 4096, 1, 0
+stack_end:
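
The relocation-processing loop above is dense; a conceptual C
rendering of the same walk (in reality it runs in assembly, before any
C environment exists, and the names here are illustrative):

	/* relocs_end = load address + output_len: the table is appended
	 * to the decompressed image. delta = actual load address minus
	 * LOAD_PHYSICAL_ADDR. Each entry is the kernel virtual address
	 * of a 32-bit word to fix up; a zero entry ends the table.
	 */
	static void process_relocs(uint32_t *relocs_end, uint32_t delta)
	{
		uint32_t *p = relocs_end;

		while (*--p) {	/* walked backwards; zero terminates */
			uint32_t *site = (uint32_t *)(*p - __PAGE_OFFSET + delta);
			*site += delta;	/* slide the stored address */
		}
	}
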
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
index b2ccd54..1ce7017 100644
--- a/arch/i386/boot/compressed/misc.c
+++ b/arch/i386/boot/compressed/misc.c
@@ -9,11 +9,94 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
+#undef CONFIG_PARAVIRT
 #include <linux/linkage.h>
 #include <linux/vmalloc.h>
 #include <linux/screen_info.h>
 #include <asm/io.h>
 #include <asm/page.h>
+#include <asm/boot.h>
+
+/* WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically
+ * at run time, but no relocation processing is performed.
+ * This means that it is not safe to place pointers in static structures.
+ */
+
+/*
+ * Getting to provably safe in-place decompression is hard.
+ * Worst case behaviours need to be analyzed.
+ * Background information:
+ *
+ * The file layout is:
+ *    magic[2]
+ *    method[1]
+ *    flags[1]
+ *    timestamp[4]
+ *    extraflags[1]
+ *    os[1]
+ *    compressed data blocks[N]
+ *    crc[4] orig_len[4]
+ *
+ * resulting in 18 bytes of non compressed data overhead.
+ *
+ * Files are divided into blocks:
+ * 1 bit (last block flag)
+ * 2 bits (block type)
+ *
+ * A new block occurs every 32K - 1 bytes or when 50% compression has been achieved.
+ * The smallest block type encoding is always used.
+ *
+ * stored:
+ *    32 bits length in bytes.
+ *
+ * fixed:
+ *    magic fixed tree.
+ *    symbols.
+ *
+ * dynamic:
+ *    dynamic tree encoding.
+ *    symbols.
+ *
+ *
+ * The buffer for decompression in place is the length of the
+ * uncompressed data, plus a small amount extra to keep the algorithm safe.
+ * The compressed data is placed at the end of the buffer.  The output
+ * pointer is placed at the start of the buffer and the input pointer
+ * is placed where the compressed data starts.  Problems will occur
+ * when the output pointer overruns the input pointer.
+ *
+ * The output pointer can only overrun the input pointer if the input
+ * pointer is moving faster than the output pointer.  A condition only
+ * triggered by data whose compressed form is larger than the uncompressed
+ * form.
+ *
+ * The worst case at the block level is a growth of the compressed data
+ * of 5 bytes per 32767 bytes.
+ *
+ * The worst case internal to a compressed block is very hard to figure.
+ * The worst case can at least be bounded by having one bit that represents
+ * 32764 bytes and then all of the rest of the bytes representing the very
+ * very last byte.
+ *
+ * All of which is enough to compute an amount of extra data that is required
+ * to be safe.  To avoid problems at the block level allocating 5 extra bytes
+ * per 32767 bytes of data is sufficient.  To avoid problems internal to a block
+ * adding an extra 32767 bytes (the worst case uncompressed block size) is
+ * sufficient, to ensure that in the worst case the decompressed data for
+ * a block will stop the byte before the compressed data for a block begins.
+ * To avoid problems with the compressed data's meta information an extra 18
+ * bytes are needed.  Leading to the formula:
+ *
+ * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
+ *
+ * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+ * Adding 32768 instead of 32767 just makes for round numbers.
+ * Adding the decompressor_size is necessary as it must live after all
+ * of the data as well.  Last I measured the decompressor is about 14K.
+ * 10K of actual data and 4K of bss.
+ *
+ */
 
 /*
  * gzip declarations
@@ -30,15 +113,20 @@
 typedef unsigned short ush;
 typedef unsigned long  ulg;
 
-#define WSIZE 0x8000		/* Window size must be at least 32k, */
-				/* and a power of two */
+#define WSIZE 0x80000000	/* Window size must be at least 32k,
+				 * and a power of two.
+				 * We don't actually have a window, just
+				 * a huge output buffer, so I report
+				 * a 2G window size, as that should
+				 * always be larger than our output buffer.
+				 */
 
-static uch *inbuf;	     /* input buffer */
-static uch window[WSIZE];    /* Sliding window buffer */
+static uch *inbuf;	/* input buffer */
+static uch *window;	/* Sliding window buffer, (and final output buffer) */
 
-static unsigned insize = 0;  /* valid bytes in inbuf */
-static unsigned inptr = 0;   /* index of next byte to be processed in inbuf */
-static unsigned outcnt = 0;  /* bytes in output buffer */
+static unsigned insize;  /* valid bytes in inbuf */
+static unsigned inptr;   /* index of next byte to be processed in inbuf */
+static unsigned outcnt;  /* bytes in output buffer */
 
 /* gzip flag byte */
 #define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
@@ -89,8 +177,6 @@
 extern int input_len;
 
 static long bytes_out = 0;
-static uch *output_data;
-static unsigned long output_ptr = 0;
 
 static void *malloc(int size);
 static void free(void *where);
@@ -100,24 +186,17 @@
 
 static void putstr(const char *);
 
-extern int end;
-static long free_mem_ptr = (long)&end;
-static long free_mem_end_ptr;
+static unsigned long free_mem_ptr;
+static unsigned long free_mem_end_ptr;
 
-#define INPLACE_MOVE_ROUTINE  0x1000
-#define LOW_BUFFER_START      0x2000
-#define LOW_BUFFER_MAX       0x90000
 #define HEAP_SIZE             0x3000
-static unsigned int low_buffer_end, low_buffer_size;
-static int high_loaded =0;
-static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
 
 static char *vidmem = (char *)0xb8000;
 static int vidport;
 static int lines, cols;
 
 #ifdef CONFIG_X86_NUMAQ
-static void * xquad_portio = NULL;
+void *xquad_portio;
 #endif
 
 #include "../../../../lib/inflate.c"
@@ -151,7 +230,7 @@
 
 static void gzip_release(void **ptr)
 {
-	free_mem_ptr = (long) *ptr;
+	free_mem_ptr = (unsigned long) *ptr;
 }
  
 static void scroll(void)
@@ -179,7 +258,7 @@
 				y--;
 			}
 		} else {
-			vidmem [ ( x + cols * y ) * 2 ] = c; 
+			vidmem [ ( x + cols * y ) * 2 ] = c;
 			if ( ++x >= cols ) {
 				x = 0;
 				if ( ++y >= lines ) {
@@ -224,58 +303,31 @@
  */
 static int fill_inbuf(void)
 {
-	if (insize != 0) {
-		error("ran out of input data");
-	}
-
-	inbuf = input_data;
-	insize = input_len;
-	inptr = 1;
-	return inbuf[0];
+	error("ran out of input data");
+	return 0;
 }
 
 /* ===========================================================================
  * Write the output window window[0..outcnt-1] and update crc and bytes_out.
  * (Used for the decompressed data only.)
  */
-static void flush_window_low(void)
-{
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in, *out, ch;
-    
-    in = window;
-    out = &output_data[output_ptr]; 
-    for (n = 0; n < outcnt; n++) {
-	    ch = *out++ = *in++;
-	    c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    output_ptr += (ulg)outcnt;
-    outcnt = 0;
-}
-
-static void flush_window_high(void)
-{
-    ulg c = crc;         /* temporary variable */
-    unsigned n;
-    uch *in,  ch;
-    in = window;
-    for (n = 0; n < outcnt; n++) {
-	ch = *output_data++ = *in++;
-	if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
-	c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
-    }
-    crc = c;
-    bytes_out += (ulg)outcnt;
-    outcnt = 0;
-}
-
 static void flush_window(void)
 {
-	if (high_loaded) flush_window_high();
-	else flush_window_low();
+	/* With my window equal to my output buffer
+	 * I only need to compute the crc here.
+	 */
+	ulg c = crc;         /* temporary variable */
+	unsigned n;
+	uch *in, ch;
+
+	in = window;
+	for (n = 0; n < outcnt; n++) {
+		ch = *in++;
+		c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+	}
+	crc = c;
+	bytes_out += (ulg)outcnt;
+	outcnt = 0;
 }
 
 static void error(char *x)
@@ -287,66 +339,8 @@
 	while(1);	/* Halt */
 }
 
-#define STACK_SIZE (4096)
-
-long user_stack [STACK_SIZE];
-
-struct {
-	long * a;
-	short b;
-	} stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS };
-
-static void setup_normal_output_buffer(void)
-{
-#ifdef STANDARD_MEMORY_BIOS_CALL
-	if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
-#else
-	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
-#endif
-	output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
-	free_mem_end_ptr = (long)real_mode;
-}
-
-struct moveparams {
-	uch *low_buffer_start;  int lcount;
-	uch *high_buffer_start; int hcount;
-};
-
-static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
-{
-	high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
-#ifdef STANDARD_MEMORY_BIOS_CALL
-	if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
-#else
-	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
-#endif	
-	mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
-	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
-	  ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
-	low_buffer_size = low_buffer_end - LOW_BUFFER_START;
-	high_loaded = 1;
-	free_mem_end_ptr = (long)high_buffer_start;
-	if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) {
-		high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size);
-		mv->hcount = 0; /* say: we need not to move high_buffer */
-	}
-	else mv->hcount = -1;
-	mv->high_buffer_start = high_buffer_start;
-}
-
-static void close_output_buffer_if_we_run_high(struct moveparams *mv)
-{
-	if (bytes_out > low_buffer_size) {
-		mv->lcount = low_buffer_size;
-		if (mv->hcount)
-			mv->hcount = bytes_out - low_buffer_size;
-	} else {
-		mv->lcount = bytes_out;
-		mv->hcount = 0;
-	}
-}
-
-asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode)
+asmlinkage void decompress_kernel(void *rmode, unsigned long end,
+			uch *input_data, unsigned long input_len, uch *output)
 {
 	real_mode = rmode;
 
@@ -361,13 +355,25 @@
 	lines = RM_SCREEN_INFO.orig_video_lines;
 	cols = RM_SCREEN_INFO.orig_video_cols;
 
-	if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
-	else setup_output_buffer_if_we_run_high(mv);
+	window = output;  	/* Output buffer (Normally at 1M) */
+	free_mem_ptr     = end;	/* Heap  */
+	free_mem_end_ptr = end + HEAP_SIZE;
+	inbuf  = input_data;	/* Input buffer */
+	insize = input_len;
+	inptr  = 0;
+
+	if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
+		error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
+	if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff))
+		error("Destination address too large");
+#ifndef CONFIG_RELOCATABLE
+	if ((u32)output != LOAD_PHYSICAL_ADDR)
+		error("Wrong destination address");
+#endif
 
 	makecrc();
 	putstr("Uncompressing Linux... ");
 	gunzip();
 	putstr("Ok, booting the kernel.\n");
-	if (high_loaded) close_output_buffer_if_we_run_high(mv);
-	return high_loaded;
+	return;
 }
diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c
new file mode 100644
index 0000000..468da89
--- /dev/null
+++ b/arch/i386/boot/compressed/relocs.c
@@ -0,0 +1,625 @@
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <elf.h>
+#include <byteswap.h>
+#define USE_BSD
+#include <endian.h>
+
+#define MAX_SHDRS 100
+static Elf32_Ehdr ehdr;
+static Elf32_Shdr shdr[MAX_SHDRS];
+static Elf32_Sym  *symtab[MAX_SHDRS];
+static Elf32_Rel  *reltab[MAX_SHDRS];
+static char *strtab[MAX_SHDRS];
+static unsigned long reloc_count, reloc_idx;
+static unsigned long *relocs;
+
+/*
+ * The following symbols have been audited. Their values are constant and do
+ * not change if bzImage is loaded at a different physical address than
+ * the address for which it has been compiled. Don't warn the user about
+ * absolute relocations present w.r.t. these symbols.
+ */
+static const char* safe_abs_relocs[] = {
+		"__kernel_vsyscall",
+		"__kernel_rt_sigreturn",
+		"__kernel_sigreturn",
+		"SYSENTER_RETURN",
+};
+
+static int is_safe_abs_reloc(const char* sym_name)
+{
+	int i, array_size;
+
+	array_size = sizeof(safe_abs_relocs)/sizeof(char*);
+
+	for(i = 0; i < array_size; i++) {
+		if (!strcmp(sym_name, safe_abs_relocs[i]))
+			/* Match found */
+			return 1;
+	}
+	return 0;
+}
+
+static void die(char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(1);
+}
+
+static const char *sym_type(unsigned type)
+{
+	static const char *type_name[] = {
+#define SYM_TYPE(X) [X] = #X
+		SYM_TYPE(STT_NOTYPE),
+		SYM_TYPE(STT_OBJECT),
+		SYM_TYPE(STT_FUNC),
+		SYM_TYPE(STT_SECTION),
+		SYM_TYPE(STT_FILE),
+		SYM_TYPE(STT_COMMON),
+		SYM_TYPE(STT_TLS),
+#undef SYM_TYPE
+	};
+	const char *name = "unknown sym type name";
+	if (type < sizeof(type_name)/sizeof(type_name[0])) {
+		name = type_name[type];
+	}
+	return name;
+}
+
+static const char *sym_bind(unsigned bind)
+{
+	static const char *bind_name[] = {
+#define SYM_BIND(X) [X] = #X
+		SYM_BIND(STB_LOCAL),
+		SYM_BIND(STB_GLOBAL),
+		SYM_BIND(STB_WEAK),
+#undef SYM_BIND
+	};
+	const char *name = "unknown sym bind name";
+	if (bind < sizeof(bind_name)/sizeof(bind_name[0])) {
+		name = bind_name[bind];
+	}
+	return name;
+}
+
+static const char *sym_visibility(unsigned visibility)
+{
+	static const char *visibility_name[] = {
+#define SYM_VISIBILITY(X) [X] = #X
+		SYM_VISIBILITY(STV_DEFAULT),
+		SYM_VISIBILITY(STV_INTERNAL),
+		SYM_VISIBILITY(STV_HIDDEN),
+		SYM_VISIBILITY(STV_PROTECTED),
+#undef SYM_VISIBILITY
+	};
+	const char *name = "unknown sym visibility name";
+	if (visibility < sizeof(visibility_name)/sizeof(visibility_name[0])) {
+		name = visibility_name[visibility];
+	}
+	return name;
+}
+
+static const char *rel_type(unsigned type)
+{
+	static const char *type_name[] = {
+#define REL_TYPE(X) [X] = #X
+		REL_TYPE(R_386_NONE),
+		REL_TYPE(R_386_32),
+		REL_TYPE(R_386_PC32),
+		REL_TYPE(R_386_GOT32),
+		REL_TYPE(R_386_PLT32),
+		REL_TYPE(R_386_COPY),
+		REL_TYPE(R_386_GLOB_DAT),
+		REL_TYPE(R_386_JMP_SLOT),
+		REL_TYPE(R_386_RELATIVE),
+		REL_TYPE(R_386_GOTOFF),
+		REL_TYPE(R_386_GOTPC),
+#undef REL_TYPE
+	};
+	const char *name = "unknown type rel type name";
+	if (type < sizeof(type_name)/sizeof(type_name[0])) {
+		name = type_name[type];
+	}
+	return name;
+}
+
+static const char *sec_name(unsigned shndx)
+{
+	const char *sec_strtab;
+	const char *name;
+	sec_strtab = strtab[ehdr.e_shstrndx];
+	name = "<noname>";
+	if (shndx < ehdr.e_shnum) {
+		name = sec_strtab + shdr[shndx].sh_name;
+	}
+	else if (shndx == SHN_ABS) {
+		name = "ABSOLUTE";
+	}
+	else if (shndx == SHN_COMMON) {
+		name = "COMMON";
+	}
+	return name;
+}
+
+static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym)
+{
+	const char *name;
+	name = "<noname>";
+	if (sym->st_name) {
+		name = sym_strtab + sym->st_name;
+	}
+	else {
+		name = sec_name(sym->st_shndx);
+	}
+	return name;
+}
+
+
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define le16_to_cpu(val) (val)
+#define le32_to_cpu(val) (val)
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+#define le16_to_cpu(val) bswap_16(val)
+#define le32_to_cpu(val) bswap_32(val)
+#endif
+
+static uint16_t elf16_to_cpu(uint16_t val)
+{
+	return le16_to_cpu(val);
+}
+
+static uint32_t elf32_to_cpu(uint32_t val)
+{
+	return le32_to_cpu(val);
+}
+
+static void read_ehdr(FILE *fp)
+{
+	if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) {
+		die("Cannot read ELF header: %s\n",
+			strerror(errno));
+	}
+	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
+		die("No ELF magic\n");
+	}
+	if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
+		die("Not a 32 bit executable\n");
+	}
+	if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) {
+		die("Not a LSB ELF executable\n");
+	}
+	if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	/* Convert the fields to native endian */
+	ehdr.e_type      = elf16_to_cpu(ehdr.e_type);
+	ehdr.e_machine   = elf16_to_cpu(ehdr.e_machine);
+	ehdr.e_version   = elf32_to_cpu(ehdr.e_version);
+	ehdr.e_entry     = elf32_to_cpu(ehdr.e_entry);
+	ehdr.e_phoff     = elf32_to_cpu(ehdr.e_phoff);
+	ehdr.e_shoff     = elf32_to_cpu(ehdr.e_shoff);
+	ehdr.e_flags     = elf32_to_cpu(ehdr.e_flags);
+	ehdr.e_ehsize    = elf16_to_cpu(ehdr.e_ehsize);
+	ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize);
+	ehdr.e_phnum     = elf16_to_cpu(ehdr.e_phnum);
+	ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize);
+	ehdr.e_shnum     = elf16_to_cpu(ehdr.e_shnum);
+	ehdr.e_shstrndx  = elf16_to_cpu(ehdr.e_shstrndx);
+
+	if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+		die("Unsupported ELF header type\n");
+	}
+	if (ehdr.e_machine != EM_386) {
+		die("Not for x86\n");
+	}
+	if (ehdr.e_version != EV_CURRENT) {
+		die("Unknown ELF version\n");
+	}
+	if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) {
+		die("Bad Elf header size\n");
+	}
+	if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) {
+		die("Bad program header entry\n");
+	}
+	if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) {
+		die("Bad section header entry\n");
+	}
+	if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+		die("String table index out of bounds\n");
+	}
+}
+
+static void read_shdrs(FILE *fp)
+{
+	int i;
+	if (ehdr.e_shnum > MAX_SHDRS) {
+		die("%d section headers supported: %d\n",
+			ehdr.e_shnum, MAX_SHDRS);
+	}
+	if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
+		die("Seek to %d failed: %s\n",
+			ehdr.e_shoff, strerror(errno));
+	}
+	if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) {
+		die("Cannot read ELF section headers: %s\n",
+			strerror(errno));
+	}
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		shdr[i].sh_name      = elf32_to_cpu(shdr[i].sh_name);
+		shdr[i].sh_type      = elf32_to_cpu(shdr[i].sh_type);
+		shdr[i].sh_flags     = elf32_to_cpu(shdr[i].sh_flags);
+		shdr[i].sh_addr      = elf32_to_cpu(shdr[i].sh_addr);
+		shdr[i].sh_offset    = elf32_to_cpu(shdr[i].sh_offset);
+		shdr[i].sh_size      = elf32_to_cpu(shdr[i].sh_size);
+		shdr[i].sh_link      = elf32_to_cpu(shdr[i].sh_link);
+		shdr[i].sh_info      = elf32_to_cpu(shdr[i].sh_info);
+		shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign);
+		shdr[i].sh_entsize   = elf32_to_cpu(shdr[i].sh_entsize);
+	}
+
+}
+
+static void read_strtabs(FILE *fp)
+{
+	int i;
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		if (shdr[i].sh_type != SHT_STRTAB) {
+			continue;
+		}
+		strtab[i] = malloc(shdr[i].sh_size);
+		if (!strtab[i]) {
+			die("malloc of %d bytes for strtab failed\n",
+				shdr[i].sh_size);
+		}
+		if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+			die("Seek to %d failed: %s\n",
+				shdr[i].sh_offset, strerror(errno));
+		}
+		if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+			die("Cannot read symbol table: %s\n",
+				strerror(errno));
+		}
+	}
+}
+
+static void read_symtabs(FILE *fp)
+{
+	int i,j;
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		if (shdr[i].sh_type != SHT_SYMTAB) {
+			continue;
+		}
+		symtab[i] = malloc(shdr[i].sh_size);
+		if (!symtab[i]) {
+			die("malloc of %d bytes for symtab failed\n",
+				shdr[i].sh_size);
+		}
+		if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+			die("Seek to %d failed: %s\n",
+				shdr[i].sh_offset, strerror(errno));
+		}
+		if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+			die("Cannot read symbol table: %s\n",
+				strerror(errno));
+		}
+		for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) {
+			symtab[i][j].st_name  = elf32_to_cpu(symtab[i][j].st_name);
+			symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value);
+			symtab[i][j].st_size  = elf32_to_cpu(symtab[i][j].st_size);
+			symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx);
+		}
+	}
+}
+
+
+static void read_relocs(FILE *fp)
+{
+	int i,j;
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		if (shdr[i].sh_type != SHT_REL) {
+			continue;
+		}
+		reltab[i] = malloc(shdr[i].sh_size);
+		if (!reltab[i]) {
+			die("malloc of %d bytes for relocs failed\n",
+				shdr[i].sh_size);
+		}
+		if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+			die("Seek to %d failed: %s\n",
+				shdr[i].sh_offset, strerror(errno));
+		}
+		if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+			die("Cannot read symbol table: %s\n",
+				strerror(errno));
+		}
+		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+			reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset);
+			reltab[i][j].r_info   = elf32_to_cpu(reltab[i][j].r_info);
+		}
+	}
+}
+
+
+static void print_absolute_symbols(void)
+{
+	int i;
+	printf("Absolute symbols\n");
+	printf(" Num:    Value Size  Type       Bind        Visibility  Name\n");
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		char *sym_strtab;
+		Elf32_Sym *sh_symtab;
+		int j;
+		if (shdr[i].sh_type != SHT_SYMTAB) {
+			continue;
+		}
+		sh_symtab = symtab[i];
+		sym_strtab = strtab[shdr[i].sh_link];
+		for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) {
+			Elf32_Sym *sym;
+			const char *name;
+			sym = &symtab[i][j];
+			name = sym_name(sym_strtab, sym);
+			if (sym->st_shndx != SHN_ABS) {
+				continue;
+			}
+			printf("%5d %08x %5d %10s %10s %12s %s\n",
+				j, sym->st_value, sym->st_size,
+				sym_type(ELF32_ST_TYPE(sym->st_info)),
+				sym_bind(ELF32_ST_BIND(sym->st_info)),
+				sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)),
+				name);
+		}
+	}
+	printf("\n");
+}
+
+static void print_absolute_relocs(void)
+{
+	int i, printed = 0;
+
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		char *sym_strtab;
+		Elf32_Sym *sh_symtab;
+		unsigned sec_applies, sec_symtab;
+		int j;
+		if (shdr[i].sh_type != SHT_REL) {
+			continue;
+		}
+		sec_symtab  = shdr[i].sh_link;
+		sec_applies = shdr[i].sh_info;
+		if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+			continue;
+		}
+		sh_symtab = symtab[sec_symtab];
+		sym_strtab = strtab[shdr[sec_symtab].sh_link];
+		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+			Elf32_Rel *rel;
+			Elf32_Sym *sym;
+			const char *name;
+			rel = &reltab[i][j];
+			sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
+			name = sym_name(sym_strtab, sym);
+			if (sym->st_shndx != SHN_ABS) {
+				continue;
+			}
+
+			/* Absolute symbols are not relocated if bzImage is
+			 * loaded at a non-compiled address. Display a warning
+			 * to the user at compile time about the absolute
+			 * relocations present.
+			 *
+			 * Users need to audit the code to make sure
+			 * symbols which should have been section
+			 * relative have not become absolute because of some
+			 * linker optimization or wrong programming usage.
+			 *
+			 * Before warning, check whether this absolute symbol
+			 * relocation is harmless.
+			 */
+			if (is_safe_abs_reloc(name))
+				continue;
+
+			if (!printed) {
+				printf("WARNING: Absolute relocations"
+					" present\n");
+				printf("Offset     Info     Type     Sym.Value "
+					"Sym.Name\n");
+				printed = 1;
+			}
+
+			printf("%08x %08x %10s %08x  %s\n",
+				rel->r_offset,
+				rel->r_info,
+				rel_type(ELF32_R_TYPE(rel->r_info)),
+				sym->st_value,
+				name);
+		}
+	}
+
+	if (printed)
+		printf("\n");
+}
+
+static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym))
+{
+	int i;
+	/* Walk through the relocations */
+	for(i = 0; i < ehdr.e_shnum; i++) {
+		char *sym_strtab;
+		Elf32_Sym *sh_symtab;
+		unsigned sec_applies, sec_symtab;
+		int j;
+		if (shdr[i].sh_type != SHT_REL) {
+			continue;
+		}
+		sec_symtab  = shdr[i].sh_link;
+		sec_applies = shdr[i].sh_info;
+		if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+			continue;
+		}
+		sh_symtab = symtab[sec_symtab];
+		sym_strtab = strtab[shdr[sec_symtab].sh_link];
+		for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+			Elf32_Rel *rel;
+			Elf32_Sym *sym;
+			unsigned r_type;
+			rel = &reltab[i][j];
+			sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
+			r_type = ELF32_R_TYPE(rel->r_info);
+			/* Don't visit relocations to absolute symbols */
+			if (sym->st_shndx == SHN_ABS) {
+				continue;
+			}
+			if (r_type == R_386_PC32) {
+				/* PC relative relocations don't need to be adjusted */
+			}
+			else if (r_type == R_386_32) {
+				/* Visit relocations that need to be adjusted */
+				visit(rel, sym);
+			}
+			else {
+				die("Unsupported relocation type: %d\n", r_type);
+			}
+		}
+	}
+}
+
+static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
+{
+	reloc_count += 1;
+}
+
+static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym)
+{
+	/* Remember the address that needs to be adjusted. */
+	relocs[reloc_idx++] = rel->r_offset;
+}
+
+static int cmp_relocs(const void *va, const void *vb)
+{
+	const unsigned long *a, *b;
+	a = va; b = vb;
+	return (*a == *b)? 0 : (*a > *b)? 1 : -1;
+}
+
+static void emit_relocs(int as_text)
+{
+	int i;
+	/* Count how many relocations I have and allocate space for them. */
+	reloc_count = 0;
+	walk_relocs(count_reloc);
+	relocs = malloc(reloc_count * sizeof(relocs[0]));
+	if (!relocs) {
+		die("malloc of %d entries for relocs failed\n",
+			reloc_count);
+	}
+	/* Collect up the relocations */
+	reloc_idx = 0;
+	walk_relocs(collect_reloc);
+
+	/* Order the relocations for more efficient processing */
+	qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs);
+
+	/* Print the relocations */
+	if (as_text) {
+		/* Print the relocations in a form that
+		 * gas will like.
+		 */
+		printf(".section \".data.reloc\",\"a\"\n");
+		printf(".balign 4\n");
+		for(i = 0; i < reloc_count; i++) {
+			printf("\t .long 0x%08lx\n", relocs[i]);
+		}
+		printf("\n");
+	}
+	else {
+		unsigned char buf[4];
+		buf[0] = buf[1] = buf[2] = buf[3] = 0;
+		/* Print a stop */
+		printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+		/* Now print each relocation */
+		for(i = 0; i < reloc_count; i++) {
+			buf[0] = (relocs[i] >>  0) & 0xff;
+			buf[1] = (relocs[i] >>  8) & 0xff;
+			buf[2] = (relocs[i] >> 16) & 0xff;
+			buf[3] = (relocs[i] >> 24) & 0xff;
+			printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
+		}
+	}
+}
+
+static void usage(void)
+{
+	die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n");
+}
+
+int main(int argc, char **argv)
+{
+	int show_absolute_syms, show_absolute_relocs;
+	int as_text;
+	const char *fname;
+	FILE *fp;
+	int i;
+
+	show_absolute_syms = 0;
+	show_absolute_relocs = 0;
+	as_text = 0;
+	fname = NULL;
+	for(i = 1; i < argc; i++) {
+		char *arg = argv[i];
+		if (*arg == '-') {
+			if (strcmp(argv[1], "--abs-syms") == 0) {
+				show_absolute_syms = 1;
+				continue;
+			}
+
+			if (strcmp(argv[1], "--abs-relocs") == 0) {
+				show_absolute_relocs = 1;
+				continue;
+			}
+			else if (strcmp(argv[1], "--text") == 0) {
+				as_text = 1;
+				continue;
+			}
+		}
+		else if (!fname) {
+			fname = arg;
+			continue;
+		}
+		usage();
+	}
+	if (!fname) {
+		usage();
+	}
+	fp = fopen(fname, "r");
+	if (!fp) {
+		die("Cannot open %s: %s\n",
+			fname, strerror(errno));
+	}
+	read_ehdr(fp);
+	read_shdrs(fp);
+	read_strtabs(fp);
+	read_symtabs(fp);
+	read_relocs(fp);
+	if (show_absolute_syms) {
+		print_absolute_symbols();
+		return 0;
+	}
+	if (show_absolute_relocs) {
+		print_absolute_relocs();
+		return 0;
+	}
+	emit_relocs(as_text);
+	return 0;
+}
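
The default (non --text) output of emit_relocs() is a 32-bit zero stop
marker followed by the sorted little-endian relocation addresses;
head.S consumes the same bytes in reverse, using the zero word as its
terminator. A hypothetical standalone reader, assuming a little-endian
host:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t v;

		while (fread(&v, sizeof(v), 1, stdin) == 1) {
			if (v == 0)
				continue;	/* the leading stop marker */
			printf("reloc at 0x%08x\n", v);
		}
		return 0;
	}
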
diff --git a/arch/i386/boot/compressed/vmlinux.lds b/arch/i386/boot/compressed/vmlinux.lds
new file mode 100644
index 0000000..cc4854f
--- /dev/null
+++ b/arch/i386/boot/compressed/vmlinux.lds
@@ -0,0 +1,43 @@
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+SECTIONS
+{
+        /* Be careful: parts of head.S assume startup_32 is at
+         * address 0.
+	 */
+	. = 0;
+	.text.head : {
+		_head = . ;
+		*(.text.head)
+		_ehead = . ;
+	}
+	.data.compressed : {
+		*(.data.compressed)
+	}
+	.text :	{
+		_text = .; 	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		_end = . ;
+	}
+}
diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr
index 1ed9d79..707a88f 100644
--- a/arch/i386/boot/compressed/vmlinux.scr
+++ b/arch/i386/boot/compressed/vmlinux.scr
@@ -1,9 +1,10 @@
 SECTIONS
 {
-  .data : { 
+  .data.compressed : {
 	input_len = .;
 	LONG(input_data_end - input_data) input_data = .; 
 	*(.data) 
+	output_len = . - 4;
 	input_data_end = .; 
 	}
 }
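
The output_len = . - 4 trick works because a gzip stream ends with
ISIZE, the uncompressed length modulo 2^32, stored little-endian in
its final four bytes. Since the relocations are concatenated with the
kernel before compression, output_len also covers the appended
relocation table, which is how head.S finds its end. A userspace
sketch of reading the same trailer field (the path is illustrative):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		FILE *f = fopen("vmlinux.bin.gz", "rb");
		uint32_t isize;

		if (!f)
			return 1;
		fseek(f, -4, SEEK_END);
		fread(&isize, sizeof(isize), 1, f);	/* LE ISIZE */
		printf("uncompressed length: %u bytes\n", isize);
		fclose(f);
		return 0;
	}
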
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index 3aec4538..06edf1c 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -81,7 +81,7 @@
 # This is the setup header, and it must start at %cs:2 (old 0x9020:2)
 
 		.ascii	"HdrS"		# header signature
-		.word	0x0204		# header version number (>= 0x0105)
+		.word	0x0205		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
 start_sys_seg:	.word	SYSSEG
@@ -160,6 +160,17 @@
 					# The highest safe address for
 					# the contents of an initrd
 
+kernel_alignment:  .long CONFIG_PHYSICAL_ALIGN 	#physical addr alignment
+						#required for protected mode
+						#kernel
+#ifdef CONFIG_RELOCATABLE
+relocatable_kernel:    .byte 1
+#else
+relocatable_kernel:    .byte 0
+#endif
+pad2:			.byte 0
+pad3:			.word 0
+
 trampoline:	call	start_of_setup
 		.align 16
 					# The offset at this point is 0x240
@@ -588,11 +599,6 @@
 	call	default_switch
 
 rmodeswtch_end:
-# we get the code32 start address and modify the below 'jmpi'
-# (loader may have changed it)
-	movl	%cs:code32_start, %eax
-	movl	%eax, %cs:code32
-
 # Now we move the system to its rightful place ... but we check if we have a
 # big-kernel. In that case we *must* not move it ...
 	testb	$LOADED_HIGH, %cs:loadflags
@@ -788,11 +794,12 @@
 a20_done:
 
 #endif /* CONFIG_X86_VOYAGER */
-# set up gdt and idt
+# set up gdt and idt and 32bit start address
 	lidt	idt_48				# load idt with 0,0
 	xorl	%eax, %eax			# Compute gdt_base
 	movw	%ds, %ax			# (Convert %ds:gdt to a linear ptr)
 	shll	$4, %eax
+	addl	%eax, code32
 	addl	$gdt, %eax
 	movl	%eax, (gdt_48+2)
 	lgdt	gdt_48				# load gdt with whatever is
@@ -851,9 +858,26 @@
 #	Manual, Mixing 16-bit and 32-bit code, page 16-6)
 
 	.byte 0x66, 0xea			# prefix + jmpi-opcode
-code32:	.long	0x1000				# will be set to 0x100000
-						# for big kernels
+code32:	.long	startup_32			# will be set to %cs+startup_32
 	.word	__BOOT_CS
+.code32
+startup_32:
+	movl $(__BOOT_DS), %eax
+	movl %eax, %ds
+	movl %eax, %es
+	movl %eax, %fs
+	movl %eax, %gs
+	movl %eax, %ss
+
+	xorl %eax, %eax
+1:	incl %eax				# check that A20 really IS enabled
+	movl %eax, 0x00000000			# loop forever if it isn't
+	cmpl %eax, 0x00100000
+	je 1b
+
+	# Jump to the 32bit entry point
+	jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi)
+.code16
 
 # Here's a bunch of information about your current kernel..
 kernel_version:	.ascii	UTS_RELEASE
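
The A20 probe that moved here from compressed/head.S is the classic
wrap-around test; conceptually, in C (the real thing runs in assembly,
with no C environment available):

	static void wait_for_a20(void)
	{
		/* With A20 disabled, physical address bit 20 is forced
		 * low, so 0x100000 aliases 0x000000 and the two reads
		 * can never differ.
		 */
		volatile uint32_t *low  = (volatile uint32_t *)0x000000;
		volatile uint32_t *wrap = (volatile uint32_t *)0x100000;
		uint32_t v = 0;

		do {
			*low = ++v;
		} while (*wrap == v);	/* spins until A20 is enabled */
	}
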
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 97aacd6..65891f1 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.19-rc2-git4
-# Sat Oct 21 03:38:56 2006
+# Linux kernel version: 2.6.19-git7
+# Wed Dec  6 23:50:49 2006
 #
 CONFIG_X86_32=y
 CONFIG_GENERIC_TIME=y
@@ -40,13 +40,14 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 # CONFIG_CPUSETS is not set
+CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
-# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -110,6 +111,7 @@
 # CONFIG_X86_VISWS is not set
 CONFIG_X86_GENERICARCH=y
 # CONFIG_X86_ES7000 is not set
+# CONFIG_PARAVIRT is not set
 CONFIG_X86_CYCLONE_TIMER=y
 # CONFIG_M386 is not set
 # CONFIG_M486 is not set
@@ -120,6 +122,7 @@
 # CONFIG_MPENTIUMII is not set
 CONFIG_MPENTIUMIII=y
 # CONFIG_MPENTIUMM is not set
+# CONFIG_MCORE2 is not set
 # CONFIG_MPENTIUM4 is not set
 # CONFIG_MK6 is not set
 # CONFIG_MK7 is not set
@@ -197,7 +200,6 @@
 CONFIG_MTRR=y
 # CONFIG_EFI is not set
 # CONFIG_IRQBALANCE is not set
-CONFIG_REGPARM=y
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 CONFIG_HZ_250=y
@@ -205,7 +207,8 @@
 CONFIG_HZ=250
 # CONFIG_KEXEC is not set
 # CONFIG_CRASH_DUMP is not set
-CONFIG_PHYSICAL_START=0x100000
+# CONFIG_RELOCATABLE is not set
+CONFIG_PHYSICAL_ALIGN=0x100000
 # CONFIG_HOTPLUG_CPU is not set
 CONFIG_COMPAT_VDSO=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
@@ -367,6 +370,7 @@
 # CONFIG_TCP_CONG_ADVANCED is not set
 CONFIG_TCP_CONG_CUBIC=y
 CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
 CONFIG_IPV6=y
 # CONFIG_IPV6_PRIVACY is not set
 # CONFIG_IPV6_ROUTER_PREF is not set
@@ -677,6 +681,7 @@
 # CONFIG_PATA_IT821X is not set
 # CONFIG_PATA_JMICRON is not set
 # CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
 # CONFIG_PATA_MPIIX is not set
 # CONFIG_PATA_OLDPIIX is not set
 # CONFIG_PATA_NETCELL is not set
@@ -850,6 +855,7 @@
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
 # CONFIG_MYRI10GE is not set
+# CONFIG_NETXEN_NIC is not set
 
 #
 # Token Ring devices
@@ -984,10 +990,6 @@
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
 # CONFIG_SONYPI is not set
-
-#
-# Ftape, the floppy tape device driver
-#
 CONFIG_AGP=y
 # CONFIG_AGP_ALI is not set
 # CONFIG_AGP_ATI is not set
@@ -1108,6 +1110,7 @@
 # CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_MULTITHREAD_PROBE is not set
 # CONFIG_USB_OTG is not set
 
 #
@@ -1185,6 +1188,7 @@
 # CONFIG_USB_KAWETH is not set
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET_MII is not set
 # CONFIG_USB_USBNET is not set
 CONFIG_USB_MON=y
 
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 1a884b6..1e8988e 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -6,7 +6,7 @@
 
 obj-y	:= process.o signal.o entry.o traps.o irq.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
-		pci-dma.o i386_ksyms.o i387.o bootflag.o \
+		pci-dma.o i386_ksyms.o i387.o bootflag.o e820.o\
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
@@ -40,6 +40,9 @@
 obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
 obj-$(CONFIG_K8_NB)		+= k8.o
 
+# Make sure this is linked after any other paravirt_ops structs: see head.S
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+
 EXTRA_AFLAGS   := -traditional
 
 obj-$(CONFIG_SCx200)		+= scx200.o
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index c984169..4b60af7 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -10,6 +10,7 @@
 #include <asm/pci-direct.h>
 #include <asm/acpi.h>
 #include <asm/apic.h>
+#include <asm/irq.h>
 
 #ifdef CONFIG_ACPI
 
@@ -49,6 +50,24 @@
 	return 0;
 }
 
+static void check_intel(void)
+{
+	u16 vendor, device;
+
+	vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID);
+
+	if (vendor != PCI_VENDOR_ID_INTEL)
+		return;
+
+	device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
+#ifdef CONFIG_SMP
+	if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7525_MCH)
+		quirk_intel_irqbalance();
+#endif
+}
+
 void __init check_acpi_pci(void)
 {
 	int num, slot, func;
@@ -60,6 +79,8 @@
 	if (!early_pci_allowed())
 		return;
 
+	check_intel();
+
 	/* Poor man's PCI discovery */
 	for (num = 0; num < 32; num++) {
 		for (slot = 0; slot < 32; slot++) {
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 535f979..9eca21b 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -124,6 +124,20 @@
 
 #endif /* CONFIG_X86_64 */
 
+static void nop_out(void *insns, unsigned int len)
+{
+	unsigned char **noptable = find_nop_table();
+
+	while (len > 0) {
+		unsigned int noplen = len;
+		if (noplen > ASM_NOP_MAX)
+			noplen = ASM_NOP_MAX;
+		memcpy(insns, noptable[noplen], noplen);
+		insns += noplen;
+		len -= noplen;
+	}
+}
+
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -138,10 +152,9 @@
 
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
-	unsigned char **noptable = find_nop_table();
 	struct alt_instr *a;
 	u8 *instr;
-	int diff, i, k;
+	int diff;
 
 	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
 	for (a = start; a < end; a++) {
@@ -159,13 +172,7 @@
 #endif
 		memcpy(instr, a->replacement, a->replacementlen);
 		diff = a->instrlen - a->replacementlen;
-		/* Pad the rest with nops */
-		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-			k = diff;
-			if (k > ASM_NOP_MAX)
-				k = ASM_NOP_MAX;
-			memcpy(a->instr + i, noptable[k], k);
-		}
+		nop_out(instr + a->replacementlen, diff);
 	}
 }
 
@@ -209,7 +216,6 @@
 
 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
-	unsigned char **noptable = find_nop_table();
 	u8 **ptr;
 
 	for (ptr = start; ptr < end; ptr++) {
@@ -217,7 +223,7 @@
 			continue;
 		if (*ptr > text_end)
 			continue;
-		**ptr = noptable[1][0];
+		nop_out(*ptr, 1);
 	};
 }
 
@@ -343,6 +349,40 @@
 
 #endif
 
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{
+	struct paravirt_patch *p;
+
+	for (p = start; p < end; p++) {
+		unsigned int used;
+
+		used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
+					  p->len);
+#ifdef CONFIG_DEBUG_PARAVIRT
+		{
+		int i;
+		/* Deliberately clobber regs using "not %reg" to find bugs. */
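+		/* "\xf7\xd0" encodes "not %eax"; OR-ing i into the ModRM
+		   byte below selects %eax, %ecx or %edx. */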
+		for (i = 0; i < 3; i++) {
+			if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
+				memcpy(p->instr + used, "\xf7\xd0", 2);
+				p->instr[used+1] |= i;
+				used += 2;
+			}
+		}
+		}
+#endif
+		/* Pad the rest with nops */
+		nop_out(p->instr + used, p->len - used);
+	}
+
+	/* Sync to be conservative, in case we patched following instructions */
+	sync_core();
+}
+extern struct paravirt_patch __start_parainstructions[],
+	__stop_parainstructions[];
+#endif	/* CONFIG_PARAVIRT */
+
 void __init alternative_instructions(void)
 {
 	unsigned long flags;
@@ -390,5 +430,6 @@
 		alternatives_smp_switch(0);
 	}
 #endif
+	apply_paravirt(__start_parainstructions, __stop_parainstructions);
 	local_irq_restore(flags);
 }
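
For illustration, the nop_out() helper introduced above behaves like this
self-contained sketch (a user-space stand-in: the table here is shortened,
its entries are standard x86 nop idioms rather than the per-CPU sequences a
real noptable from find_nop_table() would hold):

	#include <string.h>

	#define ASM_NOP_MAX 4	/* the kernel's value is larger */

	/* entry n is an n-byte nop sequence */
	static const unsigned char noptable[ASM_NOP_MAX + 1][ASM_NOP_MAX] = {
		[1] = { 0x90 },				/* nop */
		[2] = { 0x66, 0x90 },			/* osp nop */
		[3] = { 0x8d, 0x76, 0x00 },		/* lea 0(%esi),%esi */
		[4] = { 0x8d, 0x74, 0x26, 0x00 },	/* lea 0(%esi,1),%esi */
	};

	static void nop_out(unsigned char *insns, unsigned int len)
	{
		while (len > 0) {
			unsigned int noplen = len > ASM_NOP_MAX ? ASM_NOP_MAX : len;

			memcpy(insns, noptable[noplen], noplen);
			insns += noplen;
			len -= noplen;
		}
	}
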
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 2fd4b7d..776d9be 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -647,23 +647,30 @@
 static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 {
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
 	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
 	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+	if (maxlvt >= 4)
+		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
 	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
 	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-	apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#ifdef CONFIG_X86_MCE_P4THERMAL
+	if (maxlvt >= 5)
+		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
 	
 	local_irq_save(flags);
 	disable_local_APIC();
@@ -675,10 +682,13 @@
 {
 	unsigned int l, h;
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	local_irq_save(flags);
 
 	/*
@@ -700,8 +710,12 @@
 	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
 	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
 	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-	apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-	apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+#ifdef CONFIG_X86_MCE_P4THERMAL
+	if (maxlvt >= 5)
+		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
 	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
 	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
 	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
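
For reference, the maxlvt guards above come from the local APIC version
register; get_maxlvt() is roughly the following sketch (the real function in
this file also special-cases the old 82489DX, whose version register lacks
the field):

	static int get_maxlvt(void)
	{
		unsigned int v = apic_read(APIC_LVR);

		/* bits 23:16 of the version register = highest LVT entry */
		return GET_APIC_MAXLVT(v);
	}
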
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index a60358f..a97847d 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -231,6 +231,7 @@
 #include <asm/uaccess.h>
 #include <asm/desc.h>
 #include <asm/i8253.h>
+#include <asm/paravirt.h>
 
 #include "io_ports.h"
 
@@ -2235,7 +2236,7 @@
 
 	dmi_check_system(apm_dmi_table);
 
-	if (apm_info.bios.version == 0) {
+	if (apm_info.bios.version == 0 || paravirt_enabled()) {
 		printk(KERN_INFO "apm: BIOS not found.\n");
 		return -ENODEV;
 	}
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index c80271f..1b2f3cd 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -15,6 +15,7 @@
 #include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/elf.h>
+#include <asm/pda.h>
 
 #define DEFINE(sym, val) \
         asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -51,13 +52,35 @@
 	OFFSET(TI_exec_domain, thread_info, exec_domain);
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_status, thread_info, status);
-	OFFSET(TI_cpu, thread_info, cpu);
 	OFFSET(TI_preempt_count, thread_info, preempt_count);
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
 	OFFSET(TI_restart_block, thread_info, restart_block);
 	OFFSET(TI_sysenter_return, thread_info, sysenter_return);
 	BLANK();
 
+	OFFSET(GDS_size, Xgt_desc_struct, size);
+	OFFSET(GDS_address, Xgt_desc_struct, address);
+	OFFSET(GDS_pad, Xgt_desc_struct, pad);
+	BLANK();
+
+	OFFSET(PT_EBX, pt_regs, ebx);
+	OFFSET(PT_ECX, pt_regs, ecx);
+	OFFSET(PT_EDX, pt_regs, edx);
+	OFFSET(PT_ESI, pt_regs, esi);
+	OFFSET(PT_EDI, pt_regs, edi);
+	OFFSET(PT_EBP, pt_regs, ebp);
+	OFFSET(PT_EAX, pt_regs, eax);
+	OFFSET(PT_DS,  pt_regs, xds);
+	OFFSET(PT_ES,  pt_regs, xes);
+	OFFSET(PT_GS,  pt_regs, xgs);
+	OFFSET(PT_ORIG_EAX, pt_regs, orig_eax);
+	OFFSET(PT_EIP, pt_regs, eip);
+	OFFSET(PT_CS,  pt_regs, xcs);
+	OFFSET(PT_EFLAGS, pt_regs, eflags);
+	OFFSET(PT_OLDESP, pt_regs, esp);
+	OFFSET(PT_OLDSS,  pt_regs, xss);
+	BLANK();
+
 	OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
 	OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
 	BLANK();
@@ -74,4 +97,18 @@
 	DEFINE(VDSO_PRELINK, VDSO_PRELINK);
 
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+
+	BLANK();
+	OFFSET(PDA_cpu, i386_pda, cpu_number);
+	OFFSET(PDA_pcurrent, i386_pda, pcurrent);
+
+#ifdef CONFIG_PARAVIRT
+	BLANK();
+	OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
+	OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
+	OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
+	OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
+	OFFSET(PARAVIRT_iret, paravirt_ops, iret);
+	OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+#endif
 }
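
A note on the mechanism, since asm-offsets.c generates constants rather than
code: each OFFSET()/DEFINE() emits a marker line into the compiled assembly,
which the build scrapes into asm-offsets.h so that entry.S can address
pt_regs fields symbolically. Roughly (the offset value is illustrative):

	/* asm-offsets.c:   OFFSET(PT_EAX, pt_regs, eax);              */
	/* generated .s:    ->PT_EAX $24 offsetof(struct pt_regs, eax) */
	/* asm-offsets.h:   #define PT_EAX 24                          */
	/* entry.S usage:   movl PT_EAX(%esp), %eax                    */
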
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index e475809..41cfea5 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -104,10 +104,7 @@
 					f_vide();
 				rdtscl(d2);
 				d = d2-d;
-				
-				/* Knock these two lines out if it debugs out ok */
-				printk(KERN_INFO "AMD K6 stepping B detected - ");
-				/* -- cut here -- */
+
 				if (d > 20*K6_BUG_LOOP) 
 					printk("system stability may be impaired when more than 32 MB are used.\n");
 				else 
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index d9f3e3c..1b34c56 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -18,14 +18,15 @@
 #include <asm/apic.h>
 #include <mach_apic.h>
 #endif
+#include <asm/pda.h>
 
 #include "cpu.h"
 
 DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
 
-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
 
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_fxsr __cpuinitdata;
@@ -235,29 +236,14 @@
 	return flag_is_changeable_p(X86_EFLAGS_ID);
 }
 
-/* Do minimum CPU detection early.
-   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
-   The others are not touched to avoid unwanted side effects.
-
-   WARNING: this function is only called on the BP.  Don't add code here
-   that is supposed to run on all CPUs. */
-static void __init early_cpu_detect(void)
+void __init cpu_detect(struct cpuinfo_x86 *c)
 {
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	c->x86_cache_alignment = 32;
-
-	if (!have_cpuid_p())
-		return;
-
 	/* Get vendor name */
 	cpuid(0x00000000, &c->cpuid_level,
 	      (int *)&c->x86_vendor_id[0],
 	      (int *)&c->x86_vendor_id[8],
 	      (int *)&c->x86_vendor_id[4]);
 
-	get_cpu_vendor(c, 1);
-
 	c->x86 = 4;
 	if (c->cpuid_level >= 0x00000001) {
 		u32 junk, tfms, cap0, misc;
@@ -274,6 +260,26 @@
 	}
 }
 
+/* Do minimum CPU detection early.
+   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+   The others are not touched to avoid unwanted side effects.
+
+   WARNING: this function is only called on the BP.  Don't add code here
+   that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	c->x86_cache_alignment = 32;
+
+	if (!have_cpuid_p())
+		return;
+
+	cpu_detect(c);
+
+	get_cpu_vendor(c, 1);
+}
+
 static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 	u32 tfms, xlvl;
@@ -308,6 +314,8 @@
 #else
 			c->apicid = (ebx >> 24) & 0xFF;
 #endif
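+			/* CPUID.1 EDX bit 19 = CLFLUSH support;
+			   EBX bits 15:8 = line size in 8-byte units */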
+			if (c->x86_capability[0] & (1<<19))
+				c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
 		} else {
 			/* Have CPUID level 0 only - unheard of */
 			c->x86 = 4;
@@ -372,6 +380,7 @@
 	c->x86_vendor_id[0] = '\0'; /* Unset */
 	c->x86_model_id[0] = '\0';  /* Unset */
 	c->x86_max_cores = 1;
+	c->x86_clflush_size = 32;
 	memset(&c->x86_capability, 0, sizeof c->x86_capability);
 
 	if (!have_cpuid_p()) {
@@ -591,25 +600,134 @@
 	disable_pse = 1;
 #endif
 }
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- */
-void __cpuinit cpu_init(void)
+
+/* Make sure %gs is initialized properly in idle threads */
+struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
 {
-	int cpu = smp_processor_id();
-	struct tss_struct * t = &per_cpu(init_tss, cpu);
-	struct thread_struct *thread = &current->thread;
-	struct desc_struct *gdt;
-	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
+	memset(regs, 0, sizeof(struct pt_regs));
+	regs->xgs = __KERNEL_PDA;
+	return regs;
+}
+
+static __cpuinit int alloc_gdt(int cpu)
+{
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+	struct desc_struct *gdt;
+	struct i386_pda *pda;
+
+	gdt = (struct desc_struct *)cpu_gdt_descr->address;
+	pda = cpu_pda(cpu);
+
+	/*
+	 * This is a horrible hack to allocate the GDT.  The problem
+	 * is that cpu_init() is called really early for the boot CPU
+	 * (and hence needs bootmem) but much later for the secondary
+	 * CPUs, when bootmem will have gone away
+	 */
+	if (NODE_DATA(0)->bdata->node_bootmem_map) {
+		BUG_ON(gdt != NULL || pda != NULL);
+
+		gdt = alloc_bootmem_pages(PAGE_SIZE);
+		pda = alloc_bootmem(sizeof(*pda));
+		/* alloc_bootmem(_pages) panics on failure, so no check */
+
+		memset(gdt, 0, PAGE_SIZE);
+		memset(pda, 0, sizeof(*pda));
+	} else {
+		/* GDT and PDA might already have been allocated if
+		   this is a CPU hotplug re-insertion. */
+		if (gdt == NULL)
+			gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+
+		if (pda == NULL)
+			pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
+
+		if (unlikely(!gdt || !pda)) {
+			free_pages((unsigned long)gdt, 0);
+			kfree(pda);
+			return 0;
+		}
+	}
+
+	cpu_gdt_descr->address = (unsigned long)gdt;
+	cpu_pda(cpu) = pda;
+
+	return 1;
+}
+
+/* Initial PDA used by boot CPU */
+struct i386_pda boot_pda = {
+	._pda = &boot_pda,
+	.cpu_number = 0,
+	.pcurrent = &init_task,
+};
+
+static inline void set_kernel_gs(void)
+{
+	/* Set %gs for this CPU's PDA.  Memory clobber is to create a
+	   barrier with respect to any PDA operations, so the compiler
+	   doesn't move any before here. */
+	asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
+}
+
+/* Initialize the CPU's GDT and PDA.  The boot CPU does this for
+   itself, but secondaries find this done for them. */
+__cpuinit int init_gdt(int cpu, struct task_struct *idle)
+{
+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+	struct desc_struct *gdt;
+	struct i386_pda *pda;
+
+	/* For non-boot CPUs, the GDT and PDA should already have been
+	   allocated. */
+	if (!alloc_gdt(cpu)) {
+		printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
+		return 0;
+	}
+
+	gdt = (struct desc_struct *)cpu_gdt_descr->address;
+	pda = cpu_pda(cpu);
+
+	BUG_ON(gdt == NULL || pda == NULL);
+
+	/*
+	 * Initialize the per-CPU GDT with the boot GDT,
+	 * and set up the GDT descriptor:
+	 */
+	memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+	cpu_gdt_descr->size = GDT_SIZE - 1;
+
+	pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
+			(u32 *)&gdt[GDT_ENTRY_PDA].b,
+			(unsigned long)pda, sizeof(*pda) - 1,
+			0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
+
+	memset(pda, 0, sizeof(*pda));
+	pda->_pda = pda;
+	pda->cpu_number = cpu;
+	pda->pcurrent = idle;
+
+	return 1;
+}
+
+/* Common CPU init for both boot and secondary CPUs */
+static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
+{
+	struct tss_struct * t = &per_cpu(init_tss, cpu);
+	struct thread_struct *thread = &curr->thread;
+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+
+	/* Reinit these anyway, even if they've already been done (on
+	   the boot CPU, this will transition from the boot gdt+pda to
+	   the real ones). */
+	load_gdt(cpu_gdt_descr);
+	set_kernel_gs();
 
 	if (cpu_test_and_set(cpu, cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
 		for (;;) local_irq_enable();
 	}
+
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
 
 	if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
@@ -621,56 +739,16 @@
 		set_in_cr4(X86_CR4_TSD);
 	}
 
-	/* The CPU hotplug case */
-	if (cpu_gdt_descr->address) {
-		gdt = (struct desc_struct *)cpu_gdt_descr->address;
-		memset(gdt, 0, PAGE_SIZE);
-		goto old_gdt;
-	}
-	/*
-	 * This is a horrible hack to allocate the GDT.  The problem
-	 * is that cpu_init() is called really early for the boot CPU
-	 * (and hence needs bootmem) but much later for the secondary
-	 * CPUs, when bootmem will have gone away
-	 */
-	if (NODE_DATA(0)->bdata->node_bootmem_map) {
-		gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
-		/* alloc_bootmem_pages panics on failure, so no check */
-		memset(gdt, 0, PAGE_SIZE);
-	} else {
-		gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
-		if (unlikely(!gdt)) {
-			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
-			for (;;)
-				local_irq_enable();
-		}
-	}
-old_gdt:
-	/*
-	 * Initialize the per-CPU GDT with the boot GDT,
-	 * and set up the GDT descriptor:
-	 */
- 	memcpy(gdt, cpu_gdt_table, GDT_SIZE);
-
-	/* Set up GDT entry for 16bit stack */
- 	*(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
-		((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
-		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
-		(CPU_16BIT_STACK_SIZE - 1);
-
-	cpu_gdt_descr->size = GDT_SIZE - 1;
- 	cpu_gdt_descr->address = (unsigned long)gdt;
-
-	load_gdt(cpu_gdt_descr);
 	load_idt(&idt_descr);
 
 	/*
 	 * Set up and load the per-CPU TSS and LDT
 	 */
 	atomic_inc(&init_mm.mm_count);
-	current->active_mm = &init_mm;
-	BUG_ON(current->mm);
-	enter_lazy_tlb(&init_mm, current);
+	curr->active_mm = &init_mm;
+	BUG_ON(curr->mm);
+	enter_lazy_tlb(&init_mm, curr);
 
 	load_esp0(t, thread);
 	set_tss_desc(cpu,t);
@@ -682,8 +760,8 @@
 	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
 #endif
 
-	/* Clear %fs and %gs. */
-	asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
+	/* Clear %fs. */
+	asm volatile ("mov %0, %%fs" : : "r" (0));
 
 	/* Clear all 6 debug registers: */
 	set_debugreg(0, 0);
@@ -701,6 +779,37 @@
 	mxcsr_feature_mask_init();
 }
 
+/* Entrypoint to initialize secondary CPU */
+void __cpuinit secondary_cpu_init(void)
+{
+	int cpu = smp_processor_id();
+	struct task_struct *curr = current;
+
+	_cpu_init(cpu, curr);
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
+{
+	int cpu = smp_processor_id();
+	struct task_struct *curr = current;
+
+	/* Set up the real GDT and PDA, so we can transition from the
+	   boot versions. */
+	if (!init_gdt(cpu, curr)) {
+		/* failed to allocate something; not much we can do... */
+		for (;;)
+			local_irq_enable();
+	}
+
+	_cpu_init(cpu, curr);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 void __cpuinit cpu_uninit(void)
 {
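
Once init_gdt() has pointed the GDT_ENTRY_PDA descriptor at the per-CPU
i386_pda and set_kernel_gs() has loaded %gs, PDA fields are reachable with a
single segment-relative access. A minimal sketch of the accessor pattern
(the real read_pda()/write_pda() macros, added elsewhere in this series in
asm-i386/pda.h, are more general):

	#include <stddef.h>

	static inline int pda_cpu_number(void)
	{
		int cpu;

		/* %gs-relative load of i386_pda.cpu_number */
		asm("movl %%gs:%c1, %0"
		    : "=r" (cpu)
		    : "i" (offsetof(struct i386_pda, cpu_number)));
		return cpu;
	}
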
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 94a95aa..56fe265 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -107,7 +107,7 @@
 	 * Note that the workaround only should be initialized once...
 	 */
 	c->f00f_bug = 0;
-	if ( c->x86 == 5 ) {
+	if (!paravirt_enabled() && c->x86 == 5) {
 		static int f00f_workaround_enabled = 0;
 
 		c->f00f_bug = 1;
@@ -195,8 +195,16 @@
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-}
 
+	if (cpu_has_ds) {
+		unsigned int l1, l2;
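+		/* IA32_MISC_ENABLE: bit 11 clear = BTS available,
+		   bit 12 clear = PEBS available */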
+		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+		if (!(l1 & (1<<11)))
+			set_bit(X86_FEATURE_BTS, c->x86_capability);
+		if (!(l1 & (1<<12)))
+			set_bit(X86_FEATURE_PEBS, c->x86_capability);
+	}
+}
 
 static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 {
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 5c43be4..80b4c5d 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -480,12 +480,10 @@
 	if (num_cache_leaves == 0)
 		return -ENOENT;
 
-	cpuid4_info[cpu] = kmalloc(
+	cpuid4_info[cpu] = kzalloc(
 	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
 	if (unlikely(cpuid4_info[cpu] == NULL))
 		return -ENOMEM;
-	memset(cpuid4_info[cpu], 0,
-	    sizeof(struct _cpuid4_info) * num_cache_leaves);
 
 	oldmask = current->cpus_allowed;
 	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
@@ -658,17 +656,14 @@
 		return -ENOENT;
 
 	/* Allocate all required memory */
-	cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
+	cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
 	if (unlikely(cache_kobject[cpu] == NULL))
 		goto err_out;
-	memset(cache_kobject[cpu], 0, sizeof(struct kobject));
 
-	index_kobject[cpu] = kmalloc(
+	index_kobject[cpu] = kzalloc(
 	    sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
 	if (unlikely(index_kobject[cpu] == NULL))
 		goto err_out;
-	memset(index_kobject[cpu], 0,
-	    sizeof(struct _index_kobject) * num_cache_leaves);
 
 	return 0;
 
diff --git a/arch/i386/kernel/cpu/mtrr/Makefile b/arch/i386/kernel/cpu/mtrr/Makefile
index a25b701..191fc05 100644
--- a/arch/i386/kernel/cpu/mtrr/Makefile
+++ b/arch/i386/kernel/cpu/mtrr/Makefile
@@ -1,5 +1,3 @@
 obj-y		:= main.o if.o generic.o state.o
-obj-y		+= amd.o
-obj-y		+= cyrix.o
-obj-y		+= centaur.o
+obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
 
diff --git a/arch/i386/kernel/cpu/mtrr/amd.c b/arch/i386/kernel/cpu/mtrr/amd.c
index 1a1e04b..0949cdb 100644
--- a/arch/i386/kernel/cpu/mtrr/amd.c
+++ b/arch/i386/kernel/cpu/mtrr/amd.c
@@ -7,7 +7,7 @@
 
 static void
 amd_get_mtrr(unsigned int reg, unsigned long *base,
-	     unsigned int *size, mtrr_type * type)
+	     unsigned long *size, mtrr_type * type)
 {
 	unsigned long low, high;
 
diff --git a/arch/i386/kernel/cpu/mtrr/centaur.c b/arch/i386/kernel/cpu/mtrr/centaur.c
index 33f00ac..cb9aa3a 100644
--- a/arch/i386/kernel/cpu/mtrr/centaur.c
+++ b/arch/i386/kernel/cpu/mtrr/centaur.c
@@ -17,7 +17,7 @@
  */
 
 static int
-centaur_get_free_region(unsigned long base, unsigned long size)
+centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 /*  [SUMMARY] Get a free MTRR.
     <base> The starting (base) address of the region.
     <size> The size (in bytes) of the region.
@@ -26,10 +26,11 @@
 {
 	int i, max;
 	mtrr_type ltype;
-	unsigned long lbase;
-	unsigned int lsize;
+	unsigned long lbase, lsize;
 
 	max = num_var_ranges;
+	if (replace_reg >= 0 && replace_reg < max)
+		return replace_reg;
 	for (i = 0; i < max; ++i) {
 		if (centaur_mcr_reserved & (1 << i))
 			continue;
@@ -49,7 +50,7 @@
 
 static void
 centaur_get_mcr(unsigned int reg, unsigned long *base,
-		unsigned int *size, mtrr_type * type)
+		unsigned long *size, mtrr_type * type)
 {
 	*base = centaur_mcr[reg].high >> PAGE_SHIFT;
 	*size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
diff --git a/arch/i386/kernel/cpu/mtrr/cyrix.c b/arch/i386/kernel/cpu/mtrr/cyrix.c
index 9027a98..0737a59 100644
--- a/arch/i386/kernel/cpu/mtrr/cyrix.c
+++ b/arch/i386/kernel/cpu/mtrr/cyrix.c
@@ -9,7 +9,7 @@
 
 static void
 cyrix_get_arr(unsigned int reg, unsigned long *base,
-	      unsigned int *size, mtrr_type * type)
+	      unsigned long *size, mtrr_type * type)
 {
 	unsigned long flags;
 	unsigned char arr, ccr3, rcr, shift;
@@ -77,7 +77,7 @@
 }
 
 static int
-cyrix_get_free_region(unsigned long base, unsigned long size)
+cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 /*  [SUMMARY] Get a free ARR.
     <base> The starting (base) address of the region.
     <size> The size (in bytes) of the region.
@@ -86,9 +86,24 @@
 {
 	int i;
 	mtrr_type ltype;
-	unsigned long lbase;
-	unsigned int  lsize;
+	unsigned long lbase, lsize;
 
+	switch (replace_reg) {
+	case 7:
+		if (size < 0x40)
+			break;
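+		/* fall through */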
+	case 6:
+	case 5:
+	case 4:
+		return replace_reg;
+	case 3:
+		if (arr3_protected)
+			break;
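+		/* fall through */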
+	case 2:
+	case 1:
+	case 0:
+		return replace_reg;
+	}
 	/* If we are to set up a region >32M then look at ARR7 immediately */
 	if (size > 0x2000) {
 		cyrix_get_arr(7, &lbase, &lsize, &ltype);
@@ -214,7 +229,7 @@
 
 typedef struct {
 	unsigned long base;
-	unsigned int size;
+	unsigned long size;
 	mtrr_type type;
 } arr_state_t;
 
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index 0b61eed..f77fc53 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -3,6 +3,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
 #include <asm/msr.h>
@@ -15,12 +16,19 @@
 	struct mtrr_var_range *var_ranges;
 	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
 	unsigned char enabled;
+	unsigned char have_fixed;
 	mtrr_type def_type;
 };
 
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "mtrr."
+
+static __initdata int mtrr_show;
+module_param_named(show, mtrr_show, bool, 0);
+
 /*  Get the MSR pair relating to a var range  */
 static void __init
 get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
@@ -43,6 +51,14 @@
 		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
 }
 
+static void __init print_fixed(unsigned base, unsigned step, const mtrr_type *types)
+{
+	unsigned i;
+
+	for (i = 0; i < 8; ++i, ++types, base += step)
+		printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types));
+}
+
 /*  Grab all of the MTRR state for this CPU into *state  */
 void __init get_mtrr_state(void)
 {
@@ -58,13 +74,49 @@
 	} 
 	vrs = mtrr_state.var_ranges;
 
+	rdmsr(MTRRcap_MSR, lo, dummy);
+	mtrr_state.have_fixed = (lo >> 8) & 1;
+
 	for (i = 0; i < num_var_ranges; i++)
 		get_mtrr_var_range(i, &vrs[i]);
-	get_fixed_ranges(mtrr_state.fixed_ranges);
+	if (mtrr_state.have_fixed)
+		get_fixed_ranges(mtrr_state.fixed_ranges);
 
 	rdmsr(MTRRdefType_MSR, lo, dummy);
 	mtrr_state.def_type = (lo & 0xff);
 	mtrr_state.enabled = (lo & 0xc00) >> 10;
+
+	if (mtrr_show) {
+		int high_width;
+
+		printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type));
+		if (mtrr_state.have_fixed) {
+			printk(KERN_INFO "MTRR fixed ranges %sabled:\n",
+			       mtrr_state.enabled & 1 ? "en" : "dis");
+			print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
+			for (i = 0; i < 2; ++i)
+				print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
+			for (i = 0; i < 8; ++i)
+				print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
+		}
+		printk(KERN_INFO "MTRR variable ranges %sabled:\n",
+		       mtrr_state.enabled & 2 ? "en" : "dis");
+		high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
+		for (i = 0; i < num_var_ranges; ++i) {
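+			/* bit 11 of MTRRphysMask = range valid */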
+			if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
+				printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n",
+				       i,
+				       high_width,
+				       mtrr_state.var_ranges[i].base_hi,
+				       mtrr_state.var_ranges[i].base_lo >> 12,
+				       high_width,
+				       mtrr_state.var_ranges[i].mask_hi,
+				       mtrr_state.var_ranges[i].mask_lo >> 12,
+				       mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
+			else
+				printk(KERN_INFO "MTRR %u disabled\n", i);
+		}
+	}
 }
 
 /*  Some BIOSes are broken and don't set all MTRRs the same!  */
@@ -95,7 +147,7 @@
 			smp_processor_id(), msr, a, b);
 }
 
-int generic_get_free_region(unsigned long base, unsigned long size)
+int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 /*  [SUMMARY] Get a free MTRR.
     <base> The starting (base) address of the region.
     <size> The size (in bytes) of the region.
@@ -104,10 +156,11 @@
 {
 	int i, max;
 	mtrr_type ltype;
-	unsigned long lbase;
-	unsigned lsize;
+	unsigned long lbase, lsize;
 
 	max = num_var_ranges;
+	if (replace_reg >= 0 && replace_reg < max)
+		return replace_reg;
 	for (i = 0; i < max; ++i) {
 		mtrr_if->get(i, &lbase, &lsize, &ltype);
 		if (lsize == 0)
@@ -117,7 +170,7 @@
 }
 
 static void generic_get_mtrr(unsigned int reg, unsigned long *base,
-			     unsigned int *size, mtrr_type * type)
+			     unsigned long *size, mtrr_type *type)
 {
 	unsigned int mask_lo, mask_hi, base_lo, base_hi;
 
@@ -202,7 +255,9 @@
 	return changed;
 }
 
-static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi)
+static u32 deftype_lo, deftype_hi;
+
+static unsigned long set_mtrr_state(void)
 /*  [SUMMARY] Set the MTRR state for this CPU.
     <state> The MTRR state information to read.
     <ctxt> Some relevant CPU context.
@@ -217,14 +272,14 @@
 		if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
 			change_mask |= MTRR_CHANGE_MASK_VARIABLE;
 
-	if (set_fixed_ranges(mtrr_state.fixed_ranges))
+	if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
 		change_mask |= MTRR_CHANGE_MASK_FIXED;
 
 	/*  Set_mtrr_restore restores the old value of MTRRdefType,
 	   so to set it we fiddle with the saved value  */
 	if ((deftype_lo & 0xff) != mtrr_state.def_type
 	    || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
-		deftype_lo |= (mtrr_state.def_type | mtrr_state.enabled << 10);
+		deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10);
 		change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
 	}
 
@@ -233,7 +288,6 @@
 
 
 static unsigned long cr4 = 0;
-static u32 deftype_lo, deftype_hi;
 static DEFINE_SPINLOCK(set_atomicity_lock);
 
 /*
@@ -271,7 +325,7 @@
 	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
 
 	/*  Disable MTRRs, and set the default type to uncached  */
-	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi);
+	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
 }
 
 static void post_set(void) __releases(set_atomicity_lock)
@@ -300,7 +354,7 @@
 	prepare_set();
 
 	/* Actually set the state */
-	mask = set_mtrr_state(deftype_lo,deftype_hi);
+	mask = set_mtrr_state();
 
 	post_set();
 	local_irq_restore(flags);
@@ -366,7 +420,7 @@
 			printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
 			return -EINVAL;
 		}
-		if (!(base + size < 0x70000000 || base > 0x7003FFFF) &&
+		if (!(base + size < 0x70000 || base > 0x7003F) &&
 		    (type == MTRR_TYPE_WRCOMB
 		     || type == MTRR_TYPE_WRBACK)) {
 			printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c
index 5ac051b..5ae1705 100644
--- a/arch/i386/kernel/cpu/mtrr/if.c
+++ b/arch/i386/kernel/cpu/mtrr/if.c
@@ -17,7 +17,7 @@
 
 #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
 
-static char *mtrr_strings[MTRR_NUM_TYPES] =
+static const char *const mtrr_strings[MTRR_NUM_TYPES] =
 {
     "uncachable",               /* 0 */
     "write-combining",          /* 1 */
@@ -28,7 +28,7 @@
     "write-back",               /* 6 */
 };
 
-char *mtrr_attrib_to_str(int x)
+const char *mtrr_attrib_to_str(int x)
 {
 	return (x <= 6) ? mtrr_strings[x] : "?";
 }
@@ -44,10 +44,9 @@
 
 	max = num_var_ranges;
 	if (fcount == NULL) {
-		fcount = kmalloc(max * sizeof *fcount, GFP_KERNEL);
+		fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
 		if (!fcount)
 			return -ENOMEM;
-		memset(fcount, 0, max * sizeof *fcount);
 		FILE_FCOUNT(file) = fcount;
 	}
 	if (!page) {
@@ -155,6 +154,7 @@
 {
 	int err = 0;
 	mtrr_type type;
+	unsigned long size;
 	struct mtrr_sentry sentry;
 	struct mtrr_gentry gentry;
 	void __user *arg = (void __user *) __arg;
@@ -235,15 +235,15 @@
 	case MTRRIOC_GET_ENTRY:
 		if (gentry.regnum >= num_var_ranges)
 			return -EINVAL;
-		mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
+		mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
 
 		/* Hide entries that go above 4GB */
-		if (gentry.base + gentry.size > 0x100000
-		    || gentry.size == 0x100000)
+		if (gentry.base + size - 1 >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT))
+		    || size >= (1UL << (8 * sizeof(gentry.size) - PAGE_SHIFT)))
 			gentry.base = gentry.size = gentry.type = 0;
 		else {
 			gentry.base <<= PAGE_SHIFT;
-			gentry.size <<= PAGE_SHIFT;
+			gentry.size = size << PAGE_SHIFT;
 			gentry.type = type;
 		}
 
@@ -273,8 +273,14 @@
 	case MTRRIOC_GET_PAGE_ENTRY:
 		if (gentry.regnum >= num_var_ranges)
 			return -EINVAL;
-		mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type);
-		gentry.type = type;
+		mtrr_if->get(gentry.regnum, &gentry.base, &size, &type);
+		/* Hide entries that would overflow */
+		if (size != (__typeof__(gentry.size))size)
+			gentry.base = gentry.size = gentry.type = 0;
+		else {
+			gentry.size = size;
+			gentry.type = type;
+		}
 		break;
 	}
 
@@ -353,8 +359,7 @@
 	char factor;
 	int i, max, len;
 	mtrr_type type;
-	unsigned long base;
-	unsigned int size;
+	unsigned long base, size;
 
 	len = 0;
 	max = num_var_ranges;
@@ -373,7 +378,7 @@
 			}
 			/* RED-PEN: base can be > 32bit */ 
 			len += seq_printf(seq, 
-				   "reg%02i: base=0x%05lx000 (%4liMB), size=%4i%cB: %s, count=%d\n",
+				   "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n",
 			     i, base, base >> (20 - PAGE_SHIFT), size, factor,
 			     mtrr_attrib_to_str(type), usage_table[i]);
 		}
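
A quick check of the MTRRIOC_GET_ENTRY overflow guard above: with a 4-byte
gentry.size and PAGE_SHIFT of 12, 1UL << (8 * sizeof(gentry.size) -
PAGE_SHIFT) is 1 << 20 pages, i.e. 4 GB, so any region that reaches the 4 GB
boundary is reported as empty rather than silently truncated.
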
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index fff90bd..16bb7ea 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -59,7 +59,11 @@
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
+#ifndef CONFIG_X86_64
 extern int arr3_protected;
+#else
+#define arr3_protected 0
+#endif
 
 void set_mtrr_ops(struct mtrr_ops * ops)
 {
@@ -168,6 +172,13 @@
 
 #endif
 
+static inline int types_compatible(mtrr_type type1, mtrr_type type2)
+{
+	return type1 == MTRR_TYPE_UNCACHABLE ||
+	       type2 == MTRR_TYPE_UNCACHABLE ||
+	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
+	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
+}
+
 /**
  * set_mtrr - update mtrrs on all processors
  * @reg:	mtrr in question
@@ -263,8 +274,8 @@
 
 /**
  *	mtrr_add_page - Add a memory type region
- *	@base: Physical base address of region in pages (4 KB)
- *	@size: Physical size of region in pages (4 KB)
+ *	@base: Physical base address of region in pages (in units of 4 kB!)
+ *	@size: Physical size of region in pages (4 kB)
  *	@type: Type of MTRR desired
  *	@increment: If this is true do usage counting on the region
  *
@@ -300,11 +311,9 @@
 int mtrr_add_page(unsigned long base, unsigned long size, 
 		  unsigned int type, char increment)
 {
-	int i;
+	int i, replace, error;
 	mtrr_type ltype;
-	unsigned long lbase;
-	unsigned int lsize;
-	int error;
+	unsigned long lbase, lsize;
 
 	if (!mtrr_if)
 		return -ENXIO;
@@ -324,12 +333,18 @@
 		return -ENOSYS;
 	}
 
+	if (!size) {
+		printk(KERN_WARNING "mtrr: zero sized request\n");
+		return -EINVAL;
+	}
+
 	if (base & size_or_mask || size & size_or_mask) {
 		printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
 		return -EINVAL;
 	}
 
 	error = -EINVAL;
+	replace = -1;
 
 	/* No CPU hotplug when we change MTRR entries */
 	lock_cpu_hotplug();
@@ -337,21 +352,28 @@
 	mutex_lock(&mtrr_mutex);
 	for (i = 0; i < num_var_ranges; ++i) {
 		mtrr_if->get(i, &lbase, &lsize, &ltype);
-		if (base >= lbase + lsize)
-			continue;
-		if ((base < lbase) && (base + size <= lbase))
+		if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
 			continue;
 		/*  At this point we know there is some kind of overlap/enclosure  */
-		if ((base < lbase) || (base + size > lbase + lsize)) {
+		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
+			if (base <= lbase && base + size - 1 >= lbase + lsize - 1) {
+				/*  New region encloses an existing region  */
+				if (type == ltype) {
+					replace = replace == -1 ? i : -2;
+					continue;
+				}
+				else if (types_compatible(type, ltype))
+					continue;
+			}
 			printk(KERN_WARNING
 			       "mtrr: 0x%lx000,0x%lx000 overlaps existing"
-			       " 0x%lx000,0x%x000\n", base, size, lbase,
+			       " 0x%lx000,0x%lx000\n", base, size, lbase,
 			       lsize);
 			goto out;
 		}
 		/*  New region is enclosed by an existing region  */
 		if (ltype != type) {
-			if (type == MTRR_TYPE_UNCACHABLE)
+			if (types_compatible(type, ltype))
 				continue;
 			printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
 			     base, size, mtrr_attrib_to_str(ltype),
@@ -364,10 +386,18 @@
 		goto out;
 	}
 	/*  Search for an empty MTRR  */
-	i = mtrr_if->get_free_region(base, size);
+	i = mtrr_if->get_free_region(base, size, replace);
 	if (i >= 0) {
 		set_mtrr(i, base, size, type);
-		usage_table[i] = 1;
+		if (likely(replace < 0))
+			usage_table[i] = 1;
+		else {
+			usage_table[i] = usage_table[replace] + !!increment;
+			if (unlikely(replace != i)) {
+				set_mtrr(replace, 0, 0, 0);
+				usage_table[replace] = 0;
+			}
+		}
 	} else
 		printk(KERN_INFO "mtrr: no more MTRRs available\n");
 	error = i;
@@ -455,8 +485,7 @@
 {
 	int i, max;
 	mtrr_type ltype;
-	unsigned long lbase;
-	unsigned int lsize;
+	unsigned long lbase, lsize;
 	int error = -EINVAL;
 
 	if (!mtrr_if)
@@ -544,9 +573,11 @@
 
 static void __init init_ifs(void)
 {
+#ifndef CONFIG_X86_64
 	amd_init_mtrr();
 	cyrix_init_mtrr();
 	centaur_init_mtrr();
+#endif
 }
 
 /* The suspend/resume methods are only for CPU without MTRR. CPU using generic
@@ -555,7 +586,7 @@
 struct mtrr_value {
 	mtrr_type	ltype;
 	unsigned long	lbase;
-	unsigned int	lsize;
+	unsigned long	lsize;
 };
 
 static struct mtrr_value * mtrr_state;
@@ -565,10 +596,8 @@
 	int i;
 	int size = num_var_ranges * sizeof(struct mtrr_value);
 
-	mtrr_state = kmalloc(size,GFP_ATOMIC);
-	if (mtrr_state)
-		memset(mtrr_state,0,size);
-	else
+	mtrr_state = kzalloc(size,GFP_ATOMIC);
+	if (!mtrr_state)
 		return -ENOMEM;
 
 	for (i = 0; i < num_var_ranges; i++) {
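
On the replace logic above: when a new request of the same type exactly
encloses exactly one existing register, that register's index is kept in
replace and passed to get_free_region(), which returns it for in-place
reuse; if several registers are enclosed, replace is forced to -2 so an
ordinary free-slot search happens instead. The set_mtrr(replace, 0, 0, 0)
branch clears the old register in the (defensive) case where a different
slot was chosen.
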
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h
index 99c9f26..d61ea9d 100644
--- a/arch/i386/kernel/cpu/mtrr/mtrr.h
+++ b/arch/i386/kernel/cpu/mtrr/mtrr.h
@@ -43,15 +43,16 @@
 	void	(*set_all)(void);
 
 	void	(*get)(unsigned int reg, unsigned long *base,
-		       unsigned int *size, mtrr_type * type);
-	int	(*get_free_region) (unsigned long base, unsigned long size);
-
+		       unsigned long *size, mtrr_type * type);
+	int	(*get_free_region)(unsigned long base, unsigned long size,
+				   int replace_reg);
 	int	(*validate_add_page)(unsigned long base, unsigned long size,
 				     unsigned int type);
 	int	(*have_wrcomb)(void);
 };
 
-extern int generic_get_free_region(unsigned long base, unsigned long size);
+extern int generic_get_free_region(unsigned long base, unsigned long size,
+				   int replace_reg);
 extern int generic_validate_add_page(unsigned long base, unsigned long size,
 				     unsigned int type);
 
@@ -62,17 +63,17 @@
 /* library functions for processor-specific routines */
 struct set_mtrr_context {
 	unsigned long flags;
-	unsigned long deftype_lo;
-	unsigned long deftype_hi;
 	unsigned long cr4val;
-	unsigned long ccr3;
+	u32 deftype_lo;
+	u32 deftype_hi;
+	u32 ccr3;
 };
 
 struct mtrr_var_range {
-	unsigned long base_lo;
-	unsigned long base_hi;
-	unsigned long mask_lo;
-	unsigned long mask_hi;
+	u32 base_lo;
+	u32 base_hi;
+	u32 mask_lo;
+	u32 mask_hi;
 };
 
 void set_mtrr_done(struct set_mtrr_context *ctxt);
@@ -92,6 +93,6 @@
 extern unsigned int num_var_ranges;
 
 void mtrr_state_warn(void);
-char *mtrr_attrib_to_str(int x);
+const char *mtrr_attrib_to_str(int x);
 void mtrr_wrmsr(unsigned, unsigned, unsigned);
 
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 76aac08..6624d85 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -152,9 +152,10 @@
 				seq_printf(m, " [%d]", i);
 		}
 
-	seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
+	seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
 		     c->loops_per_jiffy/(500000/HZ),
 		     (c->loops_per_jiffy/(5000/HZ)) % 100);
+	seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size);
 
 	return 0;
 }
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index 23b2cc7..db6dd20 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -34,7 +34,6 @@
 #include <linux/major.h>
 #include <linux/fs.h>
 #include <linux/smp_lock.h>
-#include <linux/fs.h>
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
new file mode 100644
index 0000000..2f7d0a9
--- /dev/null
+++ b/arch/i386/kernel/e820.c
@@ -0,0 +1,894 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/string.h>
+#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/efi.h>
+#include <linux/pfn.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+
+#ifdef CONFIG_EFI
+int efi_enabled = 0;
+EXPORT_SYMBOL(efi_enabled);
+#endif
+
+struct e820map e820;
+struct change_member {
+	struct e820entry *pbios; /* pointer to original bios entry */
+	unsigned long long addr; /* address for this change point */
+};
+static struct change_member change_point_list[2*E820MAX] __initdata;
+static struct change_member *change_point[2*E820MAX] __initdata;
+static struct e820entry *overlap_list[E820MAX] __initdata;
+static struct e820entry new_bios[E820MAX] __initdata;
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+extern int user_defined_memmap;
+struct resource data_resource = {
+	.name	= "Kernel data",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource code_resource = {
+	.name	= "Kernel code",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource system_rom_resource = {
+	.name	= "System ROM",
+	.start	= 0xf0000,
+	.end	= 0xfffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource extension_rom_resource = {
+	.name	= "Extension ROM",
+	.start	= 0xe0000,
+	.end	= 0xeffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource adapter_rom_resources[] = { {
+	.name 	= "Adapter ROM",
+	.start	= 0xc8000,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+}, {
+	.name 	= "Adapter ROM",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+} };
+
+static struct resource video_rom_resource = {
+	.name 	= "Video ROM",
+	.start	= 0xc0000,
+	.end	= 0xc7fff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+};
+
+static struct resource video_ram_resource = {
+	.name	= "Video RAM area",
+	.start	= 0xa0000,
+	.end	= 0xbffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+static struct resource standard_io_resources[] = { {
+	.name	= "dma1",
+	.start	= 0x0000,
+	.end	= 0x001f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "pic1",
+	.start	= 0x0020,
+	.end	= 0x0021,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name   = "timer0",
+	.start	= 0x0040,
+	.end    = 0x0043,
+	.flags  = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name   = "timer1",
+	.start  = 0x0050,
+	.end    = 0x0053,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "keyboard",
+	.start	= 0x0060,
+	.end	= 0x006f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "dma page reg",
+	.start	= 0x0080,
+	.end	= 0x008f,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "pic2",
+	.start	= 0x00a0,
+	.end	= 0x00a1,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "dma2",
+	.start	= 0x00c0,
+	.end	= 0x00df,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+	.name	= "fpu",
+	.start	= 0x00f0,
+	.end	= 0x00ff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
+} };
+
+static int romsignature(const unsigned char *x)
+{
+	unsigned short sig;
+	int ret = 0;
+	if (probe_kernel_address((const unsigned short *)x, sig) == 0)
+		ret = (sig == 0xaa55);
+	return ret;
+}
+
+static int __init romchecksum(unsigned char *rom, unsigned long length)
+{
+	unsigned char *p, sum = 0;
+
+	for (p = rom; p < rom + length; p++)
+		sum += *p;
+	return sum == 0;
+}
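+
+/*
+ * In other words: an option ROM is accepted when its first two bytes are
+ * 0x55 0xAA (read above as the little-endian word 0xaa55) and all of its
+ * bytes sum to zero modulo 256; byte 2 encodes the length in 512-byte units.
+ */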
+
+static void __init probe_roms(void)
+{
+	unsigned long start, length, upper;
+	unsigned char *rom;
+	int	      i;
+
+	/* video rom */
+	upper = adapter_rom_resources[0].start;
+	for (start = video_rom_resource.start; start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		video_rom_resource.start = start;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = rom[2] * 512;
+
+		/* if checksum okay, trust length byte */
+		if (length && romchecksum(rom, length))
+			video_rom_resource.end = start + length - 1;
+
+		request_resource(&iomem_resource, &video_rom_resource);
+		break;
+	}
+
+	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
+	if (start < upper)
+		start = upper;
+
+	/* system rom */
+	request_resource(&iomem_resource, &system_rom_resource);
+	upper = system_rom_resource.start;
+
+	/* check for extension rom (ignore length byte!) */
+	rom = isa_bus_to_virt(extension_rom_resource.start);
+	if (romsignature(rom)) {
+		length = extension_rom_resource.end - extension_rom_resource.start + 1;
+		if (romchecksum(rom, length)) {
+			request_resource(&iomem_resource, &extension_rom_resource);
+			upper = extension_rom_resource.start;
+		}
+	}
+
+	/* check for adapter roms on 2k boundaries */
+	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
+		rom = isa_bus_to_virt(start);
+		if (!romsignature(rom))
+			continue;
+
+		/* 0 < length <= 0x7f * 512, historically */
+		length = rom[2] * 512;
+
+		/* but accept any length that fits if checksum okay */
+		if (!length || start + length > upper || !romchecksum(rom, length))
+			continue;
+
+		adapter_rom_resources[i].start = start;
+		adapter_rom_resources[i].end = start + length - 1;
+		request_resource(&iomem_resource, &adapter_rom_resources[i]);
+
+		start = adapter_rom_resources[i++].end & ~2047UL;
+	}
+}
+
+/*
+ * Request address space for all standard RAM and ROM resources
+ * and also for regions reported as reserved by the e820.
+ */
+static void __init
+legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
+{
+	int i;
+
+	probe_roms();
+	for (i = 0; i < e820.nr_map; i++) {
+		struct resource *res;
+#ifndef CONFIG_RESOURCES_64BIT
+		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
+			continue;
+#endif
+		res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
+		switch (e820.map[i].type) {
+		case E820_RAM:	res->name = "System RAM"; break;
+		case E820_ACPI:	res->name = "ACPI Tables"; break;
+		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
+		default:	res->name = "reserved";
+		}
+		res->start = e820.map[i].addr;
+		res->end = res->start + e820.map[i].size - 1;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		if (request_resource(&iomem_resource, res)) {
+			kfree(res);
+			continue;
+		}
+		if (e820.map[i].type == E820_RAM) {
+			/*
+			 *  We don't know which RAM region contains kernel data,
+			 *  so we try it repeatedly and let the resource manager
+			 *  test it.
+			 */
+			request_resource(res, code_resource);
+			request_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+			request_resource(res, &crashk_res);
+#endif
+		}
+	}
+}
+
+/*
+ * Request address space for all standard resources
+ *
+ * This is called just before pcibios_init(), which is also a
+ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
+ */
+static int __init request_standard_resources(void)
+{
+	int i;
+
+	printk("Setting up standard PCI resources\n");
+	if (efi_enabled)
+		efi_initialize_iomem_resources(&code_resource, &data_resource);
+	else
+		legacy_init_iomem_resources(&code_resource, &data_resource);
+
+	/* EFI systems may still have VGA */
+	request_resource(&iomem_resource, &video_ram_resource);
+
+	/* request I/O space for devices used on all i[345]86 PCs */
+	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+		request_resource(&ioport_resource, &standard_io_resources[i]);
+	return 0;
+}
+
+subsys_initcall(request_standard_resources);
+
+void __init add_memory_region(unsigned long long start,
+			      unsigned long long size, int type)
+{
+	int x;
+
+	if (!efi_enabled) {
+		x = e820.nr_map;
+
+		if (x == E820MAX) {
+			printk(KERN_ERR "Oops! Too many entries in the memory map!\n");
+			return;
+		}
+
+		e820.map[x].addr = start;
+		e820.map[x].size = size;
+		e820.map[x].type = type;
+		e820.nr_map++;
+	}
+} /* add_memory_region */
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries.  The following
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+{
+	struct change_member *change_tmp;
+	unsigned long current_type, last_type;
+	unsigned long long last_addr;
+	int chgidx, still_changing;
+	int overlap_entries;
+	int new_bios_entry;
+	int old_nr, new_nr, chg_nr;
+	int i;
+
+	/*
+		Visually we're performing the following (1,2,3,4 = memory types)...
+
+		Sample memory map (w/overlaps):
+		   ____22__________________
+		   ______________________4_
+		   ____1111________________
+		   _44_____________________
+		   11111111________________
+		   ____________________33__
+		   ___________44___________
+		   __________33333_________
+		   ______________22________
+		   ___________________2222_
+		   _________111111111______
+		   _____________________11_
+		   _________________4______
+
+		Sanitized equivalent (no overlap):
+		   1_______________________
+		   _44_____________________
+		   ___1____________________
+		   ____22__________________
+		   ______11________________
+		   _________1______________
+		   __________3_____________
+		   ___________44___________
+		   _____________33_________
+		   _______________2________
+		   ________________1_______
+		   _________________4______
+		   ___________________2____
+		   ____________________33__
+		   ______________________4_
+	*/
+	printk("sanitize start\n");
+	/* if there's only one memory region, don't bother */
+	if (*pnr_map < 2) {
+		printk("sanitize bail 0\n");
+		return -1;
+	}
+
+	old_nr = *pnr_map;
+
+	/* bail out if we find any unreasonable addresses in bios map */
+	for (i=0; i<old_nr; i++)
+		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
+			printk("sanitize bail 1\n");
+			return -1;
+		}
+
+	/* create pointers for initial change-point information (for sorting) */
+	for (i=0; i < 2*old_nr; i++)
+		change_point[i] = &change_point_list[i];
+
+	/* record all known change-points (starting and ending addresses),
+	   omitting those that are for empty memory regions */
+	chgidx = 0;
+	for (i=0; i < old_nr; i++)	{
+		if (biosmap[i].size != 0) {
+			change_point[chgidx]->addr = biosmap[i].addr;
+			change_point[chgidx++]->pbios = &biosmap[i];
+			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+			change_point[chgidx++]->pbios = &biosmap[i];
+		}
+	}
+	chg_nr = chgidx;    	/* true number of change-points */
+
+	/* sort change-point list by memory addresses (low -> high) */
+	still_changing = 1;
+	while (still_changing)	{
+		still_changing = 0;
+		for (i=1; i < chg_nr; i++)  {
+			/* if <current_addr> > <last_addr>, swap */
+			/* or, if current=<start_addr> & last=<end_addr>, swap */
+			if ((change_point[i]->addr < change_point[i-1]->addr) ||
+				((change_point[i]->addr == change_point[i-1]->addr) &&
+				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
+				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
+			   )
+			{
+				change_tmp = change_point[i];
+				change_point[i] = change_point[i-1];
+				change_point[i-1] = change_tmp;
+				still_changing=1;
+			}
+		}
+	}
+
+	/* create a new bios memory map, removing overlaps */
+	overlap_entries=0;	 /* number of entries in the overlap table */
+	new_bios_entry=0;	 /* index for creating new bios map entries */
+	last_type = 0;		 /* start with undefined memory type */
+	last_addr = 0;		 /* start with 0 as last starting address */
+	/* loop through change-points, determining effect on the new bios map */
+	for (chgidx=0; chgidx < chg_nr; chgidx++)
+	{
+		/* keep track of all overlapping bios entries */
+		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
+		{
+			/* add map entry to overlap list (> 1 entry implies an overlap) */
+			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
+		}
+		else
+		{
+			/* remove entry from list (order independent, so swap with last) */
+			for (i=0; i<overlap_entries; i++)
+			{
+				if (overlap_list[i] == change_point[chgidx]->pbios)
+					overlap_list[i] = overlap_list[overlap_entries-1];
+			}
+			overlap_entries--;
+		}
+		/* if there are overlapping entries, decide which "type" to use */
+		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+		current_type = 0;
+		for (i=0; i<overlap_entries; i++)
+			if (overlap_list[i]->type > current_type)
+				current_type = overlap_list[i]->type;
+		/* continue building up new bios map based on this information */
+		if (current_type != last_type)	{
+			if (last_type != 0)	 {
+				new_bios[new_bios_entry].size =
+					change_point[chgidx]->addr - last_addr;
+				/* move forward only if the new size was non-zero */
+				if (new_bios[new_bios_entry].size != 0)
+					if (++new_bios_entry >= E820MAX)
+						break; 	/* no more space left for new bios entries */
+			}
+			if (current_type != 0)	{
+				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+				new_bios[new_bios_entry].type = current_type;
+				last_addr=change_point[chgidx]->addr;
+			}
+			last_type = current_type;
+		}
+	}
+	new_nr = new_bios_entry;   /* retain count for new bios entries */
+
+	/* copy new bios mapping into original location */
+	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+	*pnr_map = new_nr;
+
+	printk("sanitize end\n");
+	return 0;
+}
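+
+/*
+ * Worked example: a type-1 (RAM) entry covering 0x00000-0x9ffff plus a
+ * type-2 (reserved) entry covering 0x90000-0xfffff yields change points at
+ * 0x0, 0x90000, 0xa0000 and 0x100000; the higher type wins in the overlap,
+ * so the sanitized map is 0x00000-0x8ffff RAM and 0x90000-0xfffff reserved.
+ */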
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it...
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory.  If we aren't, we'll fake a memory map.
+ *
+ * We check that the memory map contains at least 2 elements
+ * before we'll use it, because the detection code in setup.S may
+ * not be perfect and almost every PC known to man has two memory
+ * regions: one from 0 to 640k, and one from 1 MB up.  (The IBM
+ * ThinkPad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
+int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+{
+	/* Only one memory region (or negative)? Ignore it */
+	if (nr_map < 2)
+		return -1;
+
+	do {
+		unsigned long long start = biosmap->addr;
+		unsigned long long size = biosmap->size;
+		unsigned long long end = start + size;
+		unsigned long type = biosmap->type;
+		printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
+
+		/* Overflow in 64 bits? Ignore the memory map. */
+		if (start > end)
+			return -1;
+
+		/*
+		 * Some BIOSes claim RAM in the 640k - 1M region.
+		 * Not right. Fix it up.
+		 */
+		if (type == E820_RAM) {
+			printk("copy_e820_map() type is E820_RAM\n");
+			if (start < 0x100000ULL && end > 0xA0000ULL) {
+				printk("copy_e820_map() lies in range...\n");
+				if (start < 0xA0000ULL) {
+					printk("copy_e820_map() start < 0xA0000ULL\n");
+					add_memory_region(start, 0xA0000ULL-start, type);
+				}
+				if (end <= 0x100000ULL) {
+					printk("copy_e820_map() end <= 0x100000ULL\n");
+					continue;
+				}
+				start = 0x100000ULL;
+				size = end - start;
+			}
+		}
+		add_memory_region(start, size, type);
+	} while (biosmap++,--nr_map);
+	return 0;
+}
+
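A quick user-space sketch of the 640k-1M fixup above, with a made-up region
that straddles the hole:

	#include <stdio.h>

	int main(void)
	{
		/* BIOS claims RAM from 576k to 2MB, across the 640k-1M hole */
		unsigned long long start = 0x90000, end = 0x200000;

		if (start < 0x100000ULL && end > 0xA0000ULL) {
			if (start < 0xA0000ULL)		/* keep the part below 640k */
				printf("RAM %llx-%llx\n", start, 0xA0000ULL);
			if (end > 0x100000ULL)		/* keep the part above 1MB */
				printf("RAM %llx-%llx\n", 0x100000ULL, end);
		}
		return 0;	/* prints 90000-a0000 and 100000-200000 */
	}
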
+/*
+ * Callback for efi_memory_walk.
+ */
+static int __init
+efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
+{
+	unsigned long *max_pfn = arg, pfn;
+
+	if (start < end) {
+		pfn = PFN_UP(end -1);
+		if (pfn > *max_pfn)
+			*max_pfn = pfn;
+	}
+	return 0;
+}
+
+static int __init
+efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
+{
+	memory_present(0, PFN_UP(start), PFN_DOWN(end));
+	return 0;
+}
+
+/*
+ * Find the highest page frame number we have available
+ */
+void __init find_max_pfn(void)
+{
+	int i;
+
+	max_pfn = 0;
+	if (efi_enabled) {
+		efi_memmap_walk(efi_find_max_pfn, &max_pfn);
+		efi_memmap_walk(efi_memory_present_wrapper, NULL);
+		return;
+	}
+
+	for (i = 0; i < e820.nr_map; i++) {
+		unsigned long start, end;
+		/* RAM? */
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		start = PFN_UP(e820.map[i].addr);
+		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+		if (start >= end)
+			continue;
+		if (end > max_pfn)
+			max_pfn = end;
+		memory_present(0, start, end);
+	}
+}
+
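The PFN_UP/PFN_DOWN rounding used above matters at unaligned region edges; a
user-space sketch, assuming 4k pages (the macro definitions restate the
kernel's for the demo):

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
	#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

	int main(void)
	{
		/* round the start of a RAM range up and the end down, so
		 * only whole pages are counted as available */
		printf("start pfn: %lx\n", PFN_UP(0x100000UL));     /* 100   */
		printf("end pfn:   %lx\n", PFN_DOWN(0x7fedc123UL)); /* 7fedc */
		return 0;
	}
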
+/*
+ * Free all available memory for boot time allocation.  Used
+ * as a callback function by efi_memory_walk()
+ */
+
+static int __init
+free_available_memory(unsigned long start, unsigned long end, void *arg)
+{
+	/* check max_low_pfn */
+	if (start >= (max_low_pfn << PAGE_SHIFT))
+		return 0;
+	if (end >= (max_low_pfn << PAGE_SHIFT))
+		end = max_low_pfn << PAGE_SHIFT;
+	if (start < end)
+		free_bootmem(start, end - start);
+
+	return 0;
+}
+/*
+ * Register fully available low RAM pages with the bootmem allocator.
+ */
+void __init register_bootmem_low_pages(unsigned long max_low_pfn)
+{
+	int i;
+
+	if (efi_enabled) {
+		efi_memmap_walk(free_available_memory, NULL);
+		return;
+	}
+	for (i = 0; i < e820.nr_map; i++) {
+		unsigned long curr_pfn, last_pfn, size;
+		/*
+		 * Reserve usable low memory
+		 */
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		/*
+		 * We are rounding up the start address of usable memory:
+		 */
+		curr_pfn = PFN_UP(e820.map[i].addr);
+		if (curr_pfn >= max_low_pfn)
+			continue;
+		/*
+		 * ... and at the end of the usable range downwards:
+		 */
+		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+
+		if (last_pfn > max_low_pfn)
+			last_pfn = max_low_pfn;
+
+		/*
+		 * .. finally, did all the rounding and playing
+		 * around just make the area go away?
+		 */
+		if (last_pfn <= curr_pfn)
+			continue;
+
+		size = last_pfn - curr_pfn;
+		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
+	}
+}
+
+void __init register_memory(void)
+{
+	unsigned long gapstart, gapsize, round;
+	unsigned long long last;
+	int i;
+
+	/*
+	 * Search for the biggest gap in the low 32 bits of the e820
+	 * memory space.
+	 */
+	last = 0x100000000ull;
+	gapstart = 0x10000000;
+	gapsize = 0x400000;
+	i = e820.nr_map;
+	while (--i >= 0) {
+		unsigned long long start = e820.map[i].addr;
+		unsigned long long end = start + e820.map[i].size;
+
+		/*
+		 * Since "last" is at most 4GB, we know we'll
+		 * fit in 32 bits if this condition is true
+		 */
+		if (last > end) {
+			unsigned long gap = last - end;
+
+			if (gap > gapsize) {
+				gapsize = gap;
+				gapstart = end;
+			}
+		}
+		if (start < last)
+			last = start;
+	}
+
+	/*
+	 * See how much we want to round up: start off with
+	 * rounding to the next 1MB area.
+	 */
+	round = 0x100000;
+	while ((gapsize >> 4) > round)
+		round += round;
+	/* Fun with two's complement */
+	pci_mem_start = (gapstart + round) & -round;
+
+	printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
+		pci_mem_start, gapstart, gapsize);
+}
+
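The two's-complement trick above, (gapstart + round) & -round, rounds gapstart
up to a multiple of round. A standalone sketch with made-up values:

	#include <stdio.h>

	int main(void)
	{
		unsigned long gapstart = 0xcff80000, gapsize = 0x30000000;
		unsigned long round = 0x100000;

		/* grow the rounding unit until it exceeds 1/16th of the gap */
		while ((gapsize >> 4) > round)
			round += round;

		/* -round, in two's complement, is a mask with the low bits
		 * clear, so this rounds gapstart up to a round boundary */
		printf("pci_mem_start = %08lx\n", (gapstart + round) & -round);
		return 0;	/* prints d0000000 */
	}
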
+void __init print_memory_map(char *who)
+{
+	int i;
+
+	for (i = 0; i < e820.nr_map; i++) {
+		printk(" %s: %016Lx - %016Lx ", who,
+			e820.map[i].addr,
+			e820.map[i].addr + e820.map[i].size);
+		switch (e820.map[i].type) {
+		case E820_RAM:	printk("(usable)\n");
+				break;
+		case E820_RESERVED:
+				printk("(reserved)\n");
+				break;
+		case E820_ACPI:
+				printk("(ACPI data)\n");
+				break;
+		case E820_NVS:
+				printk("(ACPI NVS)\n");
+				break;
+		default:	printk("type %lu\n", e820.map[i].type);
+				break;
+		}
+	}
+}
+
+static __init __always_inline void efi_limit_regions(unsigned long long size)
+{
+	unsigned long long current_addr = 0;
+	efi_memory_desc_t *md, *next_md;
+	void *p, *p1;
+	int i, j;
+
+	j = 0;
+	p1 = memmap.map;
+	for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
+		md = p;
+		next_md = p1;
+		current_addr = md->phys_addr +
+			PFN_PHYS(md->num_pages);
+		if (is_available_memory(md)) {
+			if (md->phys_addr >= size) continue;
+			memcpy(next_md, md, memmap.desc_size);
+			if (current_addr >= size) {
+				next_md->num_pages -=
+					PFN_UP(current_addr-size);
+			}
+			p1 += memmap.desc_size;
+			next_md = p1;
+			j++;
+		} else if ((md->attribute & EFI_MEMORY_RUNTIME) ==
+			   EFI_MEMORY_RUNTIME) {
+			/* In order to make runtime services
+			 * available we have to include runtime
+			 * memory regions in the memory map */
+			memcpy(next_md, md, memmap.desc_size);
+			p1 += memmap.desc_size;
+			next_md = p1;
+			j++;
+		}
+	}
+	memmap.nr_map = j;
+	memmap.map_end = memmap.map +
+		(memmap.nr_map * memmap.desc_size);
+}
+
+void __init limit_regions(unsigned long long size)
+{
+	unsigned long long current_addr;
+	int i;
+
+	print_memory_map("limit_regions start");
+	if (efi_enabled) {
+		efi_limit_regions(size);
+		return;
+	}
+	for (i = 0; i < e820.nr_map; i++) {
+		current_addr = e820.map[i].addr + e820.map[i].size;
+		if (current_addr < size)
+			continue;
+
+		if (e820.map[i].type != E820_RAM)
+			continue;
+
+		if (e820.map[i].addr >= size) {
+			/*
+			 * This region starts past the end of the
+			 * requested size; skip it completely.
+			 */
+			e820.nr_map = i;
+		} else {
+			e820.nr_map = i + 1;
+			e820.map[i].size -= current_addr - size;
+		}
+		print_memory_map("limit_regions endfor");
+		return;
+	}
+	print_memory_map("limit_regions endfunc");
+}
+
+ /*
+  * This function checks if the entire range <start,end> is mapped with type.
+  *
+ * Note: this function only works correctly if the e820 table is sorted
+ * and non-overlapping, which is the case
+  */
+int __init
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
+{
+	u64 start = s;
+	u64 end = e;
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (type && ei->type != type)
+			continue;
+		/* does this entry overlap (part of) the requested range? */
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+		/* if the entry covers the beginning of <start,end>, move
+		 * start to the end of the entry: coverage holds up to there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+		/* if start is now at or beyond end, we're done, full
+		 * coverage */
+		if (start >= end)
+			return 1; /* we're done */
+	}
+	return 0;
+}
+
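A user-space copy of the walk above, run on a toy map, shows why the
sortedness assumption matters: start only ever moves forward (names and
values here are invented for the demo):

	#include <stdio.h>

	struct e820entry { unsigned long long addr, size; unsigned type; };

	static int all_mapped(struct e820entry *map, int n,
			      unsigned long long start, unsigned long long end,
			      unsigned type)
	{
		int i;
		for (i = 0; i < n; i++) {
			struct e820entry *ei = &map[i];
			if (type && ei->type != type)
				continue;
			if (ei->addr >= end || ei->addr + ei->size <= start)
				continue;
			if (ei->addr <= start)
				start = ei->addr + ei->size;
			if (start >= end)
				return 1;
		}
		return 0;
	}

	int main(void)
	{
		struct e820entry map[] = {	/* sorted, non-overlapping */
			{ 0x000000, 0x0a0000, 1 },
			{ 0x100000, 0x700000, 1 },
		};
		printf("%d\n", all_mapped(map, 2, 0x100000, 0x200000, 1)); /* 1 */
		printf("%d\n", all_mapped(map, 2, 0x090000, 0x110000, 1)); /* 0 */
		return 0;	/* the second range crosses the a0000-100000 hole */
	}
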
+static int __init parse_memmap(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	if (strcmp(arg, "exactmap") == 0) {
+#ifdef CONFIG_CRASH_DUMP
+		/* If we are doing a crash dump, we
+		 * still need to know the real memory
+		 * size before the original memory map
+		 * is reset.
+		 */
+		find_max_pfn();
+		saved_max_pfn = max_pfn;
+#endif
+		e820.nr_map = 0;
+		user_defined_memmap = 1;
+	} else {
+		/* If the user specifies memory size, we
+		 * limit the BIOS-provided memory map to
+		 * that size. exactmap can be used to specify
+		 * the exact map. mem=number can be used to
+		 * trim the existing memory map.
+		 */
+		unsigned long long start_at, mem_size;
+
+		mem_size = memparse(arg, &arg);
+		if (*arg == '@') {
+			start_at = memparse(arg+1, &arg);
+			add_memory_region(start_at, mem_size, E820_RAM);
+		} else if (*arg == '#') {
+			start_at = memparse(arg+1, &arg);
+			add_memory_region(start_at, mem_size, E820_ACPI);
+		} else if (*arg == '$') {
+			start_at = memparse(arg+1, &arg);
+			add_memory_region(start_at, mem_size, E820_RESERVED);
+		} else {
+			limit_regions(mem_size);
+			user_defined_memmap = 1;
+		}
+	}
+	return 0;
+}
+early_param("memmap", parse_memmap);
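For illustration, plausible uses of the option handled above would be
"memmap=exactmap memmap=640K@0 memmap=64M@1M" (build an exact map) or
"memmap=0x10000$0x80000000" (reserve 64k at 2GB): '@' adds usable RAM,
'#' ACPI data and '$' reserved space, as the branches above show. The
K/M/G suffix handling comes from memparse(); a user-space stand-in
(memparse_sketch is hypothetical, not the kernel helper):

	#include <stdio.h>
	#include <stdlib.h>

	static unsigned long long memparse_sketch(const char *s, char **retptr)
	{
		unsigned long long v = strtoull(s, retptr, 0);

		switch (**retptr) {		/* scale by a K/M/G suffix */
		case 'G': case 'g': v <<= 10;	/* fall through */
		case 'M': case 'm': v <<= 10;	/* fall through */
		case 'K': case 'k': v <<= 10; (*retptr)++;
		default: break;
		}
		return v;
	}

	int main(void)
	{
		char *rest;
		unsigned long long size = memparse_sketch("64M@1M", &rest);

		/* prints: size 67108864, rest @1M (the '@' branch above
		 * would then parse the start address from rest+1) */
		printf("size %llu, rest %s\n", size, rest);
		return 0;
	}
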
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index 8b40648..b92c7f0 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -194,17 +194,24 @@
 	return 0;
 }
 /*
- * This should only be used during kernel init and before runtime
- * services have been remapped, therefore, we'll need to call in physical
- * mode.  Note, this call isn't used later, so mark it __init.
+ * This is used during kernel init before runtime
+ * services have been remapped, and also during suspend; therefore,
+ * we need to be able to call it in both physical and virtual mode.
  */
-inline unsigned long __init efi_get_time(void)
+inline unsigned long efi_get_time(void)
 {
 	efi_status_t status;
 	efi_time_t eft;
 	efi_time_cap_t cap;
 
-	status = phys_efi_get_time(&eft, &cap);
+	if (efi.get_time) {
+		/* if we are in virtual mode, use the remapped function */
+ 		status = efi.get_time(&eft, &cap);
+	} else {
+		/* we are in physical mode */
+		status = phys_efi_get_time(&eft, &cap);
+	}
+
 	if (status != EFI_SUCCESS)
 		printk("Oops: efitime: can't read time status: 0x%lx\n",status);
 
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 5a63d6f..de34b7fe 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -30,12 +30,13 @@
  *	18(%esp) - %eax
  *	1C(%esp) - %ds
  *	20(%esp) - %es
- *	24(%esp) - orig_eax
- *	28(%esp) - %eip
- *	2C(%esp) - %cs
- *	30(%esp) - %eflags
- *	34(%esp) - %oldesp
- *	38(%esp) - %oldss
+ *	24(%esp) - %gs
+ *	28(%esp) - orig_eax
+ *	2C(%esp) - %eip
+ *	30(%esp) - %cs
+ *	34(%esp) - %eflags
+ *	38(%esp) - %oldesp
+ *	3C(%esp) - %oldss
  *
  * "current" is in register %ebx during any slow entries.
  */
@@ -48,26 +49,24 @@
 #include <asm/smp.h>
 #include <asm/page.h>
 #include <asm/desc.h>
+#include <asm/percpu.h>
 #include <asm/dwarf2.h>
 #include "irq_vectors.h"
 
-#define nr_syscalls ((syscall_table_size)/4)
+/*
+ * We use macros for low-level operations which need to be overridden
+ * for paravirtualization.  The following will never clobber any registers:
+ *   INTERRUPT_RETURN (aka. "iret")
+ *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
+ *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ *
+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
+ * Allowing a register to be clobbered can shrink the paravirt replacement
+ * enough to patch inline, increasing performance.
+ */
 
-EBX		= 0x00
-ECX		= 0x04
-EDX		= 0x08
-ESI		= 0x0C
-EDI		= 0x10
-EBP		= 0x14
-EAX		= 0x18
-DS		= 0x1C
-ES		= 0x20
-ORIG_EAX	= 0x24
-EIP		= 0x28
-CS		= 0x2C
-EFLAGS		= 0x30
-OLDESP		= 0x34
-OLDSS		= 0x38
+#define nr_syscalls ((syscall_table_size)/4)
 
 CF_MASK		= 0x00000001
 TF_MASK		= 0x00000100
@@ -76,23 +75,16 @@
 NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
-/* These are replaces for paravirtualization */
-#define DISABLE_INTERRUPTS		cli
-#define ENABLE_INTERRUPTS		sti
-#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
-#define INTERRUPT_RETURN		iret
-#define GET_CR0_INTO_EAX		movl %cr0, %eax
-
 #ifdef CONFIG_PREEMPT
-#define preempt_stop		DISABLE_INTERRUPTS; TRACE_IRQS_OFF
+#define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
-#define preempt_stop
+#define preempt_stop(clobbers)
 #define resume_kernel		restore_nocheck
 #endif
 
 .macro TRACE_IRQS_IRET
 #ifdef CONFIG_TRACE_IRQFLAGS
-	testl $IF_MASK,EFLAGS(%esp)     # interrupts off?
+	testl $IF_MASK,PT_EFLAGS(%esp)     # interrupts off?
 	jz 1f
 	TRACE_IRQS_ON
 1:
@@ -107,6 +99,9 @@
 
 #define SAVE_ALL \
 	cld; \
+	pushl %gs; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	/*CFI_REL_OFFSET gs, 0;*/\
 	pushl %es; \
 	CFI_ADJUST_CFA_OFFSET 4;\
 	/*CFI_REL_OFFSET es, 0;*/\
@@ -136,7 +131,9 @@
 	CFI_REL_OFFSET ebx, 0;\
 	movl $(__USER_DS), %edx; \
 	movl %edx, %ds; \
-	movl %edx, %es;
+	movl %edx, %es; \
+	movl $(__KERNEL_PDA), %edx; \
+	movl %edx, %gs
 
 #define RESTORE_INT_REGS \
 	popl %ebx;	\
@@ -169,17 +166,22 @@
 2:	popl %es;	\
 	CFI_ADJUST_CFA_OFFSET -4;\
 	/*CFI_RESTORE es;*/\
-.section .fixup,"ax";	\
-3:	movl $0,(%esp);	\
-	jmp 1b;		\
+3:	popl %gs;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	/*CFI_RESTORE gs;*/\
+.pushsection .fixup,"ax";	\
 4:	movl $0,(%esp);	\
+	jmp 1b;		\
+5:	movl $0,(%esp);	\
 	jmp 2b;		\
-.previous;		\
+6:	movl $0,(%esp);	\
+	jmp 3b;		\
 .section __ex_table,"a";\
 	.align 4;	\
-	.long 1b,3b;	\
-	.long 2b,4b;	\
-.previous
+	.long 1b,4b;	\
+	.long 2b,5b;	\
+	.long 3b,6b;	\
+.popsection
 
 #define RING0_INT_FRAME \
 	CFI_STARTPROC simple;\
@@ -198,18 +200,18 @@
 #define RING0_PTREGS_FRAME \
 	CFI_STARTPROC simple;\
 	CFI_SIGNAL_FRAME;\
-	CFI_DEF_CFA esp, OLDESP-EBX;\
-	/*CFI_OFFSET cs, CS-OLDESP;*/\
-	CFI_OFFSET eip, EIP-OLDESP;\
-	/*CFI_OFFSET es, ES-OLDESP;*/\
-	/*CFI_OFFSET ds, DS-OLDESP;*/\
-	CFI_OFFSET eax, EAX-OLDESP;\
-	CFI_OFFSET ebp, EBP-OLDESP;\
-	CFI_OFFSET edi, EDI-OLDESP;\
-	CFI_OFFSET esi, ESI-OLDESP;\
-	CFI_OFFSET edx, EDX-OLDESP;\
-	CFI_OFFSET ecx, ECX-OLDESP;\
-	CFI_OFFSET ebx, EBX-OLDESP
+	CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
+	/*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
+	CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
+	/*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
+	/*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
+	CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
+	CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
+	CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
+	CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
+	CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
+	CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
+	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
 
 ENTRY(ret_from_fork)
 	CFI_STARTPROC
@@ -237,17 +239,18 @@
 	ALIGN
 	RING0_PTREGS_FRAME
 ret_from_exception:
-	preempt_stop
+	preempt_stop(CLBR_ANY)
 ret_from_intr:
 	GET_THREAD_INFO(%ebp)
 check_userspace:
-	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
-	movb CS(%esp), %al
+	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
+	movb PT_CS(%esp), %al
 	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
 	cmpl $USER_RPL, %eax
 	jb resume_kernel		# not returning to v8086 or userspace
+
 ENTRY(resume_userspace)
- 	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
+ 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	movl TI_flags(%ebp), %ecx
@@ -258,14 +261,14 @@
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-	DISABLE_INTERRUPTS
+	DISABLE_INTERRUPTS(CLBR_ANY)
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jnz restore_nocheck
 need_resched:
 	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
 	jz restore_all
-	testl $IF_MASK,EFLAGS(%esp)     # interrupts off (exception path) ?
+	testl $IF_MASK,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz restore_all
 	call preempt_schedule_irq
 	jmp need_resched
@@ -287,7 +290,7 @@
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:
 	 */
-	ENABLE_INTERRUPTS
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ss, 0*/
@@ -331,20 +334,27 @@
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
-	movl %eax,EAX(%esp)
-	DISABLE_INTERRUPTS
+	movl %eax,PT_EAX(%esp)
+	DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
 /* if something modifies registers it must also disable sysexit */
-	movl EIP(%esp), %edx
-	movl OLDESP(%esp), %ecx
+	movl PT_EIP(%esp), %edx
+	movl PT_OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
+1:	mov  PT_GS(%esp), %gs
 	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
-
+.pushsection .fixup,"ax"
+2:	movl $0,PT_GS(%esp)
+	jmp 1b
+.section __ex_table,"a"
+	.align 4
+	.long 1b,2b
+.popsection
 
 	# system call handler stub
 ENTRY(system_call)
@@ -353,7 +363,7 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
-	testl $TF_MASK,EFLAGS(%esp)
+	testl $TF_MASK,PT_EFLAGS(%esp)
 	jz no_singlestep
 	orl $_TIF_SINGLESTEP,TI_flags(%ebp)
 no_singlestep:
@@ -365,9 +375,9 @@
 	jae syscall_badsys
 syscall_call:
 	call *sys_call_table(,%eax,4)
-	movl %eax,EAX(%esp)		# store the return value
+	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
-	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	TRACE_IRQS_OFF
@@ -376,12 +386,12 @@
 	jne syscall_exit_work
 
 restore_all:
-	movl EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
-	# Warning: OLDSS(%esp) contains the wrong/random values if we
+	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
+	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 	# are returning to the kernel.
 	# See comments in process.c:copy_thread() for details.
-	movb OLDSS(%esp), %ah
-	movb CS(%esp), %al
+	movb PT_OLDSS(%esp), %ah
+	movb PT_CS(%esp), %al
 	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 	CFI_REMEMBER_STATE
@@ -390,13 +400,13 @@
 	TRACE_IRQS_IRET
 restore_nocheck_notrace:
 	RESTORE_REGS
-	addl $4, %esp
+	addl $4, %esp			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
 1:	INTERRUPT_RETURN
 .section .fixup,"ax"
 iret_exc:
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS
+	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushl $0			# no error code
 	pushl $do_iret_error
 	jmp error_code
@@ -408,33 +418,42 @@
 
 	CFI_RESTORE_STATE
 ldt_ss:
-	larl OLDSS(%esp), %eax
+	larl PT_OLDSS(%esp), %eax
 	jnz restore_nocheck
 	testl $0x00400000, %eax		# returning to 32bit stack?
 	jnz restore_nocheck		# all right, normal return
+
+#ifdef CONFIG_PARAVIRT
+	/*
+	 * The kernel can't run on a non-flat stack if paravirt mode
+	 * is active.  Rather than try to fix up the high bits of
+	 * ESP, bypass this code entirely.  This may break DOSemu
+	 * and/or Wine support in a paravirt VM, although the option
+	 * remains to set the high 16 bits in the INTERRUPT_RETURN
+	 * paravirt-op.
+	 */
+	cmpl $0, paravirt_ops+PARAVIRT_enabled
+	jne restore_nocheck
+#endif
+
 	/* If returning to userspace with 16bit stack,
 	 * try to fix the higher word of ESP, as the CPU
 	 * won't restore it.
 	 * This is an "official" bug of all the x86-compatible
 	 * CPUs, which we can try to work around to make
 	 * dosemu and wine happy. */
-	subl $8, %esp		# reserve space for switch16 pointer
-	CFI_ADJUST_CFA_OFFSET 8
-	DISABLE_INTERRUPTS
+	movl PT_OLDESP(%esp), %eax
+	movl %esp, %edx
+	call patch_espfix_desc
+	pushl $__ESPFIX_SS
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	DISABLE_INTERRUPTS(CLBR_EAX)
 	TRACE_IRQS_OFF
-	movl %esp, %eax
-	/* Set up the 16bit stack frame with switch32 pointer on top,
-	 * and a switch16 pointer on top of the current frame. */
-	call setup_x86_bogus_stack
-	CFI_ADJUST_CFA_OFFSET -8	# frame has moved
-	TRACE_IRQS_IRET
-	RESTORE_REGS
-	lss 20+4(%esp), %esp	# switch to 16bit stack
-1:	INTERRUPT_RETURN
-.section __ex_table,"a"
-	.align 4
-	.long 1b,iret_exc
-.previous
+	lss (%esp), %esp
+	CFI_ADJUST_CFA_OFFSET -8
+	jmp restore_nocheck
 	CFI_ENDPROC
 
 	# perform work that needs to be done immediately before resumption
@@ -445,7 +464,7 @@
 	jz work_notifysig
 work_resched:
 	call schedule
-	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
 	TRACE_IRQS_OFF
@@ -458,7 +477,8 @@
 
 work_notifysig:				# deal with pending signals and
 					# notify-resume requests
-	testl $VM_MASK, EFLAGS(%esp)
+#ifdef CONFIG_VM86
+	testl $VM_MASK, PT_EFLAGS(%esp)
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
@@ -468,29 +488,30 @@
 
 	ALIGN
 work_notifysig_v86:
-#ifdef CONFIG_VM86
 	pushl %ecx			# save ti_flags for do_notify_resume
 	CFI_ADJUST_CFA_OFFSET 4
 	call save_v86_state		# %eax contains pt_regs pointer
 	popl %ecx
 	CFI_ADJUST_CFA_OFFSET -4
 	movl %eax, %esp
+#else
+	movl %esp, %eax
+#endif
 	xorl %edx, %edx
 	call do_notify_resume
 	jmp resume_userspace_sig
-#endif
 
 	# perform syscall exit tracing
 	ALIGN
 syscall_trace_entry:
-	movl $-ENOSYS,EAX(%esp)
+	movl $-ENOSYS,PT_EAX(%esp)
 	movl %esp, %eax
 	xorl %edx,%edx
 	call do_syscall_trace
 	cmpl $0, %eax
 	jne resume_userspace		# ret != 0 -> running under PTRACE_SYSEMU,
 					# so must skip actual syscall
-	movl ORIG_EAX(%esp), %eax
+	movl PT_ORIG_EAX(%esp), %eax
 	cmpl $(nr_syscalls), %eax
 	jnae syscall_call
 	jmp syscall_exit
@@ -501,7 +522,7 @@
 	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
 	jz work_pending
 	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
+	ENABLE_INTERRUPTS(CLBR_ANY)	# could let do_syscall_trace() call
 					# schedule() instead
 	movl %esp, %eax
 	movl $1, %edx
@@ -515,39 +536,38 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
-	movl $-EFAULT,EAX(%esp)
+	movl $-EFAULT,PT_EAX(%esp)
 	jmp resume_userspace
 
 syscall_badsys:
-	movl $-ENOSYS,EAX(%esp)
+	movl $-ENOSYS,PT_EAX(%esp)
 	jmp resume_userspace
 	CFI_ENDPROC
 
 #define FIXUP_ESPFIX_STACK \
-	movl %esp, %eax; \
-	/* switch to 32bit stack using the pointer on top of 16bit stack */ \
-	lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
-	/* copy data from 16bit stack to 32bit stack */ \
-	call fixup_x86_bogus_stack; \
-	/* put ESP to the proper location */ \
-	movl %eax, %esp;
-#define UNWIND_ESPFIX_STACK \
+	/* since we are on the wrong stack, we can't do this in C :( */ \
+	movl %gs:PDA_cpu, %ebx; \
+	PER_CPU(cpu_gdt_descr, %ebx); \
+	movl GDS_address(%ebx), %ebx; \
+	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
+	addl %esp, %eax; \
+	pushl $__KERNEL_DS; \
+	CFI_ADJUST_CFA_OFFSET 4; \
 	pushl %eax; \
 	CFI_ADJUST_CFA_OFFSET 4; \
+	lss (%esp), %esp; \
+	CFI_ADJUST_CFA_OFFSET -8;
+#define UNWIND_ESPFIX_STACK \
 	movl %ss, %eax; \
-	/* see if on 16bit stack */ \
+	/* see if on espfix stack */ \
 	cmpw $__ESPFIX_SS, %ax; \
-	je 28f; \
-27:	popl %eax; \
-	CFI_ADJUST_CFA_OFFSET -4; \
-.section .fixup,"ax"; \
-28:	movl $__KERNEL_DS, %eax; \
+	jne 27f; \
+	movl $__KERNEL_DS, %eax; \
 	movl %eax, %ds; \
 	movl %eax, %es; \
-	/* switch to 32bit stack */ \
+	/* switch to normal stack */ \
 	FIXUP_ESPFIX_STACK; \
-	jmp 27b; \
-.previous
+27:;
 
 /*
  * Build the entry stubs and pointer table with
@@ -608,13 +628,16 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
+	/* the function address is in %gs's slot on the stack */
+	pushl %es
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET es, 0*/
 	pushl %ds
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ds, 0*/
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET eax, 0
-	xorl %eax, %eax
 	pushl %ebp
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ebp, 0
@@ -627,7 +650,6 @@
 	pushl %edx
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET edx, 0
-	decl %eax			# eax = -1
 	pushl %ecx
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ecx, 0
@@ -635,18 +657,20 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ebx, 0
 	cld
-	pushl %es
+	pushl %gs
 	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET es, 0*/
+	/*CFI_REL_OFFSET gs, 0*/
+	movl $(__KERNEL_PDA), %ecx
+	movl %ecx, %gs
 	UNWIND_ESPFIX_STACK
 	popl %ecx
 	CFI_ADJUST_CFA_OFFSET -4
 	/*CFI_REGISTER es, ecx*/
-	movl ES(%esp), %edi		# get the function address
-	movl ORIG_EAX(%esp), %edx	# get the error code
-	movl %eax, ORIG_EAX(%esp)
-	movl %ecx, ES(%esp)
-	/*CFI_REL_OFFSET es, ES*/
+	movl PT_GS(%esp), %edi		# get the function address
+	movl PT_ORIG_EAX(%esp), %edx	# get the error code
+	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
+	mov  %ecx, PT_GS(%esp)
+	/*CFI_REL_OFFSET gs, PT_GS*/
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
@@ -682,7 +706,7 @@
 	GET_CR0_INTO_EAX
 	testl $0x4, %eax		# EM (math emulation bit)
 	jne device_not_available_emulate
-	preempt_stop
+	preempt_stop(CLBR_ANY)
 	call math_state_restore
 	jmp ret_from_exception
 device_not_available_emulate:
@@ -754,7 +778,7 @@
 	cmpw $__ESPFIX_SS, %ax
 	popl %eax
 	CFI_ADJUST_CFA_OFFSET -4
-	je nmi_16bit_stack
+	je nmi_espfix_stack
 	cmpl $sysenter_entry,(%esp)
 	je nmi_stack_fixup
 	pushl %eax
@@ -797,7 +821,7 @@
 	FIX_STACK(24,nmi_stack_correct, 1)
 	jmp nmi_stack_correct
 
-nmi_16bit_stack:
+nmi_espfix_stack:
 	/* We have a RING0_INT_FRAME here.
 	 *
 	 * create the pointer to lss back
@@ -806,7 +830,6 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %esp
 	CFI_ADJUST_CFA_OFFSET 4
-	movzwl %sp, %esp
 	addw $4, (%esp)
 	/* copy the iret frame of 12 bytes */
 	.rept 3
@@ -817,11 +840,11 @@
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	FIXUP_ESPFIX_STACK		# %eax == %esp
-	CFI_ADJUST_CFA_OFFSET -20	# the frame has now moved
 	xorl %edx,%edx			# zero error code
 	call do_nmi
 	RESTORE_REGS
-	lss 12+4(%esp), %esp		# back to 16bit stack
+	lss 12+4(%esp), %esp		# back to espfix stack
+	CFI_ADJUST_CFA_OFFSET -24
 1:	INTERRUPT_RETURN
 	CFI_ENDPROC
 .section __ex_table,"a"
@@ -830,6 +853,19 @@
 .previous
 KPROBE_END(nmi)
 
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_iret)
+1:	iret
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
+ENTRY(native_irq_enable_sysexit)
+	sti
+	sysexit
+#endif
+
 KPROBE_ENTRY(int3)
 	RING0_INT_FRAME
 	pushl $-1			# mark this as an int
@@ -949,26 +985,27 @@
 	movl	4(%esp), %edx
 	movl	(%esp), %ecx
 	leal	4(%esp), %eax
-	movl	%ebx, EBX(%edx)
+	movl	%ebx, PT_EBX(%edx)
 	xorl	%ebx, %ebx
-	movl	%ebx, ECX(%edx)
-	movl	%ebx, EDX(%edx)
-	movl	%esi, ESI(%edx)
-	movl	%edi, EDI(%edx)
-	movl	%ebp, EBP(%edx)
-	movl	%ebx, EAX(%edx)
-	movl	$__USER_DS, DS(%edx)
-	movl	$__USER_DS, ES(%edx)
-	movl	%ebx, ORIG_EAX(%edx)
-	movl	%ecx, EIP(%edx)
+	movl	%ebx, PT_ECX(%edx)
+	movl	%ebx, PT_EDX(%edx)
+	movl	%esi, PT_ESI(%edx)
+	movl	%edi, PT_EDI(%edx)
+	movl	%ebp, PT_EBP(%edx)
+	movl	%ebx, PT_EAX(%edx)
+	movl	$__USER_DS, PT_DS(%edx)
+	movl	$__USER_DS, PT_ES(%edx)
+	movl	$0, PT_GS(%edx)
+	movl	%ebx, PT_ORIG_EAX(%edx)
+	movl	%ecx, PT_EIP(%edx)
 	movl	12(%esp), %ecx
-	movl	$__KERNEL_CS, CS(%edx)
-	movl	%ebx, EFLAGS(%edx)
-	movl	%eax, OLDESP(%edx)
+	movl	$__KERNEL_CS, PT_CS(%edx)
+	movl	%ebx, PT_EFLAGS(%edx)
+	movl	%eax, PT_OLDESP(%edx)
 	movl	8(%esp), %eax
 	movl	%ecx, 8(%esp)
-	movl	EBX(%edx), %ebx
-	movl	$__KERNEL_DS, OLDSS(%edx)
+	movl	PT_EBX(%edx), %ebx
+	movl	$__KERNEL_DS, PT_OLDSS(%edx)
 	jmpl	*%eax
 	CFI_ENDPROC
 ENDPROC(arch_unwind_init_running)
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index ca31f18..edef508 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -55,6 +55,12 @@
  */
 ENTRY(startup_32)
 
+#ifdef CONFIG_PARAVIRT
+        movl %cs, %eax
+        testl $0x3, %eax
+        jnz startup_paravirt
+#endif
+
 /*
  * Set segments to known values.
  */
@@ -302,6 +308,7 @@
 	movl %eax,%cr0
 
 	call check_x87
+	call setup_pda
 	lgdt cpu_gdt_descr
 	lidt idt_descr
 	ljmp $(__KERNEL_CS),$1f
@@ -312,10 +319,13 @@
 	movl %eax,%ds
 	movl %eax,%es
 
-	xorl %eax,%eax			# Clear FS/GS and LDT
+	xorl %eax,%eax			# Clear FS and LDT
 	movl %eax,%fs
-	movl %eax,%gs
 	lldt %ax
+
+	movl $(__KERNEL_PDA),%eax
+	mov  %eax,%gs
+
 	cld			# gcc2 wants the direction flag cleared at all times
 	pushl $0		# fake return address for unwinder
 #ifdef CONFIG_SMP
@@ -346,6 +356,23 @@
 	ret
 
 /*
+ * Point the GDT at this CPU's PDA.  On boot this will be
+ * cpu_gdt_table and boot_pda; for secondary CPUs, these will be
+ * that CPU's GDT and PDA.
+ */
+setup_pda:
+	/* get the PDA pointer */
+	movl start_pda, %eax
+
+	/* slot the PDA address into the GDT */
+	mov cpu_gdt_descr+2, %ecx
+	mov %ax, (__KERNEL_PDA+0+2)(%ecx)		/* base & 0x0000ffff */
+	shr $16, %eax
+	mov %al, (__KERNEL_PDA+4+0)(%ecx)		/* base & 0x00ff0000 */
+	mov %ah, (__KERNEL_PDA+4+3)(%ecx)		/* base & 0xff000000 */
+	ret
+
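What setup_pda does to the GDT entry can be sanity-checked in C: a
descriptor's 32-bit base is scattered over bytes 2-4 and 7. A user-space
sketch with a made-up PDA address:

	#include <stdio.h>
	#include <stdint.h>

	/* scatter a 32-bit base over a GDT descriptor the way setup_pda
	 * does: bits 0-15 at bytes 2-3, 16-23 at byte 4, 24-31 at byte 7 */
	static void set_desc_base(uint8_t *desc, uint32_t base)
	{
		desc[2] = base & 0xff;
		desc[3] = (base >> 8) & 0xff;
		desc[4] = (base >> 16) & 0xff;
		desc[7] = (base >> 24) & 0xff;
	}

	int main(void)
	{
		uint8_t desc[8] = { 0 };
		int i;

		set_desc_base(desc, 0xc0312040);	/* made-up address */
		for (i = 0; i < 8; i++)
			printf("%02x ", desc[i]);
		printf("\n");	/* 00 00 40 20 31 00 00 c0 */
		return 0;
	}
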
+/*
  *  setup_idt
  *
  *  sets up an idt with 256 entries pointing to
@@ -465,6 +492,33 @@
 #endif
 	iret
 
+#ifdef CONFIG_PARAVIRT
+startup_paravirt:
+	cld
+ 	movl $(init_thread_union+THREAD_SIZE),%esp
+
+	/* We take pains to preserve all the regs. */
+	pushl	%edx
+	pushl	%ecx
+	pushl	%eax
+
+	/* paravirt.o is last in link, and that probe fn never returns */
+	pushl	$__start_paravirtprobe
+1:
+	movl	0(%esp), %eax
+	pushl	(%eax)
+	movl	8(%esp), %eax
+	call	*(%esp)
+	popl	%eax
+
+	movl	4(%esp), %eax
+	movl	8(%esp), %ecx
+	movl	12(%esp), %edx
+
+	addl	$4, (%esp)
+	jmp	1b
+#endif
+
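The loop above walks a linker-built table of probe function pointers, calling
each in turn; since paravirt.o links last and declares start_kernel the probe
of last resort (see paravirt.c below), the loop needs no termination test. A
C analogue, with probe functions invented for the demo:

	#include <stdio.h>

	typedef void (*probe_fn)(void);

	static void probe_a(void) { printf("probe_a: not my hypervisor\n"); }
	static void probe_b(void) { printf("probe_b: not mine either\n"); }

	int main(void)
	{
		/* stand-in for the __start_paravirtprobe section table */
		probe_fn probes[] = { probe_a, probe_b };
		unsigned i;

		for (i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
			probes[i]();	/* a matching probe would not return */
		return 0;
	}
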
 /*
  * Real beginning of normal "text" segment
  */
@@ -484,6 +538,8 @@
  * This starts the data section.
  */
 .data
+ENTRY(start_pda)
+	.long boot_pda
 
 ENTRY(stack_start)
 	.long init_thread_union+THREAD_SIZE
@@ -525,7 +581,7 @@
 
 # boot GDT descriptor (later on used by CPU#0):
 	.word 0				# 32 bit align gdt_desc.address
-cpu_gdt_descr:
+ENTRY(cpu_gdt_descr)
 	.word GDT_ENTRIES*8-1
 	.long cpu_gdt_table
 
@@ -584,8 +640,8 @@
 	.quad 0x00009a000000ffff	/* 0xc0 APM CS 16 code (16 bit) */
 	.quad 0x004092000000ffff	/* 0xc8 APM DS    data */
 
-	.quad 0x0000920000000000	/* 0xd0 - ESPFIX 16-bit SS */
-	.quad 0x0000000000000000	/* 0xd8 - unused */
+	.quad 0x00c0920000000000	/* 0xd0 - ESPFIX SS */
+	.quad 0x00cf92000000ffff	/* 0xd8 - PDA */
 	.quad 0x0000000000000000	/* 0xe0 - unused */
 	.quad 0x0000000000000000	/* 0xe8 - unused */
 	.quad 0x0000000000000000	/* 0xf0 - unused */
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index 17647a5..45a8685 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -34,6 +34,7 @@
 	unsigned long hpet_period;
 	void __iomem* hpet_base;
 	u64 tmp;
+	int err;
 
 	if (!is_hpet_enabled())
 		return -ENODEV;
@@ -61,7 +62,11 @@
 	do_div(tmp, FSEC_PER_NSEC);
 	clocksource_hpet.mult = (u32)tmp;
 
-	return clocksource_register(&clocksource_hpet);
+	err = clocksource_register(&clocksource_hpet);
+	if (err)
+		iounmap(hpet_base);
+
+	return err;
 }
 
 module_init(init_hpet_clocksource);
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index 62996cd..c8d4582 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -381,7 +381,10 @@
 	}
 }
 
-void __init init_IRQ(void)
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+
+void __init native_init_IRQ(void)
 {
 	int i;
 
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 44c5a32..e21dcde 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -154,14 +154,20 @@
  * the interrupt, and we need to make sure the entry is fully populated
  * before that happens.
  */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	union entry_union eu;
+	eu.entry = e;
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
 static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
 	unsigned long flags;
-	union entry_union eu;
-	eu.entry = e;
 	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+	__ioapic_write_entry(apic, pin, e);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -837,8 +843,7 @@
 
 		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
 		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+		     mp_bus_id_to_type[lbus] == MP_BUS_MCA
 		    ) &&
 		    (mp_irqs[i].mpc_irqtype == type) &&
 		    (mp_irqs[i].mpc_srcbusirq == irq))
@@ -857,8 +862,7 @@
 
 		if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
 		     mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
-		     mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+		     mp_bus_id_to_type[lbus] == MP_BUS_MCA
 		    ) &&
 		    (mp_irqs[i].mpc_irqtype == type) &&
 		    (mp_irqs[i].mpc_srcbusirq == irq))
@@ -988,12 +992,6 @@
 #define default_MCA_trigger(idx)	(1)
 #define default_MCA_polarity(idx)	(0)
 
-/* NEC98 interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_NEC98_trigger(idx)     (0)
-#define default_NEC98_polarity(idx)    (0)
-
 static int __init MPBIOS_polarity(int idx)
 {
 	int bus = mp_irqs[idx].mpc_srcbus;
@@ -1028,11 +1026,6 @@
 					polarity = default_MCA_polarity(idx);
 					break;
 				}
-				case MP_BUS_NEC98: /* NEC 98 pin */
-				{
-					polarity = default_NEC98_polarity(idx);
-					break;
-				}
 				default:
 				{
 					printk(KERN_WARNING "broken BIOS!!\n");
@@ -1102,11 +1095,6 @@
 					trigger = default_MCA_trigger(idx);
 					break;
 				}
-				case MP_BUS_NEC98: /* NEC 98 pin */
-				{
-					trigger = default_NEC98_trigger(idx);
-					break;
-				}
 				default:
 				{
 					printk(KERN_WARNING "broken BIOS!!\n");
@@ -1168,7 +1156,6 @@
 		case MP_BUS_ISA: /* ISA pin */
 		case MP_BUS_EISA:
 		case MP_BUS_MCA:
-		case MP_BUS_NEC98:
 		{
 			irq = mp_irqs[idx].mpc_srcbusirq;
 			break;
@@ -1236,7 +1223,7 @@
 }
 
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 
 static int __assign_irq_vector(int irq)
 {
@@ -1361,8 +1348,8 @@
 			if (!apic && (irq < 16))
 				disable_8259A_irq(irq);
 		}
-		ioapic_write_entry(apic, pin, entry);
 		spin_lock_irqsave(&ioapic_lock, flags);
+		__ioapic_write_entry(apic, pin, entry);
 		set_native_irq_info(irq, TARGET_CPUS);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
@@ -1927,6 +1914,15 @@
 static void __init setup_ioapic_ids_from_mpc(void) { }
 #endif
 
+static int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+	no_timer_check = 1;
+	return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards: their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1935,10 +1931,13 @@
  *	- if this function detects that timer IRQs are defunct, then we fall
  *	  back to ISA timer IRQs
  */
-static int __init timer_irq_works(void)
+int __init timer_irq_works(void)
 {
 	unsigned long t1 = jiffies;
 
+	if (no_timer_check)
+		return 1;
+
 	local_irq_enable();
 	/* Let ten ticks pass... */
 	mdelay((10 * 1000) / HZ);
@@ -2162,9 +2161,15 @@
 	unsigned char save_control, save_freq_select;
 
 	pin  = find_isa_irq_pin(8, mp_INT);
-	apic = find_isa_irq_apic(8, mp_INT);
-	if (pin == -1)
+	if (pin == -1) {
+		WARN_ON_ONCE(1);
 		return;
+	}
+	apic = find_isa_irq_apic(8, mp_INT);
+	if (apic == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
 
 	entry0 = ioapic_read_entry(apic, pin);
 	clear_IO_APIC_pin(apic, pin);
@@ -2209,7 +2214,7 @@
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  */
-static inline void check_timer(void)
+static inline void __init check_timer(void)
 {
 	int apic1, pin1, apic2, pin2;
 	int vector;
@@ -2857,8 +2862,8 @@
 	if (!ioapic && (irq < 16))
 		disable_8259A_irq(irq);
 
-	ioapic_write_entry(ioapic, pin, entry);
 	spin_lock_irqsave(&ioapic_lock, flags);
+	__ioapic_write_entry(ioapic, pin, entry);
 	set_native_irq_info(irq, TARGET_CPUS);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c
index 445211e..b410e5f 100644
--- a/arch/i386/kernel/ldt.c
+++ b/arch/i386/kernel/ldt.c
@@ -160,16 +160,14 @@
 {
 	int err;
 	unsigned long size;
-	void *address;
 
 	err = 0;
-	address = &default_ldt[0];
 	size = 5*sizeof(struct desc_struct);
 	if (size > bytecount)
 		size = bytecount;
 
 	err = size;
-	if (copy_to_user(ptr, address, size))
+	if (clear_user(ptr, size))
 		err = -EFAULT;
 
 	return err;
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
index eb57a85..b83672b 100644
--- a/arch/i386/kernel/mca.c
+++ b/arch/i386/kernel/mca.c
@@ -283,10 +283,9 @@
 	bus->f.mca_transform_memory = mca_dummy_transform_memory;
 
 	/* get the motherboard device */
-	mca_dev = kmalloc(sizeof(struct mca_device), GFP_KERNEL);
+	mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL);
 	if(unlikely(!mca_dev))
 		goto out_nomem;
-	memset(mca_dev, 0, sizeof(struct mca_device));
 
 	/*
 	 * We do not expect many MCA interrupts during initialization,
@@ -310,11 +309,9 @@
 	mca_dev->slot = MCA_MOTHERBOARD;
 	mca_register_device(MCA_PRIMARY_BUS, mca_dev);
 
-	mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
+	mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
 	if(unlikely(!mca_dev))
 		goto out_unlock_nomem;
-	memset(mca_dev, 0, sizeof(struct mca_device));
-
 
 	/* Put motherboard into video setup mode, read integrated video
 	 * POS registers, and turn motherboard setup off.
@@ -349,10 +346,9 @@
 	}
 	if(which_scsi) {
 		/* found a scsi card */
-		mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
+		mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
 		if(unlikely(!mca_dev))
 			goto out_unlock_nomem;
-		memset(mca_dev, 0, sizeof(struct mca_device));
 
 		for(j = 0; j < 8; j++)
 			mca_dev->pos[j] = pos[j];
@@ -378,10 +374,9 @@
 		if(!mca_read_and_store_pos(pos))
 			continue;
 
-		mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC);
+		mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
 		if(unlikely(!mca_dev))
 			goto out_unlock_nomem;
-		memset(mca_dev, 0, sizeof(struct mca_device));
 
 		for(j=0; j<8; j++)
 			mca_dev->pos[j]=pos[j];
diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c
index 470cf97..d7d9c8b 100644
--- a/arch/i386/kernel/module.c
+++ b/arch/i386/kernel/module.c
@@ -108,7 +108,8 @@
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+		*para = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 
@@ -118,6 +119,8 @@
 			alt = s;
 		if (!strcmp(".smp_locks", secstrings + s->sh_name))
 			locks= s;
+		if (!strcmp(".parainstructions", secstrings + s->sh_name))
+			para = s;
 	}
 
 	if (alt) {
@@ -132,6 +135,12 @@
 					    lseg, lseg + locks->sh_size,
 					    tseg, tseg + text->sh_size);
 	}
+
+	if (para) {
+		void *pseg = (void *)para->sh_addr;
+		apply_paravirt(pseg, pseg + para->sh_size);
+	}
+
 	return 0;
 }
 
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index 442aaf8..2ce6722 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -249,8 +249,6 @@
 		mp_current_pci_id++;
 	} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
-	} else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
-		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
 	} else {
 		printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
 	}
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index 7763c67..1d1a56c 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -195,7 +195,6 @@
 {
 	const u32 __user *tmp = (const u32 __user *)buf;
 	u32 data[2];
-	size_t rv;
 	u32 reg = *ppos;
 	int cpu = iminor(file->f_dentry->d_inode);
 	int err;
@@ -203,7 +202,7 @@
 	if (count % 8)
 		return -EINVAL;	/* Invalid chunk size */
 
-	for (rv = 0; count; count -= 8) {
+	for (; count; count -= 8) {
 		if (copy_from_user(&data, tmp, 8))
 			return -EFAULT;
 		err = do_wrmsr(cpu, reg, data[0], data[1]);
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index eaafe23..f5bc7e1 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -22,6 +22,7 @@
 #include <linux/percpu.h>
 #include <linux/dmi.h>
 #include <linux/kprobes.h>
+#include <linux/cpumask.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
@@ -42,6 +43,8 @@
 static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
 static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
 
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
  * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
  */
@@ -867,14 +870,16 @@
 
 void touch_nmi_watchdog (void)
 {
-	int i;
+	if (nmi_watchdog > 0) {
+		unsigned cpu;
 
-	/*
-	 * Just reset the alert counters, (other CPUs might be
-	 * spinning on locks we hold):
-	 */
-	for_each_possible_cpu(i)
-		alert_counter[i] = 0;
+		/*
+		 * Just reset the alert counters (other CPUs might be
+		 * spinning on locks we hold):
+		 */
+		for_each_present_cpu (cpu)
+			alert_counter[cpu] = 0;
+	}
 
 	/*
 	 * Tickle the softlockup detector too:
@@ -907,6 +912,16 @@
 		touched = 1;
 	}
 
+	if (cpu_isset(cpu, backtrace_mask)) {
+		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */
+
+		spin_lock(&lock);
+		printk("NMI backtrace for cpu %d\n", cpu);
+		dump_stack();
+		spin_unlock(&lock);
+		cpu_clear(cpu, backtrace_mask);
+	}
+
 	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
 	/* if the apic timer isn't firing, this cpu isn't doing much */
@@ -1033,6 +1048,19 @@
 
 #endif
 
+void __trigger_all_cpu_backtrace(void)
+{
+	int i;
+
+	backtrace_mask = cpu_online_map;
+	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
+	for (i = 0; i < 10 * 1000; i++) {
+		if (cpus_empty(backtrace_mask))
+			break;
+		mdelay(1);
+	}
+}
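
The pattern above - seed a mask, let each CPU's NMI handler print its stack
and clear its own bit, poll with a timeout - can be sketched in plain C
(single-threaded stand-in; the handlers' clearing is simulated):

	#include <stdio.h>

	int main(void)
	{
		unsigned long backtrace_mask = 0xf;	/* pretend 4 CPUs */
		int i;

		/* poll until every responder clears its bit, up to 10 steps
		 * (the kernel polls in 1ms steps for up to 10 seconds) */
		for (i = 0; i < 10; i++) {
			backtrace_mask &= ~(1UL << i);	/* simulated handler */
			if (!backtrace_mask)
				break;
		}
		printf("mask empty after %d polls\n", i);	/* 3 */
		return 0;
	}
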
+
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
new file mode 100644
index 0000000..3dceab5
--- /dev/null
+++ b/arch/i386/kernel/paravirt.c
@@ -0,0 +1,569 @@
+/*  Paravirtualization interfaces
+    Copyright (C) 2006 Rusty Russell IBM Corporation
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/efi.h>
+#include <linux/bcd.h>
+#include <linux/start_kernel.h>
+
+#include <asm/bug.h>
+#include <asm/paravirt.h>
+#include <asm/desc.h>
+#include <asm/setup.h>
+#include <asm/arch_hooks.h>
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/delay.h>
+#include <asm/fixmap.h>
+#include <asm/apic.h>
+#include <asm/tlbflush.h>
+
+/* nop stub */
+static void native_nop(void)
+{
+}
+
+static void __init default_banner(void)
+{
+	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
+	       paravirt_ops.name);
+}
+
+char *memory_setup(void)
+{
+	return paravirt_ops.memory_setup();
+}
+
+/* Simple instruction patching code. */
+#define DEF_NATIVE(name, code)					\
+	extern const char start_##name[], end_##name[];		\
+	asm("start_" #name ": " code "; end_" #name ":")
+DEF_NATIVE(cli, "cli");
+DEF_NATIVE(sti, "sti");
+DEF_NATIVE(popf, "push %eax; popf");
+DEF_NATIVE(pushf, "pushf; pop %eax");
+DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
+DEF_NATIVE(iret, "iret");
+DEF_NATIVE(sti_sysexit, "sti; sysexit");
+
+static const struct native_insns
+{
+	const char *start, *end;
+} native_insns[] = {
+	[PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
+	[PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
+	[PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
+	[PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
+	[PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
+	[PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
+	[PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
+};
+
+static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
+{
+	unsigned int insn_len;
+
+	/* Don't touch it if we don't have a replacement */
+	if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
+		return len;
+
+	insn_len = native_insns[type].end - native_insns[type].start;
+
+	/* Similarly if we can't fit replacement. */
+	/* Similarly, if we can't fit the replacement. */
+		return len;
+
+	memcpy(insns, native_insns[type].start, insn_len);
+	return insn_len;
+}
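
native_patch's contract can be shown with a toy: copy the replacement in only
when it fits the room left at the call site, otherwise report the original
length and leave the site alone (byte values and sizes here are made up;
0xfa happens to be the cli opcode):

	#include <stdio.h>
	#include <string.h>

	static unsigned patch(char *site, unsigned len,
			      const char *repl, unsigned repl_len)
	{
		if (repl_len > len)
			return len;	/* doesn't fit: keep the indirect call */
		memcpy(site, repl, repl_len);
		return repl_len;
	}

	int main(void)
	{
		char site[8];	/* room the compiler left at the call site */
		unsigned used = patch(site, sizeof(site), "\xfa", 1);

		printf("patched %u byte(s), first byte %02x\n",
		       used, (unsigned char)site[0]);	/* 1, fa */
		return 0;
	}
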
+
+static fastcall unsigned long native_get_debugreg(int regno)
+{
+	unsigned long val = 0; 	/* Damn you, gcc! */
+
+	switch (regno) {
+	case 0:
+		asm("movl %%db0, %0" :"=r" (val)); break;
+	case 1:
+		asm("movl %%db1, %0" :"=r" (val)); break;
+	case 2:
+		asm("movl %%db2, %0" :"=r" (val)); break;
+	case 3:
+		asm("movl %%db3, %0" :"=r" (val)); break;
+	case 6:
+		asm("movl %%db6, %0" :"=r" (val)); break;
+	case 7:
+		asm("movl %%db7, %0" :"=r" (val)); break;
+	default:
+		BUG();
+	}
+	return val;
+}
+
+static fastcall void native_set_debugreg(int regno, unsigned long value)
+{
+	switch (regno) {
+	case 0:
+		asm("movl %0,%%db0"	: /* no output */ :"r" (value));
+		break;
+	case 1:
+		asm("movl %0,%%db1"	: /* no output */ :"r" (value));
+		break;
+	case 2:
+		asm("movl %0,%%db2"	: /* no output */ :"r" (value));
+		break;
+	case 3:
+		asm("movl %0,%%db3"	: /* no output */ :"r" (value));
+		break;
+	case 6:
+		asm("movl %0,%%db6"	: /* no output */ :"r" (value));
+		break;
+	case 7:
+		asm("movl %0,%%db7"	: /* no output */ :"r" (value));
+		break;
+	default:
+		BUG();
+	}
+}
+
+void init_IRQ(void)
+{
+	paravirt_ops.init_IRQ();
+}
+
+static fastcall void native_clts(void)
+{
+	asm volatile ("clts");
+}
+
+static fastcall unsigned long native_read_cr0(void)
+{
+	unsigned long val;
+	asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
+	return val;
+}
+
+static fastcall void native_write_cr0(unsigned long val)
+{
+	asm volatile("movl %0,%%cr0": :"r" (val));
+}
+
+static fastcall unsigned long native_read_cr2(void)
+{
+	unsigned long val;
+	asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
+	return val;
+}
+
+static fastcall void native_write_cr2(unsigned long val)
+{
+	asm volatile("movl %0,%%cr2": :"r" (val));
+}
+
+static fastcall unsigned long native_read_cr3(void)
+{
+	unsigned long val;
+	asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
+	return val;
+}
+
+static fastcall void native_write_cr3(unsigned long val)
+{
+	asm volatile("movl %0,%%cr3": :"r" (val));
+}
+
+static fastcall unsigned long native_read_cr4(void)
+{
+	unsigned long val;
+	asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
+	return val;
+}
+
+static fastcall unsigned long native_read_cr4_safe(void)
+{
+	unsigned long val;
+	/* This could fault if %cr4 does not exist */
+	asm("1: movl %%cr4, %0		\n"
+		"2:				\n"
+		".section __ex_table,\"a\"	\n"
+		".long 1b,2b			\n"
+		".previous			\n"
+		: "=r" (val): "0" (0));
+	return val;
+}
+
+static fastcall void native_write_cr4(unsigned long val)
+{
+	asm volatile("movl %0,%%cr4": :"r" (val));
+}
+
+static fastcall unsigned long native_save_fl(void)
+{
+	unsigned long f;
+	asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
+	return f;
+}
+
+static fastcall void native_restore_fl(unsigned long f)
+{
+	asm volatile("pushl %0 ; popfl": /* no output */
+			     :"g" (f)
+			     :"memory", "cc");
+}
+
+static fastcall void native_irq_disable(void)
+{
+	asm volatile("cli": : :"memory");
+}
+
+static fastcall void native_irq_enable(void)
+{
+	asm volatile("sti": : :"memory");
+}
+
+static fastcall void native_safe_halt(void)
+{
+	asm volatile("sti; hlt": : :"memory");
+}
+
+static fastcall void native_halt(void)
+{
+	asm volatile("hlt": : :"memory");
+}
+
+static fastcall void native_wbinvd(void)
+{
+	asm volatile("wbinvd": : :"memory");
+}
+
+static fastcall unsigned long long native_read_msr(unsigned int msr, int *err)
+{
+	unsigned long long val;
+
+	asm volatile("2: rdmsr ; xorl %0,%0\n"
+		     "1:\n\t"
+		     ".section .fixup,\"ax\"\n\t"
+		     "3:  movl %3,%0 ; jmp 1b\n\t"
+		     ".previous\n\t"
+ 		     ".section __ex_table,\"a\"\n"
+		     "   .align 4\n\t"
+		     "   .long 	2b,3b\n\t"
+		     ".previous"
+		     : "=r" (*err), "=A" (val)
+		     : "c" (msr), "i" (-EFAULT));
+
+	return val;
+}
+
+static fastcall int native_write_msr(unsigned int msr, unsigned long long val)
+{
+	int err;
+	asm volatile("2: wrmsr ; xorl %0,%0\n"
+		     "1:\n\t"
+		     ".section .fixup,\"ax\"\n\t"
+		     "3:  movl %4,%0 ; jmp 1b\n\t"
+		     ".previous\n\t"
+ 		     ".section __ex_table,\"a\"\n"
+		     "   .align 4\n\t"
+		     "   .long 	2b,3b\n\t"
+		     ".previous"
+		     : "=a" (err)
+		     : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
+		       "i" (-EFAULT));
+	return err;
+}
+
+static fastcall unsigned long long native_read_tsc(void)
+{
+	unsigned long long val;
+	asm volatile("rdtsc" : "=A" (val));
+	return val;
+}
+
+static fastcall unsigned long long native_read_pmc(void)
+{
+	unsigned long long val;
+	asm volatile("rdpmc" : "=A" (val));
+	return val;
+}
+
+static fastcall void native_load_tr_desc(void)
+{
+	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+}
+
+static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr)
+{
+	asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr)
+{
+	asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr)
+{
+	asm ("sgdt %0":"=m" (*dtr));
+}
+
+static fastcall void native_store_idt(struct Xgt_desc_struct *dtr)
+{
+	asm ("sidt %0":"=m" (*dtr));
+}
+
+static fastcall unsigned long native_store_tr(void)
+{
+	unsigned long tr;
+	asm ("str %0":"=r" (tr));
+	return tr;
+}
+
+static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+	C(0); C(1); C(2);
+#undef C
+}
+
+static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
+{
+	u32 *lp = (u32 *)((char *)dt + entry*8);
+	lp[0] = entry_low;
+	lp[1] = entry_high;
+}
+
+static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high)
+{
+	native_write_dt_entry(dt, entrynum, low, high);
+}
+
+static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high)
+{
+	native_write_dt_entry(dt, entrynum, low, high);
+}
+
+static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
+{
+	native_write_dt_entry(dt, entrynum, low, high);
+}
+
+static fastcall void native_load_esp0(struct tss_struct *tss,
+				      struct thread_struct *thread)
+{
+	tss->esp0 = thread->esp0;
+
+	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
+	if (unlikely(tss->ss1 != thread->sysenter_cs)) {
+		tss->ss1 = thread->sysenter_cs;
+		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+	}
+}
+
+static fastcall void native_io_delay(void)
+{
+	asm volatile("outb %al,$0x80");
+}
+
+static fastcall void native_flush_tlb(void)
+{
+	__native_flush_tlb();
+}
+
+/*
+ * Global pages have to be flushed a bit differently. Not a real
+ * performance problem because this does not happen often.
+ */
+static fastcall void native_flush_tlb_global(void)
+{
+	__native_flush_tlb_global();
+}
+
+static fastcall void native_flush_tlb_single(u32 addr)
+{
+	__native_flush_tlb_single(addr);
+}
+
+#ifndef CONFIG_X86_PAE
+static fastcall void native_set_pte(pte_t *ptep, pte_t pteval)
+{
+	*ptep = pteval;
+}
+
+static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
+{
+	*ptep = pteval;
+}
+
+static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	*pmdp = pmdval;
+}
+
+#else /* CONFIG_X86_PAE */
+
+static fastcall void native_set_pte(pte_t *ptep, pte_t pte)
+{
+	ptep->pte_high = pte.pte_high;
+	smp_wmb();
+	ptep->pte_low = pte.pte_low;
+}
+
+static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
+{
+	ptep->pte_high = pte.pte_high;
+	smp_wmb();
+	ptep->pte_low = pte.pte_low;
+}
+
+static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+	ptep->pte_low = 0;
+	smp_wmb();
+	ptep->pte_high = pte.pte_high;
+	smp_wmb();
+	ptep->pte_low = pte.pte_low;
+}
+
+static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
+{
+	set_64bit((unsigned long long *)ptep,pte_val(pteval));
+}
+
+static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
+}
+
+static fastcall void native_set_pud(pud_t *pudp, pud_t pudval)
+{
+	*pudp = pudval;
+}
+
+static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	ptep->pte_low = 0;
+	smp_wmb();
+	ptep->pte_high = 0;
+}
+
+static fastcall void native_pmd_clear(pmd_t *pmd)
+{
+	u32 *tmp = (u32 *)pmd;
+	*tmp = 0;
+	smp_wmb();
+	*(tmp + 1) = 0;
+}
+#endif /* CONFIG_X86_PAE */
+
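The ordering in native_set_pte_present above is load-bearing: a PAE pte is two
32-bit words and the hardware walker may read it at any instant, so the
present bit (in the low word) must go invalid first and only come back once
the high word has settled. A single-threaded C sketch of the sequence, with
the smp_wmb() points noted as comments:

	#include <stdio.h>
	#include <stdint.h>

	typedef struct { uint32_t pte_low, pte_high; } pte_t;

	static void set_pte_present_sketch(pte_t *ptep, pte_t val)
	{
		ptep->pte_low = 0;		/* 1: clear present bit first */
		/* smp_wmb() */
		ptep->pte_high = val.pte_high;	/* 2: safe, entry not present */
		/* smp_wmb() */
		ptep->pte_low = val.pte_low;	/* 3: becomes present, whole */
	}

	int main(void)
	{
		pte_t slot = { 0, 0 }, val = { 0x1067, 0x1 };

		set_pte_present_sketch(&slot, val);
		printf("%08x:%08x\n", slot.pte_high, slot.pte_low);
		return 0;	/* 00000001:00001067 */
	}
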
+/* These are in entry.S */
+extern fastcall void native_iret(void);
+extern fastcall void native_irq_enable_sysexit(void);
+
+static int __init print_banner(void)
+{
+	paravirt_ops.banner();
+	return 0;
+}
+core_initcall(print_banner);
+
+/* We simply declare start_kernel to be the paravirt probe of last resort. */
+paravirt_probe(start_kernel);
+
+struct paravirt_ops paravirt_ops = {
+	.name = "bare hardware",
+	.paravirt_enabled = 0,
+	.kernel_rpl = 0,
+
+ 	.patch = native_patch,
+	.banner = default_banner,
+	.arch_setup = native_nop,
+	.memory_setup = machine_specific_memory_setup,
+	.get_wallclock = native_get_wallclock,
+	.set_wallclock = native_set_wallclock,
+	.time_init = time_init_hook,
+	.init_IRQ = native_init_IRQ,
+
+	.cpuid = native_cpuid,
+	.get_debugreg = native_get_debugreg,
+	.set_debugreg = native_set_debugreg,
+	.clts = native_clts,
+	.read_cr0 = native_read_cr0,
+	.write_cr0 = native_write_cr0,
+	.read_cr2 = native_read_cr2,
+	.write_cr2 = native_write_cr2,
+	.read_cr3 = native_read_cr3,
+	.write_cr3 = native_write_cr3,
+	.read_cr4 = native_read_cr4,
+	.read_cr4_safe = native_read_cr4_safe,
+	.write_cr4 = native_write_cr4,
+	.save_fl = native_save_fl,
+	.restore_fl = native_restore_fl,
+	.irq_disable = native_irq_disable,
+	.irq_enable = native_irq_enable,
+	.safe_halt = native_safe_halt,
+	.halt = native_halt,
+	.wbinvd = native_wbinvd,
+	.read_msr = native_read_msr,
+	.write_msr = native_write_msr,
+	.read_tsc = native_read_tsc,
+	.read_pmc = native_read_pmc,
+	.load_tr_desc = native_load_tr_desc,
+	.set_ldt = native_set_ldt,
+	.load_gdt = native_load_gdt,
+	.load_idt = native_load_idt,
+	.store_gdt = native_store_gdt,
+	.store_idt = native_store_idt,
+	.store_tr = native_store_tr,
+	.load_tls = native_load_tls,
+	.write_ldt_entry = native_write_ldt_entry,
+	.write_gdt_entry = native_write_gdt_entry,
+	.write_idt_entry = native_write_idt_entry,
+	.load_esp0 = native_load_esp0,
+
+	.set_iopl_mask = native_set_iopl_mask,
+	.io_delay = native_io_delay,
+	.const_udelay = __const_udelay,
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	.apic_write = native_apic_write,
+	.apic_write_atomic = native_apic_write_atomic,
+	.apic_read = native_apic_read,
+#endif
+
+	.flush_tlb_user = native_flush_tlb,
+	.flush_tlb_kernel = native_flush_tlb_global,
+	.flush_tlb_single = native_flush_tlb_single,
+
+	.set_pte = native_set_pte,
+	.set_pte_at = native_set_pte_at,
+	.set_pmd = native_set_pmd,
+	.pte_update = (void *)native_nop,
+	.pte_update_defer = (void *)native_nop,
+#ifdef CONFIG_X86_PAE
+	.set_pte_atomic = native_set_pte_atomic,
+	.set_pte_present = native_set_pte_present,
+	.set_pud = native_set_pud,
+	.pte_clear = native_pte_clear,
+	.pmd_clear = native_pmd_clear,
+#endif
+
+	.irq_enable_sysexit = native_irq_enable_sysexit,
+	.iret = native_iret,
+};
+EXPORT_SYMBOL(paravirt_ops);
diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c
index 5c8c6ef..41af692 100644
--- a/arch/i386/kernel/pci-dma.c
+++ b/arch/i386/kernel/pci-dma.c
@@ -92,14 +92,12 @@
 	if (!mem_base)
 		goto out;
 
-	dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
+	dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
 	if (!dev->dma_mem)
 		goto out;
-	memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
-	dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
+	dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
 	if (!dev->dma_mem->bitmap)
 		goto free1_out;
-	memset(dev->dma_mem->bitmap, 0, bitmap_size);
 
 	dev->dma_mem->virt_base = mem_base;
 	dev->dma_mem->device_base = device_addr;
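
The pci-dma.c hunk is a pure cleanup: kzalloc() is kmalloc() plus a
zeroing memset() in one call, so each pair collapses and a memset that
could be forgotten on some path disappears. A userspace analogue of the
conversion, with malloc/calloc standing in for kmalloc/kzalloc:

#include <stdlib.h>
#include <string.h>

int main(void)
{
	size_t size = 64;

	/* before: allocate, then remember to zero separately */
	char *before = malloc(size);		/* kmalloc(size, GFP_KERNEL) */
	if (before)
		memset(before, 0, size);

	/* after: one call that allocates and zeroes */
	char *after = calloc(1, size);		/* kzalloc(size, GFP_KERNEL) */

	free(before);
	free(after);
	return 0;
}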
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index dd53c58..9930851 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -56,6 +56,7 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
+#include <asm/pda.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
@@ -99,22 +100,18 @@
  */
 void default_idle(void)
 {
-	local_irq_enable();
-
 	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
 		current_thread_info()->status &= ~TS_POLLING;
 		smp_mb__after_clear_bit();
-		while (!need_resched()) {
-			local_irq_disable();
-			if (!need_resched())
-				safe_halt();
-			else
-				local_irq_enable();
-		}
+		local_irq_disable();
+		if (!need_resched())
+			safe_halt();	/* enables interrupts racelessly */
+		else
+			local_irq_enable();
 		current_thread_info()->status |= TS_POLLING;
 	} else {
-		while (!need_resched())
-			cpu_relax();
+		/* loop is done by the caller */
+		cpu_relax();
 	}
 }
 #ifdef CONFIG_APM_MODULE
@@ -128,14 +125,7 @@
  */
 static void poll_idle (void)
 {
-	local_irq_enable();
-
-	asm volatile(
-		"2:"
-		"testl %0, %1;"
-		"rep; nop;"
-		"je 2b;"
-		: : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
+	cpu_relax();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -256,8 +246,7 @@
 static void mwait_idle(void)
 {
 	local_irq_enable();
-	while (!need_resched())
-		mwait_idle_with_hints(0, 0);
+	mwait_idle_with_hints(0, 0);
 }
 
 void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
@@ -314,8 +303,8 @@
 		regs->eax,regs->ebx,regs->ecx,regs->edx);
 	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
 		regs->esi, regs->edi, regs->ebp);
-	printk(" DS: %04x ES: %04x\n",
-		0xffff & regs->xds,0xffff & regs->xes);
+	printk(" DS: %04x ES: %04x GS: %04x\n",
+	       0xffff & regs->xds, 0xffff & regs->xes, 0xffff & regs->xgs);
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
@@ -346,6 +335,7 @@
 
 	regs.xds = __USER_DS;
 	regs.xes = __USER_DS;
+	regs.xgs = __KERNEL_PDA;
 	regs.orig_eax = -1;
 	regs.eip = (unsigned long) kernel_thread_helper;
 	regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -431,7 +421,6 @@
 	p->thread.eip = (unsigned long) ret_from_fork;
 
 	savesegment(fs,p->thread.fs);
-	savesegment(gs,p->thread.gs);
 
 	tsk = current;
 	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -508,7 +497,7 @@
 	dump->regs.ds = regs->xds;
 	dump->regs.es = regs->xes;
 	savesegment(fs,dump->regs.fs);
-	savesegment(gs,dump->regs.gs);
+	dump->regs.gs = regs->xgs;
 	dump->regs.orig_eax = regs->orig_eax;
 	dump->regs.eip = regs->eip;
 	dump->regs.cs = regs->xcs;
@@ -648,22 +637,27 @@
 
 	__unlazy_fpu(prev_p);
 
+
+	/* we're going to use this soon, after a few expensive things */
+	if (next_p->fpu_counter > 5)
+		prefetch(&next->i387.fxsave);
+
 	/*
 	 * Reload esp0.
 	 */
 	load_esp0(tss, next);
 
 	/*
-	 * Save away %fs and %gs. No need to save %es and %ds, as
-	 * those are always kernel segments while inside the kernel.
-	 * Doing this before setting the new TLS descriptors avoids
-	 * the situation where we temporarily have non-reloadable
-	 * segments in %fs and %gs.  This could be an issue if the
-	 * NMI handler ever used %fs or %gs (it does not today), or
-	 * if the kernel is running inside of a hypervisor layer.
+	 * Save away %fs. No need to save %gs, as it was saved on the
+	 * stack on entry.  No need to save %es and %ds, as those are
+	 * always kernel segments while inside the kernel.  Doing this
+	 * before setting the new TLS descriptors avoids the situation
+	 * where we temporarily have non-reloadable segments in %fs
+	 * and %gs.  This could be an issue if the NMI handler ever
+	 * used %fs or %gs (it does not today), or if the kernel is
+	 * running inside of a hypervisor layer.
 	 */
 	savesegment(fs, prev->fs);
-	savesegment(gs, prev->gs);
 
 	/*
 	 * Load the per-thread Thread-Local Storage descriptor.
@@ -671,22 +665,14 @@
 	load_TLS(next, cpu);
 
 	/*
-	 * Restore %fs and %gs if needed.
+	 * Restore %fs if needed.
 	 *
-	 * Glibc normally makes %fs be zero, and %gs is one of
-	 * the TLS segments.
+	 * Glibc normally makes %fs be zero.
 	 */
 	if (unlikely(prev->fs | next->fs))
 		loadsegment(fs, next->fs);
 
-	if (prev->gs | next->gs)
-		loadsegment(gs, next->gs);
-
-	/*
-	 * Restore IOPL if needed.
-	 */
-	if (unlikely(prev->iopl != next->iopl))
-		set_iopl_mask(next->iopl);
+	write_pda(pcurrent, next_p);
 
 	/*
 	 * Now maybe handle debug registers and/or IO bitmaps
@@ -697,6 +683,13 @@
 
 	disable_tsc(prev_p, next_p);
 
+	/* If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	if (next_p->fpu_counter > 5)
+		math_state_restore();
+
 	return prev_p;
 }
 
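Two threads run through the process.c hunks. First, the need_resched()
polling loop moves out of each idle routine into the generic caller, so
default_idle(), poll_idle() and mwait_idle() each do one iteration per
call; the "enables interrupts racelessly" comment is about the sti;hlt
pair inside safe_halt(): sti takes effect only after the following
instruction, so no wakeup interrupt can slip in between the final
need_resched() check and the halt. Second, a task that used the FPU in
each of its last five timeslices (fpu_counter > 5) has its fxsave area
prefetched early and its math state restored eagerly, trading a
device-not-available trap for a restore that is very likely needed.
The raceless-halt idiom, sketched in pseudo-kernel C:

/*
 *	local_irq_disable();
 *	if (!need_resched())
 *		safe_halt();		// asm("sti; hlt"): atomic pair,
 *					// no lost-interrupt window
 *	else
 *		local_irq_enable();
 */
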
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 775f50e..f3f94ac 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -94,13 +94,9 @@
 				return -EIO;
 			child->thread.fs = value;
 			return 0;
-		case GS:
-			if (value && (value & 3) != 3)
-				return -EIO;
-			child->thread.gs = value;
-			return 0;
 		case DS:
 		case ES:
+		case GS:
 			if (value && (value & 3) != 3)
 				return -EIO;
 			value &= 0xffff;
@@ -116,8 +112,8 @@
 			value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK;
 			break;
 	}
-	if (regno > GS*4)
-		regno -= 2*4;
+	if (regno > ES*4)
+		regno -= 1*4;
 	put_stack_long(child, regno - sizeof(struct pt_regs), value);
 	return 0;
 }
@@ -131,18 +127,16 @@
 		case FS:
 			retval = child->thread.fs;
 			break;
-		case GS:
-			retval = child->thread.gs;
-			break;
 		case DS:
 		case ES:
+		case GS:
 		case SS:
 		case CS:
 			retval = 0xffff;
 			/* fall through */
 		default:
-			if (regno > GS*4)
-				regno -= 2*4;
+			if (regno > ES*4)
+				regno -= 1*4;
 			regno = regno - sizeof(struct pt_regs);
 			retval &= get_stack_long(child, regno);
 	}
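
The ptrace.c changes fall out of %gs moving into pt_regs: GS joins
DS/ES as an ordinary stack-held segment instead of living in
thread_struct, and since only FS now remains outside pt_regs, the
register-offset fixup shrinks from two longs to one. The offset
arithmetic, standalone, using the i386 ptrace slot numbers:

#include <stdio.h>

/* ptrace exposes slots DS=7, ES=8, FS=9, GS=10, ORIG_EAX=11, EIP=12...
 * The stacked pt_regs used to lack both fs and gs, so slots past GS
 * were shifted down by 2*4 bytes; with xgs present, only the fs slot
 * is missing and the shift is 1*4. */
enum { DS = 7, ES = 8, FS = 9, GS = 10, ORIG_EAX = 11, EIP = 12 };

static int stack_offset(int regno, int missing_slots)
{
	return regno * 4 - missing_slots * 4;
}

int main(void)
{
	printf("GS:  old (thread_struct), new %d\n", stack_offset(GS, 1));
	printf("EIP: old %d, new %d\n",
	       stack_offset(EIP, 2), stack_offset(EIP, 1));
	return 0;
}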
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c
index 9f6ab17..a01320a 100644
--- a/arch/i386/kernel/quirks.c
+++ b/arch/i386/kernel/quirks.c
@@ -3,10 +3,23 @@
  */
 #include <linux/pci.h>
 #include <linux/irq.h>
+#include <asm/pci-direct.h>
+#include <asm/genapic.h>
+#include <asm/cpu.h>
 
 #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
+static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
+{
+#ifdef CONFIG_X86_64
+	if (genapic != &apic_flat)
+		panic("APIC mode must be flat on this system\n");
+#elif defined(CONFIG_X86_GENERICARCH)
+	if (genapic != &apic_default)
+		panic("APIC mode must be default(flat) on this system. Use apic=default\n");
+#endif
+}
 
-static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
+void __init quirk_intel_irqbalance(void)
 {
 	u8 config, rev;
 	u32 word;
@@ -16,18 +29,18 @@
 	 * based platforms.
 	 * Disable SW irqbalance/affinity on those platforms.
 	 */
-	pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
+	rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
 	if (rev > 0x9)
 		return;
 
 	printk(KERN_INFO "Intel E7520/7320/7525 detected.");
 
-	/* enable access to config space*/
-	pci_read_config_byte(dev, 0xf4, &config);
-	pci_write_config_byte(dev, 0xf4, config|0x2);
+	/* enable access to config space */
+	config = read_pci_config_byte(0, 0, 0, 0xf4);
+	write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
 
 	/* read xTPR register */
-	raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
+	word = read_pci_config_16(0, 0, 0x40, 0x4c);
 
 	if (!(word & (1 << 13))) {
 		printk(KERN_INFO "Disabling irq balancing and affinity\n");
@@ -38,13 +51,24 @@
 #ifdef CONFIG_PROC_FS
 		no_irq_affinity = 1;
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+		printk(KERN_INFO "Disabling cpu hotplug control\n");
+		enable_cpu_hotplug = 0;
+#endif
+#ifdef CONFIG_X86_64
+		/* force the genapic selection to flat mode so that
+		 * interrupts can be redirected to more than one CPU.
+		 */
+		genapic_force = &apic_flat;
+#endif
 	}
 
-	/* put back the original value for config space*/
+	/* put back the original value for config space */
 	if (!(config & 0x2))
-		pci_write_config_byte(dev, 0xf4, config);
+		write_pci_config_byte(0, 0, 0, 0xf4, config);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7320_MCH,	quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7520_MCH,	quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_E7320_MCH,  verify_quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_E7525_MCH,  verify_quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,   PCI_DEVICE_ID_INTEL_E7520_MCH,  verify_quirk_intel_irqbalance);
+
 #endif
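
The quirk is split in two. The real E7520/7320/7525 work becomes
quirk_intel_irqbalance(), run from early setup with direct ("type 1")
config-space accesses that need no initialized PCI core, and it can now
also pin the genapic to flat mode and veto CPU hotplug control; the PCI
fixup that remains merely verifies that the APIC mode is the one the
quirk required. For reference, a sketch of how such a direct config
read works; outl/inl are assumed port-I/O helpers, and real code needs
ring-0 port access:

#include <stdint.h>

#define PCI_CONF_ADDR	0xCF8
#define PCI_CONF_DATA	0xCFC

extern void outl(uint32_t val, uint16_t port);	/* assumed helpers */
extern uint32_t inl(uint16_t port);

uint8_t direct_read_config_byte(uint8_t bus, uint8_t dev,
				uint8_t fn, uint8_t off)
{
	uint32_t addr = 0x80000000u | ((uint32_t)bus << 16) |
			((uint32_t)dev << 11) | ((uint32_t)fn << 8) |
			(off & ~3u);

	outl(addr, PCI_CONF_ADDR);		/* select bus/dev/fn/reg */
	return inl(PCI_CONF_DATA) >> ((off & 3) * 8);	/* pick the byte */
}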
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 97bb869..79df6e6 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -63,9 +63,6 @@
 #include <setup_arch.h>
 #include <bios_ebda.h>
 
-/* Forward Declaration. */
-void __init find_max_pfn(void);
-
 /* This value is set up by the early boot code to point to the value
    immediately after the boot time page tables.  It contains a *physical*
    address, and must not be in the .bss segment! */
@@ -76,11 +73,8 @@
 /*
  * Machine setup..
  */
-
-#ifdef CONFIG_EFI
-int efi_enabled = 0;
-EXPORT_SYMBOL(efi_enabled);
-#endif
+extern struct resource code_resource;
+extern struct resource data_resource;
 
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
@@ -99,12 +93,6 @@
 unsigned int BIOS_revision;
 unsigned int mca_pentium_flag;
 
-/* For PCI or other memory-mapped resources */
-unsigned long pci_mem_start = 0x10000000;
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_mem_start);
-#endif
-
 /* Boot loader ID as an integer, for the benefit of proc_dointvec */
 int bootloader_type;
 
@@ -134,7 +122,6 @@
 	defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
 EXPORT_SYMBOL(ist_info);
 #endif
-struct e820map e820;
 
 extern void early_cpu_init(void);
 extern int root_mountflags;
@@ -149,516 +136,6 @@
 
 unsigned char __initdata boot_params[PARAM_SIZE];
 
-static struct resource data_resource = {
-	.name	= "Kernel data",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource code_resource = {
-	.name	= "Kernel code",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource system_rom_resource = {
-	.name	= "System ROM",
-	.start	= 0xf0000,
-	.end	= 0xfffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource extension_rom_resource = {
-	.name	= "Extension ROM",
-	.start	= 0xe0000,
-	.end	= 0xeffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource adapter_rom_resources[] = { {
-	.name 	= "Adapter ROM",
-	.start	= 0xc8000,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-}, {
-	.name 	= "Adapter ROM",
-	.start	= 0,
-	.end	= 0,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-} };
-
-static struct resource video_rom_resource = {
-	.name 	= "Video ROM",
-	.start	= 0xc0000,
-	.end	= 0xc7fff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
-};
-
-static struct resource video_ram_resource = {
-	.name	= "Video RAM area",
-	.start	= 0xa0000,
-	.end	= 0xbffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
-static struct resource standard_io_resources[] = { {
-	.name	= "dma1",
-	.start	= 0x0000,
-	.end	= 0x001f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "pic1",
-	.start	= 0x0020,
-	.end	= 0x0021,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name   = "timer0",
-	.start	= 0x0040,
-	.end    = 0x0043,
-	.flags  = IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name   = "timer1",
-	.start  = 0x0050,
-	.end    = 0x0053,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "keyboard",
-	.start	= 0x0060,
-	.end	= 0x006f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "dma page reg",
-	.start	= 0x0080,
-	.end	= 0x008f,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "pic2",
-	.start	= 0x00a0,
-	.end	= 0x00a1,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "dma2",
-	.start	= 0x00c0,
-	.end	= 0x00df,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-}, {
-	.name	= "fpu",
-	.start	= 0x00f0,
-	.end	= 0x00ff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_IO
-} };
-
-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
-
-static int __init romchecksum(unsigned char *rom, unsigned long length)
-{
-	unsigned char *p, sum = 0;
-
-	for (p = rom; p < rom + length; p++)
-		sum += *p;
-	return sum == 0;
-}
-
-static void __init probe_roms(void)
-{
-	unsigned long start, length, upper;
-	unsigned char *rom;
-	int	      i;
-
-	/* video rom */
-	upper = adapter_rom_resources[0].start;
-	for (start = video_rom_resource.start; start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		video_rom_resource.start = start;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = rom[2] * 512;
-
-		/* if checksum okay, trust length byte */
-		if (length && romchecksum(rom, length))
-			video_rom_resource.end = start + length - 1;
-
-		request_resource(&iomem_resource, &video_rom_resource);
-		break;
-	}
-
-	start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
-	if (start < upper)
-		start = upper;
-
-	/* system rom */
-	request_resource(&iomem_resource, &system_rom_resource);
-	upper = system_rom_resource.start;
-
-	/* check for extension rom (ignore length byte!) */
-	rom = isa_bus_to_virt(extension_rom_resource.start);
-	if (romsignature(rom)) {
-		length = extension_rom_resource.end - extension_rom_resource.start + 1;
-		if (romchecksum(rom, length)) {
-			request_resource(&iomem_resource, &extension_rom_resource);
-			upper = extension_rom_resource.start;
-		}
-	}
-
-	/* check for adapter roms on 2k boundaries */
-	for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
-		rom = isa_bus_to_virt(start);
-		if (!romsignature(rom))
-			continue;
-
-		/* 0 < length <= 0x7f * 512, historically */
-		length = rom[2] * 512;
-
-		/* but accept any length that fits if checksum okay */
-		if (!length || start + length > upper || !romchecksum(rom, length))
-			continue;
-
-		adapter_rom_resources[i].start = start;
-		adapter_rom_resources[i].end = start + length - 1;
-		request_resource(&iomem_resource, &adapter_rom_resources[i]);
-
-		start = adapter_rom_resources[i++].end & ~2047UL;
-	}
-}
-
-static void __init limit_regions(unsigned long long size)
-{
-	unsigned long long current_addr = 0;
-	int i;
-
-	if (efi_enabled) {
-		efi_memory_desc_t *md;
-		void *p;
-
-		for (p = memmap.map, i = 0; p < memmap.map_end;
-			p += memmap.desc_size, i++) {
-			md = p;
-			current_addr = md->phys_addr + (md->num_pages << 12);
-			if (md->type == EFI_CONVENTIONAL_MEMORY) {
-				if (current_addr >= size) {
-					md->num_pages -=
-						(((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
-					memmap.nr_map = i + 1;
-					return;
-				}
-			}
-		}
-	}
-	for (i = 0; i < e820.nr_map; i++) {
-		current_addr = e820.map[i].addr + e820.map[i].size;
-		if (current_addr < size)
-			continue;
-
-		if (e820.map[i].type != E820_RAM)
-			continue;
-
-		if (e820.map[i].addr >= size) {
-			/*
-			 * This region starts past the end of the
-			 * requested size, skip it completely.
-			 */
-			e820.nr_map = i;
-		} else {
-			e820.nr_map = i + 1;
-			e820.map[i].size -= current_addr - size;
-		}
-		return;
-	}
-}
-
-void __init add_memory_region(unsigned long long start,
-			      unsigned long long size, int type)
-{
-	int x;
-
-	if (!efi_enabled) {
-       		x = e820.nr_map;
-
-		if (x == E820MAX) {
-		    printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-		    return;
-		}
-
-		e820.map[x].addr = start;
-		e820.map[x].size = size;
-		e820.map[x].type = type;
-		e820.nr_map++;
-	}
-} /* add_memory_region */
-
-#define E820_DEBUG	1
-
-static void __init print_memory_map(char *who)
-{
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		printk(" %s: %016Lx - %016Lx ", who,
-			e820.map[i].addr,
-			e820.map[i].addr + e820.map[i].size);
-		switch (e820.map[i].type) {
-		case E820_RAM:	printk("(usable)\n");
-				break;
-		case E820_RESERVED:
-				printk("(reserved)\n");
-				break;
-		case E820_ACPI:
-				printk("(ACPI data)\n");
-				break;
-		case E820_NVS:
-				printk("(ACPI NVS)\n");
-				break;
-		default:	printk("type %lu\n", e820.map[i].type);
-				break;
-		}
-	}
-}
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries.  The following 
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-struct change_member {
-	struct e820entry *pbios; /* pointer to original bios entry */
-	unsigned long long addr; /* address for this change point */
-};
-static struct change_member change_point_list[2*E820MAX] __initdata;
-static struct change_member *change_point[2*E820MAX] __initdata;
-static struct e820entry *overlap_list[E820MAX] __initdata;
-static struct e820entry new_bios[E820MAX] __initdata;
-
-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
-{
-	struct change_member *change_tmp;
-	unsigned long current_type, last_type;
-	unsigned long long last_addr;
-	int chgidx, still_changing;
-	int overlap_entries;
-	int new_bios_entry;
-	int old_nr, new_nr, chg_nr;
-	int i;
-
-	/*
-		Visually we're performing the following (1,2,3,4 = memory types)...
-
-		Sample memory map (w/overlaps):
-		   ____22__________________
-		   ______________________4_
-		   ____1111________________
-		   _44_____________________
-		   11111111________________
-		   ____________________33__
-		   ___________44___________
-		   __________33333_________
-		   ______________22________
-		   ___________________2222_
-		   _________111111111______
-		   _____________________11_
-		   _________________4______
-
-		Sanitized equivalent (no overlap):
-		   1_______________________
-		   _44_____________________
-		   ___1____________________
-		   ____22__________________
-		   ______11________________
-		   _________1______________
-		   __________3_____________
-		   ___________44___________
-		   _____________33_________
-		   _______________2________
-		   ________________1_______
-		   _________________4______
-		   ___________________2____
-		   ____________________33__
-		   ______________________4_
-	*/
-
-	/* if there's only one memory region, don't bother */
-	if (*pnr_map < 2)
-		return -1;
-
-	old_nr = *pnr_map;
-
-	/* bail out if we find any unreasonable addresses in bios map */
-	for (i=0; i<old_nr; i++)
-		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
-			return -1;
-
-	/* create pointers for initial change-point information (for sorting) */
-	for (i=0; i < 2*old_nr; i++)
-		change_point[i] = &change_point_list[i];
-
-	/* record all known change-points (starting and ending addresses),
-	   omitting those that are for empty memory regions */
-	chgidx = 0;
-	for (i=0; i < old_nr; i++)	{
-		if (biosmap[i].size != 0) {
-			change_point[chgidx]->addr = biosmap[i].addr;
-			change_point[chgidx++]->pbios = &biosmap[i];
-			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
-			change_point[chgidx++]->pbios = &biosmap[i];
-		}
-	}
-	chg_nr = chgidx;    	/* true number of change-points */
-
-	/* sort change-point list by memory addresses (low -> high) */
-	still_changing = 1;
-	while (still_changing)	{
-		still_changing = 0;
-		for (i=1; i < chg_nr; i++)  {
-			/* if <current_addr> > <last_addr>, swap */
-			/* or, if current=<start_addr> & last=<end_addr>, swap */
-			if ((change_point[i]->addr < change_point[i-1]->addr) ||
-				((change_point[i]->addr == change_point[i-1]->addr) &&
-				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
-				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
-			   )
-			{
-				change_tmp = change_point[i];
-				change_point[i] = change_point[i-1];
-				change_point[i-1] = change_tmp;
-				still_changing=1;
-			}
-		}
-	}
-
-	/* create a new bios memory map, removing overlaps */
-	overlap_entries=0;	 /* number of entries in the overlap table */
-	new_bios_entry=0;	 /* index for creating new bios map entries */
-	last_type = 0;		 /* start with undefined memory type */
-	last_addr = 0;		 /* start with 0 as last starting address */
-	/* loop through change-points, determining affect on the new bios map */
-	for (chgidx=0; chgidx < chg_nr; chgidx++)
-	{
-		/* keep track of all overlapping bios entries */
-		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
-		{
-			/* add map entry to overlap list (> 1 entry implies an overlap) */
-			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
-		}
-		else
-		{
-			/* remove entry from list (order independent, so swap with last) */
-			for (i=0; i<overlap_entries; i++)
-			{
-				if (overlap_list[i] == change_point[chgidx]->pbios)
-					overlap_list[i] = overlap_list[overlap_entries-1];
-			}
-			overlap_entries--;
-		}
-		/* if there are overlapping entries, decide which "type" to use */
-		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
-		current_type = 0;
-		for (i=0; i<overlap_entries; i++)
-			if (overlap_list[i]->type > current_type)
-				current_type = overlap_list[i]->type;
-		/* continue building up new bios map based on this information */
-		if (current_type != last_type)	{
-			if (last_type != 0)	 {
-				new_bios[new_bios_entry].size =
-					change_point[chgidx]->addr - last_addr;
-				/* move forward only if the new size was non-zero */
-				if (new_bios[new_bios_entry].size != 0)
-					if (++new_bios_entry >= E820MAX)
-						break; 	/* no more space left for new bios entries */
-			}
-			if (current_type != 0)	{
-				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
-				new_bios[new_bios_entry].type = current_type;
-				last_addr=change_point[chgidx]->addr;
-			}
-			last_type = current_type;
-		}
-	}
-	new_nr = new_bios_entry;   /* retain count for new bios entries */
-
-	/* copy new bios mapping into original location */
-	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
-	*pnr_map = new_nr;
-
-	return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory.  If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
- */
-int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
-{
-	/* Only one memory region (or negative)? Ignore it */
-	if (nr_map < 2)
-		return -1;
-
-	do {
-		unsigned long long start = biosmap->addr;
-		unsigned long long size = biosmap->size;
-		unsigned long long end = start + size;
-		unsigned long type = biosmap->type;
-
-		/* Overflow in 64 bits? Ignore the memory map. */
-		if (start > end)
-			return -1;
-
-		/*
-		 * Some BIOSes claim RAM in the 640k - 1M region.
-		 * Not right. Fix it up.
-		 */
-		if (type == E820_RAM) {
-			if (start < 0x100000ULL && end > 0xA0000ULL) {
-				if (start < 0xA0000ULL)
-					add_memory_region(start, 0xA0000ULL-start, type);
-				if (end <= 0x100000ULL)
-					continue;
-				start = 0x100000ULL;
-				size = end - start;
-			}
-		}
-		add_memory_region(start, size, type);
-	} while (biosmap++,--nr_map);
-	return 0;
-}
-
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
 #ifdef CONFIG_EDD_MODULE
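
This mass deletion is a move, not a removal: the resource tables,
probe_roms(), limit_regions(), add_memory_region(), sanitize_e820_map()
and copy_e820_map() evidently migrate into a separate e820 file in this
series (the new externs for code_resource/data_resource and the
now-global user_defined_memmap below point the same way). The heart of
the departing sanitize_e820_map() deserves a restatement: overlapping
BIOS ranges are resolved by letting the larger type value win, so
usable RAM (1) loses to reserved/ACPI/NVS (2/3/4). A toy that applies
the same precedence rule (it samples addresses instead of sweeping
change points, but produces the same resolved map):

#include <stdio.h>

struct region { unsigned long start, end; int type; };

int main(void)
{
	/* type-1 RAM overlapped by a type-2 reserved window */
	struct region in[] = { { 0, 100, 1 }, { 50, 80, 2 } };
	unsigned long addr;

	for (addr = 0; addr < 100; addr += 10) {
		int type = 0, i;

		for (i = 0; i < 2; i++)
			if (addr >= in[i].start && addr < in[i].end &&
			    in[i].type > type)
				type = in[i].type;	/* larger type wins */
		printf("%3lu..%3lu -> type %d\n", addr, addr + 10, type);
	}
	return 0;	/* the 50..80 slice comes out as type 2 */
}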
@@ -682,7 +159,7 @@
 }
 #endif
 
-static int __initdata user_defined_memmap = 0;
+int __initdata user_defined_memmap = 0;
 
 /*
  * "mem=nopentium" disables the 4MB page tables.
@@ -719,51 +196,6 @@
 }
 early_param("mem", parse_mem);
 
-static int __init parse_memmap(char *arg)
-{
-	if (!arg)
-		return -EINVAL;
-
-	if (strcmp(arg, "exactmap") == 0) {
-#ifdef CONFIG_CRASH_DUMP
-		/* If we are doing a crash dump, we
-		 * still need to know the real mem
-		 * size before original memory map is
-		 * reset.
-		 */
-		find_max_pfn();
-		saved_max_pfn = max_pfn;
-#endif
-		e820.nr_map = 0;
-		user_defined_memmap = 1;
-	} else {
-		/* If the user specifies memory size, we
-		 * limit the BIOS-provided memory map to
-		 * that size. exactmap can be used to specify
-		 * the exact map. mem=number can be used to
-		 * trim the existing memory map.
-		 */
-		unsigned long long start_at, mem_size;
-
-		mem_size = memparse(arg, &arg);
-		if (*arg == '@') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RAM);
-		} else if (*arg == '#') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_ACPI);
-		} else if (*arg == '$') {
-			start_at = memparse(arg+1, &arg);
-			add_memory_region(start_at, mem_size, E820_RESERVED);
-		} else {
-			limit_regions(mem_size);
-			user_defined_memmap = 1;
-		}
-	}
-	return 0;
-}
-early_param("memmap", parse_memmap);
-
 #ifdef CONFIG_PROC_VMCORE
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel.
@@ -828,90 +260,6 @@
 early_param("reservetop", parse_reservetop);
 
 /*
- * Callback for efi_memory_walk.
- */
-static int __init
-efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
-{
-	unsigned long *max_pfn = arg, pfn;
-
-	if (start < end) {
-		pfn = PFN_UP(end -1);
-		if (pfn > *max_pfn)
-			*max_pfn = pfn;
-	}
-	return 0;
-}
-
-static int __init
-efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
-{
-	memory_present(0, PFN_UP(start), PFN_DOWN(end));
-	return 0;
-}
-
- /*
-  * This function checks if the entire range <start,end> is mapped with type.
-  *
-  * Note: this function only works correct if the e820 table is sorted and
-  * not-overlapping, which is the case
-  */
-int __init
-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
-{
-	u64 start = s;
-	u64 end = e;
-	int i;
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
-		if (type && ei->type != type)
-			continue;
-		/* is the region (part) in overlap with the current region ?*/
-		if (ei->addr >= end || ei->addr + ei->size <= start)
-			continue;
-		/* if the region is at the beginning of <start,end> we move
-		 * start to the end of the region since it's ok until there
-		 */
-		if (ei->addr <= start)
-			start = ei->addr + ei->size;
-		/* if start is now at or beyond end, we're done, full
-		 * coverage */
-		if (start >= end)
-			return 1; /* we're done */
-	}
-	return 0;
-}
-
-/*
- * Find the highest page frame number we have available
- */
-void __init find_max_pfn(void)
-{
-	int i;
-
-	max_pfn = 0;
-	if (efi_enabled) {
-		efi_memmap_walk(efi_find_max_pfn, &max_pfn);
-		efi_memmap_walk(efi_memory_present_wrapper, NULL);
-		return;
-	}
-
-	for (i = 0; i < e820.nr_map; i++) {
-		unsigned long start, end;
-		/* RAM? */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		start = PFN_UP(e820.map[i].addr);
-		end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-		if (start >= end)
-			continue;
-		if (end > max_pfn)
-			max_pfn = end;
-		memory_present(0, start, end);
-	}
-}
-
-/*
  * Determine low and high memory ranges:
  */
 unsigned long __init find_max_low_pfn(void)
@@ -971,68 +319,6 @@
 }
 
 /*
- * Free all available memory for boot time allocation.  Used
- * as a callback function by efi_memory_walk()
- */
-
-static int __init
-free_available_memory(unsigned long start, unsigned long end, void *arg)
-{
-	/* check max_low_pfn */
-	if (start >= (max_low_pfn << PAGE_SHIFT))
-		return 0;
-	if (end >= (max_low_pfn << PAGE_SHIFT))
-		end = max_low_pfn << PAGE_SHIFT;
-	if (start < end)
-		free_bootmem(start, end - start);
-
-	return 0;
-}
-/*
- * Register fully available low RAM pages with the bootmem allocator.
- */
-static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
-{
-	int i;
-
-	if (efi_enabled) {
-		efi_memmap_walk(free_available_memory, NULL);
-		return;
-	}
-	for (i = 0; i < e820.nr_map; i++) {
-		unsigned long curr_pfn, last_pfn, size;
-		/*
-		 * Reserve usable low memory
-		 */
-		if (e820.map[i].type != E820_RAM)
-			continue;
-		/*
-		 * We are rounding up the start address of usable memory:
-		 */
-		curr_pfn = PFN_UP(e820.map[i].addr);
-		if (curr_pfn >= max_low_pfn)
-			continue;
-		/*
-		 * ... and at the end of the usable range downwards:
-		 */
-		last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-
-		if (last_pfn > max_low_pfn)
-			last_pfn = max_low_pfn;
-
-		/*
-		 * .. finally, did all the rounding and playing
-		 * around just make the area go away?
-		 */
-		if (last_pfn <= curr_pfn)
-			continue;
-
-		size = last_pfn - curr_pfn;
-		free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
-	}
-}
-
-/*
  * workaround for Dell systems that neglect to reserve EBDA
  */
 static void __init reserve_ebda_region(void)
@@ -1118,8 +404,8 @@
 	 * the (very unlikely) case of us accidentally initializing the
 	 * bootmem allocator with an invalid RAM area.
 	 */
-	reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
-			 bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
+	reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
+			 bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
 
 	/*
 	 * reserve physical page 0 - it's a special BIOS page on many boxes,
@@ -1199,126 +485,6 @@
 	}
 }
 
-/*
- * Request address space for all standard RAM and ROM resources
- * and also for regions reported as reserved by the e820.
- */
-static void __init
-legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
-{
-	int i;
-
-	probe_roms();
-	for (i = 0; i < e820.nr_map; i++) {
-		struct resource *res;
-#ifndef CONFIG_RESOURCES_64BIT
-		if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
-			continue;
-#endif
-		res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
-		switch (e820.map[i].type) {
-		case E820_RAM:	res->name = "System RAM"; break;
-		case E820_ACPI:	res->name = "ACPI Tables"; break;
-		case E820_NVS:	res->name = "ACPI Non-volatile Storage"; break;
-		default:	res->name = "reserved";
-		}
-		res->start = e820.map[i].addr;
-		res->end = res->start + e820.map[i].size - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		if (request_resource(&iomem_resource, res)) {
-			kfree(res);
-			continue;
-		}
-		if (e820.map[i].type == E820_RAM) {
-			/*
-			 *  We don't know which RAM region contains kernel data,
-			 *  so we try it repeatedly and let the resource manager
-			 *  test it.
-			 */
-			request_resource(res, code_resource);
-			request_resource(res, data_resource);
-#ifdef CONFIG_KEXEC
-			request_resource(res, &crashk_res);
-#endif
-		}
-	}
-}
-
-/*
- * Request address space for all standard resources
- *
- * This is called just before pcibios_init(), which is also a
- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
- */
-static int __init request_standard_resources(void)
-{
-	int i;
-
-	printk("Setting up standard PCI resources\n");
-	if (efi_enabled)
-		efi_initialize_iomem_resources(&code_resource, &data_resource);
-	else
-		legacy_init_iomem_resources(&code_resource, &data_resource);
-
-	/* EFI systems may still have VGA */
-	request_resource(&iomem_resource, &video_ram_resource);
-
-	/* request I/O space for devices used on all i[345]86 PCs */
-	for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
-		request_resource(&ioport_resource, &standard_io_resources[i]);
-	return 0;
-}
-
-subsys_initcall(request_standard_resources);
-
-static void __init register_memory(void)
-{
-	unsigned long gapstart, gapsize, round;
-	unsigned long long last;
-	int i;
-
-	/*
-	 * Search for the bigest gap in the low 32 bits of the e820
-	 * memory space.
-	 */
-	last = 0x100000000ull;
-	gapstart = 0x10000000;
-	gapsize = 0x400000;
-	i = e820.nr_map;
-	while (--i >= 0) {
-		unsigned long long start = e820.map[i].addr;
-		unsigned long long end = start + e820.map[i].size;
-
-		/*
-		 * Since "last" is at most 4GB, we know we'll
-		 * fit in 32 bits if this condition is true
-		 */
-		if (last > end) {
-			unsigned long gap = last - end;
-
-			if (gap > gapsize) {
-				gapsize = gap;
-				gapstart = end;
-			}
-		}
-		if (start < last)
-			last = start;
-	}
-
-	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
-	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
-
-	printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
-		pci_mem_start, gapstart, gapsize);
-}
-
 #ifdef CONFIG_MCA
 static void set_mca_bus(int x)
 {
@@ -1328,6 +494,12 @@
 static void set_mca_bus(int x) { }
 #endif
 
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+char * __attribute__((weak)) memory_setup(void)
+{
+	return machine_specific_memory_setup();
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -1380,7 +552,7 @@
 		efi_init();
 	else {
 		printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-		print_memory_map(machine_specific_memory_setup());
+		print_memory_map(memory_setup());
 	}
 
 	copy_edd();
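
memory_setup() above is the other notable addition: declared
__attribute__((weak)), it is a link-time default that any strong
definition of the same symbol (per its comment, one in paravirt.c under
CONFIG_PARAVIRT) replaces silently, with no #ifdef at the call sites.
The mechanism in miniature, compilable as-is:

#include <stdio.h>

/* Weak default: used only if no other object file defines a strong
 * memory_setup(). Linking one in overrides this without any edits. */
char * __attribute__((weak)) memory_setup(void)
{
	return "machine-specific (weak default)";
}

int main(void)
{
	printf("memory map from: %s\n", memory_setup());
	return 0;
}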
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
index 43002cf..65d7620 100644
--- a/arch/i386/kernel/signal.c
+++ b/arch/i386/kernel/signal.c
@@ -128,7 +128,7 @@
 			 X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
 			 X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
 
-	GET_SEG(gs);
+	COPY_SEG(gs);
 	GET_SEG(fs);
 	COPY_SEG(es);
 	COPY_SEG(ds);
@@ -244,9 +244,7 @@
 {
 	int tmp, err = 0;
 
-	tmp = 0;
-	savesegment(gs, tmp);
-	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
+	err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs);
 	savesegment(fs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
 
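The signal.c side of the %gs move: restoring switches from GET_SEG
(load the live register) to COPY_SEG (fill the stacked pt_regs slot),
and saving reads regs->xgs instead of the register, because while in
the kernel %gs now carries the kernel PDA selector and only the saved
frame holds the user's value. The two macro flavours, roughly:

/*
 *	#define COPY_SEG(seg)	{ unsigned short tmp;		\
 *		err |= __get_user(tmp, &sc->seg);		\
 *		regs->x##seg = tmp; }	// stacked frame, reloaded on exit
 *
 *	#define GET_SEG(seg)	{ unsigned short tmp;		\
 *		err |= __get_user(tmp, &sc->seg);		\
 *		loadsegment(seg, tmp); }	// live register, immediately
 */
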
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 9827cf9..5285aff 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -321,7 +321,6 @@
 
 fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned long cpu;
 
 	cpu = get_cpu();
@@ -352,7 +351,6 @@
 	smp_mb__after_clear_bit();
 out:
 	put_cpu_no_resched();
-	set_irq_regs(old_regs);
 }
 
 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
@@ -607,14 +605,11 @@
  */
 fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
 	ack_APIC_irq();
-	set_irq_regs(old_regs);
 }
 
 fastcall void smp_call_function_interrupt(struct pt_regs *regs)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
 	void (*func) (void *info) = call_data->func;
 	void *info = call_data->info;
 	int wait = call_data->wait;
@@ -637,7 +632,6 @@
 		mb();
 		atomic_inc(&call_data->finished);
 	}
-	set_irq_regs(old_regs);
 }
 
 /*
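
The smp.c hunks strip the set_irq_regs()/set_irq_regs(old_regs)
bracketing from the TLB-flush, reschedule and call-function IPI
handlers. What the dropped calls do, roughly after the generic helper:
stash a per-CPU pointer to the interrupted context for consumers such
as the profiling code. None of these three handlers calls anything that
reads that pointer back, so the save/restore was pure overhead:

/*
 *	static DEFINE_PER_CPU(struct pt_regs *, __irq_regs);
 *
 *	static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
 *	{
 *		struct pt_regs *old_regs = __get_cpu_var(__irq_regs);
 *
 *		__get_cpu_var(__irq_regs) = new_regs;
 *		return old_regs;
 *	}
 */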
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 02a9b66..4bf0e3c 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,6 +33,11 @@
  *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
 *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */
 
+
+/* SMP boot always wants to use real time delay to allow sufficient time for
+ * the APs to come online */
+#define USE_REAL_TIME_DELAY
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -52,6 +57,8 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/nmi.h>
+#include <asm/pda.h>
+#include <asm/genapic.h>
 
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
@@ -536,11 +543,11 @@
 static void __devinit start_secondary(void *unused)
 {
 	/*
-	 * Dont put anything before smp_callin(), SMP
+	 * Don't put *anything* before secondary_cpu_init(), SMP
 	 * booting is too fragile that we want to limit the
 	 * things done here to the most necessary things.
 	 */
-	cpu_init();
+	secondary_cpu_init();
 	preempt_disable();
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
@@ -599,13 +606,16 @@
 		"movl %0,%%esp\n\t"
 		"jmp *%1"
 		:
-		:"r" (current->thread.esp),"r" (current->thread.eip));
+		:"m" (current->thread.esp),"m" (current->thread.eip));
 }
 
+/* Static state in head.S used to set up a CPU */
 extern struct {
 	void * esp;
 	unsigned short ss;
 } stack_start;
+extern struct i386_pda *start_pda;
+extern struct Xgt_desc_struct cpu_gdt_descr;
 
 #ifdef CONFIG_NUMA
 
@@ -936,9 +946,6 @@
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	++cpucount;
-	alternatives_smp_switch(1);
-
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
@@ -946,15 +953,30 @@
 	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
+
+	/* Pre-allocate and initialize the CPU's GDT and PDA so it
+	   doesn't have to do any memory allocation during the
+	   delicate CPU-bringup phase. */
+	if (!init_gdt(cpu, idle)) {
+		printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+		return -1;	/* ? */
+	}
+
 	idle->thread.eip = (unsigned long) start_secondary;
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
+	++cpucount;
+	alternatives_smp_switch(1);
+
 	/* So we see what's up   */
 	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	stack_start.esp = (void *) idle->thread.esp;
 
+	start_pda = cpu_pda(cpu);
+	cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu);
+
 	irq_ctx_init(cpu);
 
 	x86_cpu_to_apicid[cpu] = apicid;
@@ -1109,34 +1131,15 @@
 }
 #endif
 
-static void smp_tune_scheduling (void)
+static void smp_tune_scheduling(void)
 {
 	unsigned long cachesize;       /* kB   */
-	unsigned long bandwidth = 350; /* MB/s */
-	/*
-	 * Rough estimation for SMP scheduling, this is the number of
-	 * cycles it takes for a fully memory-limited process to flush
-	 * the SMP-local cache.
-	 *
-	 * (For a P5 this pretty much means we will choose another idle
-	 *  CPU almost always at wakeup time (this is due to the small
-	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
-	 *  the cache size)
-	 */
 
-	if (!cpu_khz) {
-		/*
-		 * this basically disables processor-affinity
-		 * scheduling on SMP without a TSC.
-		 */
-		return;
-	} else {
+	if (cpu_khz) {
 		cachesize = boot_cpu_data.x86_cache_size;
-		if (cachesize == -1) {
-			cachesize = 16; /* Pentiums, 2x8kB cache */
-			bandwidth = 100;
-		}
-		max_cache_size = cachesize * 1024;
+
+		if (cachesize > 0)
+			max_cache_size = cachesize * 1024;
 	}
 }
 
@@ -1462,6 +1465,12 @@
 	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();
+
+#ifdef CONFIG_X86_GENERICARCH
+	if (num_online_cpus() > 8 && genapic == &apic_default)
+		panic("Default flat APIC routing can't be used with > 8 cpus\n");
+#endif
+
 	return 0;
 }
 
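The smpboot.c reordering makes secondary bringup fail cleanly: the
CPU's GDT and PDA are pre-allocated by init_gdt() before anything
irreversible happens, and ++cpucount plus alternatives_smp_switch(1)
move below that step so an allocation failure can return without
leaving stale global state. start_pda and cpu_gdt_descr are then staged
for head.S before the trampoline fires. (initialize_secondary's asm
operands also change from "r" to "m", keeping the esp/eip loads as
explicit memory references.) The error-path ordering in miniature;
every name below is a stand-in:

extern int alloc_gdt_and_pda(int cpu);	/* the new can-fail step */
extern void alternatives_smp_on(void);
extern int kick_trampoline(int cpu);

int cpucount;

int boot_one_cpu(int cpu)
{
	if (!alloc_gdt_and_pda(cpu))
		return -1;		/* nothing to unwind */

	++cpucount;			/* side effects only after that */
	alternatives_smp_on();
	return kick_trampoline(cpu);
}
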
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 0bbacd0..7de9117 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -27,7 +27,11 @@
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
+#ifdef CONFIG_PARAVIRT
+unsigned int __read_mostly vdso_enabled = 0;
+#else
 unsigned int __read_mostly vdso_enabled = 1;
+#endif
 
 EXPORT_SYMBOL_GPL(vdso_enabled);
 
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 78af572..c505b16 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -56,6 +56,7 @@
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/timer.h>
+#include <asm/time.h>
 
 #include "mach_time.h"
 
@@ -116,10 +117,7 @@
 	/* gets recalled with irq locally disabled */
 	/* XXX - does irqsave resolve this? -johnstul */
 	spin_lock_irqsave(&rtc_lock, flags);
-	if (efi_enabled)
-		retval = efi_set_rtc_mmss(nowtime);
-	else
-		retval = mach_set_rtc_mmss(nowtime);
+	retval = set_wallclock(nowtime);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return retval;
@@ -223,10 +221,7 @@
 
 	spin_lock_irqsave(&rtc_lock, flags);
 
-	if (efi_enabled)
-		retval = efi_get_time();
-	else
-		retval = mach_get_cmos_time();
+	retval = get_wallclock();
 
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
@@ -370,7 +365,7 @@
 		printk("Using HPET for base-timer\n");
 	}
 
-	time_init_hook();
+	do_time_init();
 }
 #endif
 
@@ -392,5 +387,5 @@
 
 	do_settimeofday(&ts);
 
-	time_init_hook();
+	do_time_init();
 }
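
time.c stops open-coding the EFI-versus-CMOS choice at each call site:
get_wallclock(), set_wallclock() and do_time_init() from the new
asm/time.h are single hook points that a paravirt kernel can interpose
on, while the native versions keep making the efi_enabled decision.
Roughly the shape of the layering (a sketch, not the literal header):

/*
 *	static inline unsigned long native_get_wallclock(void)
 *	{
 *		return efi_enabled ? efi_get_time() : mach_get_cmos_time();
 *	}
 *
 * and with CONFIG_PARAVIRT, get_wallclock() resolves to
 * paravirt_ops.get_wallclock() instead of the native function.
 */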
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
index 1a2a979..1e4702d 100644
--- a/arch/i386/kernel/time_hpet.c
+++ b/arch/i386/kernel/time_hpet.c
@@ -132,14 +132,20 @@
 	 * the single HPET timer for system time.
 	 */
 #ifdef CONFIG_HPET_EMULATE_RTC
-	if (!(id & HPET_ID_NUMBER))
+	if (!(id & HPET_ID_NUMBER)) {
+		iounmap(hpet_virt_address);
+		hpet_virt_address = NULL;
 		return -1;
+	}
 #endif
 
 
 	hpet_period = hpet_readl(HPET_PERIOD);
-	if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD))
+	if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) {
+		iounmap(hpet_virt_address);
+		hpet_virt_address = NULL;
 		return -1;
+	}
 
 	/*
 	 * 64 bit math
@@ -156,8 +162,11 @@
 
 	hpet_use_timer = id & HPET_ID_LEGSUP;
 
-	if (hpet_timer_stop_set_go(hpet_tick))
+	if (hpet_timer_stop_set_go(hpet_tick)) {
+		iounmap(hpet_virt_address);
+		hpet_virt_address = NULL;
 		return -1;
+	}
 
 	use_hpet = 1;
 
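The time_hpet.c changes plug an ioremap() leak: every early failure
return in hpet_enable() now unmaps hpet_virt_address and clears the
pointer first. Repeating the three-line cleanup at each exit works;
the other common kernel shape funnels all errors through one label,
sketched below with stand-in helpers rather than the real HPET code:

extern void *ioremap_stub(unsigned long phys, unsigned long size);
extern void iounmap_stub(void *virt);
extern int hpet_checks_fail(void);	/* id/period/start-stop checks */

static void *hpet_virt;

int hpet_enable_sketch(void)
{
	hpet_virt = ioremap_stub(0xfed00000UL, 0x400);
	if (!hpet_virt)
		return -1;

	if (hpet_checks_fail())
		goto fail;		/* every error unmaps exactly once */

	return 0;

fail:
	iounmap_stub(hpet_virt);
	hpet_virt = NULL;
	return -1;
}
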
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c
index 07d6da3..79cf608 100644
--- a/arch/i386/kernel/topology.c
+++ b/arch/i386/kernel/topology.c
@@ -40,14 +40,18 @@
 	 * restrictions and assumptions in kernel. This basically
 	 * doesnt add a control file, one cannot attempt to offline
 	 * BSP.
+	 *
+	 * Also, certain PCI quirks require that hotplug control not be
+	 * enabled for all CPUs.
 	 */
-	if (!num)
-		cpu_devices[num].cpu.no_control = 1;
+	if (num && enable_cpu_hotplug)
+		cpu_devices[num].cpu.hotpluggable = 1;
 
 	return register_cpu(&cpu_devices[num].cpu, num);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+int enable_cpu_hotplug = 1;
 
 void arch_unregister_cpu(int num) {
 	return unregister_cpu(&cpu_devices[num].cpu);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 3124f1b..68de48e 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -29,6 +29,7 @@
 #include <linux/kexec.h>
 #include <linux/unwind.h>
 #include <linux/uaccess.h>
+#include <linux/nmi.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -61,9 +62,6 @@
 
 asmlinkage int system_call(void);
 
-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
-		{ 0, 0 }, { 0, 0 } };
-
 /* Do we ignore FPU interrupts ? */
 char ignore_fpu_irq = 0;
 
@@ -94,7 +92,7 @@
 asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 
-static int kstack_depth_to_print = 24;
+int kstack_depth_to_print = 24;
 #ifdef CONFIG_STACK_UNWIND
 static int call_trace = 1;
 #else
@@ -163,16 +161,25 @@
 {
 	struct ops_and_data *oad = (struct ops_and_data *)data;
 	int n = 0;
+	unsigned long sp = UNW_SP(info);
 
+	if (arch_unw_user_mode(info))
+		return -1;
 	while (unwind(info) == 0 && UNW_PC(info)) {
 		n++;
 		oad->ops->address(oad->data, UNW_PC(info));
 		if (arch_unw_user_mode(info))
 			break;
+		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
+		    && sp > UNW_SP(info))
+			break;
+		sp = UNW_SP(info);
 	}
 	return n;
 }
 
+#define MSG(msg) ops->warning(data, msg)
+
 void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	        unsigned long *stack,
 		struct stacktrace_ops *ops, void *data)
@@ -191,29 +198,31 @@
 			if (unwind_init_frame_info(&info, task, regs) == 0)
 				unw_ret = dump_trace_unwind(&info, &oad);
 		} else if (task == current)
-			unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+			unw_ret = unwind_init_running(&info, dump_trace_unwind,
+						      &oad);
 		else {
 			if (unwind_init_blocked(&info, task) == 0)
 				unw_ret = dump_trace_unwind(&info, &oad);
 		}
 		if (unw_ret > 0) {
 			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
-				ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+				ops->warning_symbol(data,
+					     "DWARF2 unwinder stuck at %s",
 					     UNW_PC(&info));
 				if (UNW_SP(&info) >= PAGE_OFFSET) {
-					ops->warning(data, "Leftover inexact backtrace:\n");
+					MSG("Leftover inexact backtrace:");
 					stack = (void *)UNW_SP(&info);
 					if (!stack)
 						return;
 					ebp = UNW_FP(&info);
 				} else
-					ops->warning(data, "Full inexact backtrace again:\n");
+					MSG("Full inexact backtrace again:");
 			} else if (call_trace >= 1)
 				return;
 			else
-				ops->warning(data, "Full inexact backtrace again:\n");
+				MSG("Full inexact backtrace again:");
 		} else
-			ops->warning(data, "Inexact backtrace:\n");
+			MSG("Inexact backtrace:");
 	}
 	if (!stack) {
 		unsigned long dummy;
@@ -247,6 +256,7 @@
 		stack = (unsigned long*)context->previous_esp;
 		if (!stack)
 			break;
+		touch_nmi_watchdog();
 	}
 }
 EXPORT_SYMBOL(dump_trace);
@@ -379,7 +389,7 @@
 	 * time of the fault..
 	 */
 	if (in_kernel) {
-		u8 __user *eip;
+		u8 *eip;
 		int code_bytes = 64;
 		unsigned char c;
 
@@ -388,18 +398,20 @@
 
 		printk(KERN_EMERG "Code: ");
 
-		eip = (u8 __user *)regs->eip - 43;
-		if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+		eip = (u8 *)regs->eip - 43;
+		if (eip < (u8 *)PAGE_OFFSET ||
+			probe_kernel_address(eip, c)) {
 			/* try starting at EIP */
-			eip = (u8 __user *)regs->eip;
+			eip = (u8 *)regs->eip;
 			code_bytes = 32;
 		}
 		for (i = 0; i < code_bytes; i++, eip++) {
-			if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+			if (eip < (u8 *)PAGE_OFFSET ||
+				probe_kernel_address(eip, c)) {
 				printk(" Bad EIP value.");
 				break;
 			}
-			if (eip == (u8 __user *)regs->eip)
+			if (eip == (u8 *)regs->eip)
 				printk("<%02x> ", c);
 			else
 				printk("%02x ", c);
@@ -415,7 +427,7 @@
 
 	if (eip < PAGE_OFFSET)
 		return;
-	if (probe_kernel_address((unsigned short __user *)eip, ud2))
+	if (probe_kernel_address((unsigned short *)eip, ud2))
 		return;
 	if (ud2 != 0x0b0f)
 		return;
@@ -428,11 +440,11 @@
 		char *file;
 		char c;
 
-		if (probe_kernel_address((unsigned short __user *)(eip + 2),
-					line))
+		if (probe_kernel_address((unsigned short *)(eip + 2), line))
 			break;
-		if (__get_user(file, (char * __user *)(eip + 4)) ||
-		    (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+		if (probe_kernel_address((char **)(eip + 4), file) ||
+		    (unsigned long)file < PAGE_OFFSET ||
+			probe_kernel_address(file, c))
 			file = "<bad filename>";
 
 		printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
@@ -707,8 +719,7 @@
 {
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
 		"CPU %d.\n", reason, smp_processor_id());
-	printk(KERN_EMERG "You probably have a hardware problem with your RAM "
-			"chips\n");
+	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
 	if (panic_on_unrecovered_nmi)
                 panic("NMI: Not continuing");
 
@@ -773,7 +784,6 @@
 	printk(" on CPU%d, eip %08lx, registers:\n",
 		smp_processor_id(), regs->eip);
 	show_registers(regs);
-	printk(KERN_EMERG "console shuts up ...\n");
 	console_silent();
 	spin_unlock(&nmi_print_lock);
 	bust_spinlocks(0);
@@ -1088,49 +1098,24 @@
 #endif
 }
 
-fastcall void setup_x86_bogus_stack(unsigned char * stk)
+fastcall unsigned long patch_espfix_desc(unsigned long uesp,
+					  unsigned long kesp)
 {
-	unsigned long *switch16_ptr, *switch32_ptr;
-	struct pt_regs *regs;
-	unsigned long stack_top, stack_bot;
-	unsigned short iret_frame16_off;
 	int cpu = smp_processor_id();
-	/* reserve the space on 32bit stack for the magic switch16 pointer */
-	memmove(stk, stk + 8, sizeof(struct pt_regs));
-	switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
-	regs = (struct pt_regs *)stk;
-	/* now the switch32 on 16bit stack */
-	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
-	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
-	switch32_ptr = (unsigned long *)(stack_top - 8);
-	iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
-	/* copy iret frame on 16bit stack */
-	memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
-	/* fill in the switch pointers */
-	switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
-	switch16_ptr[1] = __ESPFIX_SS;
-	switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
-		8 - CPU_16BIT_STACK_SIZE;
-	switch32_ptr[1] = __KERNEL_DS;
-}
-
-fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
-{
-	unsigned long *switch32_ptr;
-	unsigned char *stack16, *stack32;
-	unsigned long stack_top, stack_bot;
-	int len;
-	int cpu = smp_processor_id();
-	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
-	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
-	switch32_ptr = (unsigned long *)(stack_top - 8);
-	/* copy the data from 16bit stack to 32bit stack */
-	len = CPU_16BIT_STACK_SIZE - 8 - sp;
-	stack16 = (unsigned char *)(stack_bot + sp);
-	stack32 = (unsigned char *)
-		(switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
-	memcpy(stack32, stack16, len);
-	return stack32;
+	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+	struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
+	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
+	unsigned long new_kesp = kesp - base;
+	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
+	__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
+	/* Set up base for espfix segment */
+	desc &= 0x00f0ff0000000000ULL;
+	desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
+		((((__u64)base) << 32) & 0xff00000000000000ULL) |
+		((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
+		(lim_pages & 0xffff);
+	*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
+	return new_kesp;
 }
 
 /*
@@ -1143,7 +1128,7 @@
  * Must be called with kernel preemption disabled (in this case,
  * local interrupts are disabled at the call-site in entry.S).
  */
-asmlinkage void math_state_restore(struct pt_regs regs)
+asmlinkage void math_state_restore(void)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
@@ -1153,6 +1138,7 @@
 		init_fpu(tsk);
 	restore_fpu(tsk);
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
 }
 
 #ifndef CONFIG_MATH_EMULATION
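
Of the traps.c changes, patch_espfix_desc() is the dense one: instead
of bouncing an iret frame through a per-CPU 16-bit stack, the kernel
rewrites the GDT_ENTRY_ESPFIX_SS descriptor so that segment base plus
the truncated 16-bit esp lands back on the real kernel stack. (The
math_state_restore() hunk pairs with process.c: it drops the unused
pt_regs argument and bumps fpu_counter for the eager-restore
heuristic.) The bit shuffling is just the architectural descriptor
layout (base bits 0-23 at descriptor bits 16-39, base 24-31 at 56-63;
limit 0-15 at 0-15, limit 16-19 at 48-51), reproduced standalone with
the same masks as the patch:

#include <stdint.h>
#include <stdio.h>

static uint64_t pack_desc(uint64_t tmpl, uint32_t base, uint32_t lim_pages)
{
	uint64_t desc = tmpl & 0x00f0ff0000000000ULL;	/* keep type/flags */

	desc |= ((uint64_t)base << 16) & 0x000000ffffff0000ULL;
	desc |= ((uint64_t)base << 32) & 0xff00000000000000ULL;
	desc |= ((uint64_t)lim_pages << 32) & 0x000f000000000000ULL;
	desc |= lim_pages & 0xffff;
	return desc;
}

int main(void)
{
	/* arbitrary example values: base 0x12345678, limit 0x1f pages */
	printf("%016llx\n", (unsigned long long)
	       pack_desc(0x00cf92000000ffffULL, 0x12345678, 0x1f));
	return 0;
}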
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 9810c8c..1bbe45d 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -13,7 +13,6 @@
 
 #include <asm/delay.h>
 #include <asm/tsc.h>
-#include <asm/delay.h>
 #include <asm/io.h>
 
 #include "mach_timer.h"
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index cbcd61d..be2f96e 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -43,6 +43,7 @@
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
 #include <linux/audit.h>
+#include <linux/stddef.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -72,10 +73,10 @@
 /*
  * 8- and 16-bit register defines..
  */
-#define AL(regs)	(((unsigned char *)&((regs)->eax))[0])
-#define AH(regs)	(((unsigned char *)&((regs)->eax))[1])
-#define IP(regs)	(*(unsigned short *)&((regs)->eip))
-#define SP(regs)	(*(unsigned short *)&((regs)->esp))
+#define AL(regs)	(((unsigned char *)&((regs)->pt.eax))[0])
+#define AH(regs)	(((unsigned char *)&((regs)->pt.eax))[1])
+#define IP(regs)	(*(unsigned short *)&((regs)->pt.eip))
+#define SP(regs)	(*(unsigned short *)&((regs)->pt.esp))
 
 /*
  * virtual flags (16 and 32-bit versions)
@@ -89,10 +90,37 @@
 #define SAFE_MASK	(0xDD5)
 #define RETURN_MASK	(0xDFF)
 
-#define VM86_REGS_PART2 orig_eax
-#define VM86_REGS_SIZE1 \
-        ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
-#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
+/* convert kernel_vm86_regs to vm86_regs */
+static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
+				  const struct kernel_vm86_regs *regs)
+{
+	int ret = 0;
+
+	/* kernel_vm86_regs is missing xfs, so copy everything up to
+	   (but not including) xgs, and then rest after xgs. */
+	ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs));
+	ret += copy_to_user(&user->__null_gs, &regs->pt.xgs,
+			    sizeof(struct kernel_vm86_regs) -
+			    offsetof(struct kernel_vm86_regs, pt.xgs));
+
+	return ret;
+}
+
+/* convert vm86_regs to kernel_vm86_regs */
+static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
+				    const struct vm86_regs __user *user,
+				    unsigned extra)
+{
+	int ret = 0;
+
+	ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs));
+	ret += copy_from_user(&regs->pt.xgs, &user->__null_gs,
+			      sizeof(struct kernel_vm86_regs) -
+			      offsetof(struct kernel_vm86_regs, pt.xgs) +
+			      extra);
+
+	return ret;
+}
 
 struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
 struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
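
copy_vm86_regs_to_user()/copy_vm86_regs_from_user() replace the old
VM86_REGS_SIZE1/SIZE2 arithmetic: kernel_vm86_regs now embeds pt_regs,
whose layout has no fs slot where userspace's vm86_regs carries
__null_fs, so the helpers copy the prefix up to pt.xgs and the tail
from there, stepping over the hole. The trick reduced to toy types:

#include <stdio.h>
#include <stddef.h>
#include <string.h>

struct with_fs    { long a, b, fs, gs, c; };	/* userspace-style layout */
struct without_fs { long a, b, gs, c; };	/* kernel-style, no fs slot */

static void to_user(struct with_fs *u, const struct without_fs *k)
{
	memcpy(u, k, offsetof(struct without_fs, gs));		/* a, b */
	memcpy(&u->gs, &k->gs,
	       sizeof(*k) - offsetof(struct without_fs, gs));	/* gs, c */
}

int main(void)
{
	struct without_fs k = { 1, 2, 3, 4 };
	struct with_fs u = { 0 };

	to_user(&u, &k);
	printf("a=%ld b=%ld fs=%ld gs=%ld c=%ld\n",
	       u.a, u.b, u.fs, u.gs, u.c);
	return 0;	/* fs stays 0: the slot the source never had */
}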
@@ -112,10 +140,8 @@
 		printk("no vm86_info: BAD\n");
 		do_exit(SIGSEGV);
 	}
-	set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
-	tmp = copy_to_user(&current->thread.vm86_info->regs,regs, VM86_REGS_SIZE1);
-	tmp += copy_to_user(&current->thread.vm86_info->regs.VM86_REGS_PART2,
-		&regs->VM86_REGS_PART2, VM86_REGS_SIZE2);
+	set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask);
+	tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs,regs);
 	tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
 	if (tmp) {
 		printk("vm86: could not access userspace vm86_info\n");
@@ -129,9 +155,11 @@
 	current->thread.saved_esp0 = 0;
 	put_cpu();
 
-	loadsegment(fs, current->thread.saved_fs);
-	loadsegment(gs, current->thread.saved_gs);
 	ret = KVM86->regs32;
+
+	loadsegment(fs, current->thread.saved_fs);
+	ret->xgs = current->thread.saved_gs;
+
 	return ret;
 }
 
@@ -183,9 +211,9 @@
 	tsk = current;
 	if (tsk->thread.saved_esp0)
 		goto out;
-	tmp  = copy_from_user(&info, v86, VM86_REGS_SIZE1);
-	tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
-		(long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2);
+	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
+				       offsetof(struct kernel_vm86_struct, vm86plus) -
+				       sizeof(info.regs));
 	ret = -EFAULT;
 	if (tmp)
 		goto out;
@@ -233,9 +261,9 @@
 	if (tsk->thread.saved_esp0)
 		goto out;
 	v86 = (struct vm86plus_struct __user *)regs.ecx;
-	tmp  = copy_from_user(&info, v86, VM86_REGS_SIZE1);
-	tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2,
-		(long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2);
+	tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
+				       offsetof(struct kernel_vm86_struct, regs32) -
+				       sizeof(info.regs));
 	ret = -EFAULT;
 	if (tmp)
 		goto out;
@@ -252,15 +280,15 @@
 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
 {
 	struct tss_struct *tss;
-	long eax;
 /*
  * make sure the vm86() system call doesn't try to do anything silly
  */
-	info->regs.__null_ds = 0;
-	info->regs.__null_es = 0;
+	info->regs.pt.xds = 0;
+	info->regs.pt.xes = 0;
+	info->regs.pt.xgs = 0;
 
-/* we are clearing fs,gs later just before "jmp resume_userspace",
- * because starting with Linux 2.1.x they aren't no longer saved/restored
+/* we are clearing fs later just before "jmp resume_userspace",
+ * because it is not saved/restored.
  */
 
 /*
@@ -268,10 +296,10 @@
  * has set it up safely, so this makes sure interrupt etc flags are
  * inherited from protected mode.
  */
- 	VEFLAGS = info->regs.eflags;
-	info->regs.eflags &= SAFE_MASK;
-	info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK;
-	info->regs.eflags |= VM_MASK;
+ 	VEFLAGS = info->regs.pt.eflags;
+	info->regs.pt.eflags &= SAFE_MASK;
+	info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK;
+	info->regs.pt.eflags |= VM_MASK;
 
 	switch (info->cpu_type) {
 		case CPU_286:
@@ -294,7 +322,7 @@
 	info->regs32->eax = 0;
 	tsk->thread.saved_esp0 = tsk->thread.esp0;
 	savesegment(fs, tsk->thread.saved_fs);
-	savesegment(gs, tsk->thread.saved_gs);
+	tsk->thread.saved_gs = info->regs32->xgs;
 
 	tss = &per_cpu(init_tss, get_cpu());
 	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
@@ -306,19 +334,18 @@
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
-	__asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t");
-	__asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax));
 
 	/*call audit_syscall_exit since we do not exit via the normal paths */
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(AUDITSC_RESULT(eax), eax);
+		audit_syscall_exit(AUDITSC_RESULT(0), 0);
 
 	__asm__ __volatile__(
 		"movl %0,%%esp\n\t"
 		"movl %1,%%ebp\n\t"
+		"mov  %2, %%fs\n\t"
 		"jmp resume_userspace"
 		: /* no outputs */
-		:"r" (&info->regs), "r" (task_thread_info(tsk)));
+		:"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
 	/* we never return here */
 }
 
@@ -348,12 +375,12 @@
 
 static inline void clear_TF(struct kernel_vm86_regs * regs)
 {
-	regs->eflags &= ~TF_MASK;
+	regs->pt.eflags &= ~TF_MASK;
 }
 
 static inline void clear_AC(struct kernel_vm86_regs * regs)
 {
-	regs->eflags &= ~AC_MASK;
+	regs->pt.eflags &= ~AC_MASK;
 }
 
 /* It is correct to call set_IF(regs) from the set_vflags_*
@@ -370,7 +397,7 @@
 static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs)
 {
 	set_flags(VEFLAGS, eflags, current->thread.v86mask);
-	set_flags(regs->eflags, eflags, SAFE_MASK);
+	set_flags(regs->pt.eflags, eflags, SAFE_MASK);
 	if (eflags & IF_MASK)
 		set_IF(regs);
 	else
@@ -380,7 +407,7 @@
 static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
 {
 	set_flags(VFLAGS, flags, current->thread.v86mask);
-	set_flags(regs->eflags, flags, SAFE_MASK);
+	set_flags(regs->pt.eflags, flags, SAFE_MASK);
 	if (flags & IF_MASK)
 		set_IF(regs);
 	else
@@ -389,7 +416,7 @@
 
 static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
 {
-	unsigned long flags = regs->eflags & RETURN_MASK;
+	unsigned long flags = regs->pt.eflags & RETURN_MASK;
 
 	if (VEFLAGS & VIF_MASK)
 		flags |= IF_MASK;
@@ -493,7 +520,7 @@
 	unsigned long __user *intr_ptr;
 	unsigned long segoffs;
 
-	if (regs->cs == BIOSSEG)
+	if (regs->pt.xcs == BIOSSEG)
 		goto cannot_handle;
 	if (is_revectored(i, &KVM86->int_revectored))
 		goto cannot_handle;
@@ -505,9 +532,9 @@
 	if ((segoffs >> 16) == BIOSSEG)
 		goto cannot_handle;
 	pushw(ssp, sp, get_vflags(regs), cannot_handle);
-	pushw(ssp, sp, regs->cs, cannot_handle);
+	pushw(ssp, sp, regs->pt.xcs, cannot_handle);
 	pushw(ssp, sp, IP(regs), cannot_handle);
-	regs->cs = segoffs >> 16;
+	regs->pt.xcs = segoffs >> 16;
 	SP(regs) -= 6;
 	IP(regs) = segoffs & 0xffff;
 	clear_TF(regs);
@@ -524,7 +551,7 @@
 	if (VMPI.is_vm86pus) {
 		if ( (trapno==3) || (trapno==1) )
 			return_to_32bit(regs, VM86_TRAP + (trapno << 8));
-		do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs));
+		do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs));
 		return 0;
 	}
 	if (trapno !=1)
@@ -560,10 +587,10 @@
 		handle_vm86_trap(regs, 0, 1); \
 	return; } while (0)
 
-	orig_flags = *(unsigned short *)&regs->eflags;
+	orig_flags = *(unsigned short *)&regs->pt.eflags;
 
-	csp = (unsigned char __user *) (regs->cs << 4);
-	ssp = (unsigned char __user *) (regs->ss << 4);
+	csp = (unsigned char __user *) (regs->pt.xcs << 4);
+	ssp = (unsigned char __user *) (regs->pt.xss << 4);
 	sp = SP(regs);
 	ip = IP(regs);
 
@@ -650,7 +677,7 @@
 			SP(regs) += 6;
 		}
 		IP(regs) = newip;
-		regs->cs = newcs;
+		regs->pt.xcs = newcs;
 		CHECK_IF_IN_TRAP;
 		if (data32) {
 			set_vflags_long(newflags, regs);
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index c6f84a0..56e6ad5c 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -1,13 +1,20 @@
 /* ld script to make i386 Linux kernel
  * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ *
+ * Don't define absolute symbols until and unless you know that the symbol
+ * value should remain constant even if the kernel image is relocated
+ * at run time. Absolute symbols are not relocated. If the symbol value
+ * should change when the kernel is relocated, make the symbol section
+ * relative and put it inside the section definition.
  */
 
 #define LOAD_OFFSET __PAGE_OFFSET
 
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/cache.h>
+#include <asm/boot.h>
 
 OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
 OUTPUT_ARCH(i386)
@@ -21,34 +34,35 @@
 }
 SECTIONS
 {
-  . = __KERNEL_START;
+  . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
   phys_startup_32 = startup_32 - LOAD_OFFSET;
   /* read-only */
-  _text = .;			/* Text and read-only data */
   .text : AT(ADDR(.text) - LOAD_OFFSET) {
+  	_text = .;			/* Text and read-only data */
 	*(.text)
 	SCHED_TEXT
 	LOCK_TEXT
 	KPROBES_TEXT
 	*(.fixup)
 	*(.gnu.warning)
-	} :text = 0x9090
-
-  _etext = .;			/* End of text section */
+  	_etext = .;			/* End of text section */
+  } :text = 0x9090
 
   . = ALIGN(16);		/* Exception table */
-  __start___ex_table = .;
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) }
-  __stop___ex_table = .;
+  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+  	__start___ex_table = .;
+	 *(__ex_table)
+  	__stop___ex_table = .;
+  }
 
   RODATA
 
   . = ALIGN(4);
-  __tracedata_start = .;
   .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
+  	__tracedata_start = .;
 	*(.tracedata)
+  	__tracedata_end = .;
   }
-  __tracedata_end = .;
 
   /* writeable */
   . = ALIGN(4096);
@@ -57,11 +71,19 @@
 	CONSTRUCTORS
 	} :data
 
+  .paravirtprobe : AT(ADDR(.paravirtprobe) - LOAD_OFFSET) {
+  	__start_paravirtprobe = .;
+	*(.paravirtprobe)
+  	__stop_paravirtprobe = .;
+  }
+
   . = ALIGN(4096);
-  __nosave_begin = .;
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
-  . = ALIGN(4096);
-  __nosave_end = .;
+  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+  	__nosave_begin = .;
+	*(.data.nosave)
+  	. = ALIGN(4096);
+  	__nosave_end = .;
+  }
 
   . = ALIGN(4096);
   .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
@@ -75,17 +97,10 @@
 
   /* rarely changed data like cpu maps */
   . = ALIGN(32);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
-  _edata = .;			/* End of data section */
-
-#ifdef CONFIG_STACK_UNWIND
-  . = ALIGN(4);
-  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
-	__start_unwind = .;
-  	*(.eh_frame)
-	__end_unwind = .;
+  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+	*(.data.read_mostly)
+	_edata = .;		/* End of data section */
   }
-#endif
 
   . = ALIGN(THREAD_SIZE);	/* init_task */
   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
@@ -94,88 +109,102 @@
 
   /* might get freed after init */
   . = ALIGN(4096);
-  __smp_alt_begin = .;
-  __smp_alt_instructions = .;
   .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+	__smp_alt_begin = .;
+	__smp_alt_instructions = .;
 	*(.smp_altinstructions)
+	__smp_alt_instructions_end = .;
   }
-  __smp_alt_instructions_end = .;
   . = ALIGN(4);
-  __smp_locks = .;
   .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+  	__smp_locks = .;
 	*(.smp_locks)
+	__smp_locks_end = .;
   }
-  __smp_locks_end = .;
   .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
 	*(.smp_altinstr_replacement)
+  	__smp_alt_end = .;
   }
+  /* will be freed after init
+   * The following ALIGN() is required to make sure no other data falls
+   * onto the same page that __smp_alt_end points to, as that page might
+   * be freed after boot. Always make sure the ALIGN() directive is
+   * present after the section which contains __smp_alt_end.
+   */
   . = ALIGN(4096);
-  __smp_alt_end = .;
 
   /* will be freed after init */
   . = ALIGN(4096);		/* Init code and data */
-  __init_begin = .;
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+  	__init_begin = .;
 	_sinittext = .;
 	*(.init.text)
 	_einittext = .;
   }
   .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) }
   . = ALIGN(16);
-  __setup_start = .;
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) }
-  __setup_end = .;
-  __initcall_start = .;
+  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+  	__setup_start = .;
+	*(.init.setup)
+  	__setup_end = .;
+   }
   .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+  	__initcall_start = .;
 	INITCALLS
+  	__initcall_end = .;
   }
-  __initcall_end = .;
-  __con_initcall_start = .;
   .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+  	__con_initcall_start = .;
 	*(.con_initcall.init)
+  	__con_initcall_end = .;
   }
-  __con_initcall_end = .;
   SECURITY_INIT
   . = ALIGN(4);
-  __alt_instructions = .;
   .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+  	__alt_instructions = .;
 	*(.altinstructions)
+	__alt_instructions_end = .;
   }
-  __alt_instructions_end = .; 
   .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
 	*(.altinstr_replacement)
   }
+  . = ALIGN(4);
+  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+  	__start_parainstructions = .;
+	*(.parainstructions)
+  	__stop_parainstructions = .;
+  }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
   .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
   .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
   . = ALIGN(4096);
-  __initramfs_start = .;
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) }
-  __initramfs_end = .;
+  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+	__initramfs_start = .;
+	*(.init.ramfs)
+	__initramfs_end = .;
+  }
   . = ALIGN(L1_CACHE_BYTES);
-  __per_cpu_start = .;
-  .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
-  __per_cpu_end = .;
+  .data.percpu  : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
+	__per_cpu_start = .;
+	*(.data.percpu)
+	__per_cpu_end = .;
+  }
   . = ALIGN(4096);
-  __init_end = .;
   /* freed after init ends here */
 	
-  __bss_start = .;		/* BSS */
-  .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) {
-	*(.bss.page_aligned)
-  }
   .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+	__init_end = .;
+	__bss_start = .;		/* BSS */
+	*(.bss.page_aligned)
 	*(.bss)
+	. = ALIGN(4);
+	__bss_stop = .;
+  	_end = . ;
+	/* This is where the kernel creates the early boot page tables */
+	. = ALIGN(4096);
+	pg0 = . ;
   }
-  . = ALIGN(4);
-  __bss_stop = .; 
-
-  _end = . ;
-
-  /* This is where the kernel creates the early boot page tables */
-  . = ALIGN(4096);
-  pg0 = .;
 
   /* Sections to be discarded */
   /DISCARD/ : {
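
The symbols moved inside section definitions above (e.g. __initcall_start/__initcall_end) become section-relative, so they track their section when the kernel image is relocated. Userspace GNU toolchains synthesize the same kind of start/stop markers automatically for orphan sections, which makes the pattern easy to try out; a sketch (section name hypothetical, GNU ld assumed):

#include <stdio.h>

/* Two entries placed in a named section; GNU ld synthesizes
 * __start_<section>/__stop_<section> markers around it, just as the
 * script above brackets .initcall.init with __initcall_start/_end. */
static int entry_a __attribute__((section("mytable"), used)) = 1;
static int entry_b __attribute__((section("mytable"), used)) = 2;

extern int __start_mytable[], __stop_mytable[];

int main(void)
{
	for (int *p = __start_mytable; p != __stop_mytable; p++)
		printf("table entry: %d\n", *p);
	return 0;
}

The bracketing idea is identical: iterate from the start marker to the stop marker and treat the bytes in between as a table.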
diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c
index 94b1fd9..a7b3999 100644
--- a/arch/i386/mach-generic/probe.c
+++ b/arch/i386/mach-generic/probe.c
@@ -45,7 +45,9 @@
 			return 0;
 		}
 	}
-	return -ENOENT;
+
+	/* Parsed again by __setup for debug/verbose */
+	return 0;
 }
 early_param("apic", parse_apic);
 
diff --git a/arch/i386/mach-voyager/voyager_cat.c b/arch/i386/mach-voyager/voyager_cat.c
index f50c6c6..943a947 100644
--- a/arch/i386/mach-voyager/voyager_cat.c
+++ b/arch/i386/mach-voyager/voyager_cat.c
@@ -776,7 +776,7 @@
 		for(asic=0; asic < (*modpp)->num_asics; asic++) {
 			int j;
 			voyager_asic_t *asicp = *asicpp 
-				= kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/
+				= kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/
 			voyager_sp_table_t *sp_table;
 			voyager_at_t *asic_table;
 			voyager_jtt_t *jtag_table;
@@ -785,7 +785,6 @@
 				printk("**WARNING** kmalloc failure in cat_init\n");
 				continue;
 			}
-			memset(asicp, 0, sizeof(voyager_asic_t));
 			asicpp = &(asicp->next);
 			asicp->asic_location = asic;
 			sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset);
@@ -851,8 +850,7 @@
 #endif
 
 		{
-			struct resource *res = kmalloc(sizeof(struct resource),GFP_KERNEL);
-			memset(res, 0, sizeof(struct resource));
+			struct resource *res = kzalloc(sizeof(struct resource),GFP_KERNEL);
 			res->name = kmalloc(128, GFP_KERNEL);
 			sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i));
 			res->start = qic_addr;
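
kzalloc(), used in both hunks above, is just kmalloc()+memset() fused into one call. The userspace analogue of the same before/after is malloc()+memset() versus calloc():

#include <stdlib.h>
#include <string.h>

struct asic { struct asic *next; int location; };

/* Before: allocate, then zero in a second step. */
static struct asic *asic_alloc_old(void)
{
	struct asic *a = malloc(sizeof(*a));
	if (a)
		memset(a, 0, sizeof(*a));
	return a;
}

/* After: one zeroing allocation -- the userspace twin of kzalloc(). */
static struct asic *asic_alloc_new(void)
{
	return calloc(1, sizeof(struct asic));
}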
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c
index f3fea2a..55428e6 100644
--- a/arch/i386/mach-voyager/voyager_smp.c
+++ b/arch/i386/mach-voyager/voyager_smp.c
@@ -28,6 +28,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/arch_hooks.h>
+#include <asm/pda.h>
 
 /* TLB state -- visible externally, indexed physically */
 DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 };
@@ -422,6 +423,7 @@
 	     VOYAGER_SUS_IN_CONTROL_PORT);
 
 	current_thread_info()->cpu = boot_cpu_id;
+	write_pda(cpu_number, boot_cpu_id);
 }
 
 /*
@@ -458,7 +460,7 @@
 	/* external functions not defined in the headers */
 	extern void calibrate_delay(void);
 
-	cpu_init();
+	secondary_cpu_init();
 
 	/* OK, we're in the routine */
 	ack_CPI(VIC_CPU_BOOT_CPI);
@@ -578,6 +580,15 @@
 	/* init_tasks (in sched.c) is indexed logically */
 	stack_start.esp = (void *) idle->thread.esp;
 
+	/* Pre-allocate and initialize the CPU's GDT and PDA so it
+	   doesn't have to do any memory allocation during the
+	   delicate CPU-bringup phase. */
+	if (!init_gdt(cpu, idle)) {
+		printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+		cpucount--;
+		return;
+	}
+
 	irq_ctx_init(cpu);
 
 	/* Note: Don't modify initial ss override */
@@ -1963,4 +1974,5 @@
 smp_setup_processor_id(void)
 {
 	current_thread_info()->cpu = hard_smp_processor_id();
+	write_pda(cpu_number, hard_smp_processor_id());
 }
diff --git a/arch/i386/math-emu/fpu_emu.h b/arch/i386/math-emu/fpu_emu.h
index d62b20a..65120f5 100644
--- a/arch/i386/math-emu/fpu_emu.h
+++ b/arch/i386/math-emu/fpu_emu.h
@@ -57,6 +57,7 @@
 #define TAG_Special	Const(2)	/* De-normal, + or - infinity,
 					   or Not a Number */
 #define TAG_Empty	Const(3)	/* empty */
+#define TAG_Error	Const(0x80)	/* probably need to abort */
 
 #define LOADED_DATA	Const(10101)	/* Special st() number to identify
 					   loaded data (not on stack). */
diff --git a/arch/i386/math-emu/fpu_entry.c b/arch/i386/math-emu/fpu_entry.c
index d93f16e..ddf8fa3 100644
--- a/arch/i386/math-emu/fpu_entry.c
+++ b/arch/i386/math-emu/fpu_entry.c
@@ -742,7 +742,8 @@
   S387->fcs &= ~0xf8000000;
   S387->fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
-  __copy_to_user(d, &S387->cwd, 7*4);
+  if (__copy_to_user(d, &S387->cwd, 7*4))
+    return -1;
   RE_ENTRANT_CHECK_ON;
 
   d += 7*4;
diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h
index bf26341..a3ae28c 100644
--- a/arch/i386/math-emu/fpu_system.h
+++ b/arch/i386/math-emu/fpu_system.h
@@ -68,6 +68,7 @@
 
 #define FPU_access_ok(x,y,z)	if ( !access_ok(x,y,z) ) \
 				math_abort(FPU_info,SIGSEGV)
+#define FPU_abort		math_abort(FPU_info, SIGSEGV)
 
 #undef FPU_IGNORE_CODE_SEGV
 #ifdef FPU_IGNORE_CODE_SEGV
diff --git a/arch/i386/math-emu/load_store.c b/arch/i386/math-emu/load_store.c
index 85314be..eebd6fb 100644
--- a/arch/i386/math-emu/load_store.c
+++ b/arch/i386/math-emu/load_store.c
@@ -227,6 +227,8 @@
     case 027:      /* fild m64int */
       clear_C1();
       loaded_tag = FPU_load_int64((long long __user *)data_address);
+      if (loaded_tag == TAG_Error)
+	return 0;
       FPU_settag0(loaded_tag);
       break;
     case 030:     /* fstenv  m14/28byte */
diff --git a/arch/i386/math-emu/reg_ld_str.c b/arch/i386/math-emu/reg_ld_str.c
index f06ed41..e976cae 100644
--- a/arch/i386/math-emu/reg_ld_str.c
+++ b/arch/i386/math-emu/reg_ld_str.c
@@ -244,7 +244,8 @@
 
   RE_ENTRANT_CHECK_OFF;
   FPU_access_ok(VERIFY_READ, _s, 8);
-  copy_from_user(&s,_s,8);
+  if (copy_from_user(&s,_s,8))
+    FPU_abort;
   RE_ENTRANT_CHECK_ON;
 
   if (s == 0)
@@ -907,7 +908,8 @@
 
   RE_ENTRANT_CHECK_OFF;
   FPU_access_ok(VERIFY_WRITE,d,8);
-  copy_to_user(d, &tll, 8);
+  if (copy_to_user(d, &tll, 8))
+    FPU_abort;
   RE_ENTRANT_CHECK_ON;
 
   return 1;
@@ -1336,7 +1338,8 @@
       I387.soft.fcs &= ~0xf8000000;
       I387.soft.fos |= 0xffff0000;
 #endif /* PECULIAR_486 */
-      __copy_to_user(d, &control_word, 7*4);
+      if (__copy_to_user(d, &control_word, 7*4))
+	FPU_abort;
       RE_ENTRANT_CHECK_ON;
       d += 0x1c;
     }
@@ -1359,9 +1362,11 @@
   FPU_access_ok(VERIFY_WRITE,d,80);
 
   /* Copy all registers in stack order. */
-  __copy_to_user(d, register_base+offset, other);
+  if (__copy_to_user(d, register_base+offset, other))
+    FPU_abort;
   if ( offset )
-    __copy_to_user(d+other, register_base, offset);
+    if (__copy_to_user(d+other, register_base, offset))
+      FPU_abort;
   RE_ENTRANT_CHECK_ON;
 
   finit();
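
The math-emu changes above stop ignoring the return value of copy_to_user()/copy_from_user(), which report how many bytes could not be copied (0 on success). A sketch of the checked-copy idiom with a stand-in copier (names hypothetical):

#include <stddef.h>
#include <string.h>

/* Stand-in for copy_to_user(): returns the number of bytes it could
 * NOT copy, i.e. 0 on success. */
static size_t copy_out(void *dst, const void *src, size_t n)
{
	memcpy(dst, src, n);
	return 0;
}

/* Checked-copy idiom: abort on a partial copy instead of silently
 * carrying on with a half-written destination. */
static int store_state(void *dst, const void *state, size_t n)
{
	if (copy_out(dst, state, n))
		return -1;
	return 0;
}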
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
index 4de11f5..4de95a1 100644
--- a/arch/i386/mm/boot_ioremap.c
+++ b/arch/i386/mm/boot_ioremap.c
@@ -16,6 +16,7 @@
  */
 
 #undef CONFIG_X86_PAE
+#undef CONFIG_PARAVIRT
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index ddbdb03..103b76e 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -168,7 +168,7 @@
 	if (nid && node_has_online_mem(nid))
 		NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
 	else {
-		NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
+		NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn));
 		min_low_pfn += PFN_UP(sizeof(pg_data_t));
 	}
 }
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 2581575..aaaa4d2 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -22,9 +22,9 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/uaccess.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
 #include <asm/desc.h>
 #include <asm/kdebug.h>
 #include <asm/segment.h>
@@ -167,7 +167,7 @@
 static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
 { 
 	unsigned long limit;
-	unsigned long instr = get_segment_eip (regs, &limit);
+	unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
 	int scan_more = 1;
 	int prefetch = 0; 
 	int i;
@@ -177,9 +177,9 @@
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
-		if (instr > limit)
+		if (instr > (unsigned char *)limit)
 			break;
-		if (__get_user(opcode, (unsigned char __user *) instr))
+		if (probe_kernel_address(instr, opcode))
 			break; 
 
 		instr_hi = opcode & 0xf0; 
@@ -204,9 +204,9 @@
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
-			if (instr > limit)
+			if (instr > (unsigned char *)limit)
 				break;
-			if (__get_user(opcode, (unsigned char __user *) instr))
+			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
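
__is_prefetch() scans the faulting instruction bytes for AMD's prefetch encodings (0x0F 0x0D and 0x0F 0x18); the patch routes each byte read through probe_kernel_address() so an unmapped address ends the scan instead of recursing into the fault handler. A grossly simplified sketch of the bounded scan (the real code also decodes prefixes; a plain read stands in for the guarded one):

#include <stdbool.h>

/* Slide over at most 15 bytes looking for the two-byte prefetch
 * encodings 0x0F 0x0D / 0x0F 0x18, stopping at the segment limit. */
static bool looks_like_prefetch(const unsigned char *instr,
				const unsigned char *limit)
{
	for (int i = 0; i < 15 && instr + 1 < limit; i++, instr++) {
		if (instr[0] == 0x0F &&
		    (instr[1] == 0x0D || instr[1] == 0x18))
			return true;
	}
	return false;
}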
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index a6a8e39..84697df 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -192,8 +192,6 @@
 	return 0;
 }
 
-extern int is_available_memory(efi_memory_desc_t *);
-
 int page_is_ram(unsigned long pagenr)
 {
 	int i;
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 8564b6a..ad91528 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -67,11 +67,17 @@
 	return base;
 } 
 
-static void flush_kernel_map(void *dummy) 
+static void flush_kernel_map(void *arg)
 { 
-	/* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
-	if (boot_cpu_data.x86_model >= 4) 
+	unsigned long adr = (unsigned long)arg;
+
+	if (adr && cpu_has_clflush) {
+		int i;
+		for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+			asm volatile("clflush (%0)" :: "r" (adr + i));
+	} else if (boot_cpu_data.x86_model >= 4)
 		wbinvd();
+
 	/* Flush all to work around Errata in early athlons regarding 
 	 * large page flushing. 
 	 */
@@ -173,9 +179,9 @@
 	return 0;
 } 
 
-static inline void flush_map(void)
+static inline void flush_map(void *adr)
 {
-	on_each_cpu(flush_kernel_map, NULL, 1, 1);
+	on_each_cpu(flush_kernel_map, adr, 1, 1);
 }
 
 /*
@@ -217,9 +223,13 @@
 	spin_lock_irq(&cpa_lock);
 	list_replace_init(&df_list, &l);
 	spin_unlock_irq(&cpa_lock);
-	flush_map();
-	list_for_each_entry_safe(pg, next, &l, lru)
+	if (!cpu_has_clflush)
+		flush_map(0);
+	list_for_each_entry_safe(pg, next, &l, lru) {
+		if (cpu_has_clflush)
+			flush_map(page_address(pg));
 		__free_page(pg);
+	}
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
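
flush_kernel_map() above now prefers targeted CLFLUSH over a full-cache wbinvd when the CPU supports it, stepping through the page at the CPU's cache-line stride. A userspace sketch with the SSE2 intrinsic, assuming a 64-byte line (the kernel reads the real stride from x86_clflush_size):

#include <stddef.h>
#include <stdint.h>
#include <emmintrin.h>	/* _mm_clflush / _mm_mfence, SSE2 */

#define CACHE_LINE 64	/* assumption; see x86_clflush_size above */

/* Flush every cache line overlapping [addr, addr + len). */
static void flush_range(const void *addr, size_t len)
{
	uintptr_t p = (uintptr_t)addr & ~(uintptr_t)(CACHE_LINE - 1);
	uintptr_t end = (uintptr_t)addr + len;

	for (; p < end; p += CACHE_LINE)
		_mm_clflush((const void *)p);
	_mm_mfence();	/* order the flushes against later accesses */
}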
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 33be236..f349eaf 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -95,8 +95,11 @@
 		return;
 	}
 	pte = pte_offset_kernel(pmd, vaddr);
-	/* <pfn,flags> stored as-is, to permit clearing entries */
-	set_pte(pte, pfn_pte(pfn, flags));
+	if (pgprot_val(flags))
+		/* <pfn,flags> stored as-is, to permit clearing entries */
+		set_pte(pte, pfn_pte(pfn, flags));
+	else
+		pte_clear(&init_mm, vaddr, pte);
 
 	/*
 	 * It's enough to flush this one mapping.
diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c
index 713d6c8..42df4b6 100644
--- a/arch/i386/pci/early.c
+++ b/arch/i386/pci/early.c
@@ -45,6 +45,13 @@
 	outl(val, 0xcfc);
 }
 
+void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
+{
+	PDprintk("%x writing to %x: %x\n", slot, offset, val);
+	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+	outb(val, 0xcfc);
+}
+
 int early_pci_allowed(void)
 {
 	return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
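
write_pci_config_byte() above follows the type-1 PCI configuration mechanism: a dword written to port 0xCF8 selects the register, packing the enable bit, bus, slot, function, and offset, and the data then moves through 0xCFC. The address encoding as a pure function:

#include <stdint.h>

/* Type-1 config address for port 0xCF8: enable bit, bus, slot
 * (device), function, and register offset. */
static uint32_t pci_conf1_addr(uint8_t bus, uint8_t slot,
			       uint8_t func, uint8_t offset)
{
	return 0x80000000u | ((uint32_t)bus << 16) |
	       ((uint32_t)slot << 11) | ((uint32_t)func << 8) | offset;
}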
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index e65551c..f2cb942 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -764,7 +764,7 @@
 	DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
 	    rt->rtr_vendor, rt->rtr_device);
 
-	pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+	pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
 	if (!pirq_router_dev) {
 		DBG(KERN_DEBUG "PCI: Interrupt router not found at "
 			"%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
@@ -784,6 +784,8 @@
 		pirq_router_dev->vendor,
 		pirq_router_dev->device,
 		pci_name(pirq_router_dev));
+
+	/* The device remains referenced for the kernel lifetime */
 }
 
 static struct irq_info *pirq_get_info(struct pci_dev *dev)
diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c
index ed1512a..5f51934 100644
--- a/arch/i386/pci/pcbios.c
+++ b/arch/i386/pci/pcbios.c
@@ -5,6 +5,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include "pci.h"
 #include "pci-functions.h"
 
@@ -314,6 +315,10 @@
 	for (check = (union bios32 *) __va(0xe0000);
 	     check <= (union bios32 *) __va(0xffff0);
 	     ++check) {
+		long sig;
+		if (probe_kernel_address(&check->fields.signature, sig))
+			continue;
+
 		if (check->fields.signature != BIOS32_SIGNATURE)
 			continue;
 		length = check->fields.length * 16;
@@ -331,11 +336,13 @@
 		}
 		DBG("PCI: BIOS32 Service Directory structure at 0x%p\n", check);
 		if (check->fields.entry >= 0x100000) {
-			printk("PCI: BIOS32 entry (0x%p) in high memory, cannot use.\n", check);
+			printk("PCI: BIOS32 entry (0x%p) in high memory, "
+					"cannot use.\n", check);
 			return NULL;
 		} else {
 			unsigned long bios32_entry = check->fields.entry;
-			DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", bios32_entry);
+			DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
+					bios32_entry);
 			bios32_indirect.address = bios32_entry + PAGE_OFFSET;
 			if (check_pcibios())
 				return &pci_bios_access;
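
bios32_probe walks the 0xE0000-0xFFFF0 BIOS area in 16-byte steps looking for the "_32_" signature, now reading each candidate through probe_kernel_address() so holes in the mapping are skipped rather than faulted on. The same 16-byte-granular signature scan, sketched over an ordinary buffer:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* "_32_" in little-endian byte order, as in the kernel's definition. */
#define BIOS32_SIGNATURE (('_' << 0) | ('3' << 8) | ('2' << 16) | ('_' << 24))

static const void *find_bios32(const uint8_t *base, size_t len)
{
	/* the directory header is always 16-byte aligned */
	for (size_t off = 0; off + 4 <= len; off += 16) {
		uint32_t sig;

		memcpy(&sig, base + off, sizeof(sig)); /* alignment-safe */
		if (sig == BIOS32_SIGNATURE)
			return base + off;
	}
	return NULL;
}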
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c
index 5a1abef..2c15500 100644
--- a/arch/i386/power/cpu.c
+++ b/arch/i386/power/cpu.c
@@ -26,8 +26,8 @@
 	/*
 	 * descriptor tables
 	 */
- 	store_gdt(&ctxt->gdt_limit);
- 	store_idt(&ctxt->idt_limit);
+ 	store_gdt(&ctxt->gdt);
+ 	store_idt(&ctxt->idt);
  	store_tr(ctxt->tr);
 
 	/*
@@ -99,8 +99,8 @@
 	 * now restore the descriptor tables to their proper values
 	 * ltr is done i fix_processor_context().
 	 */
- 	load_gdt(&ctxt->gdt_limit);
- 	load_idt(&ctxt->idt_limit);
+ 	load_gdt(&ctxt->gdt);
+ 	load_idt(&ctxt->idt);
 
 	/*
 	 * segment registers
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 5629b45..687500d 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -31,11 +31,11 @@
 {
 #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
 	/*
-	 * If CPEI cannot be re-targetted, and this is
-	 * CPEI target, then dont create the control file
+	 * If CPEI can be re-targetted, or if this is not a
+	 * CPEI target, then it is hotpluggable
 	 */
-	if (!can_cpei_retarget() && is_cpu_cpei_target(num))
-		sysfs_cpus[num].cpu.no_control = 1;
+	if (can_cpei_retarget() || !is_cpu_cpei_target(num))
+		sysfs_cpus[num].cpu.hotpluggable = 1;
 	map_cpu_to_node(num, node_cpuid[num].nid);
 #endif
 
diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S
index 58afa8b..2b2a10d 100644
--- a/arch/m68knommu/kernel/vmlinux.lds.S
+++ b/arch/m68knommu/kernel/vmlinux.lds.S
@@ -60,6 +60,7 @@
 #endif
 
 	.text : {
+		_text = .;
 		_stext = . ;
         	*(.text)
 		SCHED_TEXT
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 22123a0..63ed265 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -239,7 +239,7 @@
 	struct cpu *c = &per_cpu(cpu_devices, cpu);
 	struct sys_device *s = &c->sysdev;
 
-	BUG_ON(c->no_control);
+	BUG_ON(!c->hotpluggable);
 
 	if (!firmware_has_feature(FW_FEATURE_ISERIES) &&
 			cpu_has_feature(CPU_FTR_SMT))
@@ -424,10 +424,10 @@
 		 * CPU.  For instance, the boot cpu might never be valid
 		 * for hotplugging.
 		 */
-		if (!ppc_md.cpu_die)
-			c->no_control = 1;
+		if (ppc_md.cpu_die)
+			c->hotpluggable = 1;
 
-		if (cpu_online(cpu) || (c->no_control == 0)) {
+		if (cpu_online(cpu) || c->hotpluggable) {
 			register_cpu(c, cpu);
 
 			sysdev_create_file(&c->sysdev, &attr_physical_id);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index e8342d8..04b9867 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -33,6 +33,7 @@
 
 	/* Text and gots */
 	.text : {
+		_text = .;
 		*(.text .text.*)
 		SCHED_TEXT
 		LOCK_TEXT
diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S
index 16e8661..6192126 100644
--- a/arch/ppc/kernel/vmlinux.lds.S
+++ b/arch/ppc/kernel/vmlinux.lds.S
@@ -31,6 +31,7 @@
   .plt : { *(.plt) }
   .text      :
   {
+    _text = .;
     *(.text)
     SCHED_TEXT
     LOCK_TEXT
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 5cc5ff7..b73e6b9 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -11,6 +11,7 @@
   . = 0x10000 + SIZEOF_HEADERS;
   .text 0xf0004000 :
   {
+    _text = .;
     *(.text)
     SCHED_TEXT
     LOCK_TEXT
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index bd9de8c..4a6063f 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -13,6 +13,7 @@
   . = 0x4000;
   .text 0x0000000000404000 :
   {
+    _text = .;
     *(.text)
     SCHED_TEXT
     LOCK_TEXT
diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S
index 88d087f..3a5fd07 100644
--- a/arch/v850/kernel/vmlinux.lds.S
+++ b/arch/v850/kernel/vmlinux.lds.S
@@ -90,6 +90,7 @@
 
 /* Kernel text segment, and some constant data areas.  */
 #define TEXT_CONTENTS							      \
+		_text = .;						      \
 		__stext = . ;						      \
         	*(.text)						      \
 		SCHED_TEXT						      \
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 010d226..bfbb9bc 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -122,7 +122,7 @@
 
 choice
 	prompt "Processor family"
-	default MK8
+	default GENERIC_CPU
 
 config MK8
 	bool "AMD-Opteron/Athlon64"
@@ -130,16 +130,31 @@
 	  Optimize for AMD Opteron/Athlon64/Hammer/K8 CPUs.
 
 config MPSC
-       bool "Intel EM64T"
+       bool "Intel P4 / older Netburst based Xeon"
        help
-	  Optimize for Intel Pentium 4 and Xeon CPUs with Intel
-	  Extended Memory 64 Technology(EM64T). For details see
+	  Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs
+	  with Intel Extended Memory 64 Technology (EM64T). For details see
 	  <http://www.intel.com/technology/64bitextensions/>.
+	  Note that the latest Xeons (Xeon 51xx and 53xx) are not based on
+	  the Netburst core and shouldn't use this option. You can
+	  distinguish them using the cpu family field in /proc/cpuinfo:
+	  family 15 is an older Xeon, family 6 a newer one (this rule only
+	  applies to systems that support EM64T).
+
+config MCORE2
+	bool "Intel Core2 / newer Xeon"
+	help
+	  Optimize for Intel Core2 and newer Xeons (51xx).
+	  You can distinguish the newer Xeons from the older ones using
+	  the cpu family field in /proc/cpuinfo: 15 is an older Xeon
+	  (use CONFIG_MPSC then), 6 is a newer one. This rule only
+	  applies to CPUs that support EM64T.
 
 config GENERIC_CPU
 	bool "Generic-x86-64"
 	help
 	  Generic x86-64 CPU.
+	  Runs equally well on all x86-64 CPUs.
 
 endchoice
 
@@ -149,12 +164,12 @@
 config X86_L1_CACHE_BYTES
 	int
 	default "128" if GENERIC_CPU || MPSC
-	default "64" if MK8
+	default "64" if MK8 || MCORE2
 
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if GENERIC_CPU || MPSC
-	default "6" if MK8
+	default "6" if MK8 || MCORE2
 
 config X86_INTERNODE_CACHE_BYTES
 	int
@@ -344,11 +359,6 @@
        depends on NUMA
        default y
 
-
-config ARCH_DISCONTIGMEM_ENABLE
-	def_bool y
-	depends on NUMA
-
 config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y
 	depends on NUMA
@@ -455,6 +465,17 @@
 	  Normally the kernel will make the right choice by itself.
 	  If unsure, say Y.
 
+config CALGARY_IOMMU_ENABLED_BY_DEFAULT
+	bool "Should Calgary be enabled by default?"
+	default y
+	depends on CALGARY_IOMMU
+	help
+	  Should Calgary be enabled by default? If you choose 'y', Calgary
+	  will be used (if it exists). If you choose 'n', Calgary will not be
+	  used even if it exists. If you choose 'n' and would like to use
+	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
+	  If unsure, say Y.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	bool
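
Both help texts above send the user to the "cpu family" line of /proc/cpuinfo to decide between MPSC (family 15) and MCORE2 (family 6). A trivial reader for that line:

#include <stdio.h>
#include <string.h>

/* Print the "cpu family" line: 15 means an older Netburst Xeon
 * (CONFIG_MPSC), 6 a newer Core2-based one (CONFIG_MCORE2). */
int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/cpuinfo", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (!strncmp(line, "cpu family", 10)) {
			fputs(line, stdout);
			break;
		}
	fclose(f);
	return 0;
}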
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 6e38d4d..b471b85 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -30,6 +30,10 @@
 cflags-kernel-y	:=
 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
+# gcc doesn't support -march=core2 yet as of gcc 4.3, but I hope it
+# will eventually. Use -mtune=generic as fallback
+cflags-$(CONFIG_MCORE2) += \
+	$(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
 
 cflags-y += -m64
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 0f5d44e..96f226c 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.19-rc2-git4
-# Sat Oct 21 03:38:52 2006
+# Linux kernel version: 2.6.19-git7
+# Wed Dec  6 23:50:47 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -47,13 +47,14 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 # CONFIG_CPUSETS is not set
+CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
-# CONFIG_SYSCTL_SYSCALL is not set
+CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
@@ -87,9 +88,7 @@
 # Block layer
 #
 CONFIG_BLOCK=y
-CONFIG_LBD=y
 # CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
 
 #
 # IO Schedulers
@@ -111,10 +110,11 @@
 # CONFIG_X86_VSMP is not set
 # CONFIG_MK8 is not set
 # CONFIG_MPSC is not set
-CONFIG_GENERIC_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_L1_CACHE_SHIFT=7
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_MCORE2=y
+# CONFIG_GENERIC_CPU is not set
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_TSC=y
 CONFIG_X86_GOOD_APIC=y
 # CONFIG_MICROCODE is not set
@@ -322,6 +322,7 @@
 # CONFIG_TCP_CONG_ADVANCED is not set
 CONFIG_TCP_CONG_CUBIC=y
 CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
 CONFIG_IPV6=y
 # CONFIG_IPV6_PRIVACY is not set
 # CONFIG_IPV6_ROUTER_PREF is not set
@@ -624,6 +625,7 @@
 # CONFIG_PATA_IT821X is not set
 # CONFIG_PATA_JMICRON is not set
 # CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
 # CONFIG_PATA_MPIIX is not set
 # CONFIG_PATA_OLDPIIX is not set
 # CONFIG_PATA_NETCELL is not set
@@ -795,6 +797,7 @@
 CONFIG_S2IO=m
 # CONFIG_S2IO_NAPI is not set
 # CONFIG_MYRI10GE is not set
+# CONFIG_NETXEN_NIC is not set
 
 #
 # Token Ring devices
@@ -927,10 +930,6 @@
 # CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_APPLICOM is not set
-
-#
-# Ftape, the floppy tape device driver
-#
 CONFIG_AGP=y
 CONFIG_AGP_AMD64=y
 CONFIG_AGP_INTEL=y
@@ -1135,6 +1134,7 @@
 # CONFIG_USB_BANDWIDTH is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
 # CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_MULTITHREAD_PROBE is not set
 # CONFIG_USB_OTG is not set
 
 #
@@ -1212,6 +1212,7 @@
 # CONFIG_USB_KAWETH is not set
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET_MII is not set
 # CONFIG_USB_USBNET is not set
 CONFIG_USB_MON=y
 
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 0e0a266..ff499ef 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -584,6 +584,9 @@
 	regs->rdx = (unsigned long) &frame->info;
 	regs->rcx = (unsigned long) &frame->uc;
 
+	/* Make -mregparm=3 work */
+	regs->rax = sig;
+
 	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 
 	asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 
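
The hunk pre-loads %eax (the context above already holds the info/uc pointers in %edx/%ecx) because an ia32 signal handler built with -mregparm=3 receives its first three arguments in registers rather than on the stack. The convention can be requested per function in C; a 32-bit-only sketch (build with gcc -m32):

#include <stdio.h>

/* regparm(3) makes the first three integer arguments arrive in
 * %eax/%edx/%ecx -- exactly what the block above seeds. */
static void __attribute__((regparm(3)))
handler(int sig, void *info, void *uc)
{
	printf("sig=%d info=%p uc=%p\n", sig, info, uc);
}

int main(void)
{
	handler(7, NULL, NULL);
	return 0;
}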
 	
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 4d9d5ed..124b2d2 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/module.h>
+#include <linux/ioport.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -45,6 +46,12 @@
 
 int disable_apic_timer __initdata;
 
+static struct resource *ioapic_resources;
+static struct resource lapic_resource = {
+	.name = "Local APIC",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
 /*
  * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
  * IPIs in place of local APIC timers
@@ -133,7 +140,6 @@
 		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
 	if (maxlvt >= 4)
 		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-	v = GET_APIC_VERSION(apic_read(APIC_LVR));
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 }
@@ -452,23 +458,30 @@
 static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 {
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
 	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
 	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-	apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+	if (maxlvt >= 4)
+		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
 	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
 	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-	apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#ifdef CONFIG_X86_MCE_INTEL
+	if (maxlvt >= 5)
+		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
 	local_irq_save(flags);
 	disable_local_APIC();
 	local_irq_restore(flags);
@@ -479,10 +492,13 @@
 {
 	unsigned int l, h;
 	unsigned long flags;
+	int maxlvt;
 
 	if (!apic_pm_state.active)
 		return 0;
 
+	maxlvt = get_maxlvt();
+
 	local_irq_save(flags);
 	rdmsr(MSR_IA32_APICBASE, l, h);
 	l &= ~MSR_IA32_APICBASE_BASE;
@@ -496,8 +512,12 @@
 	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
 	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
 	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-	apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-	apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+#ifdef CONFIG_X86_MCE_INTEL
+	if (maxlvt >= 5)
+		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
 	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
 	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
 	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
@@ -585,6 +605,64 @@
 	return 0;
 }
 
+#ifdef CONFIG_X86_IO_APIC
+static struct resource * __init ioapic_setup_resources(void)
+{
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i;
+
+	if (nr_ioapics <= 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= nr_ioapics;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	if (mem != NULL) {
+		memset(mem, 0, n);
+		mem += sizeof(struct resource) * nr_ioapics;
+
+		for (i = 0; i < nr_ioapics; i++) {
+			res[i].name = mem;
+			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+			sprintf(mem,  "IOAPIC %u", i);
+			mem += IOAPIC_RESOURCE_NAME_SIZE;
+		}
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+
+static int __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		printk("IO APIC resources could be not be allocated.\n");
+		return -1;
+	}
+
+	for (i = 0; i < nr_ioapics; i++) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+
+	return 0;
+}
+
+/* Insert the IO APIC resources after PCI initialization has occurred to
+ * handle IO APICs that are mapped in on a BAR in PCI space. */
+late_initcall(ioapic_insert_resources);
+#endif
+
 void __init init_apic_mappings(void)
 {
 	unsigned long apic_phys;
@@ -604,6 +682,11 @@
 	apic_mapped = 1;
 	apic_printk(APIC_VERBOSE,"mapped APIC to %16lx (%16lx)\n", APIC_BASE, apic_phys);
 
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
@@ -613,7 +696,9 @@
 	{
 		unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 		int i;
+		struct resource *ioapic_res;
 
+		ioapic_res = ioapic_setup_resources();
 		for (i = 0; i < nr_ioapics; i++) {
 			if (smp_found_config) {
 				ioapic_phys = mp_ioapics[i].mpc_apicaddr;
@@ -625,6 +710,12 @@
 			apic_printk(APIC_VERBOSE,"mapped IOAPIC to %016lx (%016lx)\n",
 					__fix_to_virt(idx), ioapic_phys);
 			idx++;
+
+			if (ioapic_res != NULL) {
+				ioapic_res->start = ioapic_phys;
+				ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+				ioapic_res++;
+			}
 		}
 	}
 }
@@ -644,10 +735,9 @@
 
 static void __setup_APIC_LVTT(unsigned int clocks)
 {
-	unsigned int lvtt_value, tmp_value, ver;
+	unsigned int lvtt_value, tmp_value;
 	int cpu = smp_processor_id();
 
-	ver = GET_APIC_VERSION(apic_read(APIC_LVR));
 	lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
 
 	if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
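
ioapic_setup_resources() above sizes one bootmem block to hold the whole resource array plus its name strings, then carves the names out of the tail. The same single-allocation packing in plain C (struct and sizes simplified from the hunk):

#include <stdio.h>
#include <stdlib.h>

struct res { const char *name; unsigned long start, end; };

#define NAME_SZ 11	/* "IOAPIC NNN" plus NUL, as in the hunk above */

/* One block holds n resource structs followed by n name buffers, so a
 * single free (or, above, a single bootmem allocation) covers both. */
static struct res *alloc_resources(int n)
{
	char *mem = calloc(1, n * (sizeof(struct res) + NAME_SZ));
	struct res *r = (struct res *)mem;
	char *name;
	int i;

	if (!mem)
		return NULL;
	name = mem + n * sizeof(struct res);
	for (i = 0; i < n; i++) {
		r[i].name = name;
		snprintf(name, NAME_SZ, "IOAPIC %d", i);
		name += NAME_SZ;
	}
	return r;
}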
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 68273bf..829698f 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -69,11 +69,18 @@
 
 static void ati_bugs(void)
 {
-	if (timer_over_8254 == 1) {
-		timer_over_8254 = 0;
-		printk(KERN_INFO
-	 	"ATI board detected. Disabling timer routing over 8254.\n");
-	}
+}
+
+static void intel_bugs(void)
+{
+	u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
+
+#ifdef CONFIG_SMP
+	if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
+	    device == PCI_DEVICE_ID_INTEL_E7525_MCH)
+		quirk_intel_irqbalance();
+#endif
 }
 
 struct chipset {
@@ -85,6 +92,7 @@
 	{ PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
 	{ PCI_VENDOR_ID_VIA, via_bugs },
 	{ PCI_VENDOR_ID_ATI, ati_bugs },
+	{ PCI_VENDOR_ID_INTEL, intel_bugs},
 	{}
 };
 
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7d401b0..601d332 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -230,7 +230,6 @@
 	CFI_REL_OFFSET rip,RIP-ARGOFFSET
 	GET_THREAD_INFO(%rcx)
 	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
-	CFI_REMEMBER_STATE
 	jnz tracesys
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
@@ -241,7 +240,6 @@
  * Syscall return path ending with SYSRET (fast path)
  * Has incomplete stack frame and undefined top of stack. 
  */		
-	.globl ret_from_sys_call
 ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	flagmask */
@@ -251,8 +249,8 @@
 	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
-	CFI_REMEMBER_STATE
 	jnz  sysret_careful 
+	CFI_REMEMBER_STATE
 	/*
 	 * sysretq will re-enable interrupts:
 	 */
@@ -265,10 +263,10 @@
 	swapgs
 	sysretq
 
+	CFI_RESTORE_STATE
 	/* Handle reschedules */
 	/* edx:	work, edi: workmask */	
 sysret_careful:
-	CFI_RESTORE_STATE
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
 	TRACE_IRQS_ON
@@ -306,7 +304,6 @@
 
 	/* Do syscall tracing */
 tracesys:			 
-	CFI_RESTORE_STATE
 	SAVE_REST
 	movq $-ENOSYS,RAX(%rsp)
 	FIXUP_TOP_OF_STACK %rdi
@@ -322,32 +319,13 @@
 	call *sys_call_table(,%rax,8)
 1:	movq %rax,RAX-ARGOFFSET(%rsp)
 	/* Use IRET because user could have changed frame */
-	jmp int_ret_from_sys_call
-	CFI_ENDPROC
-END(system_call)
 		
 /* 
  * Syscall return path ending with IRET.
  * Has correct top of stack, but partial stack frame.
- */ 	
-ENTRY(int_ret_from_sys_call)
-	CFI_STARTPROC	simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
-	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
-	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
-	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
-	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
-	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
-	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
-	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
-	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
-	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
-	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
-	CFI_REL_OFFSET	r8,R8-ARGOFFSET
-	CFI_REL_OFFSET	r9,R9-ARGOFFSET
-	CFI_REL_OFFSET	r10,R10-ARGOFFSET
-	CFI_REL_OFFSET	r11,R11-ARGOFFSET
+ */
+	.globl int_ret_from_sys_call
+int_ret_from_sys_call:
 	cli
 	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
@@ -394,8 +372,6 @@
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
-	cli
-	TRACE_IRQS_OFF
 	jmp int_restore_rest
 	
 int_signal:
@@ -411,7 +387,7 @@
 	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
-END(int_ret_from_sys_call)
+END(system_call)
 		
 /* 
  * Certain special system calls that need to save a complete full stack frame.
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 8e78a75..b007433 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -33,7 +33,7 @@
 extern struct genapic apic_physflat;
 
 struct genapic *genapic = &apic_flat;
-
+struct genapic *genapic_force;
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
@@ -46,6 +46,13 @@
 	u8 cluster_cnt[NUM_APIC_CLUSTERS];
 	int max_apic = 0;
 
+	/* genapic selection can be forced because of certain quirks.
+	 */
+	if (genapic_force) {
+		genapic = genapic_force;
+		goto print;
+	}
+
 #if defined(CONFIG_ACPI)
 	/*
 	 * Some x86_64 machines use physical APIC mode regardless of how many
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index 9561eb3..cc230b9 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -57,10 +57,12 @@
 {
 	int i;
 
-	for (i = 0; i < 256; i++)
+	/* clear bss before set_intr_gate with early_idt_handler */
+	clear_bss();
+
+	for (i = 0; i < IDT_ENTRIES; i++)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
-	clear_bss();
 
 	early_printk("Kernel alive\n");
 
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
index 3aa1e9b..1d58c13 100644
--- a/arch/x86_64/kernel/i387.c
+++ b/arch/x86_64/kernel/i387.c
@@ -82,11 +82,8 @@
 	struct task_struct *tsk = current;
 	int err = 0;
 
-	{ 
-		extern void bad_user_i387_struct(void); 
-		if (sizeof(struct user_i387_struct) != sizeof(tsk->thread.i387.fxsave))
-			bad_user_i387_struct();
-	} 
+	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
+			sizeof(tsk->thread.i387.fxsave));
 
 	if ((unsigned long)buf % 16) 
 		printk("save_i387: bad fpstate %p\n",buf); 
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index c4ef801..d73c79e 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -76,7 +76,8 @@
 	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
 	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
 
-void (*interrupt[NR_IRQS])(void) = {
+/* for the irq vectors */
+static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = {
 					  IRQLIST_16(0x2), IRQLIST_16(0x3),
 	IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
 	IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c80081a..2a1dcd5 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -55,10 +55,6 @@
 
 static int no_timer_check;
 
-static int disable_timer_pin_1 __initdata;
-
-int timer_over_8254 __initdata = 1;
-
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
@@ -178,14 +174,20 @@
  * the interrupt, and we need to make sure the entry is fully populated
  * before that happens.
  */
+static void
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	union entry_union eu;
+	eu.entry = e;
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
 static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
 	unsigned long flags;
-	union entry_union eu;
-	eu.entry = e;
 	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
-	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+	__ioapic_write_entry(apic, pin, e);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -348,29 +350,6 @@
 }
 early_param("noapic", disable_ioapic_setup);
 
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-	disable_timer_pin_1 = 1;
-	return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
-static int __init setup_disable_8254_timer(char *s)
-{
-	timer_over_8254 = -1;
-	return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
-	timer_over_8254 = 2;
-	return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
-
 /*
  * Find the IRQ entry number of a certain pin.
  */
@@ -750,6 +729,22 @@
 	return vector;
 }
 
+static void __clear_irq_vector(int irq)
+{
+	cpumask_t mask;
+	int cpu, vector;
+
+	BUG_ON(!irq_vector[irq]);
+
+	vector = irq_vector[irq];
+	cpus_and(mask, irq_domain[irq], cpu_online_map);
+	for_each_cpu_mask(cpu, mask)
+		per_cpu(vector_irq, cpu)[vector] = -1;
+
+	irq_vector[irq] = 0;
+	irq_domain[irq] = CPU_MASK_NONE;
+}
+
 void __setup_vector_irq(int cpu)
 {
 	/* Initialize vector_irq on a new cpu */
@@ -794,28 +789,66 @@
 					      handle_edge_irq, "edge");
 	}
 }
+static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
+{
+	struct IO_APIC_route_entry entry;
+	int vector;
+	unsigned long flags;
+
+
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(&entry,0,sizeof(entry));
+
+	entry.delivery_mode = INT_DELIVERY_MODE;
+	entry.dest_mode = INT_DEST_MODE;
+	entry.mask = 0;				/* enable IRQ */
+	entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+	entry.trigger = irq_trigger(idx);
+	entry.polarity = irq_polarity(idx);
+
+	if (irq_trigger(idx)) {
+		entry.trigger = 1;
+		entry.mask = 1;
+		entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+	}
+
+	if (!apic && !IO_APIC_IRQ(irq))
+		return;
+
+	if (IO_APIC_IRQ(irq)) {
+		cpumask_t mask;
+		vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
+		if (vector < 0)
+			return;
+
+		entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+		entry.vector = vector;
+
+		ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+		if (!apic && (irq < 16))
+			disable_8259A_irq(irq);
+	}
+
+	ioapic_write_entry(apic, pin, entry);
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	set_native_irq_info(irq, TARGET_CPUS);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+}
 
 static void __init setup_IO_APIC_irqs(void)
 {
-	struct IO_APIC_route_entry entry;
-	int apic, pin, idx, irq, first_notcon = 1, vector;
-	unsigned long flags;
+	int apic, pin, idx, irq, first_notcon = 1;
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
 	for (apic = 0; apic < nr_ioapics; apic++) {
 	for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
-		/*
-		 * add it to the IO-APIC irq-routing table:
-		 */
-		memset(&entry,0,sizeof(entry));
-
-		entry.delivery_mode = INT_DELIVERY_MODE;
-		entry.dest_mode = INT_DEST_MODE;
-		entry.mask = 0;				/* enable IRQ */
-		entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-
 		idx = find_irq_entry(apic,pin,mp_INT);
 		if (idx == -1) {
 			if (first_notcon) {
@@ -826,39 +859,11 @@
 			continue;
 		}
 
-		entry.trigger = irq_trigger(idx);
-		entry.polarity = irq_polarity(idx);
-
-		if (irq_trigger(idx)) {
-			entry.trigger = 1;
-			entry.mask = 1;
-			entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
-		}
-
 		irq = pin_2_irq(idx, apic, pin);
 		add_pin_to_irq(irq, apic, pin);
 
-		if (!apic && !IO_APIC_IRQ(irq))
-			continue;
+		setup_IO_APIC_irq(apic, pin, idx, irq);
 
-		if (IO_APIC_IRQ(irq)) {
-			cpumask_t mask;
-			vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
-			if (vector < 0)
-				continue;
-
-			entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
-			entry.vector = vector;
-
-			ioapic_register_intr(irq, vector, IOAPIC_AUTO);
-			if (!apic && (irq < 16))
-				disable_8259A_irq(irq);
-		}
-		ioapic_write_entry(apic, pin, entry);
-
-		spin_lock_irqsave(&ioapic_lock, flags);
-		set_native_irq_info(irq, TARGET_CPUS);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 	}
 
@@ -1563,10 +1568,33 @@
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
- *
- * FIXME: really need to revamp this for modern platforms only.
  */
-static inline void check_timer(void)
+
+static int try_apic_pin(int apic, int pin, char *msg)
+{
+	apic_printk(APIC_VERBOSE, KERN_INFO
+		    "..TIMER: trying IO-APIC=%d PIN=%d %s",
+		    apic, pin, msg);
+
+	/*
+	 * Ok, does IRQ0 through the IOAPIC work?
+	 */
+	if (!no_timer_check && timer_irq_works()) {
+		nmi_watchdog_default();
+		if (nmi_watchdog == NMI_IO_APIC) {
+			disable_8259A_irq(0);
+			setup_nmi();
+			enable_8259A_irq(0);
+		}
+		return 1;
+	}
+	clear_IO_APIC_pin(apic, pin);
+	apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
+	return 0;
+}
+
+/* The function from hell */
+static void check_timer(void)
 {
 	int apic1, pin1, apic2, pin2;
 	int vector;
@@ -1587,61 +1615,43 @@
 	 */
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
-	if (timer_over_8254 > 0)
-		enable_8259A_irq(0);
 
 	pin1  = find_isa_irq_pin(0, mp_INT);
 	apic1 = find_isa_irq_apic(0, mp_INT);
 	pin2  = ioapic_i8259.pin;
 	apic2 = ioapic_i8259.apic;
 
-	apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
-		vector, apic1, pin1, apic2, pin2);
+	/* Do this first, otherwise we get double interrupts on ATI boards */
+	if ((pin1 != -1) && try_apic_pin(apic1, pin1, "with 8259 IRQ0 disabled"))
+		return;
 
-	if (pin1 != -1) {
-		/*
-		 * Ok, does IRQ0 through the IOAPIC work?
-		 */
-		unmask_IO_APIC_irq(0);
-		if (!no_timer_check && timer_irq_works()) {
-			nmi_watchdog_default();
-			if (nmi_watchdog == NMI_IO_APIC) {
-				disable_8259A_irq(0);
-				setup_nmi();
-				enable_8259A_irq(0);
-			}
-			if (disable_timer_pin_1 > 0)
-				clear_IO_APIC_pin(0, pin1);
-			return;
-		}
-		clear_IO_APIC_pin(apic1, pin1);
-		apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
-				"connected to IO-APIC\n");
-	}
+	/* Now try again with IRQ0 8259A enabled.
+	   Assumes timer is on IO-APIC 0 ?!? */
+	enable_8259A_irq(0);
+	unmask_IO_APIC_irq(0);
+	if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled"))
+		return;
+	disable_8259A_irq(0);
 
-	apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
-				"through the 8259A ... ");
+	/* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides
+	   on Nvidia boards */
+	if (!(apic1 == 0 && pin1 == 0) &&
+	    try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled"))
+		return;
+	if (!(apic1 == 0 && pin1 == 2) &&
+	    try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled"))
+		return;
+
+	/* Then try pure 8259A routing on the 8259 as reported by BIOS*/
+	enable_8259A_irq(0);
 	if (pin2 != -1) {
-		apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
-			apic2, pin2);
-		/*
-		 * legacy devices should be connected to IO APIC #0
-		 */
 		setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
-		if (timer_irq_works()) {
-			apic_printk(APIC_VERBOSE," works.\n");
-			nmi_watchdog_default();
-			if (nmi_watchdog == NMI_IO_APIC) {
-				setup_nmi();
-			}
+		if (try_apic_pin(apic2, pin2, "8259A broadcast ExtINT from BIOS"))
 			return;
-		}
-		/*
-		 * Cleanup, just in case ...
-		 */
-		clear_IO_APIC_pin(apic2, pin2);
 	}
-	apic_printk(APIC_VERBOSE," failed.\n");
+
+	/* Tried all possibilities to go through the IO-APIC. Now come the
+	   really cheesy fallbacks. */
 
 	if (nmi_watchdog == NMI_IO_APIC) {
 		printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
@@ -1837,7 +1847,7 @@
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
-	irq_vector[irq] = 0;
+	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
@@ -2139,7 +2149,15 @@
 			if (irq_entry == -1)
 				continue;
 			irq = pin_2_irq(irq_entry, ioapic, pin);
-			set_ioapic_affinity_irq(irq, TARGET_CPUS);
+
+			/* setup_IO_APIC_irqs() could fail to get a vector for
+			 * some devices when there are too many of them, because
+			 * at that time only the boot CPU is online.
+			 */
+			if (!irq_vector[irq])
+				setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
+			else
+				set_ioapic_affinity_irq(irq, TARGET_CPUS);
 		}
 
 	}
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index e46c558..0c06af6 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -120,7 +120,7 @@
 
 	if (likely(irq < NR_IRQS))
 		generic_handle_irq(irq);
-	else
+	else if (printk_ratelimit())
 		printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
 			__func__, smp_processor_id(), vector);
 
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index bc863c4..ac08503 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -651,6 +651,7 @@
 	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
 	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
 	sysdev_unregister(&per_cpu(device_mce,cpu));
+	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
 }
 
 /* Get notified when a cpu comes on/off. Be hotplug friendly. */
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b147ab1..0807256 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -35,8 +35,6 @@
 int smp_found_config;
 unsigned int __initdata maxcpus = NR_CPUS;
 
-int acpi_found_madt;
-
 /*
  * Various Linux-internal data structures created from the
  * MP-table.
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 7af9cb3..27e95e7 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -12,14 +12,15 @@
  *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
  */
 
+#include <linux/nmi.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
-#include <linux/nmi.h>
 #include <linux/sysctl.h>
 #include <linux/kprobes.h>
+#include <linux/cpumask.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
@@ -41,6 +42,8 @@
 static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
 static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
 
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
+
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
  */
@@ -782,6 +785,7 @@
 {
 	int sum;
 	int touched = 0;
+	int cpu = smp_processor_id();
 	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 	u64 dummy;
 	int rc=0;
@@ -799,6 +803,16 @@
 		touched = 1;
 	}
 
+	if (cpu_isset(cpu, backtrace_mask)) {
+		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */
+
+		spin_lock(&lock);
+		printk("NMI backtrace for cpu %d\n", cpu);
+		dump_stack();
+		spin_unlock(&lock);
+		cpu_clear(cpu, backtrace_mask);
+	}
+
 #ifdef CONFIG_X86_MCE
 	/* Could check oops_in_progress here too, but it's safer
 	   not to */
@@ -931,6 +945,19 @@
 
 #endif
 
+void __trigger_all_cpu_backtrace(void)
+{
+	int i;
+
+	backtrace_mask = cpu_online_map;
+	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
+	for (i = 0; i < 10 * 1000; i++) {
+		if (cpus_empty(backtrace_mask))
+			break;
+		mdelay(1);
+	}
+}
+
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
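The backtrace_mask handshake above is worth spelling out: the initiator publishes a mask of online CPUs, each CPU's next NMI tick prints its stack and clears its own bit, and the initiator polls for up to ten seconds. A rough userspace model, with threads standing in for CPUs (names are illustrative; compile with -pthread):

#include <stdio.h>
#include <pthread.h>
#include <stdatomic.h>
#include <unistd.h>

#define NCPUS 4
static atomic_uint backtrace_mask;	/* plays the cpumask_t */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *nmi_tick(void *arg)	/* per-cpu nmi_watchdog_tick() hook */
{
	unsigned cpu = (unsigned)(long)arg;

	while (!(atomic_load(&backtrace_mask) & (1u << cpu)))
		;			/* wait for the next "NMI" */
	pthread_mutex_lock(&lock);	/* serialise the printks */
	printf("NMI backtrace for cpu %u\n", cpu);
	pthread_mutex_unlock(&lock);
	atomic_fetch_and(&backtrace_mask, ~(1u << cpu));
	return NULL;
}

int main(void)			/* plays __trigger_all_cpu_backtrace() */
{
	pthread_t t[NCPUS];
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, nmi_tick, (void *)(long)i);
	atomic_store(&backtrace_mask, (1u << NCPUS) - 1);

	/* wait up to 10 seconds for every CPU to clear its bit */
	for (i = 0; i < 10 * 1000 && atomic_load(&backtrace_mask); i++)
		usleep(1000);		/* mdelay(1) stand-in */

	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}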
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 37a7708..3215675 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -41,6 +41,13 @@
 #include <asm/pci-direct.h>
 #include <asm/system.h>
 #include <asm/dma.h>
+#include <asm/rio.h>
+
+#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
+int use_calgary __read_mostly = 1;
+#else
+int use_calgary __read_mostly = 0;
+#endif /* CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT */
 
 #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
 #define PCI_VENDOR_DEVICE_ID_CALGARY \
@@ -115,14 +122,35 @@
 	0xB000 /* PHB3 */
 };
 
+/* PHB debug registers */
+
+static const unsigned long phb_debug_offsets[] = {
+	0x4000	/* PHB 0 DEBUG */,
+	0x5000	/* PHB 1 DEBUG */,
+	0x6000	/* PHB 2 DEBUG */,
+	0x7000	/* PHB 3 DEBUG */
+};
+
+/*
+ * STUFF register for each debug PHB,
+ * byte 1 = start bus number, byte 2 = end bus number
+ */
+
+#define PHB_DEBUG_STUFF_OFFSET	0x0020
+
 unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
 static int translate_empty_slots __read_mostly = 0;
 static int calgary_detected __read_mostly = 0;
 
+static struct rio_table_hdr	*rio_table_hdr __initdata;
+static struct scal_detail	*scal_devs[MAX_NUMNODES] __initdata;
+static struct rio_detail	*rio_devs[MAX_NUMNODES * 4] __initdata;
+
 struct calgary_bus_info {
 	void *tce_space;
 	unsigned char translation_disabled;
 	signed char phbid;
+	void __iomem *bbar;
 };
 
 static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
@@ -475,6 +503,11 @@
 	.unmap_sg = calgary_unmap_sg,
 };
 
+static inline void __iomem *busno_to_bbar(unsigned char num)
+{
+	return bus_info[num].bbar;
+}
+
 static inline int busno_to_phbid(unsigned char num)
 {
 	return bus_info[num].phbid;
@@ -620,14 +653,9 @@
 static void __init calgary_reserve_regions(struct pci_dev *dev)
 {
 	unsigned int npages;
-	void __iomem *bbar;
-	unsigned char busnum;
 	u64 start;
 	struct iommu_table *tbl = dev->sysdata;
 
-	bbar = tbl->bbar;
-	busnum = dev->bus->number;
-
 	/* reserve bad_dma_address in case it's a legal address */
 	iommu_range_reserve(tbl, bad_dma_address, 1);
 
@@ -740,7 +768,7 @@
 {
 	u64 val64;
 	void __iomem *target;
-	unsigned long phb_shift = -1;
+	unsigned int phb_shift = ~0; /* silence gcc */
 	u64 mask;
 
 	switch (busno_to_phbid(busnum)) {
@@ -828,33 +856,6 @@
 	del_timer_sync(&tbl->watchdog_timer);
 }
 
-static inline unsigned int __init locate_register_space(struct pci_dev *dev)
-{
-	int rionodeid;
-	u32 address;
-
-	/*
-	 * Each Calgary has four busses. The first four busses (first Calgary)
-	 * have RIO node ID 2, then the next four (second Calgary) have RIO
-	 * node ID 3, the next four (third Calgary) have node ID 2 again, etc.
-	 * We use a gross hack - relying on the dev->bus->number ordering,
-	 * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1,
-	 * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the
-	 * second (id 3), and then it repeats modulo 14.
- 	 */
-	rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2;
-	/*
-	 * register space address calculation as follows:
-	 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
-	 * ChassisBase is always zero for x366/x260/x460
-	 * RioNodeId is 2 for first Calgary, 3 for second Calgary
-	 */
-	address = START_ADDRESS	-
-		(0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) +
-		(0x100000) * (rionodeid - CHASSIS_BASE);
-	return address;
-}
-
 static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
 {
 	pci_dev_get(dev);
@@ -864,23 +865,15 @@
 
 static int __init calgary_init_one(struct pci_dev *dev)
 {
-	u32 address;
 	void __iomem *bbar;
 	int ret;
 
 	BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
 
-	address = locate_register_space(dev);
-	/* map entire 1MB of Calgary config space */
-	bbar = ioremap_nocache(address, 1024 * 1024);
-	if (!bbar) {
-		ret = -ENODATA;
-		goto done;
-	}
-
+	bbar = busno_to_bbar(dev->bus->number);
 	ret = calgary_setup_tar(dev, bbar);
 	if (ret)
-		goto iounmap;
+		goto done;
 
 	pci_dev_get(dev);
 	dev->bus->self = dev;
@@ -888,17 +881,66 @@
 
 	return 0;
 
-iounmap:
-	iounmap(bbar);
 done:
 	return ret;
 }
 
+static int __init calgary_locate_bbars(void)
+{
+	int ret;
+	int rioidx, phb, bus;
+	void __iomem *bbar;
+	void __iomem *target;
+	unsigned long offset;
+	u8 start_bus, end_bus;
+	u32 val;
+
+	ret = -ENODATA;
+	for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
+		struct rio_detail *rio = rio_devs[rioidx];
+
+		if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
+			continue;
+
+		/* map entire 1MB of Calgary config space */
+		bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
+		if (!bbar)
+			goto error;
+
+		for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
+			offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
+			target = calgary_reg(bbar, offset);
+
+			val = be32_to_cpu(readl(target));
+			start_bus = (u8)((val & 0x00FF0000) >> 16);
+			end_bus = (u8)((val & 0x0000FF00) >> 8);
+			for (bus = start_bus; bus <= end_bus; bus++) {
+				bus_info[bus].bbar = bbar;
+				bus_info[bus].phbid = phb;
+			}
+		}
+	}
+
+	return 0;
+
+error:
+	/* scan bus_info and iounmap any bbars we previously ioremap'd */
+	for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
+		if (bus_info[bus].bbar)
+			iounmap(bus_info[bus].bbar);
+
+	return ret;
+}
+
 static int __init calgary_init(void)
 {
-	int ret = -ENODEV;
+	int ret;
 	struct pci_dev *dev = NULL;
 
+	ret = calgary_locate_bbars();
+	if (ret)
+		return ret;
+
 	do {
 		dev = pci_get_device(PCI_VENDOR_ID_IBM,
 				     PCI_DEVICE_ID_IBM_CALGARY,
@@ -921,7 +963,7 @@
 
 error:
 	do {
-		dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
+		dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
 					      PCI_DEVICE_ID_IBM_CALGARY,
 					      dev);
 		if (!dev)
@@ -962,13 +1004,56 @@
 	return ret;
 }
 
+static int __init build_detail_arrays(void)
+{
+	unsigned long ptr;
+	int i, scal_detail_size, rio_detail_size;
+
+	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
+		printk(KERN_WARNING
+			"Calgary: MAX_NUMNODES too low! Defined as %d, "
+			"but system has %d nodes.\n",
+			MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+		return -ENODEV;
+	}
+
+	switch (rio_table_hdr->version) {
+	case 2:
+		scal_detail_size = 11;
+		rio_detail_size = 13;
+		break;
+	case 3:
+		scal_detail_size = 12;
+		rio_detail_size = 15;
+		break;
+	default:
+		printk(KERN_WARNING
+		       "Calgary: Invalid Rio Grande Table Version: %d\n",
+		       rio_table_hdr->version);
+		return -EPROTO;
+	}
+
+	ptr = ((unsigned long)rio_table_hdr) + 3;
+	for (i = 0; i < rio_table_hdr->num_scal_dev;
+		    i++, ptr += scal_detail_size)
+		scal_devs[i] = (struct scal_detail *)ptr;
+
+	for (i = 0; i < rio_table_hdr->num_rio_dev;
+		    i++, ptr += rio_detail_size)
+		rio_devs[i] = (struct rio_detail *)ptr;
+
+	return 0;
+}
+
 void __init detect_calgary(void)
 {
 	u32 val;
 	int bus;
 	void *tbl;
 	int calgary_found = 0;
-	int phb = -1;
+	unsigned long ptr;
+	int offset;
+	int ret;
 
 	/*
 	 * if the user specified iommu=off or iommu=soft or we found
@@ -977,25 +1062,47 @@
 	if (swiotlb || no_iommu || iommu_detected)
 		return;
 
+	if (!use_calgary)
+		return;
+
 	if (!early_pci_allowed())
 		return;
 
+	ptr = (unsigned long)phys_to_virt(get_bios_ebda());
+
+	rio_table_hdr = NULL;
+	offset = 0x180;
+	while (offset) {
+		/* The block id is stored in the 2nd word */
+		if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
+			/* set the pointer past the offset & block id */
+			rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
+			break;
+		}
+		/* The next offset is stored in the 1st word. 0 means no more */
+		offset = *((unsigned short *)(ptr + offset));
+	}
+	if (!rio_table_hdr) {
+		printk(KERN_ERR "Calgary: Unable to locate "
+				"Rio Grande Table in EBDA - bailing!\n");
+		return;
+	}
+
+	ret = build_detail_arrays();
+	if (ret) {
+		printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret);
+		return;
+	}
+
 	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
 
 	for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
 		int dev;
 		struct calgary_bus_info *info = &bus_info[bus];
-		info->phbid = -1;
 
 		if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
 			continue;
 
-		/*
-		 * There are 4 PHBs per Calgary chip.  Set phb to which phb (0-3)
-		 * it is connected to releative to the clagary chip.
-		 */
-		phb = (phb + 1) % PHBS_PER_CALGARY;
-
 		if (info->translation_disabled)
 			continue;
 
@@ -1010,7 +1117,6 @@
 				if (!tbl)
 					goto cleanup;
 				info->tce_space = tbl;
-				info->phbid = phb;
 				calgary_found = 1;
 				break;
 			}
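calgary_locate_bbars() replaces the old modulo-14 bus heuristic with hardware-reported bus ranges: each PHB's debug-space STUFF register encodes the start and end bus numbers in bytes 1 and 2 of a big-endian word. A tiny sketch of the decode (the register value below is made up, not real hardware data):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t val = 0x000a1200;	/* illustrative readout */
	uint8_t start_bus = (val & 0x00FF0000) >> 16;	/* 0x0a */
	uint8_t end_bus   = (val & 0x0000FF00) >> 8;	/* 0x12 */
	int bus;

	/* every bus in the range shares this Calgary's ioremapped bbar */
	for (bus = start_bus; bus <= end_bus; bus++)
		printf("bus 0x%02x -> bbar of this PHB's Calgary\n", bus);
	return 0;
}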
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index f8d8574..683b7a5 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -296,6 +296,11 @@
 		gart_parse_options(p);
 #endif
 
+#ifdef CONFIG_CALGARY_IOMMU
+		if (!strncmp(p, "calgary", 7))
+			use_calgary = 1;
+#endif /* CONFIG_CALGARY_IOMMU */
+
 		p += strcspn(p, ",");
 		if (*p == ',')
 			++p;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 16261a8..fc1960f 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -601,10 +601,9 @@
 	    (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
 	    !iommu_aperture ||
 	    (no_agp && init_k8_gatt(&info) < 0)) {
-		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
 		if (end_pfn > MAX_DMA32_PFN) {
 			printk(KERN_ERR "WARNING more than 4GB of memory "
-					"but IOMMU not available.\n"
+					"but GART IOMMU not available.\n"
 			       KERN_ERR "WARNING 32bit PCI may malfunction.\n");
 		}
 		return;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 7451a4c..a418ee4 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -108,17 +108,15 @@
  */
 static void default_idle(void)
 {
-	local_irq_enable();
-
 	current_thread_info()->status &= ~TS_POLLING;
 	smp_mb__after_clear_bit();
-	while (!need_resched()) {
-		local_irq_disable();
-		if (!need_resched())
-			safe_halt();
-		else
-			local_irq_enable();
-	}
+	local_irq_disable();
+	if (!need_resched()) {
+		/* Enables interrupts one instruction before HLT.
+		   x86 special-cases this so there is no race. */
+		safe_halt();
+	} else
+		local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;
 }
 
@@ -130,15 +128,7 @@
 static void poll_idle (void)
 {
 	local_irq_enable();
-
-	asm volatile(
-		"2:"
-		"testl %0,%1;"
-		"rep; nop;"
-		"je 2b;"
-		: :
-		"i" (_TIF_NEED_RESCHED),
-		"m" (current_thread_info()->flags));
+	cpu_relax();
 }
 
 void cpu_idle_wait(void)
@@ -219,6 +209,12 @@
 				idle = default_idle;
 			if (cpu_is_offline(smp_processor_id()))
 				play_dead();
+			/*
+			 * Idle routines should keep interrupts disabled
+			 * from here on, until they actually go idle.
+			 * Otherwise, idle callbacks can misfire.
+			 */
+			local_irq_disable();
 			enter_idle();
 			idle();
 			/* In many cases the interrupt that ended idle
@@ -256,9 +252,16 @@
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
-	local_irq_enable();
-	while (!need_resched())
-		mwait_idle_with_hints(0,0);
+	if (!need_resched()) {
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__sti_mwait(0, 0);
+		else
+			local_irq_enable();
+	} else {
+		local_irq_enable();
+	}
 }
 
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
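The default_idle()/mwait_idle() rework relies on an x86 detail: STI keeps interrupts masked for one more instruction, so "sti; hlt" (safe_halt) and "sti; mwait" (__sti_mwait) cannot lose a wakeup between the final need_resched() check and the sleep. A compilable sketch of the new contract, with the kernel primitives stubbed out:

#include <stdio.h>

static int need_resched(void)       { return 0; }	/* stub */
static void local_irq_enable(void)  { puts("sti"); }
static void local_irq_disable(void) { puts("cli"); }
static void safe_halt(void)
{
	/* hlt executes in sti's one-instruction shadow: no race window */
	puts("sti; hlt");
}

static void default_idle(void)
{
	/* cpu_idle() now calls local_irq_disable() before idle() */
	if (!need_resched())
		safe_halt();		/* irqs come back on atomically */
	else
		local_irq_enable();
}

int main(void)
{
	local_irq_disable();
	default_idle();
	return 0;
}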
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index f12f266..af425a8 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -731,11 +731,8 @@
 	/* Fix cpuid4 emulation for more */
 	num_cache_leaves = 3;
 
-	/* When there is only one core no need to synchronize RDTSC */
-	if (num_possible_cpus() == 1)
-	        set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
-	else
-	        clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+	/* RDTSC can be speculated around */
+	clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
 }
 
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -834,6 +831,15 @@
 			set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
 	}
 
+	if (cpu_has_ds) {
+		unsigned int l1, l2;
+		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
+		if (!(l1 & (1<<11)))
+			set_bit(X86_FEATURE_BTS, c->x86_capability);
+		if (!(l1 & (1<<12)))
+			set_bit(X86_FEATURE_PEBS, c->x86_capability);
+	}
+
 	n = c->extended_cpuid_level;
 	if (n >= 0x80000008) {
 		unsigned eax = cpuid_eax(0x80000008);
@@ -853,7 +859,10 @@
 		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
 	if (c->x86 == 6)
 		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
-	set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+	if (c->x86 == 15)
+		set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+	else
+		clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
  	c->x86_max_cores = intel_num_cpu_cores(c);
 
 	srat_detect_node();
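The new cpu_has_ds block tests MSR_IA32_MISC_ENABLE, where bit 11 means "BTS unavailable" and bit 12 means "PEBS unavailable"; the features exist when the bits are clear. A small decode example (the MSR value is made up):

#include <stdio.h>

int main(void)
{
	unsigned l1 = 1u << 11;		/* illustrative rdmsr low word */

	printf("BTS:  %savailable\n", (l1 & (1 << 11)) ? "un" : "");
	printf("PEBS: %savailable\n", (l1 & (1 << 12)) ? "un" : "");
	return 0;
}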
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 32f4d7e..af1ec4d 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -389,6 +389,7 @@
 	put_cpu();
 	return 0;
 }
+EXPORT_SYMBOL(smp_call_function_single);
 
 /*
  * this function sends a 'generic call function' IPI to all other CPUs
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 9800147..daf1933 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -60,6 +60,7 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 #include <asm/numa.h>
+#include <asm/genapic.h>
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
@@ -1169,6 +1170,13 @@
 
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();
+
+	if (num_online_cpus() > 8 && genapic == &apic_flat) {
+		printk(KERN_WARNING
+		       "flat APIC routing can't be used with > 8 cpus\n");
+		BUG();
+	}
+
 	err = 0;
 
 	return err;
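The new sanity check exists because flat APIC mode addresses CPUs as individual bits in the 8-bit logical destination field, so a ninth CPU simply has no bit to occupy. A small illustration of the overflow (values and names are illustrative, not kernel API):

#include <stdio.h>

int main(void)
{
	int cpu, ncpus = 9;
	unsigned char dest = 0;		/* 8-bit logical destination field */

	for (cpu = 0; cpu < ncpus; cpu++)
		dest |= 1u << cpu;	/* cpu 8's bit is truncated away */
	printf("dest=0x%02x for %d cpus\n", dest, ncpus);	/* 0xff */
	return 0;
}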
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 0d65b22..a1641ff 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,9 +30,9 @@
 #include <linux/kprobes.h>
 #include <linux/kexec.h>
 #include <linux/unwind.h>
+#include <linux/uaccess.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/atomic.h>
 #include <asm/debugreg.h>
@@ -108,7 +108,7 @@
 	preempt_enable_no_resched();
 }
 
-static int kstack_depth_to_print = 12;
+int kstack_depth_to_print = 12;
 #ifdef CONFIG_STACK_UNWIND
 static int call_trace = 1;
 #else
@@ -225,16 +225,25 @@
 {
 	struct ops_and_data *oad = (struct ops_and_data *)context;
 	int n = 0;
+	unsigned long sp = UNW_SP(info);
 
+	if (arch_unw_user_mode(info))
+		return -1;
 	while (unwind(info) == 0 && UNW_PC(info)) {
 		n++;
 		oad->ops->address(oad->data, UNW_PC(info));
 		if (arch_unw_user_mode(info))
 			break;
+		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
+		    && sp > UNW_SP(info))
+			break;
+		sp = UNW_SP(info);
 	}
 	return n;
 }
 
+#define MSG(txt) ops->warning(data, txt)
+
 /*
 * x86-64 can have up to three kernel stacks: 
  * process stack
@@ -248,11 +257,12 @@
         return p > t && p < t + THREAD_SIZE - 3;
 }
 
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
+		unsigned long *stack,
 		struct stacktrace_ops *ops, void *data)
 {
-	const unsigned cpu = smp_processor_id();
-	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+	const unsigned cpu = get_cpu();
+	unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
 	unsigned used = 0;
 	struct thread_info *tinfo;
 
@@ -268,28 +278,30 @@
 			if (unwind_init_frame_info(&info, tsk, regs) == 0)
 				unw_ret = dump_trace_unwind(&info, &oad);
 		} else if (tsk == current)
-			unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+			unw_ret = unwind_init_running(&info, dump_trace_unwind,
+						      &oad);
 		else {
 			if (unwind_init_blocked(&info, tsk) == 0)
 				unw_ret = dump_trace_unwind(&info, &oad);
 		}
 		if (unw_ret > 0) {
 			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
-				ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+				ops->warning_symbol(data,
+					     "DWARF2 unwinder stuck at %s",
 					     UNW_PC(&info));
 				if ((long)UNW_SP(&info) < 0) {
-					ops->warning(data, "Leftover inexact backtrace:\n");
+					MSG("Leftover inexact backtrace:");
 					stack = (unsigned long *)UNW_SP(&info);
 					if (!stack)
-						return;
+						goto out;
 				} else
-					ops->warning(data, "Full inexact backtrace again:\n");
+					MSG("Full inexact backtrace again:");
 			} else if (call_trace >= 1)
-				return;
+				goto out;
 			else
-				ops->warning(data, "Full inexact backtrace again:\n");
+				MSG("Full inexact backtrace again:");
 		} else
-			ops->warning(data, "Inexact backtrace:\n");
+			MSG("Inexact backtrace:");
 	}
 	if (!stack) {
 		unsigned long dummy;
@@ -297,12 +309,6 @@
 		if (tsk && tsk != current)
 			stack = (unsigned long *)tsk->thread.rsp;
 	}
-	/*
-	 * Align the stack pointer on word boundary, later loops
-	 * rely on that (and corruption / debug info bugs can cause
-	 * unaligned values here):
-	 */
-	stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
 
 	/*
 	 * Print function call entries within a stack. 'cond' is the
@@ -312,9 +318,9 @@
 #define HANDLE_STACK(cond) \
 	do while (cond) { \
 		unsigned long addr = *stack++; \
-		if (oops_in_progress ? 		\
-			__kernel_text_address(addr) : \
-			kernel_text_address(addr)) { \
+		/* Use unlocked access here because, except for NMIs,	\
+		   we should already be protected against module unloads */ \
+		if (__kernel_text_address(addr)) { \
 			/* \
 			 * If the address is either in the text segment of the \
 			 * kernel, or in the region which contains vmalloc'ed \
@@ -380,6 +386,8 @@
 	tinfo = current_thread_info();
 	HANDLE_STACK (valid_stack_ptr(tinfo, stack));
 #undef HANDLE_STACK
+out:
+	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
 
@@ -786,8 +794,7 @@
 {
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
 		reason);
-	printk(KERN_EMERG "You probably have a hardware problem with your "
-		"RAM chips\n");
+	printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
 
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index d9534e7..6a1f8f4 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -51,15 +51,6 @@
 
   RODATA
 
-#ifdef CONFIG_STACK_UNWIND
-  . = ALIGN(8);
-  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
-	__start_unwind = .;
-  	*(.eh_frame)
-	__end_unwind = .;
-  }
-#endif
-
   . = ALIGN(PAGE_SIZE);        /* Align data segment to page size boundary */
 				/* Data */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 3785e49..4a673f5 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -289,6 +289,7 @@
 	extern char __vsyscall_0;
 	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
 
+	/* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
 	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
 }
 
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index 06ae630..bc503f5 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -9,8 +9,6 @@
 #include <linux/module.h>
 #include <asm/checksum.h>
 
-#define __force_inline inline __attribute__((always_inline))
-
 static inline unsigned short from32to16(unsigned a) 
 {
 	unsigned short b = a >> 16; 
@@ -33,7 +31,7 @@
  * Unrolling to an 128 bytes inner loop.
  * Using interleaving with more registers to break the carry chains.
  */
-static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len)
+static unsigned do_csum(const unsigned char *buff, unsigned len)
 {
 	unsigned odd, count;
 	unsigned long result = 0;
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 50be909..2dbebd3 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -40,13 +40,13 @@
 
 inline void __const_udelay(unsigned long xloops)
 {
-	__delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
+	__delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
 }
 EXPORT_SYMBOL(__const_udelay);
 
 void __udelay(unsigned long usecs)
 {
-	__const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
+	__const_udelay(usecs * 0x000010c7);  /* 2**32 / 1000000 (rounded up) */
 }
 EXPORT_SYMBOL(__udelay);
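Both delay fixes are round-up corrections: 2^32/10^6 = 4294.967296, so the old multiplier 0x10c6 truncated and udelay() could return slightly early; 0x10c7 plus the "+ 1" applied after the >> 32 make the delay err only on the long side. A quick check of the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long long s = (1ull << 32) / 1000000;	/* truncates */

	printf("floor(2^32/1e6) = %llu = 0x%llx\n", s, s);	   /* 4294, 0x10c6 */
	printf("ceil (2^32/1e6) = %llu = 0x%llx\n", s + 1, s + 1); /* 4295, 0x10c7 */
	return 0;
}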
 
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 3751b47..a65fc6f 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -23,9 +23,9 @@
 #include <linux/compiler.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/uaccess.h>
 
 #include <asm/system.h>
-#include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/smp.h>
 #include <asm/tlbflush.h>
@@ -96,7 +96,7 @@
 static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
 				unsigned long error_code)
 { 
-	unsigned char __user *instr;
+	unsigned char *instr;
 	int scan_more = 1;
 	int prefetch = 0; 
 	unsigned char *max_instr;
@@ -116,7 +116,7 @@
 		unsigned char instr_hi;
 		unsigned char instr_lo;
 
-		if (__get_user(opcode, (char __user *)instr))
+		if (probe_kernel_address(instr, opcode))
 			break; 
 
 		instr_hi = opcode & 0xf0; 
@@ -154,7 +154,7 @@
 		case 0x00:
 			/* Prefetch instruction is 0x0F0D or 0x0F18 */
 			scan_more = 0;
-			if (__get_user(opcode, (char __user *)instr))
+			if (probe_kernel_address(instr, opcode))
 				break;
 			prefetch = (instr_lo == 0xF) &&
 				(opcode == 0x0D || opcode == 0x18);
@@ -170,7 +170,7 @@
 static int bad_address(void *p) 
 { 
 	unsigned long dummy;
-	return __get_user(dummy, (unsigned long __user *)p);
+	return probe_kernel_address((unsigned long *)p, dummy);
 } 
 
 void dump_pagetable(unsigned long address)
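probe_kernel_address() (from the new linux/uaccess.h include) is the right tool here because the pointers being probed are kernel addresses, not user ones: it copies with page faults disabled and returns -EFAULT on failure instead of recursing into the fault handler. A rough userspace stand-in showing the calling pattern (the macro body below is a simplification, not the kernel implementation):

#include <stdio.h>
#include <string.h>

/* simplified stand-in: the real helper disables pagefaults around the
 * copy and returns -EFAULT (-14) if the kernel address is unmapped */
#define probe_kernel_address(addr, retval) \
	((addr) ? (memcpy(&(retval), (addr), sizeof(retval)), 0) : -14)

int main(void)
{
	unsigned char insn[] = { 0x0f, 0x18 };	/* prefetch opcode bytes */
	unsigned char *instr = insn;
	unsigned char opcode;

	if (probe_kernel_address(instr, opcode))
		puts("unreadable - stop scanning");
	else
		printf("opcode 0x%02x\n", opcode);	/* 0x0f */
	return 0;
}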
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 4c0c00e..2968b90 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -730,14 +730,15 @@
 __initcall(x8664_sysctl_init);
 #endif
 
-/* A pseudo VMAs to allow ptrace access for the vsyscall page.   This only
+/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
    covers the 64bit vsyscall page now. 32bit has a real VMA now and does
    not need special handling anymore. */
 
 static struct vm_area_struct gate_vma = {
 	.vm_start = VSYSCALL_START,
-	.vm_end = VSYSCALL_END,
-	.vm_page_prot = PAGE_READONLY
+	.vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
+	.vm_page_prot = PAGE_READONLY_EXEC,
+	.vm_flags = VM_READ | VM_EXEC
 };
 
 struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 3e231d76..ccb91dd 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -61,34 +61,40 @@
 	return base;
 } 
 
-
-static void flush_kernel_map(void *address) 
+static void cache_flush_page(void *adr)
 {
-	if (0 && address && cpu_has_clflush) {
-		/* is this worth it? */ 
-		int i;
-		for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) 
-			asm volatile("clflush (%0)" :: "r" (address + i)); 
-	} else
-		asm volatile("wbinvd":::"memory"); 
-	if (address)
-		__flush_tlb_one(address);
-	else
-		__flush_tlb_all();
+	int i;
+	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+		asm volatile("clflush (%0)" :: "r" (adr + i));
 }
 
+static void flush_kernel_map(void *arg)
+{
+	struct list_head *l = (struct list_head *)arg;
+	struct page *pg;
 
-static inline void flush_map(unsigned long address)
+	/* When clflush is available, always use it;
+	   it is much cheaper than WBINVD */
+	if (!cpu_has_clflush)
+		asm volatile("wbinvd" ::: "memory");
+	list_for_each_entry(pg, l, lru) {
+		void *adr = page_address(pg);
+		if (cpu_has_clflush)
+			cache_flush_page(adr);
+		__flush_tlb_one(adr);
+	}
+}
+
+static inline void flush_map(struct list_head *l)
 {	
-	on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
+	on_each_cpu(flush_kernel_map, l, 1, 1);
 }
 
-static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
+static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
 
 static inline void save_page(struct page *fpage)
 {
-	fpage->lru.next = (struct list_head *)deferred_pages;
-	deferred_pages = fpage;
+	list_add(&fpage->lru, &deferred_pages);
 }
 
 /* 
@@ -207,18 +213,18 @@
 
 void global_flush_tlb(void)
 { 
-	struct page *dpage;
+	struct page *pg, *next;
+	struct list_head l;
 
 	down_read(&init_mm.mmap_sem);
-	dpage = xchg(&deferred_pages, NULL);
+	list_replace_init(&deferred_pages, &l);
 	up_read(&init_mm.mmap_sem);
 
-	flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
-	while (dpage) {
-		struct page *tmp = dpage;
-		dpage = (struct page *)dpage->lru.next;
-		ClearPagePrivate(tmp);
-		__free_page(tmp);
+	flush_map(&l);
+
+	list_for_each_entry_safe(pg, next, &l, lru) {
+		ClearPagePrivate(pg);
+		__free_page(pg);
 	} 
 } 
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 1f745f1..7fd095e 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -104,8 +104,8 @@
 
 /*
  * register_cpu - Setup a driverfs device for a CPU.
- * @cpu - Callers can set the cpu->no_control field to 1, to indicate not to
- *		  generate a control file in sysfs for this CPU.
+ * @cpu - if the cpu->hotpluggable field is set to 1, a control file will
+ *	  be generated in sysfs for this CPU.
  * @num - CPU number to use when creating the device.
  *
  * Initialize and register the CPU device.
@@ -119,7 +119,7 @@
 
 	error = sysdev_register(&cpu->sysdev);
 
-	if (!error && !cpu->no_control)
+	if (!error && cpu->hotpluggable)
 		register_cpu_control(cpu);
 	if (!error)
 		cpu_sys_devices[num] = &cpu->sysdev;
diff --git a/drivers/net/de600.c b/drivers/net/de600.c
index 690bb40..8396e41 100644
--- a/drivers/net/de600.c
+++ b/drivers/net/de600.c
@@ -43,7 +43,6 @@
  * modify the following "#define": (see <asm/io.h> for more info)
 #define REALLY_SLOW_IO
  */
-#define SLOW_IO_BY_JUMPING /* Looks "better" than dummy write to port 0x80 :-) */
 
 /* use 0 for production, 1 for verification, >2 for debug */
 #ifdef DE600_DEBUG
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 81186f4..33adeba 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -531,7 +531,8 @@
 	if (check_legacy_ioport(PNPBIOS_BASE))
 		return -ENODEV;
 #endif
-	if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table)) {
+	if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table) ||
+	    paravirt_enabled()) {
 		printk(KERN_INFO "PnPBIOS: Disabled\n");
 		return -ENODEV;
 	}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6e31776..bcc3caf 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2404,6 +2404,7 @@
 HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
 HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
 HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFHWBROADCAST, dev_ifsioc)
 
 /* ioctls used by appletalk ddp.c */
 HANDLE_IOCTL(SIOCATALKDIFADDR, dev_ifsioc)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e60d6f2..4d4c62d 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -11,8 +11,8 @@
 
 #define RODATA								\
 	. = ALIGN(4096);						\
-	__start_rodata = .;						\
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
+		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
 	}								\
@@ -119,17 +119,16 @@
 		*(__ksymtab_strings)					\
 	}								\
 									\
+	EH_FRAME							\
+									\
 	/* Built-in module parameters. */				\
 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
 		VMLINUX_SYMBOL(__start___param) = .;			\
 		*(__param)						\
 		VMLINUX_SYMBOL(__stop___param) = .;			\
+		VMLINUX_SYMBOL(__end_rodata) = .;			\
 	}								\
 									\
-	/* Unwind data binary search table */				\
-	EH_FRAME_HDR							\
-									\
-	__end_rodata = .;						\
 	. = ALIGN(4096);
 
 #define SECURITY_INIT							\
@@ -162,15 +161,23 @@
 		VMLINUX_SYMBOL(__kprobes_text_end) = .;
 
 #ifdef CONFIG_STACK_UNWIND
-		/* Unwind data binary search table */
-#define EH_FRAME_HDR							\
+#define EH_FRAME							\
+		/* Unwind data binary search table */			\
+		. = ALIGN(8);						\
         	.eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) {	\
 			VMLINUX_SYMBOL(__start_unwind_hdr) = .;		\
 			*(.eh_frame_hdr)				\
 			VMLINUX_SYMBOL(__end_unwind_hdr) = .;		\
+		}							\
+		/* Unwind data */					\
+		. = ALIGN(8);						\
+		.eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {		\
+			VMLINUX_SYMBOL(__start_unwind) = .;		\
+		  	*(.eh_frame)					\
+			VMLINUX_SYMBOL(__end_unwind) = .;		\
 		}
 #else
-#define EH_FRAME_HDR
+#define EH_FRAME
 #endif
 
 		/* DWARF debug sections.
diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h
index b01a7ec..b8fa955 100644
--- a/include/asm-i386/alternative.h
+++ b/include/asm-i386/alternative.h
@@ -4,7 +4,7 @@
 #ifdef __KERNEL__
 
 #include <asm/types.h>
-
+#include <linux/stddef.h>
 #include <linux/types.h>
 
 struct alt_instr {
@@ -118,4 +118,15 @@
 #define LOCK_PREFIX ""
 #endif
 
+struct paravirt_patch;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
+#else
+static inline void
+apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{}
+#define __start_parainstructions NULL
+#define __stop_parainstructions NULL
+#endif
+
 #endif /* _I386_ALTERNATIVE_H */
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index b952957..41a4431 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -37,18 +37,27 @@
 /*
  * Basic functions accessing APICs.
  */
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define apic_write native_apic_write
+#define apic_write_atomic native_apic_write_atomic
+#define apic_read native_apic_read
+#endif
 
-static __inline void apic_write(unsigned long reg, unsigned long v)
+static __inline fastcall void native_apic_write(unsigned long reg,
+						unsigned long v)
 {
 	*((volatile unsigned long *)(APIC_BASE+reg)) = v;
 }
 
-static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
+static __inline fastcall void native_apic_write_atomic(unsigned long reg,
+						       unsigned long v)
 {
 	xchg((volatile unsigned long *)(APIC_BASE+reg), v);
 }
 
-static __inline unsigned long apic_read(unsigned long reg)
+static __inline fastcall unsigned long native_apic_read(unsigned long reg)
 {
 	return *((volatile unsigned long *)(APIC_BASE+reg));
 }
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index a6c024e..c57441b 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -187,9 +187,9 @@
 	/* Modern 486+ processor */
 	__i = i;
 	__asm__ __volatile__(
-		LOCK_PREFIX "xaddl %0, %1;"
-		:"=r"(i)
-		:"m"(v->counter), "0"(i));
+		LOCK_PREFIX "xaddl %0, %1"
+		:"+r" (i), "+m" (v->counter)
+		: : "memory");
 	return i + __i;
 
 #ifdef CONFIG_M386
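The constraint fix matters for correctness: XADD swaps the addend with the old memory value, so after the instruction the register holds the old counter and "i + __i" reconstructs the new one. "+r"/"+m" tell gcc both operands are read and written; the old "=r"/"m" pair let it treat the memory as input-only. An x86-only illustration:

#include <stdio.h>

static int atomic_add_return(int i, int *counter)
{
	int __i = i;

	__asm__ __volatile__(
		"lock; xaddl %0, %1"
		: "+r" (i), "+m" (*counter)
		: : "memory");
	/* xadd left the old counter value in i */
	return i + __i;
}

int main(void)
{
	int v = 40;

	printf("%d\n", atomic_add_return(2, &v));	/* 42 */
	printf("%d\n", v);				/* 42 */
	return 0;
}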
diff --git a/include/asm-i386/boot.h b/include/asm-i386/boot.h
index 96b228e..8ce79a6 100644
--- a/include/asm-i386/boot.h
+++ b/include/asm-i386/boot.h
@@ -12,4 +12,8 @@
 #define EXTENDED_VGA	0xfffe		/* 80x50 mode */
 #define ASK_VGA		0xfffd		/* ask for it at bootup */
 
-#endif
+/* Physical address where the kernel should be loaded. */
+#define LOAD_PHYSICAL_ADDR ((0x100000 + CONFIG_PHYSICAL_ALIGN - 1) \
+				& ~(CONFIG_PHYSICAL_ALIGN - 1))
+
+#endif /* _LINUX_BOOT_H */
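LOAD_PHYSICAL_ADDR uses the standard power-of-two round-up idiom: adding align-1 and masking off the low bits rounds 1 MB up to the next CONFIG_PHYSICAL_ALIGN boundary. A quick check with an assumed 2 MB alignment:

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long align = 0x200000;	/* assume CONFIG_PHYSICAL_ALIGN = 2MB */

	printf("0x%lx\n", ALIGN_UP(0x100000ul, align));	/* 0x200000 */
	printf("0x%lx\n", ALIGN_UP(0x200000ul, align));	/* 0x200000: already aligned */
	return 0;
}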
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h
index 592ffee..38f1aeb 100644
--- a/include/asm-i386/bugs.h
+++ b/include/asm-i386/bugs.h
@@ -21,6 +21,7 @@
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/msr.h>
+#include <asm/paravirt.h>
 
 static int __init no_halt(char *s)
 {
@@ -91,6 +92,9 @@
 
 static void __init check_hlt(void)
 {
+	if (paravirt_enabled())
+		return;
+
 	printk(KERN_INFO "Checking 'hlt' instruction... ");
 	if (!boot_cpu_data.hlt_works_ok) {
 		printk("disabled\n");
diff --git a/include/asm-i386/cpu.h b/include/asm-i386/cpu.h
index b1bc7b1..9d914e1 100644
--- a/include/asm-i386/cpu.h
+++ b/include/asm-i386/cpu.h
@@ -13,6 +13,9 @@
 extern int arch_register_cpu(int num);
 #ifdef CONFIG_HOTPLUG_CPU
 extern void arch_unregister_cpu(int);
+extern int enable_cpu_hotplug;
+#else
+#define enable_cpu_hotplug	0
 #endif
 
 DECLARE_PER_CPU(int, cpu_state);
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index d314ebb..3f92b94 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -31,7 +31,7 @@
 #define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */
 #define X86_FEATURE_PN		(0*32+18) /* Processor serial number */
 #define X86_FEATURE_CLFLSH	(0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DTES	(0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_DS		(0*32+21) /* Debug Store */
 #define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */
 #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
 #define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
@@ -73,6 +73,8 @@
 #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
 #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -134,6 +136,10 @@
 #define cpu_has_phe_enabled	boot_cpu_has(X86_FEATURE_PHE_EN)
 #define cpu_has_pmm		boot_cpu_has(X86_FEATURE_PMM)
 #define cpu_has_pmm_enabled	boot_cpu_has(X86_FEATURE_PMM_EN)
+#define cpu_has_ds		boot_cpu_has(X86_FEATURE_DS)
+#define cpu_has_pebs 		boot_cpu_has(X86_FEATURE_PEBS)
+#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
+#define cpu_has_bts 		boot_cpu_has(X86_FEATURE_BTS)
 
 #endif /* __ASM_I386_CPUFEATURE_H */
 
diff --git a/include/asm-i386/current.h b/include/asm-i386/current.h
index 3cbbecd..5252ee0 100644
--- a/include/asm-i386/current.h
+++ b/include/asm-i386/current.h
@@ -1,13 +1,14 @@
 #ifndef _I386_CURRENT_H
 #define _I386_CURRENT_H
 
-#include <linux/thread_info.h>
+#include <asm/pda.h>
+#include <linux/compiler.h>
 
 struct task_struct;
 
-static __always_inline struct task_struct * get_current(void)
+static __always_inline struct task_struct *get_current(void)
 {
-	return current_thread_info()->task;
+	return read_pda(pcurrent);
 }
  
 #define current get_current()
diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h
index b1c7650..32d6678 100644
--- a/include/asm-i386/delay.h
+++ b/include/asm-i386/delay.h
@@ -7,6 +7,7 @@
  * Delay routines calling functions in arch/i386/lib/delay.c
  */
  
+/* Undefined functions to get link-time errors */
 extern void __bad_udelay(void);
 extern void __bad_ndelay(void);
 
@@ -15,13 +16,23 @@
 extern void __const_udelay(unsigned long usecs);
 extern void __delay(unsigned long loops);
 
+#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
+#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul)
+
+#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul)
+
+#else /* !PARAVIRT || USE_REAL_TIME_DELAY */
+
+/* 0x10c7 is 2**32 / 1000000 (rounded up) */
 #define udelay(n) (__builtin_constant_p(n) ? \
 	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \
 	__udelay(n))
-	
+
+/* 0x5 is 2**32 / 1000000000 (rounded up) */
 #define ndelay(n) (__builtin_constant_p(n) ? \
 	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
 	__ndelay(n))
+#endif
 
 void use_tsc_delay(void);
 
diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h
index 5874ef1..f398cc4 100644
--- a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -4,8 +4,6 @@
 #include <asm/ldt.h>
 #include <asm/segment.h>
 
-#define CPU_16BIT_STACK_SIZE 1024
-
 #ifndef __ASSEMBLY__
 
 #include <linux/preempt.h>
@@ -16,8 +14,6 @@
 
 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
 
-DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-
 struct Xgt_desc_struct {
 	unsigned short size;
 	unsigned long address __attribute__((packed));
@@ -33,11 +29,6 @@
 	return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
 }
 
-/*
- * This is the ldt that every process will get unless we need
- * something other than this.
- */
-extern struct desc_struct default_ldt[];
 extern struct desc_struct idt_table[];
 extern void set_intr_gate(unsigned int irq, void * addr);
 
@@ -64,8 +55,10 @@
 #define DESCTYPE_DPL3	0x60	/* DPL-3 */
 #define DESCTYPE_S	0x10	/* !system */
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
 #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
-#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
 
 #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
 #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
@@ -88,6 +81,10 @@
 #undef C
 }
 
+#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+
 static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
 {
 	__u32 *lp = (__u32 *)((char *)dt + entry*8);
@@ -95,9 +92,25 @@
 	*(lp+1) = entry_b;
 }
 
-#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
-#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
+#define set_ldt native_set_ldt
+#endif /* CONFIG_PARAVIRT */
+
+static inline fastcall void native_set_ldt(const void *addr,
+					   unsigned int entries)
+{
+	if (likely(entries == 0))
+		__asm__ __volatile__("lldt %w0"::"q" (0));
+	else {
+		unsigned cpu = smp_processor_id();
+		__u32 a, b;
+
+		pack_descriptor(&a, &b, (unsigned long)addr,
+				entries * sizeof(struct desc_struct) - 1,
+				DESCTYPE_LDT, 0);
+		write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
+		__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
+	}
+}
 
 static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
 {
@@ -115,14 +128,6 @@
 	write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
 }
 
-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
-{
-	__u32 a, b;
-	pack_descriptor(&a, &b, (unsigned long)addr,
-			entries * sizeof(struct desc_struct) - 1,
-			DESCTYPE_LDT, 0);
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
-}
 
 #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
 
@@ -153,35 +158,22 @@
 
 static inline void clear_LDT(void)
 {
-	int cpu = get_cpu();
-
-	set_ldt_desc(cpu, &default_ldt[0], 5);
-	load_LDT_desc();
-	put_cpu();
+	set_ldt(NULL, 0);
 }
 
 /*
  * load one particular LDT into the current CPU
  */
-static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
+static inline void load_LDT_nolock(mm_context_t *pc)
 {
-	void *segments = pc->ldt;
-	int count = pc->size;
-
-	if (likely(!count)) {
-		segments = &default_ldt[0];
-		count = 5;
-	}
-		
-	set_ldt_desc(cpu, segments, count);
-	load_LDT_desc();
+	set_ldt(pc->ldt, pc->size);
 }
 
 static inline void load_LDT(mm_context_t *pc)
 {
-	int cpu = get_cpu();
-	load_LDT_nolock(pc, cpu);
-	put_cpu();
+	preempt_disable();
+	load_LDT_nolock(pc);
+	preempt_enable();
 }
 
 static inline unsigned long get_desc_base(unsigned long *desc)
@@ -193,6 +185,29 @@
 	return base;
 }
 
+#else /* __ASSEMBLY__ */
+
+/*
+ * GET_DESC_BASE reads the descriptor base of the specified segment.
+ *
+ * Args:
+ *    idx - descriptor index
+ *    gdt - GDT pointer
+ *    base - 32bit register to which the base will be written
+ *    lo_w - lo word of the "base" register
+ *    lo_b - lo byte of the "base" register
+ *    hi_b - hi byte of the low word of the "base" register
+ *
+ * Example:
+ *    GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
+ *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
+ */
+#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
+	movb idx*8+4(gdt), lo_b; \
+	movb idx*8+7(gdt), hi_b; \
+	shll $16, base; \
+	movw idx*8+2(gdt), lo_w;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h
index f7514fb..395077a 100644
--- a/include/asm-i386/e820.h
+++ b/include/asm-i386/e820.h
@@ -38,6 +38,11 @@
 
 extern int e820_all_mapped(unsigned long start, unsigned long end,
 			   unsigned type);
+extern void find_max_pfn(void);
+extern void register_bootmem_low_pages(unsigned long max_low_pfn);
+extern void register_memory(void);
+extern void limit_regions(unsigned long long size);
+extern void print_memory_map(char *who);
 
 #endif/*!__ASSEMBLY__*/
 
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 3a05436..45d21a0 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -91,7 +91,7 @@
 	pr_reg[7] = regs->xds;				\
 	pr_reg[8] = regs->xes;				\
 	savesegment(fs,pr_reg[9]);			\
-	savesegment(gs,pr_reg[10]);			\
+	pr_reg[10] = regs->xgs;				\
 	pr_reg[11] = regs->orig_eax;			\
 	pr_reg[12] = regs->eip;				\
 	pr_reg[13] = regs->xcs;				\
diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h
index 8ffbb0f..fd2be59 100644
--- a/include/asm-i386/genapic.h
+++ b/include/asm-i386/genapic.h
@@ -122,6 +122,6 @@
 	APICFUNC(phys_pkg_id) \
 	}
 
-extern struct genapic *genapic;
+extern struct genapic *genapic, apic_default;
 
 #endif
diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h
index bc1d6ed..434936c 100644
--- a/include/asm-i386/i387.h
+++ b/include/asm-i386/i387.h
@@ -76,7 +76,9 @@
 
 #define __unlazy_fpu( tsk ) do { \
 	if (task_thread_info(tsk)->status & TS_USEDFPU) \
-		save_init_fpu( tsk ); \
+		save_init_fpu( tsk ); 			\
+	else						\
+		tsk->fpu_counter = 0;			\
 } while (0)
 
 #define __clear_fpu( tsk )					\
@@ -118,6 +120,7 @@
 extern unsigned short get_fpu_cwd( struct task_struct *tsk );
 extern unsigned short get_fpu_swd( struct task_struct *tsk );
 extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+extern asmlinkage void math_state_restore(void);
 
 /*
  * Signal frame handlers...
diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h
index 68df0dc3..86ff5e8 100644
--- a/include/asm-i386/io.h
+++ b/include/asm-i386/io.h
@@ -256,11 +256,11 @@
 
 #endif /* __KERNEL__ */
 
-#ifdef SLOW_IO_BY_JUMPING
-#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:"
+#if defined(CONFIG_PARAVIRT)
+#include <asm/paravirt.h>
 #else
+
 #define __SLOW_DOWN_IO "outb %%al,$0x80;"
-#endif
 
 static inline void slow_down_io(void) {
 	__asm__ __volatile__(
@@ -271,6 +271,8 @@
 		: : );
 }
 
+#endif
+
 #ifdef CONFIG_X86_NUMAQ
 extern void *xquad_portio;    /* Where the IO area was mapped */
 #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h
index 331726b..11761cd 100644
--- a/include/asm-i386/irq.h
+++ b/include/asm-i386/irq.h
@@ -37,8 +37,13 @@
 extern int irqbalance_disable(char *str);
 #endif
 
+extern void quirk_intel_irqbalance(void);
+
 #ifdef CONFIG_HOTPLUG_CPU
 extern void fixup_irqs(cpumask_t map);
 #endif
 
+void init_IRQ(void);
+void __init native_init_IRQ(void);
+
 #endif /* _ASM_IRQ_H */
diff --git a/include/asm-i386/irq_regs.h b/include/asm-i386/irq_regs.h
index 3dd9c0b..a1b3f7f 100644
--- a/include/asm-i386/irq_regs.h
+++ b/include/asm-i386/irq_regs.h
@@ -1 +1,27 @@
-#include <asm-generic/irq_regs.h>
+/*
+ * Per-cpu current frame pointer - the location of the last exception frame on
+ * the stack, stored in the PDA.
+ *
+ * Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+#ifndef _ASM_I386_IRQ_REGS_H
+#define _ASM_I386_IRQ_REGS_H
+
+#include <asm/pda.h>
+
+static inline struct pt_regs *get_irq_regs(void)
+{
+	return read_pda(irq_regs);
+}
+
+static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
+{
+	struct pt_regs *old_regs;
+
+	old_regs = read_pda(irq_regs);
+	write_pda(irq_regs, new_regs);
+
+	return old_regs;
+}
+
+#endif /* _ASM_I386_IRQ_REGS_H */
diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
index e1bdb97..17b18cf 100644
--- a/include/asm-i386/irqflags.h
+++ b/include/asm-i386/irqflags.h
@@ -10,6 +10,9 @@
 #ifndef _ASM_IRQFLAGS_H
 #define _ASM_IRQFLAGS_H
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
 #ifndef __ASSEMBLY__
 
 static inline unsigned long __raw_local_save_flags(void)
@@ -25,9 +28,6 @@
 	return flags;
 }
 
-#define raw_local_save_flags(flags) \
-		do { (flags) = __raw_local_save_flags(); } while (0)
-
 static inline void raw_local_irq_restore(unsigned long flags)
 {
 	__asm__ __volatile__(
@@ -66,18 +66,6 @@
 	__asm__ __volatile__("hlt": : :"memory");
 }
 
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
-	return !(flags & (1 << 9));
-}
-
-static inline int raw_irqs_disabled(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	return raw_irqs_disabled_flags(flags);
-}
-
 /*
  * For spinlocks, etc:
  */
@@ -90,9 +78,33 @@
 	return flags;
 }
 
+#else
+#define DISABLE_INTERRUPTS(clobbers)	cli
+#define ENABLE_INTERRUPTS(clobbers)	sti
+#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
+#define INTERRUPT_RETURN		iret
+#define GET_CR0_INTO_EAX		movl %cr0, %eax
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
+#ifndef __ASSEMBLY__
+#define raw_local_save_flags(flags) \
+		do { (flags) = __raw_local_save_flags(); } while (0)
+
 #define raw_local_irq_save(flags) \
 		do { (flags) = __raw_local_irq_save(); } while (0)
 
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & (1 << 9));
+}
+
+static inline int raw_irqs_disabled(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	return raw_irqs_disabled_flags(flags);
+}
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/include/asm-i386/mach-default/setup_arch.h b/include/asm-i386/mach-default/setup_arch.h
index fb42099..605e3cc 100644
--- a/include/asm-i386/mach-default/setup_arch.h
+++ b/include/asm-i386/mach-default/setup_arch.h
@@ -2,4 +2,6 @@
 
 /* no action for generic */
 
+#ifndef ARCH_SETUP
 #define ARCH_SETUP
+#endif
diff --git a/include/asm-i386/math_emu.h b/include/asm-i386/math_emu.h
index 697673b..a4b0aa3 100644
--- a/include/asm-i386/math_emu.h
+++ b/include/asm-i386/math_emu.h
@@ -21,6 +21,7 @@
 	long ___eax;
 	long ___ds;
 	long ___es;
+	long ___fs;
 	long ___orig_eax;
 	long ___eip;
 	long ___cs;
diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h
index 62b7bf1..68ff102 100644
--- a/include/asm-i386/mmu_context.h
+++ b/include/asm-i386/mmu_context.h
@@ -44,7 +44,7 @@
 		 * load the LDT, if the LDT is different:
 		 */
 		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_LDT_nolock(&next->context, cpu);
+			load_LDT_nolock(&next->context);
 	}
 #ifdef CONFIG_SMP
 	else {
@@ -56,14 +56,14 @@
 			 * tlb flush IPI delivery. We must reload %cr3.
 			 */
 			load_cr3(next->pgd);
-			load_LDT_nolock(&next->context, cpu);
+			load_LDT_nolock(&next->context);
 		}
 	}
 #endif
 }
 
-#define deactivate_mm(tsk, mm) \
-	asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
+#define deactivate_mm(tsk, mm)			\
+	asm("movl %0,%%fs": :"r" (0));
 
 #define activate_mm(prev, next) \
 	switch_mm((prev),(next),NULL)
diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h
index 424661d..02f8f54 100644
--- a/include/asm-i386/module.h
+++ b/include/asm-i386/module.h
@@ -20,6 +20,8 @@
 #define MODULE_PROC_FAMILY "586TSC "
 #elif defined CONFIG_M586MMX
 #define MODULE_PROC_FAMILY "586MMX "
+#elif defined CONFIG_MCORE2
+#define MODULE_PROC_FAMILY "CORE2 "
 #elif defined CONFIG_M686
 #define MODULE_PROC_FAMILY "686 "
 #elif defined CONFIG_MPENTIUMII
@@ -60,18 +62,12 @@
 #error unknown processor family
 #endif
 
-#ifdef CONFIG_REGPARM
-#define MODULE_REGPARM "REGPARM "
-#else
-#define MODULE_REGPARM ""
-#endif
-
 #ifdef CONFIG_4KSTACKS
 #define MODULE_STACKSIZE "4KSTACKS "
 #else
 #define MODULE_STACKSIZE ""
 #endif
 
-#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
 
 #endif /* _ASM_I386_MODULE_H */
diff --git a/include/asm-i386/mpspec_def.h b/include/asm-i386/mpspec_def.h
index 76feedf..13bafb1 100644
--- a/include/asm-i386/mpspec_def.h
+++ b/include/asm-i386/mpspec_def.h
@@ -97,7 +97,6 @@
 #define BUSTYPE_TC	"TC"
 #define BUSTYPE_VME	"VME"
 #define BUSTYPE_XPRESS	"XPRESS"
-#define BUSTYPE_NEC98	"NEC98"
 
 struct mpc_config_ioapic
 {
@@ -182,7 +181,6 @@
 	MP_BUS_EISA,
 	MP_BUS_PCI,
 	MP_BUS_MCA,
-	MP_BUS_NEC98
 };
 #endif
 
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h
index 62b76cd..5679d49 100644
--- a/include/asm-i386/msr.h
+++ b/include/asm-i386/msr.h
@@ -1,6 +1,10 @@
 #ifndef __ASM_MSR_H
 #define __ASM_MSR_H
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+
 /*
  * Access to machine-specific registers (available on 586 and better only)
  * Note: the rd* operations modify the parameters directly (without using
@@ -77,6 +81,7 @@
      __asm__ __volatile__("rdpmc" \
 			  : "=a" (low), "=d" (high) \
 			  : "c" (counter))
+#endif	/* !CONFIG_PARAVIRT */
 
 /* symbolic names for some interesting MSRs */
 /* Intel defined MSRs. */
@@ -141,6 +146,10 @@
 #define MSR_IA32_MC0_ADDR		0x402
 #define MSR_IA32_MC0_MISC		0x403
 
+#define MSR_IA32_PEBS_ENABLE		0x3f1
+#define MSR_IA32_DS_AREA		0x600
+#define MSR_IA32_PERF_CAPABILITIES	0x345
+
 /* Pentium IV performance counter MSRs */
 #define MSR_P4_BPU_PERFCTR0 		0x300
 #define MSR_P4_BPU_PERFCTR1 		0x301
@@ -284,4 +293,13 @@
 #define MSR_TMTA_LRTI_READOUT		0x80868018
 #define MSR_TMTA_LRTI_VOLT_MHZ		0x8086801a
 
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0	0x309
+#define MSR_CORE_PERF_FIXED_CTR1	0x30a
+#define MSR_CORE_PERF_FIXED_CTR2	0x30b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL	0x38d
+#define MSR_CORE_PERF_GLOBAL_STATUS	0x38e
+#define MSR_CORE_PERF_GLOBAL_CTRL	0x38f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL	0x390
+
 #endif /* __ASM_MSR_H */
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h
index 269d315..b04333e 100644
--- a/include/asm-i386/nmi.h
+++ b/include/asm-i386/nmi.h
@@ -5,6 +5,9 @@
 #define ASM_NMI_H
 
 #include <linux/pm.h>
+#include <asm/irq.h>
+
+#ifdef ARCH_HAS_NMI_WATCHDOG
 
 /**
  * do_nmi_callback
@@ -42,4 +45,9 @@
 			void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
 
+void __trigger_all_cpu_backtrace(void);
+#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
+
+#endif
+
 #endif /* ASM_NMI_H */
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index f5bf544..fd3f64a 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -52,6 +52,7 @@
 #define pte_val(x)	((x).pte_low | ((unsigned long long)(x).pte_high << 32))
 #define __pmd(x) ((pmd_t) { (x) } )
 #define HPAGE_SHIFT	21
+#include <asm-generic/pgtable-nopud.h>
 #else
 typedef struct { unsigned long pte_low; } pte_t;
 typedef struct { unsigned long pgd; } pgd_t;
@@ -59,6 +60,7 @@
 #define boot_pte_t pte_t /* or would you rather have a typedef */
 #define pte_val(x)	((x).pte_low)
 #define HPAGE_SHIFT	22
+#include <asm-generic/pgtable-nopmd.h>
 #endif
 #define PTE_MASK	PAGE_MASK
 
@@ -112,18 +114,18 @@
 
 #ifdef __ASSEMBLY__
 #define __PAGE_OFFSET		CONFIG_PAGE_OFFSET
-#define __PHYSICAL_START	CONFIG_PHYSICAL_START
 #else
 #define __PAGE_OFFSET		((unsigned long)CONFIG_PAGE_OFFSET)
-#define __PHYSICAL_START	((unsigned long)CONFIG_PHYSICAL_START)
 #endif
-#define __KERNEL_START		(__PAGE_OFFSET + __PHYSICAL_START)
 
 
 #define PAGE_OFFSET		((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE		((unsigned long)__VMALLOC_RESERVE)
 #define MAXMEM			(-__PAGE_OFFSET-__VMALLOC_RESERVE)
 #define __pa(x)			((unsigned long)(x)-PAGE_OFFSET)
+/* __pa_symbol should be used for C-visible symbols.
+   This seems to be the official gcc-blessed way to do such arithmetic. */
+#define __pa_symbol(x)          __pa(RELOC_HIDE((unsigned long)(x),0))
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 #ifdef CONFIG_FLATMEM
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
new file mode 100644
index 0000000..9f06265
--- /dev/null
+++ b/include/asm-i386/paravirt.h
@@ -0,0 +1,505 @@
+#ifndef __ASM_PARAVIRT_H
+#define __ASM_PARAVIRT_H
+/* Various instructions on x86 need to be replaced for
+ * para-virtualization: those hooks are defined here. */
+#include <linux/linkage.h>
+#include <linux/stringify.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_PARAVIRT
+/* These are the most performance-critical ops, so we want to be able to patch
+ * callers */
+#define PARAVIRT_IRQ_DISABLE 0
+#define PARAVIRT_IRQ_ENABLE 1
+#define PARAVIRT_RESTORE_FLAGS 2
+#define PARAVIRT_SAVE_FLAGS 3
+#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4
+#define PARAVIRT_INTERRUPT_RETURN 5
+#define PARAVIRT_STI_SYSEXIT 6
+
+/* Bitmask of what can be clobbered: usually at least eax. */
+#define CLBR_NONE 0x0
+#define CLBR_EAX 0x1
+#define CLBR_ECX 0x2
+#define CLBR_EDX 0x4
+#define CLBR_ANY 0x7
+
+#ifndef __ASSEMBLY__
+struct thread_struct;
+struct Xgt_desc_struct;
+struct tss_struct;
+struct mm_struct;
+struct paravirt_ops
+{
+	unsigned int kernel_rpl;
+ 	int paravirt_enabled;
+	const char *name;
+
+	/*
+	 * Patch may replace one of the defined code sequences with arbitrary
+	 * code, subject to the same register constraints.  This generally
+	 * means the code is not free to clobber any registers other than EAX.
+	 * The patch function should return the number of bytes of code
+	 * generated, as we nop pad the rest in generic code.
+	 */
+	unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
+
+	void (*arch_setup)(void);
+	char *(*memory_setup)(void);
+	void (*init_IRQ)(void);
+
+	void (*banner)(void);
+
+	unsigned long (*get_wallclock)(void);
+	int (*set_wallclock)(unsigned long);
+	void (*time_init)(void);
+
+	/* All the function pointers here are declared as "fastcall"
+	   so that we get a specific register-based calling
+	   convention.  This makes it easier to implement inline
+	   assembler replacements. */
+
+	void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx,
+		      unsigned int *ecx, unsigned int *edx);
+
+	unsigned long (fastcall *get_debugreg)(int regno);
+	void (fastcall *set_debugreg)(int regno, unsigned long value);
+
+	void (fastcall *clts)(void);
+
+	unsigned long (fastcall *read_cr0)(void);
+	void (fastcall *write_cr0)(unsigned long);
+
+	unsigned long (fastcall *read_cr2)(void);
+	void (fastcall *write_cr2)(unsigned long);
+
+	unsigned long (fastcall *read_cr3)(void);
+	void (fastcall *write_cr3)(unsigned long);
+
+	unsigned long (fastcall *read_cr4_safe)(void);
+	unsigned long (fastcall *read_cr4)(void);
+	void (fastcall *write_cr4)(unsigned long);
+
+	unsigned long (fastcall *save_fl)(void);
+	void (fastcall *restore_fl)(unsigned long);
+	void (fastcall *irq_disable)(void);
+	void (fastcall *irq_enable)(void);
+	void (fastcall *safe_halt)(void);
+	void (fastcall *halt)(void);
+	void (fastcall *wbinvd)(void);
+
+	/* err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
+	u64 (fastcall *read_msr)(unsigned int msr, int *err);
+	int (fastcall *write_msr)(unsigned int msr, u64 val);
+
+	u64 (fastcall *read_tsc)(void);
+	u64 (fastcall *read_pmc)(void);
+
+	void (fastcall *load_tr_desc)(void);
+	void (fastcall *load_gdt)(const struct Xgt_desc_struct *);
+	void (fastcall *load_idt)(const struct Xgt_desc_struct *);
+	void (fastcall *store_gdt)(struct Xgt_desc_struct *);
+	void (fastcall *store_idt)(struct Xgt_desc_struct *);
+	void (fastcall *set_ldt)(const void *desc, unsigned entries);
+	unsigned long (fastcall *store_tr)(void);
+	void (fastcall *load_tls)(struct thread_struct *t, unsigned int cpu);
+	void (fastcall *write_ldt_entry)(void *dt, int entrynum,
+					 u32 low, u32 high);
+	void (fastcall *write_gdt_entry)(void *dt, int entrynum,
+					 u32 low, u32 high);
+	void (fastcall *write_idt_entry)(void *dt, int entrynum,
+					 u32 low, u32 high);
+	void (fastcall *load_esp0)(struct tss_struct *tss,
+				   struct thread_struct *thread);
+
+	void (fastcall *set_iopl_mask)(unsigned mask);
+
+	void (fastcall *io_delay)(void);
+	void (*const_udelay)(unsigned long loops);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	void (fastcall *apic_write)(unsigned long reg, unsigned long v);
+	void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v);
+	unsigned long (fastcall *apic_read)(unsigned long reg);
+#endif
+
+	void (fastcall *flush_tlb_user)(void);
+	void (fastcall *flush_tlb_kernel)(void);
+	void (fastcall *flush_tlb_single)(u32 addr);
+
+	void (fastcall *set_pte)(pte_t *ptep, pte_t pteval);
+	void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval);
+	void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval);
+	void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep);
+	void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep);
+#ifdef CONFIG_X86_PAE
+	void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval);
+	void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
+	void (fastcall *set_pud)(pud_t *pudp, pud_t pudval);
+	void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+	void (fastcall *pmd_clear)(pmd_t *pmdp);
+#endif
+
+	/* These two are jmp to, not actually called. */
+	void (fastcall *irq_enable_sysexit)(void);
+	void (fastcall *iret)(void);
+};
+
+/* Mark a paravirt probe function. */
+#define paravirt_probe(fn)						\
+ static asmlinkage void (*__paravirtprobe_##fn)(void) __attribute_used__ \
+		__attribute__((__section__(".paravirtprobe"))) = fn
+
+extern struct paravirt_ops paravirt_ops;
+
+#define paravirt_enabled() (paravirt_ops.paravirt_enabled)
+
+static inline void load_esp0(struct tss_struct *tss,
+			     struct thread_struct *thread)
+{
+	paravirt_ops.load_esp0(tss, thread);
+}
+
+#define ARCH_SETUP			paravirt_ops.arch_setup();
+static inline unsigned long get_wallclock(void)
+{
+	return paravirt_ops.get_wallclock();
+}
+
+static inline int set_wallclock(unsigned long nowtime)
+{
+	return paravirt_ops.set_wallclock(nowtime);
+}
+
+static inline void do_time_init(void)
+{
+	return paravirt_ops.time_init();
+}
+
+/* The paravirtualized CPUID instruction. */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+			   unsigned int *ecx, unsigned int *edx)
+{
+	paravirt_ops.cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg)
+#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val)
+
+#define clts() paravirt_ops.clts()
+
+#define read_cr0() paravirt_ops.read_cr0()
+#define write_cr0(x) paravirt_ops.write_cr0(x)
+
+#define read_cr2() paravirt_ops.read_cr2()
+#define write_cr2(x) paravirt_ops.write_cr2(x)
+
+#define read_cr3() paravirt_ops.read_cr3()
+#define write_cr3(x) paravirt_ops.write_cr3(x)
+
+#define read_cr4() paravirt_ops.read_cr4()
+#define read_cr4_safe(x) paravirt_ops.read_cr4_safe()
+#define write_cr4(x) paravirt_ops.write_cr4(x)
+
+static inline void raw_safe_halt(void)
+{
+	paravirt_ops.safe_halt();
+}
+
+static inline void halt(void)
+{
+	paravirt_ops.safe_halt();
+}
+#define wbinvd() paravirt_ops.wbinvd()
+
+#define get_kernel_rpl()  (paravirt_ops.kernel_rpl)
+
+#define rdmsr(msr,val1,val2) do {				\
+	int _err;						\
+	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	val1 = (u32)_l;						\
+	val2 = _l >> 32;					\
+} while(0)
+
+#define wrmsr(msr,val1,val2) do {				\
+	u64 _l = ((u64)(val2) << 32) | (val1);			\
+	paravirt_ops.write_msr((msr), _l);			\
+} while(0)
+
+#define rdmsrl(msr,val) do {					\
+	int _err;						\
+	val = paravirt_ops.read_msr((msr),&_err);		\
+} while(0)
+
+#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val)))
+#define wrmsr_safe(msr,a,b) ({					\
+	u64 _l = ((u64)(b) << 32) | (a);			\
+	paravirt_ops.write_msr((msr),_l);			\
+})
+
+/* rdmsr with exception handling */
+#define rdmsr_safe(msr,a,b) ({					\
+	int _err;						\
+	u64 _l = paravirt_ops.read_msr(msr,&_err);		\
+	(*a) = (u32)_l;						\
+	(*b) = _l >> 32;					\
+	_err; })
+
+#define rdtsc(low,high) do {					\
+	u64 _l = paravirt_ops.read_tsc();			\
+	low = (u32)_l;						\
+	high = _l >> 32;					\
+} while(0)
+
+#define rdtscl(low) do {					\
+	u64 _l = paravirt_ops.read_tsc();			\
+	low = (int)_l;						\
+} while(0)
+
+#define rdtscll(val) (val = paravirt_ops.read_tsc())
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) do {				\
+	u64 _l = paravirt_ops.read_pmc();			\
+	low = (u32)_l;						\
+	high = _l >> 32;					\
+} while(0)
+
+#define load_TR_desc() (paravirt_ops.load_tr_desc())
+#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr))
+#define load_idt(dtr) (paravirt_ops.load_idt(dtr))
+#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries)))
+#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr))
+#define store_idt(dtr) (paravirt_ops.store_idt(dtr))
+#define store_tr(tr) ((tr) = paravirt_ops.store_tr())
+#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu)))
+#define write_ldt_entry(dt, entry, low, high)				\
+	(paravirt_ops.write_ldt_entry((dt), (entry), (low), (high)))
+#define write_gdt_entry(dt, entry, low, high)				\
+	(paravirt_ops.write_gdt_entry((dt), (entry), (low), (high)))
+#define write_idt_entry(dt, entry, low, high)				\
+	(paravirt_ops.write_idt_entry((dt), (entry), (low), (high)))
+#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask))
+
+/* The paravirtualized I/O functions */
+static inline void slow_down_io(void) {
+	paravirt_ops.io_delay();
+#ifdef REALLY_SLOW_IO
+	paravirt_ops.io_delay();
+	paravirt_ops.io_delay();
+	paravirt_ops.io_delay();
+#endif
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/*
+ * Basic functions accessing APICs.
+ */
+static inline void apic_write(unsigned long reg, unsigned long v)
+{
+	paravirt_ops.apic_write(reg,v);
+}
+
+static inline void apic_write_atomic(unsigned long reg, unsigned long v)
+{
+	paravirt_ops.apic_write_atomic(reg,v);
+}
+
+static inline unsigned long apic_read(unsigned long reg)
+{
+	return paravirt_ops.apic_read(reg);
+}
+#endif
+
+
+#define __flush_tlb() paravirt_ops.flush_tlb_user()
+#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel()
+#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr)
+
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	paravirt_ops.set_pte(ptep, pteval);
+}
+
+static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
+{
+	paravirt_ops.set_pte_at(mm, addr, ptep, pteval);
+}
+
+static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	paravirt_ops.set_pmd(pmdp, pmdval);
+}
+
+static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep)
+{
+	paravirt_ops.pte_update(mm, addr, ptep);
+}
+
+static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
+{
+	paravirt_ops.pte_update_defer(mm, addr, ptep);
+}
+
+#ifdef CONFIG_X86_PAE
+static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
+{
+	paravirt_ops.set_pte_atomic(ptep, pteval);
+}
+
+static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+{
+	paravirt_ops.set_pte_present(mm, addr, ptep, pte);
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pudval)
+{
+	paravirt_ops.set_pud(pudp, pudval);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	paravirt_ops.pte_clear(mm, addr, ptep);
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	paravirt_ops.pmd_clear(pmdp);
+}
+#endif
+
+/* These all sit in the .parainstructions section to tell us what to patch. */
+struct paravirt_patch {
+	u8 *instr; 		/* original instructions */
+	u8 instrtype;		/* type of this instruction */
+	u8 len;			/* length of original instruction */
+	u16 clobbers;		/* what registers you may clobber */
+};
+
+#define paravirt_alt(insn_string, typenum, clobber)	\
+	"771:\n\t" insn_string "\n" "772:\n"		\
+	".pushsection .parainstructions,\"a\"\n"	\
+	"  .long 771b\n"				\
+	"  .byte " __stringify(typenum) "\n"		\
+	"  .byte 772b-771b\n"				\
+	"  .short " __stringify(clobber) "\n"		\
+	".popsection"
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+	unsigned long f;
+
+	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+					   "call *%1;"
+					   "popl %%edx; popl %%ecx",
+					  PARAVIRT_SAVE_FLAGS, CLBR_NONE)
+			     : "=a"(f): "m"(paravirt_ops.save_fl)
+			     : "memory", "cc");
+	return f;
+}
+
+static inline void raw_local_irq_restore(unsigned long f)
+{
+	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+					   "call *%1;"
+					   "popl %%edx; popl %%ecx",
+					  PARAVIRT_RESTORE_FLAGS, CLBR_EAX)
+			     : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f)
+			     : "memory", "cc");
+}
+
+static inline void raw_local_irq_disable(void)
+{
+	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+					   "call *%0;"
+					   "popl %%edx; popl %%ecx",
+					  PARAVIRT_IRQ_DISABLE, CLBR_EAX)
+			     : : "m" (paravirt_ops.irq_disable)
+			     : "memory", "eax", "cc");
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+					   "call *%0;"
+					   "popl %%edx; popl %%ecx",
+					  PARAVIRT_IRQ_ENABLE, CLBR_EAX)
+			     : : "m" (paravirt_ops.irq_enable)
+			     : "memory", "eax", "cc");
+}
+
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long f;
+
+	__asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;"
+					   "call *%1; pushl %%eax;"
+					   "call *%2; popl %%eax;"
+					   "popl %%edx; popl %%ecx",
+					  PARAVIRT_SAVE_FLAGS_IRQ_DISABLE,
+					  CLBR_NONE)
+			     : "=a"(f)
+			     : "m" (paravirt_ops.save_fl),
+			       "m" (paravirt_ops.irq_disable)
+			     : "memory", "cc");
+	return f;
+}
+
+#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"		\
+		     "call *paravirt_ops+%c[irq_disable];"		\
+		     "popl %%edx; popl %%ecx",				\
+		     PARAVIRT_IRQ_DISABLE, CLBR_EAX)
+
+#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;"		\
+		     "call *paravirt_ops+%c[irq_enable];"		\
+		     "popl %%edx; popl %%ecx",				\
+		     PARAVIRT_IRQ_ENABLE, CLBR_EAX)
+#define CLI_STI_CLOBBERS , "%eax"
+#define CLI_STI_INPUT_ARGS \
+	,								\
+	[irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)),	\
+	[irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable))
+
+#else  /* __ASSEMBLY__ */
+
+#define PARA_PATCH(ptype, clobbers, ops)	\
+771:;						\
+	ops;					\
+772:;						\
+	.pushsection .parainstructions,"a";	\
+	 .long 771b;				\
+	 .byte ptype;				\
+	 .byte 772b-771b;			\
+	 .short clobbers;			\
+	.popsection
+
+#define INTERRUPT_RETURN				\
+	PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY,	\
+	jmp *%cs:paravirt_ops+PARAVIRT_iret)
+
+#define DISABLE_INTERRUPTS(clobbers)			\
+	PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers,	\
+	pushl %ecx; pushl %edx;				\
+	call *paravirt_ops+PARAVIRT_irq_disable;	\
+	popl %edx; popl %ecx)				\
+
+#define ENABLE_INTERRUPTS(clobbers)			\
+	PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers,	\
+	pushl %ecx; pushl %edx;				\
+	call *%cs:paravirt_ops+PARAVIRT_irq_enable;	\
+	popl %edx; popl %ecx)
+
+#define ENABLE_INTERRUPTS_SYSEXIT			\
+	PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY,	\
+	jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
+
+#define GET_CR0_INTO_EAX			\
+	call *paravirt_ops+PARAVIRT_read_cr0
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+#endif	/* __ASM_PARAVIRT_H */
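
The struct above is populated with native implementations by default; a
hypervisor backend overrides the hooks it cares about early in boot.  A
minimal sketch, where demo_* and hypervisor_cli() are invented names
standing in for a real backend:

	static fastcall void demo_irq_disable(void)
	{
		hypervisor_cli();		/* assumed hypercall wrapper */
	}

	static void __init demo_paravirt_setup(void)
	{
		paravirt_ops.name = "demo";
		paravirt_ops.paravirt_enabled = 1;
		paravirt_ops.kernel_rpl = 1;	/* kernel demoted to ring 1 */
		paravirt_ops.irq_disable = demo_irq_disable;
		/* hooks left untouched keep their native defaults */
	}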
diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h
new file mode 100644
index 0000000..2ba2736
--- /dev/null
+++ b/include/asm-i386/pda.h
@@ -0,0 +1,100 @@
+/*
+   Per-processor Data Areas
+   Jeremy Fitzhardinge <jeremy@goop.org> 2006
+   Based on asm-x86_64/pda.h by Andi Kleen.
+ */
+#ifndef _I386_PDA_H
+#define _I386_PDA_H
+
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+struct i386_pda
+{
+	struct i386_pda *_pda;		/* pointer to self */
+
+	int cpu_number;
+	struct task_struct *pcurrent;	/* current process */
+	struct pt_regs *irq_regs;
+};
+
+extern struct i386_pda *_cpu_pda[];
+
+#define cpu_pda(i)	(_cpu_pda[i])
+
+#define pda_offset(field) offsetof(struct i386_pda, field)
+
+extern void __bad_pda_field(void);
+
+/* This variable is never instantiated.  It is only used as a stand-in
+   for the real per-cpu PDA memory, so that gcc can understand what
+   memory operations the inline asms() below are performing.  This
+   eliminates the need to make the asms volatile or have memory
+   clobbers, so gcc can readily analyse them. */
+extern struct i386_pda _proxy_pda;
+
+#define pda_to_op(op,field,val)						\
+	do {								\
+		typedef typeof(_proxy_pda.field) T__;			\
+		if (0) { T__ tmp__; tmp__ = (val); }			\
+		switch (sizeof(_proxy_pda.field)) {			\
+		case 1:							\
+			asm(op "b %1,%%gs:%c2"				\
+			    : "+m" (_proxy_pda.field)			\
+			    :"ri" ((T__)val),				\
+			     "i"(pda_offset(field)));			\
+			break;						\
+		case 2:							\
+			asm(op "w %1,%%gs:%c2"				\
+			    : "+m" (_proxy_pda.field)			\
+			    :"ri" ((T__)val),				\
+			     "i"(pda_offset(field)));			\
+			break;						\
+		case 4:							\
+			asm(op "l %1,%%gs:%c2"				\
+			    : "+m" (_proxy_pda.field)			\
+			    :"ri" ((T__)val),				\
+			     "i"(pda_offset(field)));			\
+			break;						\
+		default: __bad_pda_field();				\
+		}							\
+	} while (0)
+
+#define pda_from_op(op,field)						\
+	({								\
+		typeof(_proxy_pda.field) ret__;				\
+		switch (sizeof(_proxy_pda.field)) {			\
+		case 1:							\
+			asm(op "b %%gs:%c1,%0"				\
+			    : "=r" (ret__)				\
+			    : "i" (pda_offset(field)),			\
+			      "m" (_proxy_pda.field));			\
+			break;						\
+		case 2:							\
+			asm(op "w %%gs:%c1,%0"				\
+			    : "=r" (ret__)				\
+			    : "i" (pda_offset(field)),			\
+			      "m" (_proxy_pda.field));			\
+			break;						\
+		case 4:							\
+			asm(op "l %%gs:%c1,%0"				\
+			    : "=r" (ret__)				\
+			    : "i" (pda_offset(field)),			\
+			      "m" (_proxy_pda.field));			\
+			break;						\
+		default: __bad_pda_field();				\
+		}							\
+		ret__; })
+
+/* Return a pointer to a pda field */
+#define pda_addr(field)							\
+	((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) + \
+				      pda_offset(field)))
+
+#define read_pda(field) pda_from_op("mov",field)
+#define write_pda(field,val) pda_to_op("mov",field,val)
+#define add_pda(field,val) pda_to_op("add",field,val)
+#define sub_pda(field,val) pda_to_op("sub",field,val)
+#define or_pda(field,val) pda_to_op("or",field,val)
+
+#endif	/* _I386_PDA_H */
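
A short usage sketch: each accessor above compiles to a single %gs-relative
instruction against the named field.

	struct pt_regs *old_regs;
	int cpu;

	cpu = read_pda(cpu_number);		/* movl %gs:<offset>,%reg */
	old_regs = read_pda(irq_regs);
	write_pda(irq_regs, old_regs);		/* movl %reg,%gs:<offset> */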
diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h
index 5764afa..510ae1d 100644
--- a/include/asm-i386/percpu.h
+++ b/include/asm-i386/percpu.h
@@ -1,6 +1,31 @@
 #ifndef __ARCH_I386_PERCPU__
 #define __ARCH_I386_PERCPU__
 
+#ifndef __ASSEMBLY__
 #include <asm-generic/percpu.h>
+#else
+
+/*
+ * PER_CPU finds an address of a per-cpu variable.
+ *
+ * Args:
+ *    var - variable name
+ *    cpu - 32bit register containing the current CPU number
+ *
+ * The resulting address is stored in the "cpu" argument.
+ *
+ * Example:
+ *    PER_CPU(cpu_gdt_descr, %ebx)
+ */
+#ifdef CONFIG_SMP
+#define PER_CPU(var, cpu) \
+	movl __per_cpu_offset(,cpu,4), cpu;	\
+	addl $per_cpu__/**/var, cpu;
+#else /* ! SMP */
+#define PER_CPU(var, cpu) \
+	movl $per_cpu__/**/var, cpu;
+#endif	/* SMP */
+
+#endif /* !__ASSEMBLY__ */
 
 #endif /* __ARCH_I386_PERCPU__ */
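
For reference, the SMP variant expands the example in the comment above,
PER_CPU(cpu_gdt_descr, %ebx), to roughly:

	movl __per_cpu_offset(,%ebx,4), %ebx
	addl $per_cpu__cpu_gdt_descr, %ebx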
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 8d8d3b9..38c3fcc 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -1,8 +1,6 @@
 #ifndef _I386_PGTABLE_2LEVEL_H
 #define _I386_PGTABLE_2LEVEL_H
 
-#include <asm-generic/pgtable-nopmd.h>
-
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
 #define pgd_ERROR(e) \
@@ -13,17 +11,19 @@
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
  */
+#ifndef CONFIG_PARAVIRT
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
 #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#endif
+
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
 #define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval)
-#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
 
 #define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
 
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define ptep_get_and_clear(mm,addr,xp)	__pte(xchg(&(xp)->pte_low, 0))
+#define raw_ptep_get_and_clear(xp)	__pte(xchg(&(xp)->pte_low, 0))
 
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define pte_none(x)		(!(x).pte_low)
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index c2d701e..7a2318f 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -1,8 +1,6 @@
 #ifndef _I386_PGTABLE_3LEVEL_H
 #define _I386_PGTABLE_3LEVEL_H
 
-#include <asm-generic/pgtable-nopud.h>
-
 /*
  * Intel Physical Address Extension (PAE) Mode - three-level page
  * tables on PPro+ CPUs.
@@ -44,6 +42,7 @@
 	return pte_x(pte);
 }
 
+#ifndef CONFIG_PARAVIRT
 /* Rules for using set_pte: the pte being assigned *must* be
  * either not present or in a state where the hardware will
  * not attempt to update the pte.  In places where this is
@@ -81,25 +80,6 @@
 		(*(pudptr) = (pudval))
 
 /*
- * Pentium-II erratum A13: in PAE mode we explicitly have to flush
- * the TLB via cr3 if the top-level pgd is changed...
- * We do not let the generic code free and clear pgd entries due to
- * this erratum.
- */
-static inline void pud_clear (pud_t * pud) { }
-
-#define pud_page(pud) \
-((struct page *) __va(pud_val(pud) & PAGE_MASK))
-
-#define pud_page_vaddr(pud) \
-((unsigned long) __va(pud_val(pud) & PAGE_MASK))
-
-
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
-			pmd_index(address))
-
-/*
  * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
  * entry, so clear the bottom half first and enforce ordering with a compiler
  * barrier.
@@ -118,9 +98,28 @@
 	smp_wmb();
 	*(tmp + 1) = 0;
 }
+#endif
 
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+/*
+ * Pentium-II erratum A13: in PAE mode we explicitly have to flush
+ * the TLB via cr3 if the top-level pgd is changed...
+ * We do not let the generic code free and clear pgd entries due to
+ * this erratum.
+ */
+static inline void pud_clear (pud_t * pud) { }
+
+#define pud_page(pud) \
+((struct page *) __va(pud_val(pud) & PAGE_MASK))
+
+#define pud_page_vaddr(pud) \
+((unsigned long) __va(pud_val(pud) & PAGE_MASK))
+
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
+			pmd_index(address))
+
+static inline pte_t raw_ptep_get_and_clear(pte_t *ptep)
 {
 	pte_t res;
 
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index bfee7dd..e6a4723 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -15,6 +15,7 @@
 #include <asm/processor.h>
 #include <asm/fixmap.h>
 #include <linux/threads.h>
+#include <asm/paravirt.h>
 
 #ifndef _I386_BITOPS_H
 #include <asm/bitops.h>
@@ -246,6 +247,7 @@
 # include <asm/pgtable-2level.h>
 #endif
 
+#ifndef CONFIG_PARAVIRT
 /*
  * Rules for using pte_update - it must be called after any PTE update which
  * has not been done using the set_pte / clear_pte interfaces.  It is used by
@@ -261,7 +263,7 @@
  */
 #define pte_update(mm, addr, ptep)		do { } while (0)
 #define pte_update_defer(mm, addr, ptep)	do { } while (0)
-
+#endif
 
 /*
  * We only update the dirty/accessed state if we set
@@ -275,7 +277,7 @@
 do {									\
 	if (dirty) {							\
 		(ptep)->pte_low = (entry).pte_low;			\
-		pte_update_defer((vma)->vm_mm, (addr), (ptep));		\
+		pte_update_defer((vma)->vm_mm, (address), (ptep));	\
 		flush_tlb_page(vma, address);				\
 	}								\
 } while (0)
@@ -305,7 +307,7 @@
 	__dirty = pte_dirty(*(ptep));					\
 	if (__dirty) {							\
 		clear_bit(_PAGE_BIT_DIRTY, &(ptep)->pte_low);		\
-		pte_update_defer((vma)->vm_mm, (addr), (ptep));		\
+		pte_update_defer((vma)->vm_mm, (address), (ptep));	\
 		flush_tlb_page(vma, address);				\
 	}								\
 	__dirty;							\
@@ -318,12 +320,20 @@
 	__young = pte_young(*(ptep));					\
 	if (__young) {							\
 		clear_bit(_PAGE_BIT_ACCESSED, &(ptep)->pte_low);	\
-		pte_update_defer((vma)->vm_mm, (addr), (ptep));		\
+		pte_update_defer((vma)->vm_mm, (address), (ptep));	\
 		flush_tlb_page(vma, address);				\
 	}								\
 	__young;							\
 })
 
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = raw_ptep_get_and_clear(ptep);
+	pte_update(mm, addr, ptep);
+	return pte;
+}
+
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
 {
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index e0ddca9..a52d654 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -20,6 +20,7 @@
 #include <linux/threads.h>
 #include <asm/percpu.h>
 #include <linux/cpumask.h>
+#include <linux/init.h>
 
 /* flag for disabling the tsc */
 extern int tsc_disable;
@@ -72,6 +73,7 @@
 #endif
 	unsigned char x86_max_cores;	/* cpuid returned max cores value */
 	unsigned char apicid;
+	unsigned short x86_clflush_size;
 #ifdef CONFIG_SMP
 	unsigned char booted_cores;	/* number of cores as seen by OS */
 	__u8 phys_proc_id; 		/* Physical processor id. */
@@ -111,6 +113,8 @@
 extern	int cpu_llc_id[NR_CPUS];
 extern char ignore_fpu_irq;
 
+void __init cpu_detect(struct cpuinfo_x86 *c);
+
 extern void identify_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
@@ -143,8 +147,8 @@
 #define X86_EFLAGS_VIP	0x00100000 /* Virtual Interrupt Pending */
 #define X86_EFLAGS_ID	0x00200000 /* CPUID detection flag */
 
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-			   unsigned int *ecx, unsigned int *edx)
+static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx,
+					 unsigned int *ecx, unsigned int *edx)
 {
 	/* ecx is often an input as well as an output. */
 	__asm__("cpuid"
@@ -155,59 +159,6 @@
 		: "0" (*eax), "2" (*ecx));
 }
 
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
-{
-	*eax = op;
-	*ecx = 0;
-	__cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
-			       int *edx)
-{
-	*eax = op;
-	*ecx = count;
-	__cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return eax;
-}
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return ebx;
-}
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return ecx;
-}
-static inline unsigned int cpuid_edx(unsigned int op)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	cpuid(op, &eax, &ebx, &ecx, &edx);
-	return edx;
-}
-
 #define load_cr3(pgdir) write_cr3(__pa(pgdir))
 
 /*
@@ -473,6 +424,7 @@
 	.vm86_info = NULL,						\
 	.sysenter_cs = __KERNEL_CS,					\
 	.io_bitmap_ptr = NULL,						\
+	.gs = __KERNEL_PDA,						\
 }
 
 /*
@@ -489,18 +441,9 @@
 	.io_bitmap	= { [ 0 ... IO_BITMAP_LONGS] = ~0 },		\
 }
 
-static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
-{
-	tss->esp0 = thread->esp0;
-	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
-	if (unlikely(tss->ss1 != thread->sysenter_cs)) {
-		tss->ss1 = thread->sysenter_cs;
-		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
-	}
-}
-
 #define start_thread(regs, new_eip, new_esp) do {		\
-	__asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0));	\
+	__asm__("movl %0,%%fs": :"r" (0));			\
+	regs->xgs = 0;						\
 	set_fs(USER_DS);					\
 	regs->xds = __USER_DS;					\
 	regs->xes = __USER_DS;					\
@@ -510,33 +453,6 @@
 	regs->esp = new_esp;					\
 } while (0)
 
-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register)				\
-		__asm__("movl %%db" #register ", %0"		\
-			:"=r" (var))
-#define set_debugreg(value, register)			\
-		__asm__("movl %0,%%db" #register		\
-			: /* no output */			\
-			:"r" (value))
-
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void set_iopl_mask(unsigned mask)
-{
-	unsigned int reg;
-	__asm__ __volatile__ ("pushfl;"
-			      "popl %0;"
-			      "andl %1, %0;"
-			      "orl %2, %0;"
-			      "pushl %0;"
-			      "popfl"
-				: "=&r" (reg)
-				: "i" (~X86_EFLAGS_IOPL), "r" (mask));
-}
-
 /* Forward declaration, a strange C thing */
 struct task_struct;
 struct mm_struct;
@@ -628,6 +544,105 @@
 
 #define cpu_relax()	rep_nop()
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define paravirt_enabled() 0
+#define __cpuid native_cpuid
+
+static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
+{
+	tss->esp0 = thread->esp0;
+	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
+	if (unlikely(tss->ss1 != thread->sysenter_cs)) {
+		tss->ss1 = thread->sysenter_cs;
+		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+	}
+}
+
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register)				\
+		__asm__("movl %%db" #register ", %0"		\
+			:"=r" (var))
+#define set_debugreg(value, register)			\
+		__asm__("movl %0,%%db" #register		\
+			: /* no output */			\
+			:"r" (value))
+
+#define set_iopl_mask native_set_iopl_mask
+#endif /* CONFIG_PARAVIRT */
+
+/*
+ * Set IOPL bits in EFLAGS from given mask
+ */
+static fastcall inline void native_set_iopl_mask(unsigned mask)
+{
+	unsigned int reg;
+	__asm__ __volatile__ ("pushfl;"
+			      "popl %0;"
+			      "andl %1, %0;"
+			      "orl %2, %0;"
+			      "pushl %0;"
+			      "popfl"
+				: "=&r" (reg)
+				: "i" (~X86_EFLAGS_IOPL), "r" (mask));
+}
+
+/*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
+{
+	*eax = op;
+	*ecx = 0;
+	__cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ecx */
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+			       int *edx)
+{
+	*eax = op;
+	*ecx = count;
+	__cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+static inline unsigned int cpuid_eax(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return eax;
+}
+static inline unsigned int cpuid_ebx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return ebx;
+}
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return ecx;
+}
+static inline unsigned int cpuid_edx(unsigned int op)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid(op, &eax, &ebx, &ecx, &edx);
+	return edx;
+}
+
 /* generic versions from gas */
 #define GENERIC_NOP1	".byte 0x90\n"
 #define GENERIC_NOP2    	".byte 0x89,0xf6\n"
@@ -727,4 +742,7 @@
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
+extern int init_gdt(int cpu, struct task_struct *idle);
+extern void secondary_cpu_init(void);
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h
index d505f50..bdbc894 100644
--- a/include/asm-i386/ptrace.h
+++ b/include/asm-i386/ptrace.h
@@ -16,6 +16,8 @@
 	long eax;
 	int  xds;
 	int  xes;
+	/* int  xfs; */
+	int  xgs;
 	long orig_eax;
 	long eip;
 	int  xcs;
diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h
index b7ab596..3c796af 100644
--- a/include/asm-i386/segment.h
+++ b/include/asm-i386/segment.h
@@ -39,7 +39,7 @@
  *  25 - APM BIOS support 
  *
  *  26 - ESPFIX small SS
- *  27 - unused
+ *  27 - PDA				[ per-cpu private data area ]
  *  28 - unused
  *  29 - unused
  *  30 - unused
@@ -74,6 +74,9 @@
 #define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 14)
 #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
 
+#define GDT_ENTRY_PDA			(GDT_ENTRY_KERNEL_BASE + 15)
+#define __KERNEL_PDA (GDT_ENTRY_PDA * 8)
+
 #define GDT_ENTRY_DOUBLEFAULT_TSS	31
 
 /*
@@ -128,5 +131,7 @@
 #define SEGMENT_LDT		0x4
 #define SEGMENT_GDT		0x0
 
+#ifndef CONFIG_PARAVIRT
 #define get_kernel_rpl()  0
 #endif
+#endif
diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h
index c5b504b..67659db 100644
--- a/include/asm-i386/setup.h
+++ b/include/asm-i386/setup.h
@@ -70,6 +70,7 @@
 struct e820entry;
 
 char * __init machine_specific_memory_setup(void);
+char *memory_setup(void);
 
 int __init copy_e820_map(struct e820entry * biosmap, int nr_map);
 int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map);
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index bd59c15..64fe624 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
+#include <asm/pda.h>
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -56,7 +57,7 @@
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define raw_smp_processor_id() (current_thread_info()->cpu)
+#define raw_smp_processor_id() (read_pda(cpu_number))
 
 extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_callin_map;
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index c18b71f..d3bcebe 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -7,8 +7,14 @@
 #include <asm/processor.h>
 #include <linux/compiler.h>
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
 #define CLI_STRING	"cli"
 #define STI_STRING	"sti"
+#define CLI_STI_CLOBBERS
+#define CLI_STI_INPUT_ARGS
+#endif /* CONFIG_PARAVIRT */
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
@@ -53,25 +59,28 @@
 {
 	asm volatile(
 		"\n1:\t"
-		LOCK_PREFIX " ; decb %0\n\t"
+		LOCK_PREFIX " ; decb %[slock]\n\t"
 		"jns 5f\n"
 		"2:\t"
-		"testl $0x200, %1\n\t"
+		"testl $0x200, %[flags]\n\t"
 		"jz 4f\n\t"
 		STI_STRING "\n"
 		"3:\t"
 		"rep;nop\n\t"
-		"cmpb $0, %0\n\t"
+		"cmpb $0, %[slock]\n\t"
 		"jle 3b\n\t"
 		CLI_STRING "\n\t"
 		"jmp 1b\n"
 		"4:\t"
 		"rep;nop\n\t"
-		"cmpb $0, %0\n\t"
+		"cmpb $0, %[slock]\n\t"
 		"jg 1b\n\t"
 		"jmp 4b\n"
 		"5:\n\t"
-		: "+m" (lock->slock) : "r" (flags) : "memory");
+		: [slock] "+m" (lock->slock)
+		: [flags] "r" (flags)
+	 	  CLI_STI_INPUT_ARGS
+		: "memory" CLI_STI_CLOBBERS);
 }
 #endif
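
In C terms the asm above behaves roughly as below; 0x200 is the IF bit in
the caller's saved EFLAGS, so interrupts are re-enabled while spinning only
if the caller had them enabled.  try_dec_byte() is an invented stand-in for
the LOCK-prefixed decb/jns pair:

	while (!try_dec_byte(&lock->slock)) {	/* LOCK; decb %[slock]; jns */
		if (flags & 0x200) {		/* caller had IF set */
			local_irq_enable();	/* STI_STRING */
			while (lock->slock <= 0)
				cpu_relax();
			local_irq_disable();	/* CLI_STRING */
		} else {
			while (lock->slock <= 0)
				cpu_relax();
		}
	}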
 
diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h
index c1da5ca..8dbaafe 100644
--- a/include/asm-i386/suspend.h
+++ b/include/asm-i386/suspend.h
@@ -12,12 +12,8 @@
 struct saved_context {
   	u16 es, fs, gs, ss;
 	unsigned long cr0, cr2, cr3, cr4;
-	u16 gdt_pad;
-	u16 gdt_limit;
-	unsigned long gdt_base;
-	u16 idt_pad;
-	u16 idt_limit;
-	unsigned long idt_base;
+	struct Xgt_desc_struct gdt;
+	struct Xgt_desc_struct idt;
 	u16 ldt;
 	u16 tss;
 	unsigned long tr;
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index a6dabbc..a6d20d9 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -88,6 +88,9 @@
 #define savesegment(seg, value) \
 	asm volatile("mov %%" #seg ",%0":"=rm" (value))
 
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
 #define read_cr0() ({ \
 	unsigned int __dummy; \
 	__asm__ __volatile__( \
@@ -139,17 +142,18 @@
 #define write_cr4(x) \
 	__asm__ __volatile__("movl %0,%%cr4": :"r" (x))
 
-/*
- * Clear and set 'TS' bit respectively
- */
+#define wbinvd() \
+	__asm__ __volatile__ ("wbinvd": : :"memory")
+
+/* Clear the 'TS' bit */
 #define clts() __asm__ __volatile__ ("clts")
+#endif /* CONFIG_PARAVIRT */
+
+/* Set the 'TS' bit */
 #define stts() write_cr0(8 | read_cr0())
 
 #endif	/* __KERNEL__ */
 
-#define wbinvd() \
-	__asm__ __volatile__ ("wbinvd": : :"memory")
-
 static inline unsigned long get_limit(unsigned long segment)
 {
 	unsigned long __limit;
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index 54d6d7a..46d32ad 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -95,15 +95,7 @@
 
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
-#define alloc_thread_info(tsk)					\
-	({							\
-		struct thread_info *ret;			\
-								\
-		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
-		if (ret)					\
-			memset(ret, 0, THREAD_SIZE);		\
-		ret;						\
-	})
+#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL)
 #else
 #define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL)
 #endif
diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h
new file mode 100644
index 0000000..ea8065a
--- /dev/null
+++ b/include/asm-i386/time.h
@@ -0,0 +1,41 @@
+#ifndef _ASMi386_TIME_H
+#define _ASMi386_TIME_H
+
+#include <linux/efi.h>
+#include "mach_time.h"
+
+static inline unsigned long native_get_wallclock(void)
+{
+	unsigned long retval;
+
+	if (efi_enabled)
+		retval = efi_get_time();
+	else
+		retval = mach_get_cmos_time();
+
+	return retval;
+}
+
+static inline int native_set_wallclock(unsigned long nowtime)
+{
+	int retval;
+
+	if (efi_enabled)
+		retval = efi_set_rtc_mmss(nowtime);
+	else
+		retval = mach_set_rtc_mmss(nowtime);
+
+	return retval;
+}
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else /* !CONFIG_PARAVIRT */
+
+#define get_wallclock() native_get_wallclock()
+#define set_wallclock(x) native_set_wallclock(x)
+#define do_time_init() time_init_hook()
+
+#endif /* CONFIG_PARAVIRT */
+
+#endif
diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h
index 360648b..4dd8284 100644
--- a/include/asm-i386/tlbflush.h
+++ b/include/asm-i386/tlbflush.h
@@ -4,7 +4,15 @@
 #include <linux/mm.h>
 #include <asm/processor.h>
 
-#define __flush_tlb()							\
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+#define __flush_tlb() __native_flush_tlb()
+#define __flush_tlb_global() __native_flush_tlb_global()
+#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#endif
+
+#define __native_flush_tlb()						\
 	do {								\
 		unsigned int tmpreg;					\
 									\
@@ -19,7 +27,7 @@
  * Global pages have to be flushed a bit differently. Not a real
  * performance problem because this does not happen often.
  */
-#define __flush_tlb_global()						\
+#define __native_flush_tlb_global()					\
 	do {								\
 		unsigned int tmpreg, cr4, cr4_orig;			\
 									\
@@ -36,6 +44,9 @@
 			: "memory");					\
 	} while (0)
 
+#define __native_flush_tlb_single(addr) 				\
+	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
+
 # define __flush_tlb_all()						\
 	do {								\
 		if (cpu_has_pge)					\
@@ -46,9 +57,6 @@
 
 #define cpu_has_invlpg	(boot_cpu_data.x86 > 3)
 
-#define __flush_tlb_single(addr) \
-	__asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
-
 #ifdef CONFIG_X86_INVLPG
 # define __flush_tlb_one(addr) __flush_tlb_single(addr)
 #else
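
The renamed __native_* macros keep the old behaviour; CONFIG_PARAVIRT builds
just route the generic names through paravirt_ops instead.  As a sketch,
where addr stands for any mapped linear address, the full flush above
amounts to a cr3 reload:

	write_cr3(read_cr3());			/* drops all non-global TLB entries */
	__native_flush_tlb_single(addr);	/* invlpg: drops one page's entry */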
diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h
index 5031d69..aa2c931 100644
--- a/include/asm-i386/unwind.h
+++ b/include/asm-i386/unwind.h
@@ -71,6 +71,7 @@
 	info->regs.xss = __KERNEL_DS;
 	info->regs.xds = __USER_DS;
 	info->regs.xes = __USER_DS;
+	info->regs.xgs = __KERNEL_PDA;
 }
 
 extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
@@ -78,17 +79,13 @@
                                                                           void *arg),
                                                void *arg);
 
-static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
+static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
 {
-#if 0 /* This can only work when selector register and EFLAGS saves/restores
-         are properly annotated (and tracked in UNW_REGISTER_INFO). */
-	return user_mode_vm(&info->regs);
-#else
-	return info->regs.eip < PAGE_OFFSET
+	return user_mode_vm(&info->regs)
+	       || info->regs.eip < PAGE_OFFSET
 	       || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
-	            && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
+	           && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
 	       || info->regs.esp < PAGE_OFFSET;
-#endif
 }
 
 #else
diff --git a/include/asm-i386/vm86.h b/include/asm-i386/vm86.h
index 952fd69..a5edf51 100644
--- a/include/asm-i386/vm86.h
+++ b/include/asm-i386/vm86.h
@@ -145,26 +145,13 @@
  * at the end of the structure. Look at ptrace.h to see the "normal"
  * setup. For user space layout see 'struct vm86_regs' above.
  */
+#include <asm/ptrace.h>
 
 struct kernel_vm86_regs {
 /*
  * normal regs, with special meaning for the segment descriptors..
  */
-	long ebx;
-	long ecx;
-	long edx;
-	long esi;
-	long edi;
-	long ebp;
-	long eax;
-	long __null_ds;
-	long __null_es;
-	long orig_eax;
-	long eip;
-	unsigned short cs, __csh;
-	long eflags;
-	long esp;
-	unsigned short ss, __ssh;
+	struct pt_regs pt;
 /*
  * these are specific to v86 mode:
  */
diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild
index 7635213..ebd7117 100644
--- a/include/asm-x86_64/Kbuild
+++ b/include/asm-x86_64/Kbuild
@@ -6,7 +6,6 @@
 
 header-y += boot.h
 header-y += bootsetup.h
-header-y += cpufeature.h
 header-y += debugreg.h
 header-y += ldt.h
 header-y += msr.h
diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h
index a584826..a6657b4 100644
--- a/include/asm-x86_64/alternative.h
+++ b/include/asm-x86_64/alternative.h
@@ -4,6 +4,7 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
+#include <linux/stddef.h>
 #include <asm/cpufeature.h>
 
 struct alt_instr {
@@ -133,4 +134,15 @@
 #define LOCK_PREFIX ""
 #endif
 
+struct paravirt_patch;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end);
+#else
+static inline void
+apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{}
+#define __start_parainstructions NULL
+#define __stop_parainstructions NULL
+#endif
+
 #endif /* _X86_64_ALTERNATIVE_H */
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index 93849f7..706ca4b 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -189,9 +189,9 @@
 {
 	int __i = i;
 	__asm__ __volatile__(
-		LOCK_PREFIX "xaddl %0, %1;"
-		:"=r"(i)
-		:"m"(v->counter), "0"(i));
+		LOCK_PREFIX "xaddl %0, %1"
+		:"+r" (i), "+m" (v->counter)
+		: : "memory");
 	return i + __i;
 }
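
The constraint change is a real fix: the old asm listed v->counter only as
an input, so gcc was free to cache or reorder accesses to the counter around
the xadd; "+r"/"+m" plus the "memory" clobber mark both operands read-write.
Usage is unchanged, e.g.:

	atomic_t refs = ATOMIC_INIT(1);
	int val = atomic_add_return(1, &refs);	/* atomically adds 1, returns 2 */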
 
diff --git a/include/asm-x86_64/calgary.h b/include/asm-x86_64/calgary.h
index 6b93f5a..7ee9006 100644
--- a/include/asm-x86_64/calgary.h
+++ b/include/asm-x86_64/calgary.h
@@ -51,6 +51,8 @@
 #define TCE_TABLE_SIZE_4M		6
 #define TCE_TABLE_SIZE_8M		7
 
+extern int use_calgary;
+
 #ifdef CONFIG_CALGARY_IOMMU
 extern int calgary_iommu_init(void);
 extern void detect_calgary(void);
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
index ee792fa..0b3c686 100644
--- a/include/asm-x86_64/cpufeature.h
+++ b/include/asm-x86_64/cpufeature.h
@@ -29,7 +29,7 @@
 #define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */
 #define X86_FEATURE_PN		(0*32+18) /* Processor serial number */
 #define X86_FEATURE_CLFLSH	(0*32+19) /* Supports the CLFLUSH instruction */
-#define X86_FEATURE_DTES	(0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_DS		(0*32+21) /* Debug Store */
 #define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */
 #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
 #define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
@@ -68,6 +68,8 @@
 #define X86_FEATURE_FXSAVE_LEAK (3*32+7)  /* FIP/FOP/FDP leaks through FXSAVE */
 #define X86_FEATURE_UP		(3*32+8) /* SMP kernel running on UP */
 #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS	(3*32+10) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS		(3*32+11) /* Branch Trace Store */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
@@ -112,5 +114,8 @@
 #define cpu_has_cyrix_arr      0
 #define cpu_has_centaur_mcr    0
 #define cpu_has_clflush	       boot_cpu_has(X86_FEATURE_CLFLSH)
+#define cpu_has_ds 	       boot_cpu_has(X86_FEATURE_DS)
+#define cpu_has_pebs 	       boot_cpu_has(X86_FEATURE_PEBS)
+#define cpu_has_bts 	       boot_cpu_has(X86_FEATURE_BTS)
 
 #endif /* __ASM_X8664_CPUFEATURE_H */
diff --git a/include/asm-x86_64/delay.h b/include/asm-x86_64/delay.h
index 65f64ac..c2669f1 100644
--- a/include/asm-x86_64/delay.h
+++ b/include/asm-x86_64/delay.h
@@ -7,18 +7,21 @@
  * Delay routines calling functions in arch/x86_64/lib/delay.c
  */
  
+/* These functions are never defined, so out-of-range constants fail at link time */
 extern void __bad_udelay(void);
 extern void __bad_ndelay(void);
 
 extern void __udelay(unsigned long usecs);
-extern void __ndelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
 extern void __const_udelay(unsigned long usecs);
 extern void __delay(unsigned long loops);
 
+/* 0x10c7 is 2**32 / 1000000 (rounded up) */
 #define udelay(n) (__builtin_constant_p(n) ? \
-	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
+	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \
 	__udelay(n))
 
+/* 0x5 is 2**32 / 1000000000 (rounded up) */
 #define ndelay(n) (__builtin_constant_p(n) ? \
        ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
        __ndelay(n))
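
A quick check of the rounding fix, given that __const_udelay() effectively
scales its argument by 1/2^32:

	/* 2^32 / 10^6 = 4294.967296
	 * old: udelay(1000) ~= 1000 * 4294 / 4294.967296 = 999.77 us  (too short)
	 * new: udelay(1000) ~= 1000 * 4295 / 4294.967296 = 1000.01 us (never short)
	 */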
diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h
index eb7723a..913d6ac 100644
--- a/include/asm-x86_64/desc.h
+++ b/include/asm-x86_64/desc.h
@@ -9,64 +9,13 @@
 
 #include <linux/string.h>
 #include <linux/smp.h>
+#include <asm/desc_defs.h>
 
 #include <asm/segment.h>
 #include <asm/mmu.h>
 
-// 8 byte segment descriptor
-struct desc_struct { 
-	u16 limit0;
-	u16 base0;
-	unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
-	unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
-} __attribute__((packed)); 
-
-struct n_desc_struct { 
-	unsigned int a,b;
-}; 	
-
 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
 
-enum { 
-	GATE_INTERRUPT = 0xE, 
-	GATE_TRAP = 0xF, 	
-	GATE_CALL = 0xC,
-}; 	
-
-// 16byte gate
-struct gate_struct {          
-	u16 offset_low;
-	u16 segment; 
-	unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
-	u16 offset_middle;
-	u32 offset_high;
-	u32 zero1; 
-} __attribute__((packed));
-
-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) 
-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
-#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
-
-enum { 
-	DESC_TSS = 0x9,
-	DESC_LDT = 0x2,
-}; 
-
-// LDT or TSS descriptor in the GDT. 16 bytes.
-struct ldttss_desc { 
-	u16 limit0;
-	u16 base0;
-	unsigned base1 : 8, type : 5, dpl : 2, p : 1;
-	unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
-	u32 base3;
-	u32 zero1; 
-} __attribute__((packed)); 
-
-struct desc_ptr {
-	unsigned short size;
-	unsigned long address;
-} __attribute__((packed)) ;
-
 #define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8))
 #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
 #define clear_LDT()  asm volatile("lldt %w0"::"r" (0))
diff --git a/include/asm-x86_64/desc_defs.h b/include/asm-x86_64/desc_defs.h
new file mode 100644
index 0000000..0890040
--- /dev/null
+++ b/include/asm-x86_64/desc_defs.h
@@ -0,0 +1,69 @@
+/* Written 2000 by Andi Kleen */
+#ifndef __ARCH_DESC_DEFS_H
+#define __ARCH_DESC_DEFS_H
+
+/*
+ * Segment descriptor structure definitions, usable from both x86_64 and i386
+ * archs.
+ */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+// 8 byte segment descriptor
+struct desc_struct {
+	u16 limit0;
+	u16 base0;
+	unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
+	unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
+} __attribute__((packed));
+
+struct n_desc_struct {
+	unsigned int a,b;
+};
+
+enum {
+	GATE_INTERRUPT = 0xE,
+	GATE_TRAP = 0xF,
+	GATE_CALL = 0xC,
+};
+
+// 16byte gate
+struct gate_struct {
+	u16 offset_low;
+	u16 segment;
+	unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
+	u16 offset_middle;
+	u32 offset_high;
+	u32 zero1;
+} __attribute__((packed));
+
+#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
+#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
+#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
+
+enum {
+	DESC_TSS = 0x9,
+	DESC_LDT = 0x2,
+};
+
+// LDT or TSS descriptor in the GDT. 16 bytes.
+struct ldttss_desc {
+	u16 limit0;
+	u16 base0;
+	unsigned base1 : 8, type : 5, dpl : 2, p : 1;
+	unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+	u32 base3;
+	u32 zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+	unsigned short size;
+	unsigned long address;
+} __attribute__((packed)) ;
+
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
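
A sketch of how the PTR_LOW/PTR_MIDDLE/PTR_HIGH helpers combine with struct
gate_struct to fill a 16-byte IDT entry; set_gate_example() is an invented
name and __KERNEL_CS is assumed to be the target code selector:

	static void set_gate_example(struct gate_struct *g, void *handler)
	{
		g->offset_low    = PTR_LOW(handler);
		g->segment       = __KERNEL_CS;	/* assumed code selector */
		g->ist           = 0;
		g->zero0         = 0;
		g->type          = GATE_INTERRUPT;
		g->dpl           = 0;
		g->p             = 1;
		g->offset_middle = PTR_MIDDLE(handler);
		g->offset_high   = PTR_HIGH(handler);
		g->zero1         = 0;
	}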
diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h
index a0e9a4b..b80f4bb 100644
--- a/include/asm-x86_64/genapic.h
+++ b/include/asm-x86_64/genapic.h
@@ -30,6 +30,6 @@
 };
 
 
-extern struct genapic *genapic;
+extern struct genapic *genapic, *genapic_force, apic_flat;
 
 #endif
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index 37e1941..952783d 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -169,8 +169,8 @@
 #define MSR_LSTAR 0xc0000082 		/* long mode SYSCALL target */
 #define MSR_CSTAR 0xc0000083		/* compatibility mode SYSCALL target */
 #define MSR_SYSCALL_MASK 0xc0000084	/* EFLAGS mask for syscall */
-#define MSR_FS_BASE 0xc0000100		/* 64bit GS base */
-#define MSR_GS_BASE 0xc0000101		/* 64bit FS base */
+#define MSR_FS_BASE 0xc0000100		/* 64bit FS base */
+#define MSR_GS_BASE 0xc0000101		/* 64bit GS base */
 #define MSR_KERNEL_GS_BASE  0xc0000102	/* SwapGS GS shadow (or USER_GS from kernel) */ 
 /* EFER bits: */ 
 #define _EFER_SCE 0  /* SYSCALL/SYSRET */
@@ -210,6 +210,10 @@
 #define MSR_IA32_LASTINTFROMIP     0x1dd
 #define MSR_IA32_LASTINTTOIP       0x1de
 
+#define MSR_IA32_PEBS_ENABLE		0x3f1
+#define MSR_IA32_DS_AREA		0x600
+#define MSR_IA32_PERF_CAPABILITIES	0x345
+
 #define MSR_MTRRfix64K_00000	0x250
 #define MSR_MTRRfix16K_80000	0x258
 #define MSR_MTRRfix16K_A0000	0x259
@@ -407,4 +411,13 @@
 #define MSR_P4_U2L_ESCR0 		0x3b0
 #define MSR_P4_U2L_ESCR1 		0x3b1
 
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0	0x309
+#define MSR_CORE_PERF_FIXED_CTR1	0x30a
+#define MSR_CORE_PERF_FIXED_CTR2	0x30b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL	0x38d
+#define MSR_CORE_PERF_GLOBAL_STATUS	0x38e
+#define MSR_CORE_PERF_GLOBAL_CTRL	0x38f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL	0x390
+
 #endif
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h
index f367d40..72375e7 100644
--- a/include/asm-x86_64/nmi.h
+++ b/include/asm-x86_64/nmi.h
@@ -77,4 +77,7 @@
 
 extern int unknown_nmi_panic;
 
+void __trigger_all_cpu_backtrace(void);
+#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
+
 #endif /* ASM_NMI_H */
diff --git a/include/asm-x86_64/pci-direct.h b/include/asm-x86_64/pci-direct.h
index eba9cb4..6823fa4 100644
--- a/include/asm-x86_64/pci-direct.h
+++ b/include/asm-x86_64/pci-direct.h
@@ -10,6 +10,7 @@
 extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset);
 extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset);
 extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val);
+extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val);
 
 extern int early_pci_allowed(void);
 
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 0555c1c..59901c6 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -221,20 +221,19 @@
 #define __S110	PAGE_SHARED_EXEC
 #define __S111	PAGE_SHARED_EXEC
 
-static inline unsigned long pgd_bad(pgd_t pgd) 
-{ 
-       unsigned long val = pgd_val(pgd);
-       val &= ~PTE_MASK; 
-       val &= ~(_PAGE_USER | _PAGE_DIRTY); 
-       return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);      
-} 
+static inline unsigned long pgd_bad(pgd_t pgd)
+{
+	return pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+}
 
 static inline unsigned long pud_bad(pud_t pud)
 {
-       unsigned long val = pud_val(pud);
-       val &= ~PTE_MASK;
-       val &= ~(_PAGE_USER | _PAGE_DIRTY);
-       return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
+	return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+}
+
+static inline unsigned long pmd_bad(pmd_t pmd)
+{
+	return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
 }
 
 #define pte_none(x)	(!pte_val(x))
@@ -347,7 +346,6 @@
 #define pmd_none(x)	(!pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
 #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
-#define	pmd_bad(x)	((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE )
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index cef17e0..76552d7 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -475,6 +475,14 @@
 		: :"a" (eax), "c" (ecx));
 }
 
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+	/* "mwait %eax,%ecx;" */
+	asm volatile(
+		"sti; .byte 0x0f,0x01,0xc9;"
+		: :"a" (eax), "c" (ecx));
+}
+
 extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
 
 #define stack_current() \
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index e72cfcd..6d324b8 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -61,7 +61,6 @@
 extern unsigned long numa_free_all_bootmem(void);
 
 extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
-extern void free_bootmem_generic(unsigned long phys, unsigned len);
 
 extern void load_gs_index(unsigned gs);
 
@@ -88,6 +87,7 @@
 extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
 
 extern void early_quirks(void);
+extern void quirk_intel_irqbalance(void);
 extern void check_efer(void);
 
 extern int unhandled_signal(struct task_struct *tsk, int sig);
diff --git a/include/asm-x86_64/rio.h b/include/asm-x86_64/rio.h
new file mode 100644
index 0000000..c7350f6
--- /dev/null
+++ b/include/asm-x86_64/rio.h
@@ -0,0 +1,74 @@
+/*
+ * Derived from include/asm-i386/mach-summit/mach_mpparse.h
+ *          and include/asm-i386/mach-default/bios_ebda.h
+ *
+ * Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ */
+
+#ifndef __ASM_RIO_H
+#define __ASM_RIO_H
+
+#define RIO_TABLE_VERSION	3
+
+struct rio_table_hdr {
+	u8 version;      /* Version number of this data structure  */
+	u8 num_scal_dev; /* # of Scalability devices               */
+	u8 num_rio_dev;  /* # of RIO I/O devices                   */
+} __attribute__((packed));
+
+struct scal_detail {
+	u8 node_id;      /* Scalability Node ID                    */
+	u32 CBAR;        /* Address of 1MB register space          */
+	u8 port0node;    /* Node ID port connected to: 0xFF=None   */
+	u8 port0port;    /* Port num port connected to: 0,1,2, or  */
+	                 /* 0xFF=None                              */
+	u8 port1node;    /* Node ID port connected to: 0xFF = None */
+	u8 port1port;    /* Port num port connected to: 0,1,2, or  */
+	                 /* 0xFF=None                              */
+	u8 port2node;    /* Node ID port connected to: 0xFF = None */
+	u8 port2port;    /* Port num port connected to: 0,1,2, or  */
+	                 /* 0xFF=None                              */
+	u8 chassis_num;  /* 1 based Chassis number (1 = boot node) */
+} __attribute__((packed));
+
+struct rio_detail {
+	u8 node_id;      /* RIO Node ID                            */
+	u32 BBAR;        /* Address of 1MB register space          */
+	u8 type;         /* Type of device                         */
+	u8 owner_id;     /* Node ID of Hurricane that owns this    */
+	                 /* node                                   */
+	u8 port0node;    /* Node ID port connected to: 0xFF=None   */
+	u8 port0port;    /* Port num port connected to: 0,1,2, or  */
+	                 /* 0xFF=None                              */
+	u8 port1node;    /* Node ID port connected to: 0xFF=None   */
+	u8 port1port;    /* Port num port connected to: 0,1,2, or  */
+	                 /* 0xFF=None                              */
+	u8 first_slot;   /* Lowest slot number below this Calgary  */
+	u8 status;       /* Bit 0 = 1 : the XAPIC is used          */
+	                 /*       = 0 : the XAPIC is not used, ie: */
+	                 /*            ints fwded to another XAPIC */
+	                 /*           Bits1:7 Reserved             */
+	u8 WP_index;     /* instance index - lower ones have       */
+	                 /*     lower slot numbers/PCI bus numbers */
+	u8 chassis_num;  /* 1 based Chassis number                 */
+} __attribute__((packed));
+
+enum {
+	HURR_SCALABILTY	= 0,  /* Hurricane Scalability info */
+	HURR_RIOIB	= 2,  /* Hurricane RIOIB info       */
+	COMPAT_CALGARY	= 4,  /* Compatibility Calgary      */
+	ALT_CALGARY	= 5,  /* Second Planar Calgary      */
+};
+
+/*
+ * The real-mode segment pointer stored at physical address 0x40E
+ * points to the 4K EBDA area.
+ */
+static inline unsigned long get_bios_ebda(void)
+{
+	unsigned long address = *(unsigned short *)phys_to_virt(0x40EUL);
+	address <<= 4;
+	return address;
+}
+
+#endif /* __ASM_RIO_H */
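
The "<< 4" is the standard real-mode segment-to-physical conversion: if the BIOS stored the segment 0x9FC0 at 0x40E, get_bios_ebda() returns 0x9FC00, just below the 640K boundary. A hedged usage sketch (the caller is invented):

	static void *map_ebda_sketch(void)
	{
		unsigned long ebda = get_bios_ebda();

		if (!ebda)
			return NULL;		/* BIOS reported no EBDA */
		return phys_to_virt(ebda);	/* start of the 4K area */
	}
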
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index f1bdd50..e17b9ec4 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -82,11 +82,6 @@
 extern u8 x86_cpu_to_log_apicid[NR_CPUS];
 extern u8 bios_cpu_apicid[];
 
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
-{
-	return cpus_addr(cpumask)[0];
-}
-
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < NR_CPUS)
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
index 05ef097..88bf981 100644
--- a/include/asm-x86_64/spinlock.h
+++ b/include/asm-x86_64/spinlock.h
@@ -36,7 +36,34 @@
 		"2:\t" : "=m" (lock->slock) : : "memory");
 }
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+/*
+ * Same as __raw_spin_lock, but reenable interrupts during spinning.
+ */
+#ifndef CONFIG_PROVE_LOCKING
+static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+{
+	asm volatile(
+		"\n1:\t"
+		LOCK_PREFIX " ; decl %0\n\t"
+		"jns 5f\n"
+		"testl $0x200, %1\n\t"	/* interrupts were disabled? */
+		"jz 4f\n\t"
+		"sti\n"
+		"3:\t"
+		"rep;nop\n\t"
+		"cmpl $0, %0\n\t"
+		"jle 3b\n\t"
+		"cli\n\t"
+		"jmp 1b\n"
+		"4:\t"
+		"rep;nop\n\t"
+		"cmpl $0, %0\n\t"
+		"jg 1b\n\t"
+		"jmp 4b\n"
+		"5:\n\t"
+		: "+m" (lock->slock) : "r" ((unsigned)flags) : "memory");
+}
+#endif
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h
index 5eb9799..6f0b545 100644
--- a/include/asm-x86_64/stacktrace.h
+++ b/include/asm-x86_64/stacktrace.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_STACKTRACE_H
 #define _ASM_STACKTRACE_H 1
 
+extern int kstack_depth_to_print;
+
 /* Generic stack tracer with callbacks */
 
 struct stacktrace_ops {
diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h
index 2e7ff10..2f6349e 100644
--- a/include/asm-x86_64/unwind.h
+++ b/include/asm-x86_64/unwind.h
@@ -87,14 +87,10 @@
 
 static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
 {
-#if 0 /* This can only work when selector register saves/restores
-         are properly annotated (and tracked in UNW_REGISTER_INFO). */
-	return user_mode(&info->regs);
-#else
-	return (long)info->regs.rip >= 0
+	return user_mode(&info->regs)
+	       || (long)info->regs.rip >= 0
 	       || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
 	       || (long)info->regs.rsp >= 0;
-#endif
 }
 
 #else
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
index 01d1c17..05cb8dd 100644
--- a/include/asm-x86_64/vsyscall.h
+++ b/include/asm-x86_64/vsyscall.h
@@ -10,6 +10,7 @@
 #define VSYSCALL_START (-10UL << 20)
 #define VSYSCALL_SIZE 1024
 #define VSYSCALL_END (-2UL << 20)
+#define VSYSCALL_MAPPED_PAGES 1
 #define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
 
 #ifdef __KERNEL__
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index bf00ce6..bfb5202 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -28,7 +28,7 @@
 
 struct cpu {
 	int node_id;		/* The node which contains the CPU */
-	int no_control;		/* Should the sysfs control file be created? */
+	int hotpluggable;	/* creates sysfs control file if hotpluggable */
 	struct sys_device sysdev;
 };
 
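
Note the flag's sense is inverted by the rename: no_control suppressed the sysfs control file, hotpluggable now requests it. A hedged sketch of how an architecture's topology setup might use it (the hot-plug predicate is a made-up placeholder):

	static DEFINE_PER_CPU(struct cpu, cpu_devices);

	static int __init topology_init_sketch(void)
	{
		int i;

		for_each_present_cpu(i) {
			struct cpu *c = &per_cpu(cpu_devices, i);

			/* only hot-pluggable CPUs get the sysfs
			 * "online" control file */
			c->hotpluggable = arch_cpu_hotpluggable(i);	/* hypothetical */
			register_cpu(c, i);
		}
		return 0;
	}
	subsys_initcall(topology_init_sketch);
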
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 66d621d..df1c918 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -300,8 +300,9 @@
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 					struct resource *data_resource);
-extern unsigned long __init efi_get_time(void);
+extern unsigned long efi_get_time(void);
 extern int __init efi_set_rtc_mmss(unsigned long nowtime);
+extern int is_available_memory(efi_memory_desc_t * md);
 extern struct efi_memory_map memmap;
 
 /**
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index e16904e..acb4ed1 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -15,9 +15,14 @@
  * disables interrupts for a long time. This call is stateless.
  */
 #ifdef ARCH_HAS_NMI_WATCHDOG
+#include <asm/nmi.h>
 extern void touch_nmi_watchdog(void);
 #else
 # define touch_nmi_watchdog() touch_softlockup_watchdog()
 #endif
 
+#ifndef trigger_all_cpu_backtrace
+#define trigger_all_cpu_backtrace() do { } while (0)
+#endif
+
 #endif
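
The #ifndef fallback lets common code call the hook unconditionally: with ARCH_HAS_NMI_WATCHDOG, <asm/nmi.h> (now included above) can supply a real trigger_all_cpu_backtrace() that NMIs every CPU into dumping its stack, while everywhere else the call compiles to nothing. The lib/spinlock_debug.c hunk later in this merge is the first caller; a hedged sketch of the pattern:

	/* Invented stall reporter: safe on every architecture. */
	static void report_stall_sketch(void)
	{
		printk(KERN_ERR "BUG: stall detected on CPU#%d\n",
		       raw_smp_processor_id());
		dump_stack();
		trigger_all_cpu_backtrace();	/* no-op without arch support */
	}
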
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 2925e66..b02308e 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -42,7 +42,8 @@
 	u16 pages;		/* 0x32 */
 	u16 vesa_attributes;	/* 0x34 */
 	u32 capabilities;       /* 0x36 */
-				/* 0x3a -- 0x3f reserved for future expansion */
+				/* 0x3a -- 0x3b reserved for future expansion */
+				/* 0x3c -- 0x3f micro stack for relocatable kernels */
 };
 
 extern struct screen_info screen_info;
diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h
new file mode 100644
index 0000000..d3e5f27
--- /dev/null
+++ b/include/linux/start_kernel.h
@@ -0,0 +1,12 @@
+#ifndef _LINUX_START_KERNEL_H
+#define _LINUX_START_KERNEL_H
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+/* Define the prototype for start_kernel here, rather than cluttering
+   up something else. */
+
+extern asmlinkage void __init start_kernel(void);
+
+#endif /* _LINUX_START_KERNEL_H */
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 76c3fe3..975c963 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -78,7 +78,7 @@
 							\
 		set_fs(KERNEL_DS);			\
 		pagefault_disable();			\
-		ret = __get_user(retval, addr);		\
+		ret = __get_user(retval, (__force typeof(retval) __user *)(addr));		\
 		pagefault_enable();			\
 		set_fs(old_fs);				\
 		ret;					\
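
This is probe_kernel_address(): the set_fs(KERNEL_DS)/pagefault_disable() bracket makes __get_user() a safe way to read a kernel address that may be bogus, and the new (__force ... __user *) cast merely placates sparse, which otherwise warns about passing a kernel pointer to __get_user(); the generated code is unchanged. A sketch of the pattern the unwinder hunks below convert to:

	/*
	 * Dereference a possibly-invalid kernel pointer without
	 * oopsing; returns 0 and fills *val on success, -EFAULT if
	 * the access would have faulted.
	 */
	static int peek_kernel_ulong_sketch(const unsigned long *addr,
					    unsigned long *val)
	{
		return probe_kernel_address(addr, *val);
	}
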
diff --git a/init/main.c b/init/main.c
index 4650830..1174ae3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -29,6 +29,7 @@
 #include <linux/percpu.h>
 #include <linux/kmod.h>
 #include <linux/kernel_stat.h>
+#include <linux/start_kernel.h>
 #include <linux/security.h>
 #include <linux/workqueue.h>
 #include <linux/profile.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 6588381..7f2e31b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1319,7 +1319,7 @@
 	return ERR_PTR(retval);
 }
 
-struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 {
 	memset(regs, 0, sizeof(struct pt_regs));
 	return regs;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 62e73ce..b020324 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -221,11 +221,7 @@
 	trace->skip = 3;
 	trace->all_contexts = 0;
 
-	/* Make sure to not recurse in case the unwinder needs to take
-	   locks. */
-	lockdep_off();
 	save_stack_trace(trace, NULL);
-	lockdep_on();
 
 	trace->max_entries = trace->nr_entries;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 758dbbf..8e9f00f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -54,6 +54,7 @@
 
 #ifdef CONFIG_X86
 #include <asm/nmi.h>
+#include <asm/stacktrace.h>
 #endif
 
 #if defined(CONFIG_SYSCTL)
@@ -707,6 +708,14 @@
 		.mode		= 0444,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "kstack_depth_to_print",
+		.data		= &kstack_depth_to_print,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 #if defined(CONFIG_MMU)
 	{
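
With this entry the unwinder's new kstack_depth_to_print knob (declared via <asm/stacktrace.h> above) becomes tunable at runtime, i.e. through /proc/sys/kernel/kstack_depth_to_print given its placement in the kernel table; CTL_UNNUMBERED means it gets no stable binary sysctl number, only the procfs name.
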
diff --git a/kernel/unwind.c b/kernel/unwind.c
index ed0a21d..09c2613 100644
--- a/kernel/unwind.c
+++ b/kernel/unwind.c
@@ -14,11 +14,12 @@
 #include <linux/bootmem.h>
 #include <linux/sort.h>
 #include <linux/stop_machine.h>
+#include <linux/uaccess.h>
 #include <asm/sections.h>
 #include <asm/uaccess.h>
 #include <asm/unaligned.h>
 
-extern char __start_unwind[], __end_unwind[];
+extern const char __start_unwind[], __end_unwind[];
 extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
 
 #define MAX_STACK_DEPTH 8
@@ -94,6 +95,7 @@
 
 typedef unsigned long uleb128_t;
 typedef   signed long sleb128_t;
+#define sleb128abs __builtin_labs
 
 static struct unwind_table {
 	struct {
@@ -135,6 +137,17 @@
 
 static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
 
+static unsigned unwind_debug;
+static int __init unwind_debug_setup(char *s)
+{
+	unwind_debug = simple_strtoul(s, NULL, 0);
+	return 1;
+}
+__setup("unwind_debug=", unwind_debug_setup);
+#define dprintk(lvl, fmt, args...) \
+	((void)(lvl > unwind_debug \
+	 || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
+
 static struct unwind_table *find_table(unsigned long pc)
 {
 	struct unwind_table *table;
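
The dprintk() macro added above relies on || short-circuiting: when lvl > unwind_debug the printk() is never evaluated, and the (void) cast suppresses the unused-value warning. Expanded into a plain function it would look roughly like this (illustrative only; the new unwind_debug= boot parameter sets the verbosity):

	static void unwind_dprintk_sketch(unsigned lvl, const char *fmt, ...)
	{
		va_list args;

		if (lvl > unwind_debug)
			return;		/* message too verbose for setting */
		va_start(args, fmt);
		printk(KERN_DEBUG "unwind: ");
		vprintk(fmt, args);
		printk("\n");
		va_end(args);
	}
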
@@ -151,7 +164,9 @@
 
 static unsigned long read_pointer(const u8 **pLoc,
                                   const void *end,
-                                  signed ptrType);
+                                  signed ptrType,
+                                  unsigned long text_base,
+                                  unsigned long data_base);
 
 static void init_unwind_table(struct unwind_table *table,
                               const char *name,
@@ -176,10 +191,13 @@
 	/* See if the linker provided table looks valid. */
 	if (header_size <= 4
 	    || header_start[0] != 1
-	    || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
-	    || header_start[2] == DW_EH_PE_omit
-	    || read_pointer(&ptr, end, header_start[2]) <= 0
-	    || header_start[3] == DW_EH_PE_omit)
+	    || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
+	       != table_start
+	    || !read_pointer(&ptr, end, header_start[2], 0, 0)
+	    || !read_pointer(&ptr, end, header_start[3], 0,
+	                     (unsigned long)header_start)
+	    || !read_pointer(&ptr, end, header_start[3], 0,
+	                     (unsigned long)header_start))
 		header_start = NULL;
 	table->hdrsz = header_size;
 	smp_wmb();
@@ -269,7 +287,7 @@
 		ptr = (const u8 *)(fde + 2);
 		if (!read_pointer(&ptr,
 		                  (const u8 *)(fde + 1) + *fde,
-		                  ptrType))
+		                  ptrType, 0, 0))
 			return;
 		++n;
 	}
@@ -279,6 +297,7 @@
 
 	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
 	        + 2 * n * sizeof(unsigned long);
+	dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
 	header = alloc(hdrSize);
 	if (!header)
 		return;
@@ -303,7 +322,7 @@
 		ptr = (const u8 *)(fde + 2);
 		header->table[n].start = read_pointer(&ptr,
 		                                      (const u8 *)(fde + 1) + *fde,
-		                                      fde_pointer_type(cie));
+		                                      fde_pointer_type(cie), 0, 0);
 		header->table[n].fde = (unsigned long)fde;
 		++n;
 	}
@@ -486,7 +505,9 @@
 
 static unsigned long read_pointer(const u8 **pLoc,
                                   const void *end,
-                                  signed ptrType)
+                                  signed ptrType,
+                                  unsigned long text_base,
+                                  unsigned long data_base)
 {
 	unsigned long value = 0;
 	union {
@@ -498,13 +519,17 @@
 		const unsigned long *pul;
 	} ptr;
 
-	if (ptrType < 0 || ptrType == DW_EH_PE_omit)
+	if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
+		dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
 		return 0;
+	}
 	ptr.p8 = *pLoc;
 	switch(ptrType & DW_EH_PE_FORM) {
 	case DW_EH_PE_data2:
-		if (end < (const void *)(ptr.p16u + 1))
+		if (end < (const void *)(ptr.p16u + 1)) {
+			dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		if(ptrType & DW_EH_PE_signed)
 			value = get_unaligned(ptr.p16s++);
 		else
@@ -512,8 +537,10 @@
 		break;
 	case DW_EH_PE_data4:
 #ifdef CONFIG_64BIT
-		if (end < (const void *)(ptr.p32u + 1))
+		if (end < (const void *)(ptr.p32u + 1)) {
+			dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		if(ptrType & DW_EH_PE_signed)
 			value = get_unaligned(ptr.p32s++);
 		else
@@ -525,8 +552,10 @@
 		BUILD_BUG_ON(sizeof(u32) != sizeof(value));
 #endif
 	case DW_EH_PE_native:
-		if (end < (const void *)(ptr.pul + 1))
+		if (end < (const void *)(ptr.pul + 1)) {
+			dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		value = get_unaligned(ptr.pul++);
 		break;
 	case DW_EH_PE_leb128:
@@ -534,10 +563,14 @@
 		value = ptrType & DW_EH_PE_signed
 		        ? get_sleb128(&ptr.p8, end)
 		        : get_uleb128(&ptr.p8, end);
-		if ((const void *)ptr.p8 > end)
+		if ((const void *)ptr.p8 > end) {
+			dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
 			return 0;
+		}
 		break;
 	default:
+		dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
+		        ptrType, ptr.p8, end);
 		return 0;
 	}
 	switch(ptrType & DW_EH_PE_ADJUST) {
@@ -546,12 +579,33 @@
 	case DW_EH_PE_pcrel:
 		value += (unsigned long)*pLoc;
 		break;
+	case DW_EH_PE_textrel:
+		if (likely(text_base)) {
+			value += text_base;
+			break;
+		}
+		dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
+		        ptrType, *pLoc, end);
+		return 0;
+	case DW_EH_PE_datarel:
+		if (likely(data_base)) {
+			value += data_base;
+			break;
+		}
+		dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
+		        ptrType, *pLoc, end);
+		return 0;
 	default:
+		dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
+		        ptrType, *pLoc, end);
 		return 0;
 	}
 	if ((ptrType & DW_EH_PE_indirect)
-	    && __get_user(value, (unsigned long *)value))
+	    && probe_kernel_address((unsigned long *)value, value)) {
+		dprintk(1, "Cannot read indirect value %lx (%p,%p).",
+		        value, *pLoc, end);
 		return 0;
+	}
 	*pLoc = ptr.p8;
 
 	return value;
@@ -594,7 +648,8 @@
 			case 'P': {
 					signed ptrType = *ptr++;
 
-					if (!read_pointer(&ptr, end, ptrType) || ptr > end)
+					if (!read_pointer(&ptr, end, ptrType, 0, 0)
+					    || ptr > end)
 						return -1;
 				}
 				break;
@@ -654,7 +709,8 @@
 			case DW_CFA_nop:
 				break;
 			case DW_CFA_set_loc:
-				if ((state->loc = read_pointer(&ptr.p8, end, ptrType)) == 0)
+				state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
+				if (state->loc == 0)
 					result = 0;
 				break;
 			case DW_CFA_advance_loc1:
@@ -700,8 +756,10 @@
 					state->label = NULL;
 					return 1;
 				}
-				if (state->stackDepth >= MAX_STACK_DEPTH)
+				if (state->stackDepth >= MAX_STACK_DEPTH) {
+					dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
 					return 0;
+				}
 				state->stack[state->stackDepth++] = ptr.p8;
 				break;
 			case DW_CFA_restore_state:
@@ -716,8 +774,10 @@
 					result = processCFI(start, end, 0, ptrType, state);
 					state->loc = loc;
 					state->label = label;
-				} else
+				} else {
+					dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
 					return 0;
+				}
 				break;
 			case DW_CFA_def_cfa:
 				state->cfa.reg = get_uleb128(&ptr.p8, end);
@@ -749,6 +809,7 @@
 				break;
 			case DW_CFA_GNU_window_save:
 			default:
+				dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
 				result = 0;
 				break;
 			}
@@ -764,12 +825,17 @@
 			set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
 			break;
 		}
-		if (ptr.p8 > end)
+		if (ptr.p8 > end) {
+			dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
 			result = 0;
+		}
 		if (result && targetLoc != 0 && targetLoc < state->loc)
 			return 1;
 	}
 
+	if (result && ptr.p8 < end)
+		dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
+
 	return result
 	   && ptr.p8 == end
 	   && (targetLoc == 0
@@ -786,7 +852,7 @@
 #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
 	const u32 *fde = NULL, *cie = NULL;
 	const u8 *ptr = NULL, *end = NULL;
-	unsigned long pc = UNW_PC(frame) - frame->call_frame;
+	unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
 	unsigned long startLoc = 0, endLoc = 0, cfa;
 	unsigned i;
 	signed ptrType = -1;
@@ -813,9 +879,9 @@
 			ptr = hdr + 4;
 			end = hdr + table->hdrsz;
 			if (tableSize
-			    && read_pointer(&ptr, end, hdr[1])
+			    && read_pointer(&ptr, end, hdr[1], 0, 0)
 			       == (unsigned long)table->address
-			    && (i = read_pointer(&ptr, end, hdr[2])) > 0
+			    && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
 			    && i == (end - ptr) / (2 * tableSize)
 			    && !((end - ptr) % (2 * tableSize))) {
 				do {
@@ -823,7 +889,8 @@
 
 					startLoc = read_pointer(&cur,
 					                        cur + tableSize,
-					                        hdr[3]);
+					                        hdr[3], 0,
+					                        (unsigned long)hdr);
 					if (pc < startLoc)
 						i /= 2;
 					else {
@@ -834,13 +901,17 @@
 				if (i == 1
 				    && (startLoc = read_pointer(&ptr,
 				                                ptr + tableSize,
-				                                hdr[3])) != 0
+				                                hdr[3], 0,
+				                                (unsigned long)hdr)) != 0
 				    && pc >= startLoc)
 					fde = (void *)read_pointer(&ptr,
 					                           ptr + tableSize,
-					                           hdr[3]);
+					                           hdr[3], 0,
+					                           (unsigned long)hdr);
 			}
 		}
+		if(hdr && !fde)
+			dprintk(3, "Binary lookup for %lx failed.", pc);
 
 		if (fde != NULL) {
 			cie = cie_for_fde(fde, table);
@@ -851,17 +922,19 @@
 			   && (ptrType = fde_pointer_type(cie)) >= 0
 			   && read_pointer(&ptr,
 			                   (const u8 *)(fde + 1) + *fde,
-			                   ptrType) == startLoc) {
+			                   ptrType, 0, 0) == startLoc) {
 				if (!(ptrType & DW_EH_PE_indirect))
 					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
 				endLoc = startLoc
 				         + read_pointer(&ptr,
 				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType);
+				                        ptrType, 0, 0);
 				if(pc >= endLoc)
 					fde = NULL;
 			} else
 				fde = NULL;
+			if(!fde)
+				dprintk(1, "Binary lookup result for %lx discarded.", pc);
 		}
 		if (fde == NULL) {
 			for (fde = table->address, tableSize = table->size;
@@ -881,7 +954,7 @@
 				ptr = (const u8 *)(fde + 2);
 				startLoc = read_pointer(&ptr,
 				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType);
+				                        ptrType, 0, 0);
 				if (!startLoc)
 					continue;
 				if (!(ptrType & DW_EH_PE_indirect))
@@ -889,10 +962,12 @@
 				endLoc = startLoc
 				         + read_pointer(&ptr,
 				                        (const u8 *)(fde + 1) + *fde,
-				                        ptrType);
+				                        ptrType, 0, 0);
 				if (pc >= startLoc && pc < endLoc)
 					break;
 			}
+			if(!fde)
+				dprintk(3, "Linear lookup for %lx failed.", pc);
 		}
 	}
 	if (cie != NULL) {
@@ -926,6 +1001,8 @@
 			if (ptr >= end || *ptr)
 				cie = NULL;
 		}
+		if(!cie)
+			dprintk(1, "CIE unusable (%p,%p).", ptr, end);
 		++ptr;
 	}
 	if (cie != NULL) {
@@ -935,7 +1012,12 @@
 		state.dataAlign = get_sleb128(&ptr, end);
 		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
 			cie = NULL;
-		else {
+		else if (UNW_PC(frame) % state.codeAlign
+		         || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+			dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
+			        UNW_PC(frame), UNW_SP(frame));
+			return -EPERM;
+		} else {
 			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
 			/* skip augmentation */
 			if (((const char *)(cie + 2))[1] == 'z') {
@@ -949,6 +1031,8 @@
 			   || reg_info[retAddrReg].width != sizeof(unsigned long))
 				cie = NULL;
 		}
+		if(!cie)
+			dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
 	}
 	if (cie != NULL) {
 		state.cieStart = ptr;
@@ -962,11 +1046,15 @@
 			if ((ptr += augSize) > end)
 				fde = NULL;
 		}
+		if(!fde)
+			dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
 	}
 	if (cie == NULL || fde == NULL) {
 #ifdef CONFIG_FRAME_POINTER
 		unsigned long top, bottom;
 
+		if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
+			return -EPERM;
 		top = STACK_TOP(frame->task);
 		bottom = STACK_BOTTOM(frame->task);
 # if FRAME_RETADDR_OFFSET < 0
@@ -982,18 +1070,19 @@
 		        & (sizeof(unsigned long) - 1))) {
 			unsigned long link;
 
-			if (!__get_user(link,
+			if (!probe_kernel_address(
 			                (unsigned long *)(UNW_FP(frame)
-			                                  + FRAME_LINK_OFFSET))
+			                                  + FRAME_LINK_OFFSET),
+						  link)
 # if FRAME_RETADDR_OFFSET < 0
 			   && link > bottom && link < UNW_FP(frame)
 # else
 			   && link > UNW_FP(frame) && link < bottom
 # endif
 			   && !(link & (sizeof(link) - 1))
-			   && !__get_user(UNW_PC(frame),
+			   && !probe_kernel_address(
 			                  (unsigned long *)(UNW_FP(frame)
-			                                    + FRAME_RETADDR_OFFSET))) {
+			                                    + FRAME_RETADDR_OFFSET), UNW_PC(frame))) {
 				UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
 # if FRAME_RETADDR_OFFSET < 0
 					-
@@ -1016,8 +1105,11 @@
 	   || state.regs[retAddrReg].where == Nowhere
 	   || state.cfa.reg >= ARRAY_SIZE(reg_info)
 	   || reg_info[state.cfa.reg].width != sizeof(unsigned long)
-	   || state.cfa.offs % sizeof(unsigned long))
+	   || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
+	   || state.cfa.offs % sizeof(unsigned long)) {
+		dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
 		return -EIO;
+	}
 	/* update frame */
 #ifndef CONFIG_AS_CFI_SIGNAL_FRAME
 	if(frame->call_frame
@@ -1036,10 +1128,14 @@
 #else
 # define CASES CASE(8); CASE(16); CASE(32); CASE(64)
 #endif
+	pc = UNW_PC(frame);
+	sp = UNW_SP(frame);
 	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
 		if (REG_INVALID(i)) {
 			if (state.regs[i].where == Nowhere)
 				continue;
+			dprintk(1, "Cannot restore register %u (%d).",
+			        i, state.regs[i].where);
 			return -EIO;
 		}
 		switch(state.regs[i].where) {
@@ -1048,8 +1144,11 @@
 		case Register:
 			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
 			   || REG_INVALID(state.regs[i].value)
-			   || reg_info[i].width > reg_info[state.regs[i].value].width)
+			   || reg_info[i].width > reg_info[state.regs[i].value].width) {
+				dprintk(1, "Cannot restore register %u from register %lu.",
+				        i, state.regs[i].value);
 				return -EIO;
+			}
 			switch(reg_info[state.regs[i].value].width) {
 #define CASE(n) \
 			case sizeof(u##n): \
@@ -1059,6 +1158,9 @@
 			CASES;
 #undef CASE
 			default:
+				dprintk(1, "Unsupported register size %u (%lu).",
+				        reg_info[state.regs[i].value].width,
+				        state.regs[i].value);
 				return -EIO;
 			}
 			break;
@@ -1083,12 +1185,17 @@
 			CASES;
 #undef CASE
 			default:
+				dprintk(1, "Unsupported register size %u (%u).",
+				        reg_info[i].width, i);
 				return -EIO;
 			}
 			break;
 		case Value:
-			if (reg_info[i].width != sizeof(unsigned long))
+			if (reg_info[i].width != sizeof(unsigned long)) {
+				dprintk(1, "Unsupported value size %u (%u).",
+				        reg_info[i].width, i);
 				return -EIO;
+			}
 			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
 			                                    * state.dataAlign;
 			break;
@@ -1100,15 +1207,20 @@
 				    % sizeof(unsigned long)
 				    || addr < startLoc
 				    || addr + sizeof(unsigned long) < addr
-				    || addr + sizeof(unsigned long) > endLoc)
+				    || addr + sizeof(unsigned long) > endLoc) {
+					dprintk(1, "Bad memory location %lx (%lx).",
+					        addr, state.regs[i].value);
 					return -EIO;
+				}
 				switch(reg_info[i].width) {
 #define CASE(n)     case sizeof(u##n): \
-					__get_user(FRAME_REG(i, u##n), (u##n *)addr); \
+					probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
 					break
 				CASES;
 #undef CASE
 				default:
+					dprintk(1, "Unsupported memory size %u (%u).",
+					        reg_info[i].width, i);
 					return -EIO;
 				}
 			}
@@ -1116,6 +1228,17 @@
 		}
 	}
 
+	if (UNW_PC(frame) % state.codeAlign
+	    || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
+		dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
+		        UNW_PC(frame), UNW_SP(frame));
+		return -EIO;
+	}
+	if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
+		dprintk(1, "No progress (%lx,%lx).", pc, sp);
+		return -EIO;
+	}
+
 	return 0;
 #undef CASES
 #undef FRAME_REG
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index b6c4f89..479fd46 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/spinlock.h>
+#include <linux/nmi.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
 #include <linux/delay.h>
@@ -117,6 +118,9 @@
 				raw_smp_processor_id(), current->comm,
 				current->pid, lock);
 			dump_stack();
+#ifdef CONFIG_SMP
+			trigger_all_cpu_backtrace();
+#endif
 		}
 	}
 }
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 22d281c..f359b73 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -43,7 +43,7 @@
 
 static struct sym_entry *table;
 static unsigned int table_size, table_cnt;
-static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
+static unsigned long long _text, _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
 static int all_symbols = 0;
 static char symbol_prefix_char = '\0';
 
@@ -91,7 +91,9 @@
 		sym++;
 
 	/* Ignore most absolute/undefined (?) symbols. */
-	if (strcmp(sym, "_stext") == 0)
+	if (strcmp(sym, "_text") == 0)
+		_text = s->addr;
+	else if (strcmp(sym, "_stext") == 0)
 		_stext = s->addr;
 	else if (strcmp(sym, "_etext") == 0)
 		_etext = s->addr;
@@ -265,9 +267,25 @@
 
 	printf(".data\n");
 
+	/* Keep the address table relocatable by emitting non-absolute
+	 * symbols relative to '_text'.  The symbol names themselves
+	 * cannot be used to construct normal symbol references, since
+	 * the list contains symbols that are declared static and are
+	 * therefore private to their .o files; .tmp_kallsyms.o (or any
+	 * other object) could not reference them by name.
+	 */
 	output_label("kallsyms_addresses");
 	for (i = 0; i < table_cnt; i++) {
-		printf("\tPTR\t%#llx\n", table[i].addr);
+		if (toupper(table[i].sym[0]) != 'A') {
+			if (_text <= table[i].addr)
+				printf("\tPTR\t_text + %#llx\n",
+					table[i].addr - _text);
+			else
+				printf("\tPTR\t_text - %#llx\n",
+					_text - table[i].addr);
+		} else {
+			printf("\tPTR\t%#llx\n", table[i].addr);
+		}
 	}
 	printf("\n");
 
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 2e11416..ac0a582 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -911,6 +911,7 @@
 		".toc1",  /* used by ppc64 */
 		".stab",
 		".rodata",
+		".parainstructions",
 		".text.lock",
 		"__bug_table", /* used by powerpc for BUG() */
 		".pci_fixup_header",
@@ -931,6 +932,7 @@
 		".altinstructions",
 		".eh_frame",
 		".debug",
+		".parainstructions",
 		NULL
 	};
 	/* part of section name */