diff -urN oldtree/Documentation/DocBook/Makefile newtree/Documentation/DocBook/Makefile
--- oldtree/Documentation/DocBook/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/Documentation/DocBook/Makefile	2006-04-01 08:53:50.582662750 -0500
@@ -28,7 +28,7 @@
 
 ###
 # The targets that may be used.
-.PHONY:	xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs
+PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs
 
 BOOKS := $(addprefix $(obj)/,$(DOCBOOKS))
 xmldocs: $(BOOKS)
@@ -211,3 +211,9 @@
 
 #man put files in man subdir - traverse down
 subdir- := man/
+
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff -urN oldtree/Documentation/sysctl/vm.txt newtree/Documentation/sysctl/vm.txt
--- oldtree/Documentation/sysctl/vm.txt	2006-04-01 04:48:27.000000000 -0500
+++ newtree/Documentation/sysctl/vm.txt	2006-04-01 08:54:41.749860500 -0500
@@ -29,6 +29,7 @@
 - drop-caches
 - zone_reclaim_mode
 - zone_reclaim_interval
+- swap_prefetch
 
 ==============================================================
 
@@ -178,3 +179,22 @@
 Reduce the interval if undesired off node allocations occur. However, too
 frequent scans will have a negative impact onoff node allocation performance.
 
+==============================================================
+
+swap_prefetch
+
+This enables or disables the swap prefetching feature. When the virtual
+memory subsystem has been extremely idle for at least 5 seconds, it will start
+copying back pages from swap into the swapcache and keep a copy in swap. In
+practice it can take many minutes before the vm is idle enough.
+
+This is a bitwise OR of the following values:
+1	= Normal background swap prefetching when load is light
+2	= Aggressively swap prefetch as much as possible
+
+When 2 is set, after the maximum amount possible has been prefetched, this bit
+is unset, i.e. setting the value to 3 will prefetch aggressively, then drop to 1.
+This is useful for doing aggressive prefetching for short periods in scripts
+such as after resuming from software suspend.
+
+The default value is 1.
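
For illustration only, a minimal userspace sketch of the "aggressive prefetch after resume" case described above. It assumes a kernel carrying this patch, where the tunable shows up as /proc/sys/vm/swap_prefetch with the bit semantics documented here; the helper program itself is hypothetical.

#include <stdio.h>

/* Request one round of aggressive swap prefetch (bit 2) on top of the
 * normal background mode (bit 1), i.e. write 3.  Per the text above,
 * the kernel drops bit 2 again once it has prefetched all it can. */
int main(void)
{
	FILE *f = fopen("/proc/sys/vm/swap_prefetch", "w");

	if (!f) {
		perror("swap_prefetch");
		return 1;
	}
	fprintf(f, "%d\n", 1 | 2);
	fclose(f);
	return 0;
}

A resume hook could run this once and forget about it; the default background mode (1) stays in effect afterwards.
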
diff -urN oldtree/Makefile newtree/Makefile
--- oldtree/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/Makefile	2006-04-01 08:53:50.586663000 -0500
@@ -95,7 +95,7 @@
 endif
 
 # That's our default target when none is given on the command line
-.PHONY: _all
+PHONY := _all
 _all:
 
 ifneq ($(KBUILD_OUTPUT),)
@@ -106,7 +106,7 @@
 $(if $(KBUILD_OUTPUT),, \
      $(error output directory "$(saved-output)" does not exist))
 
-.PHONY: $(MAKECMDGOALS)
+PHONY += $(MAKECMDGOALS)
 
 $(filter-out _all,$(MAKECMDGOALS)) _all:
 	$(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \
@@ -123,7 +123,7 @@
 
 # If building an external module we do not care about the all: rule
 # but instead _all depend on modules
-.PHONY: all
+PHONY += all
 ifeq ($(KBUILD_EXTMOD),)
 _all: all
 else
@@ -369,14 +369,14 @@
 # Rules shared between *config targets and build targets
 
 # Basic helpers built in scripts/
-.PHONY: scripts_basic
+PHONY += scripts_basic
 scripts_basic:
 	$(Q)$(MAKE) $(build)=scripts/basic
 
 # To avoid any implicit rule to kick in, define an empty command.
 scripts/basic/%: scripts_basic ;
 
-.PHONY: outputmakefile
+PHONY += outputmakefile
 # outputmakefile generate a Makefile to be placed in output directory, if
 # using a seperate output directory. This allows convinient use
 # of make in output directory
@@ -452,7 +452,7 @@
 # Additional helpers built in scripts/
 # Carefully list dependencies so we do not try to build scripts twice
 # in parrallel
-.PHONY: scripts
+PHONY += scripts
 scripts: scripts_basic include/config/MARKER
 	$(Q)$(MAKE) $(build)=$(@)
 
@@ -752,7 +752,7 @@
 # make menuconfig etc.
 # Error messages still appears in the original language
 
-.PHONY: $(vmlinux-dirs)
+PHONY += $(vmlinux-dirs)
 $(vmlinux-dirs): prepare scripts
 	$(Q)$(MAKE) $(build)=$@
 
@@ -805,10 +805,10 @@
 # version.h and scripts_basic is processed / created.
 
 # Listed in dependency order
-.PHONY: prepare archprepare prepare0 prepare1 prepare2 prepare3
+PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
 
 # prepare-all is deprecated, use prepare as valid replacement
-.PHONY: prepare-all
+PHONY += prepare-all
 
 # prepare3 is used to check if we are building in a separate output directory,
 # and if so do:
@@ -910,7 +910,7 @@
 
 # ---------------------------------------------------------------------------
 
-.PHONY: depend dep
+PHONY += depend dep
 depend dep:
 	@echo '*** Warning: make $@ is unnecessary now.'
 
@@ -925,21 +925,21 @@
 
 #	Build modules
 
-.PHONY: modules
+PHONY += modules
 modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux)
 	@echo '  Building modules, stage 2.';
 	$(Q)$(MAKE) -rR -f $(srctree)/scripts/Makefile.modpost
 
 
 # Target to prepare building external modules
-.PHONY: modules_prepare
+PHONY += modules_prepare
 modules_prepare: prepare scripts
 
 # Target to install modules
-.PHONY: modules_install
+PHONY += modules_install
 modules_install: _modinst_ _modinst_post
 
-.PHONY: _modinst_
+PHONY += _modinst_
 _modinst_:
 	@if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \
 		echo "Warning: you may need to install module-init-tools"; \
@@ -966,7 +966,7 @@
 else
 depmod_opts	:= -b $(INSTALL_MOD_PATH) -r
 endif
-.PHONY: _modinst_post
+PHONY += _modinst_post
 _modinst_post: _modinst_
 	if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
 
@@ -1009,7 +1009,7 @@
 clean: rm-files := $(CLEAN_FILES)
 clean-dirs      := $(addprefix _clean_,$(srctree) $(vmlinux-alldirs))
 
-.PHONY: $(clean-dirs) clean archclean
+PHONY += $(clean-dirs) clean archclean
 $(clean-dirs):
 	$(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
 
@@ -1027,7 +1027,7 @@
 mrproper: rm-files := $(wildcard $(MRPROPER_FILES))
 mrproper-dirs      := $(addprefix _mrproper_,Documentation/DocBook scripts)
 
-.PHONY: $(mrproper-dirs) mrproper archmrproper
+PHONY += $(mrproper-dirs) mrproper archmrproper
 $(mrproper-dirs):
 	$(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@)
 
@@ -1037,7 +1037,7 @@
 
 # distclean
 #
-.PHONY: distclean
+PHONY += distclean
 
 distclean: mrproper
 	@find $(srctree) $(RCS_FIND_IGNORE) \
@@ -1053,7 +1053,7 @@
 # rpm target kept for backward compatibility
 package-dir	:= $(srctree)/scripts/package
 
-.PHONY: %-pkg rpm
+PHONY += %-pkg rpm
 
 %pkg: FORCE
 	$(Q)$(MAKE) -f $(package-dir)/Makefile $@
@@ -1145,11 +1145,11 @@
 
 # We are always building modules
 KBUILD_MODULES := 1
-.PHONY: crmodverdir
+PHONY += crmodverdir
 crmodverdir:
 	$(Q)mkdir -p $(MODVERDIR)
 
-.PHONY: $(objtree)/Module.symvers
+PHONY += $(objtree)/Module.symvers
 $(objtree)/Module.symvers:
 	@test -e $(objtree)/Module.symvers || ( \
 	echo; \
@@ -1158,7 +1158,7 @@
 	echo )
 
 module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
-.PHONY: $(module-dirs) modules
+PHONY += $(module-dirs) modules
 $(module-dirs): crmodverdir $(objtree)/Module.symvers
 	$(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
 
@@ -1166,13 +1166,13 @@
 	@echo '  Building modules, stage 2.';
 	$(Q)$(MAKE) -rR -f $(srctree)/scripts/Makefile.modpost
 
-.PHONY: modules_install
+PHONY += modules_install
 modules_install:
 	$(Q)$(MAKE) -rR -f $(srctree)/scripts/Makefile.modinst
 
 clean-dirs := $(addprefix _clean_,$(KBUILD_EXTMOD))
 
-.PHONY: $(clean-dirs) clean
+PHONY += $(clean-dirs) clean
 $(clean-dirs):
 	$(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
 
@@ -1302,7 +1302,7 @@
 endif #ifeq ($(config-targets),1)
 endif #ifeq ($(mixed-targets),1)
 
-.PHONY: checkstack
+PHONY += checkstack
 checkstack:
 	$(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \
 	$(PERL) $(src)/scripts/checkstack.pl $(ARCH)
@@ -1347,4 +1347,10 @@
 
 endif	# skip-makefile
 
+PHONY += FORCE
 FORCE:
+
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff -urN oldtree/arch/arm/Makefile newtree/arch/arm/Makefile
--- oldtree/arch/arm/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/arm/Makefile	2006-04-01 08:53:50.590663250 -0500
@@ -1,6 +1,9 @@
 #
 # arch/arm/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
@@ -176,7 +179,7 @@
 
 archprepare: maketools
 
-.PHONY: maketools FORCE
+PHONY += maketools FORCE
 maketools: include/linux/version.h include/asm-arm/.arch FORCE
 	$(Q)$(MAKE) $(build)=arch/arm/tools include/asm-arm/mach-types.h
 
diff -urN oldtree/arch/arm/boot/Makefile newtree/arch/arm/boot/Makefile
--- oldtree/arch/arm/boot/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/arm/boot/Makefile	2006-04-01 08:53:50.590663250 -0500
@@ -1,6 +1,9 @@
 #
 # arch/arm/boot/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
@@ -73,7 +76,7 @@
 	$(call if_changed,objcopy)
 	@echo '  Kernel: $@ is ready'
 
-.PHONY: initrd FORCE
+PHONY += initrd FORCE
 initrd:
 	@test "$(INITRD_PHYS)" != "" || \
 	(echo This machine does not support INITRD; exit -1)
diff -urN oldtree/arch/arm/boot/bootp/Makefile newtree/arch/arm/boot/bootp/Makefile
--- oldtree/arch/arm/boot/bootp/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/arm/boot/bootp/Makefile	2006-04-01 08:53:50.594663500 -0500
@@ -1,6 +1,9 @@
 #
 # linux/arch/arm/boot/bootp/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 
 LDFLAGS_bootp	:=-p --no-undefined -X \
 		 --defsym initrd_phys=$(INITRD_PHYS) \
@@ -21,4 +24,4 @@
 
 $(obj)/initrd.o: $(INITRD) FORCE
 
-.PHONY:	$(INITRD) FORCE
+PHONY += $(INITRD) FORCE
diff -urN oldtree/arch/arm26/Makefile newtree/arch/arm26/Makefile
--- oldtree/arch/arm26/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/arm26/Makefile	2006-04-01 08:53:50.598663750 -0500
@@ -1,6 +1,9 @@
 #
 # arch/arm26/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
@@ -49,9 +52,9 @@
 
 boot := arch/arm26/boot
 
-.PHONY: maketools FORCE
+PHONY += maketools FORCE
 maketools: FORCE
-	
+
 
 # Convert bzImage to zImage
 bzImage: vmlinux
diff -urN oldtree/arch/arm26/boot/Makefile newtree/arch/arm26/boot/Makefile
--- oldtree/arch/arm26/boot/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/arm26/boot/Makefile	2006-04-01 08:53:50.602664000 -0500
@@ -1,6 +1,9 @@
 #
 # arch/arm26/boot/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
@@ -60,7 +63,7 @@
 	@echo '  Kernel: $@ is ready'
 endif
 
-.PHONY: initrd
+PHONY += initrd
 initrd:
 	@test "$(INITRD_PHYS)" != "" || \
 	(echo This machine does not support INITRD; exit -1)
diff -urN oldtree/arch/i386/Makefile newtree/arch/i386/Makefile
--- oldtree/arch/i386/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/i386/Makefile	2006-04-01 08:53:50.606664250 -0500
@@ -99,8 +99,8 @@
 
 boot := arch/i386/boot
 
-.PHONY: zImage bzImage compressed zlilo bzlilo \
-	zdisk bzdisk fdimage fdimage144 fdimage288 install
+PHONY += zImage bzImage compressed zlilo bzlilo \
+         zdisk bzdisk fdimage fdimage144 fdimage288 install
 
 all: bzImage
 
diff -urN oldtree/arch/ia64/Makefile newtree/arch/ia64/Makefile
--- oldtree/arch/ia64/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/ia64/Makefile	2006-04-01 08:53:50.606664250 -0500
@@ -1,6 +1,9 @@
 #
 # ia64/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
@@ -62,7 +65,7 @@
 
 boot := arch/ia64/hp/sim/boot
 
-.PHONY: boot compressed check
+PHONY += boot compressed check
 
 all: compressed unwcheck
 
diff -urN oldtree/arch/ia64/configs/tiger_defconfig newtree/arch/ia64/configs/tiger_defconfig
--- oldtree/arch/ia64/configs/tiger_defconfig	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/ia64/configs/tiger_defconfig	2006-04-01 08:52:29.321584250 -0500
@@ -105,10 +105,10 @@
 # CONFIG_IA64_PAGE_SIZE_64KB is not set
 CONFIG_PGTABLE_3=y
 # CONFIG_PGTABLE_4 is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
 CONFIG_IA64_L1_CACHE_SHIFT=7
 CONFIG_IA64_CYCLONE=y
 CONFIG_IOSAPIC=y
diff -urN oldtree/arch/m32r/Makefile newtree/arch/m32r/Makefile
--- oldtree/arch/m32r/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/m32r/Makefile	2006-04-01 08:53:50.610664500 -0500
@@ -1,6 +1,9 @@
 #
 # m32r/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 
 LDFLAGS		:=
 OBJCOPYFLAGS	:= -O binary -R .note -R .comment -S
@@ -39,7 +42,7 @@
 
 boot := arch/m32r/boot
 
-.PHONY: zImage
+PHONY += zImage
 
 all: zImage
 
diff -urN oldtree/arch/powerpc/Makefile newtree/arch/powerpc/Makefile
--- oldtree/arch/powerpc/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/powerpc/Makefile	2006-04-01 08:53:50.614664750 -0500
@@ -150,7 +150,7 @@
 
 BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm uImage
 
-.PHONY: $(BOOT_TARGETS)
+PHONY += $(BOOT_TARGETS)
 
 boot := arch/$(ARCH)/boot
 
diff -urN oldtree/arch/ppc/Makefile newtree/arch/ppc/Makefile
--- oldtree/arch/ppc/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/ppc/Makefile	2006-04-01 08:53:50.618665000 -0500
@@ -82,7 +82,7 @@
 
 BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm
 
-.PHONY: $(BOOT_TARGETS)
+PHONY += $(BOOT_TARGETS)
 
 all: uImage zImage
 
diff -urN oldtree/arch/ppc/boot/Makefile newtree/arch/ppc/boot/Makefile
--- oldtree/arch/ppc/boot/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/ppc/boot/Makefile	2006-04-01 08:53:50.618665000 -0500
@@ -1,6 +1,9 @@
 #
 # arch/ppc/boot/Makefile
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
 # for more details.
@@ -25,7 +28,7 @@
 
 hostprogs-y := $(addprefix utils/, addnote mknote hack-coff mkprep mkbugboot mktree)
 
-.PHONY: $(BOOT_TARGETS) $(bootdir-y)
+PHONY += $(BOOT_TARGETS) $(bootdir-y)
 
 $(BOOT_TARGETS): $(bootdir-y)
 
diff -urN oldtree/arch/ppc/boot/openfirmware/Makefile newtree/arch/ppc/boot/openfirmware/Makefile
--- oldtree/arch/ppc/boot/openfirmware/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/ppc/boot/openfirmware/Makefile	2006-04-01 08:53:50.622665250 -0500
@@ -1,5 +1,8 @@
 # Makefile for making bootable images on various OpenFirmware machines.
 #
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # Paul Mackerras	January 1997
 #	XCOFF bootable images for PowerMacs
 # Geert Uytterhoeven	September 1997
@@ -86,7 +89,7 @@
 
 # The targets used on the make command-line
 
-.PHONY: zImage zImage.initrd
+PHONY += zImage zImage.initrd
 zImage:		 $(images)/zImage.chrp		\
 		 $(images)/zImage.chrp-rs6k
 	@echo '  kernel: $@ is ready ($<)'
@@ -96,7 +99,7 @@
 
 TFTPIMAGE	:= /tftpboot/zImage
 
-.PHONY: znetboot znetboot.initrd
+PHONY += znetboot znetboot.initrd
 znetboot:	$(images)/zImage.chrp
 	cp $(images)/zImage.chrp      $(TFTPIMAGE).chrp$(END)
 	@echo '  kernel: $@ is ready ($<)'
diff -urN oldtree/arch/ppc/configs/common_defconfig newtree/arch/ppc/configs/common_defconfig
--- oldtree/arch/ppc/configs/common_defconfig	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/ppc/configs/common_defconfig	2006-04-01 08:52:30.717671500 -0500
@@ -139,10 +139,10 @@
 CONFIG_PPCBUG_NVRAM=y
 # CONFIG_SMP is not set
 # CONFIG_HIGHMEM is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
diff -urN oldtree/arch/ppc/configs/pmac_defconfig newtree/arch/ppc/configs/pmac_defconfig
--- oldtree/arch/ppc/configs/pmac_defconfig	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/ppc/configs/pmac_defconfig	2006-04-01 08:52:30.729672250 -0500
@@ -139,10 +139,10 @@
 CONFIG_PPCBUG_NVRAM=y
 # CONFIG_SMP is not set
 # CONFIG_HIGHMEM is not set
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
+CONFIG_HZ=100
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
diff -urN oldtree/arch/sh/Makefile newtree/arch/sh/Makefile
--- oldtree/arch/sh/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/sh/Makefile	2006-04-01 08:53:50.626665500 -0500
@@ -172,7 +172,7 @@
 
 archprepare: maketools include/asm-sh/.cpu include/asm-sh/.mach
 
-.PHONY: maketools FORCE
+PHONY += maketools FORCE
 maketools:  include/linux/version.h FORCE
 	$(Q)$(MAKE) $(build)=arch/sh/tools include/asm-sh/machtypes.h
 
diff -urN oldtree/arch/um/Makefile newtree/arch/um/Makefile
--- oldtree/arch/um/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/um/Makefile	2006-04-01 08:53:50.630665750 -0500
@@ -1,4 +1,7 @@
-# 
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
 # Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
 # Licensed under the GPL
 #
@@ -88,7 +91,7 @@
 
 SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000)
 
-.PHONY: linux
+PHONY += linux
 
 all: linux
 
diff -urN oldtree/arch/x86_64/Makefile newtree/arch/x86_64/Makefile
--- oldtree/arch/x86_64/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/arch/x86_64/Makefile	2006-04-01 08:53:50.630665750 -0500
@@ -67,8 +67,8 @@
 
 boot := arch/x86_64/boot
 
-.PHONY: bzImage bzlilo install archmrproper \
-	fdimage fdimage144 fdimage288 archclean
+PHONY += bzImage bzlilo install archmrproper \
+	 fdimage fdimage144 fdimage288 archclean
 
 #Default target when executing "make"
 all: bzImage
diff -urN oldtree/block/Kconfig.iosched newtree/block/Kconfig.iosched
--- oldtree/block/Kconfig.iosched	2006-04-01 04:48:27.000000000 -0500
+++ newtree/block/Kconfig.iosched	2006-04-01 08:53:42.926184250 -0500
@@ -40,7 +40,7 @@
 
 choice
 	prompt "Default I/O scheduler"
-	default DEFAULT_AS
+	default DEFAULT_DEADLINE
 	help
 	  Select the I/O scheduler which will be used by default for all
 	  block devices.
diff -urN oldtree/drivers/block/loop.c newtree/drivers/block/loop.c
--- oldtree/drivers/block/loop.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/drivers/block/loop.c	2006-04-01 08:51:11.580725750 -0500
@@ -779,6 +779,12 @@
 	mapping = file->f_mapping;
 	inode = mapping->host;
 
+	/*
+	 * The upper layer should already do proper look-ahead,
+	 * one more look-ahead here only ruins the cache hit rate.
+	 */
+	file->f_ra.flags |= RA_FLAG_NO_LOOKAHEAD;
+
 	if (!(file->f_mode & FMODE_WRITE))
 		lo_flags |= LO_FLAGS_READ_ONLY;
 
diff -urN oldtree/fs/mpage.c newtree/fs/mpage.c
--- oldtree/fs/mpage.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/fs/mpage.c	2006-04-01 08:51:11.584726000 -0500
@@ -343,8 +343,10 @@
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
 					&last_block_in_bio, get_block);
-			if (!pagevec_add(&lru_pvec, page))
+			if (!pagevec_add(&lru_pvec, page)) {
+				cond_resched();
 				__pagevec_lru_add(&lru_pvec);
+			}
 		} else {
 			page_cache_release(page);
 		}
diff -urN oldtree/fs/nfsd/vfs.c newtree/fs/nfsd/vfs.c
--- oldtree/fs/nfsd/vfs.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/fs/nfsd/vfs.c	2006-04-01 08:51:11.592726500 -0500
@@ -833,10 +833,14 @@
 #endif
 
 	/* Get readahead parameters */
-	ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+	if (prefer_adaptive_readahead())
+		ra = NULL;
+	else
+		ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
 
 	if (ra && ra->p_set)
 		file->f_ra = ra->p_ra;
+	file->f_ra.flags |= RA_FLAG_NFSD;
 
 	if (file->f_op->sendfile) {
 		svc_pushback_unused_pages(rqstp);
diff -urN oldtree/include/linux/fs.h newtree/include/linux/fs.h
--- oldtree/include/linux/fs.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/fs.h	2006-04-01 08:51:11.596726750 -0500
@@ -600,19 +600,40 @@
  * Track a single file's readahead state
  */
 struct file_ra_state {
-	unsigned long start;		/* Current window */
-	unsigned long size;
-	unsigned long flags;		/* ra flags RA_FLAG_xxx*/
-	unsigned long cache_hit;	/* cache hit count*/
-	unsigned long prev_page;	/* Cache last read() position */
-	unsigned long ahead_start;	/* Ahead window */
-	unsigned long ahead_size;
-	unsigned long ra_pages;		/* Maximum readahead window */
-	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
-	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
+	union {
+		struct { /* conventional read-ahead */
+			unsigned long start;		/* Current window */
+			unsigned long size;
+			unsigned long ahead_start;	/* Ahead window */
+			unsigned long ahead_size;
+			unsigned long cache_hit;        /* cache hit count */
+		};
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+		struct { /* adaptive read-ahead */
+			pgoff_t la_index;
+			pgoff_t ra_index;
+			pgoff_t lookahead_index;
+			pgoff_t readahead_index;
+			unsigned long age;
+			uint64_t cache_hits;
+		};
+#endif
+	};
+
+	/* mmap read-around */
+	unsigned long mmap_hit;         /* Cache hit stat for mmap accesses */
+	unsigned long mmap_miss;        /* Cache miss stat for mmap accesses */
+
+	/* common ones */
+	unsigned long flags;            /* ra flags RA_FLAG_xxx*/
+	unsigned long prev_page;        /* Cache last read() position */
+	unsigned long ra_pages;         /* Maximum readahead window */
 };
 #define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
 #define RA_FLAG_INCACHE 0x02	/* file is already in cache */
+#define RA_FLAG_MMAP		(1UL<<31)	/* mmaped page access */
+#define RA_FLAG_NO_LOOKAHEAD	(1UL<<30)	/* disable look-ahead */
+#define RA_FLAG_NFSD		(1UL<<29)	/* request from nfsd */
 
 struct file {
 	/*
diff -urN oldtree/include/linux/ioprio.h newtree/include/linux/ioprio.h
--- oldtree/include/linux/ioprio.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/ioprio.h	2006-04-01 08:54:37.153573250 -0500
@@ -53,7 +53,13 @@
 
 static inline int task_nice_ioprio(struct task_struct *task)
 {
-	return (task_nice(task) + 20) / 5;
+	int effective_nice = task_nice(task);
+
+	if (idleprio_task(task))
+		effective_nice = 19;
+	else if (rt_task(task) || iso_task(task))
+		effective_nice = -20;
+	return (effective_nice + 20) / 5;
 }
 
 /*
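
As a standalone arithmetic check (plain userspace C, no kernel interfaces), the mapping used above folds nice values -20..19 into I/O priority levels 0..7, which is why the clamps in the hunk pin idleprio tasks to the lowest level (7) and rt/iso tasks to the highest (0):

#include <stdio.h>

/* Userspace model of the (nice + 20) / 5 mapping from task_nice_ioprio(). */
static int nice_to_ioprio(int nice)
{
	return (nice + 20) / 5;
}

int main(void)
{
	int samples[] = { -20, -1, 0, 10, 19 };
	unsigned int i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("nice %3d -> ioprio level %d\n",
		       samples[i], nice_to_ioprio(samples[i]));
	/* prints 0, 3, 4, 6, 7: -20 is the rt/iso clamp, 19 the idleprio clamp */
	return 0;
}
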
diff -urN oldtree/include/linux/mm.h newtree/include/linux/mm.h
--- oldtree/include/linux/mm.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/mm.h	2006-04-01 08:51:11.600727000 -0500
@@ -954,7 +954,11 @@
 int write_one_page(struct page *page, int wait);
 
 /* readahead.c */
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+#define VM_MAX_READAHEAD	1024	/* kbytes */
+#else
 #define VM_MAX_READAHEAD	128	/* kbytes */
+#endif
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 #define VM_MAX_CACHE_HIT    	256	/* max pages in a row in cache before
 					 * turning readahead off */
@@ -971,6 +975,33 @@
 void handle_ra_miss(struct address_space *mapping, 
 		    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);
+unsigned long
+page_cache_readahead_adaptive(struct address_space *mapping,
+			struct file_ra_state *ra, struct file *filp,
+			struct page *prev_page, struct page *page,
+			pgoff_t first_index, pgoff_t index, pgoff_t last_index);
+
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+void fastcall readahead_cache_hit(struct file_ra_state *ra, struct page *page);
+extern int readahead_ratio;
+#else
+#define readahead_cache_hit(ra, page) do { } while (0)
+#define readahead_ratio 1
+#endif /* CONFIG_ADAPTIVE_READAHEAD */
+
+static inline int prefer_adaptive_readahead(void)
+{
+	return readahead_ratio >= 10;
+}
+
+DECLARE_PER_CPU(unsigned long, readahead_aging);
+static inline void inc_readahead_aging(void)
+{
+	if (prefer_adaptive_readahead()) {
+		per_cpu(readahead_aging, get_cpu())++;
+		put_cpu();
+	}
+}
 
 /* Do stack extension */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff -urN oldtree/include/linux/mm_inline.h newtree/include/linux/mm_inline.h
--- oldtree/include/linux/mm_inline.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/mm_inline.h	2006-04-01 08:53:58.007126750 -0500
@@ -14,6 +14,13 @@
 }
 
 static inline void
+add_page_to_inactive_list_tail(struct zone *zone, struct page *page)
+{
+	list_add_tail(&page->lru, &zone->inactive_list);
+	zone->nr_inactive++;
+}
+
+static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
 	list_del(&page->lru);
diff -urN oldtree/include/linux/mmzone.h newtree/include/linux/mmzone.h
--- oldtree/include/linux/mmzone.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/mmzone.h	2006-04-01 08:54:45.766111500 -0500
@@ -120,7 +120,7 @@
 struct zone {
 	/* Fields commonly accessed by the page allocator */
 	unsigned long		free_pages;
-	unsigned long		pages_min, pages_low, pages_high;
+	unsigned long		pages_min, pages_low, pages_high, pages_lots;
 	/*
 	 * We don't know if the memory that we're going to allocate will be freeable
 	 * or/and it will be released eventually, so to avoid totally wasting several
diff -urN oldtree/include/linux/page-flags.h newtree/include/linux/page-flags.h
--- oldtree/include/linux/page-flags.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/page-flags.h	2006-04-01 08:51:11.604727250 -0500
@@ -75,6 +75,7 @@
 #define PG_reclaim		17	/* To be reclaimed asap */
 #define PG_nosave_free		18	/* Free, should not be written */
 #define PG_uncached		19	/* Page has been mapped as uncached */
+#define PG_readahead		20	/* Reminder to do readahead */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -344,6 +345,10 @@
 #define SetPageUncached(page)	set_bit(PG_uncached, &(page)->flags)
 #define ClearPageUncached(page)	clear_bit(PG_uncached, &(page)->flags)
 
+#define PageReadahead(page)	test_bit(PG_readahead, &(page)->flags)
+#define __SetPageReadahead(page) __set_bit(PG_readahead, &(page)->flags)
+#define TestClearPageReadahead(page) test_and_clear_bit(PG_readahead, &(page)->flags)
+
 struct page;	/* forward declaration */
 
 int test_clear_page_dirty(struct page *page);
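
The new PG_readahead bit is consumed by the filemap.c changes further down: one page inside a readahead window is marked, and the access that test-and-clears the mark schedules the next window. A simplified userspace model of that trigger idea (not kernel code; the choice of which page to mark is arbitrary here) is:

#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 32
#define WINDOW   8

static bool cached[NR_PAGES];		/* "page is in the page cache" */
static bool ra_mark[NR_PAGES];		/* stand-in for PG_readahead */

static void submit_window(int start)
{
	int i, end = start + WINDOW < NR_PAGES ? start + WINDOW : NR_PAGES;

	for (i = start; i < end; i++)
		cached[i] = true;
	if (start < end) {
		ra_mark[start] = true;	/* trigger for the *next* window */
		printf("  readahead: pages %d-%d submitted\n", start, end - 1);
	}
}

int main(void)
{
	int idx;

	submit_window(0);			/* initial window */
	for (idx = 0; idx < NR_PAGES; idx++) {
		if (!cached[idx])		/* miss: synchronous read */
			submit_window(idx);
		if (ra_mark[idx]) {		/* TestClearPageReadahead() analogue */
			ra_mark[idx] = false;
			submit_window(idx + WINDOW);
		}
		printf("read page %d\n", idx);
	}
	return 0;
}

After the first window, every later page is already cached by the time the reader reaches it, which is the effect the marker is there to buy.
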
diff -urN oldtree/include/linux/radix-tree.h newtree/include/linux/radix-tree.h
--- oldtree/include/linux/radix-tree.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/radix-tree.h	2006-04-01 08:51:11.608727500 -0500
@@ -23,12 +23,24 @@
 #include <linux/preempt.h>
 #include <linux/types.h>
 
+#define RADIX_TREE_MAP_SHIFT	6
+#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)
+
 struct radix_tree_root {
 	unsigned int		height;
 	gfp_t			gfp_mask;
 	struct radix_tree_node	*rnode;
 };
 
+/*
+ * Lookaside cache to support access patterns with strong locality.
+ */
+struct radix_tree_cache {
+	unsigned long first_index;
+	struct radix_tree_node *tree_node;
+};
+
 #define RADIX_TREE_INIT(mask)	{					\
 	.height = 0,							\
 	.gfp_mask = (mask),						\
@@ -46,9 +58,18 @@
 } while (0)
 
 int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
-void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
-void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *radix_tree_lookup_node(struct radix_tree_root *, unsigned long,
+							unsigned int);
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+unsigned int radix_tree_cache_count(struct radix_tree_cache *cache);
+void *radix_tree_cache_lookup_node(struct radix_tree_root *root,
+				struct radix_tree_cache *cache,
+				unsigned long index, unsigned int level);
+unsigned long radix_tree_scan_hole_backward(struct radix_tree_root *root,
+				unsigned long index, unsigned long max_scan);
+unsigned long radix_tree_scan_hole(struct radix_tree_root *root,
+				unsigned long index, unsigned long max_scan);
 unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			unsigned long first_index, unsigned int max_items);
@@ -70,4 +91,61 @@
 	preempt_enable();
 }
 
+/**
+ *	radix_tree_lookup    -    perform lookup operation on a radix tree
+ *	@root:		radix tree root
+ *	@index:		index key
+ *
+ *	Lookup the item at the position @index in the radix tree @root.
+ */
+static inline void *radix_tree_lookup(struct radix_tree_root *root,
+							unsigned long index)
+{
+	return radix_tree_lookup_node(root, index, 0);
+}
+
+/**
+ *	radix_tree_cache_init    -    init a look-aside cache
+ *	@cache:		look-aside cache
+ *
+ *	Init the radix tree look-aside cache @cache.
+ */
+static inline void radix_tree_cache_init(struct radix_tree_cache *cache)
+{
+	cache->first_index = RADIX_TREE_MAP_MASK;
+	cache->tree_node = NULL;
+}
+
+/**
+ *	radix_tree_cache_lookup    -    cached lookup on a radix tree
+ *	@root:		radix tree root
+ *	@cache:		look-aside cache
+ *	@index:		index key
+ *
+ *	Lookup the item at the position @index in the radix tree @root,
+ *	and make use of @cache to speedup the lookup process.
+ */
+static inline void *radix_tree_cache_lookup(struct radix_tree_root *root,
+						struct radix_tree_cache *cache,
+						unsigned long index)
+{
+	return radix_tree_cache_lookup_node(root, cache, index, 0);
+}
+
+static inline unsigned int radix_tree_cache_size(struct radix_tree_cache *cache)
+{
+	return RADIX_TREE_MAP_SIZE;
+}
+
+static inline int radix_tree_cache_full(struct radix_tree_cache *cache)
+{
+	return radix_tree_cache_count(cache) == radix_tree_cache_size(cache);
+}
+
+static inline unsigned long
+radix_tree_cache_first_index(struct radix_tree_cache *cache)
+{
+	return cache->first_index;
+}
+
 #endif /* _LINUX_RADIX_TREE_H */
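
A userspace sketch (not the kernel API) of why the look-aside cache above pays off for sequential access: with RADIX_TREE_MAP_SHIFT = 6, every group of 64 consecutive indices shares one bottom-level node, so remembering that node keyed by index & ~RADIX_TREE_MAP_MASK lets a sequential scan skip the full tree walk 63 times out of 64. radix_tree_cache_init() seeds first_index with RADIX_TREE_MAP_MASK, a value no aligned group can ever equal, so the very first lookup always takes the slow path safely.

#include <stdio.h>

#define MAP_SHIFT 6
#define MAP_SIZE  (1UL << MAP_SHIFT)
#define MAP_MASK  (MAP_SIZE - 1)

/* Count cache hits for a one-entry bottom-node cache keyed by
 * (index & ~MAP_MASK), mimicking struct radix_tree_cache. */
int main(void)
{
	unsigned long cached_first = MAP_MASK;	/* "invalid" seed value */
	unsigned long index, hits = 0, misses = 0;

	for (index = 0; index < 4096; index++) {
		if ((index & ~MAP_MASK) == cached_first) {
			hits++;			/* reuse the cached node */
		} else {
			misses++;		/* full tree walk, then cache it */
			cached_first = index & ~MAP_MASK;
		}
	}
	printf("sequential scan of 4096 slots: %lu hits, %lu misses\n",
	       hits, misses);
	return 0;
}
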
diff -urN oldtree/include/linux/swap-prefetch.h newtree/include/linux/swap-prefetch.h
--- oldtree/include/linux/swap-prefetch.h	1969-12-31 19:00:00.000000000 -0500
+++ newtree/include/linux/swap-prefetch.h	2006-04-01 08:53:58.011127000 -0500
@@ -0,0 +1,55 @@
+#ifndef SWAP_PREFETCH_H_INCLUDED
+#define SWAP_PREFETCH_H_INCLUDED
+
+#ifdef CONFIG_SWAP_PREFETCH
+/* mm/swap_prefetch.c */
+extern int swap_prefetch;
+struct swapped_entry {
+	swp_entry_t		swp_entry;	/* The actual swap entry */
+	struct list_head	swapped_list;	/* Linked list of entries */
+#if MAX_NUMNODES > 1
+	int			node;		/* Node id */
+#endif
+} __attribute__((packed));
+
+static inline void store_swap_entry_node(struct swapped_entry *entry,
+	struct page *page)
+{
+#if MAX_NUMNODES > 1
+	entry->node = page_to_nid(page);
+#endif
+}
+
+static inline int get_swap_entry_node(struct swapped_entry *entry)
+{
+#if MAX_NUMNODES > 1
+	return entry->node;
+#else
+	return 0;
+#endif
+}
+
+extern void add_to_swapped_list(struct page *page);
+extern void remove_from_swapped_list(const unsigned long index);
+extern void delay_swap_prefetch(void);
+extern void prepare_swap_prefetch(void);
+
+#else	/* CONFIG_SWAP_PREFETCH */
+static inline void add_to_swapped_list(struct page *__unused)
+{
+}
+
+static inline void prepare_swap_prefetch(void)
+{
+}
+
+static inline void remove_from_swapped_list(const unsigned long __unused)
+{
+}
+
+static inline void delay_swap_prefetch(void)
+{
+}
+#endif	/* CONFIG_SWAP_PREFETCH */
+
+#endif		/* SWAP_PREFETCH_H_INCLUDED */
diff -urN oldtree/include/linux/swap.h newtree/include/linux/swap.h
--- oldtree/include/linux/swap.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/swap.h	2006-04-01 08:54:48.062255000 -0500
@@ -164,6 +164,7 @@
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
 extern void FASTCALL(lru_cache_add_active(struct page *));
+extern void FASTCALL(lru_cache_add_tail(struct page *));
 extern void FASTCALL(activate_page(struct page *));
 extern void FASTCALL(mark_page_accessed(struct page *));
 extern void lru_add_drain(void);
@@ -174,7 +175,8 @@
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(struct zone **, gfp_t);
 extern int shrink_all_memory(int);
-extern int vm_swappiness;
+extern int vm_mapped;
+extern int vm_hardmaplimit;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
@@ -235,6 +237,7 @@
 extern struct page * lookup_swap_cache(swp_entry_t);
 extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma,
 					   unsigned long addr);
+extern int add_to_swap_cache(struct page *page, swp_entry_t entry);
 /* linux/mm/swapfile.c */
 extern long total_swap_pages;
 extern unsigned int nr_swapfiles;
diff -urN oldtree/include/linux/sysctl.h newtree/include/linux/sysctl.h
--- oldtree/include/linux/sysctl.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/sysctl.h	2006-04-01 08:55:15.339959750 -0500
@@ -172,7 +172,7 @@
 	VM_OVERCOMMIT_RATIO=16, /* percent of RAM to allow overcommit in */
 	VM_PAGEBUF=17,		/* struct: Control pagebuf parameters */
 	VM_HUGETLB_PAGES=18,	/* int: Number of available Huge Pages */
-	VM_SWAPPINESS=19,	/* Tendency to steal mapped memory */
+	VM_MAPPED=19,		/* percent mapped min while evicting cache */
 	VM_LOWMEM_RESERVE_RATIO=20,/* reservation ratio for lower memory zones */
 	VM_MIN_FREE_KBYTES=21,	/* Minimum free kilobytes to maintain */
 	VM_MAX_MAP_COUNT=22,	/* int: Maximum number of mmaps/address-space */
@@ -186,6 +186,10 @@
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
 	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+        VM_SWAP_PREFETCH=33,    /* swap prefetch */
+        VM_HARDMAPLIMIT=34,     /* Make mapped a hard limit */
+        VM_READAHEAD_RATIO=35,  /* percent of read-ahead size to thrashing-threshold */
+        VM_READAHEAD_HIT_RATE=36, /* one accessed page legitimizes so many read-ahead pages */
 };
 
 
diff -urN oldtree/include/linux/writeback.h newtree/include/linux/writeback.h
--- oldtree/include/linux/writeback.h	2006-04-01 04:48:27.000000000 -0500
+++ newtree/include/linux/writeback.h	2006-04-01 08:51:11.624728500 -0500
@@ -85,6 +85,12 @@
 void laptop_sync_completion(void);
 void throttle_vm_writeout(void);
 
+extern struct timer_list laptop_mode_wb_timer;
+static inline int laptop_spinned_down(void)
+{
+	return !timer_pending(&laptop_mode_wb_timer);
+}
+
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
 extern int vm_dirty_ratio;
diff -urN oldtree/init/Kconfig newtree/init/Kconfig
--- oldtree/init/Kconfig	2006-04-01 04:48:27.000000000 -0500
+++ newtree/init/Kconfig	2006-04-01 08:53:57.979125000 -0500
@@ -92,6 +92,28 @@
 	  used to provide more virtual memory than the actual RAM present
 	  in your computer.  If unsure say Y.
 
+config SWAP_PREFETCH
+	bool "Support for prefetching swapped memory"
+	depends on SWAP
+	default y
+	---help---
+	  This option will allow the kernel to prefetch swapped memory pages
+	  when idle. The pages will be kept both on swap and in the swap_cache,
+	  thus avoiding the need for further I/O if either ram or swap space
+	  is required.
+
+	  What this will do on workstations is slowly bring applications
+	  that have been swapped out after memory intensive workloads back into
+	  physical ram if you have free ram at a later stage and the machine
+	  is relatively idle. This means that when you come back to your
+	  computer after leaving it idle for a while, applications will come
+	  to life faster. Note that your swap usage will appear to increase
+	  but these are cached pages that can be dropped freely by the vm, and it
+	  should stabilise around 50% swap usage maximum.
+
+	  Workstations and multiuser workstation servers will most likely want
+	  to say Y.
+
 config SYSVIPC
 	bool "System V IPC"
 	---help---
diff -urN oldtree/kernel/Kconfig.hz newtree/kernel/Kconfig.hz
--- oldtree/kernel/Kconfig.hz	2006-04-01 04:48:27.000000000 -0500
+++ newtree/kernel/Kconfig.hz	2006-04-01 08:53:08.900057750 -0500
@@ -4,7 +4,7 @@
 
 choice
 	prompt "Timer frequency"
-	default HZ_250
+	default HZ_1000
 	help
 	 Allows the configuration of the timer frequency. It is customary
 	 to have the timer interrupt run at 1000 HZ but 100 HZ may be more
@@ -21,14 +21,17 @@
 	help
 	  100 HZ is a typical choice for servers, SMP and NUMA systems
 	  with lots of processors that may show reduced performance if
-	  too many timer interrupts are occurring.
+	  too many timer interrupts are occurring. Laptops may also show
+	  improved battery life.
 
-	config HZ_250
+	config HZ_250_NODEFAULT
 		bool "250 HZ"
 	help
-	 250 HZ is a good compromise choice allowing server performance
-	 while also showing good interactive responsiveness even
-	 on SMP and NUMA systems.
+	 250 HZ is a lousy compromise choice allowing server interactivity
+	 while also showing desktop throughput and no extra power saving on
+	 laptops. Good for when you can't make up your mind.
+
+	 Recommend 100 or 1000 instead.
 
 	config HZ_1000
 		bool "1000 HZ"
@@ -41,6 +44,6 @@
 config HZ
 	int
 	default 100 if HZ_100
-	default 250 if HZ_250
+	default 250 if HZ_250_NODEFAULT
 	default 1000 if HZ_1000
 
diff -urN oldtree/kernel/sysctl.c newtree/kernel/sysctl.c
--- oldtree/kernel/sysctl.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/kernel/sysctl.c	2006-04-01 08:54:48.082256250 -0500
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/swap-prefetch.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 #include <linux/capability.h>
@@ -73,6 +74,12 @@
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
 
+#if defined(CONFIG_ADAPTIVE_READAHEAD)
+extern int readahead_ratio;
+extern int readahead_hit_rate;
+static int one = 1;
+#endif
+
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
 extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
@@ -683,6 +690,28 @@
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+	{
+		.ctl_name	= VM_READAHEAD_RATIO,
+		.procname	= "readahead_ratio",
+		.data		= &readahead_ratio,
+		.maxlen		= sizeof(readahead_ratio),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= VM_READAHEAD_HIT_RATE,
+		.procname	= "readahead_hit_rate",
+		.data		= &readahead_hit_rate,
+		.maxlen		= sizeof(readahead_hit_rate),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
@@ -764,16 +793,24 @@
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= VM_SWAPPINESS,
-		.procname	= "swappiness",
-		.data		= &vm_swappiness,
-		.maxlen		= sizeof(vm_swappiness),
+		.ctl_name	= VM_MAPPED,
+		.procname	= "mapped",
+		.data		= &vm_mapped,
+		.maxlen		= sizeof(vm_mapped),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_HARDMAPLIMIT,
+		.procname	= "hardmaplimit",
+		.data		= &vm_hardmaplimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.ctl_name	= VM_HUGETLB_PAGES,
@@ -916,6 +953,16 @@
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+#ifdef CONFIG_SWAP_PREFETCH
+	{
+		.ctl_name	= VM_SWAP_PREFETCH,
+		.procname	= "swap_prefetch",
+		.data		= &swap_prefetch,
+		.maxlen		= sizeof(swap_prefetch),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
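
All of the entries added above appear under /proc/sys/vm/. A small userspace reader for them (assuming a kernel built with the corresponding config options; on other kernels the files simply do not exist):

#include <stdio.h>

static void show(const char *name)
{
	char path[128], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
	f = fopen(path, "r");
	if (!f) {
		printf("%-20s <not available on this kernel>\n", name);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%-20s %s", name, buf);
	fclose(f);
}

int main(void)
{
	/* names registered by the hunks above */
	show("swap_prefetch");
	show("mapped");
	show("hardmaplimit");
	show("readahead_ratio");
	show("readahead_hit_rate");
	return 0;
}

Per the include/linux/mm.h hunk, the adaptive readahead path only takes over once readahead_ratio is at least 10; below that the stock logic keeps running.
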
diff -urN oldtree/lib/radix-tree.c newtree/lib/radix-tree.c
--- oldtree/lib/radix-tree.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/lib/radix-tree.c	2006-04-01 08:51:11.640729500 -0500
@@ -32,16 +32,7 @@
 #include <linux/bitops.h>
 
 
-#ifdef __KERNEL__
-#define RADIX_TREE_MAP_SHIFT	6
-#else
-#define RADIX_TREE_MAP_SHIFT	3	/* For more stressful testing */
-#endif
 #define RADIX_TREE_TAGS		2
-
-#define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
-#define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)
-
 #define RADIX_TREE_TAG_LONGS	\
 	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
 
@@ -286,32 +277,89 @@
 }
 EXPORT_SYMBOL(radix_tree_insert);
 
-static inline void **__lookup_slot(struct radix_tree_root *root,
-				   unsigned long index)
+/**
+ *	radix_tree_lookup_node    -    low level lookup routine
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@level:		stop at that many levels from the tree leaf
+ *
+ *	Lookup the item at the position @index in the radix tree @root.
+ *	The return value is:
+ *	@level == 0:      page at @index;
+ *	@level == 1:      the corresponding bottom level tree node;
+ *	@level < height:  (@level-1)th parent node of the bottom node
+ *			  that contains @index;
+ *	@level >= height: the root node.
+ */
+void *radix_tree_lookup_node(struct radix_tree_root *root,
+				unsigned long index, unsigned int level)
 {
 	unsigned int height, shift;
-	struct radix_tree_node **slot;
+	struct radix_tree_node *slot;
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
 		return NULL;
 
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
-	slot = &root->rnode;
+	slot = root->rnode;
 
-	while (height > 0) {
-		if (*slot == NULL)
+	while (height > level) {
+		if (slot == NULL)
 			return NULL;
 
-		slot = (struct radix_tree_node **)
-			((*slot)->slots +
-				((index >> shift) & RADIX_TREE_MAP_MASK));
+		slot = slot->slots[(index >> shift) & RADIX_TREE_MAP_MASK];
 		shift -= RADIX_TREE_MAP_SHIFT;
 		height--;
 	}
 
-	return (void **)slot;
+	return slot;
+}
+EXPORT_SYMBOL(radix_tree_lookup_node);
+
+/**
+ *	radix_tree_cache_lookup_node    -    cached lookup node
+ *	@root:		radix tree root
+ *	@cache:		look-aside cache
+ *	@index:		index key
+ *
+ *	Lookup the item at the position @index in the radix tree @root,
+ *	and return the node @level levels from the bottom in the search path.
+ *
+ *	@cache stores the last accessed upper level tree node by this
+ *	function, and is always checked first before searching in the tree.
+ *	It can improve speed for access patterns with strong locality.
+ *
+ *	NOTE:
+ *	- The cache becomes invalid on leaving the lock;
+ *	- Do not intermix calls with different @level.
+ */
+void *radix_tree_cache_lookup_node(struct radix_tree_root *root,
+				struct radix_tree_cache *cache,
+				unsigned long index, unsigned int level)
+{
+	struct radix_tree_node *node;
+        unsigned long i;
+        unsigned long mask;
+
+        if (level >= root->height)
+                return root->rnode;
+
+        i = ((index >> (level * RADIX_TREE_MAP_SHIFT)) & RADIX_TREE_MAP_MASK);
+        mask = ~((RADIX_TREE_MAP_SIZE << (level * RADIX_TREE_MAP_SHIFT)) - 1);
+
+	if ((index & mask) == cache->first_index)
+                return cache->tree_node->slots[i];
+
+	node = radix_tree_lookup_node(root, index, level + 1);
+	if (!node)
+		return 0;
+
+	cache->tree_node = node;
+	cache->first_index = (index & mask);
+        return node->slots[i];
 }
+EXPORT_SYMBOL(radix_tree_cache_lookup_node);
 
 /**
  *	radix_tree_lookup_slot    -    lookup a slot in a radix tree
@@ -323,25 +371,131 @@
  */
 void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 {
-	return __lookup_slot(root, index);
+	struct radix_tree_node *node;
+
+	node = radix_tree_lookup_node(root, index, 1);
+	return node->slots + (index & RADIX_TREE_MAP_MASK);
 }
 EXPORT_SYMBOL(radix_tree_lookup_slot);
 
 /**
- *	radix_tree_lookup    -    perform lookup operation on a radix tree
+ *	radix_tree_cache_count    -    items in the cached node
+ *	@cache:      radix tree look-aside cache
+ *
+ *      Query the number of items contained in the cached node.
+ */
+unsigned int radix_tree_cache_count(struct radix_tree_cache *cache)
+{
+	if (!(cache->first_index & RADIX_TREE_MAP_MASK))
+		return cache->tree_node->count;
+	else
+		return 0;
+}
+EXPORT_SYMBOL(radix_tree_cache_count);
+
+/**
+ *	radix_tree_scan_hole_backward    -    scan backward for hole
  *	@root:		radix tree root
  *	@index:		index key
+ *	@max_scan:      advice on max items to scan (it may scan a little more)
  *
- *	Lookup the item at the position @index in the radix tree @root.
+ *      Scan backward from @index for a hole/empty item, stop when
+ *      - hit hole
+ *      - @max_scan or more items scanned
+ *      - hit index 0
+ *
+ *      Return the corresponding index.
  */
-void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+unsigned long radix_tree_scan_hole_backward(struct radix_tree_root *root,
+				unsigned long index, unsigned long max_scan)
 {
-	void **slot;
+	struct radix_tree_cache cache;
+	struct radix_tree_node *node;
+	unsigned long origin;
+	int i;
+
+	origin = index;
+        radix_tree_cache_init(&cache);
+
+	while (origin - index < max_scan) {
+		node = radix_tree_cache_lookup_node(root, &cache, index, 1);
+		if (!node)
+			break;
+
+		if (node->count == RADIX_TREE_MAP_SIZE) {
+			index = (index - RADIX_TREE_MAP_SIZE) |
+					RADIX_TREE_MAP_MASK;
+			goto check_underflow;
+		}
+
+		for (i = index & RADIX_TREE_MAP_MASK; i >= 0; i--, index--) {
+			if (!node->slots[i])
+				goto out;
+		}
+
+check_underflow:
+		if (unlikely(index == ULONG_MAX)) {
+			index = 0;
+			break;
+		}
+	}
+
+out:
+	return index;
+}
+EXPORT_SYMBOL(radix_tree_scan_hole_backward);
 
-	slot = __lookup_slot(root, index);
-	return slot != NULL ? *slot : NULL;
+/**
+ *	radix_tree_scan_hole    -    scan for hole
+ *	@root:		radix tree root
+ *	@index:		index key
+ *	@max_scan:      advice on max items to scan (it may scan a little more)
+ *
+ *      Scan forward from @index for a hole/empty item, stop when
+ *      - hit hole
+ *      - hit EOF
+ *      - hit index ULONG_MAX
+ *      - @max_scan or more items scanned
+ *
+ *      Return the corresponding index.
+ */
+unsigned long radix_tree_scan_hole(struct radix_tree_root *root,
+				unsigned long index, unsigned long max_scan)
+{
+	struct radix_tree_cache cache;
+	struct radix_tree_node *node;
+	unsigned long origin;
+	int i;
+
+	origin = index;
+        radix_tree_cache_init(&cache);
+
+	while (index - origin < max_scan) {
+		node = radix_tree_cache_lookup_node(root, &cache, index, 1);
+		if (!node)
+			break;
+
+		if (node->count == RADIX_TREE_MAP_SIZE) {
+			index = (index | RADIX_TREE_MAP_MASK) + 1;
+			goto check_overflow;
+		}
+
+		for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE;
+								i++, index++) {
+			if (!node->slots[i])
+				goto out;
+		}
+
+check_overflow:
+		if (unlikely(!index)) {
+			index = ULONG_MAX;
+			break;
+		}
+	}
+out:
+	return index;
 }
-EXPORT_SYMBOL(radix_tree_lookup);
+EXPORT_SYMBOL(radix_tree_scan_hole);
 
 /**
  *	radix_tree_tag_set - set a tag on a radix tree node
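
A userspace model of the radix_tree_scan_hole() contract documented above: scan forward from @index for the first empty slot, giving up after roughly @max_scan items (the kernel version may overshoot a little and handles ULONG_MAX wrap-around). A plain array stands in for the radix tree:

#include <stdbool.h>
#include <stdio.h>

#define NR_SLOTS 64

static unsigned long scan_hole(const bool *present, unsigned long index,
			       unsigned long max_scan)
{
	unsigned long origin = index;

	while (index < NR_SLOTS && index - origin < max_scan && present[index])
		index++;
	return index;	/* first hole, or where the scan gave up */
}

int main(void)
{
	bool present[NR_SLOTS] = { false };
	unsigned long i;

	for (i = 4; i < 20; i++)	/* slots 4..19 are occupied */
		present[i] = true;

	printf("hole after 4:  %lu\n", scan_hole(present, 4, 64));	/* 20 */
	printf("hole after 10: %lu\n", scan_hole(present, 10, 4));	/* gives up at 14 */
	return 0;
}
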
diff -urN oldtree/mm/Kconfig newtree/mm/Kconfig
--- oldtree/mm/Kconfig	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/Kconfig	2006-04-01 08:51:11.644729750 -0500
@@ -139,3 +139,58 @@
 config MIGRATION
 	def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM
 	depends on SWAP
+
+#
+# Adaptive file readahead
+#
+config ADAPTIVE_READAHEAD
+	bool "Adaptive file readahead (EXPERIMENTAL)"
+	default n
+	depends on EXPERIMENTAL
+	help
+	  Readahead is a technique employed by the kernel in an attempt
+	  to improve file reading performance. If the kernel has reason
+	  to believe that a particular file is being read sequentially,
+	  it will attempt to read blocks from the file into memory before
+	  the application requests them. When readahead works, it speeds
+	  up the system's throughput, since the reading application does
+	  not have to wait for its requests. When readahead fails, instead,
+	  it generates useless I/O and occupies memory pages which are
+	  needed for some other purpose.
+
+	  Normally, the kernel uses a stock readahead logic that is well
+	  understood and well tuned. This option enables a much more complex
+	  and feature-rich one. It is more aggressive and memory-efficient in
+	  doing readahead, and supports some less-common access patterns such
+	  as reading backward and reading sparsely. However, due to the great
+	  diversity of real world applications, it might not fit everyone.
+
+	  Please refer to Documentation/sysctl/vm.txt for tunable parameters.
+
+	  Say Y here if you are building a kernel for file servers.
+	  Say N if you are unsure.
+
+config DEBUG_READAHEAD
+	bool "Readahead debug and accounting"
+	default n
+	depends on ADAPTIVE_READAHEAD
+	select DEBUG_FS
+	help
+	  This option injects extra code to dump detailed debug traces and do
+	  readahead event accounting.
+
+	  To actually get the data:
+
+	  mkdir /debug
+	  mount -t debug none /debug
+
+	  After that you can do the following:
+
+	  echo > /debug/readahead/events # reset the counters
+	  cat /debug/readahead/events    # check the counters
+
+	  echo 1 > /debug/readahead/debug_level # show printk traces
+	  echo 2 > /debug/readahead/debug_level # show verbose printk traces
+	  echo 0 > /debug/readahead/debug_level # stop filling my kern.log
+
+	  Say N, unless you have readahead performance problems.
diff -urN oldtree/mm/Makefile newtree/mm/Makefile
--- oldtree/mm/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/Makefile	2006-04-01 08:53:57.987125500 -0500
@@ -13,6 +13,7 @@
 			   prio_tree.o util.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
+obj-$(CONFIG_SWAP_PREFETCH) += swap_prefetch.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
diff -urN oldtree/mm/filemap.c newtree/mm/filemap.c
--- oldtree/mm/filemap.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/filemap.c	2006-04-01 08:51:11.648730000 -0500
@@ -42,6 +42,12 @@
 generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	loff_t offset, unsigned long nr_segs);
 
+#ifdef CONFIG_DEBUG_READAHEAD
+extern u32 readahead_debug_level;
+#else
+#define readahead_debug_level 0
+#endif /* CONFIG_DEBUG_READAHEAD */
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -746,10 +752,12 @@
 	unsigned long prev_index;
 	loff_t isize;
 	struct page *cached_page;
+	struct page *prev_page;
 	int error;
 	struct file_ra_state ra = *_ra;
 
 	cached_page = NULL;
+	prev_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	next_index = index;
 	prev_index = ra.prev_page;
@@ -760,6 +768,10 @@
 	if (!isize)
 		goto out;
 
+	if (readahead_debug_level >= 5)
+		printk(KERN_DEBUG "read-file(ino=%lu, req=%lu+%lu)\n",
+			inode->i_ino, index, last_index - index);
+
 	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
 	for (;;) {
 		struct page *page;
@@ -778,16 +790,45 @@
 		nr = nr - offset;
 
 		cond_resched();
-		if (index == next_index)
+
+		if (!prefer_adaptive_readahead() && index == next_index)
 			next_index = page_cache_readahead(mapping, &ra, filp,
 					index, last_index - index);
 
 find_page:
 		page = find_get_page(mapping, index);
+		if (prefer_adaptive_readahead()) {
+			if (unlikely(page == NULL)) {
+				ra.prev_page = prev_index;
+				page_cache_readahead_adaptive(mapping, &ra,
+						filp, prev_page, NULL,
+						*ppos >> PAGE_CACHE_SHIFT,
+						index, last_index);
+				page = find_get_page(mapping, index);
+			} else if (PageReadahead(page)) {
+				ra.prev_page = prev_index;
+				page_cache_readahead_adaptive(mapping, &ra,
+						filp, prev_page, page,
+						*ppos >> PAGE_CACHE_SHIFT,
+						index, last_index);
+			}
+		}
 		if (unlikely(page == NULL)) {
-			handle_ra_miss(mapping, &ra, index);
+			if (!prefer_adaptive_readahead())
+				handle_ra_miss(mapping, &ra, index);
 			goto no_cached_page;
 		}
+
+		if (prev_page)
+			page_cache_release(prev_page);
+		prev_page = page;
+
+		readahead_cache_hit(&ra, page);
+		if (readahead_debug_level >= 7)
+			printk(KERN_DEBUG "read-page(ino=%lu, idx=%lu, io=%s)\n",
+				inode->i_ino, index,
+				PageUptodate(page) ? "hit" : "miss");
+
 		if (!PageUptodate(page))
 			goto page_not_up_to_date;
 page_ok:
@@ -822,7 +863,6 @@
 		index += offset >> PAGE_CACHE_SHIFT;
 		offset &= ~PAGE_CACHE_MASK;
 
-		page_cache_release(page);
 		if (ret == nr && desc->count)
 			continue;
 		goto out;
@@ -834,7 +874,6 @@
 		/* Did it get unhashed before we got the lock? */
 		if (!page->mapping) {
 			unlock_page(page);
-			page_cache_release(page);
 			continue;
 		}
 
@@ -864,7 +903,6 @@
 					 * invalidate_inode_pages got it
 					 */
 					unlock_page(page);
-					page_cache_release(page);
 					goto find_page;
 				}
 				unlock_page(page);
@@ -885,7 +923,6 @@
 		isize = i_size_read(inode);
 		end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
 		if (unlikely(!isize || index > end_index)) {
-			page_cache_release(page);
 			goto out;
 		}
 
@@ -894,7 +931,6 @@
 		if (index == end_index) {
 			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
 			if (nr <= offset) {
-				page_cache_release(page);
 				goto out;
 			}
 		}
@@ -904,7 +940,6 @@
 readpage_error:
 		/* UHHUH! A synchronous read error occurred. Report it */
 		desc->error = error;
-		page_cache_release(page);
 		goto out;
 
 no_cached_page:
@@ -929,15 +964,22 @@
 		}
 		page = cached_page;
 		cached_page = NULL;
+		if (prev_page)
+			page_cache_release(prev_page);
+		prev_page = page;
 		goto readpage;
 	}
 
 out:
 	*_ra = ra;
+	if (prefer_adaptive_readahead())
+		_ra->prev_page = prev_index;
 
 	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
 	if (cached_page)
 		page_cache_release(cached_page);
+	if (prev_page)
+		page_cache_release(prev_page);
 	if (filp)
 		file_accessed(filp);
 }
@@ -1216,6 +1258,7 @@
 	unsigned long size, pgoff;
 	int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
+	ra->flags |= RA_FLAG_MMAP;
 	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
@@ -1233,7 +1276,7 @@
 	 *
 	 * For sequential accesses, we use the generic readahead logic.
 	 */
-	if (VM_SequentialReadHint(area))
+	if (!prefer_adaptive_readahead() && VM_SequentialReadHint(area))
 		page_cache_readahead(mapping, ra, file, pgoff, 1);
 
 	/*
@@ -1241,11 +1284,24 @@
 	 */
 retry_find:
 	page = find_get_page(mapping, pgoff);
+	if (prefer_adaptive_readahead() && VM_SequentialReadHint(area)) {
+		if (!page) {
+			page_cache_readahead_adaptive(mapping, ra,
+						file, NULL, NULL,
+						pgoff, pgoff, pgoff + 1);
+			page = find_get_page(mapping, pgoff);
+		} else if (PageReadahead(page)) {
+			page_cache_readahead_adaptive(mapping, ra,
+						file, NULL, page,
+						pgoff, pgoff, pgoff + 1);
+		}
+	}
 	if (!page) {
 		unsigned long ra_pages;
 
 		if (VM_SequentialReadHint(area)) {
-			handle_ra_miss(mapping, ra, pgoff);
+			if (!prefer_adaptive_readahead())
+				handle_ra_miss(mapping, ra, pgoff);
 			goto no_cached_page;
 		}
 		ra->mmap_miss++;
@@ -1282,6 +1338,14 @@
 	if (!did_readaround)
 		ra->mmap_hit++;
 
+	readahead_cache_hit(ra, page);
+	if (readahead_debug_level >= 6)
+		printk(KERN_DEBUG "read-mmap(ino=%lu, idx=%lu, hint=%s, io=%s)\n",
+			inode->i_ino, pgoff,
+			VM_RandomReadHint(area) ? "random" :
+			(VM_SequentialReadHint(area) ? "sequential" : "none"),
+			PageUptodate(page) ? "hit" : "miss");
+
 	/*
 	 * Ok, found a page in the page cache, now we need to check
 	 * that it's up-to-date.
@@ -1296,6 +1360,8 @@
 	mark_page_accessed(page);
 	if (type)
 		*type = majmin;
+	if (prefer_adaptive_readahead())
+		ra->prev_page = page->index;
 	return page;
 
 outside_data_content:
diff -urN oldtree/mm/memory.c newtree/mm/memory.c
--- oldtree/mm/memory.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/memory.c	2006-04-01 08:51:11.652730250 -0500
@@ -1993,6 +1993,7 @@
 		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 		if (!pte_none(*page_table))
 			goto release;
+		inc_readahead_aging();
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
 		page_add_new_anon_rmap(page, vma, address);
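
The inc_readahead_aging() call added above (together with the one added in mm/swap.c later in this patch) feeds the per-CPU readahead_aging counter that node_readahead_aging() in mm/readahead.c sums up; compute_thrashing_threshold() then uses the difference between two such sums as global_shift. A rough user-space model of that accounting, with made-up names (cpu_aging, node_aging) standing in for the per-CPU machinery, is:

#include <stdio.h>

#define NR_CPUS 4

/* stand-in for DEFINE_PER_CPU(unsigned long, readahead_aging) */
static unsigned long cpu_aging[NR_CPUS];

/* stand-in for inc_readahead_aging(): one event per fresh page reference */
static void inc_aging(int cpu)
{
	cpu_aging[cpu]++;
}

/* stand-in for node_readahead_aging(): sum over the node's CPUs */
static unsigned long node_aging(void)
{
	unsigned long sum = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += cpu_aging[cpu];
	return sum;
}

int main(void)
{
	unsigned long before = node_aging();

	/* e.g. 10 anonymous faults on cpu 1, 3 page references on cpu 2 */
	for (int i = 0; i < 10; i++) inc_aging(1);
	for (int i = 0; i < 3; i++)  inc_aging(2);
	printf("global_shift since snapshot: %lu\n", node_aging() - before);
	return 0;
}
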
diff -urN oldtree/mm/page-writeback.c newtree/mm/page-writeback.c
--- oldtree/mm/page-writeback.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/page-writeback.c	2006-04-01 08:54:33.701357500 -0500
@@ -69,18 +69,18 @@
 /*
  * The generator of dirty data starts writeback at this percentage
  */
-int vm_dirty_ratio = 40;
+int vm_dirty_ratio = 33;
 
 /*
  * The interval between `kupdate'-style writebacks, in centiseconds
  * (hundredths of a second)
  */
-int dirty_writeback_centisecs = 5 * 100;
+int dirty_writeback_centisecs = 3 * 100;
 
 /*
  * The longest number of centiseconds for which data is allowed to remain dirty
  */
-int dirty_expire_centisecs = 30 * 100;
+int dirty_expire_centisecs = 15 * 100;
 
 /*
  * Flag that makes the machine dump writes/reads and block dirtyings.
@@ -370,7 +370,7 @@
 static void laptop_timer_fn(unsigned long unused);
 
 static DEFINE_TIMER(wb_timer, wb_timer_fn, 0, 0);
-static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
+DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
 
 /*
  * Periodic writeback of "old" data.
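
The three writeback defaults changed above are also runtime tunables; assuming the usual /proc/sys/vm entry names (dirty_ratio, dirty_writeback_centisecs, dirty_expire_centisecs), the values in effect on a running kernel can be inspected with a few lines of C:

#include <stdio.h>

static void show(const char *name)
{
	char path[128], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
	f = fopen(path, "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("%-26s %s", name, buf);
	if (f)
		fclose(f);
}

int main(void)
{
	show("dirty_ratio");               /* vm_dirty_ratio            */
	show("dirty_writeback_centisecs"); /* dirty_writeback_centisecs */
	show("dirty_expire_centisecs");    /* dirty_expire_centisecs    */
	return 0;
}
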
diff -urN oldtree/mm/page_alloc.c newtree/mm/page_alloc.c
--- oldtree/mm/page_alloc.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/page_alloc.c	2006-04-01 08:54:45.770111750 -0500
@@ -532,7 +532,7 @@
 	if (PageReserved(page))
 		return 1;
 
-	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
+	page->flags &= ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_readahead |
 			1 << PG_referenced | 1 << PG_arch_1 |
 			1 << PG_checked | 1 << PG_mappedtodisk);
 	set_page_private(page, 0);
@@ -1436,6 +1436,7 @@
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
+			" lots:%lukB"
 			" active:%lukB"
 			" inactive:%lukB"
 			" present:%lukB"
@@ -1447,6 +1448,7 @@
 			K(zone->pages_min),
 			K(zone->pages_low),
 			K(zone->pages_high),
+			K(zone->pages_lots),
 			K(zone->nr_active),
 			K(zone->nr_inactive),
 			K(zone->present_pages),
@@ -2227,6 +2229,7 @@
 			   "\n        min      %lu"
 			   "\n        low      %lu"
 			   "\n        high     %lu"
+			   "\n        lots     %lu"
 			   "\n        active   %lu"
 			   "\n        inactive %lu"
 			   "\n        scanned  %lu (a: %lu i: %lu)"
@@ -2236,6 +2239,7 @@
 			   zone->pages_min,
 			   zone->pages_low,
 			   zone->pages_high,
+			   zone->pages_lots,
 			   zone->nr_active,
 			   zone->nr_inactive,
 			   zone->pages_scanned,
@@ -2538,6 +2542,7 @@
 
 		zone->pages_low   = zone->pages_min + tmp / 4;
 		zone->pages_high  = zone->pages_min + tmp / 2;
+		zone->pages_lots  = zone->pages_min + tmp;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
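
After the hunk above, each zone carries four watermarks derived from the same base: pages_min, pages_low = pages_min + tmp/4, pages_high = pages_min + tmp/2, and the new pages_lots = pages_min + tmp. A standalone sketch of just that arithmetic (tmp stands for the per-zone value computed earlier in the surrounding function, outside this hunk):

#include <stdio.h>

struct watermarks {
	unsigned long min, low, high, lots;
};

/* Mirror of the formulas shown in the hunk above. */
static struct watermarks zone_watermarks(unsigned long pages_min,
					 unsigned long tmp)
{
	struct watermarks w;

	w.min  = pages_min;
	w.low  = pages_min + tmp / 4;
	w.high = pages_min + tmp / 2;
	w.lots = pages_min + tmp;	/* new in this patch */
	return w;
}

int main(void)
{
	struct watermarks w = zone_watermarks(1024, 4096);

	printf("min=%lu low=%lu high=%lu lots=%lu\n",
	       w.min, w.low, w.high, w.lots);
	return 0;
}
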
diff -urN oldtree/mm/readahead.c newtree/mm/readahead.c
--- oldtree/mm/readahead.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/readahead.c	2006-04-01 08:51:11.664731000 -0500
@@ -14,6 +14,300 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/writeback.h>
+#include <linux/nfsd/const.h>
+#include <asm/div64.h>
+
+/* The default max/min read-ahead pages. */
+#define KB(size)	(((size)*1024 + PAGE_CACHE_SIZE-1) / PAGE_CACHE_SIZE)
+#define MAX_RA_PAGES	KB(VM_MAX_READAHEAD)
+#define MIN_RA_PAGES	KB(VM_MIN_READAHEAD)
+#define MIN_NFSD_PAGES	KB(NFSSVC_MAXBLKSIZE/1024)
+
+#define next_page(pg) (list_entry((pg)->lru.prev, struct page, lru))
+#define prev_page(pg) (list_entry((pg)->lru.next, struct page, lru))
+
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+/*
+ * Adaptive read-ahead parameters.
+ */
+
+/* In laptop mode, poll delayed look-ahead on every ## pages read. */
+#define LAPTOP_POLL_INTERVAL 16
+
+/* Set look-ahead size to 1/# of the thrashing-threshold. */
+#define LOOKAHEAD_RATIO 8
+
+/* Set read-ahead size to ##% of the thrashing-threshold. */
+int readahead_ratio = 50;
+EXPORT_SYMBOL(readahead_ratio);
+
+/* Readahead as long as cache hit ratio keeps above 1/##. */
+int readahead_hit_rate = 2;
+EXPORT_SYMBOL(readahead_hit_rate);
+
+/*
+ * Measures the aging process of cold pages.
+ * Mainly increased on fresh page references to make it smooth.
+ */
+DEFINE_PER_CPU(unsigned long, readahead_aging);
+EXPORT_PER_CPU_SYMBOL(readahead_aging);
+
+/*
+ * Detailed classification of read-ahead behaviors.
+ */
+#define RA_CLASS_SHIFT 4
+#define RA_CLASS_MASK  ((1 << RA_CLASS_SHIFT) - 1)
+enum ra_class {
+	RA_CLASS_ALL,
+	RA_CLASS_NEWFILE,
+	RA_CLASS_STATE,
+	RA_CLASS_CONTEXT,
+	RA_CLASS_CONTEXT_AGGRESSIVE,
+	RA_CLASS_BACKWARD,
+	RA_CLASS_THRASHING,
+	RA_CLASS_SEEK,
+	RA_CLASS_END,
+};
+#endif /* CONFIG_ADAPTIVE_READAHEAD */
+
+/*
+ * Read-ahead events accounting.
+ */
+#ifdef CONFIG_DEBUG_READAHEAD
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define DEBUG_READAHEAD_RADIXTREE
+
+/* Read-ahead events to be accounted. */
+enum ra_event {
+	RA_EVENT_CACHE_MISS,		/* read cache misses */
+	RA_EVENT_READRANDOM,		/* random reads */
+	RA_EVENT_IO_CONGESTION,		/* io congestion */
+	RA_EVENT_IO_CACHE_HIT,		/* canceled io due to cache hit */
+	RA_EVENT_IO_BLOCK,		/* read on locked page */
+
+	RA_EVENT_READAHEAD,		/* read-ahead issued */
+	RA_EVENT_READAHEAD_HIT,		/* read-ahead page hit */
+	RA_EVENT_LOOKAHEAD,		/* look-ahead issued */
+	RA_EVENT_LOOKAHEAD_HIT,		/* look-ahead mark hit */
+	RA_EVENT_LOOKAHEAD_NOACTION,	/* look-ahead mark ignored */
+	RA_EVENT_READAHEAD_MMAP,	/* read-ahead for memory mapped file */
+	RA_EVENT_READAHEAD_EOF,		/* read-ahead reaches EOF */
+	RA_EVENT_READAHEAD_SHRINK,	/* ra_size under previous la_size */
+	RA_EVENT_READAHEAD_THRASHING,	/* read-ahead thrashing happened */
+	RA_EVENT_READAHEAD_MUTILATE,	/* read-ahead request mutilated */
+	RA_EVENT_READAHEAD_RESCUE,	/* read-ahead rescued */
+
+	RA_EVENT_END
+};
+
+static const char * const ra_event_name[] = {
+	"cache_miss",
+	"read_random",
+	"io_congestion",
+	"io_cache_hit",
+	"io_block",
+	"readahead",
+	"readahead_hit",
+	"lookahead",
+	"lookahead_hit",
+	"lookahead_ignore",
+	"readahead_mmap",
+	"readahead_eof",
+	"readahead_shrink",
+	"readahead_thrash",
+	"readahead_mutilt",
+	"readahead_rescue",
+};
+
+static const char * const ra_class_name[] = {
+	"total",
+	"newfile",
+	"state",
+	"context",
+	"contexta",
+	"backward",
+	"onthrash",
+	"onraseek",
+	"none",
+};
+
+static unsigned long ra_events[RA_CLASS_END+1][RA_EVENT_END+1][2];
+
+static inline void ra_account(struct file_ra_state *ra,
+					enum ra_event e, int pages)
+{
+	enum ra_class c;
+
+	if (e == RA_EVENT_READAHEAD_HIT && pages < 0) {
+		c = (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
+		pages = -pages;
+	} else if (ra)
+		c = ra->flags & RA_CLASS_MASK;
+	else
+		c = RA_CLASS_END;
+
+	if (!c)
+		c = RA_CLASS_END;
+
+	ra_events[c][e][0] += 1;
+	ra_events[c][e][1] += pages;
+
+	if (e == RA_EVENT_READAHEAD)
+		ra_events[c][RA_EVENT_END][1] += pages * pages;
+}
+
+static int ra_events_show(struct seq_file *s, void *_)
+{
+	int i;
+	int c;
+	int e;
+	static const char event_fmt[] = "%-16s";
+	static const char class_fmt[] = "%10s";
+	static const char item_fmt[] = "%10lu";
+	static const char percent_format[] = "%9lu%%";
+	static const char * const table_name[] = {
+		"[table requests]",
+		"[table pages]",
+		"[table summary]"};
+
+	for (i = 0; i <= 1; i++) {
+		for (e = 0; e <= RA_EVENT_END; e++) {
+			ra_events[0][e][i] = 0;
+			for (c = 1; c < RA_CLASS_END; c++)
+				ra_events[0][e][i] += ra_events[c][e][i];
+		}
+
+		seq_printf(s, event_fmt, table_name[i]);
+		for (c = 0; c <= RA_CLASS_END; c++)
+			seq_printf(s, class_fmt, ra_class_name[c]);
+		seq_puts(s, "\n");
+
+		for (e = 0; e < RA_EVENT_END; e++) {
+			if (e == RA_EVENT_READAHEAD_HIT && i == 0)
+				continue;
+			if (e == RA_EVENT_IO_BLOCK && i == 1)
+				continue;
+
+			seq_printf(s, event_fmt, ra_event_name[e]);
+			for (c = 0; c <= RA_CLASS_END; c++)
+				seq_printf(s, item_fmt, ra_events[c][e][i]);
+			seq_puts(s, "\n");
+		}
+		seq_puts(s, "\n");
+	}
+
+	seq_printf(s, event_fmt, table_name[2]);
+	for (c = 0; c <= RA_CLASS_END; c++)
+		seq_printf(s, class_fmt, ra_class_name[c]);
+	seq_puts(s, "\n");
+
+	seq_printf(s, event_fmt, "random_rate");
+	for (c = 0; c <= RA_CLASS_END; c++)
+		seq_printf(s, percent_format,
+			(ra_events[c][RA_EVENT_READRANDOM][0] * 100) /
+			((ra_events[c][RA_EVENT_READRANDOM][0] +
+			  ra_events[c][RA_EVENT_READAHEAD][0]) | 1));
+	seq_puts(s, "\n");
+
+	seq_printf(s, event_fmt, "ra_hit_rate");
+	for (c = 0; c <= RA_CLASS_END; c++)
+		seq_printf(s, percent_format,
+			(ra_events[c][RA_EVENT_READAHEAD_HIT][1] * 100) /
+			(ra_events[c][RA_EVENT_READAHEAD][1] | 1));
+	seq_puts(s, "\n");
+
+	seq_printf(s, event_fmt, "la_hit_rate");
+	for (c = 0; c <= RA_CLASS_END; c++)
+		seq_printf(s, percent_format,
+			(ra_events[c][RA_EVENT_LOOKAHEAD_HIT][0] * 100) /
+			(ra_events[c][RA_EVENT_LOOKAHEAD][0] | 1));
+	seq_puts(s, "\n");
+
+	seq_printf(s, event_fmt, "var_ra_size");
+	for (c = 0; c <= RA_CLASS_END; c++)
+		seq_printf(s, item_fmt,
+			(ra_events[c][RA_EVENT_END][1] -
+			 ra_events[c][RA_EVENT_READAHEAD][1] *
+			(ra_events[c][RA_EVENT_READAHEAD][1] /
+			(ra_events[c][RA_EVENT_READAHEAD][0] | 1))) /
+			(ra_events[c][RA_EVENT_READAHEAD][0] | 1));
+	seq_puts(s, "\n");
+
+	seq_printf(s, event_fmt, "avg_ra_size");
+	for (c = 0; c <= RA_CLASS_END; c++)
+		seq_printf(s, item_fmt,
+			(ra_events[c][RA_EVENT_READAHEAD][1] +
+			 ra_events[c][RA_EVENT_READAHEAD][0] / 2) /
+			(ra_events[c][RA_EVENT_READAHEAD][0] | 1));
+	seq_puts(s, "\n");
+
+	seq_printf(s, event_fmt, "avg_la_size");
+	for (c = 0; c <= RA_CLASS_END; c++)
+		seq_printf(s, item_fmt,
+			(ra_events[c][RA_EVENT_LOOKAHEAD][1] +
+			 ra_events[c][RA_EVENT_LOOKAHEAD][0] / 2) /
+			(ra_events[c][RA_EVENT_LOOKAHEAD][0] | 1));
+	seq_puts(s, "\n");
+
+	return 0;
+}
+
+static int ra_events_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ra_events_show, NULL);
+}
+
+static ssize_t ra_events_write(struct file *file, const char __user *buf,
+						size_t size, loff_t *offset)
+{
+	memset(ra_events, 0, sizeof(ra_events));
+	return 1;
+}
+
+struct file_operations ra_events_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ra_events_open,
+	.write		= ra_events_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+u32 readahead_debug_level = 0;
+u32 disable_stateful_method = 0;
+
+static int __init readahead_init(void)
+{
+	struct dentry *root;
+
+	root = debugfs_create_dir("readahead", NULL);
+
+	debugfs_create_file("events", 0644, root, NULL, &ra_events_fops);
+
+	debugfs_create_u32("debug_level", 0644, root, &readahead_debug_level);
+	debugfs_create_bool("disable_stateful_method", 0644, root,
+			    &disable_stateful_method);
+
+	return 0;
+}
+
+module_init(readahead_init)
+#else
+#define ra_account(ra, e, pages)	do { } while (0)
+#define readahead_debug_level 		(0)
+#define disable_stateful_method		(0)
+#endif /* CONFIG_DEBUG_READAHEAD */
+
+#define dprintk(args...) \
+	do { if (readahead_debug_level >= 1) printk(KERN_DEBUG args); } while(0)
+#define ddprintk(args...) \
+	do { if (readahead_debug_level >= 2) printk(KERN_DEBUG args); } while(0)
+
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -21,7 +315,7 @@
 EXPORT_SYMBOL(default_unplug_io_fn);
 
 struct backing_dev_info default_backing_dev_info = {
-	.ra_pages	= (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
+	.ra_pages	= MAX_RA_PAGES,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
@@ -49,7 +343,7 @@
 
 static inline unsigned long get_min_readahead(struct file_ra_state *ra)
 {
-	return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	return MIN_RA_PAGES;
 }
 
 static inline void ra_off(struct file_ra_state *ra)
@@ -134,8 +428,10 @@
 			continue;
 		}
 		ret = filler(data, page);
-		if (!pagevec_add(&lru_pvec, page))
+		if (!pagevec_add(&lru_pvec, page)) {
+			cond_resched();
 			__pagevec_lru_add(&lru_pvec);
+		}
 		if (ret) {
 			while (!list_empty(pages)) {
 				struct page *victim;
@@ -173,8 +469,10 @@
 					page->index, GFP_KERNEL)) {
 			ret = mapping->a_ops->readpage(filp, page);
 			if (ret != AOP_TRUNCATED_PAGE) {
-				if (!pagevec_add(&lru_pvec, page))
+				if (!pagevec_add(&lru_pvec, page)) {
+					cond_resched();
 					__pagevec_lru_add(&lru_pvec);
+				}
 				continue;
 			} /* else fall through to release */
 		}
@@ -257,7 +555,8 @@
  */
 static int
 __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-			pgoff_t offset, unsigned long nr_to_read)
+			pgoff_t offset, unsigned long nr_to_read,
+			unsigned long lookahead_size)
 {
 	struct inode *inode = mapping->host;
 	struct page *page;
@@ -270,7 +569,7 @@
 	if (isize == 0)
 		goto out;
 
- 	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
+	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
 
 	/*
 	 * Preallocate as many pages as we will need.
@@ -287,12 +586,15 @@
 			continue;
 
 		read_unlock_irq(&mapping->tree_lock);
+		cond_resched();
 		page = page_cache_alloc_cold(mapping);
 		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
 		page->index = page_offset;
 		list_add(&page->lru, &page_pool);
+		if (page_idx == nr_to_read - lookahead_size)
+			__SetPageReadahead(page);
 		ret++;
 	}
 	read_unlock_irq(&mapping->tree_lock);
@@ -329,7 +631,7 @@
 		if (this_chunk > nr_to_read)
 			this_chunk = nr_to_read;
 		err = __do_page_cache_readahead(mapping, filp,
-						offset, this_chunk);
+						offset, this_chunk, 0);
 		if (err < 0) {
 			ret = err;
 			break;
@@ -338,6 +640,9 @@
 		offset += this_chunk;
 		nr_to_read -= this_chunk;
 	}
+
+	ra_account(NULL, RA_EVENT_READAHEAD, ret);
+
 	return ret;
 }
 
@@ -373,10 +678,16 @@
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read)
 {
+	unsigned long ret;
+
 	if (bdi_read_congested(mapping->backing_dev_info))
 		return -1;
 
-	return __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
+	ret = __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
+
+	ra_account(NULL, RA_EVENT_READAHEAD, ret);
+
+	return ret;
 }
 
 /*
@@ -396,7 +707,11 @@
 	if (!block && bdi_read_congested(mapping->backing_dev_info))
 		return 0;
 
-	actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read);
+	actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
+
+	ra_account(NULL, RA_EVENT_READAHEAD, actual);
+	dprintk("blockable-readahead(ino=%lu, ra=%lu+%lu) = %d\n",
+			mapping->host->i_ino, offset, nr_to_read, actual);
 
 	return check_ra_success(ra, nr_to_read, actual);
 }
@@ -442,7 +757,7 @@
  * @req_size: hint: total size of the read which the caller is performing in
  *            PAGE_CACHE_SIZE units
  *
- * page_cache_readahead() is the main function.  If performs the adaptive
+ * page_cache_readahead() is the main function.  It performs the adaptive
  * readahead window size management and submits the readahead I/O.
  *
  * Note that @filp is purely used for passing on to the ->readpage[s]()
@@ -572,3 +887,1187 @@
 	__get_zone_counts(&active, &inactive, &free, NODE_DATA(numa_node_id()));
 	return min(nr, (inactive + free) / 2);
 }
+
+/*
+ * Adaptive read-ahead.
+ *
+ * Good read patterns are compact both in space and time. The read-ahead logic
+ * tries to grant a larger read-ahead size to better readers under the
+ * constraint of system memory and load pressure.
+ *
+ * It employs two methods to estimate the max thrashing safe read-ahead size:
+ *   1. state based   - the default one
+ *   2. context based - the failsafe one
+ * The integration of the dual methods has the merit of being agile and robust.
+ * It makes the overall design clean: special cases are handled in general by
+ * the stateless method, leaving the stateful one simple and fast.
+ *
+ * To improve throughput and decrease read delay, the logic 'looks ahead'.
+ * In most read-ahead chunks, one page will be selected and tagged with
+ * PG_readahead. Later when the page with PG_readahead is read, the logic
+ * will be notified to submit the next read-ahead chunk in advance.
+ *
+ *                 a read-ahead chunk
+ *    +-----------------------------------------+
+ *    |       # PG_readahead                    |
+ *    +-----------------------------------------+
+ *            ^ When this page is read, notify me for the next read-ahead.
+ *
+ *
+ * Here are some variable names used frequently:
+ *
+ *                                   |<------- la_size ------>|
+ *                  +-----------------------------------------+
+ *                  |                #                        |
+ *                  +-----------------------------------------+
+ *      ra_index -->|<---------------- ra_size -------------->|
+ *
+ */
+
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+
+/*
+ * The nature of read-ahead allows false tests to occur occasionally.
+ * Here we just do not bother to call get_page(); it is meaningless anyway.
+ */
+static inline struct page *__find_page(struct address_space *mapping,
+							pgoff_t offset)
+{
+	return radix_tree_lookup(&mapping->page_tree, offset);
+}
+
+static inline struct page *find_page(struct address_space *mapping,
+							pgoff_t offset)
+{
+	struct page *page;
+
+	read_lock_irq(&mapping->tree_lock);
+	page = __find_page(mapping, offset);
+	read_unlock_irq(&mapping->tree_lock);
+	return page;
+}
+
+/*
+ * Move pages in danger (of thrashing) to the head of inactive_list.
+ * Not expected to happen frequently.
+ */
+static unsigned long rescue_pages(struct page *page, unsigned long nr_pages)
+{
+	int pgrescue;
+	pgoff_t index;
+	struct zone *zone;
+	struct address_space *mapping;
+
+	BUG_ON(!nr_pages || !page);
+	pgrescue = 0;
+	index = page_index(page);
+	mapping = page_mapping(page);
+
+	dprintk("rescue_pages(ino=%lu, index=%lu nr=%lu)\n",
+			mapping->host->i_ino, index, nr_pages);
+
+	for (;;) {
+		zone = page_zone(page);
+		spin_lock_irq(&zone->lru_lock);
+
+		if (!PageLRU(page))
+			goto out_unlock;
+
+		while (page_mapping(page) == mapping &&
+				page_index(page) == index) {
+			struct page *the_page = page;
+			page = next_page(page);
+			if (!PageActive(the_page) &&
+					!PageLocked(the_page) &&
+					page_count(the_page) == 1) {
+				list_move(&the_page->lru, &zone->inactive_list);
+				pgrescue++;
+			}
+			index++;
+			if (!--nr_pages)
+				goto out_unlock;
+		}
+
+		spin_unlock_irq(&zone->lru_lock);
+
+		cond_resched();
+		page = find_page(mapping, index);
+		if (!page)
+			goto out;
+	}
+out_unlock:
+	spin_unlock_irq(&zone->lru_lock);
+out:
+	ra_account(NULL, RA_EVENT_READAHEAD_RESCUE, pgrescue);
+	return nr_pages;
+}
+
+/*
+ * Set a new look-ahead mark at @new_index.
+ * Return 0 if the new mark is successfully set.
+ */
+static inline int renew_lookahead(struct address_space *mapping,
+				struct file_ra_state *ra,
+				pgoff_t index, pgoff_t new_index)
+{
+	struct page *page;
+
+	if (index == ra->lookahead_index &&
+			new_index >= ra->readahead_index)
+		return 1;
+
+	page = find_page(mapping, new_index);
+	if (!page)
+		return 1;
+
+	__SetPageReadahead(page);
+	if (ra->lookahead_index == index)
+		ra->lookahead_index = new_index;
+
+	return 0;
+}
+
+/*
+ * State based calculation of read-ahead request.
+ *
+ * This figure shows the meaning of file_ra_state members:
+ *
+ *             chunk A                            chunk B
+ *  +---------------------------+-------------------------------------------+
+ *  |             #             |                   #                       |
+ *  +---------------------------+-------------------------------------------+
+ *                ^             ^                   ^                       ^
+ *              la_index      ra_index     lookahead_index         readahead_index
+ */
+
+/*
+ * The node's effective length of inactive_list(s).
+ */
+static unsigned long node_free_and_cold_pages(void)
+{
+	unsigned int i;
+	unsigned long sum = 0;
+	struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		sum += zones[i].nr_inactive +
+			zones[i].free_pages - zones[i].pages_low;
+
+	return sum;
+}
+
+/*
+ * The node's accumulated aging activities.
+ */
+static unsigned long node_readahead_aging(void)
+{
+	unsigned long cpu;
+	unsigned long sum = 0;
+	cpumask_t mask = node_to_cpumask(numa_node_id());
+
+	for_each_cpu_mask(cpu, mask)
+		sum += per_cpu(readahead_aging, cpu);
+
+	return sum;
+}
+
+/*
+ * The 64bit cache_hits stores three accumulated values and a counter value.
+ * MSB                                                                   LSB
+ * 3333333333333333 : 2222222222222222 : 1111111111111111 : 0000000000000000
+ */
+static inline int ra_cache_hit(struct file_ra_state *ra, int nr)
+{
+	return (ra->cache_hits >> (nr * 16)) & 0xFFFF;
+}
+
+/*
+ * Conceptual code:
+ * ra_cache_hit(ra, 1) += ra_cache_hit(ra, 0);
+ * ra_cache_hit(ra, 0) = 0;
+ */
+static inline void ra_addup_cache_hit(struct file_ra_state *ra)
+{
+	int n;
+
+	n = ra_cache_hit(ra, 0);
+	ra->cache_hits -= n;
+	n <<= 16;
+	ra->cache_hits += n;
+}
+
+/*
+ * The read-ahead is deemed a success if cache-hit-rate >= 1/readahead_hit_rate.
+ */
+static inline int ra_cache_hit_ok(struct file_ra_state *ra)
+{
+	return ra_cache_hit(ra, 0) * readahead_hit_rate >=
+					(ra->lookahead_index - ra->la_index);
+}
+
+/*
+ * Check if @index falls in the @ra request.
+ */
+static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
+{
+	if (index < ra->la_index || index >= ra->readahead_index)
+		return 0;
+
+	if (index >= ra->ra_index)
+		return 1;
+	else
+		return -1;
+}
+
+/*
+ * Which method is issuing this read-ahead?
+ */
+static inline void ra_set_class(struct file_ra_state *ra,
+				enum ra_class ra_class)
+{
+	unsigned long flags_mask;
+	unsigned long flags;
+	unsigned long old_ra_class;
+
+	flags_mask = ~(RA_CLASS_MASK | (RA_CLASS_MASK << RA_CLASS_SHIFT));
+	flags = ra->flags & flags_mask;
+
+	old_ra_class = (ra->flags & RA_CLASS_MASK) << RA_CLASS_SHIFT;
+
+	ra->flags = flags | old_ra_class | ra_class;
+
+	ra_addup_cache_hit(ra);
+	if (ra_class != RA_CLASS_STATE)
+		ra->cache_hits <<= 16;
+
+	ra->age = node_readahead_aging();
+}
+
+/*
+ * Where is the old read-ahead and look-ahead?
+ */
+static inline void ra_set_index(struct file_ra_state *ra,
+				pgoff_t la_index, pgoff_t ra_index)
+{
+	ra->la_index = la_index;
+	ra->ra_index = ra_index;
+}
+
+/*
+ * Where is the new read-ahead and look-ahead?
+ */
+static inline void ra_set_size(struct file_ra_state *ra,
+				unsigned long ra_size, unsigned long la_size)
+{
+	/* Disable look-ahead for loopback file. */
+	if (unlikely(ra->flags & RA_FLAG_NO_LOOKAHEAD))
+		la_size = 0;
+
+	ra->readahead_index = ra->ra_index + ra_size;
+	ra->lookahead_index = ra->readahead_index - la_size;
+}
+
+/*
+ * Submit IO for the read-ahead request in file_ra_state.
+ */
+static int ra_dispatch(struct file_ra_state *ra,
+			struct address_space *mapping, struct file *filp)
+{
+	pgoff_t eof_index;
+	unsigned long ra_size;
+	unsigned long la_size;
+	int actual;
+	enum ra_class ra_class;
+
+	ra_class = (ra->flags & RA_CLASS_MASK);
+	BUG_ON(ra_class == 0 || ra_class > RA_CLASS_END);
+
+	eof_index = ((i_size_read(mapping->host) - 1) >> PAGE_CACHE_SHIFT) + 1;
+	ra_size = ra->readahead_index - ra->ra_index;
+	la_size = ra->readahead_index - ra->lookahead_index;
+
+	/* Snap to EOF. */
+	if (unlikely(ra->ra_index >= eof_index))
+		return 0;
+	if (ra->readahead_index + ra_size / 2 > eof_index) {
+		if (ra_class == RA_CLASS_CONTEXT_AGGRESSIVE &&
+				eof_index > ra->lookahead_index + 1)
+			la_size = eof_index - ra->lookahead_index;
+		else
+			la_size = 0;
+		ra_size = eof_index - ra->ra_index;
+		ra_set_size(ra, ra_size, la_size);
+	}
+
+	actual = __do_page_cache_readahead(mapping, filp,
+					ra->ra_index, ra_size, la_size);
+
+#ifdef CONFIG_DEBUG_READAHEAD
+	if (ra->flags & RA_FLAG_MMAP)
+		ra_account(ra, RA_EVENT_READAHEAD_MMAP, actual);
+	if (ra->readahead_index == eof_index)
+		ra_account(ra, RA_EVENT_READAHEAD_EOF, actual);
+	if (la_size)
+		ra_account(ra, RA_EVENT_LOOKAHEAD, la_size);
+	if (ra_size > actual)
+		ra_account(ra, RA_EVENT_IO_CACHE_HIT, ra_size - actual);
+	ra_account(ra, RA_EVENT_READAHEAD, actual);
+
+	if (!ra->ra_index && filp->f_dentry->d_inode) {
+		char *fn;
+		static char path[1024];
+		unsigned long size;
+
+		size = (i_size_read(filp->f_dentry->d_inode)+1023)/1024;
+		fn = d_path(filp->f_dentry, filp->f_vfsmnt, path, 1000);
+		if (!IS_ERR(fn))
+			ddprintk("ino %lu is %s size %luK by %s(%d)\n",
+					filp->f_dentry->d_inode->i_ino,
+					fn, size,
+					current->comm, current->pid);
+	}
+
+	dprintk("readahead-%s(ino=%lu, index=%lu, ra=%lu+%lu-%lu) = %d\n",
+			ra_class_name[ra_class],
+			mapping->host->i_ino, ra->la_index,
+			ra->ra_index, ra_size, la_size, actual);
+#endif /* CONFIG_DEBUG_READAHEAD */
+
+	return actual;
+}
+
+/*
+ * Determine the ra request from primitive values.
+ *
+ * It applies the following rules:
+ *   - Subtract the old look-ahead from ra_size to get the safe read-ahead;
+ *   - Set new la_size according to the (still large) ra_size;
+ *   - Apply upper limits;
+ *   - Make sure stream_shift is not too small.
+ *     (So that the next global_shift will not be too small.)
+ *
+ * Input:
+ * ra_size stores the estimated thrashing-threshold.
+ * la_size stores the look-ahead size of previous request.
+ */
+static inline int adjust_rala(unsigned long ra_max,
+				unsigned long *ra_size, unsigned long *la_size)
+{
+	unsigned long stream_shift = *la_size;
+
+	if (*ra_size > *la_size)
+		*ra_size -= *la_size;
+	else {
+		ra_account(NULL, RA_EVENT_READAHEAD_SHRINK, *ra_size);
+		return 0;
+	}
+
+	*la_size = *ra_size / LOOKAHEAD_RATIO;
+
+	if (*ra_size > ra_max)
+		*ra_size = ra_max;
+	if (*la_size > *ra_size)
+		*la_size = *ra_size;
+
+	stream_shift += (*ra_size - *la_size);
+	if (stream_shift < *ra_size / 4)
+		*la_size -= (*ra_size / 4 - stream_shift);
+
+	return 1;
+}
+
+/*
+ * The function estimates two values:
+ * 1. thrashing-threshold for the current stream
+ *    It is returned to make the next read-ahead request.
+ * 2. the remaining safe space for the current chunk
+ *    It will be checked to ensure that the current chunk is safe.
+ *
+ * The computation is pretty accurate under heavy load, but fluctuates more
+ * under light load (with a small global_shift), so the growth of ra_size
+ * must be limited, and a moderately large stream_shift must be ensured.
+ *
+ * This figure illustrates the formula used in the function:
+ * While the stream reads stream_shift pages inside the chunks,
+ * the chunks are shifted global_shift pages inside inactive_list.
+ *
+ *      chunk A                    chunk B
+ *                          |<=============== global_shift ================|
+ *  +-------------+         +-------------------+                          |
+ *  |       #     |         |           #       |            inactive_list |
+ *  +-------------+         +-------------------+                     head |
+ *          |---->|         |---------->|
+ *             |                  |
+ *             +-- stream_shift --+
+ */
+static inline unsigned long compute_thrashing_threshold(
+						struct file_ra_state *ra,
+						unsigned long *remain)
+{
+	unsigned long global_size;
+	unsigned long global_shift;
+	unsigned long stream_shift;
+	unsigned long ra_size;
+	uint64_t ll;
+
+	global_size = node_free_and_cold_pages();
+	global_shift = node_readahead_aging() - ra->age;
+	global_shift |= 1UL;
+	stream_shift = ra_cache_hit(ra, 0);
+
+	ll = (uint64_t) stream_shift * (global_size >> 9) * readahead_ratio * 5;
+	do_div(ll, global_shift);
+	ra_size = ll;
+
+	if (global_size > global_shift) {
+		ll = (uint64_t) stream_shift * (global_size - global_shift);
+		do_div(ll, global_shift);
+		*remain = ll;
+	} else
+		*remain = 0;
+
+	ddprintk("compute_thrashing_threshold: "
+			"at %lu ra %lu=%lu*%lu/%lu, remain %lu for %lu\n",
+			ra->readahead_index, ra_size,
+			stream_shift, global_size, global_shift,
+			*remain, ra->readahead_index - ra->lookahead_index);
+
+	return ra_size;
+}
+
+/*
+ * Main function for file_ra_state based read-ahead.
+ */
+static inline unsigned long
+state_based_readahead(struct address_space *mapping, struct file *filp,
+			struct file_ra_state *ra,
+			struct page *page, pgoff_t index,
+			unsigned long ra_size, unsigned long ra_max)
+{
+	unsigned long ra_old;
+	unsigned long la_size;
+	unsigned long remain_space;
+	unsigned long growth_limit;
+
+	la_size = ra->readahead_index - index;
+	ra_old = ra->readahead_index - ra->ra_index;
+	growth_limit = ra_size + ra_max / 16 +
+				(2 + readahead_ratio / 64) * ra_old;
+	ra_size = compute_thrashing_threshold(ra, &remain_space);
+
+	if (page && remain_space <= la_size && la_size > 1) {
+		rescue_pages(page, la_size);
+		return 0;
+	}
+
+	if (!adjust_rala(min(ra_max, growth_limit), &ra_size, &la_size))
+		return 0;
+
+	ra_set_class(ra, RA_CLASS_STATE);
+	ra_set_index(ra, index, ra->readahead_index);
+	ra_set_size(ra, ra_size, la_size);
+
+	return ra_dispatch(ra, mapping, filp);
+}
+
+/*
+ * Page cache context based estimation of read-ahead/look-ahead size/index.
+ *
+ * The logic first looks around to find the start point of the next
+ * read-ahead, and then, if necessary, looks backward in the inactive_list to
+ * get an estimate of the thrashing-threshold.
+ *
+ * The estimation theory can be illustrated with the figure below:
+ *
+ *   chunk A           chunk B                      chunk C                 head
+ *
+ *   l01 l11           l12   l21                    l22
+ *| |-->|-->|       |------>|-->|                |------>|
+ *| +-------+       +-----------+                +-------------+               |
+ *| |   #   |       |       #   |                |       #     |               |
+ *| +-------+       +-----------+                +-------------+               |
+ *| |<==============|<===========================|<============================|
+ *        L0                     L1                            L2
+ *
+ * Let f(l) = L be a map from
+ * 	l: the number of pages read by the stream
+ * to
+ * 	L: the number of pages pushed into inactive_list in the meantime
+ * then
+ * 	f(l01) <= L0
+ * 	f(l11 + l12) = L1
+ * 	f(l21 + l22) = L2
+ * 	...
+ * 	f(l01 + l11 + ...) <= Sum(L0 + L1 + ...)
+ *			   <= Length(inactive_list) = f(thrashing-threshold)
+ *
+ * So the count of continuous history pages left in the inactive_list is always
+ * a lower bound on the true thrashing-threshold.
+ */
+
+#define PAGE_REFCNT_0           0
+#define PAGE_REFCNT_1           (1 << PG_referenced)
+#define PAGE_REFCNT_2           (1 << PG_active)
+#define PAGE_REFCNT_3           ((1 << PG_active) | (1 << PG_referenced))
+#define PAGE_REFCNT_MASK        PAGE_REFCNT_3
+
+/*
+ * STATUS   REFERENCE COUNT
+ *  __                   0
+ *  _R       PAGE_REFCNT_1
+ *  A_       PAGE_REFCNT_2
+ *  AR       PAGE_REFCNT_3
+ *
+ *  A/R: Active / Referenced
+ */
+static inline unsigned long page_refcnt(struct page *page)
+{
+        return page->flags & PAGE_REFCNT_MASK;
+}
+
+/*
+ * STATUS   REFERENCE COUNT      TYPE
+ *  __                   0      fresh
+ *  _R       PAGE_REFCNT_1      stale
+ *  A_       PAGE_REFCNT_2      disturbed once
+ *  AR       PAGE_REFCNT_3      disturbed twice
+ *
+ *  A/R: Active / Referenced
+ */
+static inline unsigned long cold_page_refcnt(struct page *page)
+{
+	if (!page || PageActive(page))
+		return 0;
+
+	return page_refcnt(page);
+}
+
+static inline char page_refcnt_symbol(struct page *page)
+{
+	if (!page)
+		return 'X';
+
+	switch (page_refcnt(page)) {
+		case 0:
+			return '_';
+		case PAGE_REFCNT_1:
+			return '-';
+		case PAGE_REFCNT_2:
+			return '=';
+		case PAGE_REFCNT_3:
+			return '#';
+		default:
+			return '?';
+	}
+}
+
+/*
+ * Count/estimate cache hits in range [first_index, last_index].
+ * The estimation is simple and optimistic.
+ */
+static int count_cache_hit(struct address_space *mapping,
+				pgoff_t first_index, pgoff_t last_index)
+{
+	struct page *page;
+	int size = last_index - first_index + 1;
+	int count = 0;
+	int i;
+
+	cond_resched();
+	read_lock_irq(&mapping->tree_lock);
+
+	/*
+	 * The first page may well be the chunk head and have been accessed
+	 * already, so starting the sampling at index 0 is what makes the
+	 * estimation optimistic. This behavior guarantees a readahead when
+	 * (size < ra_max) and (readahead_hit_rate >= 16).
+	 */
+	for (i = 0; i < 16;) {
+		page = __find_page(mapping, first_index +
+						size * ((i++ * 29) & 15) / 16);
+		if (cold_page_refcnt(page) >= PAGE_REFCNT_1 && ++count >= 2)
+			break;
+	}
+
+	read_unlock_irq(&mapping->tree_lock);
+
+	return size * count / i;
+}
+
+/*
+ * Look back and check history pages to estimate thrashing-threshold.
+ */
+static unsigned long query_page_cache_segment(struct address_space *mapping,
+				struct file_ra_state *ra,
+				unsigned long *remain, pgoff_t offset,
+				unsigned long ra_min, unsigned long ra_max)
+{
+	pgoff_t index;
+	unsigned long count;
+	unsigned long nr_lookback;
+	struct radix_tree_cache cache;
+
+	/*
+	 * Scan backward and check the nearest @ra_max pages.
+	 * The count here determines ra_size.
+	 */
+	cond_resched();
+	read_lock_irq(&mapping->tree_lock);
+	index = radix_tree_scan_hole_backward(&mapping->page_tree,
+							offset, ra_max);
+#ifdef DEBUG_READAHEAD_RADIXTREE
+	WARN_ON(index > offset);
+	if (index != offset)
+		WARN_ON(!__find_page(mapping, index + 1));
+	if (index && offset - index < ra_max)
+		WARN_ON(__find_page(mapping, index));
+#endif
+	read_unlock_irq(&mapping->tree_lock);
+
+	*remain = offset - index;
+
+	if (offset == ra->readahead_index && ra_cache_hit_ok(ra))
+		count = *remain;
+	else if (count_cache_hit(mapping, index + 1, offset) *
+						readahead_hit_rate >= *remain)
+		count = *remain;
+	else
+		count = ra_min;
+
+	/*
+	 * Unnecessary to count more?
+	 */
+	if (count < ra_max)
+		goto out;
+
+	if (unlikely(ra->flags & RA_FLAG_NO_LOOKAHEAD))
+		goto out;
+
+	/*
+	 * Check the far pages coarsely.
+	 * The big count here helps increase la_size.
+	 */
+	nr_lookback = ra_max * (LOOKAHEAD_RATIO + 1) *
+						100 / (readahead_ratio + 1);
+
+	cond_resched();
+	radix_tree_cache_init(&cache);
+	read_lock_irq(&mapping->tree_lock);
+	for (count += ra_max; count < nr_lookback; count += ra_max) {
+		struct radix_tree_node *node;
+		node = radix_tree_cache_lookup_node(&mapping->page_tree,
+						&cache, offset - count, 1);
+#ifdef DEBUG_READAHEAD_RADIXTREE
+		if (node != radix_tree_lookup_node(&mapping->page_tree,
+							offset - count, 1))
+			BUG();
+#endif
+		if (!node)
+			break;
+	}
+	read_unlock_irq(&mapping->tree_lock);
+
+out:
+	/*
+	 *  For a sequential read that extends from index 0, the counted value
+	 *  may well be far under the true threshold, so return it unmodified
+	 *  for further processing in adjust_rala_aggressive().
+	 */
+	if (count >= offset)
+		count = offset;
+	else
+		count = max(ra_min, count * readahead_ratio / 100);
+
+	ddprintk("query_page_cache_segment: "
+			"ino=%lu, idx=%lu, count=%lu, remain=%lu\n",
+			mapping->host->i_ino, offset, count, *remain);
+
+	return count;
+}
+
+/*
+ * Find past-the-end index of the segment before @index.
+ */
+static inline pgoff_t find_segtail_backward(struct address_space *mapping,
+					pgoff_t index, unsigned long max_scan)
+{
+	struct radix_tree_cache cache;
+	struct page *page;
+	pgoff_t origin;
+
+	origin = index;
+	if (max_scan > index)
+		max_scan = index;
+
+	cond_resched();
+	radix_tree_cache_init(&cache);
+	read_lock_irq(&mapping->tree_lock);
+	for (; origin - index < max_scan;) {
+		page = radix_tree_cache_lookup(&mapping->page_tree,
+							&cache, --index);
+		if (page) {
+			read_unlock_irq(&mapping->tree_lock);
+			return index + 1;
+		}
+	}
+	read_unlock_irq(&mapping->tree_lock);
+
+	return 0;
+}
+
+/*
+ * Find past-the-end index of the segment at @index.
+ */
+static inline pgoff_t find_segtail(struct address_space *mapping,
+					pgoff_t index, unsigned long max_scan)
+{
+	pgoff_t ra_index;
+
+	cond_resched();
+	read_lock_irq(&mapping->tree_lock);
+	ra_index = radix_tree_scan_hole(&mapping->page_tree, index, max_scan);
+#ifdef DEBUG_READAHEAD_RADIXTREE
+	BUG_ON(!__find_page(mapping, index));
+	WARN_ON(ra_index < index);
+	if (ra_index != index && !__find_page(mapping, ra_index - 1))
+		printk(KERN_ERR "radix_tree_scan_hole(index=%lu ra_index=%lu "
+				"max_scan=%lu nrpages=%lu) fooled!\n",
+				index, ra_index, max_scan, mapping->nrpages);
+	if (ra_index != ~0UL && ra_index - index < max_scan)
+		WARN_ON(__find_page(mapping, ra_index));
+#endif
+	read_unlock_irq(&mapping->tree_lock);
+
+	if (ra_index <= index + max_scan)
+		return ra_index;
+	else
+		return 0;
+}
+
+/*
+ * Determine the request parameters for context based read-ahead that extends
+ * from start of file.
+ *
+ * The major weakness of the stateless method is perhaps the slow ramp-up of
+ * ra_size. The logic tries to make up for this in the important case of
+ * sequential reads that extend from the start of file. In this case, ra_size
+ * is not chosen to make the whole next chunk safe (as in normal ones); only
+ * half of it is safe. The added 'unsafe' half is the look-ahead part. It
+ * is expected to be safeguarded by rescue_pages() when the previous chunks are
+ * lost.
+ */
+static inline int adjust_rala_aggressive(unsigned long ra_max,
+				unsigned long *ra_size, unsigned long *la_size)
+{
+	pgoff_t index = *ra_size;
+
+	*ra_size -= min(*ra_size, *la_size);
+	*ra_size = *ra_size * readahead_ratio / 100;
+	*la_size = index * readahead_ratio / 100;
+	*ra_size += *la_size;
+
+	if (*ra_size > ra_max)
+		*ra_size = ra_max;
+	if (*la_size > *ra_size)
+		*la_size = *ra_size;
+
+	return 1;
+}
+
+/*
+ * Main function for page context based read-ahead.
+ */
+static inline int
+try_context_based_readahead(struct address_space *mapping,
+			struct file_ra_state *ra, struct page *prev_page,
+			struct page *page, pgoff_t index,
+			unsigned long ra_min, unsigned long ra_max)
+{
+	pgoff_t ra_index;
+	unsigned long ra_size;
+	unsigned long la_size;
+	unsigned long remain_pages;
+
+	/* Where to start read-ahead?
+	 * NFSv3 daemons may process adjacent requests in parallel,
+	 * leading to many locally disordered, globally sequential reads.
+	 * So do not require nearby history pages to be present or accessed.
+	 */
+	if (page) {
+		ra_index = find_segtail(mapping, index, ra_max * 5 / 4);
+		if (!ra_index)
+			return -1;
+	} else if (prev_page || find_page(mapping, index - 1)) {
+		ra_index = index;
+	} else if (readahead_hit_rate > 1) {
+		ra_index = find_segtail_backward(mapping, index,
+						readahead_hit_rate + ra_min);
+		if (!ra_index)
+			return 0;
+		ra_min += 2 * (index - ra_index);
+		index = ra_index;	/* pretend the request starts here */
+	} else
+		return 0;
+
+	ra_size = query_page_cache_segment(mapping, ra, &remain_pages,
+							index, ra_min, ra_max);
+
+	la_size = ra_index - index;
+	if (page && remain_pages <= la_size &&
+			remain_pages < index && la_size > 1) {
+		rescue_pages(page, la_size);
+		return -1;
+	}
+
+	if (ra_size == index) {
+		if (!adjust_rala_aggressive(ra_max, &ra_size, &la_size))
+			return -1;
+		ra_set_class(ra, RA_CLASS_CONTEXT_AGGRESSIVE);
+	} else {
+		if (!adjust_rala(ra_max, &ra_size, &la_size))
+			return -1;
+		ra_set_class(ra, RA_CLASS_CONTEXT);
+	}
+
+	ra_set_index(ra, index, ra_index);
+	ra_set_size(ra, ra_size, la_size);
+
+	return 1;
+}
+
+/*
+ * Read-ahead on start of file.
+ *
+ * The strategies here are most important for small files.
+ * 1. Set a moderately large read-ahead size;
+ * 2. Issue the next read-ahead request as soon as possible.
+ *
+ * But be careful, there are some applications that dip into only the very head
+ * of a file. The most important thing is to prevent them from triggering the
+ * next (much larger) read-ahead request, which leads to lots of cache misses.
+ * Two pages should be enough for them, correct me if I'm wrong.
+ */
+static inline unsigned long
+newfile_readahead(struct address_space *mapping,
+		struct file *filp, struct file_ra_state *ra,
+		unsigned long req_size, unsigned long ra_min)
+{
+	unsigned long ra_size;
+	unsigned long la_size;
+
+	if (req_size > ra_min) /* larger value risks thrashing */
+		req_size = ra_min;
+
+	if (unlikely(ra->flags & RA_FLAG_NFSD)) {
+		ra_size = MIN_NFSD_PAGES;
+		la_size = 0;
+	} else {
+		ra_size = 4 * req_size;
+		la_size = 2 * req_size;
+	}
+
+	ra_set_class(ra, RA_CLASS_NEWFILE);
+	ra_set_index(ra, 0, 0);
+	ra_set_size(ra, ra_size, la_size);
+
+	return ra_dispatch(ra, mapping, filp);
+}
+
+/*
+ * Backward prefetching.
+ * No look ahead and thrashing threshold estimation for stepping backward
+ * pattern: should be unnecessary.
+ */
+static inline int
+try_read_backward(struct file_ra_state *ra, pgoff_t begin_index,
+			unsigned long ra_size, unsigned long ra_max)
+{
+	pgoff_t end_index;
+
+	/* Are we reading backward? */
+	if (begin_index > ra->prev_page)
+		return 0;
+
+	if ((ra->flags & RA_CLASS_MASK) == RA_CLASS_BACKWARD &&
+					ra_has_index(ra, ra->prev_page)) {
+		ra_size += 2 * ra_cache_hit(ra, 0);
+		end_index = ra->la_index;
+	} else {
+		ra_size += ra_size + ra_size * (readahead_hit_rate - 1) / 2;
+		end_index = ra->prev_page;
+	}
+
+	if (ra_size > ra_max)
+		ra_size = ra_max;
+
+	/* Read traces close enough to be covered by the prefetching? */
+	if (end_index > begin_index + ra_size)
+		return 0;
+
+	begin_index = end_index - ra_size;
+
+	ra_set_class(ra, RA_CLASS_BACKWARD);
+	ra_set_index(ra, begin_index, begin_index);
+	ra_set_size(ra, ra_size, 0);
+
+	return 1;
+}
+
+/*
+ * Readahead thrashing recovery.
+ */
+static inline unsigned long
+thrashing_recovery_readahead(struct address_space *mapping,
+				struct file *filp, struct file_ra_state *ra,
+				pgoff_t index, unsigned long ra_max)
+{
+	unsigned long ra_size;
+
+	if (readahead_debug_level && find_page(mapping, index - 1))
+		ra_account(ra, RA_EVENT_READAHEAD_MUTILATE,
+						ra->readahead_index - index);
+	ra_account(ra, RA_EVENT_READAHEAD_THRASHING,
+						ra->readahead_index - index);
+
+	/*
+	 * Some thrashing occurs in (ra_index, la_index], in which case the
+	 * old read-ahead chunk is lost soon after the new one is allocated.
+	 * Ensure that we recover all needed pages in the old chunk.
+	 */
+	if (index < ra->ra_index)
+		ra_size = ra->ra_index - index;
+	else {
+		/* After thrashing, we know the exact thrashing-threshold. */
+		ra_size = ra_cache_hit(ra, 0);
+
+		/* And we'd better be a bit conservative. */
+		ra_size = ra_size * 3 / 4;
+	}
+
+	if (ra_size > ra_max)
+		ra_size = ra_max;
+
+	ra_set_class(ra, RA_CLASS_THRASHING);
+	ra_set_index(ra, index, index);
+	ra_set_size(ra, ra_size, ra_size / LOOKAHEAD_RATIO);
+
+	return ra_dispatch(ra, mapping, filp);
+}
+
+/*
+ * If there is a previous sequential read, the read at the new position is
+ * likely to be sequential as well.
+ * Databases are known to exhibit this seek-and-read-one-block pattern.
+ */
+static inline int
+try_readahead_on_seek(struct file_ra_state *ra, pgoff_t index,
+			unsigned long ra_size, unsigned long ra_max)
+{
+	unsigned long hit0 = ra_cache_hit(ra, 0);
+	unsigned long hit1 = ra_cache_hit(ra, 1) + hit0;
+	unsigned long hit2 = ra_cache_hit(ra, 2);
+	unsigned long hit3 = ra_cache_hit(ra, 3);
+
+	/* There's a previous read-ahead request? */
+	if (!ra_has_index(ra, ra->prev_page))
+		return 0;
+
+	/* The previous read-ahead sequences have similar sizes? */
+	if (!(ra_size < hit1 && hit1 > hit2 / 2 &&
+				hit2 > hit3 / 2 &&
+				hit3 > hit1 / 2))
+		return 0;
+
+	hit1 = max(hit1, hit2);
+
+	/* Follow the same prefetching direction. */
+	if ((ra->flags & RA_CLASS_MASK) == RA_CLASS_BACKWARD)
+		index = ((index > hit1 - ra_size) ? index - hit1 + ra_size : 0);
+
+	ra_size = min(hit1, ra_max);
+
+	ra_set_class(ra, RA_CLASS_SEEK);
+	ra_set_index(ra, index, index);
+	ra_set_size(ra, ra_size, 0);
+
+	return 1;
+}
+
+/*
+ * ra_min is mainly determined by the size of cache memory.
+ * Table of concrete numbers for 4KB page size:
+ *   inactive + free (MB):    4   8   16   32   64  128  256  512 1024
+ *            ra_min (KB):   16  16   16   16   20   24   32   48   64
+ */
+static inline void get_readahead_bounds(struct file_ra_state *ra,
+					unsigned long *ra_min,
+					unsigned long *ra_max)
+{
+	unsigned long pages;
+
+	pages = max_sane_readahead(KB(1024*1024));
+	*ra_max = min(min(pages, 0xFFFFUL), ra->ra_pages);
+	*ra_min = min(min(MIN_RA_PAGES + (pages>>13), KB(128)), *ra_max/2);
+}
+
+/**
+ * page_cache_readahead_adaptive - adaptive read-ahead main function
+ * @mapping, @ra, @filp: the same as page_cache_readahead()
+ * @prev_page: the page at @index-1, may be NULL to let the function find it
+ * @page: the page at @index, or NULL if non-present
+ * @begin_index, @index, @end_index: offsets into @mapping
+ * 		[@begin_index, @end_index) is the read the caller is performing
+ *	 	@index indicates the page to be read now
+ *
+ * page_cache_readahead_adaptive() is the entry point of the adaptive
+ * read-ahead logic. It tries a set of methods in turn to determine the
+ * appropriate readahead action and submits the readahead I/O.
+ *
+ * The caller is expected to point ra->prev_page to the previously accessed
+ * page, and to call it on two conditions:
+ * 1. @page == NULL
+ *    A cache miss happened, some pages have to be read in
+ * 2. @page != NULL && PageReadahead(@page)
+ *    A look-ahead mark was encountered; it was set by a previous read-ahead
+ *    invocation to instruct the caller to give the function a chance to
+ *    check up and submit the next read-ahead in advance.
+ */
+unsigned long
+page_cache_readahead_adaptive(struct address_space *mapping,
+			struct file_ra_state *ra, struct file *filp,
+			struct page *prev_page, struct page *page,
+			pgoff_t begin_index, pgoff_t index, pgoff_t end_index)
+{
+	unsigned long size;
+	unsigned long ra_min;
+	unsigned long ra_max;
+	int ret;
+
+	might_sleep();
+
+	if (page) {
+		if (!TestClearPageReadahead(page))
+			return 0;
+		if (bdi_read_congested(mapping->backing_dev_info)) {
+			ra_account(ra, RA_EVENT_IO_CONGESTION,
+							end_index - index);
+			return 0;
+		}
+		if (laptop_mode && laptop_spinned_down()) {
+			if (!renew_lookahead(mapping, ra, index,
+						index + LAPTOP_POLL_INTERVAL))
+				return 0;
+		}
+	}
+
+	if (page)
+		ra_account(ra, RA_EVENT_LOOKAHEAD_HIT,
+				ra->readahead_index - ra->lookahead_index);
+	else if (index)
+		ra_account(ra, RA_EVENT_CACHE_MISS, end_index - begin_index);
+
+	size = end_index - index;
+	get_readahead_bounds(ra, &ra_min, &ra_max);
+
+	/* readahead disabled? */
+	if (unlikely(!ra_max || !readahead_ratio)) {
+		size = max_sane_readahead(size);
+		goto readit;
+	}
+
+	/*
+	 * Start of file.
+	 */
+	if (index == 0)
+		return newfile_readahead(mapping, filp, ra, end_index, ra_min);
+
+	/*
+	 * State based sequential read-ahead.
+	 */
+	if (!disable_stateful_method &&
+			index == ra->lookahead_index && ra_cache_hit_ok(ra))
+		return state_based_readahead(mapping, filp, ra, page,
+							index, size, ra_max);
+
+	/*
+	 * Recover from possible thrashing.
+	 */
+	if (!page && index == ra->prev_page + 1 && ra_has_index(ra, index))
+		return thrashing_recovery_readahead(mapping, filp, ra,
+								index, ra_max);
+
+	/*
+	 * Backward read-ahead.
+	 */
+	if (!page && begin_index == index &&
+				try_read_backward(ra, index, size, ra_max))
+		return ra_dispatch(ra, mapping, filp);
+
+	/*
+	 * Context based sequential read-ahead.
+	 */
+	ret = try_context_based_readahead(mapping, ra, prev_page, page,
+							index, ra_min, ra_max);
+	if (ret > 0)
+		return ra_dispatch(ra, mapping, filp);
+	if (ret < 0)
+		return 0;
+
+	/* No action on look ahead time? */
+	if (page) {
+		ra_account(ra, RA_EVENT_LOOKAHEAD_NOACTION,
+						ra->readahead_index - index);
+		return 0;
+	}
+
+	/*
+	 * Random read that follows a sequential one.
+	 */
+	if (try_readahead_on_seek(ra, index, size, ra_max))
+		return ra_dispatch(ra, mapping, filp);
+
+	/*
+	 * Random read.
+	 */
+	if (size > ra_max)
+		size = ra_max;
+
+readit:
+	size = __do_page_cache_readahead(mapping, filp, index, size, 0);
+
+	ra_account(ra, RA_EVENT_READRANDOM, size);
+	dprintk("readrandom(ino=%lu, pages=%lu, index=%lu-%lu-%lu) = %lu\n",
+			mapping->host->i_ino, mapping->nrpages,
+			begin_index, index, end_index, size);
+
+	return size;
+}
+
+/**
+ * readahead_cache_hit - adaptive read-ahead feedback function
+ * @ra: file_ra_state which holds the readahead state
+ * @page: the page just accessed
+ *
+ * readahead_cache_hit() is the feedback route of the adaptive read-ahead
+ * logic. It must be called on every access to the read-ahead pages.
+ */
+void fastcall readahead_cache_hit(struct file_ra_state *ra, struct page *page)
+{
+	if (PageActive(page) || PageReferenced(page))
+		return;
+
+	if (!PageUptodate(page))
+		ra_account(ra, RA_EVENT_IO_BLOCK, 1);
+
+	if (!ra_has_index(ra, page->index))
+		return;
+
+	ra->cache_hits++;
+
+	if (page->index >= ra->ra_index)
+		ra_account(ra, RA_EVENT_READAHEAD_HIT, 1);
+	else
+		ra_account(ra, RA_EVENT_READAHEAD_HIT, -1);
+}
+
+#endif /* CONFIG_ADAPTIVE_READAHEAD */
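
The cache_hits field used throughout the readahead code above packs four 16-bit counters into one 64-bit word: slot 0 is the live hit counter, and slots 1-3 hold history that ra_addup_cache_hit() folds up and ra_set_class() ages with a 16-bit shift on a class change. A small user-space sketch of that packing, using plain stdint types instead of file_ra_state:

#include <stdio.h>
#include <stdint.h>

/* slot 0 = current counter, slots 1..3 = accumulated history */
static unsigned int cache_hit(uint64_t hits, int nr)
{
	return (hits >> (nr * 16)) & 0xFFFF;
}

/* like ra_addup_cache_hit(): fold slot 0 into slot 1, clear slot 0 */
static uint64_t addup_cache_hit(uint64_t hits)
{
	uint64_t n = cache_hit(hits, 0);

	hits -= n;		/* clear slot 0 */
	hits += n << 16;	/* accumulate into slot 1 */
	return hits;
}

int main(void)
{
	uint64_t hits = 0;
	int i;

	for (i = 0; i < 300; i++)	/* 300 read-ahead page hits */
		hits++;
	hits = addup_cache_hit(hits);
	hits <<= 16;			/* class change: age the history */

	printf("slot0=%u slot1=%u slot2=%u slot3=%u\n",
	       cache_hit(hits, 0), cache_hit(hits, 1),
	       cache_hit(hits, 2), cache_hit(hits, 3));
	return 0;
}
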
diff -urN oldtree/mm/swap.c newtree/mm/swap.c
--- oldtree/mm/swap.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/swap.c	2006-04-01 08:53:57.995126000 -0500
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swap-prefetch.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -127,6 +128,8 @@
 		ClearPageReferenced(page);
 	} else if (!PageReferenced(page)) {
 		SetPageReferenced(page);
+		if (PageLRU(page))
+			inc_readahead_aging();
 	}
 }
 
@@ -382,6 +385,46 @@
 	pagevec_reinit(pvec);
 }
 
+static inline void __pagevec_lru_add_tail(struct pagevec *pvec)
+{
+	int i;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock_irq(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock_irq(&zone->lru_lock);
+		}
+		BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+		add_page_to_inactive_list_tail(zone, page);
+	}
+	if (zone)
+		spin_unlock_irq(&zone->lru_lock);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
+/*
+ * Function used solely to put pages back onto the lru at the tail of the
+ * inactive list, preserving the lru order. Currently only used by swap
+ * prefetch.
+ */
+void fastcall lru_cache_add_tail(struct page *page)
+{
+	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
+
+	page_cache_get(page);
+	if (!pagevec_add(pvec, page))
+		__pagevec_lru_add_tail(pvec);
+	put_cpu_var(lru_add_pvecs);
+}
+
 /*
  * Try to drop buffers from the pages in a pagevec
  */
@@ -536,5 +579,8 @@
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+
+	prepare_swap_prefetch();
+
 	hotcpu_notifier(cpu_swap_callback, 0);
 }
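
lru_cache_add_tail() above follows the usual pagevec pattern: pages are gathered in a per-CPU vector and only pushed to the tail of the inactive list once pagevec_add() reports the vector full, so the zone lru_lock is taken once per batch rather than once per page. A user-space model of that batching, with a hypothetical flush() standing in for __pagevec_lru_add_tail() and 14 as the batch size (matching the kernel's PAGEVEC_SIZE):

#include <stdio.h>

#define PAGEVEC_SIZE 14

struct pagevec {
	unsigned int nr;
	void *pages[PAGEVEC_SIZE];
};

/* returns the remaining space, 0 meaning "full, flush me" */
static int pagevec_add(struct pagevec *pvec, void *page)
{
	pvec->pages[pvec->nr++] = page;
	return PAGEVEC_SIZE - pvec->nr;
}

/* stand-in for __pagevec_lru_add_tail(): one lock round-trip per batch */
static void flush(struct pagevec *pvec)
{
	printf("flushing %u pages to the inactive tail\n", pvec->nr);
	pvec->nr = 0;
}

int main(void)
{
	struct pagevec pvec = { .nr = 0 };
	int dummy[40];

	for (int i = 0; i < 40; i++)
		if (!pagevec_add(&pvec, &dummy[i]))
			flush(&pvec);
	if (pvec.nr)
		flush(&pvec);	/* drain the partial batch */
	return 0;
}
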
diff -urN oldtree/mm/swap_prefetch.c newtree/mm/swap_prefetch.c
--- oldtree/mm/swap_prefetch.c	1969-12-31 19:00:00.000000000 -0500
+++ newtree/mm/swap_prefetch.c	2006-04-01 08:54:41.745860250 -0500
@@ -0,0 +1,617 @@
+/*
+ * linux/mm/swap_prefetch.c
+ *
+ * Copyright (C) 2005-2006 Con Kolivas
+ *
+ * Written by Con Kolivas <kernel@kolivas.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/swap-prefetch.h>
+#include <linux/ioprio.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/syscalls.h>
+#include <linux/writeback.h>
+
+/*
+ * Time to delay prefetching if the vm is busy or prefetching was
+ * unsuccessful. There needs to be at least this much idle time, so in
+ * practice the delay can be much longer.
+ */
+#define PREFETCH_DELAY	(HZ * 5)
+
+#define PREFETCH_NORMAL		(1 << 0)
+#define PREFETCH_AGGRESSIVE 	(1 << 1)
+/*
+ * sysctl - enable/disable swap prefetching bits
+ * This is composed of the bitflags PREFETCH_NORMAL and PREFETCH_AGGRESSIVE.
+ * Once PREFETCH_AGGRESSIVE is set, swap prefetching will be performed as much
+ * as possible irrespective of load conditions and then the
+ * PREFETCH_AGGRESSIVE bit will be unset.
+ */
+int swap_prefetch __read_mostly = PREFETCH_NORMAL;
+
+#define aggressive_prefetch	(unlikely(swap_prefetch & PREFETCH_AGGRESSIVE))
+
+struct swapped_root {
+	unsigned long		busy;		/* vm busy */
+	spinlock_t		lock;		/* protects all data */
+	struct list_head	list;		/* MRU list of swapped pages */
+	struct radix_tree_root	swap_tree;	/* Lookup tree of pages */
+	unsigned int		count;		/* Number of entries */
+	unsigned int		maxcount;	/* Maximum entries allowed */
+	kmem_cache_t		*cache;		/* Of struct swapped_entry */
+};
+
+static struct swapped_root swapped = {
+	.lock		= SPIN_LOCK_UNLOCKED,
+	.list  		= LIST_HEAD_INIT(swapped.list),
+	.swap_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
+};
+
+static task_t *kprefetchd_task;
+
+/*
+ * We check to see that no part of the vm is busy. If it is, this interrupts
+ * trickle_swap and makes it wait another PREFETCH_DELAY. Purposefully racy.
+ */
+inline void delay_swap_prefetch(void)
+{
+	if (!test_bit(0, &swapped.busy))
+		__set_bit(0, &swapped.busy);
+}
+
+/*
+ * Drop behind accounting which keeps a list of the most recently used swap
+ * entries.
+ */
+void add_to_swapped_list(struct page *page)
+{
+	struct swapped_entry *entry;
+	unsigned long index;
+	int wakeup;
+
+	if (!swap_prefetch)
+		return;
+
+	wakeup = 0;
+
+	spin_lock(&swapped.lock);
+	if (swapped.count >= swapped.maxcount) {
+		/*
+		 * We limit the number of entries to 2/3 of physical ram.
+		 * Once the number of entries exceeds this we start removing
+		 * the least recently used entries.
+		 */
+		entry = list_entry(swapped.list.next,
+			struct swapped_entry, swapped_list);
+		radix_tree_delete(&swapped.swap_tree, entry->swp_entry.val);
+		list_del(&entry->swapped_list);
+		swapped.count--;
+	} else {
+		entry = kmem_cache_alloc(swapped.cache, GFP_ATOMIC);
+		if (unlikely(!entry))
+			/* bad, can't allocate more mem */
+			goto out_locked;
+	}
+
+	index = page_private(page);
+	entry->swp_entry.val = index;
+	/*
+	 * On numa we need to store the node id to ensure that we prefetch to
+	 * the same node it came from.
+	 */
+	store_swap_entry_node(entry, page);
+
+	if (likely(!radix_tree_insert(&swapped.swap_tree, index, entry))) {
+		/*
+		 * If this is the first entry, kprefetchd needs to be
+		 * (re)started.
+		 */
+		if (!swapped.count)
+			wakeup = 1;
+		list_add(&entry->swapped_list, &swapped.list);
+		swapped.count++;
+	}
+
+out_locked:
+	spin_unlock(&swapped.lock);
+
+	/* Do the wakeup outside the lock to shorten lock hold time. */
+	if (wakeup)
+		wake_up_process(kprefetchd_task);
+
+	return;
+}
+
+/*
+ * Removes entries from the swapped_list. The radix tree allows us to quickly
+ * look up the entry from the index without having to iterate over the whole
+ * list.
+ */
+void remove_from_swapped_list(const unsigned long index)
+{
+	struct swapped_entry *entry;
+	unsigned long flags;
+
+	if (list_empty(&swapped.list))
+		return;
+
+	spin_lock_irqsave(&swapped.lock, flags);
+	entry = radix_tree_delete(&swapped.swap_tree, index);
+	if (likely(entry)) {
+		list_del_init(&entry->swapped_list);
+		swapped.count--;
+		kmem_cache_free(swapped.cache, entry);
+	}
+	spin_unlock_irqrestore(&swapped.lock, flags);
+}
+
+enum trickle_return {
+	TRICKLE_SUCCESS,
+	TRICKLE_FAILED,
+	TRICKLE_DELAY,
+};
+
+struct node_stats {
+	unsigned long	last_free;
+	/* Free ram after a cycle of prefetching */
+	unsigned long	current_free;
+	/* Free ram on this cycle of checking prefetch_suitable */
+	unsigned long	prefetch_watermark;
+	/* Maximum amount we will prefetch to */
+	unsigned long	highfree[MAX_NR_ZONES];
+	/* The amount of free ram before we start prefetching */
+	unsigned long	lowfree[MAX_NR_ZONES];
+	/* The amount of free ram where we will stop prefetching */
+	unsigned long	*pointfree[MAX_NR_ZONES];
+	/* highfree or lowfree depending on whether we've hit a watermark */
+};
+
+/*
+ * prefetch_stats stores the free ram data of each node and this is used to
+ * determine if a node is suitable for prefetching into.
+ */
+struct prefetch_stats {
+	nodemask_t	prefetch_nodes;
+	/* Which nodes are currently suited to prefetching */
+	unsigned long	prefetched_pages;
+	/* Total pages we've prefetched on this wakeup of kprefetchd */
+	struct node_stats node[MAX_NUMNODES];
+};
+
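+/* sp_stat is only ever updated by kprefetchd, so it needs no locking */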
+static struct prefetch_stats sp_stat;
+
+/*
+ * This tries to read a swp_entry_t into swap cache for swap prefetching.
+ * If it returns TRICKLE_DELAY we should delay further prefetching.
+ */
+static enum trickle_return trickle_swap_cache_async(const swp_entry_t entry,
+	const int node)
+{
+	enum trickle_return ret = TRICKLE_FAILED;
+	struct page *page;
+
+	read_lock_irq(&swapper_space.tree_lock);
+	/* Entry may already exist */
+	page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
+	read_unlock_irq(&swapper_space.tree_lock);
+	if (page) {
+		remove_from_swapped_list(entry.val);
+		goto out;
+	}
+
+	/*
+	 * Get a new page to read from swap. We have already checked the
+	 * watermarks, so __alloc_pages will not enter reclaim.
+	 */
+	page = alloc_pages_node(node, GFP_HIGHUSER & ~__GFP_WAIT, 0);
+	if (unlikely(!page)) {
+		ret = TRICKLE_DELAY;
+		goto out;
+	}
+
+	if (add_to_swap_cache(page, entry)) {
+		/* Failed to add to swap cache */
+		goto out_release;
+	}
+
+	/* Add the page to the tail of the inactive list to preserve LRU order */
+	lru_cache_add_tail(page);
+	if (unlikely(swap_readpage(NULL, page))) {
+		ret = TRICKLE_DELAY;
+		goto out_release;
+	}
+
+	sp_stat.prefetched_pages++;
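+	/*
+	 * Adjust last_free so our own allocation is not later mistaken for
+	 * allocation activity elsewhere.
+	 */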
+	sp_stat.node[node].last_free--;
+
+	ret = TRICKLE_SUCCESS;
+out_release:
+	page_cache_release(page);
+out:
+	return ret;
+}
+
+static void clear_last_prefetch_free(void)
+{
+	int node;
+
+	/*
+	 * Reset the nodes suitable for prefetching to all nodes. We could
+	 * update the data to take memory hotplug into account if desired.
+	 */
+	sp_stat.prefetch_nodes = node_online_map;
+	for_each_node_mask(node, sp_stat.prefetch_nodes) {
+		struct node_stats *ns = &sp_stat.node[node];
+
+		ns->last_free = 0;
+	}
+}
+
+static void clear_current_prefetch_free(void)
+{
+	int node;
+
+	sp_stat.prefetch_nodes = node_online_map;
+	for_each_node_mask(node, sp_stat.prefetch_nodes) {
+		struct node_stats *ns = &sp_stat.node[node];
+
+		ns->current_free = 0;
+	}
+}
+
+/*
+ * This updates the high and low watermarks of amount of free ram in each
+ * node used to start and stop prefetching. We prefetch from pages_high * 4
+ * down to pages_high * 3.
+ */
+static void examine_free_limits(void)
+{
+	struct zone *z;
+
+	for_each_zone(z) {
+		struct node_stats *ns;
+		int idx;
+
+		if (!populated_zone(z))
+			continue;
+
+		ns = &sp_stat.node[z->zone_pgdat->node_id];
+		idx = zone_idx(z);
+		ns->lowfree[idx] = z->pages_high * 3 + z->lowmem_reserve[idx];
+		ns->highfree[idx] = ns->lowfree[idx] + z->pages_high;
+
+		if (z->free_pages > ns->highfree[idx]) {
+			/*
+			 * We've gotten above the high watermark of free pages
+			 * so we can start prefetching till we get to the low
+			 * watermark.
+			 */
+			ns->pointfree[idx] = &ns->lowfree[idx];
+		}
+	}
+}
+
+/*
+ * Have some hysteresis between where page reclaiming and prefetching
+ * will occur to prevent ping-ponging between them.
+ */
+static void set_suitable_nodes(void)
+{
+	struct zone *z;
+
+	for_each_zone(z) {
+		struct node_stats *ns;
+		unsigned long free;
+		int node, idx;
+
+		if (!populated_zone(z))
+			continue;
+
+		node = z->zone_pgdat->node_id;
+		ns = &sp_stat.node[node];
+		idx = zone_idx(z);
+
+		free = z->free_pages;
+		if (free < *ns->pointfree[idx]) {
+			/*
+			 * Free pages have dropped below the low watermark so
+			 * we won't start prefetching again till we hit the
+			 * high watermark of free pages.
+			 */
+			ns->pointfree[idx] = &ns->highfree[idx];
+			node_clear(node, sp_stat.prefetch_nodes);
+			continue;
+		}
+		ns->current_free += free;
+	}
+}
+
+/*
+ * We want to be absolutely certain it's ok to start prefetching.
+ */
+static int prefetch_suitable(void)
+{
+	unsigned long limit;
+	int node, ret = 0, test_pagestate = 0;
+
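+	/*
+	 * Aggressive prefetch skips the busy flag, load and page state tests;
+	 * only the per-node free page watermarks gate prefetching.
+	 */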
+	if (aggressive_prefetch) {
+		clear_current_prefetch_free();
+		set_suitable_nodes();
+		if (!nodes_empty(sp_stat.prefetch_nodes))
+			ret = 1;
+		goto out;
+	}
+
+	/* Purposefully racy */
+	if (test_bit(0, &swapped.busy)) {
+		__clear_bit(0, &swapped.busy);
+		goto out;
+	}
+
+	/*
+	 * get_page_state and above_background_load are expensive, so we only
+	 * perform them once every SWAP_CLUSTER_MAX prefetched_pages.
+	 * We test above_background_load because disk activity, even at low
+	 * priority, can cause interrupt-induced scheduling latencies.
+	 */
+	if (!(sp_stat.prefetched_pages % SWAP_CLUSTER_MAX)) {
+		if (above_background_load())
+			goto out;
+		test_pagestate = 1;
+	}
+
+	clear_current_prefetch_free();
+	set_suitable_nodes();
+
+	/*
+	 * We iterate over each node, testing whether it is suitable for
+	 * prefetching, and clear its bit in the nodemask if it is not.
+	 */
+	for_each_node_mask(node, sp_stat.prefetch_nodes) {
+		struct node_stats *ns = &sp_stat.node[node];
+		struct page_state ps;
+
+		/*
+		 * We check that pages are not being allocated elsewhere
+		 * at any significant rate, which would imply some degree
+		 * of memory pressure (e.g. during file reads).
+		 */
+		if (ns->last_free) {
+			if (ns->current_free + SWAP_CLUSTER_MAX <
+			    ns->last_free) {
+				ns->last_free = ns->current_free;
+				node_clear(node,
+					sp_stat.prefetch_nodes);
+				continue;
+			}
+		} else
+			ns->last_free = ns->current_free;
+
+		if (!test_pagestate)
+			continue;
+
+		get_page_state_node(&ps, node);
+
+		/* We shouldn't prefetch when we are doing writeback */
+		if (ps.nr_writeback) {
+			node_clear(node, sp_stat.prefetch_nodes);
+			continue;
+		}
+
+		/*
+		 * If more than 2/3 of the ram on this node is mapped, slab,
+		 * swapcache or dirty, skip it; we need to leave some free
+		 * for pagecache.
+		 * Note that nr_slab is currently inaccurate on numa because
+		 * nr_slab is incremented on the node doing the accounting
+		 * even if the slab is being allocated on a remote node. This
+		 * would be expensive to fix and is not of great significance.
+		 */
+		limit = ps.nr_mapped + ps.nr_slab + ps.nr_dirty +
+			ps.nr_unstable + total_swapcache_pages;
+		if (limit > ns->prefetch_watermark) {
+			node_clear(node, sp_stat.prefetch_nodes);
+			continue;
+		}
+	}
+
+	if (nodes_empty(sp_stat.prefetch_nodes))
+		goto out;
+
+	/* Survived all that? Hooray we can prefetch! */
+	ret = 1;
+out:
+	return ret;
+}
+
+/*
+ * Get the previous swapped entry when iterating over all entries. swapped.lock
+ * must be held and the caller must already have ensured the entry exists.
+ */
+static inline struct swapped_entry *prev_swapped_entry
+	(struct swapped_entry *entry)
+{
+	return list_entry(entry->swapped_list.prev->prev,
+		struct swapped_entry, swapped_list);
+}
+
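+/*
+ * Returns the number of pages prefetched since the last call and resets the
+ * count. If any pages were prefetched, the per-cpu lru pagevecs are drained
+ * so that those pages actually reach the lru lists.
+ */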
+static unsigned long pages_prefetched(void)
+{
+	unsigned long pages = sp_stat.prefetched_pages;
+
+	if (pages) {
+		lru_add_drain();
+		sp_stat.prefetched_pages = 0;
+	}
+	return pages;
+}
+
+/*
+ * trickle_swap is the main function that initiates the swap prefetching. It
+ * first checks whether the busy flag is set and does not prefetch if it is,
+ * as the flag implies we are low on memory or currently swapping in.
+ * Otherwise it runs until prefetch_suitable fails, which occurs when the
+ * vm becomes busy, we have prefetched up to the watermark, the list is
+ * empty, or we have iterated over all entries.
+ */
+static enum trickle_return trickle_swap(void)
+{
+	enum trickle_return ret = TRICKLE_DELAY;
+	struct swapped_entry *entry;
+
+	/*
+	 * If laptop_mode is enabled, don't prefetch, to avoid unnecessary
+	 * hard drive spin-ups.
+	 */
+	if (!swap_prefetch || (laptop_mode && !aggressive_prefetch))
+		return ret;
+
+	examine_free_limits();
+	entry = NULL;
+
+	for ( ; ; ) {
+		swp_entry_t swp_entry;
+		int node;
+
+		if (!prefetch_suitable())
+			break;
+
+		spin_lock(&swapped.lock);
+		if (list_empty(&swapped.list)) {
+			ret = TRICKLE_FAILED;
+			spin_unlock(&swapped.lock);
+			break;
+		}
+
+		if (!entry) {
+			/*
+			 * This sets the entry for the first iteration. It is
+			 * also a safeguard against the entry disappearing
+			 * while the lock is not held.
+			 */
+			entry = list_entry(swapped.list.prev,
+				struct swapped_entry, swapped_list);
+		} else if (entry->swapped_list.prev == swapped.list.next) {
+			/*
+			 * If we have iterated over all entries and some
+			 * could still not be brought back in, there may be
+			 * a reason we cannot swap them back in, so delay
+			 * attempting further prefetching.
+			 */
+			spin_unlock(&swapped.lock);
+			if (aggressive_prefetch) {
+				/*
+				 * If we're prefetching aggressively and
+				 * making progress then don't give up.
+				 */
+				if (pages_prefetched())
+					continue;
+			}
+			break;
+		}
+
+		node = get_swap_entry_node(entry);
+		if (!node_isset(node, sp_stat.prefetch_nodes)) {
+			/*
+			 * We found an entry that belongs to a node that is
+			 * not suitable for prefetching so skip it.
+			 */
+			entry = prev_swapped_entry(entry);
+			spin_unlock(&swapped.lock);
+			continue;
+		}
+		swp_entry = entry->swp_entry;
+		entry = prev_swapped_entry(entry);
+		spin_unlock(&swapped.lock);
+
+		if (trickle_swap_cache_async(swp_entry, node) == TRICKLE_DELAY &&
+		    !aggressive_prefetch)
+			break;
+	}
+
+	/* Return value of pages_prefetched irrelevant here */
+	pages_prefetched();
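+	/* Aggressive prefetching is one-shot; drop back to normal when done */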
+	if (aggressive_prefetch)
+		swap_prefetch &= ~PREFETCH_AGGRESSIVE;
+	return ret;
+}
+
+static int kprefetchd(void *__unused)
+{
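+	/*
+	 * Run at the lowest scheduling priority so prefetching never
+	 * competes with normal work for cpu time.
+	 */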
+	set_user_nice(current, 19);
+	/* Set ioprio to lowest if supported by i/o scheduler */
+	sys_ioprio_set(IOPRIO_WHO_PROCESS, 0, IOPRIO_CLASS_IDLE);
+
+	do {
+		try_to_freeze();
+
+		/*
+		 * TRICKLE_FAILED implies no entries are left - we do not
+		 * schedule a timed wakeup but sleep until add_to_swapped_list
+		 * wakes us, further delaying the next prefetch.
+		 */
+		if (trickle_swap() == TRICKLE_FAILED) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+		}
+		clear_last_prefetch_free();
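+		/*
+		 * Sleep between prefetch cycles; a wakeup from
+		 * add_to_swapped_list cuts this sleep short.
+		 */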
+		schedule_timeout_interruptible(PREFETCH_DELAY);
+	} while (!kthread_should_stop());
+
+	return 0;
+}
+
+/*
+ * Create kmem cache for swapped entries
+ */
+void __init prepare_swap_prefetch(void)
+{
+	struct zone *zone;
+
+	swapped.cache = kmem_cache_create("swapped_entry",
+		sizeof(struct swapped_entry), 0, SLAB_PANIC, NULL, NULL);
+
+	/*
+	 * Set the maximum number of entries to 2/3 the size of physical ram,
+	 * as we only ever prefetch to consume 2/3 of the ram.
+	 */
+	swapped.maxcount = nr_free_pagecache_pages() / 3 * 2;
+
+	for_each_zone(zone) {
+		unsigned long present;
+		struct node_stats *ns;
+		int idx;
+
+		present = zone->present_pages;
+		if (!present)
+			continue;
+
+		ns = &sp_stat.node[zone->zone_pgdat->node_id];
+		ns->prefetch_watermark += present / 3 * 2;
+		idx = zone_idx(zone);
+		ns->pointfree[idx] = &ns->highfree[idx];
+	}
+}
+
+static int __init kprefetchd_init(void)
+{
+	kprefetchd_task = kthread_run(kprefetchd, NULL, "kprefetchd");
+
+	return 0;
+}
+
+static void __exit kprefetchd_exit(void)
+{
+	kthread_stop(kprefetchd_task);
+}
+
+module_init(kprefetchd_init);
+module_exit(kprefetchd_exit);
diff -urN oldtree/mm/swap_state.c newtree/mm/swap_state.c
--- oldtree/mm/swap_state.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/swap_state.c	2006-04-01 08:53:57.999126250 -0500
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swap-prefetch.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
@@ -81,6 +82,7 @@
 		error = radix_tree_insert(&swapper_space.page_tree,
 						entry.val, page);
 		if (!error) {
+			remove_from_swapped_list(entry.val);
 			page_cache_get(page);
 			SetPageLocked(page);
 			SetPageSwapCache(page);
@@ -94,11 +96,12 @@
 	return error;
 }
 
-static int add_to_swap_cache(struct page *page, swp_entry_t entry)
+int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
 	if (!swap_duplicate(entry)) {
+		remove_from_swapped_list(entry.val);
 		INC_CACHE_INFO(noent_race);
 		return -ENOENT;
 	}
@@ -147,6 +150,9 @@
 	swp_entry_t entry;
 	int err;
 
+	/* Swap prefetching is delayed if we're swapping pages */
+	delay_swap_prefetch();
+
 	if (!PageLocked(page))
 		BUG();
 
@@ -320,6 +326,9 @@
 	struct page *found_page, *new_page = NULL;
 	int err;
 
+	/* Swap prefetching is delayed if we're already reading from swap */
+	delay_swap_prefetch();
+
 	do {
 		/*
 		 * First check the swap cache.  Since this is normally
diff -urN oldtree/mm/vmscan.c newtree/mm/vmscan.c
--- oldtree/mm/vmscan.c	2006-04-01 04:48:27.000000000 -0500
+++ newtree/mm/vmscan.c	2006-04-01 08:54:48.086256500 -0500
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swap-prefetch.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
@@ -123,10 +124,11 @@
 #endif
 
 /*
- * From 0 .. 100.  Higher means more swappy.
+ * From 0 .. 100.  Lower means more swappy.
  */
-int vm_swappiness = 60;
-static long total_memory;
+int vm_mapped __read_mostly = 66;
+int vm_hardmaplimit __read_mostly = 1;
+static long total_memory __read_mostly;
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
@@ -396,6 +398,7 @@
 
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
+		add_to_swapped_list(page);
 		__delete_from_swap_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		swap_free(swap);
@@ -454,6 +457,9 @@
 		if (PageWriteback(page))
 			goto keep_locked;
 
+		if (!PageReferenced(page))
+			inc_readahead_aging();
+
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable?  Activate it. */
 		if (referenced && page_mapping_inuse(page))
@@ -1230,10 +1236,14 @@
 		 * The distress ratio is important - we don't want to start
 		 * going oom.
 		 *
-		 * A 100% value of vm_swappiness overrides this algorithm
-		 * altogether.
+		 * This distress value is ignored if we apply a hardmaplimit except
+		 * in extreme distress.
+		 *
+		 * A 0% value of vm_mapped overrides this algorithm altogether.
 		 */
-		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+		swap_tendency = mapped_ratio * 100 / (vm_mapped + 1);
+		if (!vm_hardmaplimit || distress == 100)
+			swap_tendency += distress;
 
 		/*
 		 * Now use this metric to decide whether to start moving mapped
@@ -1442,6 +1452,8 @@
 	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 
+	delay_swap_prefetch();
+
 	inc_page_state(allocstall);
 
 	for (i = 0; zones[i] != NULL; i++) {
@@ -1571,6 +1583,7 @@
 			 */
 			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
 				struct zone *zone = pgdat->node_zones + i;
+				unsigned long watermark;
 
 				if (!populated_zone(zone))
 					continue;
@@ -1579,8 +1592,17 @@
 						priority != DEF_PRIORITY)
 					continue;
 
+				/*
+				 * The watermark is relaxed depending on the
+				 * level of "priority" till it drops to
+				 * pages_high.
+				 */
+				watermark = zone->pages_high +
+					(zone->pages_high * priority /
+					DEF_PRIORITY);
+
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0)) {
+						watermark, 0, 0)) {
 					end_zone = i;
 					goto scan;
 				}
@@ -1616,8 +1638,11 @@
 				continue;
 
 			if (nr_pages == 0) {	/* Not software suspend */
+				unsigned long watermark = zone->pages_high +
+					(zone->pages_high * priority /
+					DEF_PRIORITY);
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0))
+						watermark, end_zone, 0))
 					all_zones_ok = 0;
 			}
 			zone->temp_priority = priority;
@@ -1788,6 +1813,8 @@
 		.reclaimed_slab = 0,
 	};
 
+	delay_swap_prefetch();
+
 	current->reclaim_state = &reclaim_state;
 	for_each_pgdat(pgdat) {
 		int freed;
diff -urN oldtree/scripts/Kbuild.include newtree/scripts/Kbuild.include
--- oldtree/scripts/Kbuild.include	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/Kbuild.include	2006-04-01 08:53:50.634666000 -0500
@@ -78,8 +78,9 @@
 # function to only execute the passed command if necessary
 # >'< substitution is for echo to work, >$< substitution to preserve $ when reloading .cmd file
 # note: when using inline perl scripts [perl -e '...$$t=1;...'] in $(cmd_xxx) double $$ your perl vars
-# 
-if_changed = $(if $(strip $? $(call arg-check, $(cmd_$(1)), $(cmd_$@)) ), \
+#
+if_changed = $(if $(strip $(filter-out $(PHONY),$?)          \
+		$(call arg-check, $(cmd_$(1)), $(cmd_$@)) ), \
 	@set -e; \
 	$(echo-cmd) \
 	$(cmd_$(1)); \
@@ -87,8 +88,9 @@
 
 # execute the command and also postprocess generated .d dependencies
 # file
-if_changed_dep = $(if $(strip $? $(filter-out FORCE $(wildcard $^),$^)\
-	$(call arg-check, $(cmd_$(1)), $(cmd_$@)) ),                  \
+if_changed_dep = $(if $(strip $(filter-out $(PHONY),$?)  \
+		$(filter-out FORCE $(wildcard $^),$^)    \
+	$(call arg-check, $(cmd_$(1)), $(cmd_$@)) ),     \
 	@set -e; \
 	$(echo-cmd) \
 	$(cmd_$(1)); \
@@ -99,6 +101,7 @@
 # Usage: $(call if_changed_rule,foo)
 # will check if $(cmd_foo) changed, or any of the prequisites changed,
 # and if so will execute $(rule_foo)
-if_changed_rule = $(if $(strip $? $(call arg-check, $(cmd_$(1)), $(cmd_$@)) ),\
+if_changed_rule = $(if $(strip $(filter-out $(PHONY),$?)            \
+			$(call arg-check, $(cmd_$(1)), $(cmd_$@)) ),\
 			@set -e; \
 			$(rule_$(1)))
diff -urN oldtree/scripts/Makefile.build newtree/scripts/Makefile.build
--- oldtree/scripts/Makefile.build	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/Makefile.build	2006-04-01 08:53:50.638666250 -0500
@@ -4,7 +4,7 @@
 
 src := $(obj)
 
-.PHONY: __build
+PHONY := __build
 __build:
 
 # Read .config if it exist, otherwise ignore
@@ -309,14 +309,14 @@
 # Descending
 # ---------------------------------------------------------------------------
 
-.PHONY: $(subdir-ym)
+PHONY += $(subdir-ym)
 $(subdir-ym):
 	$(Q)$(MAKE) $(build)=$@
 
 # Add FORCE to the prequisites of a target to force it to be always rebuilt.
 # ---------------------------------------------------------------------------
 
-.PHONY: FORCE
+PHONY += FORCE
 
 FORCE:
 
@@ -331,3 +331,9 @@
 ifneq ($(cmd_files),)
   include $(cmd_files)
 endif
+
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff -urN oldtree/scripts/Makefile.clean newtree/scripts/Makefile.clean
--- oldtree/scripts/Makefile.clean	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/Makefile.clean	2006-04-01 08:53:50.642666500 -0500
@@ -4,7 +4,7 @@
 
 src := $(obj)
 
-.PHONY: __clean
+PHONY := __clean
 __clean:
 
 # Shorthand for $(Q)$(MAKE) scripts/Makefile.clean obj=dir
@@ -87,10 +87,16 @@
 # Descending
 # ---------------------------------------------------------------------------
 
-.PHONY: $(subdir-ymn)
+PHONY += $(subdir-ymn)
 $(subdir-ymn):
 	$(Q)$(MAKE) $(clean)=$@
 
 # If quiet is set, only print short version of command
 
 cmd = @$(if $($(quiet)cmd_$(1)),echo '  $($(quiet)cmd_$(1))' &&) $(cmd_$(1))
+
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff -urN oldtree/scripts/Makefile.modinst newtree/scripts/Makefile.modinst
--- oldtree/scripts/Makefile.modinst	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/Makefile.modinst	2006-04-01 08:53:50.642666500 -0500
@@ -2,7 +2,7 @@
 # Installing modules
 # ==========================================================================
 
-.PHONY: __modinst
+PHONY := __modinst
 __modinst:
 
 include scripts/Kbuild.include
@@ -12,7 +12,7 @@
 __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
 modules := $(patsubst %.o,%.ko,$(wildcard $(__modules:.ko=.o)))
 
-.PHONY: $(modules)
+PHONY += $(modules)
 __modinst: $(modules)
 	@:
 
@@ -27,3 +27,9 @@
 
 $(modules):
 	$(call cmd,modules_install,$(MODLIB)/$(modinst_dir))
+
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff -urN oldtree/scripts/Makefile.modpost newtree/scripts/Makefile.modpost
--- oldtree/scripts/Makefile.modpost	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/Makefile.modpost	2006-04-01 08:53:50.646666750 -0500
@@ -32,7 +32,7 @@
 # Step 4 is solely used to allow module versioning in external modules,
 # where the CRC of each module is retrieved from the Module.symers file.
 
-.PHONY: _modpost
+PHONY := _modpost
 _modpost: __modpost
 
 include .config
@@ -57,7 +57,7 @@
 	$(if $(KBUILD_EXTMOD),-i,-o) $(symverfile) \
 	$(filter-out FORCE,$^)
 
-.PHONY: __modpost
+PHONY += __modpost
 __modpost: $(wildcard vmlinux) $(modules:.ko=.o) FORCE
 	$(call cmd,modpost)
 
@@ -94,7 +94,7 @@
 # Add FORCE to the prequisites of a target to force it to be always rebuilt.
 # ---------------------------------------------------------------------------
 
-.PHONY: FORCE
+PHONY += FORCE
 
 FORCE:
 
@@ -109,3 +109,9 @@
 ifneq ($(cmd_files),)
   include $(cmd_files)
 endif
+
+
+# Declare the contents of the .PHONY variable as phony.  We keep that
+# information in a variable so we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff -urN oldtree/scripts/kconfig/Makefile newtree/scripts/kconfig/Makefile
--- oldtree/scripts/kconfig/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/kconfig/Makefile	2006-04-01 08:53:50.650667000 -0500
@@ -2,7 +2,7 @@
 # Kernel configuration targets
 # These targets are used from top-level makefile
 
-.PHONY: oldconfig xconfig gconfig menuconfig config silentoldconfig update-po-config
+PHONY += oldconfig xconfig gconfig menuconfig config silentoldconfig update-po-config
 
 xconfig: $(obj)/qconf
 	$< arch/$(ARCH)/Kconfig
@@ -42,7 +42,7 @@
 	$(Q)rm -f arch/um/Kconfig_arch
 	$(Q)rm -f scripts/kconfig/linux_*.pot scripts/kconfig/config.pot
 
-.PHONY: randconfig allyesconfig allnoconfig allmodconfig defconfig
+PHONY += randconfig allyesconfig allnoconfig allmodconfig defconfig
 
 randconfig: $(obj)/conf
 	$< -r arch/$(ARCH)/Kconfig
diff -urN oldtree/scripts/kconfig/lxdialog/Makefile newtree/scripts/kconfig/lxdialog/Makefile
--- oldtree/scripts/kconfig/lxdialog/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/kconfig/lxdialog/Makefile	2006-04-01 08:53:50.654667250 -0500
@@ -7,10 +7,10 @@
 # we really need to do so. (Do not call gcc as part of make mrproper)
 HOST_EXTRACFLAGS = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags)
 HOST_LOADLIBES   = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ldflags $(HOSTCC))
- 
-HOST_EXTRACFLAGS += -DLOCALE 
 
-.PHONY: dochecklxdialog
+HOST_EXTRACFLAGS += -DLOCALE
+
+PHONY += dochecklxdialog
 $(obj)/dochecklxdialog:
 	$(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_LOADLIBES)
 
diff -urN oldtree/scripts/package/Makefile newtree/scripts/package/Makefile
--- oldtree/scripts/package/Makefile	2006-04-01 04:48:27.000000000 -0500
+++ newtree/scripts/package/Makefile	2006-04-01 08:53:50.654667250 -0500
@@ -32,7 +32,7 @@
 PREV       := set -e; cd ..;
 
 # rpm-pkg
-.PHONY: rpm-pkg rpm
+PHONY += rpm-pkg rpm
 
 $(objtree)/kernel.spec: $(MKSPEC) $(srctree)/Makefile
 	$(CONFIG_SHELL) $(MKSPEC) > $@
@@ -54,10 +54,10 @@
 clean-files := $(objtree)/kernel.spec
 
 # binrpm-pkg
-.PHONY: binrpm-pkg
+PHONY += binrpm-pkg
 $(objtree)/binkernel.spec: $(MKSPEC) $(srctree)/Makefile
 	$(CONFIG_SHELL) $(MKSPEC) prebuilt > $@
-	
+
 binrpm-pkg: $(objtree)/binkernel.spec
 	$(MAKE) KBUILD_SRC=
 	set -e; \
@@ -72,7 +72,7 @@
 # Deb target
 # ---------------------------------------------------------------------------
 #
-.PHONY: deb-pkg
+PHONY += deb-pkg
 deb-pkg:
 	$(MAKE) KBUILD_SRC=
 	$(CONFIG_SHELL) $(srctree)/scripts/package/builddeb
@@ -82,7 +82,7 @@
 
 # tarball targets
 # ---------------------------------------------------------------------------
-.PHONY: tar%pkg
+PHONY += tar%pkg
 tar%pkg:
 	$(MAKE) KBUILD_SRC=
 	$(CONFIG_SHELL) $(srctree)/scripts/package/buildtar $@
