diff -urN oldtree/Documentation/fb/vesafb.txt newtree/Documentation/fb/vesafb.txt
--- oldtree/Documentation/fb/vesafb.txt	2006-01-03 03:21:10.000000000 +0000
+++ newtree/Documentation/fb/vesafb.txt	2006-02-18 15:28:25.299257312 +0000
@@ -2,16 +2,18 @@
 What is vesafb?
 ===============
 
-This is a generic driver for a graphic framebuffer on intel boxes.
+Vesafb is a generic framebuffer driver for x86 and x86_64 boxes.
 
-The idea is simple:  Turn on graphics mode at boot time with the help
-of the BIOS, and use this as framebuffer device /dev/fb0, like the m68k
-(and other) ports do.
-
-This means we decide at boot time whenever we want to run in text or
-graphics mode.  Switching mode later on (in protected mode) is
-impossible; BIOS calls work in real mode only.  VESA BIOS Extensions
-Version 2.0 are required, because we need a linear frame buffer.
+VESA BIOS Extensions Version 2.0 are required, because we need access to
+a linear frame buffer. VBE 3.0 is required if you want to use modes with
+a refresh rate higher than the standard 60 Hz.
+
+The VESA framebuffer driver comes in two flavors - the standard 'vesafb'
+and 'vesafb-tng'. Vesafb-tng is available only on 32-bit x86 due to the
+technology it uses (vm86). Vesafb-tng has more features than vesafb
+(adjusting the refresh rate on VBE3.0-compliant boards, switching the
+video mode without rebooting, selecting a mode by providing its
+modedb name, and more).
 
 Advantages:
 
@@ -29,26 +31,35 @@
 How to use it?
 ==============
 
-Switching modes is done using the vga=... boot parameter.  Read
-Documentation/svga.txt for details.
-
-You should compile in both vgacon (for text mode) and vesafb (for
-graphics mode). Which of them takes over the console depends on
-whenever the specified mode is text or graphics.
-
-The graphic modes are NOT in the list which you get if you boot with
-vga=ask and hit return. The mode you wish to use is derived from the
-VESA mode number. Here are those VESA mode numbers:
+If you are running a 32-bit x86 system and you decide to use vesafb-tng,
+you can either compile the driver into the kernel or use it as a module.
+The graphics mode you want to use is in both cases specified using the
+standard modedb format.
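+
+For example (the mode itself is just an illustration), a 1024x768 mode with
+32 bits per pixel and an 85 Hz refresh rate would be requested on the kernel
+command line with:
+
+    video=vesafb:1024x768-32@85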
+
+If your system doesn't support vm86 calls, things get a little more tricky.
+Since on such systems you can't do BIOS calls from protected mode, in which
+the kernel runs, you have to decide at boot time whether you want to run in
+text or in graphics mode. Switching modes later on is impossible. Switching
+modes is done using the vga=... boot parameter.  Read Documentation/svga.txt
+for details. Below is a more detailed description of what to do on systems
+using the standard vesafb driver.
+
+You should compile in both vgacon (for text mode) and vesafb (for graphics
+mode). Which of them takes over the console depends on whether the
+specified mode is text or graphics.
+
+The graphic modes are NOT in the list which you get if you boot with vga=ask
+and hit return. The mode you wish to use is derived from the VESA mode number.
+Here are those VESA mode numbers:
 
     | 640x480  800x600  1024x768 1280x1024
 ----+-------------------------------------
-256 |  0x101    0x103    0x105    0x107   
-32k |  0x110    0x113    0x116    0x119   
-64k |  0x111    0x114    0x117    0x11A   
-16M |  0x112    0x115    0x118    0x11B   
+256 |  0x101    0x103    0x105    0x107
+32k |  0x110    0x113    0x116    0x119
+64k |  0x111    0x114    0x117    0x11A
+16M |  0x112    0x115    0x118    0x11B
 
-The video mode number of the Linux kernel is the VESA mode number plus
-0x200.
+The video mode number of the Linux kernel is the VESA mode number plus 0x200.
  
  Linux_kernel_mode_number = VESA_mode_number + 0x200
 
@@ -56,15 +67,15 @@
 
     | 640x480  800x600  1024x768 1280x1024
 ----+-------------------------------------
-256 |  0x301    0x303    0x305    0x307   
-32k |  0x310    0x313    0x316    0x319   
-64k |  0x311    0x314    0x317    0x31A   
-16M |  0x312    0x315    0x318    0x31B   
-
-To enable one of those modes you have to specify "vga=ask" in the
-lilo.conf file and rerun LILO. Then you can type in the desired
-mode at the "vga=ask" prompt. For example if you like to use 
-1024x768x256 colors you have to say "305" at this prompt.
+256 |  0x301    0x303    0x305    0x307
+32k |  0x310    0x313    0x316    0x319
+64k |  0x311    0x314    0x317    0x31A
+16M |  0x312    0x315    0x318    0x31B
+
+To enable one of those modes you have to specify "vga=ask" in the lilo.conf
+file and rerun LILO. Then you can type in the desired mode at the "vga=ask"
+prompt. For example, if you want to use 1024x768x256 colors you have to say
+"305" at this prompt.
 
 If this does not work, this might be because your BIOS does not support
 linear framebuffers or because it does not support this mode at all.
@@ -72,11 +83,12 @@
 Extensions v2.0 are required, 1.2 is NOT sufficient.  You will get a
 "bad mode number" message if something goes wrong.
 
-1. Note: LILO cannot handle hex, for booting directly with 
+1. Note: LILO cannot handle hex; for booting directly with
          "vga=mode-number" you have to transform the numbers to decimal.
 2. Note: Some newer versions of LILO appear to work with those hex values,
          if you set the 0x in front of the numbers.
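+
+For example, to boot straight into 1024x768 with 256 colors (Linux mode
+0x305, i.e. 773 decimal), a lilo.conf fragment might look like this
+(illustrative only -- adjust the image and label entries to your setup):
+
+    image=/boot/vmlinuz
+        label=linux
+        vga=773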
 
+
 X11
 ===
 
@@ -84,98 +96,162 @@
 another (accelerated) X-Server like XF86_SVGA might or might not work.
 It depends on X-Server and graphics board.
 
-The X-Server must restore the video mode correctly, else you end up
+The X-Server must restore the video mode correctly, or else you end up
 with a broken console (and vesafb cannot do anything about this).
+With vesafb-tng chances are that the console will be restored properly
+even if the X server messes up the video mode.
 
 
 Refresh rates
 =============
 
-There is no way to change the vesafb video mode and/or timings after
-booting linux.  If you are not happy with the 60 Hz refresh rate, you
-have these options:
+With VBE3.0 compatible BIOSes and vesafb-tng it is possible to change
+the refresh rate either at boot time (by specifying the @<rr> part of
+the mode name) or later, using the fbset utility.
+
+If you want to use the default BIOS refresh rate while switching modes
+on a running system, set pixclock to 0.
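+
+For example, assuming fbset is installed and vesafb-tng drives /dev/fb0, a
+command along these lines switches the running console to 800x600 at the
+BIOS default refresh rate (pixclock set to 0):
+
+    fbset -xres 800 -yres 600 -pixclock 0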
+
+With VBE2.0 there is no way to change the mode timings after booting
+Linux. If you are not happy with the 60 Hz refresh rate, you have
+these options:
 
- * configure and load the DOS-Tools for your the graphics board (if
-   available) and boot linux with loadlin.
+ * configure and load the DOS tools for your graphics board (if
+   available) and boot Linux with loadlin.
  * use a native driver (matroxfb/atyfb) instead if vesafb.  If none
    is available, write a new one!
- * VBE 3.0 might work too.  I have neither a gfx board with VBE 3.0
-   support nor the specs, so I have not checked this yet.
+ * use a BIOS editor to change the default refresh rate (such an
+   editor does exist at least for ATI Radeon BIOSes).
+ * if you're running a non-vm86 and VBE3.0-compatible system, you can
+   use a kernel patch (vesafb-rrc) to hard-code some mode timings in
+   the kernel and use these while setting the graphic mode at boot time.
+
+Note that there are some boards (nVidia 59**, 57** and newer models)
+claiming that their Video BIOS is VBE3.0 compliant, while ignoring the
+CRTC values provided by software such as vesafb-tng. You won't be able
+to change the refresh rate if you're using one of these boards.
 
 
 Configuration
 =============
 
-The VESA BIOS provides protected mode interface for changing
-some parameters.  vesafb can use it for palette changes and
-to pan the display.  It is turned off by default because it
-seems not to work with some BIOS versions, but there are options
-to turn it on.
-
-You can pass options to vesafb using "video=vesafb:option" on
-the kernel command line.  Multiple options should be separated
-by comma, like this: "video=vesafb:ypan,invers"
-
-Accepted options:
-
-invers	no comment...
-
-ypan	enable display panning using the VESA protected mode 
-	interface.  The visible screen is just a window of the
-	video memory, console scrolling is done by changing the
-	start of the window.
-	pro:	* scrolling (fullscreen) is fast, because there is
-		  no need to copy around data.
-		* You'll get scrollback (the Shift-PgUp thing),
-		  the video memory can be used as scrollback buffer
-	kontra: * scrolling only parts of the screen causes some
-		  ugly flicker effects (boot logo flickers for
-		  example).
-
-ywrap	Same as ypan, but assumes your gfx board can wrap-around 
-	the video memory (i.e. starts reading from top if it
-	reaches the end of video memory).  Faster than ypan.
-
-redraw	scroll by redrawing the affected part of the screen, this
-	is the safe (and slow) default.
-
-
-vgapal	Use the standard vga registers for palette changes.
-	This is the default.
-pmipal	Use the protected mode interface for palette changes.
-
-mtrr:n	setup memory type range registers for the vesafb framebuffer
-	where n:
-	      0 - disabled (equivalent to nomtrr) (default)
-	      1 - uncachable
-	      2 - write-back
-	      3 - write-combining
-	      4 - write-through
+The VESA BIOS provides a protected mode interface for changing some
+parameters. vesafb can use it for palette changes and to pan the display.
+It is turned off by default because it seems not to work with some BIOS
+versions, but there are options to turn it on.
+
+You can pass options to vesafb using "video=vesafb:option" on the kernel
+command line. Multiple options should be separated by commas, like this:
+"video=vesafb:ypan,1024x768-32@85"
+
+Note that vesafb-tng still uses the "video=vesafb:option" format of the
+kernel command line video parameter. "video=vesafb-tng:xxx" is incorrect.
+
+Accepted options (both vesafb and vesafb-tng):
+
+ypan    Enable display panning using the VESA protected mode interface.
+        The visible screen is just a window of the video memory,
+        console scrolling is done by changing the start of the window.
+        pro:    * scrolling (fullscreen) is fast, because there is
+                  no need to copy around data.
+                * you'll get scrollback (the Shift-PgUp thing),
+                  the video memory can be used as scrollback buffer
+        con:    * scrolling only parts of the screen causes some
+                  ugly flicker effects (boot logo flickers for
+                  example).
+
+ywrap   Same as ypan, but assumes your gfx board can wrap-around the video
+        memory (i.e. starts reading from top if it reaches the end of
+        video memory). Faster than ypan.
+
+redraw  Scroll by redrawing the affected part of the screen, this is the
+        safe (and slow) default.
+
+vgapal  Use the standard VGA registers for palette changes.
+        This is the default.
+
+pmipal  Use the protected mode interface for palette changes.
+
+mtrr:n  Setup memory type range registers for the vesafb framebuffer
+        where n:
+              0 - disabled (equivalent to nomtrr) (default)
+              1 - uncachable
+              2 - write-back
+              3 - write-combining
+              4 - write-through
 
-	If you see the following in dmesg, choose the type that matches the
-	old one. In this example, use "mtrr:2".
+        If you see the following in dmesg, choose the type that matches
+        the old one. In this example, use "mtrr:2".
 ...
 mtrr: type mismatch for e0000000,8000000 old: write-back new: write-combining
 ...
 
-nomtrr  disable mtrr
+nomtrr  Do not use memory type range registers for vesafb.
 
 vremap:n
         remap 'n' MiB of video RAM. If 0 or not specified, remap memory
-	according to video mode. (2.5.66 patch/idea by Antonino Daplas
-	reversed to give override possibility (allocate more fb memory
-	than the kernel would) to 2.4 by tmb@iki.fi)
+        according to video mode. (2.5.66 patch/idea by Antonino Daplas
+        reversed to give override possibility (allocate more fb memory
+        than the kernel would) to 2.4 by tmb@iki.fi)
 
 vtotal:n
         if the video BIOS of your card incorrectly determines the total
         amount of video RAM, use this option to override the BIOS (in MiB).
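+
+As an illustration, several of the options above can be combined in a single
+video= statement, e.g. (the values are just an example):
+
+    video=vesafb:ywrap,mtrr:3,vremap:16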
 
-Have fun!
+Options accepted only by vesafb-tng:
 
-  Gerd
+<mode>  The mode you want to set, in the standard modedb format. Refer to
+        modedb.txt for detailed description. If you specify a mode that is
+        not supported by your board's BIOS, vesafb will attempt to set a
+        similar mode. The list of supported modes can be found in
+        /proc/fbx/modes, where x is the framebuffer number (usually 0).
+        When vesafb is compiled as a module, the mode string should be
+        provided as a value of the parameter 'mode'.
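+        A module load could then look like this (the mode value is just an
+        example):
+
+            modprobe vesafb mode=1024x768-32@85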
+
+vbemode:x
+        Force the use of VBE mode x. The mode will only be set if it's
+        found in VBE-provided list of supported modes.
+        NOTE: The mode number 'x' should be specified in VESA mode number
+        notation, not the Linux kernel one (i.e. 257 instead of 769).
+        HINT: If you use this option because the normal <mode> parameter does
+        not work for you and you use an X server, you'll probably want to
+        set the 'nocrtc' option to ensure that the video mode is properly
+        restored after console <-> X switches.
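+        For instance, forcing VESA mode 0x101 (257 decimal) together with
+        'nocrtc' would look like this:
+
+            video=vesafb:vbemode:257,nocrtc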
+
+nocrtc  Do not use CRTC timings while setting the graphic mode. This option
+        makes sense only on VBE 3.0-compliant systems. Use it if you have
+        problems with the modes set in the standard way. Note that specifying
+        this option means the refresh rate will be ignored and will stay at
+        your BIOS default (60 Hz).
+
+noedid  Do not try to fetch and use EDID-provided modes.
+
+gtf     Force the use of VESA's GTF (Generalized Timing Formula). Specifying
+        this will cause vesafb to skip its internal modedb and EDID-modedb
+        and jump straight to the GTF part of the code (normally used only if
+        everything else failed). This can be useful if you want to get as
+        much as possible from your graphics board but your BIOS doesn't
+        support modes with refresh rates you require. Note that you may need
+        to specify the maxhf, maxvf and maxclk parameters if they are not
+        provided by EDID.
+
+Additionally, the following parameters may be provided. They all override the
+EDID-provided values and BIOS defaults. Refer to your monitor's specs to get
+the correct values for maxhf, maxvf and maxclk for your hardware.
+
+maxhf:n     Maximum horizontal frequency (in kHz).
+maxvf:n     Maximum vertical frequency (in Hz).
+maxclk:n    Maximum pixel clock (in MHz).
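+
+For example, a monitor rated for at most 96 kHz horizontally, 160 Hz
+vertically and a 230 MHz pixel clock (replace these with your monitor's real
+limits) could be described like this:
+
+    video=vesafb:gtf,1024x768@100,maxhf:96,maxvf:160,maxclk:230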
+
+Have fun!
 
 --
+Original document for the vesafb driver by
 Gerd Knorr <kraxel@goldbach.in-berlin.de>
 
-Minor (mostly typo) changes 
-by Nico Schmoigl <schmoigl@rumms.uni-mannheim.de>
+Minor (mostly typo) changes by
+Nico Schmoigl <schmoigl@rumms.uni-mannheim.de>
+
+Extended documentation for vm86, VBE3.0 and vesafb-tng by
+Michal Januszewski <spock@gentoo.org>
+
diff -urN oldtree/arch/i386/boot/video.S newtree/arch/i386/boot/video.S
--- oldtree/arch/i386/boot/video.S	2006-02-18 15:18:21.337073520 +0000
+++ newtree/arch/i386/boot/video.S	2006-02-18 15:28:25.305256400 +0000
@@ -164,10 +164,12 @@
 # parameters in the default 80x25 mode -- these are set directly,
 # because some very obscure BIOSes supply insane values.
 mode_params:
+#ifdef CONFIG_FB_VESA_STD
 #ifdef CONFIG_VIDEO_SELECT
 	cmpb	$0, graphic_mode
 	jnz	mopar_gr
 #endif
+#endif /* CONFIG_FB_VESA_STD */
 	movb	$0x03, %ah			# Read cursor position
 	xorb	%bh, %bh
 	int	$0x10
@@ -200,6 +202,7 @@
 	ret
 
 #ifdef CONFIG_VIDEO_SELECT
+#ifdef CONFIG_FB_VESA_STD
 # Fetching of VESA frame buffer parameters
 mopar_gr:
 	leaw	modelist+1024, %di
@@ -278,6 +281,7 @@
 	movw	%es, %fs:(PARAM_VESAPM_SEG)
 	movw	%di, %fs:(PARAM_VESAPM_OFF)
 no_pm:	ret
+#endif /* CONFIG_FB_VESA_STD */
 
 # The video mode menu
 mode_menu:
@@ -492,10 +496,12 @@
 	
 	cmpb	$VIDEO_FIRST_V7>>8, %ah
 	jz	setv7
-	
+
+#ifdef CONFIG_FB_VESA_STD
 	cmpb	$VIDEO_FIRST_VESA>>8, %ah
 	jnc	check_vesa
-	
+#endif /* CONFIG_FB_VESA_STD */
+
 	orb	%ah, %ah
 	jz	setmenu
 	
@@ -567,6 +573,7 @@
 	movw	-4(%si), %ax			# Fetch mode ID
 	jmp	_m_s
 
+#ifdef CONFIG_FB_VESA_STD
 check_vesa:
 	leaw	modelist+1024, %di
 	subb	$VIDEO_FIRST_VESA>>8, %bh
@@ -600,6 +607,7 @@
 	ret
 
 _setbad:	jmp	setbad          	# Ugly...
+#endif /* CONFIG_FB_VESA_STD */
 
 # Recalculate vertical display end registers -- this fixes various
 # inconsistencies of extended modes on many adapters. Called when
diff -urN oldtree/arch/i386/boot/video.S.orig newtree/arch/i386/boot/video.S.orig
--- oldtree/arch/i386/boot/video.S.orig	1970-01-01 00:00:00.000000000 +0000
+++ newtree/arch/i386/boot/video.S.orig	2006-02-18 15:18:21.000000000 +0000
@@ -0,0 +1,2007 @@
+/*	video.S
+ *
+ *	Display adapter & video mode setup, version 2.13 (14-May-99)
+ *
+ *	Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
+ *	Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
+ *
+ *	Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
+ *
+ *	For further information, look at Documentation/svga.txt.
+ *
+ */
+
+#include <linux/config.h> /* for CONFIG_VIDEO_* */
+
+/* Enable autodetection of SVGA adapters and modes. */
+#undef CONFIG_VIDEO_SVGA
+
+/* Enable autodetection of VESA modes */
+#define CONFIG_VIDEO_VESA
+
+/* Enable compacting of mode table */
+#define CONFIG_VIDEO_COMPACT
+
+/* Retain screen contents when switching modes */
+#define CONFIG_VIDEO_RETAIN
+
+/* Enable local mode list */
+#undef CONFIG_VIDEO_LOCAL
+
+/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */
+#undef CONFIG_VIDEO_400_HACK
+
+/* Hack that lets you force specific BIOS mode ID and specific dimensions */
+#undef CONFIG_VIDEO_GFX_HACK
+#define VIDEO_GFX_BIOS_AX 0x4f02	/* 800x600 on ThinkPad */
+#define VIDEO_GFX_BIOS_BX 0x0102
+#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425	/* 100x37 */
+
+/* This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ *	NORMAL_VGA (-1)
+ *	EXTENDED_VGA (-2)
+ *	ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+/* Special video modes */
+#define VIDEO_FIRST_SPECIAL 0x0f00
+#define VIDEO_80x25 0x0f00
+#define VIDEO_8POINT 0x0f01
+#define VIDEO_80x43 0x0f02
+#define VIDEO_80x28 0x0f03
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_GFX_HACK 0x0f08
+#define VIDEO_LAST_SPECIAL 0x0f09
+
+/* Video modes given by resolution */
+#define VIDEO_FIRST_RESOLUTION 0x1000
+
+/* The "recalculate timings" flag */
+#define VIDEO_RECALC 0x8000
+
+/* Positions of various video parameters passed to the kernel */
+/* (see also include/linux/tty.h) */
+#define PARAM_CURSOR_POS	0x00
+#define PARAM_VIDEO_PAGE	0x04
+#define PARAM_VIDEO_MODE	0x06
+#define PARAM_VIDEO_COLS	0x07
+#define PARAM_VIDEO_EGA_BX	0x0a
+#define PARAM_VIDEO_LINES	0x0e
+#define PARAM_HAVE_VGA		0x0f
+#define PARAM_FONT_POINTS	0x10
+
+#define PARAM_LFB_WIDTH		0x12
+#define PARAM_LFB_HEIGHT	0x14
+#define PARAM_LFB_DEPTH		0x16
+#define PARAM_LFB_BASE		0x18
+#define PARAM_LFB_SIZE		0x1c
+#define PARAM_LFB_LINELENGTH	0x24
+#define PARAM_LFB_COLORS	0x26
+#define PARAM_VESAPM_SEG	0x2e
+#define PARAM_VESAPM_OFF	0x30
+#define PARAM_LFB_PAGES		0x32
+#define PARAM_VESA_ATTRIB	0x34
+
+/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
+#ifdef CONFIG_VIDEO_RETAIN
+#define DO_STORE call store_screen
+#else
+#define DO_STORE
+#endif /* CONFIG_VIDEO_RETAIN */
+
+# This is the main entry point called by setup.S
+# %ds *must* be pointing to the bootsector
+video:	pushw	%ds		# We use different segments
+	pushw	%ds		# FS contains original DS
+	popw	%fs
+	pushw	%cs		# DS is equal to CS
+	popw	%ds
+	pushw	%cs		# ES is equal to CS
+	popw	%es
+	xorw	%ax, %ax
+	movw	%ax, %gs	# GS is zero
+	cld
+	call	basic_detect	# Basic adapter type testing (EGA/VGA/MDA/CGA)
+#ifdef CONFIG_VIDEO_SELECT
+	movw	%fs:(0x01fa), %ax		# User selected video mode
+	cmpw	$ASK_VGA, %ax			# Bring up the menu
+	jz	vid2
+
+	call	mode_set			# Set the mode
+	jc	vid1
+
+	leaw	badmdt, %si			# Invalid mode ID
+	call	prtstr
+vid2:	call	mode_menu
+vid1:
+#ifdef CONFIG_VIDEO_RETAIN
+	call	restore_screen			# Restore screen contents
+#endif /* CONFIG_VIDEO_RETAIN */
+	call	store_edid
+#endif /* CONFIG_VIDEO_SELECT */
+	call	mode_params			# Store mode parameters
+	popw	%ds				# Restore original DS
+	ret
+
+# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
+basic_detect:
+	movb	$0, %fs:(PARAM_HAVE_VGA)
+	movb	$0x12, %ah	# Check EGA/VGA
+	movb	$0x10, %bl
+	int	$0x10
+	movw	%bx, %fs:(PARAM_VIDEO_EGA_BX)	# Identifies EGA to the kernel
+	cmpb	$0x10, %bl			# No, it's a CGA/MDA/HGA card.
+	je	basret
+
+	incb	adapter
+	movw	$0x1a00, %ax			# Check EGA or VGA?
+	int	$0x10
+	cmpb	$0x1a, %al			# 1a means VGA...
+	jne	basret				# anything else is EGA.
+	
+	incb	%fs:(PARAM_HAVE_VGA)		# We've detected a VGA
+	incb	adapter
+basret:	ret
+
+# Store the video mode parameters for later usage by the kernel.
+# This is done by asking the BIOS except for the rows/columns
+# parameters in the default 80x25 mode -- these are set directly,
+# because some very obscure BIOSes supply insane values.
+mode_params:
+#ifdef CONFIG_VIDEO_SELECT
+	cmpb	$0, graphic_mode
+	jnz	mopar_gr
+#endif
+	movb	$0x03, %ah			# Read cursor position
+	xorb	%bh, %bh
+	int	$0x10
+	movw	%dx, %fs:(PARAM_CURSOR_POS)
+	movb	$0x0f, %ah			# Read page/mode/width
+	int	$0x10
+	movw	%bx, %fs:(PARAM_VIDEO_PAGE)
+	movw	%ax, %fs:(PARAM_VIDEO_MODE)	# Video mode and screen width
+	cmpb	$0x7, %al			# MDA/HGA => segment differs
+	jnz	mopar0
+
+	movw	$0xb000, video_segment
+mopar0: movw	%gs:(0x485), %ax		# Font size
+	movw	%ax, %fs:(PARAM_FONT_POINTS)	# (valid only on EGA/VGA)
+	movw	force_size, %ax			# Forced size?
+	orw	%ax, %ax
+	jz	mopar1
+
+	movb	%ah, %fs:(PARAM_VIDEO_COLS)
+	movb	%al, %fs:(PARAM_VIDEO_LINES)
+	ret
+
+mopar1:	movb	$25, %al
+	cmpb	$0, adapter			# If we are on CGA/MDA/HGA, the
+	jz	mopar2				# screen must have 25 lines.
+
+	movb	%gs:(0x484), %al		# On EGA/VGA, use the EGA+ BIOS
+	incb	%al				# location of max lines.
+mopar2: movb	%al, %fs:(PARAM_VIDEO_LINES)
+	ret
+
+#ifdef CONFIG_VIDEO_SELECT
+# Fetching of VESA frame buffer parameters
+mopar_gr:
+	leaw	modelist+1024, %di
+	movb	$0x23, %fs:(PARAM_HAVE_VGA)
+	movw	16(%di), %ax
+	movw	%ax, %fs:(PARAM_LFB_LINELENGTH)
+	movw	18(%di), %ax
+	movw	%ax, %fs:(PARAM_LFB_WIDTH)
+	movw	20(%di), %ax
+	movw	%ax, %fs:(PARAM_LFB_HEIGHT)
+	movb	25(%di), %al
+	movb	$0, %ah
+	movw	%ax, %fs:(PARAM_LFB_DEPTH)
+	movb	29(%di), %al	
+	movb	$0, %ah
+	movw	%ax, %fs:(PARAM_LFB_PAGES)
+	movl	40(%di), %eax
+	movl	%eax, %fs:(PARAM_LFB_BASE)
+	movl	31(%di), %eax
+	movl	%eax, %fs:(PARAM_LFB_COLORS)
+	movl	35(%di), %eax
+	movl	%eax, %fs:(PARAM_LFB_COLORS+4)
+	movw	0(%di), %ax
+	movw	%ax, %fs:(PARAM_VESA_ATTRIB)
+
+# get video mem size
+	leaw	modelist+1024, %di
+	movw	$0x4f00, %ax
+	int	$0x10
+	xorl	%eax, %eax
+	movw	18(%di), %ax
+	movl	%eax, %fs:(PARAM_LFB_SIZE)
+
+# switching the DAC to 8-bit is for <= 8 bpp only
+	movw	%fs:(PARAM_LFB_DEPTH), %ax
+	cmpw	$8, %ax
+	jg	dac_done
+
+# get DAC switching capability
+	xorl	%eax, %eax
+	movb	10(%di), %al
+	testb	$1, %al
+	jz	dac_set
+
+# attempt to switch DAC to 8-bit
+	movw	$0x4f08, %ax
+	movw	$0x0800, %bx
+	int	$0x10
+	cmpw	$0x004f, %ax
+	jne     dac_set
+	movb    %bh, dac_size		# store actual DAC size
+
+dac_set:
+# set color size to DAC size
+	movb	dac_size, %al
+	movb	%al, %fs:(PARAM_LFB_COLORS+0)
+	movb	%al, %fs:(PARAM_LFB_COLORS+2)
+	movb	%al, %fs:(PARAM_LFB_COLORS+4)
+	movb	%al, %fs:(PARAM_LFB_COLORS+6)
+
+# set color offsets to 0
+	movb	$0, %fs:(PARAM_LFB_COLORS+1)
+	movb	$0, %fs:(PARAM_LFB_COLORS+3)
+	movb	$0, %fs:(PARAM_LFB_COLORS+5)
+	movb	$0, %fs:(PARAM_LFB_COLORS+7)
+
+dac_done:
+# get protected mode interface informations
+	movw	$0x4f0a, %ax
+	xorw	%bx, %bx
+	xorw	%di, %di
+	int	$0x10
+	cmp	$0x004f, %ax
+	jnz	no_pm
+
+	movw	%es, %fs:(PARAM_VESAPM_SEG)
+	movw	%di, %fs:(PARAM_VESAPM_OFF)
+no_pm:	ret
+
+# The video mode menu
+mode_menu:
+	leaw	keymsg, %si			# "Return/Space/Timeout" message
+	call	prtstr
+	call	flush
+nokey:	call	getkt
+
+	cmpb	$0x0d, %al			# ENTER ?
+	je	listm				# yes - manual mode selection
+
+	cmpb	$0x20, %al			# SPACE ?
+	je	defmd1				# no - repeat
+
+	call 	beep
+	jmp	nokey
+
+defmd1:	ret					# No mode chosen? Default 80x25
+
+listm:	call	mode_table			# List mode table
+listm0:	leaw	name_bann, %si			# Print adapter name
+	call	prtstr
+	movw	card_name, %si
+	orw	%si, %si
+	jnz	an2
+
+	movb	adapter, %al
+	leaw	old_name, %si
+	orb	%al, %al
+	jz	an1
+
+	leaw	ega_name, %si
+	decb	%al
+	jz	an1
+
+	leaw	vga_name, %si
+	jmp	an1
+
+an2:	call	prtstr
+	leaw	svga_name, %si
+an1:	call	prtstr
+	leaw	listhdr, %si			# Table header
+	call	prtstr
+	movb	$0x30, %dl			# DL holds mode number
+	leaw	modelist, %si
+lm1:	cmpw	$ASK_VGA, (%si)			# End?
+	jz	lm2
+
+	movb	%dl, %al			# Menu selection number
+	call	prtchr
+	call	prtsp2
+	lodsw
+	call	prthw				# Mode ID
+	call	prtsp2
+	movb	0x1(%si), %al
+	call	prtdec				# Rows
+	movb	$0x78, %al			# the letter 'x'
+	call	prtchr
+	lodsw
+	call	prtdec				# Columns
+	movb	$0x0d, %al			# New line
+	call	prtchr
+	movb	$0x0a, %al
+	call	prtchr
+	incb	%dl				# Next character
+	cmpb	$0x3a, %dl
+	jnz	lm1
+
+	movb	$0x61, %dl
+	jmp	lm1
+
+lm2:	leaw	prompt, %si			# Mode prompt
+	call	prtstr
+	leaw	edit_buf, %di			# Editor buffer
+lm3:	call	getkey
+	cmpb	$0x0d, %al			# Enter?
+	jz	lment
+
+	cmpb	$0x08, %al			# Backspace?
+	jz	lmbs
+
+	cmpb	$0x20, %al			# Printable?
+	jc	lm3
+
+	cmpw	$edit_buf+4, %di		# Enough space?
+	jz	lm3
+
+	stosb
+	call	prtchr
+	jmp	lm3
+
+lmbs:	cmpw	$edit_buf, %di			# Backspace
+	jz	lm3
+
+	decw	%di
+	movb	$0x08, %al
+	call	prtchr
+	call	prtspc
+	movb	$0x08, %al
+	call	prtchr
+	jmp	lm3
+	
+lment:	movb	$0, (%di)
+	leaw	crlft, %si
+	call	prtstr
+	leaw	edit_buf, %si
+	cmpb	$0, (%si)			# Empty string = default mode
+	jz	lmdef
+
+	cmpb	$0, 1(%si)			# One character = menu selection
+	jz	mnusel
+
+	cmpw	$0x6373, (%si)			# "scan" => mode scanning
+	jnz	lmhx
+
+	cmpw	$0x6e61, 2(%si)
+	jz	lmscan
+
+lmhx:	xorw	%bx, %bx			# Else => mode ID in hex
+lmhex:	lodsb
+	orb	%al, %al
+	jz	lmuse1
+
+	subb	$0x30, %al
+	jc	lmbad
+
+	cmpb	$10, %al
+	jc	lmhx1
+
+	subb	$7, %al
+	andb	$0xdf, %al
+	cmpb	$10, %al
+	jc	lmbad
+
+	cmpb	$16, %al
+	jnc	lmbad
+
+lmhx1:	shlw	$4, %bx
+	orb	%al, %bl
+	jmp	lmhex
+
+lmuse1:	movw	%bx, %ax
+	jmp	lmuse
+
+mnusel:	lodsb					# Menu selection
+	xorb	%ah, %ah
+	subb	$0x30, %al
+	jc	lmbad
+
+	cmpb	$10, %al
+	jc	lmuse
+	
+	cmpb	$0x61-0x30, %al
+	jc	lmbad
+	
+	subb	$0x61-0x30-10, %al
+	cmpb	$36, %al
+	jnc	lmbad
+
+lmuse:	call	mode_set
+	jc	lmdef
+
+lmbad:	leaw	unknt, %si
+	call	prtstr
+	jmp	lm2
+lmscan:	cmpb	$0, adapter			# Scanning only on EGA/VGA
+	jz	lmbad
+
+	movw	$0, mt_end			# Scanning of modes is
+	movb	$1, scanning			# done as new autodetection.
+	call	mode_table
+	jmp	listm0
+lmdef:	ret
+
+# Additional parts of mode_set... (relative jumps, you know)
+setv7:						# Video7 extended modes
+	DO_STORE
+	subb	$VIDEO_FIRST_V7>>8, %bh
+	movw	$0x6f05, %ax
+	int	$0x10
+	stc
+	ret
+
+_setrec:	jmp	setrec			# Ugly...
+_set_80x25:	jmp	set_80x25
+
+# Aliases for backward compatibility.
+setalias:
+	movw	$VIDEO_80x25, %ax
+	incw	%bx
+	jz	mode_set
+
+	movb	$VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
+	incw	%bx
+	jnz	setbad				# Fall-through!
+
+# Setting of user mode (AX=mode ID) => CF=success
+mode_set:
+	movw	%ax, %fs:(0x01fa)		# Store mode for use in acpi_wakeup.S
+	movw	%ax, %bx
+	cmpb	$0xff, %ah
+	jz	setalias
+
+	testb	$VIDEO_RECALC>>8, %ah
+	jnz	_setrec
+
+	cmpb	$VIDEO_FIRST_RESOLUTION>>8, %ah
+	jnc	setres
+	
+	cmpb	$VIDEO_FIRST_SPECIAL>>8, %ah
+	jz	setspc
+	
+	cmpb	$VIDEO_FIRST_V7>>8, %ah
+	jz	setv7
+	
+	cmpb	$VIDEO_FIRST_VESA>>8, %ah
+	jnc	check_vesa
+	
+	orb	%ah, %ah
+	jz	setmenu
+	
+	decb	%ah
+	jz	setbios
+
+setbad:	clc
+	movb	$0, do_restore			# The screen needn't be restored
+	ret
+
+setvesa:
+	DO_STORE
+	subb	$VIDEO_FIRST_VESA>>8, %bh
+	movw	$0x4f02, %ax			# VESA BIOS mode set call
+	int	$0x10
+	cmpw	$0x004f, %ax			# AL=4f if implemented
+	jnz	setbad				# AH=0 if OK
+
+	stc
+	ret
+
+setbios:
+	DO_STORE
+	int	$0x10				# Standard BIOS mode set call
+	pushw	%bx
+	movb	$0x0f, %ah			# Check if really set
+	int	$0x10
+	popw	%bx
+	cmpb	%bl, %al
+	jnz	setbad
+	
+	stc
+	ret
+
+setspc:	xorb	%bh, %bh			# Set special mode
+	cmpb	$VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
+	jnc	setbad
+	
+	addw	%bx, %bx
+	jmp	*spec_inits(%bx)
+
+setmenu:
+	orb	%al, %al			# 80x25 is an exception
+	jz	_set_80x25
+	
+	pushw	%bx				# Set mode chosen from menu
+	call	mode_table			# Build the mode table
+	popw	%ax
+	shlw	$2, %ax
+	addw	%ax, %si
+	cmpw	%di, %si
+	jnc	setbad
+	
+	movw	(%si), %ax			# Fetch mode ID
+_m_s:	jmp	mode_set
+
+setres:	pushw	%bx				# Set mode chosen by resolution
+	call	mode_table
+	popw	%bx
+	xchgb	%bl, %bh
+setr1:	lodsw
+	cmpw	$ASK_VGA, %ax			# End of the list?
+	jz	setbad
+	
+	lodsw
+	cmpw	%bx, %ax
+	jnz	setr1
+	
+	movw	-4(%si), %ax			# Fetch mode ID
+	jmp	_m_s
+
+check_vesa:
+	leaw	modelist+1024, %di
+	subb	$VIDEO_FIRST_VESA>>8, %bh
+	movw	%bx, %cx			# Get mode information structure
+	movw	$0x4f01, %ax
+	int	$0x10
+	addb	$VIDEO_FIRST_VESA>>8, %bh
+	cmpw	$0x004f, %ax
+	jnz	setbad
+
+	movb	(%di), %al			# Check capabilities.
+	andb	$0x19, %al
+	cmpb	$0x09, %al
+	jz	setvesa				# This is a text mode
+
+	movb	(%di), %al			# Check capabilities.
+	andb	$0x99, %al
+	cmpb	$0x99, %al
+	jnz	_setbad				# Doh! No linear frame buffer.
+
+	subb	$VIDEO_FIRST_VESA>>8, %bh
+	orw	$0x4000, %bx			# Use linear frame buffer
+	movw	$0x4f02, %ax			# VESA BIOS mode set call
+	int	$0x10
+	cmpw	$0x004f, %ax			# AL=4f if implemented
+	jnz	_setbad				# AH=0 if OK
+
+	movb	$1, graphic_mode		# flag graphic mode
+	movb	$0, do_restore			# no screen restore
+	stc
+	ret
+
+_setbad:	jmp	setbad          	# Ugly...
+
+# Recalculate vertical display end registers -- this fixes various
+# inconsistencies of extended modes on many adapters. Called when
+# the VIDEO_RECALC flag is set in the mode ID.
+
+setrec:	subb	$VIDEO_RECALC>>8, %ah		# Set the base mode
+	call	mode_set
+	jnc	rct3
+
+	movw	%gs:(0x485), %ax		# Font size in pixels
+	movb	%gs:(0x484), %bl		# Number of rows
+	incb	%bl
+	mulb	%bl				# Number of visible
+	decw	%ax				# scan lines - 1
+	movw	$0x3d4, %dx
+	movw	%ax, %bx
+	movb	$0x12, %al			# Lower 8 bits
+	movb	%bl, %ah
+	outw	%ax, %dx
+	movb	$0x07, %al		# Bits 8 and 9 in the overflow register
+	call	inidx
+	xchgb	%al, %ah
+	andb	$0xbd, %ah
+	shrb	%bh
+	jnc	rct1
+	orb	$0x02, %ah
+rct1:	shrb	%bh
+	jnc	rct2
+	orb	$0x40, %ah
+rct2:	movb	$0x07, %al
+	outw	%ax, %dx
+	stc
+rct3:	ret
+
+# Table of routines for setting of the special modes.
+spec_inits:
+	.word	set_80x25
+	.word	set_8pixel
+	.word	set_80x43
+	.word	set_80x28
+	.word	set_current
+	.word	set_80x30
+	.word	set_80x34
+	.word	set_80x60
+	.word	set_gfx
+
+# Set the 80x25 mode. If already set, do nothing.
+set_80x25:
+	movw	$0x5019, force_size		# Override possibly broken BIOS
+use_80x25:
+#ifdef CONFIG_VIDEO_400_HACK
+	movw	$0x1202, %ax			# Force 400 scan lines
+	movb	$0x30, %bl
+	int	$0x10
+#else
+	movb	$0x0f, %ah			# Get current mode ID
+	int	$0x10
+	cmpw	$0x5007, %ax	# Mode 7 (80x25 mono) is the only one available
+	jz	st80		# on CGA/MDA/HGA and is also available on EGAM
+
+	cmpw	$0x5003, %ax	# Unknown mode, force 80x25 color
+	jnz	force3
+
+st80:	cmpb	$0, adapter	# CGA/MDA/HGA => mode 3/7 is always 80x25
+	jz	set80
+
+	movb	%gs:(0x0484), %al	# This is EGA+ -- beware of 80x50 etc.
+	orb	%al, %al		# Some buggy BIOS'es set 0 rows
+	jz	set80
+	
+	cmpb	$24, %al		# It's hopefully correct
+	jz	set80
+#endif /* CONFIG_VIDEO_400_HACK */
+force3:	DO_STORE
+	movw	$0x0003, %ax			# Forced set
+	int	$0x10
+set80:	stc
+	ret
+
+# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
+set_8pixel:
+	DO_STORE
+	call	use_80x25			# The base is 80x25
+set_8pt:
+	movw	$0x1112, %ax			# Use 8x8 font
+	xorb	%bl, %bl
+	int	$0x10
+	movw	$0x1200, %ax			# Use alternate print screen
+	movb	$0x20, %bl
+	int	$0x10
+	movw	$0x1201, %ax			# Turn off cursor emulation
+	movb	$0x34, %bl
+	int	$0x10
+	movb	$0x01, %ah			# Define cursor scan lines 6-7
+	movw	$0x0607, %cx
+	int	$0x10
+set_current:
+	stc
+	ret
+
+# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
+# 80x25 mode with 14-point fonts instead of 16-point.
+set_80x28:
+	DO_STORE
+	call	use_80x25			# The base is 80x25
+set14:	movw	$0x1111, %ax			# Use 9x14 font
+	xorb	%bl, %bl
+	int	$0x10
+	movb	$0x01, %ah			# Define cursor scan lines 11-12
+	movw	$0x0b0c, %cx
+	int	$0x10
+	stc
+	ret
+
+# Set the 80x43 mode. This mode is works on all VGA's.
+# It's a 350-scanline mode with 8-pixel font.
+set_80x43:
+	DO_STORE
+	movw	$0x1201, %ax			# Set 350 scans
+	movb	$0x30, %bl
+	int	$0x10
+	movw	$0x0003, %ax			# Reset video mode
+	int	$0x10
+	jmp	set_8pt				# Use 8-pixel font
+
+# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
+set_80x30:
+	call	use_80x25			# Start with real 80x25
+	DO_STORE
+	movw	$0x3cc, %dx			# Get CRTC port
+	inb	%dx, %al
+	movb	$0xd4, %dl
+	rorb	%al				# Mono or color?
+	jc	set48a
+
+	movb	$0xb4, %dl
+set48a:	movw	$0x0c11, %ax		# Vertical sync end (also unlocks CR0-7)
+ 	call	outidx
+	movw	$0x0b06, %ax			# Vertical total
+ 	call	outidx
+	movw	$0x3e07, %ax			# (Vertical) overflow
+ 	call	outidx
+	movw	$0xea10, %ax			# Vertical sync start
+ 	call	outidx
+	movw	$0xdf12, %ax			# Vertical display end
+	call	outidx
+	movw	$0xe715, %ax			# Vertical blank start
+ 	call	outidx
+	movw	$0x0416, %ax			# Vertical blank end
+ 	call	outidx
+	pushw	%dx
+	movb	$0xcc, %dl			# Misc output register (read)
+ 	inb	%dx, %al
+ 	movb	$0xc2, %dl			# (write)
+ 	andb	$0x0d, %al	# Preserve clock select bits and color bit
+ 	orb	$0xe2, %al			# Set correct sync polarity
+ 	outb	%al, %dx
+	popw	%dx
+	movw	$0x501e, force_size
+	stc					# That's all.
+	ret
+
+# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
+set_80x34:
+	call	set_80x30			# Set 480 scans
+	call	set14				# And 14-pt font
+	movw	$0xdb12, %ax			# VGA vertical display end
+	movw	$0x5022, force_size
+setvde:	call	outidx
+	stc
+	ret
+
+# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
+set_80x60:
+	call	set_80x30			# Set 480 scans
+	call	set_8pt				# And 8-pt font
+	movw	$0xdf12, %ax			# VGA vertical display end
+	movw	$0x503c, force_size
+	jmp	setvde
+
+# Special hack for ThinkPad graphics
+set_gfx:
+#ifdef CONFIG_VIDEO_GFX_HACK
+	movw	$VIDEO_GFX_BIOS_AX, %ax
+	movw	$VIDEO_GFX_BIOS_BX, %bx
+	int	$0x10
+	movw	$VIDEO_GFX_DUMMY_RESOLUTION, force_size
+	stc
+#endif
+	ret
+
+#ifdef CONFIG_VIDEO_RETAIN
+
+# Store screen contents to temporary buffer.
+store_screen:
+	cmpb	$0, do_restore			# Already stored?
+	jnz	stsr
+
+	testb	$CAN_USE_HEAP, loadflags	# Have we space for storing?
+	jz	stsr
+	
+	pushw	%ax
+	pushw	%bx
+	pushw	force_size			# Don't force specific size
+	movw	$0, force_size
+	call	mode_params			# Obtain params of current mode
+	popw	force_size
+	movb	%fs:(PARAM_VIDEO_LINES), %ah
+	movb	%fs:(PARAM_VIDEO_COLS), %al
+	movw	%ax, %bx			# BX=dimensions
+	mulb	%ah
+	movw	%ax, %cx			# CX=number of characters
+	addw	%ax, %ax			# Calculate image size
+	addw	$modelist+1024+4, %ax
+	cmpw	heap_end_ptr, %ax
+	jnc	sts1				# Unfortunately, out of memory
+
+	movw	%fs:(PARAM_CURSOR_POS), %ax	# Store mode params
+	leaw	modelist+1024, %di
+	stosw
+	movw	%bx, %ax
+	stosw
+	pushw	%ds				# Store the screen
+	movw	video_segment, %ds
+	xorw	%si, %si
+	rep
+	movsw
+	popw	%ds
+	incb	do_restore			# Screen will be restored later
+sts1:	popw	%bx
+	popw	%ax
+stsr:	ret
+
+# Restore screen contents from temporary buffer.
+restore_screen:
+	cmpb	$0, do_restore			# Has the screen been stored?
+	jz	res1
+
+	call	mode_params			# Get parameters of current mode
+	movb	%fs:(PARAM_VIDEO_LINES), %cl
+	movb	%fs:(PARAM_VIDEO_COLS), %ch
+	leaw	modelist+1024, %si		# Screen buffer
+	lodsw					# Set cursor position
+	movw	%ax, %dx
+	cmpb	%cl, %dh
+	jc	res2
+	
+	movb	%cl, %dh
+	decb	%dh
+res2:	cmpb	%ch, %dl
+	jc	res3
+	
+	movb	%ch, %dl
+	decb	%dl
+res3:	movb	$0x02, %ah
+	movb	$0x00, %bh
+	int	$0x10
+	lodsw					# Display size
+	movb	%ah, %dl			# DL=number of lines
+	movb	$0, %ah				# BX=phys. length of orig. line
+	movw	%ax, %bx
+	cmpb	%cl, %dl			# Too many?
+	jc	res4
+
+	pushw	%ax
+	movb	%dl, %al
+	subb	%cl, %al
+	mulb	%bl
+	addw	%ax, %si
+	addw	%ax, %si
+	popw	%ax
+	movb	%cl, %dl
+res4:	cmpb	%ch, %al			# Too wide?
+	jc	res5
+	
+	movb	%ch, %al			# AX=width of src. line
+res5:	movb	$0, %cl
+	xchgb	%ch, %cl
+	movw	%cx, %bp			# BP=width of dest. line
+	pushw	%es
+	movw	video_segment, %es
+	xorw	%di, %di			# Move the data
+	addw	%bx, %bx			# Convert BX and BP to _bytes_
+	addw	%bp, %bp
+res6:	pushw	%si
+	pushw	%di
+	movw	%ax, %cx
+	rep
+	movsw
+	popw	%di
+	popw	%si
+	addw	%bp, %di
+	addw	%bx, %si
+	decb	%dl
+	jnz	res6
+	
+	popw	%es				# Done
+res1:	ret
+#endif /* CONFIG_VIDEO_RETAIN */
+
+# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
+outidx:	outb	%al, %dx
+	pushw	%ax
+	movb	%ah, %al
+	incw	%dx
+	outb	%al, %dx
+	decw	%dx
+	popw	%ax
+	ret
+
+# Build the table of video modes (stored after the setup.S code at the
+# `modelist' label. Each video mode record looks like:
+#	.word	MODE-ID		(our special mode ID (see above))
+#	.byte	rows		(number of rows)
+#	.byte	columns		(number of columns)
+# Returns address of the end of the table in DI, the end is marked
+# with a ASK_VGA ID.
+mode_table:
+	movw	mt_end, %di			# Already filled?
+	orw	%di, %di
+	jnz	mtab1x
+	
+	leaw	modelist, %di			# Store standard modes:
+	movl	$VIDEO_80x25 + 0x50190000, %eax	# The 80x25 mode (ALL)
+	stosl
+	movb	adapter, %al			# CGA/MDA/HGA -- no more modes
+	orb	%al, %al
+	jz	mtabe
+	
+	decb	%al
+	jnz	mtabv
+	
+	movl	$VIDEO_8POINT + 0x502b0000, %eax	# The 80x43 EGA mode
+	stosl
+	jmp	mtabe
+
+mtab1x:	jmp	mtab1
+
+mtabv:	leaw	vga_modes, %si			# All modes for std VGA
+	movw	$vga_modes_end-vga_modes, %cx
+	rep	# I'm unable to use movsw as I don't know how to store a half
+	movsb	# of the expression above to cx without using explicit shr.
+
+	cmpb	$0, scanning			# Mode scan requested?
+	jz	mscan1
+	
+	call	mode_scan
+mscan1:
+
+#ifdef CONFIG_VIDEO_LOCAL
+	call	local_modes
+#endif /* CONFIG_VIDEO_LOCAL */
+
+#ifdef CONFIG_VIDEO_VESA
+	call	vesa_modes			# Detect VESA VGA modes
+#endif /* CONFIG_VIDEO_VESA */
+
+#ifdef CONFIG_VIDEO_SVGA
+	cmpb	$0, scanning			# Bypass when scanning
+	jnz	mscan2
+	
+	call	svga_modes			# Detect SVGA cards & modes
+mscan2:
+#endif /* CONFIG_VIDEO_SVGA */
+
+mtabe:
+
+#ifdef CONFIG_VIDEO_COMPACT
+	leaw	modelist, %si
+	movw	%di, %dx
+	movw	%si, %di
+cmt1:	cmpw	%dx, %si			# Scan all modes
+	jz	cmt2
+
+	leaw	modelist, %bx			# Find in previous entries
+	movw	2(%si), %cx
+cmt3:	cmpw	%bx, %si
+	jz	cmt4
+
+	cmpw	2(%bx), %cx			# Found => don't copy this entry
+	jz	cmt5
+
+	addw	$4, %bx
+	jmp	cmt3
+
+cmt4:	movsl					# Copy entry
+	jmp	cmt1
+
+cmt5:	addw	$4, %si				# Skip entry
+	jmp	cmt1
+
+cmt2:
+#endif	/* CONFIG_VIDEO_COMPACT */
+
+	movw	$ASK_VGA, (%di)			# End marker
+	movw	%di, mt_end
+mtab1:	leaw	modelist, %si			# SI=mode list, DI=list end
+ret0:	ret
+
+# Modes usable on all standard VGAs
+vga_modes:
+	.word	VIDEO_8POINT
+	.word	0x5032				# 80x50
+	.word	VIDEO_80x43
+	.word	0x502b				# 80x43
+	.word	VIDEO_80x28
+	.word	0x501c				# 80x28
+	.word	VIDEO_80x30
+	.word	0x501e				# 80x30
+	.word	VIDEO_80x34
+	.word	0x5022				# 80x34
+	.word	VIDEO_80x60
+	.word	0x503c				# 80x60
+#ifdef CONFIG_VIDEO_GFX_HACK
+	.word	VIDEO_GFX_HACK
+	.word	VIDEO_GFX_DUMMY_RESOLUTION
+#endif
+
+vga_modes_end:
+# Detect VESA modes.
+
+#ifdef CONFIG_VIDEO_VESA
+vesa_modes:
+	cmpb	$2, adapter			# VGA only
+	jnz	ret0
+
+	movw	%di, %bp			# BP=original mode table end
+	addw	$0x200, %di			# Buffer space
+	movw	$0x4f00, %ax			# VESA Get card info call
+	int	$0x10
+	movw	%bp, %di
+	cmpw	$0x004f, %ax			# Successful?
+	jnz	ret0
+	
+	cmpw	$0x4556, 0x200(%di)
+	jnz	ret0
+	
+	cmpw	$0x4153, 0x202(%di)
+	jnz	ret0
+	
+	movw	$vesa_name, card_name		# Set name to "VESA VGA"
+	pushw	%gs
+	lgsw	0x20e(%di), %si			# GS:SI=mode list
+	movw	$128, %cx			# Iteration limit
+vesa1:
+# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
+# XXX:	lodsw	%gs:(%si), %ax			# Get next mode in the list
+	gs; lodsw
+	cmpw	$0xffff, %ax			# End of the table?
+	jz	vesar
+	
+	cmpw	$0x0080, %ax			# Check validity of mode ID
+	jc	vesa2
+	
+	orb	%ah, %ah		# Valid IDs: 0x0000-0x007f/0x0100-0x07ff
+	jz	vesan			# Certain BIOSes report 0x80-0xff!
+
+	cmpw	$0x0800, %ax
+	jnc	vesae
+
+vesa2:	pushw	%cx
+	movw	%ax, %cx			# Get mode information structure
+	movw	$0x4f01, %ax
+	int	$0x10
+	movw	%cx, %bx			# BX=mode number
+	addb	$VIDEO_FIRST_VESA>>8, %bh
+	popw	%cx
+	cmpw	$0x004f, %ax
+	jnz	vesan			# Don't report errors (buggy BIOSES)
+
+	movb	(%di), %al			# Check capabilities. We require
+	andb	$0x19, %al			# a color text mode.
+	cmpb	$0x09, %al
+	jnz	vesan
+	
+	cmpw	$0xb800, 8(%di)		# Standard video memory address required
+	jnz	vesan
+
+	testb	$2, (%di)			# Mode characteristics supplied?
+	movw	%bx, (%di)			# Store mode number
+	jz	vesa3
+	
+	xorw	%dx, %dx
+	movw	0x12(%di), %bx			# Width
+	orb	%bh, %bh
+	jnz	vesan
+	
+	movb	%bl, 0x3(%di)
+	movw	0x14(%di), %ax			# Height
+	orb	%ah, %ah
+	jnz	vesan
+	
+	movb	%al, 2(%di)
+	mulb	%bl
+	cmpw	$8193, %ax		# Small enough for Linux console driver?
+	jnc	vesan
+
+	jmp	vesaok
+
+vesa3:	subw	$0x8108, %bx	# This mode has no detailed info specified,
+	jc	vesan		# so it must be a standard VESA mode.
+
+	cmpw	$5, %bx
+	jnc	vesan
+
+	movw	vesa_text_mode_table(%bx), %ax
+	movw	%ax, 2(%di)
+vesaok:	addw	$4, %di				# The mode is valid. Store it.
+vesan:	loop	vesa1			# Next mode. Limit exceeded => error
+vesae:	leaw	vesaer, %si
+	call	prtstr
+	movw	%bp, %di			# Discard already found modes.
+vesar:	popw	%gs
+	ret
+
+# Dimensions of standard VESA text modes
+vesa_text_mode_table:
+	.byte	60, 80				# 0108
+	.byte	25, 132				# 0109
+	.byte	43, 132				# 010A
+	.byte	50, 132				# 010B
+	.byte	60, 132				# 010C
+#endif	/* CONFIG_VIDEO_VESA */
+
+# Scan for video modes. A bit dirty, but should work.
+mode_scan:
+	movw	$0x0100, %cx			# Start with mode 0
+scm1:	movb	$0, %ah				# Test the mode
+	movb	%cl, %al
+	int	$0x10
+	movb	$0x0f, %ah
+	int	$0x10
+	cmpb	%cl, %al
+	jnz	scm2				# Mode not set
+
+	movw	$0x3c0, %dx			# Test if it's a text mode
+	movb	$0x10, %al			# Mode bits
+	call	inidx
+	andb	$0x03, %al
+	jnz	scm2
+	
+	movb	$0xce, %dl			# Another set of mode bits
+	movb	$0x06, %al
+	call	inidx
+	shrb	%al
+	jc	scm2
+	
+	movb	$0xd4, %dl			# Cursor location
+	movb	$0x0f, %al
+	call	inidx
+	orb	%al, %al
+	jnz	scm2
+	
+	movw	%cx, %ax			# Ok, store the mode
+	stosw
+	movb	%gs:(0x484), %al		# Number of rows
+	incb	%al
+	stosb
+	movw	%gs:(0x44a), %ax		# Number of columns
+	stosb
+scm2:	incb	%cl
+	jns	scm1
+	
+	movw	$0x0003, %ax			# Return back to mode 3
+	int	$0x10
+	ret
+
+tstidx:	outw	%ax, %dx			# OUT DX,AX and inidx
+inidx:	outb	%al, %dx			# Read from indexed VGA register
+	incw	%dx			# AL=index, DX=index reg port -> AL=data
+	inb	%dx, %al
+	decw	%dx
+	ret
+
+# Try to detect type of SVGA card and supply (usually approximate) video
+# mode table for it.
+
+#ifdef CONFIG_VIDEO_SVGA
+svga_modes:
+	leaw	svga_table, %si			# Test all known SVGA adapters
+dosvga:	lodsw
+	movw	%ax, %bp			# Default mode table
+	orw	%ax, %ax
+	jz	didsv1
+
+	lodsw					# Pointer to test routine
+	pushw	%si
+	pushw	%di
+	pushw	%es
+	movw	$0xc000, %bx
+	movw	%bx, %es
+	call	*%ax				# Call test routine
+	popw	%es
+	popw	%di
+	popw	%si
+	orw	%bp, %bp
+	jz	dosvga
+	
+	movw	%bp, %si			# Found, copy the modes
+	movb	svga_prefix, %ah
+cpsvga:	lodsb
+	orb	%al, %al
+	jz	didsv
+	
+	stosw
+	movsw
+	jmp	cpsvga
+
+didsv:	movw	%si, card_name			# Store pointer to card name
+didsv1:	ret
+
+# Table of all known SVGA cards. For each card, we store a pointer to
+# a table of video modes supported by the card and a pointer to a routine
+# used for testing of presence of the card. The video mode table is always
+# followed by the name of the card or the chipset.
+svga_table:
+	.word	ati_md, ati_test
+	.word	oak_md, oak_test
+	.word	paradise_md, paradise_test
+	.word	realtek_md, realtek_test
+	.word	s3_md, s3_test
+	.word	chips_md, chips_test
+	.word	video7_md, video7_test
+	.word	cirrus5_md, cirrus5_test
+	.word	cirrus6_md, cirrus6_test
+	.word	cirrus1_md, cirrus1_test
+	.word	ahead_md, ahead_test
+	.word	everex_md, everex_test
+	.word	genoa_md, genoa_test
+	.word	trident_md, trident_test
+	.word	tseng_md, tseng_test
+	.word	0
+
+# Test routines and mode tables:
+
+# S3 - The test algorithm was taken from the SuperProbe package
+# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
+s3_test:
+	movw	$0x0f35, %cx	# we store some constants in cl/ch
+	movw	$0x03d4, %dx
+	movb	$0x38, %al
+	call	inidx
+	movb	%al, %bh	# store current CRT-register 0x38
+	movw	$0x0038, %ax
+	call	outidx		# disable writing to special regs
+	movb	%cl, %al	# check whether we can write special reg 0x35
+	call	inidx
+	movb	%al, %bl	# save the current value of CRT reg 0x35
+	andb	$0xf0, %al	# clear bits 0-3
+	movb	%al, %ah
+	movb	%cl, %al	# and write it to CRT reg 0x35
+	call	outidx
+	call	inidx		# now read it back
+	andb	%ch, %al	# clear the upper 4 bits
+	jz	s3_2		# the first test failed. But we have a
+
+	movb	%bl, %ah	# second chance
+	movb	%cl, %al
+	call	outidx
+	jmp	s3_1		# do the other tests
+
+s3_2:	movw	%cx, %ax	# load ah with 0xf and al with 0x35
+	orb	%bl, %ah	# set the upper 4 bits of ah with the orig value
+	call	outidx		# write ...
+	call	inidx		# ... and reread 
+	andb	%cl, %al	# turn off the upper 4 bits
+	pushw	%ax
+	movb	%bl, %ah	# restore old value in register 0x35
+	movb	%cl, %al
+	call	outidx
+	popw	%ax
+	cmpb	%ch, %al	# setting lower 4 bits was successful => bad
+	je	no_s3		# writing is allowed => this is not an S3
+
+s3_1:	movw	$0x4838, %ax	# allow writing to special regs by putting
+	call	outidx		# magic number into CRT-register 0x38
+	movb	%cl, %al	# check whether we can write special reg 0x35
+	call	inidx
+	movb	%al, %bl
+	andb	$0xf0, %al
+	movb	%al, %ah
+	movb	%cl, %al
+	call	outidx
+	call	inidx
+	andb	%ch, %al
+	jnz	no_s3		# no, we can't write => no S3
+
+	movw	%cx, %ax
+	orb	%bl, %ah
+	call	outidx
+	call	inidx
+	andb	%ch, %al
+	pushw	%ax
+	movb	%bl, %ah	# restore old value in register 0x35
+	movb	%cl, %al
+	call	outidx
+	popw	%ax
+	cmpb	%ch, %al
+	jne	no_s31		# writing not possible => no S3
+	movb	$0x30, %al
+	call	inidx		# now get the S3 id ...
+	leaw	idS3, %di
+	movw	$0x10, %cx
+	repne
+	scasb
+	je	no_s31
+
+	movb	%bh, %ah
+	movb	$0x38, %al
+	jmp	s3rest
+
+no_s3:	movb	$0x35, %al	# restore CRT register 0x35
+	movb	%bl, %ah
+	call	outidx
+no_s31:	xorw	%bp, %bp	# Detection failed
+s3rest:	movb	%bh, %ah
+	movb	$0x38, %al	# restore old value of CRT register 0x38
+	jmp	outidx
+
+idS3:	.byte	0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
+	.byte	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
+
+s3_md:	.byte	0x54, 0x2b, 0x84
+	.byte	0x55, 0x19, 0x84
+	.byte	0
+	.ascii	"S3"
+	.byte	0
+
+# ATI cards.
+ati_test:
+	leaw 	idati, %si
+	movw	$0x31, %di
+	movw	$0x09, %cx
+	repe
+	cmpsb
+	je	atiok
+
+	xorw	%bp, %bp
+atiok:	ret
+
+idati:	.ascii	"761295520"
+
+ati_md:	.byte	0x23, 0x19, 0x84
+	.byte	0x33, 0x2c, 0x84
+	.byte	0x22, 0x1e, 0x64
+	.byte	0x21, 0x19, 0x64
+	.byte	0x58, 0x21, 0x50
+	.byte	0x5b, 0x1e, 0x50
+	.byte	0
+	.ascii	"ATI"
+	.byte	0
+
+# AHEAD
+ahead_test:
+	movw	$0x200f, %ax
+	movw	$0x3ce, %dx
+	outw	%ax, %dx
+	incw	%dx
+	inb	%dx, %al
+	cmpb	$0x20, %al
+	je	isahed
+
+	cmpb	$0x21, %al
+	je	isahed
+	
+	xorw	%bp, %bp
+isahed:	ret
+
+ahead_md:
+	.byte	0x22, 0x2c, 0x84
+	.byte	0x23, 0x19, 0x84
+	.byte	0x24, 0x1c, 0x84
+	.byte	0x2f, 0x32, 0xa0
+	.byte	0x32, 0x22, 0x50
+	.byte	0x34, 0x42, 0x50
+	.byte	0
+	.ascii	"Ahead"
+	.byte	0
+
+# Chips & Tech.
+chips_test:
+	movw	$0x3c3, %dx
+	inb	%dx, %al
+	orb	$0x10, %al
+	outb	%al, %dx
+	movw	$0x104, %dx
+	inb	%dx, %al
+	movb	%al, %bl
+	movw	$0x3c3, %dx
+	inb	%dx, %al
+	andb	$0xef, %al
+	outb	%al, %dx
+	cmpb	$0xa5, %bl
+	je	cantok
+	
+	xorw	%bp, %bp
+cantok:	ret
+
+chips_md:
+	.byte	0x60, 0x19, 0x84
+	.byte	0x61, 0x32, 0x84
+	.byte	0
+	.ascii	"Chips & Technologies"
+	.byte	0
+
+# Cirrus Logic 5X0
+cirrus1_test:
+	movw	$0x3d4, %dx
+	movb	$0x0c, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bl
+	xorb	%al, %al
+	outb	%al, %dx
+	decw	%dx
+	movb	$0x1f, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bh
+	xorb	%ah, %ah
+	shlb	$4, %al
+	movw	%ax, %cx
+	movb	%bh, %al
+	shrb	$4, %al
+	addw	%ax, %cx
+	shlw	$8, %cx
+	addw	$6, %cx
+	movw	%cx, %ax
+	movw	$0x3c4, %dx
+	outw	%ax, %dx
+	incw	%dx
+	inb	%dx, %al
+	andb	%al, %al
+	jnz	nocirr
+	
+	movb	%bh, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	cmpb	$0x01, %al
+	je	iscirr
+
+nocirr:	xorw	%bp, %bp
+iscirr: movw	$0x3d4, %dx
+	movb	%bl, %al
+	xorb	%ah, %ah
+	shlw	$8, %ax
+	addw	$0x0c, %ax
+	outw	%ax, %dx
+	ret
+
+cirrus1_md:
+	.byte	0x1f, 0x19, 0x84
+	.byte	0x20, 0x2c, 0x84
+	.byte	0x22, 0x1e, 0x84
+	.byte	0x31, 0x25, 0x64
+	.byte	0
+	.ascii	"Cirrus Logic 5X0"
+	.byte	0
+
+# Cirrus Logic 54XX
+cirrus5_test:
+	movw	$0x3c4, %dx
+	movb	$6, %al
+	call	inidx
+	movb	%al, %bl			# BL=backup
+	movw	$6, %ax
+	call	tstidx
+	cmpb	$0x0f, %al
+	jne	c5fail
+	
+	movw	$0x1206, %ax
+	call	tstidx
+	cmpb	$0x12, %al
+	jne	c5fail
+	
+	movb	$0x1e, %al
+	call	inidx
+	movb	%al, %bh
+	movb	%bh, %ah
+	andb	$0xc0, %ah
+	movb	$0x1e, %al
+	call	tstidx
+	andb	$0x3f, %al
+	jne	c5xx
+	
+	movb	$0x1e, %al
+	movb	%bh, %ah
+	orb	$0x3f, %ah
+	call	tstidx
+	xorb	$0x3f, %al
+	andb	$0x3f, %al
+c5xx:	pushf
+	movb	$0x1e, %al
+	movb	%bh, %ah
+	outw	%ax, %dx
+	popf
+	je	c5done
+
+c5fail:	xorw	%bp, %bp
+c5done:	movb	$6, %al
+	movb	%bl, %ah
+	outw	%ax, %dx
+	ret
+
+cirrus5_md:
+	.byte	0x14, 0x19, 0x84
+	.byte	0x54, 0x2b, 0x84
+	.byte	0
+	.ascii	"Cirrus Logic 54XX"
+	.byte	0
+
+# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
+# it's misidentified by the Ahead test.
+cirrus6_test:
+	movw	$0x3ce, %dx
+	movb	$0x0a, %al
+	call	inidx
+	movb	%al, %bl	# BL=backup
+	movw	$0xce0a, %ax
+	call	tstidx
+	orb	%al, %al
+	jne	c2fail
+	
+	movw	$0xec0a, %ax
+	call	tstidx
+	cmpb	$0x01, %al
+	jne	c2fail
+	
+	movb	$0xaa, %al
+	call	inidx		# 4X, 5X, 7X and 8X are valid 64XX chip ID's. 
+	shrb	$4, %al
+	subb	$4, %al
+	jz	c6done
+	
+	decb	%al
+	jz	c6done
+	
+	subb	$2, %al
+	jz	c6done
+	
+	decb	%al
+	jz	c6done
+	
+c2fail:	xorw	%bp, %bp
+c6done:	movb	$0x0a, %al
+	movb	%bl, %ah
+	outw	%ax, %dx
+	ret
+
+cirrus6_md:
+	.byte	0
+	.ascii	"Cirrus Logic 64XX"
+	.byte	0
+
+# Everex / Trident
+everex_test:
+	movw	$0x7000, %ax
+	xorw	%bx, %bx
+	int	$0x10
+	cmpb	$0x70, %al
+	jne	noevrx
+	
+	shrw	$4, %dx
+	cmpw	$0x678, %dx
+	je	evtrid
+	
+	cmpw	$0x236, %dx
+	jne	evrxok
+
+evtrid:	leaw	trident_md, %bp
+evrxok:	ret
+
+noevrx:	xorw	%bp, %bp
+	ret
+
+everex_md:
+	.byte	0x03, 0x22, 0x50
+	.byte	0x04, 0x3c, 0x50
+	.byte	0x07, 0x2b, 0x64
+	.byte	0x08, 0x4b, 0x64
+	.byte	0x0a, 0x19, 0x84
+	.byte	0x0b, 0x2c, 0x84
+	.byte	0x16, 0x1e, 0x50
+	.byte	0x18, 0x1b, 0x64
+	.byte	0x21, 0x40, 0xa0
+	.byte	0x40, 0x1e, 0x84
+	.byte	0
+	.ascii	"Everex/Trident"
+	.byte	0
+
+# Genoa.
+genoa_test:
+	leaw	idgenoa, %si			# Check Genoa 'clues'
+	xorw	%ax, %ax
+	movb	%es:(0x37), %al
+	movw	%ax, %di
+	movw	$0x04, %cx
+	decw	%si
+	decw	%di
+l1:	incw	%si
+	incw	%di
+	movb	(%si), %al
+	testb	%al, %al
+	jz	l2
+
+	cmpb	%es:(%di), %al
+l2:	loope 	l1
+	orw	%cx, %cx
+	je	isgen
+	
+	xorw	%bp, %bp
+isgen:	ret
+
+idgenoa: .byte	0x77, 0x00, 0x99, 0x66
+
+genoa_md:
+	.byte	0x58, 0x20, 0x50
+	.byte	0x5a, 0x2a, 0x64
+	.byte	0x60, 0x19, 0x84
+	.byte	0x61, 0x1d, 0x84
+	.byte	0x62, 0x20, 0x84
+	.byte	0x63, 0x2c, 0x84
+	.byte	0x64, 0x3c, 0x84
+	.byte	0x6b, 0x4f, 0x64
+	.byte	0x72, 0x3c, 0x50
+	.byte	0x74, 0x42, 0x50
+	.byte	0x78, 0x4b, 0x64
+	.byte	0
+	.ascii	"Genoa"
+	.byte	0
+
+# OAK
+oak_test:
+	leaw	idoakvga, %si
+	movw	$0x08, %di
+	movw	$0x08, %cx
+	repe
+	cmpsb
+	je	isoak
+	
+	xorw	%bp, %bp
+isoak:	ret
+
+idoakvga: .ascii  "OAK VGA "
+
+oak_md: .byte	0x4e, 0x3c, 0x50
+	.byte	0x4f, 0x3c, 0x84
+	.byte	0x50, 0x19, 0x84
+	.byte	0x51, 0x2b, 0x84
+	.byte	0
+	.ascii	"OAK"
+	.byte	0
+
+# WD Paradise.
+paradise_test:
+	leaw	idparadise, %si
+	movw	$0x7d, %di
+	movw	$0x04, %cx
+	repe
+	cmpsb
+	je	ispara
+	
+	xorw	%bp, %bp
+ispara:	ret
+
+idparadise:	.ascii	"VGA="
+
+paradise_md:
+	.byte	0x41, 0x22, 0x50
+	.byte	0x47, 0x1c, 0x84
+	.byte	0x55, 0x19, 0x84
+	.byte	0x54, 0x2c, 0x84
+	.byte	0
+	.ascii	"Paradise"
+	.byte	0
+
+# Trident.
+trident_test:
+	movw	$0x3c4, %dx
+	movb	$0x0e, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	xchgb	%al, %ah
+	xorb	%al, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	xchgb	%ah, %al
+	movb	%al, %bl	# Strange thing ... in the book this wasn't
+	andb	$0x02, %bl	# necessary but it worked on my card which
+	jz	setb2		# is a trident. Without it the screen goes
+				# blurred ...
+	andb	$0xfd, %al
+	jmp	clrb2		
+
+setb2:	orb	$0x02, %al	
+clrb2:	outb	%al, %dx
+	andb	$0x0f, %ah
+	cmpb	$0x02, %ah
+	je	istrid
+
+	xorw	%bp, %bp
+istrid:	ret
+
+trident_md:
+	.byte	0x50, 0x1e, 0x50
+	.byte	0x51, 0x2b, 0x50
+	.byte	0x52, 0x3c, 0x50
+	.byte	0x57, 0x19, 0x84
+	.byte	0x58, 0x1e, 0x84
+	.byte	0x59, 0x2b, 0x84
+	.byte	0x5a, 0x3c, 0x84
+	.byte	0
+	.ascii	"Trident"
+	.byte	0
+
+# Tseng.
+tseng_test:
+	movw	$0x3cd, %dx
+	inb	%dx, %al	# Could things be this simple ! :-)
+	movb	%al, %bl
+	movb	$0x55, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	movb	%al, %ah
+	movb	%bl, %al
+	outb	%al, %dx
+	cmpb	$0x55, %ah
+ 	je	istsen
+
+isnot:	xorw	%bp, %bp
+istsen:	ret
+
+tseng_md:
+	.byte	0x26, 0x3c, 0x50
+	.byte	0x2a, 0x28, 0x64
+	.byte	0x23, 0x19, 0x84
+	.byte	0x24, 0x1c, 0x84
+	.byte	0x22, 0x2c, 0x84
+	.byte	0x21, 0x3c, 0x84
+	.byte	0
+	.ascii	"Tseng"
+	.byte	0
+
+# Video7.
+video7_test:
+	movw	$0x3cc, %dx
+	inb	%dx, %al
+	movw	$0x3b4, %dx
+	andb	$0x01, %al
+	jz	even7
+
+	movw	$0x3d4, %dx
+even7:	movb	$0x0c, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bl
+	movb	$0x55, %al
+	outb	%al, %dx
+	inb	%dx, %al
+	decw	%dx
+	movb	$0x1f, %al
+	outb	%al, %dx
+	incw	%dx
+	inb	%dx, %al
+	movb	%al, %bh
+	decw	%dx
+	movb	$0x0c, %al
+	outb	%al, %dx
+	incw	%dx
+	movb	%bl, %al
+	outb	%al, %dx
+	movb	$0x55, %al
+	xorb	$0xea, %al
+	cmpb	%bh, %al
+	jne	isnot
+	
+	movb	$VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
+	ret
+
+video7_md:
+	.byte	0x40, 0x2b, 0x50
+	.byte	0x43, 0x3c, 0x50
+	.byte	0x44, 0x3c, 0x64
+	.byte	0x41, 0x19, 0x84
+	.byte	0x42, 0x2c, 0x84
+	.byte	0x45, 0x1c, 0x84
+	.byte	0
+	.ascii	"Video 7"
+	.byte	0
+
+# Realtek VGA
+realtek_test:
+	leaw	idrtvga, %si
+	movw	$0x45, %di
+	movw	$0x0b, %cx
+	repe
+	cmpsb
+	je	isrt
+	
+	xorw	%bp, %bp
+isrt:	ret
+
+idrtvga:	.ascii	"REALTEK VGA"
+
+realtek_md:
+	.byte	0x1a, 0x3c, 0x50
+	.byte	0x1b, 0x19, 0x84
+	.byte	0x1c, 0x1e, 0x84
+	.byte	0x1d, 0x2b, 0x84
+	.byte	0x1e, 0x3c, 0x84
+	.byte	0
+	.ascii	"REALTEK"
+	.byte	0
+
+#endif	/* CONFIG_VIDEO_SVGA */
+
+# User-defined local mode table (VGA only)
+#ifdef CONFIG_VIDEO_LOCAL
+local_modes:
+	leaw	local_mode_table, %si
+locm1:	lodsw
+	orw	%ax, %ax
+	jz	locm2
+	
+	stosw
+	movsw
+	jmp	locm1
+
+locm2:	ret
+
+# This is the table of local video modes which can be supplied manually
+# by the user. Each entry consists of mode ID (word) and dimensions
+# (byte for column count and another byte for row count). These modes
+# are placed before all SVGA and VESA modes and override them if table
+# compacting is enabled. The table must end with a zero word followed
+# by NUL-terminated video adapter name.
+local_mode_table:
+	.word	0x0100				# Example: 40x25
+	.byte	25,40
+	.word	0
+	.ascii	"Local"
+	.byte	0
+#endif	/* CONFIG_VIDEO_LOCAL */
+
+# Read a key and return the ASCII code in al, scan code in ah
+getkey:	xorb	%ah, %ah
+	int	$0x16
+	ret
+
+# Read a key with a timeout of 30 seconds.
+# The hardware clock is used to get the time.
+getkt:	call	gettime
+	addb	$30, %al			# Wait 30 seconds
+	cmpb	$60, %al
+	jl	lminute
+
+	subb	$60, %al
+lminute:
+	movb	%al, %cl
+again:	movb	$0x01, %ah
+	int	$0x16
+	jnz	getkey				# key pressed, so get it
+
+	call	gettime
+	cmpb	%cl, %al
+	jne	again
+
+	movb	$0x20, %al			# timeout, return `space'
+	ret
+
+# Flush the keyboard buffer
+flush:	movb	$0x01, %ah
+	int	$0x16
+	jz	empty
+	
+	xorb	%ah, %ah
+	int	$0x16
+	jmp	flush
+
+empty:	ret
+
+# Print hexadecimal number.
+prthw:	pushw	%ax
+	movb	%ah, %al
+	call	prthb
+	popw	%ax
+prthb:	pushw	%ax
+	shrb	$4, %al
+	call	prthn
+	popw	%ax
+	andb	$0x0f, %al
+prthn:	cmpb	$0x0a, %al
+	jc	prth1
+
+	addb	$0x07, %al
+prth1:	addb	$0x30, %al
+	jmp	prtchr
+
+# Print decimal number in al
+prtdec:	pushw	%ax
+	pushw	%cx
+	xorb	%ah, %ah
+	movb	$0x0a, %cl
+	idivb	%cl
+	cmpb	$0x09, %al
+	jbe	lt100
+
+	call	prtdec
+	jmp	skip10
+
+lt100:	addb	$0x30, %al
+	call	prtchr
+skip10:	movb	%ah, %al
+	addb	$0x30, %al
+	call	prtchr	
+	popw	%cx
+	popw	%ax
+	ret
+
+store_edid:
+	pushw	%es				# just save all registers
+	pushw	%ax
+	pushw	%bx
+	pushw   %cx
+	pushw	%dx
+	pushw   %di
+
+	pushw	%fs
+	popw    %es
+
+	movl	$0x13131313, %eax		# memset block with 0x13
+	movw    $32, %cx
+	movw	$0x140, %di
+	cld
+	rep
+	stosl
+
+	movw	$0x4f15, %ax                    # do VBE/DDC
+	movw	$0x01, %bx
+	movw	$0x00, %cx
+	movw    $0x00, %dx
+	movw	$0x140, %di
+	int	$0x10
+
+	popw	%di				# restore all registers
+	popw	%dx
+	popw	%cx
+	popw	%bx
+	popw	%ax
+	popw	%es
+	ret
+
+# VIDEO_SELECT-only variables
+mt_end:		.word	0	# End of video mode table if built
+edit_buf:	.space	6	# Line editor buffer
+card_name:	.word	0	# Pointer to adapter name
+scanning:	.byte	0	# Performing mode scan
+do_restore:	.byte	0	# Screen contents altered during mode change
+svga_prefix:	.byte	VIDEO_FIRST_BIOS>>8	# Default prefix for BIOS modes
+graphic_mode:	.byte	0	# Graphic mode with a linear frame buffer
+dac_size:	.byte	6	# DAC bit depth
+
+# Status messages
+keymsg:		.ascii	"Press <RETURN> to see video modes available, "
+		.ascii	"<SPACE> to continue or wait 30 secs"
+		.byte	0x0d, 0x0a, 0
+
+listhdr:	.byte	0x0d, 0x0a
+		.ascii	"Mode:    COLSxROWS:"
+
+crlft:		.byte	0x0d, 0x0a, 0
+
+prompt:		.byte	0x0d, 0x0a
+		.asciz	"Enter mode number or `scan': "
+
+unknt:		.asciz	"Unknown mode ID. Try again."
+
+badmdt:		.ascii	"You passed an undefined mode number."
+		.byte	0x0d, 0x0a, 0
+
+vesaer:		.ascii	"Error: Scanning of VESA modes failed. Please "
+		.ascii	"report to <mj@ucw.cz>."
+		.byte	0x0d, 0x0a, 0
+
+old_name:	.asciz	"CGA/MDA/HGA"
+
+ega_name:	.asciz	"EGA"
+
+svga_name:	.ascii	" "
+
+vga_name:	.asciz	"VGA"
+
+vesa_name:	.asciz	"VESA"
+
+name_bann:	.asciz	"Video adapter: "
+#endif /* CONFIG_VIDEO_SELECT */
+
+# Other variables:
+adapter:	.byte	0	# Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
+video_segment:	.word	0xb800	# Video memory segment
+force_size:	.word	0	# Use this size instead of the one in BIOS vars
diff -urN oldtree/drivers/video/Kconfig newtree/drivers/video/Kconfig
--- oldtree/drivers/video/Kconfig	2006-02-18 15:18:29.156884728 +0000
+++ newtree/drivers/video/Kconfig	2006-02-18 15:28:25.307256096 +0000
@@ -454,8 +454,8 @@
 	  cards. Say Y if you have one of those.
 
 config FB_VESA
-	bool "VESA VGA graphics support"
-	depends on (FB = y) && X86
+	tristate "VESA VGA graphics support"
+	depends on (FB = y) && (X86 || X86_64)
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
@@ -465,6 +465,48 @@
 	  You will get a boot time penguin logo at no additional cost. Please
 	  read <file:Documentation/fb/vesafb.txt>. If unsure, say Y.
 
+choice
+	prompt "VESA driver type"
+	depends on FB_VESA
+	default FB_VESA_STD if X86_64
+	default FB_VESA_TNG if X86
+
+config FB_VESA_STD
+	bool "vesafb"
+	help
+	  This is the frame buffer device driver for generic VESA 2.0
+	  compliant graphic cards. The older VESA 1.2 cards are not supported.
+	  You will get a boot time penguin logo at no additional cost. Please
+	  read <file:Documentation/fb/vesafb.txt>. Choose this driver if you
+	  are experiencing problems with vesafb-tng or if you own a 64-bit system.
+
+	  Note that this driver cannot be compiled as a module.
+
+config FB_VESA_TNG
+	bool "vesafb-tng"
+	depends on !X86_64
+	select FB_MODE_HELPERS
+	help
+	  This is the frame buffer device driver for generic VESA 2.0
+	  compliant graphic cards. It is capable of taking advantage of
+	  VBE 3.0 features. With this driver you will be able to adjust
+	  the refresh rate (VBE 3.0 compliant boards only) and change
+	  the graphic mode on-the-fly.
+
+	  You will also get a boot time penguin logo at no additional
+	  cost. Please read <file:Documentation/fb/vesafb.txt>.
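+
+	  As an example of the modedb-style mode selection, booting with
+	  something like "video=vesafb:1024x768@75" should give you a
+	  1024x768 mode with a 75 Hz refresh rate, provided the board
+	  and display support it.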
+
+endchoice
+
+config FB_VESA_DEFAULT_MODE
+	string "VESA default mode"
+	depends on FB_VESA_TNG
+	default "640x480@60"
+	help
+	  This option determines the default video mode that vesafb-tng
+	  will switch to if no mode is specified on the kernel command
+	  line.
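+	  For example, a value like "1024x768@75" requests a 1024x768
+	  mode with a 75 Hz refresh rate (refresh rates other than the
+	  standard 60 Hz require a VBE 3.0 compliant board).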
+
 config VIDEO_SELECT
 	bool
 	depends on FB_VESA
diff -urN oldtree/drivers/video/Makefile newtree/drivers/video/Makefile
--- oldtree/drivers/video/Makefile	2006-01-03 03:21:10.000000000 +0000
+++ newtree/drivers/video/Makefile	2006-02-18 15:28:25.308255944 +0000
@@ -96,7 +96,11 @@
 obj-$(CONFIG_FB_S3C2410)	  += s3c2410fb.o
 
 # Platform or fallback drivers go here
-obj-$(CONFIG_FB_VESA)             += vesafb.o
+ifeq ($(CONFIG_FB_VESA_STD),y)
+  obj-y				  += vesafb.o
+else
+  obj-$(CONFIG_FB_VESA)		  += vesafb-thread.o vesafb-tng.o
+endif
 obj-$(CONFIG_FB_VGA16)            += vga16fb.o vgastate.o
 obj-$(CONFIG_FB_OF)               += offb.o
 
diff -urN oldtree/drivers/video/fbmem.c newtree/drivers/video/fbmem.c
--- oldtree/drivers/video/fbmem.c	2006-02-18 15:18:29.188879864 +0000
+++ newtree/drivers/video/fbmem.c	2006-02-18 15:28:25.310255640 +0000
@@ -1430,6 +1430,7 @@
 		printk(KERN_WARNING "Unable to create fb class; errno = %ld\n", PTR_ERR(fb_class));
 		fb_class = NULL;
 	}
+
 	return 0;
 }
 
diff -urN oldtree/drivers/video/modedb.c newtree/drivers/video/modedb.c
--- oldtree/drivers/video/modedb.c	2006-01-03 03:21:10.000000000 +0000
+++ newtree/drivers/video/modedb.c	2006-02-18 15:28:25.312255336 +0000
@@ -667,6 +667,7 @@
 {
 	u32 pixclock, hfreq, htotal, vtotal;
 
+	mode->refresh = 0;
 	mode->name = NULL;
 	mode->xres = var->xres;
 	mode->yres = var->yres;
diff -urN oldtree/drivers/video/vesafb-thread.c newtree/drivers/video/vesafb-thread.c
--- oldtree/drivers/video/vesafb-thread.c	1970-01-01 00:00:00.000000000 +0000
+++ newtree/drivers/video/vesafb-thread.c	2006-02-18 15:28:25.313255184 +0000
@@ -0,0 +1,722 @@
+/*
+ * Framebuffer driver for VBE 2.0+ compliant graphic boards.
+ * Kernel thread and vm86 routines.
+ *
+ * (c) 2004-2005 Michal Januszewski <spock@gentoo.org>
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/signal.h>
+#include <linux/suspend.h>
+#include <linux/unistd.h>
+#include <video/vesa.h>
+#include <video/edid.h>
+#include <asm/mman.h>
+#include <asm/page.h>
+#include <asm/vm86.h>
+#include <asm/thread_info.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include "edid.h"
+
+#ifdef MODULE
+int errno;
+#endif
+
+static DECLARE_COMPLETION(vesafb_th_completion);
+static DECLARE_MUTEX(vesafb_task_list_sem);
+static LIST_HEAD(vesafb_task_list);
+static DECLARE_WAIT_QUEUE_HEAD(vesafb_wait);
+
+static struct vm86_struct vm86;
+static int vesafb_pid = 0;
+
+_syscall3(int,ioperm,unsigned long, a, unsigned long, b, unsigned long, c);
+_syscall1(int,vm86old,struct vm86_struct __user*, v86);
+
+#define DEFAULT_VM86_FLAGS (IF_MASK | IOPL_MASK)
+#define VM86_PUSHW(x)					\
+do {							\
+	vm86.regs.esp -= 2;				\
+	*(u16 *)(STACK_ADDR + vm86.regs.esp) = x;	\
+} while (0)
+
+/* Stack, the return code and buffers will be put into
+ * one contiguous memory chunk:
+ *
+ * [ STACK | RET_CODE | BUFFER ]
+ *
+ * We only need a buffer that is ca. 0x2000 bytes in size.
+ * Some video BIOSes (sis6326) try to store data somewhere
+ * in 0x7000-0x7fff, so we zeromap more memory to be safe.
+ */
+#define IVTBDA_SIZE 	PAGE_SIZE
+#define RET_CODE_SIZE	0x0010
+#define STACK_SIZE	0x0500
+#define BUFFER_SIZE	0x10000
+
+/* The amount of memory that will be allocated should be a multiple
+ * of PAGE_SIZE. */
+#define __MEM_SIZE 	(RET_CODE_SIZE + STACK_SIZE + BUFFER_SIZE)
+#define REAL_MEM_SIZE	(((__MEM_SIZE / PAGE_SIZE) + 1) * PAGE_SIZE)
+
+#define IVTBDA_ADDR	0x00000
+#define STACK_ADDR	(IVTBDA_ADDR + IVTBDA_SIZE)
+#define RET_CODE_ADDR	(STACK_ADDR + STACK_SIZE)
+#define BUF_ADDR	(RET_CODE_ADDR + RET_CODE_SIZE)
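+/* For reference, assuming 4 KiB pages (PAGE_SIZE == 0x1000), the layout
+ * works out to: IVT/BDA page at 0x0000-0x0fff, stack at 0x1000-0x14ff,
+ * return code at 0x1500-0x150f and the buffer starting at 0x1510. */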
+
+#define FLAG_D 		(1 << 10)
+
+/* Segment prefix opcodes */
+enum {
+	P_CS = 0x2e,
+	P_SS = 0x36,
+	P_DS = 0x3e,
+	P_ES = 0x26,
+	P_FS = 0x64,
+	P_GS = 0x65
+};
+
+/* Emulated vm86 ins instruction */
+static void vm86_ins(int size)
+{
+	u32 edx, edi;
+	edx = vm86.regs.edx & 0xffff;
+	edi = (vm86.regs.edi & 0xffff) + (u32)(vm86.regs.es << 4);
+
+	if (vm86.regs.eflags & FLAG_D)
+		asm volatile ("std\n");
+	else
+		asm volatile ("cld\n");
+
+	switch (size) {
+	case 4:
+		asm volatile ("insl\n" : "=D" (edi) : "d" (edx), "0" (edi));
+		break;
+	case 2:
+		asm volatile ("insw\n" : "=D" (edi) : "d" (edx), "0" (edi));
+		break;
+	case 1:
+		asm volatile ("insb\n" : "=D" (edi) : "d" (edx), "0" (edi));
+		break;
+	}
+
+	if (vm86.regs.eflags & FLAG_D)
+		asm volatile ("cld\n");
+
+	edi -= (u32)(vm86.regs.es << 4);
+
+	vm86.regs.edi &= 0xffff0000;
+	vm86.regs.edi |= edi & 0xffff;
+}
+
+static void vm86_rep_ins(int size)
+{
+	u16 cx = vm86.regs.ecx;
+	while (cx--)
+		vm86_ins(size);
+
+	vm86.regs.ecx &= 0xffff0000;
+}
+
+/* Emulated vm86 outs instruction */
+static void vm86_outs(int size, int segment)
+{
+	u32 edx, esi, base;
+
+	edx = vm86.regs.edx & 0xffff;
+	esi = vm86.regs.esi & 0xffff;
+
+	switch (segment) {
+	case P_CS: base = vm86.regs.cs; break;
+	case P_SS: base = vm86.regs.ss; break;
+	case P_ES: base = vm86.regs.es; break;
+	case P_FS: base = vm86.regs.fs; break;
+	case P_GS: base = vm86.regs.gs; break;
+	default:   base = vm86.regs.ds; break;
+	}
+
+	esi += base << 4;
+
+	if (vm86.regs.eflags & FLAG_D)
+		asm volatile ("std\n");
+	else
+		asm volatile ("cld\n");
+
+	switch (size) {
+	case 4:
+		asm volatile ("outsl\n" : "=S" (esi) : "d" (edx), "0" (esi));
+		break;
+	case 2:
+		asm volatile ("outsw\n" : "=S" (esi) : "d" (edx), "0" (esi));
+		break;
+	case 1:
+		asm volatile ("outsb\n" : "=S" (esi) : "d" (edx), "0" (esi));
+		break;
+	}
+
+	if (vm86.regs.eflags & FLAG_D)
+		asm volatile ("cld");
+
+	esi -= base << 4;
+	vm86.regs.esi &= 0xffff0000;
+	vm86.regs.esi |= (esi & 0xffff);
+}
+
+static void vm86_rep_outs(int size, int segment)
+{
+	u16 cx = vm86.regs.ecx;
+	while (cx--)
+		vm86_outs(size, segment);
+
+	vm86.regs.ecx &= 0xffff0000;
+}
+
+static int vm86_do_unknown(void)
+{
+	u8 data32 = 0, segment = P_DS, rep = 0;
+	u8 *instr;
+	int ret = 0, i = 0;
+
+	instr = (u8*)((vm86.regs.cs << 4) + vm86.regs.eip);
+
+	while (1) {
+		switch(instr[i]) {
+		case 0x66:	/* operand size prefix */
+			data32 = 1 - data32;
+			i++;
+			break;
+		case 0xf2:	/* repnz */
+		case 0xf3:	/* rep */
+			rep = 1;
+			i++;
+			break;
+		case P_CS:	/* segment prefix */
+		case P_SS:
+		case P_DS:
+		case P_ES:
+		case P_FS:
+		case P_GS:
+			segment = instr[i];
+			i++;
+			break;
+		case 0xf0:	/* LOCK - ignored */
+		case 0x67:	/* address size prefix - ignored */
+			i++;
+			break;
+		case 0x6c:	/* insb */
+			if (rep)
+				vm86_rep_ins(1);
+			else
+				vm86_ins(1);
+			i++;
+			goto out;
+		case 0x6d:	/* insw / insd */
+			if (rep) {
+				if (data32)
+					vm86_rep_ins(4);
+				else
+					vm86_rep_ins(2);
+			} else {
+				if (data32)
+					vm86_ins(4);
+				else
+					vm86_ins(2);
+			}
+			i++;
+			goto out;
+		case 0x6e:	/* outsb */
+			if (rep)
+				vm86_rep_outs(1, segment);
+			else
+				vm86_outs(1, segment);
+			i++;
+			goto out;
+		case 0x6f:	/* outsw / outsd */
+			if (rep) {
+				if (data32)
+					vm86_rep_outs(4, segment);
+				else
+					vm86_rep_outs(2, segment);
+			} else {
+				if (data32)
+					vm86_outs(4, segment);
+				else
+					vm86_outs(2, segment);
+			}
+			i++;
+			goto out;
+		case 0xe4:	/* inb xx */
+			asm volatile (
+				"inb %w1, %b0"
+				: "=a" (vm86.regs.eax)
+				: "d" (instr[i+1]), "0" (vm86.regs.eax));
+			i += 2;
+			goto out;
+		case 0xe5:	/* inw xx / ind xx */
+			if (data32) {
+				asm volatile (
+					"inl %w1, %0"
+					: "=a" (vm86.regs.eax)
+					: "d" (instr[i+1]),
+					  "0" (vm86.regs.eax));
+			} else {
+				asm volatile (
+					"inw %w1, %w0"
+					: "=a" (vm86.regs.eax)
+					: "d" (instr[i+1]),
+					  "0" (vm86.regs.eax));
+			}
+			i += 2;
+			goto out;
+
+		case 0xec:	/* inb dx */
+			asm volatile (
+				"inb %w1, %b0"
+	 			: "=a" (vm86.regs.eax)
+				: "d" (vm86.regs.edx), "0" (vm86.regs.eax));
+			i++;
+			goto out;
+		case 0xed:	/* inw dx / ind dx */
+			if (data32) {
+				asm volatile (
+					"inl %w1, %0"
+					: "=a" (vm86.regs.eax)
+					: "d" (vm86.regs.edx));
+			} else {
+				asm volatile (
+					"inw %w1, %w0"
+					: "=a" (vm86.regs.eax)
+					: "d" (vm86.regs.edx));
+			}
+			i++;
+			goto out;
+		case 0xe6:	/* outb xx */
+			asm volatile (
+				"outb %b0, %w1"
+				: /* no return value */
+				: "a" (vm86.regs.eax), "d" (instr[i+1]));
+			i += 2;
+			goto out;
+		case 0xe7:	/* outw xx / outd xx */
+			if (data32) {
+				asm volatile (
+					"outl %0, %w1"
+					: /* no return value */
+					: "a" (vm86.regs.eax),
+					  "d" (instr[i+1]));
+			} else {
+				asm volatile (
+					"outw %w0, %w1"
+					: /* no return value */
+					: "a" (vm86.regs.eax),
+					  "d" (instr[i+1]));
+			}
+			i += 2;
+			goto out;
+		case 0xee:	/* outb dx */
+			asm volatile (
+				"outb %b0, %w1"
+				: /* no return value */
+				: "a" (vm86.regs.eax), "d" (vm86.regs.edx));
+			i++;
+			goto out;
+		case 0xef:	/* outw dx / outd dx */
+			if (data32) {
+				asm volatile (
+					"outl %0, %w1"
+					: /* no return value */
+					: "a" (vm86.regs.eax),
+					  "d" (vm86.regs.edx));
+			} else {
+				asm volatile (
+					"outw %w0, %w1"
+					: /* no return value */
+					: "a" (vm86.regs.eax),
+					  "d" (vm86.regs.edx));
+			}
+			i++;
+			goto out;
+		default:
+			printk(KERN_ERR "vesafb: BUG, opcode 0x%x emulation "
+					"not supported (EIP: 0x%lx)\n",
+					instr[i], (u32)(vm86.regs.cs << 4) +
+					vm86.regs.eip);
+			ret = 1;
+			goto out;
+		}
+	}
+out: 	vm86.regs.eip += i;
+	return ret;
+}
+
+void vesafb_do_vm86(struct vm86_regs *regs)
+{
+	unsigned int ret;
+	u8 *retcode = (void*)RET_CODE_ADDR;
+
+	memset(&vm86, 0, sizeof(vm86));
+	memcpy(&vm86.regs, regs, sizeof(struct vm86_regs));
+
+	/* The return code */
+	retcode[0] = 0xcd;		/* int opcode */
+	retcode[1] = 0xff;		/* int number (255) */
+
+	/* We use int 0xff to get back to protected mode */
+	memset(&vm86.int_revectored, 0, sizeof(vm86.int_revectored));
+	((unsigned char *)&vm86.int_revectored)[0xff / 8] |= (1 << (0xff % 8));
+
+	/*
+	 * We want to call int 0x10, so we set:
+	 *   CS = 0x42 = 0x10 * 4 + 2
+	 *   IP = 0x40 = 0x10 * 4
+	 * and SS:ESP. It's up to the caller to set the rest of the registers.
+	 */
+	vm86.regs.eflags = DEFAULT_VM86_FLAGS;
+	vm86.regs.cs = *(unsigned short *)0x42;
+	vm86.regs.eip = *(unsigned short *)0x40;
+	vm86.regs.ss = (STACK_ADDR >> 4);
+	vm86.regs.esp = ((STACK_ADDR & 0x0000f) + STACK_SIZE);
+
+	/* These will be fetched off the stack when we come to an iret in the
+	 * int's 0x10 code. */
+	VM86_PUSHW(DEFAULT_VM86_FLAGS);
+	VM86_PUSHW((RET_CODE_ADDR >> 4));	/* return code segment */
+	VM86_PUSHW((RET_CODE_ADDR & 0x0000f));	/* return code offset */
+
+	while(1) {
+		ret = vm86old(&vm86);
+
+		if (VM86_TYPE(ret) == VM86_INTx) {
+			int vint = VM86_ARG(ret);
+
+			/* If exit from vm86 was caused by int 0xff, then
+			 * we're done.. */
+			if (vint == 0xff)
+				goto out;
+
+			/* .. otherwise, we have to call the int handler
+			 * manually */
+			VM86_PUSHW(vm86.regs.eflags);
+			VM86_PUSHW(vm86.regs.cs);
+			VM86_PUSHW(vm86.regs.eip);
+
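+			/* Look up the handler in the real-mode IVT: entry
+			 * 'vint' stores the offset at vint * 4 and the
+			 * segment at vint * 4 + 2. */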
+			vm86.regs.cs = *(u16 *)((vint << 2) + 2);
+			vm86.regs.eip = *(u16 *)(vint << 2);
+			vm86.regs.eflags &= ~(VIF_MASK | TF_MASK);
+		} else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
+			if (vm86_do_unknown())
+				goto out;
+		} else {
+			printk(KERN_ERR "vesafb: BUG, returned from "
+					"vm86 with %x (EIP: 0x%lx)\n",
+					ret, (u32)(vm86.regs.cs << 4) +
+					vm86.regs.eip);
+			goto out;
+		}
+	}
+
+out:	/* copy the registers' state back to the caller's struct */
+	memcpy(regs, &vm86.regs, sizeof(struct vm86_regs));
+}
+
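+/*
+ * Set up a mapping in the vesafb thread's private address space. With
+ * type == 0 the range is backed 1:1 by the physical pages starting at
+ * 'pgoff' (used for the IVT/BDA and the ROM areas), with type != 0 it is
+ * backed by freshly zeroed pages (used as scratch real-mode RAM).
+ */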
+static int vesafb_remap_pfn_range(unsigned long start, unsigned long end,
+				  unsigned long pgoff, unsigned long prot,
+				  int type)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	int ret = 0;
+
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma)
+		return -ENOMEM;
+	memset(vma, 0, sizeof(*vma));
+	down_write(&mm->mmap_sem);
+	vma->vm_mm = mm;
+	vma->vm_start = start;
+	vma->vm_end = end;
+	vma->vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+	vma->vm_flags |= mm->def_flags;
+	vma->vm_page_prot.pgprot = prot;
+	vma->vm_pgoff = pgoff;
+
+	if ((ret = insert_vm_struct(mm, vma))) {
+		up_write(&mm->mmap_sem);
+		kmem_cache_free(vm_area_cachep, vma);
+		return ret;
+	}
+
+	if (type) {
+		vma->vm_flags |= VM_SHARED;
+		ret = zeromap_page_range(vma,
+					 vma->vm_start,
+					 vma->vm_end - vma->vm_start,
+					 vma->vm_page_prot);
+	} else {
+		ret = remap_pfn_range(vma,
+				      vma->vm_start,
+				      vma->vm_pgoff,
+				      vma->vm_end - vma->vm_start,
+				      vma->vm_page_prot);
+	}
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
+static inline int vesafb_init_mem(void)
+{
+	int ret = 0;
+
+	/* The memory chunks we're remapping here should be multiples
+	 * of PAGE_SIZE. */
+	ret += vesafb_remap_pfn_range(0x00000, IVTBDA_SIZE, 0,
+				      PROT_READ | PROT_EXEC | PROT_WRITE, 0);
+	ret += vesafb_remap_pfn_range(IVTBDA_SIZE, REAL_MEM_SIZE, 0,
+				      PROT_READ | PROT_EXEC | PROT_WRITE, 1);
+	ret += vesafb_remap_pfn_range(0x9f000, 0x100000, 
+				      0x9f000 >> PAGE_SHIFT,
+				      PROT_READ | PROT_EXEC | PROT_WRITE, 0);
+	if (ret)
+		printk(KERN_ERR "vesafb thread: memory remapping failed\n");
+
+	return ret;
+}
+
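+/*
+ * Translate a real-mode far pointer (segment in the high word, offset in
+ * the low word) into a flat address, e.g. 0xc0004f30 -> (0xc000 << 4) +
+ * 0x4f30 = 0xc4f30, and then into a pointer usable from kernel space.
+ */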
+#define vesafb_get_string(str) \
+{ 									\
+	/* The address is in the form ssssoooo, where oooo = offset,	\
+	 * ssss = segment */						\
+	addr = ((p_vbe(tsk->buf)->str & 0xffff0000) >> 12) +		\
+		(p_vbe(tsk->buf)->str & 0x0000ffff);			\
+									\
+	/* The data is in ROM which is shared between processes, so we 	\
+	 * just translate the real mode address into one visible from 	\
+	 * kernel space */						\
+	if (addr >= 0xa0000) {						\
+		p_vbe(tsk->buf)->str = (u32) __va(addr);		\
+									\
+	/* The data is in the buffer, we just have to convert the	\
+	 * address so that it points into the buffer user provided. */	\
+	} else if (addr > BUF_ADDR && addr < BUF_ADDR +			\
+		   sizeof(struct vesafb_vbe_ib)) {			\
+		addr -= BUF_ADDR;					\
+		p_vbe(tsk->buf)->str = (u32) (tsk->buf + addr);		\
+									\
+	/* This should never happen: someone was insane enough to put	\
+	 * the data somewhere in RAM.. */				\
+	} else {							\
+		p_vbe(tsk->buf)->str = (u32) "";			\
+	}								\
+}
+
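+/*
+ * Run VBE function 0x4f00 (get controller information) and convert the
+ * real-mode pointers in the returned info block (OEM strings, mode list)
+ * into addresses that the rest of the driver can dereference directly.
+ */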
+void vesafb_handle_getvbeib(struct vesafb_task *tsk)
+{
+	int addr, res;
+
+	tsk->regs.es  = (BUF_ADDR >> 4);
+	tsk->regs.edi = (BUF_ADDR & 0x000f);
+	strncpy(p_vbe(BUF_ADDR)->vbe_signature, "VBE2", 4);
+
+	vesafb_do_vm86(&tsk->regs);
+	memcpy(tsk->buf, (void*)(BUF_ADDR), sizeof(struct vesafb_vbe_ib));
+
+	/* The OEM fields were not defined prior to VBE 2.0 */
+	if (p_vbe(tsk->buf)->vbe_version >= 0x200) {
+		vesafb_get_string(oem_string_ptr);
+		vesafb_get_string(oem_vendor_name_ptr);
+		vesafb_get_string(oem_product_name_ptr);
+		vesafb_get_string(oem_product_rev_ptr);
+	}
+
+	/* This is basically the same as vesafb_get_string() */
+	addr = ((p_vbe(tsk->buf)->mode_list_ptr & 0xffff0000) >> 12) +
+		(p_vbe(tsk->buf)->mode_list_ptr & 0x0000ffff);
+
+	if (addr >= 0xa0000) {
+		p_vbe(tsk->buf)->mode_list_ptr = (u32) __va(addr);
+	} else if (addr > BUF_ADDR && addr < BUF_ADDR +
+		   sizeof(struct vesafb_vbe_ib)) {
+		addr -= BUF_ADDR;
+		p_vbe(tsk->buf)->mode_list_ptr = (u32) (tsk->buf + addr);
+	} else {
+		res = 0;
+		printk(KERN_WARNING "vesafb: warning, copying modelist "
+				    "from somewhere in RAM!\n");
+		while (*(u16*)(addr+res) != 0xffff &&
+		       res < (sizeof(p_vbe(tsk->buf)->reserved) - 2)) {
+			*(u16*) ((u32)&(p_vbe(tsk->buf)->reserved) + res) =
+				*(u16*)(addr+res);
+			res += 2;
+		}
+		*(u16*) ((u32)&(p_vbe(tsk->buf)->reserved) + res) = 0xffff;
+	}
+}
+
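+/*
+ * Execute all queued tasks. TF_MASK_BUF points ES:DI at the low-memory
+ * buffer, TF_USE_BUF copies the caller's buffer into it before the BIOS
+ * call and TF_RETURN_BUF copies it back afterwards. A task with TF_EXIT
+ * set makes this function return non-zero, telling the thread to exit;
+ * the task list semaphore is then intentionally left held so that no new
+ * tasks can be queued.
+ */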
+int vesafb_handle_tasks(void)
+{
+	struct vesafb_task *tsk;
+	struct list_head *curr, *next;
+	int ret = 0;
+
+	down(&vesafb_task_list_sem);
+	list_for_each_safe(curr, next, &vesafb_task_list) {
+		tsk = list_entry(curr, struct vesafb_task, node);
+
+		if (tsk->flags & TF_EXIT) {
+			ret = 1;
+			goto task_done;
+		}
+		if (tsk->flags & TF_GETVBEIB) {
+			vesafb_handle_getvbeib(tsk);
+			goto task_done;
+		}
+		if (tsk->flags & TF_MASK_BUF) {
+			tsk->regs.es  = (BUF_ADDR >> 4);
+			tsk->regs.edi = (BUF_ADDR & 0x000f);
+		}
+		if (tsk->flags & TF_USE_BUF)
+			memcpy((void*)BUF_ADDR, tsk->buf, tsk->buf_len);
+
+		vesafb_do_vm86(&tsk->regs);
+
+		if (tsk->flags & TF_RETURN_BUF)
+			memcpy(tsk->buf, (void*)BUF_ADDR, tsk->buf_len);
+
+task_done:	list_del(curr);
+		complete(&tsk->done);
+	}
+
+	/* If we're going to kill this thread, don't allow any elements
+	 * to be added to the task list. */
+	if (!ret)
+		up(&vesafb_task_list_sem);
+
+	return ret;
+}
+
+/*
+ * This 'hybrid' thread serves as a backend for vesafb-tng, handling all vm86
+ * calls. It is started as a kernel thread. It then creates its own mm struct,
+ * thus separating itself from any userspace processes. At this moment, it
+ * stops being a kernel thread (kernel threads have mm = NULL) and becomes
+ * a 'hybrid' thread -- one that has full access to kernel space, yet runs
+ * with its own address space.
+ *
+ * This is necessary because in order to make vm86 calls some parts of the
+ * first 1MB of RAM have to be set up to mimic real mode. These are:
+ *  - interrupt vector table	[0x00000-0x003ff]
+ *  - BIOS data area		[0x00400-0x004ff]
+ *  - Extended BIOS data area	[0x9fc00-0x9ffff]
+ *  - the video RAM		[0xa0000-0xbffff]
+ *  - video BIOS		[0xc0000-0xcffff]
+ *  - motherboard BIOS		[0xf0000-0xfffff]
+ */
+int vesafb_thread(void *unused)
+{
+	int err = 0;
+
+	set_fs(KERNEL_DS);
+	daemonize("vesafb");
+
+	if (set_new_mm()) {
+		err = -ENOMEM;
+		goto thr_end;
+	}
+	if (vesafb_init_mem()) {
+		err = -ENOMEM;
+		goto thr_end;
+	}
+
+	DPRINTK("started vesafb thread\n");
+
+	/* Having an IO bitmap makes things faster as we avoid GPFs
+	 * when running vm86 code. We can live with it failing, though,
+	 * so don't bother checking for errors. */
+	ioperm(0,1024,1);
+	set_user_nice(current, -10);
+
+	complete(&vesafb_th_completion);
+
+	while (1) {
+		if (vesafb_handle_tasks())
+			break;
+		wait_event_interruptible(vesafb_wait,
+					 !list_empty(&vesafb_task_list));
+		try_to_freeze();
+	}
+
+out:	DPRINTK("exiting the vesafb thread\n");
+	vesafb_pid = -1;
+
+	/* Now that all callers know this thread is no longer running
+	 * (pid < 0), allow them to continue. */
+	up(&vesafb_task_list_sem);
+	return err;
+thr_end:
+	down(&vesafb_task_list_sem);
+	complete(&vesafb_th_completion);
+	goto out;	
+}
+
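+/*
+ * Queue a task for the vesafb thread. A typical caller (cf. vesafb_blank()
+ * in vesafb-tng.c) does roughly:
+ *
+ *	vesafb_create_task(tsk);
+ *	tsk->regs.eax = 0x4f10;
+ *	tsk->flags = TF_CALL;
+ *	if (!vesafb_queue_task(tsk))
+ *		vesafb_wait_for_task(tsk);
+ *	... inspect tsk->regs for the results, then kfree(tsk).
+ */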
+int vesafb_queue_task(struct vesafb_task *tsk)
+{
+	down(&vesafb_task_list_sem);
+	if (vesafb_pid < 0) {
+		up(&vesafb_task_list_sem);
+		return -1;
+	}
+	list_add_tail(&tsk->node, &vesafb_task_list);
+	up(&vesafb_task_list_sem);
+	wake_up(&vesafb_wait);
+	return 0;
+}
+
+int vesafb_wait_for_thread(void)
+{
+	/* A negative PID means the thread has already exited; PID 0 means
+	 * that it is still initializing, in which case we simply wait for
+	 * it to finish its setup below. */
+	if (vesafb_pid < 0)
+		return -1;
+	wait_for_completion(&vesafb_th_completion);
+	return 0;
+}
+
+int __init vesafb_init_thread(void)
+{
+	vesafb_pid = kernel_thread(vesafb_thread,NULL,0);
+	return 0;
+}
+
+#ifdef MODULE
+void __exit vesafb_kill_thread(void)
+{
+	struct vesafb_task *tsk;
+	if (vesafb_pid <= 0)
+		return;
+
+	vesafb_create_task(tsk);
+	if (!tsk)
+		return;
+	tsk->flags |= TF_EXIT;
+	vesafb_queue_task(tsk);
+	vesafb_wait_for_task(tsk);
+	kfree(tsk);
+	return;
+}
+module_exit(vesafb_kill_thread);
+#endif
+module_init(vesafb_init_thread);
+
+EXPORT_SYMBOL_GPL(vesafb_queue_task);
+EXPORT_SYMBOL_GPL(vesafb_wait_for_thread);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Januszewski");
+
diff -urN oldtree/drivers/video/vesafb-tng.c newtree/drivers/video/vesafb-tng.c
--- oldtree/drivers/video/vesafb-tng.c	1970-01-01 00:00:00.000000000 +0000
+++ newtree/drivers/video/vesafb-tng.c	2006-02-18 15:28:25.316254728 +0000
@@ -0,0 +1,1478 @@
+/*
+ * Framebuffer driver for VBE 2.0+ compliant graphic boards
+ *
+ * (c) 2004-2005 Michal Januszewski <spock@gentoo.org>
+ *     Based upon vesafb code by Gerd Knorr <kraxel@goldbach.in-berlin.de>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/completion.h>
+#include <linux/platform_device.h>
+#include <video/edid.h>
+#include <video/vesa.h>
+#include <video/vga.h>
+#include <asm/io.h>
+#include <asm/mtrr.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include "edid.h"
+
+#define dac_reg	(0x3c8)
+#define dac_val	(0x3c9)
+
+#define VESAFB_NEED_EXACT_RES 	1
+#define VESAFB_NEED_EXACT_DEPTH 2
+
+/* --------------------------------------------------------------------- */
+
+static struct fb_var_screeninfo vesafb_defined __initdata = {
+	.activate	= FB_ACTIVATE_NOW,
+	.height		= 0,
+	.width		= 0,
+	.right_margin	= 32,
+	.upper_margin	= 16,
+	.lower_margin	= 4,
+	.vsync_len	= 4,
+	.vmode		= FB_VMODE_NONINTERLACED,
+};
+
+static struct fb_fix_screeninfo vesafb_fix __initdata = {
+	.id	= "VESA VGA",
+	.type	= FB_TYPE_PACKED_PIXELS,
+	.accel	= FB_ACCEL_NONE,
+};
+
+static int  mtrr       = 0;	/* disable mtrr by default */
+static int  ypan       = 0;	/* 0 - nothing, 1 - ypan, 2 - ywrap */
+static int  pmi_setpal = 0;	/* pmi for palette changes */
+static u16  *pmi_base  = NULL;  /* protected mode interface location */
+static void (*pmi_start)(void) = NULL;
+static void (*pmi_pal)(void)   = NULL;
+static struct vesafb_vbe_ib  vbe_ib;
+static struct vesafb_mode_ib *vbe_modes;
+static int                   vbe_modes_cnt = 0;
+static struct fb_info	     *vesafb_info = NULL;
+static int  nocrtc		    = 0; /* ignore CRTC settings */
+static int  noedid       __initdata = 0; /* don't try DDC transfers */
+static int  vram_remap   __initdata = 0; /* set amount of memory to be used */
+static int  vram_total   __initdata = 0; /* set total amount of memory */
+static u16  maxclk       __initdata = 0; /* maximum pixel clock */
+static u16  maxvf        __initdata = 0; /* maximum vertical frequency */
+static u16  maxhf        __initdata = 0; /* maximum horizontal frequency */
+static int  gtf          __initdata = 0; /* forces use of the GTF */
+static char *mode_option __initdata = NULL;
+static u16  vbemode      __initdata = 0;
+
+/* --------------------------------------------------------------------- */
+
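+/*
+ * Find the VBE mode closest to xres x yres at the requested depth. The
+ * distance is the sum of the absolute differences of the resolution and
+ * depth; an exact match is returned immediately. VESAFB_NEED_EXACT_DEPTH
+ * rejects matches with a different depth, VESAFB_NEED_EXACT_RES rejects
+ * matches more than a small distance (d > 24) away from the requested
+ * resolution.
+ */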
+static int vesafb_find_vbe_mode(int xres, int yres, int depth,
+				unsigned char flags)
+{
+	int i, match = -1, h = 0, d = 0x7fffffff;
+
+	for (i = 0; i < vbe_modes_cnt; i++) {
+		h = abs(vbe_modes[i].x_res - xres) +
+		    abs(vbe_modes[i].y_res - yres) +
+		    abs(depth - vbe_modes[i].depth);
+		if (h == 0)
+			return i;
+		if (h < d || (h == d && vbe_modes[i].depth > depth)) {
+			d = h;
+			match = i;
+		}
+	}
+	i = 1;
+
+	if (flags & VESAFB_NEED_EXACT_DEPTH && vbe_modes[match].depth != depth)
+		i = 0;
+	if (flags & VESAFB_NEED_EXACT_RES && d > 24)
+		i = 0;
+	if (i != 0)
+		return match;
+	else
+		return -1;
+}
+
+static int vesafb_pan_display(struct fb_var_screeninfo *var,
+                              struct fb_info *info)
+{
+	int offset;
+
+	if (!ypan)
+		return -EINVAL;
+	if (var->xoffset)
+		return -EINVAL;
+	if (var->yoffset > var->yres_virtual)
+		return -EINVAL;
+	if ((ypan==1) && var->yoffset+var->yres > var->yres_virtual)
+		return -EINVAL;
+
+	offset = (var->yoffset * info->fix.line_length + var->xoffset) / 4;
+
+	/* It turns out it's not the best idea to do panning via vm86,
+	 * so we only allow it if we have a PMI. */
+	if (pmi_start) {
+		__asm__ __volatile__(
+			"call *(%%edi)"
+			: /* no return value */
+			: "a" (0x4f07),         /* EAX */
+			  "b" (0),              /* EBX */
+			  "c" (offset),         /* ECX */
+			  "d" (offset >> 16),   /* EDX */
+			  "D" (&pmi_start));    /* EDI */
+	}
+	return 0;
+}
+
+static int vesafb_blank(int blank, struct fb_info *info)
+{
+	struct vesafb_task *tsk;
+	int err = 1;
+
+	if (vbe_ib.capabilities & VBE_CAP_VGACOMPAT) {
+		int loop = 10000;
+		u8 seq = 0, crtc17 = 0;
+
+		if (blank == FB_BLANK_POWERDOWN) {
+			seq = 0x20;
+			crtc17 = 0x00;
+			err = 0;
+		} else {
+			seq = 0x00;
+			crtc17 = 0x80;
+			err = (blank == FB_BLANK_UNBLANK) ? 0 : -EINVAL;
+		}
+
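+		/* Bit 5 of SR01 blanks the screen and clearing bit 7 of
+		 * CR17 stops the sync pulses; the sequencer is held in a
+		 * synchronous reset (SR00) while SR01 is updated. */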
+		vga_wseq(NULL, 0x00, 0x01);
+		seq |= vga_rseq(NULL, 0x01) & ~0x20;
+		vga_wseq(NULL, 0x00, seq);
+
+		crtc17 |= vga_rcrt(NULL, 0x17) & ~0x80;
+		while (loop--);
+		vga_wcrt(NULL, 0x17, crtc17);
+		vga_wseq(NULL, 0x00, 0x03);
+	} else {
+		vesafb_create_task(tsk);
+		if (!tsk)
+			return -ENOMEM;
+		tsk->regs.eax = 0x4f10;
+		switch (blank) {
+		case FB_BLANK_UNBLANK:
+			tsk->regs.ebx = 0x0001;
+			break;
+		case FB_BLANK_NORMAL:
+			tsk->regs.ebx = 0x0101;	/* standby */
+			break;
+		case FB_BLANK_POWERDOWN:
+			tsk->regs.ebx = 0x0401;	/* powerdown */
+			break;
+		default:
+			goto out;
+		}
+		tsk->flags = TF_CALL;
+		if (!vesafb_queue_task(tsk))
+			vesafb_wait_for_task(tsk);
+
+		if ((tsk->regs.eax & 0xffff) == 0x004f)
+			err = 0;
+out:		kfree(tsk);
+	}
+	return err;
+}
+
+static int vesafb_setpalette(struct vesafb_pal_entry *entries, int count,
+			     int start, struct fb_info *info)
+{
+	struct vesafb_task *tsk;
+	int i = ((struct vesafb_par*)info->par)->mode_idx;
+	int ret = 0;
+
+	/* We support palette modifications for 8 bpp modes only, so
+	 * there can never be more than 256 entries. */
+	if (start + count > 256)
+		return -EINVAL;
+
+	if (pmi_setpal) {
+		__asm__ __volatile__(
+		"call *(%%esi)"
+		: /* no return value */
+		: "a" (0x4f09),         /* EAX */
+		  "b" (0),              /* EBX */
+		  "c" (count),          /* ECX */
+		  "d" (start),          /* EDX */
+		  "D" (entries),        /* EDI */
+		  "S" (&pmi_pal));      /* ESI */
+		return 0;
+	}
+
+	/* Use VGA registers if mode is VGA-compatible. */
+	if (i >= 0 && i < vbe_modes_cnt &&
+	    vbe_modes[i].mode_attr & VBE_MODE_VGACOMPAT) {
+		for (i = 0; i < count; i++) {
+			outb_p(start + i,        dac_reg);
+			outb_p(entries[i].red,   dac_val);
+			outb_p(entries[i].green, dac_val);
+			outb_p(entries[i].blue,  dac_val);
+		}
+	} else {
+		vesafb_create_task(tsk);
+		if (!tsk)
+			return -ENOMEM;
+		tsk->regs.eax = 0x4f09;
+		tsk->regs.ebx = 0x0;
+		tsk->regs.ecx = count;
+		tsk->regs.edx = start;
+		tsk->buf = entries;
+		tsk->buf_len = sizeof(struct vesafb_pal_entry) * count;
+		tsk->flags = TF_CALL | TF_USE_BUF;
+
+		if (!vesafb_queue_task(tsk))
+			vesafb_wait_for_task(tsk);
+		if ((tsk->regs.eax & 0xffff) != 0x004f)
+			ret = 1;
+		kfree(tsk);
+	}
+	return ret;
+}
+
+static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			    unsigned blue, unsigned transp,
+			    struct fb_info *info)
+{
+	struct vesafb_pal_entry entry;
+	int shift = 16 - info->var.green.length;
+	int ret = 0;
+
+	if (regno >= info->cmap.len)
+		return -EINVAL;
+
+	if (info->var.bits_per_pixel == 8) {
+		entry.red   = red   >> shift;
+		entry.green = green >> shift;
+		entry.blue  = blue  >> shift;
+		entry.pad   = 0;
+
+		ret = vesafb_setpalette(&entry, 1, regno, info);
+	} else if (regno < 16) {
+		switch (info->var.bits_per_pixel) {
+		case 16:
+			if (info->var.red.offset == 10) {
+				/* 1:5:5:5 */
+				((u32*) (info->pseudo_palette))[regno] =
+						((red   & 0xf800) >>  1) |
+						((green & 0xf800) >>  6) |
+						((blue  & 0xf800) >> 11);
+			} else {
+				/* 0:5:6:5 */
+				((u32*) (info->pseudo_palette))[regno] =
+						((red   & 0xf800)      ) |
+						((green & 0xfc00) >>  5) |
+						((blue  & 0xf800) >> 11);
+			}
+			break;
+
+		case 24:
+		case 32:
+			red   >>= 8;
+			green >>= 8;
+			blue  >>= 8;
+			((u32 *)(info->pseudo_palette))[regno] =
+				(red   << info->var.red.offset)   |
+				(green << info->var.green.offset) |
+				(blue  << info->var.blue.offset);
+			break;
+		}
+	}
+	return ret;
+}
+
+static int vesafb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
+{
+	struct vesafb_pal_entry *entries;
+	int shift = 16 - info->var.green.length;
+	int i, ret = 0;
+
+	if (info->var.bits_per_pixel == 8) {
+		if (cmap->start + cmap->len > info->cmap.start +
+		    info->cmap.len || cmap->start < info->cmap.start)
+			return -EINVAL;
+
+		entries = vmalloc(sizeof(struct vesafb_pal_entry) * cmap->len);
+		if (!entries)
+			return -ENOMEM;
+		for (i = 0; i < cmap->len; i++) {
+			entries[i].red   = cmap->red[i]   >> shift;
+			entries[i].green = cmap->green[i] >> shift;
+			entries[i].blue  = cmap->blue[i]  >> shift;
+			entries[i].pad   = 0;
+		}
+		ret = vesafb_setpalette(entries, cmap->len, cmap->start, info);
+		vfree(entries);
+	} else {
+		/* For modes with bpp > 8, we only set the pseudo palette in
+		 * the fb_info struct. We rely on vesafb_setcolreg to do all
+		 * sanity checking. */
+		for (i = 0; i < cmap->len; i++) {
+			ret += vesafb_setcolreg(cmap->start + i, cmap->red[i],
+						cmap->green[i], cmap->blue[i],
+						0, info);
+		}
+	}
+	return ret;
+}
+
+static int vesafb_set_par(struct fb_info *info)
+{
+	struct vesafb_par *par = (struct vesafb_par *) info->par;
+	struct vesafb_task *tsk;
+	struct vesafb_crtc_ib *crtc = NULL;
+	struct vesafb_mode_ib *mode = NULL;
+	int i, err = 0, depth = info->var.bits_per_pixel;
+
+	if (depth > 8 && depth != 32)
+		depth = info->var.red.length + info->var.green.length +
+			info->var.blue.length;
+
+	i = vesafb_find_vbe_mode(info->var.xres, info->var.yres, depth,
+				 VESAFB_NEED_EXACT_RES |
+				 VESAFB_NEED_EXACT_DEPTH);
+	if (i >= 0)
+		mode = &vbe_modes[i];
+	else
+		return -EINVAL;
+
+	vesafb_create_task(tsk);
+	if (!tsk)
+		return -ENOMEM;
+	tsk->regs.eax = 0x4f02;
+	tsk->regs.ebx = mode->mode_id | 0x4000;		/* use LFB */
+	tsk->flags = TF_CALL;
+
+	if (vbe_ib.vbe_version >= 0x0300 && !nocrtc &&
+	    info->var.pixclock != 0) {
+		tsk->regs.ebx |= 0x0800; 		/* use CRTC data */
+		tsk->flags |= TF_USE_BUF;
+		crtc = kmalloc(sizeof(struct vesafb_crtc_ib), GFP_KERNEL);
+		if (!crtc) {
+			err = -ENOMEM;
+			goto out;
+		}
+		crtc->horiz_start = info->var.xres + info->var.right_margin;
+		crtc->horiz_end	  = crtc->horiz_start + info->var.hsync_len;
+		crtc->horiz_total = crtc->horiz_end + info->var.left_margin;
+
+		crtc->vert_start  = info->var.yres + info->var.lower_margin;
+		crtc->vert_end    = crtc->vert_start + info->var.vsync_len;
+		crtc->vert_total  = crtc->vert_end + info->var.upper_margin;
+
+		crtc->pixel_clock = PICOS2KHZ(info->var.pixclock) * 1000;
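+		/* pixel_clock is in Hz here; the factor of 100 yields the
+		 * refresh rate in the 0.01 Hz units used by the VBE 3.0
+		 * CRTC info block. */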
+		crtc->refresh_rate = (u16)(100 * (crtc->pixel_clock /
+				     (crtc->vert_total * crtc->horiz_total)));
+		crtc->flags = 0;
+
+		if (info->var.vmode & FB_VMODE_DOUBLE)
+			crtc->flags |= 0x1;
+		if (info->var.vmode & FB_VMODE_INTERLACED)
+			crtc->flags |= 0x2;
+		if (!(info->var.sync & FB_SYNC_HOR_HIGH_ACT))
+			crtc->flags |= 0x4;
+		if (!(info->var.sync & FB_SYNC_VERT_HIGH_ACT))
+			crtc->flags |= 0x8;
+		memcpy(&par->crtc, crtc, sizeof(struct vesafb_crtc_ib));
+	} else
+		memset(&par->crtc, 0, sizeof(struct vesafb_crtc_ib));
+
+	tsk->buf = (void*)crtc;
+	tsk->buf_len = sizeof(struct vesafb_crtc_ib);
+
+	if (vesafb_queue_task(tsk)) {
+		err = -EINVAL;
+		goto out;
+	}
+	vesafb_wait_for_task(tsk);
+
+	if ((tsk->regs.eax & 0xffff) != 0x004f) {
+		printk(KERN_ERR "vesafb: mode switch failed (eax: 0x%lx)\n",
+				tsk->regs.eax);
+		err = -EINVAL;
+		goto out;
+	}
+	par->mode_idx = i;
+
+	/* For 8bpp modes, always try to set the DAC to 8 bits. */
+	if (vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC &&
+	    mode->bits_per_pixel <= 8) {
+		vesafb_reset_task(tsk);
+		tsk->flags = TF_CALL;
+		tsk->regs.eax = 0x4f08;
+		tsk->regs.ebx = 0x0800;
+
+		if (!vesafb_queue_task(tsk))
+			vesafb_wait_for_task(tsk);
+
+		if ((tsk->regs.eax & 0xffff) != 0x004f ||
+		    ((tsk->regs.ebx & 0xff00) >> 8) != 8) {
+			/* We've failed to set the DAC palette format -
+			 * time to correct var. */
+			info->var.red.length    = 6;
+			info->var.green.length  = 6;
+			info->var.blue.length   = 6;
+		}
+	}
+
+	info->fix.visual = (info->var.bits_per_pixel == 8) ?
+		           FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
+	info->fix.line_length = mode->bytes_per_scan_line;
+
+	DPRINTK("set new mode %dx%d-%d (0x%x)\n",
+		info->var.xres, info->var.yres, info->var.bits_per_pixel,
+		mode->mode_id);
+
+out:	kfree(crtc);
+	kfree(tsk);
+
+	return err;
+}
+
+static void vesafb_setup_var(struct fb_var_screeninfo *var, struct fb_info *info,
+			     struct vesafb_mode_ib *mode)
+{
+	var->xres = mode->x_res;
+	var->yres = mode->y_res;
+	var->xres_virtual = mode->x_res;
+	var->yres_virtual = (ypan) ?
+			      info->fix.smem_len / mode->bytes_per_scan_line :
+			      mode->y_res;
+	var->xoffset = 0;
+	var->yoffset = 0;
+	var->bits_per_pixel = mode->bits_per_pixel;
+
+	if (var->bits_per_pixel == 15)
+		var->bits_per_pixel = 16;
+
+	if (var->bits_per_pixel > 8) {
+		var->red.offset    = mode->red_off;
+		var->red.length    = mode->red_len;
+		var->green.offset  = mode->green_off;
+		var->green.length  = mode->green_len;
+		var->blue.offset   = mode->blue_off;
+		var->blue.length   = mode->blue_len;
+		var->transp.offset = mode->rsvd_off;
+		var->transp.length = mode->rsvd_len;
+
+		DPRINTK("directcolor: size=%d:%d:%d:%d, shift=%d:%d:%d:%d\n",
+			mode->rsvd_len,
+			mode->red_len,
+			mode->green_len,
+			mode->blue_len,
+			mode->rsvd_off,
+			mode->red_off,
+			mode->green_off,
+			mode->blue_off);
+	} else {
+		var->red.offset    = 0;
+		var->green.offset  = 0;
+		var->blue.offset   = 0;
+		var->transp.offset = 0;
+
+		/* We're assuming that we can switch the DAC to 8 bits. If
+		 * this proves to be incorrect, we'll update the fields
+		 * later in set_par(). */
+		if (vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC) {
+			var->red.length    = 8;
+			var->green.length  = 8;
+			var->blue.length   = 8;
+			var->transp.length = 0;
+		} else {
+			var->red.length    = 6;
+			var->green.length  = 6;
+			var->blue.length   = 6;
+			var->transp.length = 0;
+		}
+	}
+}
+
+static inline void vesafb_check_limits(struct fb_var_screeninfo *var,
+				       struct fb_info *info)
+{
+	struct fb_videomode *mode;
+
+	if (!var->pixclock)
+		return;
+	if (vbe_ib.vbe_version < 0x0300) {
+		fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60, var, info);
+		return;
+	}
+	if (!fb_validate_mode(var, info))
+		return;
+	mode = fb_find_best_mode(var, &info->modelist);
+	if (mode) {
+		DPRINTK("find_best_mode: %d %d @ %d (vmode: %d)\n",
+			mode->xres, mode->yres, mode->refresh, mode->vmode);
+		if (mode->xres == var->xres && mode->yres == var->yres &&
+		    !(mode->vmode & (FB_VMODE_INTERLACED | FB_VMODE_DOUBLE))) {
+			fb_videomode_to_var(var, mode);
+			return;
+		}
+	}
+	if (info->monspecs.gtf && !fb_get_mode(FB_MAXTIMINGS, 0, var, info))
+		return;
+	/* Use default refresh rate */
+	var->pixclock = 0;
+}
+
+static int vesafb_check_var(struct fb_var_screeninfo *var,
+			    struct fb_info *info)
+{
+	int match = -1;
+	int depth = var->red.length + var->green.length + var->blue.length;
+
+	/* Various apps will use bits_per_pixel to set the color depth,
+	 * which is theoretically incorrect, but which we'll try to handle
+	 * here. */
+	if (depth == 0 || abs(depth - var->bits_per_pixel) >= 8)
+		depth = var->bits_per_pixel;
+	match = vesafb_find_vbe_mode(var->xres, var->yres, depth,
+				     VESAFB_NEED_EXACT_RES);
+
+	if (match == -1) {
+		DPRINTK("vesafb: mode %dx%d-%d not found\n", var->xres,
+			var->yres, depth);
+		return -EINVAL;
+	}
+
+	vesafb_setup_var(var, info, &vbe_modes[match]);
+	DPRINTK("found mode 0x%x (%dx%d-%dbpp)\n",
+		vbe_modes[match].mode_id, vbe_modes[match].x_res,
+		vbe_modes[match].y_res, vbe_modes[match].depth);
+
+	/* Check whether we have remapped enough memory for this mode. */
+	if (var->yres * vbe_modes[match].bytes_per_scan_line >
+	    info->fix.smem_len) {
+		return -EINVAL;
+	}
+
+	if ((var->vmode & FB_VMODE_DOUBLE) &&
+	    !(vbe_modes[match].mode_attr & 0x100))
+		var->vmode &= ~FB_VMODE_DOUBLE;
+	if ((var->vmode & FB_VMODE_INTERLACED) &&
+	    !(vbe_modes[match].mode_attr & 0x200))
+		var->vmode &= ~FB_VMODE_INTERLACED;
+	vesafb_check_limits(var, info);
+	return 0;
+}
+
+static void vesafb_platform_release(struct device *device)
+{
+	return;
+}
+
+static int __init vesafb_probe(struct platform_device *device);
+
+static struct fb_ops vesafb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_setcolreg	= vesafb_setcolreg,
+	.fb_setcmap	= vesafb_setcmap,
+	.fb_pan_display	= vesafb_pan_display,
+	.fb_blank       = vesafb_blank,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_check_var	= vesafb_check_var,
+	.fb_set_par	= vesafb_set_par
+};
+
+static struct platform_driver vesafb_driver = {
+	.probe	= vesafb_probe,
+	.driver	= {
+		.name	= "vesafb",
+	},
+};
+
+static struct platform_device vesafb_device = {
+	.name	= "vesafb",
+        .dev    = {
+                .release = vesafb_platform_release,
+        }
+};
+
+#ifndef MODULE
+int __init vesafb_setup(char *options)
+{
+	char *this_opt;
+
+	if (!options || !*options)
+		return 0;
+
+	DPRINTK("options %s\n",options);
+
+	while ((this_opt = strsep(&options, ",")) != NULL) {
+		if (!*this_opt) continue;
+
+		DPRINTK("this_opt: %s\n",this_opt);
+
+		if (! strcmp(this_opt, "redraw"))
+			ypan=0;
+		else if (! strcmp(this_opt, "ypan"))
+			ypan=1;
+		else if (! strcmp(this_opt, "ywrap"))
+			ypan=2;
+		else if (! strcmp(this_opt, "vgapal"))
+			pmi_setpal=0;
+		else if (! strcmp(this_opt, "pmipal"))
+			pmi_setpal=1;
+		else if (! strncmp(this_opt, "mtrr:", 5))
+			mtrr = simple_strtoul(this_opt+5, NULL, 0);
+		else if (! strcmp(this_opt, "nomtrr"))
+			mtrr=0;
+		else if (! strcmp(this_opt, "nocrtc"))
+			nocrtc=1;
+		else if (! strcmp(this_opt, "noedid"))
+			noedid=1;
+		else if (! strcmp(this_opt, "gtf"))
+			gtf=1;
+		else if (! strncmp(this_opt, "vtotal:", 7))
+			vram_total = simple_strtoul(this_opt + 7, NULL, 0);
+		else if (! strncmp(this_opt, "vremap:", 7))
+			vram_remap = simple_strtoul(this_opt + 7, NULL, 0);
+		else if (! strncmp(this_opt, "maxhf:", 6))
+			maxhf = simple_strtoul(this_opt + 6, NULL, 0);
+		else if (! strncmp(this_opt, "maxvf:", 6))
+			maxvf = simple_strtoul(this_opt + 6, NULL, 0);
+		else if (! strncmp(this_opt, "maxclk:", 7))
+			maxclk = simple_strtoul(this_opt + 7, NULL, 0);
+		else if (! strncmp(this_opt, "vbemode:", 8))
+			vbemode = simple_strtoul(this_opt + 8, NULL,0);
+		else if (this_opt[0] >= '0' && this_opt[0] <= '9') {
+			DPRINTK("mode_option: %s\n",this_opt);
+			mode_option = this_opt;
+		} else {
+			printk(KERN_WARNING
+			       "vesafb: unrecognized option %s\n", this_opt);
+		}
+	}
+
+	return 0;
+}
+#endif /* !MODULE */
+
+static int vesafb_read_proc_modes(char *buf, char **start, off_t offset,
+			    	  int len, int *eof, void *private)
+{
+	int clen = 0, i;
+
+	for (i = 0; i < vbe_modes_cnt; i++) {
+		clen += sprintf(buf + clen, "%dx%d-%d\n", vbe_modes[i].x_res,
+				vbe_modes[i].y_res, vbe_modes[i].depth);
+	}
+	*start = buf + offset;
+
+	if (clen > offset) {
+		clen -= offset;
+	} else {
+		clen = 0;
+	}
+	return clen;
+}
+
+static int vesafb_read_proc_vbe_info(char *buf, char **start, off_t offset,
+			    	     int len, int *eof, void *private)
+{
+	int clen = 0;
+
+	clen += sprintf(buf + clen, "Version:    %d.%d\n",
+			((vbe_ib.vbe_version & 0xff00) >> 8),
+			vbe_ib.vbe_version & 0xff);
+	clen += sprintf(buf + clen, "Vendor:     %s\n",
+			(char*)vbe_ib.oem_vendor_name_ptr);
+	clen += sprintf(buf + clen, "Product:    %s\n",
+			(char*)vbe_ib.oem_product_name_ptr);
+	clen += sprintf(buf + clen, "OEM rev:    %s\n",
+			(char*)vbe_ib.oem_product_rev_ptr);
+	clen += sprintf(buf + clen, "OEM string: %s\n",
+			(char*)vbe_ib.oem_string_ptr);
+
+	*start = buf + offset;
+
+	if (clen > offset) {
+		clen -= offset;
+	} else {
+		clen = 0;
+	}
+	return clen;
+}
+
+static inline int __init vesafb_vbe_getinfo(struct vesafb_task *tsk)
+{
+	tsk->regs.eax = 0x4f00;
+	tsk->flags = TF_CALL | TF_GETVBEIB;
+	tsk->buf = &vbe_ib;
+	tsk->buf_len = sizeof(vbe_ib);
+	if (vesafb_queue_task(tsk))
+		return -EINVAL;
+	vesafb_wait_for_task(tsk);
+
+	if (vbe_ib.vbe_version < 0x0200) {
+		printk(KERN_ERR "vesafb: Sorry, pre-VBE 2.0 cards are "
+				"not supported.\n");
+		return -EINVAL;
+	}
+
+	if ((tsk->regs.eax & 0xffff) != 0x004f) {
+		printk(KERN_ERR "vesafb: Getting mode info block failed "
+				"(eax=0x%x)\n", (u32)tsk->regs.eax);
+		return -EINVAL;
+	}
+
+	printk(KERN_INFO "vesafb: %s, %s, %s (OEM: %s)\n",
+		(char*)vbe_ib.oem_vendor_name_ptr,
+		(char*)vbe_ib.oem_product_name_ptr,
+		(char*)vbe_ib.oem_product_rev_ptr,
+		(char*)vbe_ib.oem_string_ptr);
+
+	printk(KERN_INFO "vesafb: VBE version: %d.%d\n",
+			 ((vbe_ib.vbe_version & 0xff00) >> 8),
+			 vbe_ib.vbe_version & 0xff);
+	return 0;
+}
+
+static inline int __init vesafb_vbe_getmodes(struct vesafb_task *tsk)
+{
+	u16 *mode = 0;
+	int off = 0;
+
+	/* Count available modes. */
+	mode = (u16*)vbe_ib.mode_list_ptr;
+	while (*mode != 0xffff) {
+		vbe_modes_cnt++;
+		mode++;
+	}
+
+	vbe_modes = kmalloc(sizeof(struct vesafb_mode_ib)*
+			    vbe_modes_cnt, GFP_KERNEL);
+	if (!vbe_modes)
+		return -ENOMEM;
+
+	/* Get mode info for all available modes. */
+	mode = (u16*)vbe_ib.mode_list_ptr;
+
+	while (*mode != 0xffff) {
+		struct vesafb_mode_ib *mib;
+
+		vesafb_reset_task(tsk);
+		tsk->regs.eax = 0x4f01;
+		tsk->regs.ecx = (u32) *mode;
+		tsk->flags = TF_CALL | TF_RETURN_BUF;
+		tsk->buf = vbe_modes+off;
+		tsk->buf_len = sizeof(struct vesafb_mode_ib);
+		if (vesafb_queue_task(tsk))
+			return -EINVAL;
+		vesafb_wait_for_task(tsk);
+		mib = p_mode(tsk->buf);
+		mib->mode_id = *mode;
+
+		/* We only want modes that are supported with the current
+		 * hardware configuration (D0), color (D3), graphics (D4)
+		 * and that have support for the LFB (D7). */
+		if ((mib->mode_attr & 0x99) == 0x99 &&
+		    mib->bits_per_pixel >= 8) {
+			off++;
+		} else {
+			vbe_modes_cnt--;
+		}
+		mode++;
+		mib->depth = mib->red_len + mib->green_len + mib->blue_len;
+		/* Handle 8bpp modes and modes with broken color component
+		 * lengths. */
+		if (mib->depth == 0 ||
+		    (mib->depth == 24 && mib->bits_per_pixel == 32))
+			mib->depth = mib->bits_per_pixel;
+	}
+
+	return 0;
+}
+
+static inline int __init vesafb_vbe_getpmi(struct vesafb_task *tsk)
+{
+	int i;
+
+	vesafb_reset_task(tsk);
+	tsk->regs.eax = 0x4f0a;
+	tsk->regs.ebx = 0x0;
+	tsk->flags = TF_CALL;
+	if (vesafb_queue_task(tsk))
+		return -EINVAL;
+	vesafb_wait_for_task(tsk);
+
+	if ((tsk->regs.eax & 0xffff) != 0x004f || tsk->regs.es < 0xc000) {
+		pmi_setpal = ypan = 0;
+	} else {
+		pmi_base  = (u16*)phys_to_virt(((u32)tsk->regs.es << 4) +
+			     tsk->regs.edi);
+		pmi_start = (void*)((char*)pmi_base + pmi_base[1]);
+		pmi_pal   = (void*)((char*)pmi_base + pmi_base[2]);
+		printk(KERN_INFO "vesafb: protected mode interface info at "
+				 "%04x:%04x\n",
+				 (u16)tsk->regs.es, (u16)tsk->regs.edi);
+		printk(KERN_INFO "vesafb: pmi: set display start = %p, "
+				 "set palette = %p\n", pmi_start, pmi_pal);
+
+		if (pmi_base[3]) {
+			printk(KERN_INFO "vesafb: pmi: ports = ");
+			for (i = pmi_base[3]/2; pmi_base[i] != 0xffff; i++)
+				printk("%x ",pmi_base[i]);
+			printk("\n");
+
+			/*
+			 * memory areas not supported (yet?)
+			 *
+			 * Rules are: we have to set up a descriptor for the
+			 * requested memory area and pass it in the ES register
+			 * to the BIOS function.
+			 */
+			if (pmi_base[i] != 0xffff) {
+				printk(KERN_INFO "vesafb: can't handle memory "
+						 "requests, pmi disabled\n");
+				ypan = pmi_setpal = 0;
+			}
+		}
+	}
+	return 0;
+}
+
+static inline int __init vesafb_vbe_getedid(struct vesafb_task *tsk,
+					    struct fb_info *info)
+{
+	int res = 0;
+
+	if (noedid || vbe_ib.vbe_version < 0x0300)
+		return -EINVAL;
+
+	vesafb_reset_task(tsk);
+	tsk->regs.eax = 0x4f15;
+	tsk->regs.ebx = 0;
+	tsk->regs.ecx = 0;
+	if (vesafb_queue_task(tsk))
+		return -EINVAL;
+	vesafb_wait_for_task(tsk);
+
+	if ((tsk->regs.eax & 0xffff) != 0x004f)
+		return -EINVAL;
+
+	if ((tsk->regs.ebx & 0x3) == 3) {
+		printk(KERN_INFO "vesafb: VBIOS/hardware supports both "
+				 "DDC1 and DDC2 transfers\n");
+	} else if ((tsk->regs.ebx & 0x3) == 2) {
+		printk(KERN_INFO "vesafb: VBIOS/hardware supports DDC2 "
+				 "transfers\n");
+	} else if ((tsk->regs.ebx & 0x3) == 1) {
+		printk(KERN_INFO "vesafb: VBIOS/hardware supports DDC1 "
+				 "transfers\n");
+	} else {
+		printk(KERN_INFO "vesafb: VBIOS/hardware doesn't support "
+				 "DDC transfers\n");
+		return -EINVAL;
+	}
+
+	vesafb_reset_task(tsk);
+	tsk->regs.eax = 0x4f15;
+	tsk->regs.ebx = 1;
+	tsk->regs.ecx = tsk->regs.edx = 0;
+	tsk->flags = TF_CALL | TF_RETURN_BUF;
+	tsk->buf = kmalloc(EDID_LENGTH, GFP_KERNEL);
+	tsk->buf_len = EDID_LENGTH;
+
+	if (vesafb_queue_task(tsk)) {
+		res = -EINVAL;
+		goto out;
+	}
+	vesafb_wait_for_task(tsk);
+
+	if ((tsk->regs.eax & 0xffff) == 0x004f) {
+		fb_edid_to_monspecs(tsk->buf, &info->monspecs);
+		fb_videomode_to_modelist(info->monspecs.modedb,
+				info->monspecs.modedb_len, &info->modelist);
+		if (info->monspecs.vfmax && info->monspecs.hfmax) {
+			/* If the maximum pixel clock wasn't specified in
+			 * the EDID block, set it to 300 MHz. */
+			if (info->monspecs.dclkmax == 0)
+				info->monspecs.dclkmax = 300 * 1000000;
+			info->monspecs.gtf = 1;
+		} else {
+			res = -EINVAL;
+		}
+	}
+
+out:	kfree(tsk->buf);
+	return res;
+}
+
+static inline void __init vesafb_vbe_getmonspecs(struct vesafb_task *tsk,
+		                                 struct fb_info *info)
+{
+	struct fb_var_screeninfo var;
+	int i;
+	memset(&info->monspecs, 0, sizeof(struct fb_monspecs));
+
+	/* If we didn't get all necessary data from the EDID block,
+	 * mark it as incompatible with the GTF. */
+	if (vesafb_vbe_getedid(tsk, info))
+		info->monspecs.gtf = 0;
+
+	/* Kernel command line overrides. */
+	if (maxclk)
+		info->monspecs.dclkmax = maxclk * 1000000;
+	if (maxvf)
+		info->monspecs.vfmax = maxvf;
+	if (maxhf)
+		info->monspecs.hfmax = maxhf * 1000;
+
+	/* In case DDC transfers are not supported the user can provide
+	 * monitor limits manually. Lower limits are set to "safe" values. */
+	if (info->monspecs.gtf == 0 && maxclk && maxvf && maxhf) {
+		info->monspecs.dclkmin = 0;
+		info->monspecs.vfmin = 60;
+		info->monspecs.hfmin = 29000;
+		info->monspecs.gtf = 1;
+	}
+
+	if (info->monspecs.gtf) {
+		printk(KERN_INFO
+		       	"vesafb: monitor limits: vf = %d Hz, hf = %d kHz, "
+			"clk = %d MHz\n", info->monspecs.vfmax,
+			(int)(info->monspecs.hfmax / 1000),
+			(int)(info->monspecs.dclkmax / 1000000));
+		/* Add valid VESA video modes to our modelist. */
+		for (i = 0; i < VESA_MODEDB_SIZE; i++) {
+			fb_videomode_to_var(&var, (struct fb_videomode *)
+					    &vesa_modes[i]);
+			if (!fb_validate_mode(&var, info))
+				fb_add_videomode((struct fb_videomode *)
+						 &vesa_modes[i],
+						 &info->modelist);
+		}
+	} else {
+		/* Add all VESA video modes to our modelist. */
+		fb_videomode_to_modelist((struct fb_videomode *)vesa_modes,
+				 	 VESA_MODEDB_SIZE, &info->modelist);
+		printk(KERN_INFO "vesafb: no monitor limits have been set\n");
+	}
+	return;
+}
+
+static inline int __init vesafb_vbe_init(struct fb_info *info)
+{
+	struct vesafb_task *tsk;
+	int res = 0;
+
+	vesafb_create_task(tsk);
+	if (!tsk)
+		return -EINVAL;
+	if ((res = vesafb_vbe_getinfo(tsk)) != 0)
+		goto out;
+	if ((res = vesafb_vbe_getmodes(tsk)) != 0)
+		goto out;
+	if (pmi_setpal || ypan)
+		vesafb_vbe_getpmi(tsk);
+	vesafb_vbe_getmonspecs(tsk, info);
+out:	kfree(tsk);
+	return res;
+}
+
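+/*
+ * Parse mode_option of the form "<xres>x<yres>[-<bpp>][@<refresh>]"
+ * (e.g. "1024x768-32@85"), scanning the string from right to left.
+ */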
+static int __init decode_mode(u32 *xres, u32 *yres, u32 *bpp, u32 *refresh)
+{
+	int len = strlen(mode_option), i, err = 0;
+	u8 res_specified = 0, bpp_specified = 0, refresh_specified = 0,
+	   yres_specified = 0;
+
+	for (i = len-1; i >= 0; i--) {
+		switch (mode_option[i]) {
+		case '@':
+			len = i;
+			if (!refresh_specified && !bpp_specified &&
+			    !yres_specified) {
+				*refresh = simple_strtoul(&mode_option[i+1],
+							  NULL, 0);
+				refresh_specified = 1;
+			} else
+				goto out;
+			break;
+		case '-':
+			len = i;
+			if (!bpp_specified && !yres_specified) {
+				*bpp = simple_strtoul(&mode_option[i+1],
+						      NULL, 0);
+				bpp_specified = 1;
+			} else
+				goto out;
+			break;
+		case 'x':
+			if (!yres_specified) {
+				*yres = simple_strtoul(&mode_option[i+1],
+						       NULL, 0);
+				yres_specified = 1;
+			} else
+				goto out;
+			break;
+		case '0'...'9':
+			break;
+		default:
+			goto out;
+		}
+	}
+
+	if (i < 0 && yres_specified) {
+		*xres = simple_strtoul(mode_option, NULL, 0);
+		res_specified = 1;
+	}
+
+out:	if (!res_specified || !yres_specified) {
+		printk(KERN_ERR "vesafb: invalid resolution, "
+				"%s not specified\n",
+				(!res_specified) ? "width" : "height");
+		err = -EINVAL;
+	}
+
+	return err;
+}
+
+static int __init vesafb_init_set_mode(struct fb_info *info)
+{
+	struct fb_videomode *fbmode;
+	struct fb_videomode mode;
+	int i, modeid, refresh = 0;
+	u8 refresh_specified = 0;
+
+	if (!mode_option)
+		mode_option = CONFIG_FB_VESA_DEFAULT_MODE;
+
+	if (vbemode > 0) {
+		for (i = 0; i < vbe_modes_cnt; i++) {
+			if (vbe_modes[i].mode_id == vbemode) {
+				info->var.vmode = FB_VMODE_NONINTERLACED;
+				info->var.sync = FB_SYNC_VERT_HIGH_ACT;
+				vesafb_setup_var(&info->var, info,
+						 &vbe_modes[i]);
+				fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON,
+					    60, &info->var, info);
+				/* With pixclock set to 0, the default BIOS
+				 * timings will be used in set_par(). */
+				info->var.pixclock = 0;
+				modeid = i;
+				goto out;
+			}
+		}
+		printk(KERN_INFO "vesafb: specified VBE mode %d not found\n",
+				 vbemode);
+		vbemode = 0;
+	}
+
+	/* Decode the mode specified on the kernel command line. We save
+	 * the depth into bits_per_pixel, which is wrong, but will work
+	 * anyway. */
+	if (decode_mode(&info->var.xres, &info->var.yres,
+			&info->var.bits_per_pixel, &refresh))
+		return -EINVAL;
+	if (refresh)
+		refresh_specified = 1;
+	else
+		refresh = 60;
+
+	/* Look for a matching VBE mode. We can live if an exact match
+	 * cannot be found. */
+	modeid = vesafb_find_vbe_mode(info->var.xres, info->var.yres,
+			              info->var.bits_per_pixel, 0);
+
+	if (modeid == -1) {
+		return -EINVAL;
+	} else {
+		info->var.vmode = FB_VMODE_NONINTERLACED;
+		info->var.sync = FB_SYNC_VERT_HIGH_ACT;
+		vesafb_setup_var(&info->var, info, &vbe_modes[modeid]);
+	}
+	if (vbe_ib.vbe_version < 0x0300) {
+		fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60,
+			    &info->var, info);
+		goto out;
+	}
+	if (!gtf) {
+		struct fb_videomode tmode;
+
+		if (refresh_specified) {
+			fb_var_to_videomode(&tmode, &info->var);
+			tmode.refresh = refresh;
+			fbmode = fb_find_nearest_mode(&tmode, 
+						      &info->modelist);
+		} else
+			fbmode = fb_find_best_mode(&info->var, 
+						   &info->modelist);
+
+		if (fbmode->xres == info->var.xres &&
+		    fbmode->yres == info->var.yres &&
+		    !(fbmode->vmode & (FB_VMODE_INTERLACED | FB_VMODE_DOUBLE))
+		    && (!refresh_specified || 
+		    abs(refresh - fbmode->refresh) <= 5)) {
+			fb_videomode_to_var(&info->var, fbmode);
+			return modeid;
+		}
+	}
+	i = FB_MAXTIMINGS;
+	if (!info->monspecs.gtf)
+		i = FB_IGNOREMON | FB_VSYNCTIMINGS;
+	else if (refresh_specified)
+		i = FB_VSYNCTIMINGS;
+	if (!fb_get_mode(i, refresh, &info->var, info))
+		goto out;
+	if (info->monspecs.gtf &&
+	    !fb_get_mode(FB_MAXTIMINGS, 0, &info->var, info))
+		goto out;
+	/* Use default refresh rate */
+	printk(KERN_WARNING "vesafb: using default BIOS refresh rate\n");
+	info->var.pixclock = 0;
+
+out:
+	fb_var_to_videomode(&mode, &info->var);
+	fb_add_videomode(&mode, &info->modelist);
+	return modeid;
+}
+
+static int __init vesafb_probe(struct platform_device *dev)
+{
+	char entry[16];
+	struct fb_info *info;
+	struct vesafb_mode_ib *mode = NULL;
+	int err = 0, i, h;
+	unsigned int size_vmode;
+	unsigned int size_remap;
+	unsigned int size_total;
+
+	vesafb_info = info = framebuffer_alloc(sizeof(struct vesafb_par) +
+			                       sizeof(u32) * 256, &dev->dev);
+	if (!info)
+		return -ENOMEM;
+
+	if (vesafb_wait_for_thread()) {
+		printk(KERN_ERR "vesafb: vesafb thread not running\n");
+		framebuffer_release(info);
+		return -EINVAL;
+	}
+
+	if (vesafb_vbe_init(info)) {
+		printk(KERN_ERR "vesafb: vbe_init failed\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	vesafb_fix.ypanstep  = ypan     ? 1 : 0;
+	vesafb_fix.ywrapstep = (ypan>1) ? 1 : 0;
+
+	info->pseudo_palette = ((u8*)info->par + sizeof(struct vesafb_par));
+	info->fbops = &vesafb_ops;
+	info->var = vesafb_defined;
+	info->fix = vesafb_fix;
+
+	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	i = vesafb_init_set_mode(info);
+	if (i < 0) {
+		err = -EINVAL;
+		goto out_cmap;
+	} else
+		mode = &vbe_modes[i];
+
+	/* Find out how much IO memory is required for the mode with
+	 * the highest resolution. */
+	size_remap = 0;
+	for (i = 0; i < vbe_modes_cnt; i++) {
+		h = vbe_modes[i].bytes_per_scan_line * vbe_modes[i].y_res;
+		if (h > size_remap)
+			size_remap = h;
+	}
+	size_remap *= 2;
+
+	/*   size_vmode -- that is the amount of memory needed for the
+	 *                 used video mode, i.e. the minimum amount of
+	 *                 memory we need. */
+	if (mode != NULL) {
+		size_vmode = info->var.yres * mode->bytes_per_scan_line;
+	} else {
+		size_vmode = info->var.yres * info->var.xres *
+			     ((info->var.bits_per_pixel + 7) >> 3);
+	}
+
+	/*   size_total -- all video memory we have. Used for mtrr
+	 *                 entries, resource allocation and bounds
+	 *                 checking. */
+	size_total = vbe_ib.total_memory * 65536;
+	if (vram_total)
+		size_total = vram_total * 1024 * 1024;
+	if (size_total < size_vmode)
+		size_total = size_vmode;
+	((struct vesafb_par*)(info->par))->mem_total = size_total;
+
+	/*   size_remap -- the amount of video memory we are going to
+	 *                 use for vesafb.  With modern cards it is not an
+	 *                 option to simply use size_total as this
+	 *                 wastes plenty of kernel address space. */
+	if (vram_remap)
+		size_remap = vram_remap * 1024 * 1024;
+	if (size_remap < size_vmode)
+		size_remap = size_vmode;
+	if (size_remap > size_total)
+		size_remap = size_total;
+
+	info->fix.smem_len = size_remap;
+	info->fix.smem_start = mode->phys_base_ptr;
+
+	/* We have to set it here, because when setup_var() was called,
+	 * smem_len wasn't defined yet. */
+	info->var.yres_virtual = info->fix.smem_len /
+				 mode->bytes_per_scan_line;
+
+	if (ypan && info->var.yres_virtual > info->var.yres) {
+		printk(KERN_INFO "vesafb: scrolling: %s "
+		       "using protected mode interface, "
+		       "yres_virtual=%d\n",
+		       (ypan > 1) ? "ywrap" : "ypan",info->var.yres_virtual);
+	} else {
+		printk(KERN_INFO "vesafb: scrolling: redraw\n");
+		info->var.yres_virtual = info->var.yres;
+		ypan = 0;
+	}
+
+	info->flags = FBINFO_FLAG_DEFAULT |
+		((ypan) ? FBINFO_HWACCEL_YPAN : 0);
+
+	if (!request_mem_region(info->fix.smem_start, size_total, "vesafb")) {
+		printk(KERN_WARNING "vesafb: cannot reserve video memory at "
+		       "0x%lx\n", info->fix.smem_start);
+		/* We cannot make this fatal. Sometimes this comes from magic
+		   spaces our resource handlers simply don't know about. */
+	}
+
+	info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len);
+
+	if (!info->screen_base) {
+		printk(KERN_ERR
+		       "vesafb: abort, cannot ioremap video memory "
+		       "0x%x @ 0x%lx\n",
+		       info->fix.smem_len, info->fix.smem_start);
+		err = -EIO;
+		goto out_mem;
+	}
+
+	/* Request failure does not faze us, as vgacon probably has this
+	   region already (FIXME) */
+	request_region(0x3c0, 32, "vesafb");
+
+	if (mtrr && !(info->fix.smem_start & (PAGE_SIZE - 1))) {
+		int temp_size = size_total;
+		unsigned int type = 0;
+
+		switch (mtrr) {
+		case 1:
+			type = MTRR_TYPE_UNCACHABLE;
+			break;
+		case 2:
+			type = MTRR_TYPE_WRBACK;
+			break;
+		case 3:
+			type = MTRR_TYPE_WRCOMB;
+			break;
+		case 4:
+			type = MTRR_TYPE_WRTHROUGH;
+			break;
+		default:
+			type = 0;
+			break;
+		}
+
+		if (type) {
+			int rc;
+
+			/* Find the largest power-of-two */
+			while (temp_size & (temp_size - 1))
+				temp_size &= (temp_size - 1);
+
+			/* Try and find a power of two to add */
+			do {
+				rc = mtrr_add(info->fix.smem_start,
+					      temp_size, type, 1);
+				temp_size >>= 1;
+			} while (temp_size >= PAGE_SIZE && rc == -EINVAL);
+		}
+	}
+
+	if (register_framebuffer(info) < 0) {
+		printk(KERN_ERR
+		       "vesafb: failed to register framebuffer device\n");
+		err = -EINVAL;
+		goto out_mem;
+	}
+
+	printk(KERN_INFO "vesafb: framebuffer at 0x%lx, mapped to 0x%p, "
+	       "using %dk, total %dk\n", info->fix.smem_start,
+	       info->screen_base, size_remap/1024, size_total/1024);
+	printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
+	       info->fix.id);
+
+	sprintf(entry, "fb%d", info->node);
+	proc_mkdir(entry, 0);
+
+	sprintf(entry, "fb%d/modes", info->node);
+	create_proc_read_entry(entry, 0, 0, vesafb_read_proc_modes, NULL);
+
+	sprintf(entry, "fb%d/vbe_info", info->node);
+	create_proc_read_entry(entry, 0, 0, vesafb_read_proc_vbe_info, NULL);
+	return 0;
+
+out_mem:
+	release_mem_region(info->fix.smem_start, size_total);
+	if (!list_empty(&info->modelist))
+		fb_destroy_modelist(&info->modelist);
+	fb_destroy_modedb(info->monspecs.modedb);
+out_cmap:
+	fb_dealloc_cmap(&info->cmap);
+out:
+	framebuffer_release(info);
+	vesafb_info = NULL;
+	kfree(vbe_modes);
+	vbe_modes = NULL;
+	return err;
+}
+
+int __init vesafb_init(void)
+{
+	int ret;
+#ifndef MODULE
+	char *option = NULL;
+
+	if (fb_get_options("vesafb", &option))
+		return -ENODEV;
+	vesafb_setup(option);
+#endif
+	ret = platform_driver_register(&vesafb_driver);
+
+	if (!ret) {
+		ret = platform_device_register(&vesafb_device);
+		if (ret)
+			platform_driver_unregister(&vesafb_driver);
+	}
+	return ret;
+}
+
+module_init(vesafb_init);
+
+#ifdef MODULE
+void __exit vesafb_exit(void)
+{
+	char entry[16];
+
+	if (vesafb_info)
+		unregister_framebuffer(vesafb_info);
+
+	platform_device_unregister(&vesafb_device);
+	platform_driver_unregister(&vesafb_driver);
+
+	if (vesafb_info) {
+		struct vesafb_par *par = (struct vesafb_par*)vesafb_info->par;
+
+		sprintf(entry, "fb%d/modes", vesafb_info->node);
+		remove_proc_entry(entry, NULL);
+
+		sprintf(entry, "fb%d/vbe_info", vesafb_info->node);
+		remove_proc_entry(entry, NULL);
+
+		sprintf(entry, "fb%d", vesafb_info->node);
+		remove_proc_entry(entry, NULL);
+
+		iounmap(vesafb_info->screen_base);
+		release_mem_region(vesafb_info->fix.smem_start,
+				   par->mem_total);
+		fb_dealloc_cmap(&vesafb_info->cmap);
+		if (!list_empty(&vesafb_info->modelist))
+			fb_destroy_modelist(&vesafb_info->modelist);
+		fb_destroy_modedb(vesafb_info->monspecs.modedb);
+		framebuffer_release(vesafb_info);
+	}
+
+	kfree(vbe_modes);
+}
+
+module_exit(vesafb_exit);
+
+static inline int param_get_scroll(char *buffer, struct kernel_param *kp)
+{
+	return 0;
+}
+static inline int param_set_scroll(const char *val, struct kernel_param *kp)
+{
+	ypan = 0;
+
+	if (! strcmp(val, "redraw"))
+		ypan = 0;
+	else if (! strcmp(val, "ypan"))
+		ypan = 1;
+	else if (! strcmp(val, "ywrap"))
+		ypan = 2;
+
+	return 0;
+}
+
+#define param_check_scroll(name, p) __param_check(name, p, void);
+
+module_param_named(scroll, ypan, scroll, 0);
+MODULE_PARM_DESC(scroll,"Scrolling mode, set to 'redraw', 'ypan' or 'ywrap'");
+module_param_named(vgapal, pmi_setpal, invbool, 0);
+MODULE_PARM_DESC(vgapal,"bool: set palette using VGA registers");
+module_param_named(pmipal, pmi_setpal, bool, 0);
+MODULE_PARM_DESC(pmipal,"bool: set palette using PMI calls");
+module_param_named(nomtrr, mtrr, invbool, 0);
+MODULE_PARM_DESC(nomtrr,"bool: disable use of MTRR registers");
+module_param(nocrtc, bool, 0);
+MODULE_PARM_DESC(nocrtc,"bool: ignore CRTC timings when setting modes");
+module_param(noedid, bool, 0);
+MODULE_PARM_DESC(noedid,"bool: ignore EDID-provided monitor limits "
+		        "when setting modes");
+module_param(gtf, bool, 0);
+MODULE_PARM_DESC(gtf,"bool: force use of VESA GTF to calculate mode timings");
+module_param(vram_remap, uint, 0);
+MODULE_PARM_DESC(vram_remap,"Set amount of video memory to be used [MiB]");
+module_param(vram_total, uint, 0);
+MODULE_PARM_DESC(vram_total,"Set total amount of video memory [MiB]");
+module_param(maxclk, ushort, 0);
+MODULE_PARM_DESC(maxclk,"Maximum pixel clock [MHz], overrides EDID data");
+module_param(maxhf, ushort, 0);
+MODULE_PARM_DESC(maxhf,"Maximum horizontal frequency [kHz], "
+		       "overrides EDID data");
+module_param(maxvf, ushort, 0);
+MODULE_PARM_DESC(maxvf,"Maximum vertical frequency [Hz], "
+		       "overrides EDID data");
+module_param_named(mode, mode_option, charp, 0);
+MODULE_PARM_DESC(mode, "Specify resolution as "
+		       "\"<xres>x<yres>[-<bpp>][@<refresh>]\"");
+module_param(vbemode, ushort, 0);
+MODULE_PARM_DESC(vbemode,"VBE mode number to set, overrides 'mode' setting");
+
+#endif /* MODULE */
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Januszewski");
+MODULE_DESCRIPTION("Framebuffer driver for VBE 2.0+ compliant graphics boards");
+
diff -urN oldtree/include/linux/sched.h newtree/include/linux/sched.h
--- oldtree/include/linux/sched.h	2006-02-18 15:18:29.959762672 +0000
+++ newtree/include/linux/sched.h	2006-02-18 15:28:25.318254424 +0000
@@ -1140,6 +1140,8 @@
 extern struct mm_struct *get_task_mm(struct task_struct *task);
 /* Remove the current tasks stale references to the old mm_struct */
 extern void mm_release(struct task_struct *, struct mm_struct *);
+/* Create a new mm for a kernel thread */
+extern int set_new_mm(void);
 
 extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
 extern void flush_thread(void);
diff -urN oldtree/include/linux/sched.h.orig newtree/include/linux/sched.h.orig
--- oldtree/include/linux/sched.h.orig	1970-01-01 00:00:00.000000000 +0000
+++ newtree/include/linux/sched.h.orig	2006-02-18 15:18:29.000000000 +0000
@@ -0,0 +1,1465 @@
+#ifndef _LINUX_SCHED_H
+#define _LINUX_SCHED_H
+
+#include <asm/param.h>	/* for HZ */
+
+#include <linux/config.h>
+#include <linux/capability.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timex.h>
+#include <linux/jiffies.h>
+#include <linux/rbtree.h>
+#include <linux/thread_info.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/nodemask.h>
+
+#include <asm/system.h>
+#include <asm/semaphore.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/cputime.h>
+
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/signal.h>
+#include <linux/securebits.h>
+#include <linux/fs_struct.h>
+#include <linux/compiler.h>
+#include <linux/completion.h>
+#include <linux/pid.h>
+#include <linux/percpu.h>
+#include <linux/topology.h>
+#include <linux/seccomp.h>
+#include <linux/rcupdate.h>
+
+#include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
+
+struct exec_domain;
+
+/*
+ * cloning flags:
+ */
+#define CSIGNAL		0x000000ff	/* signal mask to be sent at exit */
+#define CLONE_VM	0x00000100	/* set if VM shared between processes */
+#define CLONE_FS	0x00000200	/* set if fs info shared between processes */
+#define CLONE_FILES	0x00000400	/* set if open files shared between processes */
+#define CLONE_SIGHAND	0x00000800	/* set if signal handlers and blocked signals shared */
+#define CLONE_PTRACE	0x00002000	/* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK	0x00004000	/* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT	0x00008000	/* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD	0x00010000	/* Same thread group? */
+#define CLONE_NEWNS	0x00020000	/* New namespace group? */
+#define CLONE_SYSVSEM	0x00040000	/* share system V SEM_UNDO semantics */
+#define CLONE_SETTLS	0x00080000	/* create a new TLS for the child */
+#define CLONE_PARENT_SETTID	0x00100000	/* set the TID in the parent */
+#define CLONE_CHILD_CLEARTID	0x00200000	/* clear the TID in the child */
+#define CLONE_DETACHED		0x00400000	/* Unused, ignored */
+#define CLONE_UNTRACED		0x00800000	/* set if the tracing process can't force CLONE_PTRACE on this clone */
+#define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
+#define CLONE_STOPPED		0x02000000	/* Start in stopped state */
+
+/*
+ * List of flags we want to share for kernel threads,
+ * if only because they are not used by them anyway.
+ */
+#define CLONE_KERNEL	(CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
+
+/*
+ * These are the constant used to fake the fixed-point load-average
+ * counting. Some notes:
+ *  - 11 bit fractions expand to 22 bits by the multiplies: this gives
+ *    a load-average precision of 10 bits integer + 11 bits fractional
+ *  - if you want to count load-averages more often, you need more
+ *    precision, or rounding will get you. With 2-second counting freq,
+ *    the EXP_n values would be 1981, 2034 and 2043 if still using only
+ *    11 bit fractions.
+ */
+extern unsigned long avenrun[];		/* Load averages */
+
+#define FSHIFT		11		/* nr of bits of precision */
+#define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
+#define LOAD_FREQ	(5*HZ)		/* 5 sec intervals */
+#define EXP_1		1884		/* 1/exp(5sec/1min) as fixed-point */
+#define EXP_5		2014		/* 1/exp(5sec/5min) */
+#define EXP_15		2037		/* 1/exp(5sec/15min) */
+
+#define CALC_LOAD(load,exp,n) \
+	load *= exp; \
+	load += n*(FIXED_1-exp); \
+	load >>= FSHIFT;
+
+extern unsigned long total_forks;
+extern int nr_threads;
+extern int last_pid;
+DECLARE_PER_CPU(unsigned long, process_counts);
+extern int nr_processes(void);
+extern unsigned long nr_running(void);
+extern unsigned long nr_uninterruptible(void);
+extern unsigned long nr_iowait(void);
+
+#include <linux/time.h>
+#include <linux/param.h>
+#include <linux/resource.h>
+#include <linux/timer.h>
+#include <linux/hrtimer.h>
+
+#include <asm/processor.h>
+
+/*
+ * Task state bitmask. NOTE! These bits are also
+ * encoded in fs/proc/array.c: get_task_state().
+ *
+ * We have two separate sets of flags: task->state
+ * is about runnability, while task->exit_state are
+ * about the task exiting. Confusing, but this way
+ * modifying one set can't modify the other one by
+ * mistake.
+ */
+#define TASK_RUNNING		0
+#define TASK_INTERRUPTIBLE	1
+#define TASK_UNINTERRUPTIBLE	2
+#define TASK_STOPPED		4
+#define TASK_TRACED		8
+/* in tsk->exit_state */
+#define EXIT_ZOMBIE		16
+#define EXIT_DEAD		32
+/* in tsk->state again */
+#define TASK_NONINTERACTIVE	64
+
+#define __set_task_state(tsk, state_value)		\
+	do { (tsk)->state = (state_value); } while (0)
+#define set_task_state(tsk, state_value)		\
+	set_mb((tsk)->state, (state_value))
+
+/*
+ * set_current_state() includes a barrier so that the write of current->state
+ * is correctly serialised wrt the caller's subsequent test of whether to
+ * actually sleep:
+ *
+ *	set_current_state(TASK_UNINTERRUPTIBLE);
+ *	if (do_i_need_to_sleep())
+ *		schedule();
+ *
+ * If the caller does not need such serialisation then use __set_current_state()
+ */
+#define __set_current_state(state_value)			\
+	do { current->state = (state_value); } while (0)
+#define set_current_state(state_value)		\
+	set_mb(current->state, (state_value))
+
+/* Task command name length */
+#define TASK_COMM_LEN 16
+
+/*
+ * Scheduling policies
+ */
+#define SCHED_NORMAL		0
+#define SCHED_FIFO		1
+#define SCHED_RR		2
+#define SCHED_BATCH		3
+
+struct sched_param {
+	int sched_priority;
+};
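+/*
+ * A minimal sketch of putting a task on a realtime policy with
+ * sched_setscheduler() (declared further down); the priority value
+ * chosen here is purely illustrative:
+ *
+ *	struct sched_param param = { .sched_priority = 1 };
+ *
+ *	sched_setscheduler(p, SCHED_FIFO, &param);
+ */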
+
+#ifdef __KERNEL__
+
+#include <linux/spinlock.h>
+
+/*
+ * This serializes "schedule()" and also protects
+ * the run-queue from deletions/modifications (but
+ * _adding_ to the beginning of the run-queue has
+ * a separate lock).
+ */
+extern rwlock_t tasklist_lock;
+extern spinlock_t mmlist_lock;
+
+typedef struct task_struct task_t;
+
+extern void sched_init(void);
+extern void sched_init_smp(void);
+extern void init_idle(task_t *idle, int cpu);
+
+extern cpumask_t nohz_cpu_mask;
+
+extern void show_state(void);
+extern void show_regs(struct pt_regs *);
+
+/*
+ * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
+ * task), SP is the stack pointer of the first frame that should be shown in the back
+ * trace (or NULL if the entire call-chain of the task should be shown).
+ */
+extern void show_stack(struct task_struct *task, unsigned long *sp);
+
+void io_schedule(void);
+long io_schedule_timeout(long timeout);
+
+extern void cpu_init (void);
+extern void trap_init(void);
+extern void update_process_times(int user);
+extern void scheduler_tick(void);
+
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+extern void softlockup_tick(struct pt_regs *regs);
+extern void spawn_softlockup_task(void);
+extern void touch_softlockup_watchdog(void);
+#else
+static inline void softlockup_tick(struct pt_regs *regs)
+{
+}
+static inline void spawn_softlockup_task(void)
+{
+}
+static inline void touch_softlockup_watchdog(void)
+{
+}
+#endif
+
+
+/* Attach to any functions which should be ignored in wchan output. */
+#define __sched		__attribute__((__section__(".sched.text")))
+/* Is this address in the __sched functions? */
+extern int in_sched_functions(unsigned long addr);
+
+#define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
+extern signed long FASTCALL(schedule_timeout(signed long timeout));
+extern signed long schedule_timeout_interruptible(signed long timeout);
+extern signed long schedule_timeout_uninterruptible(signed long timeout);
+asmlinkage void schedule(void);
+
+struct namespace;
+
+/* Maximum number of active map areas.. This is a random (large) number */
+#define DEFAULT_MAX_MAP_COUNT	65536
+
+extern int sysctl_max_map_count;
+
+#include <linux/aio.h>
+
+extern unsigned long
+arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
+		       unsigned long, unsigned long);
+extern unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+			  unsigned long len, unsigned long pgoff,
+			  unsigned long flags);
+extern void arch_unmap_area(struct mm_struct *, unsigned long);
+extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
+typedef atomic_long_t mm_counter_t;
+
+#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
+#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
+#define get_mm_counter(mm, member) ((mm)->_##member)
+#define add_mm_counter(mm, member, value) (mm)->_##member += (value)
+#define inc_mm_counter(mm, member) (mm)->_##member++
+#define dec_mm_counter(mm, member) (mm)->_##member--
+typedef unsigned long mm_counter_t;
+
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define get_mm_rss(mm)					\
+	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
+#define update_hiwater_rss(mm)	do {			\
+	unsigned long _rss = get_mm_rss(mm);		\
+	if ((mm)->hiwater_rss < _rss)			\
+		(mm)->hiwater_rss = _rss;		\
+} while (0)
+#define update_hiwater_vm(mm)	do {			\
+	if ((mm)->hiwater_vm < (mm)->total_vm)		\
+		(mm)->hiwater_vm = (mm)->total_vm;	\
+} while (0)
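+/*
+ * Usage sketch: fault paths bump the per-type counters, and readers
+ * such as /proc sum them back up via get_mm_rss() (context here is
+ * illustrative):
+ *
+ *	inc_mm_counter(mm, anon_rss);
+ *	update_hiwater_rss(mm);
+ *	rss = get_mm_rss(mm);
+ */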
+
+struct mm_struct {
+	struct vm_area_struct * mmap;		/* list of VMAs */
+	struct rb_root mm_rb;
+	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+	unsigned long (*get_unmapped_area) (struct file *filp,
+				unsigned long addr, unsigned long len,
+				unsigned long pgoff, unsigned long flags);
+	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+        unsigned long mmap_base;		/* base of mmap area */
+        unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
+	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
+	pgd_t * pgd;
+	atomic_t mm_users;			/* How many users with user space? */
+	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
+	int map_count;				/* number of VMAs */
+	struct rw_semaphore mmap_sem;
+	spinlock_t page_table_lock;		/* Protects page tables and some counters */
+
+	struct list_head mmlist;		/* List of maybe swapped mm's.  These are globally strung
+						 * together off init_mm.mmlist, and are protected
+						 * by mmlist_lock
+						 */
+
+	/* Special counters, in some configurations protected by the
+	 * page_table_lock, in other configurations by being atomic.
+	 */
+	mm_counter_t _file_rss;
+	mm_counter_t _anon_rss;
+
+	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
+	unsigned long hiwater_vm;	/* High-water virtual memory usage */
+
+	unsigned long total_vm, locked_vm, shared_vm, exec_vm;
+	unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
+	unsigned long start_code, end_code, start_data, end_data;
+	unsigned long start_brk, brk, start_stack;
+	unsigned long arg_start, arg_end, env_start, env_end;
+
+	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+
+	unsigned dumpable:2;
+	cpumask_t cpu_vm_mask;
+
+	/* Architecture-specific MM context */
+	mm_context_t context;
+
+	/* Token based thrashing protection. */
+	unsigned long swap_token_time;
+	char recent_pagein;
+
+	/* coredumping support */
+	int core_waiters;
+	struct completion *core_startup_done, core_done;
+
+	/* aio bits */
+	rwlock_t		ioctx_list_lock;
+	struct kioctx		*ioctx_list;
+};
+
+struct sighand_struct {
+	atomic_t		count;
+	struct k_sigaction	action[_NSIG];
+	spinlock_t		siglock;
+	struct rcu_head		rcu;
+};
+
+extern void sighand_free_cb(struct rcu_head *rhp);
+
+static inline void sighand_free(struct sighand_struct *sp)
+{
+	call_rcu(&sp->rcu, sighand_free_cb);
+}
+
+/*
+ * NOTE! "signal_struct" does not have its own
+ * locking, because a shared signal_struct always
+ * implies a shared sighand_struct, so locking
+ * sighand_struct is always a proper superset of
+ * the locking of signal_struct.
+ */
+struct signal_struct {
+	atomic_t		count;
+	atomic_t		live;
+
+	wait_queue_head_t	wait_chldexit;	/* for wait4() */
+
+	/* current thread group signal load-balancing target: */
+	task_t			*curr_target;
+
+	/* shared signal handling: */
+	struct sigpending	shared_pending;
+
+	/* thread group exit support */
+	int			group_exit_code;
+	/* overloaded:
+	 * - notify group_exit_task when ->count is equal to notify_count
+	 * - everyone except group_exit_task is stopped during signal delivery
+	 *   of fatal signals, group_exit_task processes the signal.
+	 */
+	struct task_struct	*group_exit_task;
+	int			notify_count;
+
+	/* thread group stop support, overloads group_exit_code too */
+	int			group_stop_count;
+	unsigned int		flags; /* see SIGNAL_* flags below */
+
+	/* POSIX.1b Interval Timers */
+	struct list_head posix_timers;
+
+	/* ITIMER_REAL timer for the process */
+	struct hrtimer real_timer;
+	ktime_t it_real_incr;
+
+	/* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */
+	cputime_t it_prof_expires, it_virt_expires;
+	cputime_t it_prof_incr, it_virt_incr;
+
+	/* job control IDs */
+	pid_t pgrp;
+	pid_t tty_old_pgrp;
+	pid_t session;
+	/* boolean value for session group leader */
+	int leader;
+
+	struct tty_struct *tty; /* NULL if no tty */
+
+	/*
+	 * Cumulative resource counters for dead threads in the group,
+	 * and for reaped dead child processes forked by this group.
+	 * Live threads maintain their own counters and add to these
+	 * in __exit_signal, except for the group leader.
+	 */
+	cputime_t utime, stime, cutime, cstime;
+	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
+	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
+
+	/*
+	 * Cumulative ns of scheduled CPU time for dead threads in the
+	 * group, not including a zombie group leader.  (This only differs
+	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
+	 * other than jiffies.)
+	 */
+	unsigned long long sched_time;
+
+	/*
+	 * We don't bother to synchronize most readers of this at all,
+	 * because there is no reader checking a limit that actually needs
+	 * to get both rlim_cur and rlim_max atomically, and either one
+	 * alone is a single word that can safely be read normally.
+	 * getrlimit/setrlimit use task_lock(current->group_leader) to
+	 * protect this instead of the siglock, because they really
+	 * have no need to disable irqs.
+	 */
+	struct rlimit rlim[RLIM_NLIMITS];
+
+	struct list_head cpu_timers[3];
+
+	/* keep the process-shared keyrings here so that they do the right
+	 * thing in threads created with CLONE_THREAD */
+#ifdef CONFIG_KEYS
+	struct key *session_keyring;	/* keyring inherited over fork */
+	struct key *process_keyring;	/* keyring private to this process */
+#endif
+};
+
+/* Context switch must be unlocked if interrupts are to be enabled */
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+# define __ARCH_WANT_UNLOCKED_CTXSW
+#endif
+
+/*
+ * Bits in flags field of signal_struct.
+ */
+#define SIGNAL_STOP_STOPPED	0x00000001 /* job control stop in effect */
+#define SIGNAL_STOP_DEQUEUED	0x00000002 /* stop signal dequeued */
+#define SIGNAL_STOP_CONTINUED	0x00000004 /* SIGCONT since WCONTINUED reap */
+#define SIGNAL_GROUP_EXIT	0x00000008 /* group exit in progress */
+
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space.  This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO	100
+#define MAX_RT_PRIO		MAX_USER_RT_PRIO
+
+#define MAX_PRIO		(MAX_RT_PRIO + 40)
+
+#define rt_task(p)		(unlikely((p)->prio < MAX_RT_PRIO))
+
+/*
+ * Some day this will be a full-fledged user tracking system..
+ */
+struct user_struct {
+	atomic_t __count;	/* reference count */
+	atomic_t processes;	/* How many processes does this user have? */
+	atomic_t files;		/* How many open files does this user have? */
+	atomic_t sigpending;	/* How many pending signals does this user have? */
+#ifdef CONFIG_INOTIFY
+	atomic_t inotify_watches; /* How many inotify watches does this user have? */
+	atomic_t inotify_devs;	/* How many inotify devs does this user have opened? */
+#endif
+	/* protected by mq_lock	*/
+	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
+	unsigned long locked_shm; /* How many pages of mlocked shm ? */
+
+#ifdef CONFIG_KEYS
+	struct key *uid_keyring;	/* UID specific keyring */
+	struct key *session_keyring;	/* UID's default session keyring */
+#endif
+
+	/* Hash table maintenance information */
+	struct list_head uidhash_list;
+	uid_t uid;
+};
+
+extern struct user_struct *find_user(uid_t);
+
+extern struct user_struct root_user;
+#define INIT_USER (&root_user)
+
+typedef struct prio_array prio_array_t;
+struct backing_dev_info;
+struct reclaim_state;
+
+#ifdef CONFIG_SCHEDSTATS
+struct sched_info {
+	/* cumulative counters */
+	unsigned long	cpu_time,	/* time spent on the cpu */
+			run_delay,	/* time spent waiting on a runqueue */
+			pcnt;		/* # of timeslices run on this cpu */
+
+	/* timestamps */
+	unsigned long	last_arrival,	/* when we last ran on a cpu */
+			last_queued;	/* when we were last queued to run */
+};
+
+extern struct file_operations proc_schedstat_operations;
+#endif
+
+enum idle_type
+{
+	SCHED_IDLE,
+	NOT_IDLE,
+	NEWLY_IDLE,
+	MAX_IDLE_TYPES
+};
+
+/*
+ * sched-domains (multiprocessor balancing) declarations:
+ */
+#ifdef CONFIG_SMP
+#define SCHED_LOAD_SCALE	128UL	/* increase resolution of load */
+
+#define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
+#define SD_BALANCE_NEWIDLE	2	/* Balance when about to become idle */
+#define SD_BALANCE_EXEC		4	/* Balance on exec */
+#define SD_BALANCE_FORK		8	/* Balance on fork, clone */
+#define SD_WAKE_IDLE		16	/* Wake to idle CPU on task wakeup */
+#define SD_WAKE_AFFINE		32	/* Wake task to waking CPU */
+#define SD_WAKE_BALANCE		64	/* Perform balancing at task wakeup */
+#define SD_SHARE_CPUPOWER	128	/* Domain members share cpu power */
+
+struct sched_group {
+	struct sched_group *next;	/* Must be a circular list */
+	cpumask_t cpumask;
+
+	/*
+	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
+	 * single CPU. This is read only (except for setup, hotplug CPU).
+	 */
+	unsigned long cpu_power;
+};
+
+struct sched_domain {
+	/* These fields must be setup */
+	struct sched_domain *parent;	/* top domain must be null terminated */
+	struct sched_group *groups;	/* the balancing groups of the domain */
+	cpumask_t span;			/* span of all CPUs in this domain */
+	unsigned long min_interval;	/* Minimum balance interval ms */
+	unsigned long max_interval;	/* Maximum balance interval ms */
+	unsigned int busy_factor;	/* less balancing by factor if busy */
+	unsigned int imbalance_pct;	/* No balance until over watermark */
+	unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
+	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
+	unsigned int per_cpu_gain;	/* CPU % gained by adding domain cpus */
+	unsigned int busy_idx;
+	unsigned int idle_idx;
+	unsigned int newidle_idx;
+	unsigned int wake_idx;
+	unsigned int forkexec_idx;
+	int flags;			/* See SD_* */
+
+	/* Runtime fields. */
+	unsigned long last_balance;	/* init to jiffies. units in jiffies */
+	unsigned int balance_interval;	/* initialise to 1. units in ms. */
+	unsigned int nr_balance_failed; /* initialise to 0 */
+
+#ifdef CONFIG_SCHEDSTATS
+	/* load_balance() stats */
+	unsigned long lb_cnt[MAX_IDLE_TYPES];
+	unsigned long lb_failed[MAX_IDLE_TYPES];
+	unsigned long lb_balanced[MAX_IDLE_TYPES];
+	unsigned long lb_imbalance[MAX_IDLE_TYPES];
+	unsigned long lb_gained[MAX_IDLE_TYPES];
+	unsigned long lb_hot_gained[MAX_IDLE_TYPES];
+	unsigned long lb_nobusyg[MAX_IDLE_TYPES];
+	unsigned long lb_nobusyq[MAX_IDLE_TYPES];
+
+	/* Active load balancing */
+	unsigned long alb_cnt;
+	unsigned long alb_failed;
+	unsigned long alb_pushed;
+
+	/* SD_BALANCE_EXEC stats */
+	unsigned long sbe_cnt;
+	unsigned long sbe_balanced;
+	unsigned long sbe_pushed;
+
+	/* SD_BALANCE_FORK stats */
+	unsigned long sbf_cnt;
+	unsigned long sbf_balanced;
+	unsigned long sbf_pushed;
+
+	/* try_to_wake_up() stats */
+	unsigned long ttwu_wake_remote;
+	unsigned long ttwu_move_affine;
+	unsigned long ttwu_move_balance;
+#endif
+};
+
+extern void partition_sched_domains(cpumask_t *partition1,
+				    cpumask_t *partition2);
+
+/*
+ * Maximum cache size the migration-costs auto-tuning code will
+ * search from:
+ */
+extern unsigned int max_cache_size;
+
+#endif	/* CONFIG_SMP */
+
+
+struct io_context;			/* See blkdev.h */
+void exit_io_context(void);
+struct cpuset;
+
+#define NGROUPS_SMALL		32
+#define NGROUPS_PER_BLOCK	((int)(PAGE_SIZE / sizeof(gid_t)))
+struct group_info {
+	int ngroups;
+	atomic_t usage;
+	gid_t small_block[NGROUPS_SMALL];
+	int nblocks;
+	gid_t *blocks[0];
+};
+
+/*
+ * get_group_info() must be called with the owning task locked (via task_lock())
+ * when task != current.  The reason being that the vast majority of callers are
+ * looking at current->group_info, which cannot be changed except by the
+ * current task.  Changing current->group_info requires the task lock, too.
+ */
+#define get_group_info(group_info) do { \
+	atomic_inc(&(group_info)->usage); \
+} while (0)
+
+#define put_group_info(group_info) do { \
+	if (atomic_dec_and_test(&(group_info)->usage)) \
+		groups_free(group_info); \
+} while (0)
+
+extern struct group_info *groups_alloc(int gidsetsize);
+extern void groups_free(struct group_info *group_info);
+extern int set_current_groups(struct group_info *group_info);
+extern int groups_search(struct group_info *group_info, gid_t grp);
+/* access the groups "array" with this macro */
+#define GROUP_AT(gi, i) \
+    ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
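+/*
+ * A minimal sketch of taking a reference and walking the groups
+ * (locking as described above; do_something() is illustrative):
+ *
+ *	task_lock(task);
+ *	group_info = task->group_info;
+ *	get_group_info(group_info);
+ *	task_unlock(task);
+ *
+ *	for (i = 0; i < group_info->ngroups; i++)
+ *		do_something(GROUP_AT(group_info, i));
+ *
+ *	put_group_info(group_info);
+ */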
+
+#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
+extern void prefetch_stack(struct task_struct*);
+#else
+static inline void prefetch_stack(struct task_struct *t) { }
+#endif
+
+struct audit_context;		/* See audit.c */
+struct mempolicy;
+
+struct task_struct {
+	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
+	struct thread_info *thread_info;
+	atomic_t usage;
+	unsigned long flags;	/* per process flags, defined below */
+	unsigned long ptrace;
+
+	int lock_depth;		/* BKL lock depth */
+
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	int oncpu;
+#endif
+	int prio, static_prio;
+	struct list_head run_list;
+	prio_array_t *array;
+
+	unsigned short ioprio;
+
+	unsigned long sleep_avg;
+	unsigned long long timestamp, last_ran;
+	unsigned long long sched_time; /* sched_clock time spent running */
+	int activated;
+
+	unsigned long policy;
+	cpumask_t cpus_allowed;
+	unsigned int time_slice, first_time_slice;
+
+#ifdef CONFIG_SCHEDSTATS
+	struct sched_info sched_info;
+#endif
+
+	struct list_head tasks;
+	/*
+	 * ptrace_list/ptrace_children forms the list of my children
+	 * that were stolen by a ptracer.
+	 */
+	struct list_head ptrace_children;
+	struct list_head ptrace_list;
+
+	struct mm_struct *mm, *active_mm;
+
+/* task state */
+	struct linux_binfmt *binfmt;
+	long exit_state;
+	int exit_code, exit_signal;
+	int pdeath_signal;  /*  The signal sent when the parent dies  */
+	/* ??? */
+	unsigned long personality;
+	unsigned did_exec:1;
+	pid_t pid;
+	pid_t tgid;
+	/* 
+	 * pointers to (original) parent process, youngest child, younger sibling,
+	 * older sibling, respectively.  (p->father can be replaced with 
+	 * p->parent->pid)
+	 */
+	struct task_struct *real_parent; /* real parent process (when being debugged) */
+	struct task_struct *parent;	/* parent process */
+	/*
+	 * children/sibling forms the list of my children plus the
+	 * tasks I'm ptracing.
+	 */
+	struct list_head children;	/* list of my children */
+	struct list_head sibling;	/* linkage in my parent's children list */
+	struct task_struct *group_leader;	/* threadgroup leader */
+
+	/* PID/PID hash table linkage. */
+	struct pid pids[PIDTYPE_MAX];
+
+	struct completion *vfork_done;		/* for vfork() */
+	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
+	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
+
+	unsigned long rt_priority;
+	cputime_t utime, stime;
+	unsigned long nvcsw, nivcsw; /* context switch counts */
+	struct timespec start_time;
+/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+	unsigned long min_flt, maj_flt;
+
+  	cputime_t it_prof_expires, it_virt_expires;
+	unsigned long long it_sched_expires;
+	struct list_head cpu_timers[3];
+
+/* process credentials */
+	uid_t uid,euid,suid,fsuid;
+	gid_t gid,egid,sgid,fsgid;
+	struct group_info *group_info;
+	kernel_cap_t   cap_effective, cap_inheritable, cap_permitted;
+	unsigned keep_capabilities:1;
+	struct user_struct *user;
+#ifdef CONFIG_KEYS
+	struct key *request_key_auth;	/* assumed request_key authority */
+	struct key *thread_keyring;	/* keyring private to this thread */
+	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
+#endif
+	int oomkilladj; /* OOM kill score adjustment (bit shift). */
+	char comm[TASK_COMM_LEN]; /* executable name excluding path
+				     - access with [gs]et_task_comm (which lock
+				       it with task_lock())
+				     - initialized normally by flush_old_exec */
+/* file system info */
+	int link_count, total_link_count;
+/* ipc stuff */
+	struct sysv_sem sysvsem;
+/* CPU-specific state of this task */
+	struct thread_struct thread;
+/* filesystem information */
+	struct fs_struct *fs;
+/* open file information */
+	struct files_struct *files;
+/* namespace */
+	struct namespace *namespace;
+/* signal handlers */
+	struct signal_struct *signal;
+	struct sighand_struct *sighand;
+
+	sigset_t blocked, real_blocked;
+	sigset_t saved_sigmask;		/* To be restored with TIF_RESTORE_SIGMASK */
+	struct sigpending pending;
+
+	unsigned long sas_ss_sp;
+	size_t sas_ss_size;
+	int (*notifier)(void *priv);
+	void *notifier_data;
+	sigset_t *notifier_mask;
+	
+	void *security;
+	struct audit_context *audit_context;
+	seccomp_t seccomp;
+
+/* Thread group tracking */
+   	u32 parent_exec_id;
+   	u32 self_exec_id;
+/* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
+	spinlock_t alloc_lock;
+/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
+	spinlock_t proc_lock;
+
+#ifdef CONFIG_DEBUG_MUTEXES
+	/* mutex deadlock detection */
+	struct mutex_waiter *blocked_on;
+#endif
+
+/* journalling filesystem info */
+	void *journal_info;
+
+/* VM state */
+	struct reclaim_state *reclaim_state;
+
+	struct dentry *proc_dentry;
+	struct backing_dev_info *backing_dev_info;
+
+	struct io_context *io_context;
+
+	unsigned long ptrace_message;
+	siginfo_t *last_siginfo; /* For ptrace use.  */
+/*
+ * current io wait handle: wait queue entry to use for io waits
+ * If this thread is processing aio, this points at the waitqueue
+ * inside the currently handled kiocb. It may be NULL (i.e. default
+ * to a stack-based synchronous wait) if it's doing sync IO.
+ */
+	wait_queue_t *io_wait;
+/* i/o counters (bytes read/written, #syscalls) */
+	u64 rchar, wchar, syscr, syscw;
+#if defined(CONFIG_BSD_PROCESS_ACCT)
+	u64 acct_rss_mem1;	/* accumulated rss usage */
+	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
+	clock_t acct_stimexpd;	/* clock_t-converted stime since last update */
+#endif
+#ifdef CONFIG_NUMA
+  	struct mempolicy *mempolicy;
+	short il_next;
+#endif
+#ifdef CONFIG_CPUSETS
+	struct cpuset *cpuset;
+	nodemask_t mems_allowed;
+	int cpuset_mems_generation;
+#endif
+	atomic_t fs_excl;	/* holding fs exclusive resources */
+	struct rcu_head rcu;
+};
+
+static inline pid_t process_group(struct task_struct *tsk)
+{
+	return tsk->signal->pgrp;
+}
+
+/**
+ * pid_alive - check that a task structure is not stale
+ * @p: Task structure to be checked.
+ *
+ * Test if a process is not yet dead (at most zombie state).
+ * If pid_alive fails, then pointers within the task structure
+ * can be stale and must not be dereferenced.
+ */
+static inline int pid_alive(struct task_struct *p)
+{
+	return p->pids[PIDTYPE_PID].nr != 0;
+}
+
+extern void free_task(struct task_struct *tsk);
+extern void __put_task_struct(struct task_struct *tsk);
+#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
+
+extern void __put_task_struct_cb(struct rcu_head *rhp);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+	if (atomic_dec_and_test(&t->usage))
+		call_rcu(&t->rcu, __put_task_struct_cb);
+}
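+/*
+ * Typical reference pattern (sketch; error handling elided): pin the
+ * task under tasklist_lock, then drop the reference when done with it:
+ *
+ *	read_lock(&tasklist_lock);
+ *	p = find_task_by_pid(pid);
+ *	if (p)
+ *		get_task_struct(p);
+ *	read_unlock(&tasklist_lock);
+ *	...
+ *	if (p)
+ *		put_task_struct(p);
+ */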
+
+/*
+ * Per process flags
+ */
+#define PF_ALIGNWARN	0x00000001	/* Print alignment warning msgs */
+					/* Not implemented yet, only for 486*/
+#define PF_STARTING	0x00000002	/* being created */
+#define PF_EXITING	0x00000004	/* getting shut down */
+#define PF_DEAD		0x00000008	/* Dead */
+#define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
+#define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
+#define PF_DUMPCORE	0x00000200	/* dumped core */
+#define PF_SIGNALED	0x00000400	/* killed by a signal */
+#define PF_MEMALLOC	0x00000800	/* Allocating memory */
+#define PF_FLUSHER	0x00001000	/* responsible for disk writeback */
+#define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
+#define PF_FREEZE	0x00004000	/* this task is being frozen for suspend now */
+#define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
+#define PF_FROZEN	0x00010000	/* frozen for system suspend */
+#define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
+#define PF_KSWAPD	0x00040000	/* I am kswapd */
+#define PF_SWAPOFF	0x00080000	/* I am in swapoff */
+#define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
+#define PF_SYNCWRITE	0x00200000	/* I am doing a sync write */
+#define PF_BORROWED_MM	0x00400000	/* I am a kthread doing use_mm */
+#define PF_RANDOMIZE	0x00800000	/* randomize virtual address space */
+#define PF_SWAPWRITE	0x01000000	/* Allowed to write to swap */
+
+/*
+ * Only the _current_ task can read/write to tsk->flags, but other
+ * tasks can access tsk->flags in readonly mode, for example
+ * with tsk_used_math (like during threaded core dumping).
+ * There is however an exception to this rule during ptrace
+ * or during fork: the ptracer task is allowed to write to the
+ * child->flags of its traced child (same goes for fork, the parent
+ * can write to the child->flags), because we're guaranteed the
+ * child is not running and in turn not changing child->flags
+ * at the same time the parent does it.
+ */
+#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0)
+#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0)
+#define clear_used_math() clear_stopped_child_used_math(current)
+#define set_used_math() set_stopped_child_used_math(current)
+#define conditional_stopped_child_used_math(condition, child) \
+	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)
+#define conditional_used_math(condition) \
+	conditional_stopped_child_used_math(condition, current)
+#define copy_to_stopped_child_used_math(child) \
+	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)
+/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
+#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
+#define used_math() tsk_used_math(current)
+
+#ifdef CONFIG_SMP
+extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
+#else
+static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
+{
+	if (!cpu_isset(0, new_mask))
+		return -EINVAL;
+	return 0;
+}
+#endif
+
+extern unsigned long long sched_clock(void);
+extern unsigned long long current_sched_time(const task_t *current_task);
+
+/* sched_exec is called by processes performing an exec */
+#ifdef CONFIG_SMP
+extern void sched_exec(void);
+#else
+#define sched_exec()   {}
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void idle_task_exit(void);
+#else
+static inline void idle_task_exit(void) {}
+#endif
+
+extern void sched_idle_next(void);
+extern void set_user_nice(task_t *p, long nice);
+extern int task_prio(const task_t *p);
+extern int task_nice(const task_t *p);
+extern int can_nice(const task_t *p, const int nice);
+extern int task_curr(const task_t *p);
+extern int idle_cpu(int cpu);
+extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
+extern task_t *idle_task(int cpu);
+extern task_t *curr_task(int cpu);
+extern void set_curr_task(int cpu, task_t *p);
+
+void yield(void);
+
+/*
+ * The default (Linux) execution domain.
+ */
+extern struct exec_domain	default_exec_domain;
+
+union thread_union {
+	struct thread_info thread_info;
+	unsigned long stack[THREAD_SIZE/sizeof(long)];
+};
+
+#ifndef __HAVE_ARCH_KSTACK_END
+static inline int kstack_end(void *addr)
+{
+	/* Reliable end of stack detection:
+	 * Some APM bios versions misalign the stack
+	 */
+	return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
+}
+#endif
+
+extern union thread_union init_thread_union;
+extern struct task_struct init_task;
+
+extern struct   mm_struct init_mm;
+
+#define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
+extern struct task_struct *find_task_by_pid_type(int type, int pid);
+extern void set_special_pids(pid_t session, pid_t pgrp);
+extern void __set_special_pids(pid_t session, pid_t pgrp);
+
+/* per-UID process charging. */
+extern struct user_struct * alloc_uid(uid_t);
+static inline struct user_struct *get_uid(struct user_struct *u)
+{
+	atomic_inc(&u->__count);
+	return u;
+}
+extern void free_uid(struct user_struct *);
+extern void switch_uid(struct user_struct *);
+
+#include <asm/current.h>
+
+extern void do_timer(struct pt_regs *);
+
+extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
+extern int FASTCALL(wake_up_process(struct task_struct * tsk));
+extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
+						unsigned long clone_flags));
+#ifdef CONFIG_SMP
+ extern void kick_process(struct task_struct *tsk);
+#else
+ static inline void kick_process(struct task_struct *tsk) { }
+#endif
+extern void FASTCALL(sched_fork(task_t * p, int clone_flags));
+extern void FASTCALL(sched_exit(task_t * p));
+
+extern int in_group_p(gid_t);
+extern int in_egroup_p(gid_t);
+
+extern void proc_caches_init(void);
+extern void flush_signals(struct task_struct *);
+extern void flush_signal_handlers(struct task_struct *, int force_default);
+extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
+
+static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&tsk->sighand->siglock, flags);
+	ret = dequeue_signal(tsk, mask, info);
+	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
+
+	return ret;
+}	
+
+extern void block_all_signals(int (*notifier)(void *priv), void *priv,
+			      sigset_t *mask);
+extern void unblock_all_signals(void);
+extern void release_task(struct task_struct * p);
+extern int send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int send_group_sig_info(int, struct siginfo *, struct task_struct *);
+extern int force_sigsegv(int, struct task_struct *);
+extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
+extern int kill_pg_info(int, struct siginfo *, pid_t);
+extern int kill_proc_info(int, struct siginfo *, pid_t);
+extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t);
+extern void do_notify_parent(struct task_struct *, int);
+extern void force_sig(int, struct task_struct *);
+extern void force_sig_specific(int, struct task_struct *);
+extern int send_sig(int, struct task_struct *, int);
+extern void zap_other_threads(struct task_struct *p);
+extern int kill_pg(pid_t, int, int);
+extern int kill_sl(pid_t, int, int);
+extern int kill_proc(pid_t, int, int);
+extern struct sigqueue *sigqueue_alloc(void);
+extern void sigqueue_free(struct sigqueue *);
+extern int send_sigqueue(int, struct sigqueue *,  struct task_struct *);
+extern int send_group_sigqueue(int, struct sigqueue *,  struct task_struct *);
+extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
+extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
+
+/* These can be the second arg to send_sig_info/send_group_sig_info.  */
+#define SEND_SIG_NOINFO ((struct siginfo *) 0)
+#define SEND_SIG_PRIV	((struct siginfo *) 1)
+#define SEND_SIG_FORCED	((struct siginfo *) 2)
+
+static inline int is_si_special(const struct siginfo *info)
+{
+	return info <= SEND_SIG_FORCED;
+}
+
+/* True if we are on the alternate signal stack.  */
+
+static inline int on_sig_stack(unsigned long sp)
+{
+	return (sp - current->sas_ss_sp < current->sas_ss_size);
+}
+
+static inline int sas_ss_flags(unsigned long sp)
+{
+	return (current->sas_ss_size == 0 ? SS_DISABLE
+		: on_sig_stack(sp) ? SS_ONSTACK : 0);
+}
+
+/*
+ * Routines for handling mm_structs
+ */
+extern struct mm_struct * mm_alloc(void);
+
+/* mmdrop drops the mm and the page tables */
+extern void FASTCALL(__mmdrop(struct mm_struct *));
+static inline void mmdrop(struct mm_struct * mm)
+{
+	if (atomic_dec_and_test(&mm->mm_count))
+		__mmdrop(mm);
+}
+
+/* mmput gets rid of the mappings and all user-space */
+extern void mmput(struct mm_struct *);
+/* Grab a reference to a task's mm, if it is not already going away */
+extern struct mm_struct *get_task_mm(struct task_struct *task);
+/* Remove the current tasks stale references to the old mm_struct */
+extern void mm_release(struct task_struct *, struct mm_struct *);
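+/*
+ * Common pattern for the helpers above (sketch): pin another task's mm,
+ * use it under mmap_sem, then drop the reference:
+ *
+ *	mm = get_task_mm(task);
+ *	if (mm) {
+ *		down_read(&mm->mmap_sem);
+ *		...
+ *		up_read(&mm->mmap_sem);
+ *		mmput(mm);
+ *	}
+ */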
+
+extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
+extern void flush_thread(void);
+extern void exit_thread(void);
+
+extern void exit_files(struct task_struct *);
+extern void exit_signal(struct task_struct *);
+extern void __exit_signal(struct task_struct *);
+extern void exit_sighand(struct task_struct *);
+extern void __exit_sighand(struct task_struct *);
+extern void exit_itimers(struct signal_struct *);
+
+extern NORET_TYPE void do_group_exit(int);
+
+extern void daemonize(const char *, ...);
+extern int allow_signal(int);
+extern int disallow_signal(int);
+extern task_t *child_reaper;
+
+extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
+extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+task_t *fork_idle(int);
+
+extern void set_task_comm(struct task_struct *tsk, char *from);
+extern void get_task_comm(char *to, struct task_struct *tsk);
+
+#ifdef CONFIG_SMP
+extern void wait_task_inactive(task_t * p);
+#else
+#define wait_task_inactive(p)	do { } while (0)
+#endif
+
+#define remove_parent(p)	list_del_init(&(p)->sibling)
+#define add_parent(p, parent)	list_add_tail(&(p)->sibling,&(parent)->children)
+
+#define REMOVE_LINKS(p) do {					\
+	if (thread_group_leader(p))				\
+		list_del_init(&(p)->tasks);			\
+	remove_parent(p);					\
+	} while (0)
+
+#define SET_LINKS(p) do {					\
+	if (thread_group_leader(p))				\
+		list_add_tail(&(p)->tasks,&init_task.tasks);	\
+	add_parent(p, (p)->parent);				\
+	} while (0)
+
+#define next_task(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
+#define prev_task(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
+
+#define for_each_process(p) \
+	for (p = &init_task ; (p = next_task(p)) != &init_task ; )
+
+/*
+ * Careful: do_each_thread/while_each_thread is a double loop so
+ *          'break' will not work as expected - use goto instead.
+ */
+#define do_each_thread(g, t) \
+	for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do
+
+#define while_each_thread(g, t) \
+	while ((t = next_thread(t)) != g)
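+/*
+ * Usage sketch, following the goto rule noted above (match() is
+ * illustrative):
+ *
+ *	read_lock(&tasklist_lock);
+ *	do_each_thread(g, t) {
+ *		if (match(t))
+ *			goto found;
+ *	} while_each_thread(g, t);
+ *	t = NULL;
+ * found:
+ *	read_unlock(&tasklist_lock);
+ */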
+
+extern task_t * FASTCALL(next_thread(const task_t *p));
+
+#define thread_group_leader(p)	(p->pid == p->tgid)
+
+static inline int thread_group_empty(task_t *p)
+{
+	return list_empty(&p->pids[PIDTYPE_TGID].pid_list);
+}
+
+#define delay_group_leader(p) \
+		(thread_group_leader(p) && !thread_group_empty(p))
+
+extern void unhash_process(struct task_struct *p);
+
+/*
+ * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring
+ * subscriptions and synchronises with wait4().  Also used in procfs.  Also
+ * pins the final release of task.io_context.  Also protects ->cpuset.
+ *
+ * Nests both inside and outside of read_lock(&tasklist_lock).
+ * It must not be nested with write_lock_irq(&tasklist_lock),
+ * neither inside nor outside.
+ */
+static inline void task_lock(struct task_struct *p)
+{
+	spin_lock(&p->alloc_lock);
+}
+
+static inline void task_unlock(struct task_struct *p)
+{
+	spin_unlock(&p->alloc_lock);
+}
+
+#ifndef __HAVE_THREAD_FUNCTIONS
+
+#define task_thread_info(task) (task)->thread_info
+#define task_stack_page(task) ((void*)((task)->thread_info))
+
+static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
+{
+	*task_thread_info(p) = *task_thread_info(org);
+	task_thread_info(p)->task = p;
+}
+
+static inline unsigned long *end_of_stack(struct task_struct *p)
+{
+	return (unsigned long *)(p->thread_info + 1);
+}
+
+#endif
+
+/* set thread flags in other task's structures
+ * - see asm/thread_info.h for TIF_xxxx flags available
+ */
+static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
+{
+	set_ti_thread_flag(task_thread_info(tsk), flag);
+}
+
+static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
+{
+	clear_ti_thread_flag(task_thread_info(tsk), flag);
+}
+
+static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
+{
+	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
+}
+
+static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
+{
+	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
+}
+
+static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
+{
+	return test_ti_thread_flag(task_thread_info(tsk), flag);
+}
+
+static inline void set_tsk_need_resched(struct task_struct *tsk)
+{
+	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+}
+
+static inline void clear_tsk_need_resched(struct task_struct *tsk)
+{
+	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+}
+
+static inline int signal_pending(struct task_struct *p)
+{
+	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
+}
+  
+static inline int need_resched(void)
+{
+	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
+}
+
+/*
+ * cond_resched() and cond_resched_lock(): latency reduction via
+ * explicit rescheduling in places that are safe. The return
+ * value indicates whether a reschedule was done in fact.
+ * cond_resched_lock() will drop the spinlock before scheduling,
+ * cond_resched_softirq() will enable bhs before scheduling.
+ */
+extern int cond_resched(void);
+extern int cond_resched_lock(spinlock_t * lock);
+extern int cond_resched_softirq(void);
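+/*
+ * Usage sketch: a long-running loop in process context inserts a
+ * voluntary reschedule point on each iteration (work_on() is
+ * illustrative):
+ *
+ *	for (i = 0; i < nr_items; i++) {
+ *		work_on(i);
+ *		cond_resched();
+ *	}
+ */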
+
+/*
+ * Does a critical section need to be broken due to another
+ * task waiting?:
+ */
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
+# define need_lockbreak(lock) ((lock)->break_lock)
+#else
+# define need_lockbreak(lock) 0
+#endif
+
+/*
+ * Does a critical section need to be broken due to another
+ * task waiting or preemption being signalled:
+ */
+static inline int lock_need_resched(spinlock_t *lock)
+{
+	if (need_lockbreak(lock) || need_resched())
+		return 1;
+	return 0;
+}
+
+/* Reevaluate whether the task has signals pending delivery.
+   This is required every time the blocked sigset_t changes.
+   Callers must hold sighand->siglock.  */
+
+extern FASTCALL(void recalc_sigpending_tsk(struct task_struct *t));
+extern void recalc_sigpending(void);
+
+extern void signal_wake_up(struct task_struct *t, int resume_stopped);
+
+/*
+ * Wrappers for p->thread_info->cpu access. No-op on UP.
+ */
+#ifdef CONFIG_SMP
+
+static inline unsigned int task_cpu(const struct task_struct *p)
+{
+	return task_thread_info(p)->cpu;
+}
+
+static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+	task_thread_info(p)->cpu = cpu;
+}
+
+#else
+
+static inline unsigned int task_cpu(const struct task_struct *p)
+{
+	return 0;
+}
+
+static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
+extern void arch_pick_mmap_layout(struct mm_struct *mm);
+#else
+static inline void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	mm->mmap_base = TASK_UNMAPPED_BASE;
+	mm->get_unmapped_area = arch_get_unmapped_area;
+	mm->unmap_area = arch_unmap_area;
+}
+#endif
+
+extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
+extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
+
+extern void normalize_rt_tasks(void);
+
+#ifdef CONFIG_PM
+/*
+ * Check if a process has been frozen
+ */
+static inline int frozen(struct task_struct *p)
+{
+	return p->flags & PF_FROZEN;
+}
+
+/*
+ * Check if there is a request to freeze a process
+ */
+static inline int freezing(struct task_struct *p)
+{
+	return p->flags & PF_FREEZE;
+}
+
+/*
+ * Request that a process be frozen
+ * FIXME: SMP problem. We may not modify other processes' flags!
+ */
+static inline void freeze(struct task_struct *p)
+{
+	p->flags |= PF_FREEZE;
+}
+
+/*
+ * Wake up a frozen process
+ */
+static inline int thaw_process(struct task_struct *p)
+{
+	if (frozen(p)) {
+		p->flags &= ~PF_FROZEN;
+		wake_up_process(p);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * freezing is complete, mark process as frozen
+ */
+static inline void frozen_process(struct task_struct *p)
+{
+	p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN;
+}
+
+extern void refrigerator(void);
+extern int freeze_processes(void);
+extern void thaw_processes(void);
+
+static inline int try_to_freeze(void)
+{
+	if (freezing(current)) {
+		refrigerator();
+		return 1;
+	} else
+		return 0;
+}
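+/*
+ * Usage sketch: a kernel thread cooperates with the freezer by calling
+ * try_to_freeze() in its main loop (the loop shape and kthread helper
+ * are illustrative):
+ *
+ *	while (!kthread_should_stop()) {
+ *		try_to_freeze();
+ *		do_work();
+ *		schedule_timeout_interruptible(HZ);
+ *	}
+ */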
+#else
+static inline int frozen(struct task_struct *p) { return 0; }
+static inline int freezing(struct task_struct *p) { return 0; }
+static inline void freeze(struct task_struct *p) { BUG(); }
+static inline int thaw_process(struct task_struct *p) { return 1; }
+static inline void frozen_process(struct task_struct *p) { BUG(); }
+
+static inline void refrigerator(void) {}
+static inline int freeze_processes(void) { BUG(); return 0; }
+static inline void thaw_processes(void) {}
+
+static inline int try_to_freeze(void) { return 0; }
+
+#endif /* CONFIG_PM */
+#endif /* __KERNEL__ */
+
+#endif
diff -urN oldtree/include/video/vesa.h newtree/include/video/vesa.h
--- oldtree/include/video/vesa.h	1970-01-01 00:00:00.000000000 +0000
+++ newtree/include/video/vesa.h	2006-02-18 15:28:25.320254120 +0000
@@ -0,0 +1,146 @@
+#if 0
+#define DPRINTK(fmt, args...)	printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , \
+						  ## args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+#define p_crtc(arg) ((struct vesafb_crtc_ib*)(arg))
+#define p_vbe(arg)  ((struct vesafb_vbe_ib*)(arg))
+#define p_mode(arg) ((struct vesafb_mode_ib*)(arg))
+
+struct vesafb_task {
+	u8 flags;
+	void *buf;
+	int buf_len;
+	struct vm86_regs regs;
+	struct list_head node;
+	struct completion done;
+};
+
+/* Vesafb task flags and masks */
+#define TF_CALL		0x00
+#define TF_EXIT		0x01
+#define TF_GETVBEIB	0x02
+#define TF_USE_BUF	0x04
+#define TF_RETURN_BUF	0x08
+#define TF_MASK_BUF	(TF_USE_BUF | TF_RETURN_BUF)
+
+/* Macros and functions for manipulating vesafb tasks */
+#define vesafb_create_task(task)				\
+do {								\
+	task = kmalloc(sizeof(struct vesafb_task), GFP_ATOMIC); \
+	if (task) {						\
+		memset(task, 0, sizeof(struct vesafb_task));	\
+		init_completion(&task->done);			\
+	}							\
+} while (0)
+
+#define vesafb_wait_for_task(task) 	wait_for_completion(&task->done);
+#define vesafb_reset_task(task)		init_completion(&task->done);
+int vesafb_queue_task(struct vesafb_task *task);
+
+/* Functions for controlling the vesafb thread */
+int vesafb_wait_for_thread(void);
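+/*
+ * A rough sketch of issuing a BIOS call through the task interface
+ * (register setup, the VBE function number and the cleanup are
+ * illustrative; the exact protocol is defined by the vesafb thread):
+ *
+ *	struct vesafb_task *task;
+ *
+ *	vesafb_create_task(task);
+ *	if (!task)
+ *		return -ENOMEM;
+ *	task->flags = TF_CALL;
+ *	task->regs.eax = 0x4f00;
+ *	vesafb_queue_task(task);
+ *	vesafb_wait_for_task(task);
+ *	kfree(task);
+ */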
+
+#define VBE_CAP_CAN_SWITCH_DAC	0x01
+#define VBE_CAP_VGACOMPAT	0x02
+
+/* This struct is 512 bytes long */
+struct vesafb_vbe_ib {
+	char vbe_signature[4];
+	u16  vbe_version;
+	u32  oem_string_ptr;
+	u32  capabilities;
+	u32  mode_list_ptr;
+	u16  total_memory;
+	u16  oem_software_rev;
+	u32  oem_vendor_name_ptr;
+	u32  oem_product_name_ptr;
+	u32  oem_product_rev_ptr;
+	u8   reserved[222];
+	char oem_data[256];
+} __attribute__ ((packed));
+
+struct vesafb_crtc_ib {
+	u16 horiz_total;
+	u16 horiz_start;
+	u16 horiz_end;
+	u16 vert_total;
+	u16 vert_start;
+	u16 vert_end;
+	u8  flags;
+	u32 pixel_clock;
+	u16 refresh_rate;
+	u8  reserved[40];
+} __attribute__ ((packed));
+
+#define VBE_MODE_VGACOMPAT	0x20
+
+struct vesafb_mode_ib {
+	/* for all VBE revisions */
+	u16 mode_attr;
+	u8  winA_attr;
+	u8  winB_attr;
+	u16 win_granularity;
+	u16 win_size;
+	u16 winA_seg;
+	u16 winB_seg;
+	u32 win_func_ptr;
+	u16 bytes_per_scan_line;
+
+	/* for VBE 1.2+ */
+	u16 x_res;
+	u16 y_res;
+	u8  x_char_size;
+	u8  y_char_size;
+	u8  planes;
+	u8  bits_per_pixel;
+	u8  banks;
+	u8  memory_model;
+	u8  bank_size;
+	u8  image_pages;
+	u8  reserved1;
+
+	/* Direct color fields for direct/6 and YUV/7 memory models. */
+	/* Offsets are bit positions of lsb in the mask. */
+	u8  red_len;
+	u8  red_off;
+	u8  green_len;
+	u8  green_off;
+	u8  blue_len;
+	u8  blue_off;
+	u8  rsvd_len;
+	u8  rsvd_off;
+	u8  direct_color_info;	/* direct color mode attributes */
+
+	/* for VBE 2.0+ */
+	u32 phys_base_ptr;
+	u8  reserved2[6];
+
+	/* for VBE 3.0+ */
+	u16 lin_bytes_per_scan_line;
+	u8  bnk_image_pages;
+	u8  lin_image_pages;
+	u8  lin_red_len;
+	u8  lin_red_off;
+	u8  lin_green_len;
+	u8  lin_green_off;
+	u8  lin_blue_len;
+	u8  lin_blue_off;
+	u8  lin_rsvd_len;
+	u8  lin_rsvd_off;
+	u32 max_pixel_clock;
+	u16 mode_id;
+	u8  depth;
+} __attribute__ ((packed));
+
+struct vesafb_pal_entry {
+	u_char blue, green, red, pad;
+} __attribute__ ((packed));
+
+struct vesafb_par {
+	u32 mem_total;
+	int mode_idx;
+	struct vesafb_crtc_ib crtc;
+};
+
diff -urN oldtree/kernel/fork.c newtree/kernel/fork.c
--- oldtree/kernel/fork.c	2006-02-18 15:18:30.074745192 +0000
+++ newtree/kernel/fork.c	2006-02-18 15:28:25.323253664 +0000
@@ -97,6 +97,7 @@
 
 /* SLAB cache for vm_area_struct structures */
 kmem_cache_t *vm_area_cachep;
+EXPORT_SYMBOL_GPL(vm_area_cachep);
 
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static kmem_cache_t *mm_cachep;
@@ -383,6 +384,40 @@
 EXPORT_SYMBOL_GPL(mmput);
 
 /**
+ * set_new_mm - allocate, init and activate a new mm for a kernel thread
+ */
+int set_new_mm(void)
+{
+	struct mm_struct *mm;
+	struct task_struct *tsk = current;
+	struct mm_struct *active_mm;
+
+	mm = mm_alloc();
+	if (!mm)
+		goto fail_nomem;
+	if (init_new_context(current,mm))
+		goto fail_nocontext;
+
+	task_lock(tsk);
+	tsk->flags |= PF_BORROWED_MM;	
+	active_mm = tsk->active_mm;
+	current->mm = mm;
+	current->active_mm = mm;
+	activate_mm(active_mm, mm);
+	task_unlock(current);
+
+	/* Drop the previous active_mm */
+	mmdrop(active_mm);
+	return 0;
+	
+fail_nocontext:
+	mmdrop(mm);
+fail_nomem:
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(set_new_mm);
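+/*
+ * Illustrative caller: a kernel thread that needs its own address
+ * space (e.g. to issue vm86 calls) would do something like:
+ *
+ *	err = set_new_mm();
+ *	if (err)
+ *		return err;
+ */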
+
+/**
  * get_task_mm - acquire a reference to the task's mm
  *
  * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
diff -urN oldtree/kernel/fork.c.orig newtree/kernel/fork.c.orig
--- oldtree/kernel/fork.c.orig	1970-01-01 00:00:00.000000000 +0000
+++ newtree/kernel/fork.c.orig	2006-02-18 15:18:30.000000000 +0000
@@ -0,0 +1,1610 @@
+/*
+ *  linux/kernel/fork.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+/*
+ *  'fork.c' contains the help-routines for the 'fork' system call
+ * (see also entry.S and others).
+ * Fork is rather simple, once you get the hang of it, but the memory
+ * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/unistd.h>
+#include <linux/smp_lock.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/completion.h>
+#include <linux/namespace.h>
+#include <linux/personality.h>
+#include <linux/mempolicy.h>
+#include <linux/sem.h>
+#include <linux/file.h>
+#include <linux/key.h>
+#include <linux/binfmts.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/security.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/jiffies.h>
+#include <linux/futex.h>
+#include <linux/rcupdate.h>
+#include <linux/ptrace.h>
+#include <linux/mount.h>
+#include <linux/audit.h>
+#include <linux/profile.h>
+#include <linux/rmap.h>
+#include <linux/acct.h>
+#include <linux/cn_proc.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Protected counters by write_lock_irq(&tasklist_lock)
+ */
+unsigned long total_forks;	/* Handle normal Linux uptimes. */
+int nr_threads; 		/* The idle threads do not count.. */
+
+int max_threads;		/* tunable limit on nr_threads */
+
+DEFINE_PER_CPU(unsigned long, process_counts) = 0;
+
+ __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
+
+EXPORT_SYMBOL(tasklist_lock);
+
+int nr_processes(void)
+{
+	int cpu;
+	int total = 0;
+
+	for_each_online_cpu(cpu)
+		total += per_cpu(process_counts, cpu);
+
+	return total;
+}
+
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+# define alloc_task_struct()	kmem_cache_alloc(task_struct_cachep, GFP_KERNEL)
+# define free_task_struct(tsk)	kmem_cache_free(task_struct_cachep, (tsk))
+static kmem_cache_t *task_struct_cachep;
+#endif
+
+/* SLAB cache for signal_struct structures (tsk->signal) */
+kmem_cache_t *signal_cachep;
+
+/* SLAB cache for sighand_struct structures (tsk->sighand) */
+kmem_cache_t *sighand_cachep;
+
+/* SLAB cache for files_struct structures (tsk->files) */
+kmem_cache_t *files_cachep;
+
+/* SLAB cache for fs_struct structures (tsk->fs) */
+kmem_cache_t *fs_cachep;
+
+/* SLAB cache for vm_area_struct structures */
+kmem_cache_t *vm_area_cachep;
+
+/* SLAB cache for mm_struct structures (tsk->mm) */
+static kmem_cache_t *mm_cachep;
+
+void free_task(struct task_struct *tsk)
+{
+	free_thread_info(tsk->thread_info);
+	free_task_struct(tsk);
+}
+EXPORT_SYMBOL(free_task);
+
+void __put_task_struct(struct task_struct *tsk)
+{
+	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+	WARN_ON(atomic_read(&tsk->usage));
+	WARN_ON(tsk == current);
+
+	if (unlikely(tsk->audit_context))
+		audit_free(tsk);
+	security_task_free(tsk);
+	free_uid(tsk->user);
+	put_group_info(tsk->group_info);
+
+	if (!profile_handoff_task(tsk))
+		free_task(tsk);
+}
+
+void __init fork_init(unsigned long mempages)
+{
+#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef ARCH_MIN_TASKALIGN
+#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
+#endif
+	/* create a slab on which task_structs can be allocated */
+	task_struct_cachep =
+		kmem_cache_create("task_struct", sizeof(struct task_struct),
+			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL);
+#endif
+
+	/*
+	 * The default maximum number of threads is set to a safe
+	 * value: the thread structures can take up at most half
+	 * of memory.
+	 */
+	max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
+
+	/*
+	 * we need to allow at least 20 threads to boot a system
+	 */
+	if(max_threads < 20)
+		max_threads = 20;
+
+	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
+	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
+	init_task.signal->rlim[RLIMIT_SIGPENDING] =
+		init_task.signal->rlim[RLIMIT_NPROC];
+}
+
+static struct task_struct *dup_task_struct(struct task_struct *orig)
+{
+	struct task_struct *tsk;
+	struct thread_info *ti;
+
+	prepare_to_copy(orig);
+
+	tsk = alloc_task_struct();
+	if (!tsk)
+		return NULL;
+
+	ti = alloc_thread_info(tsk);
+	if (!ti) {
+		free_task_struct(tsk);
+		return NULL;
+	}
+
+	*tsk = *orig;
+	tsk->thread_info = ti;
+	setup_thread_stack(tsk, orig);
+
+	/* One for us, one for whoever does the "release_task()" (usually parent) */
+	atomic_set(&tsk->usage,2);
+	atomic_set(&tsk->fs_excl, 0);
+	return tsk;
+}
+
+#ifdef CONFIG_MMU
+static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	struct vm_area_struct *mpnt, *tmp, **pprev;
+	struct rb_node **rb_link, *rb_parent;
+	int retval;
+	unsigned long charge;
+	struct mempolicy *pol;
+
+	down_write(&oldmm->mmap_sem);
+	flush_cache_mm(oldmm);
+	down_write(&mm->mmap_sem);
+
+	mm->locked_vm = 0;
+	mm->mmap = NULL;
+	mm->mmap_cache = NULL;
+	mm->free_area_cache = oldmm->mmap_base;
+	mm->cached_hole_size = ~0UL;
+	mm->map_count = 0;
+	cpus_clear(mm->cpu_vm_mask);
+	mm->mm_rb = RB_ROOT;
+	rb_link = &mm->mm_rb.rb_node;
+	rb_parent = NULL;
+	pprev = &mm->mmap;
+
+	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
+		struct file *file;
+
+		if (mpnt->vm_flags & VM_DONTCOPY) {
+			long pages = vma_pages(mpnt);
+			mm->total_vm -= pages;
+			vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
+								-pages);
+			continue;
+		}
+		charge = 0;
+		if (mpnt->vm_flags & VM_ACCOUNT) {
+			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+			if (security_vm_enough_memory(len))
+				goto fail_nomem;
+			charge = len;
+		}
+		tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		if (!tmp)
+			goto fail_nomem;
+		*tmp = *mpnt;
+		pol = mpol_copy(vma_policy(mpnt));
+		retval = PTR_ERR(pol);
+		if (IS_ERR(pol))
+			goto fail_nomem_policy;
+		vma_set_policy(tmp, pol);
+		tmp->vm_flags &= ~VM_LOCKED;
+		tmp->vm_mm = mm;
+		tmp->vm_next = NULL;
+		anon_vma_link(tmp);
+		file = tmp->vm_file;
+		if (file) {
+			struct inode *inode = file->f_dentry->d_inode;
+			get_file(file);
+			if (tmp->vm_flags & VM_DENYWRITE)
+				atomic_dec(&inode->i_writecount);
+      
+			/* insert tmp into the share list, just after mpnt */
+			spin_lock(&file->f_mapping->i_mmap_lock);
+			tmp->vm_truncate_count = mpnt->vm_truncate_count;
+			flush_dcache_mmap_lock(file->f_mapping);
+			vma_prio_tree_add(tmp, mpnt);
+			flush_dcache_mmap_unlock(file->f_mapping);
+			spin_unlock(&file->f_mapping->i_mmap_lock);
+		}
+
+		/*
+		 * Link in the new vma and copy the page table entries.
+		 */
+		*pprev = tmp;
+		pprev = &tmp->vm_next;
+
+		__vma_link_rb(mm, tmp, rb_link, rb_parent);
+		rb_link = &tmp->vm_rb.rb_right;
+		rb_parent = &tmp->vm_rb;
+
+		mm->map_count++;
+		retval = copy_page_range(mm, oldmm, mpnt);
+
+		if (tmp->vm_ops && tmp->vm_ops->open)
+			tmp->vm_ops->open(tmp);
+
+		if (retval)
+			goto out;
+	}
+	retval = 0;
+out:
+	up_write(&mm->mmap_sem);
+	flush_tlb_mm(oldmm);
+	up_write(&oldmm->mmap_sem);
+	return retval;
+fail_nomem_policy:
+	kmem_cache_free(vm_area_cachep, tmp);
+fail_nomem:
+	retval = -ENOMEM;
+	vm_unacct_memory(charge);
+	goto out;
+}
+
+static inline int mm_alloc_pgd(struct mm_struct * mm)
+{
+	mm->pgd = pgd_alloc(mm);
+	if (unlikely(!mm->pgd))
+		return -ENOMEM;
+	return 0;
+}
+
+static inline void mm_free_pgd(struct mm_struct * mm)
+{
+	pgd_free(mm->pgd);
+}
+#else
+#define dup_mmap(mm, oldmm)	(0)
+#define mm_alloc_pgd(mm)	(0)
+#define mm_free_pgd(mm)
+#endif /* CONFIG_MMU */
+
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
+
+#define allocate_mm()	(kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
+#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
+
+#include <linux/init_task.h>
+
+static struct mm_struct * mm_init(struct mm_struct * mm)
+{
+	atomic_set(&mm->mm_users, 1);
+	atomic_set(&mm->mm_count, 1);
+	init_rwsem(&mm->mmap_sem);
+	INIT_LIST_HEAD(&mm->mmlist);
+	mm->core_waiters = 0;
+	mm->nr_ptes = 0;
+	set_mm_counter(mm, file_rss, 0);
+	set_mm_counter(mm, anon_rss, 0);
+	spin_lock_init(&mm->page_table_lock);
+	rwlock_init(&mm->ioctx_list_lock);
+	mm->ioctx_list = NULL;
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
+
+	if (likely(!mm_alloc_pgd(mm))) {
+		mm->def_flags = 0;
+		return mm;
+	}
+	free_mm(mm);
+	return NULL;
+}
+
+/*
+ * Allocate and initialize an mm_struct.
+ */
+struct mm_struct * mm_alloc(void)
+{
+	struct mm_struct * mm;
+
+	mm = allocate_mm();
+	if (mm) {
+		memset(mm, 0, sizeof(*mm));
+		mm = mm_init(mm);
+	}
+	return mm;
+}
+
+/*
+ * Called when the last reference to the mm
+ * is dropped: either by a lazy thread or by
+ * mmput. Free the page directory and the mm.
+ */
+void fastcall __mmdrop(struct mm_struct *mm)
+{
+	BUG_ON(mm == &init_mm);
+	mm_free_pgd(mm);
+	destroy_context(mm);
+	free_mm(mm);
+}
+
+/*
+ * Decrement the use count and release all resources for an mm.
+ */
+void mmput(struct mm_struct *mm)
+{
+	if (atomic_dec_and_test(&mm->mm_users)) {
+		exit_aio(mm);
+		exit_mmap(mm);
+		if (!list_empty(&mm->mmlist)) {
+			spin_lock(&mmlist_lock);
+			list_del(&mm->mmlist);
+			spin_unlock(&mmlist_lock);
+		}
+		put_swap_token(mm);
+		mmdrop(mm);
+	}
+}
+EXPORT_SYMBOL_GPL(mmput);
+
+/**
+ * get_task_mm - acquire a reference to the task's mm
+ *
+ * Returns %NULL if the task has no mm.  Checks PF_BORROWED_MM (meaning
+ * this kernel workthread has transiently adopted a user mm with use_mm,
+ * to do its AIO) is not set and if so returns a reference to it, after
+ * bumping up the use count.  User must release the mm via mmput()
+ * after use.  Typically used by /proc and ptrace.
+ */
+struct mm_struct *get_task_mm(struct task_struct *task)
+{
+	struct mm_struct *mm;
+
+	task_lock(task);
+	mm = task->mm;
+	if (mm) {
+		if (task->flags & PF_BORROWED_MM)
+			mm = NULL;
+		else
+			atomic_inc(&mm->mm_users);
+	}
+	task_unlock(task);
+	return mm;
+}
+EXPORT_SYMBOL_GPL(get_task_mm);
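
For reference, get_task_mm() pairs with mmput() in the usual get/put pattern.
A minimal sketch of a caller (illustrative only; show_task_vm is a made-up
name and not part of this patch):

static int show_task_vm(struct task_struct *task)
{
	struct mm_struct *mm = get_task_mm(task);

	if (!mm)
		return -EINVAL;		/* kernel thread, or the mm is gone */
	/* ... inspect mm->total_vm, walk mm->mmap, etc. ... */
	mmput(mm);			/* drop the reference taken above */
	return 0;
}
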
+
+/* Please note the differences between mmput and mm_release.
+ * mmput is called whenever we stop holding onto a mm_struct,
+ * on error and on success alike.
+ *
+ * mm_release is called after a mm_struct has been removed
+ * from the current process.
+ *
+ * This difference is important for error handling, when we
+ * only half set up a mm_struct for a new process and need to restore
+ * the old one: we mmput the new mm_struct before restoring the
+ * old one.
+ * Eric Biederman 10 January 1998
+ */
+void mm_release(struct task_struct *tsk, struct mm_struct *mm)
+{
+	struct completion *vfork_done = tsk->vfork_done;
+
+	/* Get rid of any cached register state */
+	deactivate_mm(tsk, mm);
+
+	/* notify parent sleeping on vfork() */
+	if (vfork_done) {
+		tsk->vfork_done = NULL;
+		complete(vfork_done);
+	}
+	if (tsk->clear_child_tid && atomic_read(&mm->mm_users) > 1) {
+		u32 __user * tidptr = tsk->clear_child_tid;
+		tsk->clear_child_tid = NULL;
+
+		/*
+		 * We don't check the error code - if userspace has
+		 * not set up a proper pointer then tough luck.
+		 */
+		put_user(0, tidptr);
+		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
+	}
+}
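
The put_user()/sys_futex() pair above is the kernel half of the
CLONE_CHILD_CLEARTID contract: a joiner in userspace sleeps on the TID word
until the kernel clears it and issues the wake.  An illustrative userspace
sketch of the waiting side (roughly what a threading library does, not the
exact glibc code):

#include <linux/futex.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Wait until the kernel clears *tidp on thread exit and wakes us up. */
static void wait_for_thread_exit(pid_t *tidp)
{
	pid_t tid;

	while ((tid = *tidp) != 0)
		syscall(SYS_futex, tidp, FUTEX_WAIT, tid, NULL, NULL, 0);
}
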
+
+/*
+ * Allocate a new mm structure and copy contents from the
+ * mm structure of the passed in task structure.
+ */
+static struct mm_struct *dup_mm(struct task_struct *tsk)
+{
+	struct mm_struct *mm, *oldmm = current->mm;
+	int err;
+
+	if (!oldmm)
+		return NULL;
+
+	mm = allocate_mm();
+	if (!mm)
+		goto fail_nomem;
+
+	memcpy(mm, oldmm, sizeof(*mm));
+
+	if (!mm_init(mm))
+		goto fail_nomem;
+
+	if (init_new_context(tsk, mm))
+		goto fail_nocontext;
+
+	err = dup_mmap(mm, oldmm);
+	if (err)
+		goto free_pt;
+
+	mm->hiwater_rss = get_mm_rss(mm);
+	mm->hiwater_vm = mm->total_vm;
+
+	return mm;
+
+free_pt:
+	mmput(mm);
+
+fail_nomem:
+	return NULL;
+
+fail_nocontext:
+	/*
+	 * If init_new_context() failed, we cannot use mmput() to free the mm
+	 * because it calls destroy_context()
+	 */
+	mm_free_pgd(mm);
+	free_mm(mm);
+	return NULL;
+}
+
+static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
+{
+	struct mm_struct * mm, *oldmm;
+	int retval;
+
+	tsk->min_flt = tsk->maj_flt = 0;
+	tsk->nvcsw = tsk->nivcsw = 0;
+
+	tsk->mm = NULL;
+	tsk->active_mm = NULL;
+
+	/*
+	 * Are we cloning a kernel thread?
+	 *
+	 * We need to steal an active VM for that.
+	 */
+	oldmm = current->mm;
+	if (!oldmm)
+		return 0;
+
+	if (clone_flags & CLONE_VM) {
+		atomic_inc(&oldmm->mm_users);
+		mm = oldmm;
+		goto good_mm;
+	}
+
+	retval = -ENOMEM;
+	mm = dup_mm(tsk);
+	if (!mm)
+		goto fail_nomem;
+
+good_mm:
+	tsk->mm = mm;
+	tsk->active_mm = mm;
+	return 0;
+
+fail_nomem:
+	return retval;
+}
+
+static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+{
+	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
+	/* We don't need to lock fs - think why ;-) */
+	if (fs) {
+		atomic_set(&fs->count, 1);
+		rwlock_init(&fs->lock);
+		fs->umask = old->umask;
+		read_lock(&old->lock);
+		fs->rootmnt = mntget(old->rootmnt);
+		fs->root = dget(old->root);
+		fs->pwdmnt = mntget(old->pwdmnt);
+		fs->pwd = dget(old->pwd);
+		if (old->altroot) {
+			fs->altrootmnt = mntget(old->altrootmnt);
+			fs->altroot = dget(old->altroot);
+		} else {
+			fs->altrootmnt = NULL;
+			fs->altroot = NULL;
+		}
+		read_unlock(&old->lock);
+	}
+	return fs;
+}
+
+struct fs_struct *copy_fs_struct(struct fs_struct *old)
+{
+	return __copy_fs_struct(old);
+}
+
+EXPORT_SYMBOL_GPL(copy_fs_struct);
+
+static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+{
+	if (clone_flags & CLONE_FS) {
+		atomic_inc(&current->fs->count);
+		return 0;
+	}
+	tsk->fs = __copy_fs_struct(current->fs);
+	if (!tsk->fs)
+		return -ENOMEM;
+	return 0;
+}
+
+static int count_open_files(struct fdtable *fdt)
+{
+	int size = fdt->max_fdset;
+	int i;
+
+	/* Find the last open fd */
+	for (i = size/(8*sizeof(long)); i > 0; ) {
+		if (fdt->open_fds->fds_bits[--i])
+			break;
+	}
+	i = (i+1) * 8 * sizeof(long);
+	return i;
+}
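
Note that the result is the number of fds rounded up to a whole number of
bitmap words, not the exact count of open descriptors.  A worked instance
(illustrative, assuming 8-byte longs): if the highest open descriptor is
fd 70, the last non-zero word is fds_bits[1] (bit 6 of word 1), so the
function returns (1 + 1) * 8 * sizeof(long) = 128 slots to copy rather
than 71.
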
+
+static struct files_struct *alloc_files(void)
+{
+	struct files_struct *newf;
+	struct fdtable *fdt;
+
+	newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
+	if (!newf)
+		goto out;
+
+	atomic_set(&newf->count, 1);
+
+	spin_lock_init(&newf->file_lock);
+	fdt = &newf->fdtab;
+	fdt->next_fd = 0;
+	fdt->max_fds = NR_OPEN_DEFAULT;
+	fdt->max_fdset = __FD_SETSIZE;
+	fdt->close_on_exec = &newf->close_on_exec_init;
+	fdt->open_fds = &newf->open_fds_init;
+	fdt->fd = &newf->fd_array[0];
+	INIT_RCU_HEAD(&fdt->rcu);
+	fdt->free_files = NULL;
+	fdt->next = NULL;
+	rcu_assign_pointer(newf->fdt, fdt);
+out:
+	return newf;
+}
+
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ */
+static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
+{
+	struct files_struct *newf;
+	struct file **old_fds, **new_fds;
+	int open_files, size, i, expand;
+	struct fdtable *old_fdt, *new_fdt;
+
+	newf = alloc_files();
+	if (!newf)
+		goto out;
+
+	spin_lock(&oldf->file_lock);
+	old_fdt = files_fdtable(oldf);
+	new_fdt = files_fdtable(newf);
+	size = old_fdt->max_fdset;
+	open_files = count_open_files(old_fdt);
+	expand = 0;
+
+	/*
+	 * Check whether we need to allocate a larger fd array or fd set.
+	 * Note: we're not a clone task, so the open count won't change.
+	 */
+	if (open_files > new_fdt->max_fdset) {
+		new_fdt->max_fdset = 0;
+		expand = 1;
+	}
+	if (open_files > new_fdt->max_fds) {
+		new_fdt->max_fds = 0;
+		expand = 1;
+	}
+
+	/* if the old fdset gets grown now, we'll only copy up to "size" fds */
+	if (expand) {
+		spin_unlock(&oldf->file_lock);
+		spin_lock(&newf->file_lock);
+		*errorp = expand_files(newf, open_files-1);
+		spin_unlock(&newf->file_lock);
+		if (*errorp < 0)
+			goto out_release;
+		new_fdt = files_fdtable(newf);
+		/*
+		 * Reacquire the oldf lock and a pointer to its fd table -
+		 * the table may have grown into a bigger one in the meantime,
+		 * and we need the latest pointer.
+		 */
+		spin_lock(&oldf->file_lock);
+		old_fdt = files_fdtable(oldf);
+	}
+
+	old_fds = old_fdt->fd;
+	new_fds = new_fdt->fd;
+
+	memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8);
+	memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8);
+
+	for (i = open_files; i != 0; i--) {
+		struct file *f = *old_fds++;
+		if (f) {
+			get_file(f);
+		} else {
+			/*
+			 * The fd may be claimed in the fd bitmap but not yet
+			 * instantiated in the files array if a sibling thread
+			 * is partway through open().  So make sure that this
+			 * fd is available to the new process.
+			 */
+			FD_CLR(open_files - i, new_fdt->open_fds);
+		}
+		rcu_assign_pointer(*new_fds++, f);
+	}
+	spin_unlock(&oldf->file_lock);
+
+	/* compute the remainder to be cleared */
+	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
+
+	/* This is long-word aligned, thus could use an optimized version */
+	memset(new_fds, 0, size);
+
+	if (new_fdt->max_fdset > open_files) {
+		int left = (new_fdt->max_fdset-open_files)/8;
+		int start = open_files / (8 * sizeof(unsigned long));
+
+		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
+		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+	}
+
+out:
+	return newf;
+
+out_release:
+	free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
+	free_fdset (new_fdt->open_fds, new_fdt->max_fdset);
+	free_fd_array(new_fdt->fd, new_fdt->max_fds);
+	kmem_cache_free(files_cachep, newf);
+	goto out;
+}
+
+static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
+{
+	struct files_struct *oldf, *newf;
+	int error = 0;
+
+	/*
+	 * A background process may not have any files ...
+	 */
+	oldf = current->files;
+	if (!oldf)
+		goto out;
+
+	if (clone_flags & CLONE_FILES) {
+		atomic_inc(&oldf->count);
+		goto out;
+	}
+
+	/*
+	 * Note: we may be using current for both targets (See exec.c)
+	 * This works because we cache current->files (old) as oldf. Don't
+	 * break this.
+	 */
+	tsk->files = NULL;
+	error = -ENOMEM;
+	newf = dup_fd(oldf, &error);
+	if (!newf)
+		goto out;
+
+	tsk->files = newf;
+	error = 0;
+out:
+	return error;
+}
+
+/*
+ *	Helper to unshare the files of the current task.
+ *	We don't want to expose copy_files internals to
+ *	the exec layer of the kernel.
+ */
+
+int unshare_files(void)
+{
+	struct files_struct *files  = current->files;
+	int rc;
+
+	if(!files)
+		BUG();
+
+	/* This can race but the race causes us to copy when we don't
+	   need to and drop the copy */
+	if(atomic_read(&files->count) == 1)
+	{
+		atomic_inc(&files->count);
+		return 0;
+	}
+	rc = copy_files(0, current);
+	if(rc)
+		current->files = files;
+	return rc;
+}
+
+EXPORT_SYMBOL(unshare_files);
+
+void sighand_free_cb(struct rcu_head *rhp)
+{
+	struct sighand_struct *sp;
+
+	sp = container_of(rhp, struct sighand_struct, rcu);
+	kmem_cache_free(sighand_cachep, sp);
+}
+
+static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+{
+	struct sighand_struct *sig;
+
+	if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
+		atomic_inc(&current->sighand->count);
+		return 0;
+	}
+	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+	rcu_assign_pointer(tsk->sighand, sig);
+	if (!sig)
+		return -ENOMEM;
+	spin_lock_init(&sig->siglock);
+	atomic_set(&sig->count, 1);
+	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+	return 0;
+}
+
+static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+{
+	struct signal_struct *sig;
+	int ret;
+
+	if (clone_flags & CLONE_THREAD) {
+		atomic_inc(&current->signal->count);
+		atomic_inc(&current->signal->live);
+		return 0;
+	}
+	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+	tsk->signal = sig;
+	if (!sig)
+		return -ENOMEM;
+
+	ret = copy_thread_group_keys(tsk);
+	if (ret < 0) {
+		kmem_cache_free(signal_cachep, sig);
+		return ret;
+	}
+
+	atomic_set(&sig->count, 1);
+	atomic_set(&sig->live, 1);
+	init_waitqueue_head(&sig->wait_chldexit);
+	sig->flags = 0;
+	sig->group_exit_code = 0;
+	sig->group_exit_task = NULL;
+	sig->group_stop_count = 0;
+	sig->curr_target = NULL;
+	init_sigpending(&sig->shared_pending);
+	INIT_LIST_HEAD(&sig->posix_timers);
+
+	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
+	sig->it_real_incr.tv64 = 0;
+	sig->real_timer.function = it_real_fn;
+	sig->real_timer.data = tsk;
+
+	sig->it_virt_expires = cputime_zero;
+	sig->it_virt_incr = cputime_zero;
+	sig->it_prof_expires = cputime_zero;
+	sig->it_prof_incr = cputime_zero;
+
+	sig->leader = 0;	/* session leadership doesn't inherit */
+	sig->tty_old_pgrp = 0;
+
+	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+	sig->sched_time = 0;
+	INIT_LIST_HEAD(&sig->cpu_timers[0]);
+	INIT_LIST_HEAD(&sig->cpu_timers[1]);
+	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+
+	task_lock(current->group_leader);
+	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
+	task_unlock(current->group_leader);
+
+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		/*
+		 * New sole thread in the process gets an expiry time
+		 * of the whole CPU time limit.
+		 */
+		tsk->it_prof_expires =
+			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+	}
+
+	return 0;
+}
+
+static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+{
+	unsigned long new_flags = p->flags;
+
+	new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
+	new_flags |= PF_FORKNOEXEC;
+	if (!(clone_flags & CLONE_PTRACE))
+		p->ptrace = 0;
+	p->flags = new_flags;
+}
+
+asmlinkage long sys_set_tid_address(int __user *tidptr)
+{
+	current->clear_child_tid = tidptr;
+
+	return current->pid;
+}
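
This is the registration side of the clear_child_tid handling in
mm_release() above; threading libraries call it once per thread at startup.
An illustrative direct invocation (raw syscall, no libc wrapper assumed):

#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static pid_t tid_word;

int main(void)
{
	/* Register &tid_word to be cleared and futex-woken when this
	 * thread exits; the return value is the caller's TID. */
	long tid = syscall(SYS_set_tid_address, &tid_word);

	return tid > 0 ? 0 : 1;
}
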
+
+/*
+ * This creates a new process as a copy of the old one,
+ * but does not actually start it yet.
+ *
+ * It copies the registers, and all the appropriate
+ * parts of the process environment (as per the clone
+ * flags). The actual kick-off is left to the caller.
+ */
+static task_t *copy_process(unsigned long clone_flags,
+				 unsigned long stack_start,
+				 struct pt_regs *regs,
+				 unsigned long stack_size,
+				 int __user *parent_tidptr,
+				 int __user *child_tidptr,
+				 int pid)
+{
+	int retval;
+	struct task_struct *p = NULL;
+
+	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * Thread groups must share signals as well, and detached threads
+	 * can only be started up within the thread group.
+	 */
+	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * Shared signal handlers imply shared VM. By way of the above,
+	 * thread groups also imply shared VM. Blocking this case allows
+	 * for various simplifications in other code.
+	 */
+	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
+		return ERR_PTR(-EINVAL);
+
+	retval = security_task_create(clone_flags);
+	if (retval)
+		goto fork_out;
+
+	retval = -ENOMEM;
+	p = dup_task_struct(current);
+	if (!p)
+		goto fork_out;
+
+	retval = -EAGAIN;
+	if (atomic_read(&p->user->processes) >=
+			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
+		if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
+				p->user != &root_user)
+			goto bad_fork_free;
+	}
+
+	atomic_inc(&p->user->__count);
+	atomic_inc(&p->user->processes);
+	get_group_info(p->group_info);
+
+	/*
+	 * If multiple threads are within copy_process(), then this check
+	 * triggers too late. This doesn't hurt; the check is only there
+	 * to stop root fork bombs.
+	 */
+	if (nr_threads >= max_threads)
+		goto bad_fork_cleanup_count;
+
+	if (!try_module_get(task_thread_info(p)->exec_domain->module))
+		goto bad_fork_cleanup_count;
+
+	if (p->binfmt && !try_module_get(p->binfmt->module))
+		goto bad_fork_cleanup_put_domain;
+
+	p->did_exec = 0;
+	copy_flags(clone_flags, p);
+	p->pid = pid;
+	retval = -EFAULT;
+	if (clone_flags & CLONE_PARENT_SETTID)
+		if (put_user(p->pid, parent_tidptr))
+			goto bad_fork_cleanup;
+
+	p->proc_dentry = NULL;
+
+	INIT_LIST_HEAD(&p->children);
+	INIT_LIST_HEAD(&p->sibling);
+	p->vfork_done = NULL;
+	spin_lock_init(&p->alloc_lock);
+	spin_lock_init(&p->proc_lock);
+
+	clear_tsk_thread_flag(p, TIF_SIGPENDING);
+	init_sigpending(&p->pending);
+
+	p->utime = cputime_zero;
+	p->stime = cputime_zero;
+ 	p->sched_time = 0;
+	p->rchar = 0;		/* I/O counter: bytes read */
+	p->wchar = 0;		/* I/O counter: bytes written */
+	p->syscr = 0;		/* I/O counter: read syscalls */
+	p->syscw = 0;		/* I/O counter: write syscalls */
+	acct_clear_integrals(p);
+
+ 	p->it_virt_expires = cputime_zero;
+	p->it_prof_expires = cputime_zero;
+ 	p->it_sched_expires = 0;
+ 	INIT_LIST_HEAD(&p->cpu_timers[0]);
+ 	INIT_LIST_HEAD(&p->cpu_timers[1]);
+ 	INIT_LIST_HEAD(&p->cpu_timers[2]);
+
+	p->lock_depth = -1;		/* -1 = no lock */
+	do_posix_clock_monotonic_gettime(&p->start_time);
+	p->security = NULL;
+	p->io_context = NULL;
+	p->io_wait = NULL;
+	p->audit_context = NULL;
+	cpuset_fork(p);
+#ifdef CONFIG_NUMA
+ 	p->mempolicy = mpol_copy(p->mempolicy);
+ 	if (IS_ERR(p->mempolicy)) {
+ 		retval = PTR_ERR(p->mempolicy);
+ 		p->mempolicy = NULL;
+ 		goto bad_fork_cleanup_cpuset;
+ 	}
+#endif
+
+#ifdef CONFIG_DEBUG_MUTEXES
+	p->blocked_on = NULL; /* not blocked yet */
+#endif
+
+	p->tgid = p->pid;
+	if (clone_flags & CLONE_THREAD)
+		p->tgid = current->tgid;
+
+	if ((retval = security_task_alloc(p)))
+		goto bad_fork_cleanup_policy;
+	if ((retval = audit_alloc(p)))
+		goto bad_fork_cleanup_security;
+	/* copy all the process information */
+	if ((retval = copy_semundo(clone_flags, p)))
+		goto bad_fork_cleanup_audit;
+	if ((retval = copy_files(clone_flags, p)))
+		goto bad_fork_cleanup_semundo;
+	if ((retval = copy_fs(clone_flags, p)))
+		goto bad_fork_cleanup_files;
+	if ((retval = copy_sighand(clone_flags, p)))
+		goto bad_fork_cleanup_fs;
+	if ((retval = copy_signal(clone_flags, p)))
+		goto bad_fork_cleanup_sighand;
+	if ((retval = copy_mm(clone_flags, p)))
+		goto bad_fork_cleanup_signal;
+	if ((retval = copy_keys(clone_flags, p)))
+		goto bad_fork_cleanup_mm;
+	if ((retval = copy_namespace(clone_flags, p)))
+		goto bad_fork_cleanup_keys;
+	retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
+	if (retval)
+		goto bad_fork_cleanup_namespace;
+
+	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
+	/*
+	 * Clear TID on mm_release()?
+	 */
+	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+
+	/*
+	 * Syscall tracing should be turned off in the child regardless
+	 * of CLONE_PTRACE.
+	 */
+	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
+#endif
+
+	/* Our parent execution domain becomes current domain.
+	   These must match for thread signalling to apply. */
+
+	p->parent_exec_id = p->self_exec_id;
+
+	/* ok, now we should be set up.. */
+	p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
+	p->pdeath_signal = 0;
+	p->exit_state = 0;
+
+	/*
+	 * Ok, make it visible to the rest of the system.
+	 * We don't wake it up yet.
+	 */
+	p->group_leader = p;
+	INIT_LIST_HEAD(&p->ptrace_children);
+	INIT_LIST_HEAD(&p->ptrace_list);
+
+	/* Perform scheduler related setup. Assign this task to a CPU. */
+	sched_fork(p, clone_flags);
+
+	/* Need tasklist lock for parent etc handling! */
+	write_lock_irq(&tasklist_lock);
+
+	/*
+	 * The task hasn't been attached yet, so its cpus_allowed mask will
+	 * not be changed, nor will its assigned CPU.
+	 *
+	 * The cpus_allowed mask of the parent may have changed after it was
+	 * copied the first time - so re-copy it here, then check the child's
+	 * CPU to ensure it is on a valid CPU (and if not, just force it back
+	 * to the parent's CPU). This avoids a lot of nasty races.
+	 */
+	p->cpus_allowed = current->cpus_allowed;
+	if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
+			!cpu_online(task_cpu(p))))
+		set_task_cpu(p, smp_processor_id());
+
+	/*
+	 * Check for pending SIGKILL! The new thread should not be allowed
+	 * to slip out of an OOM kill. (or normal SIGKILL.)
+	 */
+	if (sigismember(&current->pending.signal, SIGKILL)) {
+		write_unlock_irq(&tasklist_lock);
+		retval = -EINTR;
+		goto bad_fork_cleanup_namespace;
+	}
+
+	/* CLONE_PARENT re-uses the old parent */
+	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
+		p->real_parent = current->real_parent;
+	else
+		p->real_parent = current;
+	p->parent = p->real_parent;
+
+	spin_lock(&current->sighand->siglock);
+	if (clone_flags & CLONE_THREAD) {
+		/*
+		 * Important: if an exit-all has been started then
+		 * do not create this new thread - the whole thread
+		 * group is supposed to exit anyway.
+		 */
+		if (current->signal->flags & SIGNAL_GROUP_EXIT) {
+			spin_unlock(&current->sighand->siglock);
+			write_unlock_irq(&tasklist_lock);
+			retval = -EAGAIN;
+			goto bad_fork_cleanup_namespace;
+		}
+		p->group_leader = current->group_leader;
+
+		if (current->signal->group_stop_count > 0) {
+			/*
+			 * There is an all-stop in progress for the group.
+			 * We ourselves will stop as soon as we check signals.
+			 * Make the new thread part of that group stop too.
+			 */
+			current->signal->group_stop_count++;
+			set_tsk_thread_flag(p, TIF_SIGPENDING);
+		}
+
+		if (!cputime_eq(current->signal->it_virt_expires,
+				cputime_zero) ||
+		    !cputime_eq(current->signal->it_prof_expires,
+				cputime_zero) ||
+		    current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY ||
+		    !list_empty(&current->signal->cpu_timers[0]) ||
+		    !list_empty(&current->signal->cpu_timers[1]) ||
+		    !list_empty(&current->signal->cpu_timers[2])) {
+			/*
+			 * Have child wake up on its first tick to check
+			 * for process CPU timers.
+			 */
+			p->it_prof_expires = jiffies_to_cputime(1);
+		}
+	}
+
+	/*
+	 * inherit ioprio
+	 */
+	p->ioprio = current->ioprio;
+
+	SET_LINKS(p);
+	if (unlikely(p->ptrace & PT_PTRACED))
+		__ptrace_link(p, current->parent);
+
+	if (thread_group_leader(p)) {
+		p->signal->tty = current->signal->tty;
+		p->signal->pgrp = process_group(current);
+		p->signal->session = current->signal->session;
+		attach_pid(p, PIDTYPE_PGID, process_group(p));
+		attach_pid(p, PIDTYPE_SID, p->signal->session);
+		if (p->pid)
+			__get_cpu_var(process_counts)++;
+	}
+	attach_pid(p, PIDTYPE_TGID, p->tgid);
+	attach_pid(p, PIDTYPE_PID, p->pid);
+
+	nr_threads++;
+	total_forks++;
+	spin_unlock(&current->sighand->siglock);
+	write_unlock_irq(&tasklist_lock);
+	proc_fork_connector(p);
+	return p;
+
+bad_fork_cleanup_namespace:
+	exit_namespace(p);
+bad_fork_cleanup_keys:
+	exit_keys(p);
+bad_fork_cleanup_mm:
+	if (p->mm)
+		mmput(p->mm);
+bad_fork_cleanup_signal:
+	exit_signal(p);
+bad_fork_cleanup_sighand:
+	exit_sighand(p);
+bad_fork_cleanup_fs:
+	exit_fs(p); /* blocking */
+bad_fork_cleanup_files:
+	exit_files(p); /* blocking */
+bad_fork_cleanup_semundo:
+	exit_sem(p);
+bad_fork_cleanup_audit:
+	audit_free(p);
+bad_fork_cleanup_security:
+	security_task_free(p);
+bad_fork_cleanup_policy:
+#ifdef CONFIG_NUMA
+	mpol_free(p->mempolicy);
+bad_fork_cleanup_cpuset:
+#endif
+	cpuset_exit(p);
+bad_fork_cleanup:
+	if (p->binfmt)
+		module_put(p->binfmt->module);
+bad_fork_cleanup_put_domain:
+	module_put(task_thread_info(p)->exec_domain->module);
+bad_fork_cleanup_count:
+	put_group_info(p->group_info);
+	atomic_dec(&p->user->processes);
+	free_uid(p->user);
+bad_fork_free:
+	free_task(p);
+fork_out:
+	return ERR_PTR(retval);
+}
+
+struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
+{
+	memset(regs, 0, sizeof(struct pt_regs));
+	return regs;
+}
+
+task_t * __devinit fork_idle(int cpu)
+{
+	task_t *task;
+	struct pt_regs regs;
+
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
+	if (!task)
+		return ERR_PTR(-ENOMEM);
+	init_idle(task, cpu);
+	unhash_process(task);
+	return task;
+}
+
+static inline int fork_traceflag (unsigned clone_flags)
+{
+	if (clone_flags & CLONE_UNTRACED)
+		return 0;
+	else if (clone_flags & CLONE_VFORK) {
+		if (current->ptrace & PT_TRACE_VFORK)
+			return PTRACE_EVENT_VFORK;
+	} else if ((clone_flags & CSIGNAL) != SIGCHLD) {
+		if (current->ptrace & PT_TRACE_CLONE)
+			return PTRACE_EVENT_CLONE;
+	} else if (current->ptrace & PT_TRACE_FORK)
+		return PTRACE_EVENT_FORK;
+
+	return 0;
+}
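
The PT_TRACE_* bits tested here are only set when the tracer asked for the
corresponding events via PTRACE_SETOPTIONS.  An illustrative tracer-side
snippet (standard ptrace(2) usage, assuming headers that define the
PTRACE_O_* option bits; not part of this patch):

#include <sys/ptrace.h>
#include <sys/types.h>

/* Ask for fork/vfork/clone events, so that fork_traceflag() has
 * something to report when the tracee calls one of the fork family. */
static int trace_fork_events(pid_t child)
{
	long opts = PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
		    PTRACE_O_TRACECLONE;

	if (ptrace(PTRACE_SETOPTIONS, child, NULL, (void *)opts) == -1)
		return -1;
	return 0;
}
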
+
+/*
+ *  Ok, this is the main fork-routine.
+ *
+ * It copies the process, and if successful kick-starts
+ * it and waits for it to finish using the VM if required.
+ */
+long do_fork(unsigned long clone_flags,
+	      unsigned long stack_start,
+	      struct pt_regs *regs,
+	      unsigned long stack_size,
+	      int __user *parent_tidptr,
+	      int __user *child_tidptr)
+{
+	struct task_struct *p;
+	int trace = 0;
+	long pid = alloc_pidmap();
+
+	if (pid < 0)
+		return -EAGAIN;
+	if (unlikely(current->ptrace)) {
+		trace = fork_traceflag (clone_flags);
+		if (trace)
+			clone_flags |= CLONE_PTRACE;
+	}
+
+	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+	/*
+	 * Do this prior to waking up the new thread - the thread pointer
+	 * might become invalid after that point, if the thread exits quickly.
+	 */
+	if (!IS_ERR(p)) {
+		struct completion vfork;
+
+		if (clone_flags & CLONE_VFORK) {
+			p->vfork_done = &vfork;
+			init_completion(&vfork);
+		}
+
+		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
+			/*
+			 * We'll start up with an immediate SIGSTOP.
+			 */
+			sigaddset(&p->pending.signal, SIGSTOP);
+			set_tsk_thread_flag(p, TIF_SIGPENDING);
+		}
+
+		if (!(clone_flags & CLONE_STOPPED))
+			wake_up_new_task(p, clone_flags);
+		else
+			p->state = TASK_STOPPED;
+
+		if (unlikely (trace)) {
+			current->ptrace_message = pid;
+			ptrace_notify ((trace << 8) | SIGTRAP);
+		}
+
+		if (clone_flags & CLONE_VFORK) {
+			wait_for_completion(&vfork);
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+		}
+	} else {
+		free_pidmap(pid);
+		pid = PTR_ERR(p);
+	}
+	return pid;
+}
+
+#ifndef ARCH_MIN_MMSTRUCT_ALIGN
+#define ARCH_MIN_MMSTRUCT_ALIGN 0
+#endif
+
+void __init proc_caches_init(void)
+{
+	sighand_cachep = kmem_cache_create("sighand_cache",
+			sizeof(struct sighand_struct), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+	signal_cachep = kmem_cache_create("signal_cache",
+			sizeof(struct signal_struct), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+	files_cachep = kmem_cache_create("files_cache", 
+			sizeof(struct files_struct), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+	fs_cachep = kmem_cache_create("fs_cache", 
+			sizeof(struct fs_struct), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+	vm_area_cachep = kmem_cache_create("vm_area_struct",
+			sizeof(struct vm_area_struct), 0,
+			SLAB_PANIC, NULL, NULL);
+	mm_cachep = kmem_cache_create("mm_struct",
+			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+}
+
+
+/*
+ * Check constraints on flags passed to the unshare system call and
+ * force unsharing of additional process context as appropriate.
+ */
+static inline void check_unshare_flags(unsigned long *flags_ptr)
+{
+	/*
+	 * If unsharing a thread from a thread group, must also
+	 * unshare vm.
+	 */
+	if (*flags_ptr & CLONE_THREAD)
+		*flags_ptr |= CLONE_VM;
+
+	/*
+	 * If unsharing vm, must also unshare signal handlers.
+	 */
+	if (*flags_ptr & CLONE_VM)
+		*flags_ptr |= CLONE_SIGHAND;
+
+	/*
+	 * If unsharing signal handlers and the task was created
+	 * using CLONE_THREAD, then must unshare the thread
+	 */
+	if ((*flags_ptr & CLONE_SIGHAND) &&
+	    (atomic_read(&current->signal->count) > 1))
+		*flags_ptr |= CLONE_THREAD;
+
+	/*
+	 * If unsharing namespace, must also unshare filesystem information.
+	 */
+	if (*flags_ptr & CLONE_NEWNS)
+		*flags_ptr |= CLONE_FS;
+}
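
So, for instance, a bare CLONE_THREAD request gets widened here before any
of the per-resource helpers below see it:

	unsigned long flags = CLONE_THREAD;

	check_unshare_flags(&flags);
	/* flags now also contains CLONE_VM (first rule) and
	 * CLONE_SIGHAND (second rule). */
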
+
+/*
+ * Unsharing of tasks created with CLONE_THREAD is not supported yet
+ */
+static int unshare_thread(unsigned long unshare_flags)
+{
+	if (unshare_flags & CLONE_THREAD)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Unshare the filesystem structure if it is being shared
+ */
+static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+{
+	struct fs_struct *fs = current->fs;
+
+	if ((unshare_flags & CLONE_FS) &&
+	    (fs && atomic_read(&fs->count) > 1)) {
+		*new_fsp = __copy_fs_struct(current->fs);
+		if (!*new_fsp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unshare the namespace structure if it is being shared
+ */
+static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
+{
+	struct namespace *ns = current->namespace;
+
+	if ((unshare_flags & CLONE_NEWNS) &&
+	    (ns && atomic_read(&ns->count) > 1)) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		*new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
+		if (!*new_nsp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
+ * supported yet
+ */
+static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
+{
+	struct sighand_struct *sigh = current->sighand;
+
+	if ((unshare_flags & CLONE_SIGHAND) &&
+	    (sigh && atomic_read(&sigh->count) > 1))
+		return -EINVAL;
+	else
+		return 0;
+}
+
+/*
+ * Unshare vm if it is being shared
+ */
+static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
+{
+	struct mm_struct *mm = current->mm;
+
+	if ((unshare_flags & CLONE_VM) &&
+	    (mm && atomic_read(&mm->mm_users) > 1)) {
+		*new_mmp = dup_mm(current);
+		if (!*new_mmp)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Unshare file descriptor table if it is being shared
+ */
+static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+{
+	struct files_struct *fd = current->files;
+	int error = 0;
+
+	if ((unshare_flags & CLONE_FILES) &&
+	    (fd && atomic_read(&fd->count) > 1)) {
+		*new_fdp = dup_fd(fd, &error);
+		if (!*new_fdp)
+			return error;
+	}
+
+	return 0;
+}
+
+/*
+ * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
+ * supported yet
+ */
+static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
+{
+	if (unshare_flags & CLONE_SYSVSEM)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * unshare allows a process to 'unshare' part of the process
+ * context which was originally shared using clone.  copy_*
+ * functions used by do_fork() cannot be used here directly
+ * because they modify an inactive task_struct that is being
+ * constructed. Here we are modifying the current, active,
+ * task_struct.
+ */
+asmlinkage long sys_unshare(unsigned long unshare_flags)
+{
+	int err = 0;
+	struct fs_struct *fs, *new_fs = NULL;
+	struct namespace *ns, *new_ns = NULL;
+	struct sighand_struct *sigh, *new_sigh = NULL;
+	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
+	struct files_struct *fd, *new_fd = NULL;
+	struct sem_undo_list *new_ulist = NULL;
+
+	check_unshare_flags(&unshare_flags);
+
+	if ((err = unshare_thread(unshare_flags)))
+		goto bad_unshare_out;
+	if ((err = unshare_fs(unshare_flags, &new_fs)))
+		goto bad_unshare_cleanup_thread;
+	if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
+		goto bad_unshare_cleanup_fs;
+	if ((err = unshare_sighand(unshare_flags, &new_sigh)))
+		goto bad_unshare_cleanup_ns;
+	if ((err = unshare_vm(unshare_flags, &new_mm)))
+		goto bad_unshare_cleanup_sigh;
+	if ((err = unshare_fd(unshare_flags, &new_fd)))
+		goto bad_unshare_cleanup_vm;
+	if ((err = unshare_semundo(unshare_flags, &new_ulist)))
+		goto bad_unshare_cleanup_fd;
+
+	if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+
+		task_lock(current);
+
+		if (new_fs) {
+			fs = current->fs;
+			current->fs = new_fs;
+			new_fs = fs;
+		}
+
+		if (new_ns) {
+			ns = current->namespace;
+			current->namespace = new_ns;
+			new_ns = ns;
+		}
+
+		if (new_sigh) {
+			sigh = current->sighand;
+			current->sighand = new_sigh;
+			new_sigh = sigh;
+		}
+
+		if (new_mm) {
+			mm = current->mm;
+			active_mm = current->active_mm;
+			current->mm = new_mm;
+			current->active_mm = new_mm;
+			activate_mm(active_mm, new_mm);
+			new_mm = mm;
+		}
+
+		if (new_fd) {
+			fd = current->files;
+			current->files = new_fd;
+			new_fd = fd;
+		}
+
+		task_unlock(current);
+	}
+
+bad_unshare_cleanup_fd:
+	if (new_fd)
+		put_files_struct(new_fd);
+
+bad_unshare_cleanup_vm:
+	if (new_mm)
+		mmput(new_mm);
+
+bad_unshare_cleanup_sigh:
+	if (new_sigh)
+		if (atomic_dec_and_test(&new_sigh->count))
+			kmem_cache_free(sighand_cachep, new_sigh);
+
+bad_unshare_cleanup_ns:
+	if (new_ns)
+		put_namespace(new_ns);
+
+bad_unshare_cleanup_fs:
+	if (new_fs)
+		put_fs_struct(new_fs);
+
+bad_unshare_cleanup_thread:
+bad_unshare_out:
+	return err;
+}
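
From userspace the new system call is reached as unshare(2).  An
illustrative caller (assuming a libc that already exposes the wrapper,
otherwise the raw syscall number is needed); unsharing the mount namespace
requires CAP_SYS_ADMIN, per unshare_namespace() above:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* Unshare the mount namespace; check_unshare_flags() forces
	 * CLONE_FS to come along with CLONE_NEWNS. */
	if (unshare(CLONE_NEWNS) == -1) {
		perror("unshare");
		return 1;
	}
	return 0;
}
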
diff -urN oldtree/mm/memory.c newtree/mm/memory.c
--- oldtree/mm/memory.c	2006-02-18 15:18:30.117738656 +0000
+++ newtree/mm/memory.c	2006-02-18 15:28:25.326253208 +0000
@@ -1107,6 +1107,7 @@
 	pte_unmap_unlock(pte - 1, ptl);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(zeromap_page_range);
 
 static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
 			unsigned long addr, unsigned long end, pgprot_t prot)
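
The only functional change to mm/memory.c is the export above, which lets
GPL modules back a mapping with zero pages.  An illustrative mmap handler
(hypothetical driver; the prototype assumed here is the 2.6-era
zeromap_page_range(vma, addr, size, prot)):

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* Back the entire vma with zero pages. */
	return zeromap_page_range(vma, vma->vm_start, size,
				  vma->vm_page_prot);
}
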
diff -urN oldtree/mm/memory.c.orig newtree/mm/memory.c.orig
--- oldtree/mm/memory.c.orig	1970-01-01 00:00:00.000000000 +0000
+++ newtree/mm/memory.c.orig	2006-02-18 15:18:30.000000000 +0000
@@ -0,0 +1,2446 @@
+/*
+ *  linux/mm/memory.c
+ *
+ *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ */
+
+/*
+ * demand-loading started 01.12.91 - seems it is high on the list of
+ * things wanted, and it should be easy to implement. - Linus
+ */
+
+/*
+ * Ok, demand-loading was easy, shared pages a little bit tricker. Shared
+ * pages started 02.12.91, seems to work. - Linus.
+ *
+ * Tested sharing by executing about 30 /bin/sh: under the old kernel it
+ * would have taken more than the 6M I have free, but it worked well as
+ * far as I could see.
+ *
+ * Also corrected some "invalidate()"s - I wasn't doing enough of them.
+ */
+
+/*
+ * Real VM (paging to/from disk) started 18.12.91. Much more work and
+ * thought has to go into this. Oh, well..
+ * 19.12.91  -  works, somewhat. Sometimes I get faults, don't know why.
+ *		Found it. Everything seems to work now.
+ * 20.12.91  -  Ok, making the swap-device changeable like the root.
+ */
+
+/*
+ * 05.04.94  -  Multi-page memory management added for v1.1.
+ * 		Idea by Alex Bligh (alex@cconcepts.co.uk)
+ *
+ * 16.07.99  -  Support of BIGMEM added by Gerhard Wichert, Siemens AG
+ *		(Gerhard.Wichert@pdb.siemens.de)
+ *
+ * Aug/Sep 2004 Changed to four level page tables (Andi Kleen)
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/swap.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgtable.h>
+
+#include <linux/swapops.h>
+#include <linux/elf.h>
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+/* use the per-pgdat data instead for discontigmem - mbligh */
+unsigned long max_mapnr;
+struct page *mem_map;
+
+EXPORT_SYMBOL(max_mapnr);
+EXPORT_SYMBOL(mem_map);
+#endif
+
+unsigned long num_physpages;
+/*
+ * A number of key systems in x86 including ioremap() rely on the assumption
+ * that high_memory defines the upper bound on direct map memory, the end
+ * of ZONE_NORMAL.  Under CONFIG_DISCONTIG this means that max_low_pfn and
+ * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
+ * and ZONE_HIGHMEM.
+ */
+void * high_memory;
+unsigned long vmalloc_earlyreserve;
+
+EXPORT_SYMBOL(num_physpages);
+EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(vmalloc_earlyreserve);
+
+int randomize_va_space __read_mostly = 1;
+
+static int __init disable_randmaps(char *s)
+{
+	randomize_va_space = 0;
+	return 0;
+}
+__setup("norandmaps", disable_randmaps);
+
+
+/*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none.  Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+	pgd_ERROR(*pgd);
+	pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+	pud_ERROR(*pud);
+	pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+	pmd_ERROR(*pmd);
+	pmd_clear(pmd);
+}
+
+/*
+ * Note: this doesn't free the actual pages themselves. That
+ * has been handled earlier when unmapping all the memory regions.
+ */
+static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
+{
+	struct page *page = pmd_page(*pmd);
+	pmd_clear(pmd);
+	pte_lock_deinit(page);
+	pte_free_tlb(tlb, page);
+	dec_page_state(nr_page_table_pages);
+	tlb->mm->nr_ptes--;
+}
+
+static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+				unsigned long addr, unsigned long end,
+				unsigned long floor, unsigned long ceiling)
+{
+	pmd_t *pmd;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		free_pte_range(tlb, pmd);
+	} while (pmd++, addr = next, addr != end);
+
+	start &= PUD_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PUD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pmd = pmd_offset(pud, start);
+	pud_clear(pud);
+	pmd_free_tlb(tlb, pmd);
+}
+
+static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				unsigned long floor, unsigned long ceiling)
+{
+	pud_t *pud;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+	} while (pud++, addr = next, addr != end);
+
+	start &= PGDIR_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PGDIR_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(pgd, start);
+	pgd_clear(pgd);
+	pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void free_pgd_range(struct mmu_gather **tlb,
+			unsigned long addr, unsigned long end,
+			unsigned long floor, unsigned long ceiling)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long start;
+
+	/*
+	 * The next few lines have given us lots of grief...
+	 *
+	 * Why are we testing PMD* at this top level?  Because often
+	 * there will be no work to do at all, and we'd prefer not to
+	 * go all the way down to the bottom just to discover that.
+	 *
+	 * Why all these "- 1"s?  Because 0 represents both the bottom
+	 * of the address space and the top of it (using -1 for the
+	 * top wouldn't help much: the masks would do the wrong thing).
+	 * The rule is that addr 0 and floor 0 refer to the bottom of
+	 * the address space, but end 0 and ceiling 0 refer to the top
+	 * the address space, but end 0 and ceiling 0 refer to the top.
+	 * that end 0 case should be mythical).
+	 *
+	 * Wherever addr is brought up or ceiling brought down, we must
+	 * be careful to reject "the opposite 0" before it confuses the
+	 * subsequent tests.  But what about where end is brought down
+	 * by PMD_SIZE below? no, end can't go down to 0 there.
+	 *
+	 * Whereas we round start (addr) and ceiling down, by different
+	 * masks at different levels, in order to test whether a table
+	 * now has no other vmas using it, so can be freed, we don't
+	 * bother to round floor or end up - the tests don't need that.
+	 */
+
+	addr &= PMD_MASK;
+	if (addr < floor) {
+		addr += PMD_SIZE;
+		if (!addr)
+			return;
+	}
+	if (ceiling) {
+		ceiling &= PMD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		end -= PMD_SIZE;
+	if (addr > end - 1)
+		return;
+
+	start = addr;
+	pgd = pgd_offset((*tlb)->mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+	} while (pgd++, addr = next, addr != end);
+
+	if (!(*tlb)->fullmm)
+		flush_tlb_pgtables((*tlb)->mm, start, end);
+}
+
+void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+		unsigned long floor, unsigned long ceiling)
+{
+	while (vma) {
+		struct vm_area_struct *next = vma->vm_next;
+		unsigned long addr = vma->vm_start;
+
+		/*
+		 * Hide vma from rmap and vmtruncate before freeing pgtables
+		 */
+		anon_vma_unlink(vma);
+		unlink_file_vma(vma);
+
+		if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
+				floor, next? next->vm_start: ceiling);
+		} else {
+			/*
+			 * Optimization: gather nearby vmas into one call down
+			 */
+			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
+			  && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
+							HPAGE_SIZE)) {
+				vma = next;
+				next = vma->vm_next;
+				anon_vma_unlink(vma);
+				unlink_file_vma(vma);
+			}
+			free_pgd_range(tlb, addr, vma->vm_end,
+				floor, next? next->vm_start: ceiling);
+		}
+		vma = next;
+	}
+}
+
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+{
+	struct page *new = pte_alloc_one(mm, address);
+	if (!new)
+		return -ENOMEM;
+
+	pte_lock_init(new);
+	spin_lock(&mm->page_table_lock);
+	if (pmd_present(*pmd)) {	/* Another has populated it */
+		pte_lock_deinit(new);
+		pte_free(new);
+	} else {
+		mm->nr_ptes++;
+		inc_page_state(nr_page_table_pages);
+		pmd_populate(mm, pmd, new);
+	}
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+{
+	pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+	if (!new)
+		return -ENOMEM;
+
+	spin_lock(&init_mm.page_table_lock);
+	if (pmd_present(*pmd))		/* Another has populated it */
+		pte_free_kernel(new);
+	else
+		pmd_populate_kernel(&init_mm, pmd, new);
+	spin_unlock(&init_mm.page_table_lock);
+	return 0;
+}
+
+static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+{
+	if (file_rss)
+		add_mm_counter(mm, file_rss, file_rss);
+	if (anon_rss)
+		add_mm_counter(mm, anon_rss, anon_rss);
+}
+
+/*
+ * This function is called to print an error when a bad pte
+ * is found. For example, we might have a PFN-mapped pte in
+ * a region that doesn't allow it.
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+			"vm_flags = %lx, vaddr = %lx\n",
+		(long long)pte_val(pte),
+		(vma->vm_mm == current->mm ? current->comm : "???"),
+		vma->vm_flags, vaddr);
+	dump_stack();
+}
+
+static inline int is_cow_mapping(unsigned int flags)
+{
+	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
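
In other words, a mapping is treated as COW when it may be written but is
not shared.  Illustrative values only (not part of the file):

static void is_cow_mapping_examples(void)
{
	/* MAP_PRIVATE: VM_MAYWRITE set, VM_SHARED clear -> COW. */
	BUG_ON(!is_cow_mapping(VM_READ | VM_MAYREAD | VM_MAYWRITE));
	/* MAP_SHARED: VM_SHARED set -> never COW. */
	BUG_ON(is_cow_mapping(VM_SHARED | VM_MAYWRITE));
}
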
+
+/*
+ * This function gets the "struct page" associated with a pte.
+ *
+ * NOTE! Some mappings do not have "struct pages". A raw PFN mapping
+ * will have each page table entry just pointing to a raw page frame
+ * number, and as far as the VM layer is concerned, those do not have
+ * pages associated with them - even if the PFN might point to memory
+ * that otherwise is perfectly fine and has a "struct page".
+ *
+ * The way we recognize those mappings is through the rules set up
+ * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set,
+ * and the vm_pgoff will point to the first PFN mapped: thus every
+ * page that is a raw mapping will always honor the rule
+ *
+ *	pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
+ *
+ * and if that isn't true, the page has been COW'ed (in which case it
+ * _does_ have a "struct page" associated with it even if it is in a
+ * VM_PFNMAP range).
+ */
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+	unsigned long pfn = pte_pfn(pte);
+
+	if (vma->vm_flags & VM_PFNMAP) {
+		unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
+		if (pfn == vma->vm_pgoff + off)
+			return NULL;
+		if (!is_cow_mapping(vma->vm_flags))
+			return NULL;
+	}
+
+	/*
+	 * Add some anal sanity checks for now. Eventually,
+	 * we should just do "return pfn_to_page(pfn)", but
+	 * in the meantime we check that we get a valid pfn,
+	 * and that the resulting page looks ok.
+	 *
+	 * Remove this test eventually!
+	 */
+	if (unlikely(!pfn_valid(pfn))) {
+		print_bad_pte(vma, pte, addr);
+		return NULL;
+	}
+
+	/*
+	 * NOTE! We still have PageReserved() pages in the page
+	 * tables.
+	 *
+	 * The ZERO_PAGE() pages and various VDSO mappings can
+	 * cause them to exist.
+	 */
+	return pfn_to_page(pfn);
+}
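
A worked instance of the VM_PFNMAP identity rule described in the comment
above (illustrative numbers):

	/* vma set up by remap_pfn_range(): vm_start = 0x40000000, vm_pgoff = 0x800 */
	unsigned long addr     = 0x40000000UL + 2 * PAGE_SIZE;	/* third page */
	unsigned long expected = 0x800 + ((addr - 0x40000000UL) >> PAGE_SHIFT);
	/* expected == 0x802; a pte here carrying any other pfn must be a
	 * COW'ed page, which therefore does have a struct page. */
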
+
+/*
+ * copy one vm_area from one task to the other. Assumes the page tables
+ * already present in the new task to be cleared in the whole range
+ * covered by this vma.
+ */
+
+static inline void
+copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+		unsigned long addr, int *rss)
+{
+	unsigned long vm_flags = vma->vm_flags;
+	pte_t pte = *src_pte;
+	struct page *page;
+
+	/* pte contains position in swap or file, so copy. */
+	if (unlikely(!pte_present(pte))) {
+		if (!pte_file(pte)) {
+			swap_duplicate(pte_to_swp_entry(pte));
+			/* make sure dst_mm is on swapoff's mmlist. */
+			if (unlikely(list_empty(&dst_mm->mmlist))) {
+				spin_lock(&mmlist_lock);
+				if (list_empty(&dst_mm->mmlist))
+					list_add(&dst_mm->mmlist,
+						 &src_mm->mmlist);
+				spin_unlock(&mmlist_lock);
+			}
+		}
+		goto out_set_pte;
+	}
+
+	/*
+	 * If it's a COW mapping, write protect it both
+	 * in the parent and the child
+	 */
+	if (is_cow_mapping(vm_flags)) {
+		ptep_set_wrprotect(src_mm, addr, src_pte);
+		pte = *src_pte;
+	}
+
+	/*
+	 * If it's a shared mapping, mark it clean in
+	 * the child
+	 */
+	if (vm_flags & VM_SHARED)
+		pte = pte_mkclean(pte);
+	pte = pte_mkold(pte);
+
+	page = vm_normal_page(vma, addr, pte);
+	if (page) {
+		get_page(page);
+		page_dup_rmap(page);
+		rss[!!PageAnon(page)]++;
+	}
+
+out_set_pte:
+	set_pte_at(dst_mm, addr, dst_pte, pte);
+}
+
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	pte_t *src_pte, *dst_pte;
+	spinlock_t *src_ptl, *dst_ptl;
+	int progress = 0;
+	int rss[2];
+
+again:
+	rss[1] = rss[0] = 0;
+	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
+	if (!dst_pte)
+		return -ENOMEM;
+	src_pte = pte_offset_map_nested(src_pmd, addr);
+	src_ptl = pte_lockptr(src_mm, src_pmd);
+	spin_lock(src_ptl);
+
+	do {
+		/*
+		 * We are holding two locks at this point - either of them
+		 * could generate latencies in another task on another CPU.
+		 */
+		if (progress >= 32) {
+			progress = 0;
+			if (need_resched() ||
+			    need_lockbreak(src_ptl) ||
+			    need_lockbreak(dst_ptl))
+				break;
+		}
+		if (pte_none(*src_pte)) {
+			progress++;
+			continue;
+		}
+		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		progress += 8;
+	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+
+	spin_unlock(src_ptl);
+	pte_unmap_nested(src_pte - 1);
+	add_mm_rss(dst_mm, rss[0], rss[1]);
+	pte_unmap_unlock(dst_pte - 1, dst_ptl);
+	cond_resched();
+	if (addr != end)
+		goto again;
+	return 0;
+}
+
+static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	pmd_t *src_pmd, *dst_pmd;
+	unsigned long next;
+
+	dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
+	if (!dst_pmd)
+		return -ENOMEM;
+	src_pmd = pmd_offset(src_pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(src_pmd))
+			continue;
+		if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
+						vma, addr, next))
+			return -ENOMEM;
+	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		unsigned long addr, unsigned long end)
+{
+	pud_t *src_pud, *dst_pud;
+	unsigned long next;
+
+	dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
+	if (!dst_pud)
+		return -ENOMEM;
+	src_pud = pud_offset(src_pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(src_pud))
+			continue;
+		if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
+						vma, addr, next))
+			return -ENOMEM;
+	} while (dst_pud++, src_pud++, addr = next, addr != end);
+	return 0;
+}
+
+int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		struct vm_area_struct *vma)
+{
+	pgd_t *src_pgd, *dst_pgd;
+	unsigned long next;
+	unsigned long addr = vma->vm_start;
+	unsigned long end = vma->vm_end;
+
+	/*
+	 * Don't copy ptes where a page fault will fill them correctly.
+	 * Fork becomes much lighter when there are big shared or private
+	 * readonly mappings. The tradeoff is that copy_page_range is more
+	 * efficient than faulting.
+	 */
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
+		if (!vma->anon_vma)
+			return 0;
+	}
+
+	if (is_vm_hugetlb_page(vma))
+		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
+
+	dst_pgd = pgd_offset(dst_mm, addr);
+	src_pgd = pgd_offset(src_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(src_pgd))
+			continue;
+		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+						vma, addr, next))
+			return -ENOMEM;
+	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
+	return 0;
+}
+
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pmd_t *pmd,
+				unsigned long addr, unsigned long end,
+				long *zap_work, struct zap_details *details)
+{
+	struct mm_struct *mm = tlb->mm;
+	pte_t *pte;
+	spinlock_t *ptl;
+	int file_rss = 0;
+	int anon_rss = 0;
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	do {
+		pte_t ptent = *pte;
+		if (pte_none(ptent)) {
+			(*zap_work)--;
+			continue;
+		}
+		if (pte_present(ptent)) {
+			struct page *page;
+
+			(*zap_work) -= PAGE_SIZE;
+
+			page = vm_normal_page(vma, addr, ptent);
+			if (unlikely(details) && page) {
+				/*
+				 * unmap_shared_mapping_pages() wants to
+				 * invalidate cache without truncating:
+				 * unmap shared but keep private pages.
+				 */
+				if (details->check_mapping &&
+				    details->check_mapping != page->mapping)
+					continue;
+				/*
+				 * Each page->index must be checked when
+				 * invalidating or truncating nonlinear.
+				 */
+				if (details->nonlinear_vma &&
+				    (page->index < details->first_index ||
+				     page->index > details->last_index))
+					continue;
+			}
+			ptent = ptep_get_and_clear_full(mm, addr, pte,
+							tlb->fullmm);
+			tlb_remove_tlb_entry(tlb, pte, addr);
+			if (unlikely(!page))
+				continue;
+			if (unlikely(details) && details->nonlinear_vma
+			    && linear_page_index(details->nonlinear_vma,
+						addr) != page->index)
+				set_pte_at(mm, addr, pte,
+					   pgoff_to_pte(page->index));
+			if (PageAnon(page))
+				anon_rss--;
+			else {
+				if (pte_dirty(ptent))
+					set_page_dirty(page);
+				if (pte_young(ptent))
+					mark_page_accessed(page);
+				file_rss--;
+			}
+			page_remove_rmap(page);
+			tlb_remove_page(tlb, page);
+			continue;
+		}
+		/*
+		 * If details->check_mapping, we leave swap entries;
+		 * if details->nonlinear_vma, we leave file entries.
+		 */
+		if (unlikely(details))
+			continue;
+		if (!pte_file(ptent))
+			free_swap_and_cache(pte_to_swp_entry(ptent));
+		pte_clear_full(mm, addr, pte, tlb->fullmm);
+	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+
+	add_mm_rss(mm, file_rss, anon_rss);
+	pte_unmap_unlock(pte - 1, ptl);
+
+	return addr;
+}
+
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pud_t *pud,
+				unsigned long addr, unsigned long end,
+				long *zap_work, struct zap_details *details)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd)) {
+			(*zap_work)--;
+			continue;
+		}
+		next = zap_pte_range(tlb, vma, pmd, addr, next,
+						zap_work, details);
+	} while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+	return addr;
+}
+
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				long *zap_work, struct zap_details *details)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud)) {
+			(*zap_work)--;
+			continue;
+		}
+		next = zap_pmd_range(tlb, vma, pud, addr, next,
+						zap_work, details);
+	} while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+	return addr;
+}
+
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end,
+				long *zap_work, struct zap_details *details)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	if (details && !details->check_mapping && !details->nonlinear_vma)
+		details = NULL;
+
+	BUG_ON(addr >= end);
+	tlb_start_vma(tlb, vma);
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd)) {
+			(*zap_work)--;
+			continue;
+		}
+		next = zap_pud_range(tlb, vma, pgd, addr, next,
+						zap_work, details);
+	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
+	tlb_end_vma(tlb, vma);
+
+	return addr;
+}
+
+#ifdef CONFIG_PREEMPT
+# define ZAP_BLOCK_SIZE	(8 * PAGE_SIZE)
+#else
+/* No preempt: go for improved straight-line efficiency */
+# define ZAP_BLOCK_SIZE	(1024 * PAGE_SIZE)
+#endif
+
+/**
+ * unmap_vmas - unmap a range of memory covered by a list of vma's
+ * @tlbp: address of the caller's struct mmu_gather
+ * @vma: the starting vma
+ * @start_addr: virtual address at which to start unmapping
+ * @end_addr: virtual address at which to end unmapping
+ * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
+ * @details: details of nonlinear truncation or shared cache invalidation
+ *
+ * Returns the end address of the unmapping (restart addr if interrupted).
+ *
+ * Unmap all pages in the vma list.
+ *
+ * We aim to not hold locks for too long (for scheduling latency reasons).
+ * So zap pages in ZAP_BLOCK_SIZE bytecounts.  This means we need to
+ * return the ending mmu_gather to the caller.
+ *
+ * Only addresses between `start' and `end' will be unmapped.
+ *
+ * The VMA list must be sorted in ascending virtual address order.
+ *
+ * unmap_vmas() assumes that the caller will flush the whole unmapped address
+ * range after unmap_vmas() returns.  So the only responsibility here is to
+ * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
+ * drops the lock and schedules.
+ */
+unsigned long unmap_vmas(struct mmu_gather **tlbp,
+		struct vm_area_struct *vma, unsigned long start_addr,
+		unsigned long end_addr, unsigned long *nr_accounted,
+		struct zap_details *details)
+{
+	long zap_work = ZAP_BLOCK_SIZE;
+	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
+	int tlb_start_valid = 0;
+	unsigned long start = start_addr;
+	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
+	int fullmm = (*tlbp)->fullmm;
+
+	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
+		unsigned long end;
+
+		start = max(vma->vm_start, start_addr);
+		if (start >= vma->vm_end)
+			continue;
+		end = min(vma->vm_end, end_addr);
+		if (end <= vma->vm_start)
+			continue;
+
+		if (vma->vm_flags & VM_ACCOUNT)
+			*nr_accounted += (end - start) >> PAGE_SHIFT;
+
+		while (start != end) {
+			if (!tlb_start_valid) {
+				tlb_start = start;
+				tlb_start_valid = 1;
+			}
+
+			if (unlikely(is_vm_hugetlb_page(vma))) {
+				unmap_hugepage_range(vma, start, end);
+				zap_work -= (end - start) /
+						(HPAGE_SIZE / PAGE_SIZE);
+				start = end;
+			} else
+				start = unmap_page_range(*tlbp, vma,
+						start, end, &zap_work, details);
+
+			if (zap_work > 0) {
+				BUG_ON(start != end);
+				break;
+			}
+
+			tlb_finish_mmu(*tlbp, tlb_start, start);
+
+			if (need_resched() ||
+				(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
+				if (i_mmap_lock) {
+					*tlbp = NULL;
+					goto out;
+				}
+				cond_resched();
+			}
+
+			*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
+			tlb_start_valid = 0;
+			zap_work = ZAP_BLOCK_SIZE;
+		}
+	}
+out:
+	return start;	/* which is now the end (or restart) address */
+}
+
+/**
+ * zap_page_range - remove user pages in a given range
+ * @vma: vm_area_struct holding the applicable pages
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ * @details: details of nonlinear truncation or shared cache invalidation
+ */
+unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
+		unsigned long size, struct zap_details *details)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather *tlb;
+	unsigned long end = address + size;
+	unsigned long nr_accounted = 0;
+
+	lru_add_drain();
+	tlb = tlb_gather_mmu(mm, 0);
+	update_hiwater_rss(mm);
+	end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+	if (tlb)
+		tlb_finish_mmu(tlb, address, end);
+	return end;
+}
+
+/*
+ * Do a quick page-table lookup for a single page.
+ */
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+			unsigned int flags)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	struct page *page;
+	struct mm_struct *mm = vma->vm_mm;
+
+	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
+	if (!IS_ERR(page)) {
+		BUG_ON(flags & FOLL_GET);
+		goto out;
+	}
+
+	page = NULL;
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto no_page_table;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto no_page_table;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto no_page_table;
+
+	if (pmd_huge(*pmd)) {
+		BUG_ON(flags & FOLL_GET);
+		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
+		goto out;
+	}
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto unlock;
+	if ((flags & FOLL_WRITE) && !pte_write(pte))
+		goto unlock;
+	page = vm_normal_page(vma, address, pte);
+	if (unlikely(!page))
+		goto unlock;
+
+	if (flags & FOLL_GET)
+		get_page(page);
+	if (flags & FOLL_TOUCH) {
+		if ((flags & FOLL_WRITE) &&
+		    !pte_dirty(pte) && !PageDirty(page))
+			set_page_dirty(page);
+		mark_page_accessed(page);
+	}
+unlock:
+	pte_unmap_unlock(ptep, ptl);
+out:
+	return page;
+
+no_page_table:
+	/*
+	 * When core dumping an enormous anonymous area that nobody
+	 * has touched so far, we don't want to allocate page tables.
+	 */
+	if (flags & FOLL_ANON) {
+		page = ZERO_PAGE(address);
+		if (flags & FOLL_GET)
+			get_page(page);
+		BUG_ON(flags & FOLL_WRITE);
+	}
+	return page;
+}
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, int len, int write, int force,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	int i;
+	unsigned int vm_flags;
+
+	/* 
+	 * Require read or write permissions.
+	 * If 'force' is set, we only require the "MAY" flags.
+	 */
+	vm_flags  = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+	vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+	i = 0;
+
+	do {
+		struct vm_area_struct *vma;
+		unsigned int foll_flags;
+
+		vma = find_extend_vma(mm, start);
+		if (!vma && in_gate_area(tsk, start)) {
+			unsigned long pg = start & PAGE_MASK;
+			struct vm_area_struct *gate_vma = get_gate_vma(tsk);
+			pgd_t *pgd;
+			pud_t *pud;
+			pmd_t *pmd;
+			pte_t *pte;
+			if (write) /* user gate pages are read-only */
+				return i ? : -EFAULT;
+			if (pg > TASK_SIZE)
+				pgd = pgd_offset_k(pg);
+			else
+				pgd = pgd_offset_gate(mm, pg);
+			BUG_ON(pgd_none(*pgd));
+			pud = pud_offset(pgd, pg);
+			BUG_ON(pud_none(*pud));
+			pmd = pmd_offset(pud, pg);
+			if (pmd_none(*pmd))
+				return i ? : -EFAULT;
+			pte = pte_offset_map(pmd, pg);
+			if (pte_none(*pte)) {
+				pte_unmap(pte);
+				return i ? : -EFAULT;
+			}
+			if (pages) {
+				struct page *page = vm_normal_page(gate_vma, start, *pte);
+				pages[i] = page;
+				if (page)
+					get_page(page);
+			}
+			pte_unmap(pte);
+			if (vmas)
+				vmas[i] = gate_vma;
+			i++;
+			start += PAGE_SIZE;
+			len--;
+			continue;
+		}
+
+		if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
+				|| !(vm_flags & vma->vm_flags))
+			return i ? : -EFAULT;
+
+		if (is_vm_hugetlb_page(vma)) {
+			i = follow_hugetlb_page(mm, vma, pages, vmas,
+						&start, &len, i);
+			continue;
+		}
+
+		foll_flags = FOLL_TOUCH;
+		if (pages)
+			foll_flags |= FOLL_GET;
+		if (!write && !(vma->vm_flags & VM_LOCKED) &&
+		    (!vma->vm_ops || !vma->vm_ops->nopage))
+			foll_flags |= FOLL_ANON;
+
+		do {
+			struct page *page;
+
+			if (write)
+				foll_flags |= FOLL_WRITE;
+
+			cond_resched();
+			while (!(page = follow_page(vma, start, foll_flags))) {
+				int ret;
+				ret = __handle_mm_fault(mm, vma, start,
+						foll_flags & FOLL_WRITE);
+				/*
+				 * The VM_FAULT_WRITE bit tells us that do_wp_page has
+				 * broken COW when necessary, even if maybe_mkwrite
+				 * decided not to set pte_write. We can thus safely do
+				 * subsequent page lookups as if they were reads.
+				 */
+				if (ret & VM_FAULT_WRITE)
+					foll_flags &= ~FOLL_WRITE;
+
+				switch (ret & ~VM_FAULT_WRITE) {
+				case VM_FAULT_MINOR:
+					tsk->min_flt++;
+					break;
+				case VM_FAULT_MAJOR:
+					tsk->maj_flt++;
+					break;
+				case VM_FAULT_SIGBUS:
+					return i ? i : -EFAULT;
+				case VM_FAULT_OOM:
+					return i ? i : -ENOMEM;
+				default:
+					BUG();
+				}
+			}
+			if (pages) {
+				pages[i] = page;
+				flush_dcache_page(page);
+			}
+			if (vmas)
+				vmas[i] = vma;
+			i++;
+			start += PAGE_SIZE;
+			len--;
+		} while (len && start < vma->vm_end);
+	} while (len);
+	return i;
+}
+EXPORT_SYMBOL(get_user_pages);
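+
+/*
+ * Illustrative sketch, not part of this patch: pinning a user buffer, e.g.
+ * for DMA.  "uaddr", "npages", "page_list" and "i" are assumed locals of the
+ * hypothetical caller, and error handling is elided; mmap_sem must be held
+ * for read across the call.
+ *
+ *	down_read(&current->mm->mmap_sem);
+ *	ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK,
+ *			     npages, 1, 0, page_list, NULL);
+ *	up_read(&current->mm->mmap_sem);
+ *
+ *	// ... use the pinned pages ...
+ *
+ *	for (i = 0; i < ret; i++) {
+ *		set_page_dirty_lock(page_list[i]);
+ *		page_cache_release(page_list[i]);
+ *	}
+ */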
+
+static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+			unsigned long addr, unsigned long end, pgprot_t prot)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+	if (!pte)
+		return -ENOMEM;
+	do {
+		struct page *page = ZERO_PAGE(addr);
+		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+		page_cache_get(page);
+		page_add_file_rmap(page);
+		inc_mm_counter(mm, file_rss);
+		BUG_ON(!pte_none(*pte));
+		set_pte_at(mm, addr, pte, zero_pte);
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(pte - 1, ptl);
+	return 0;
+}
+
+static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
+			unsigned long addr, unsigned long end, pgprot_t prot)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return -ENOMEM;
+	do {
+		next = pmd_addr_end(addr, end);
+		if (zeromap_pte_range(mm, pmd, addr, next, prot))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+			unsigned long addr, unsigned long end, pgprot_t prot)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return -ENOMEM;
+	do {
+		next = pud_addr_end(addr, end);
+		if (zeromap_pmd_range(mm, pud, addr, next, prot))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+int zeromap_page_range(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long size, pgprot_t prot)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long end = addr + size;
+	struct mm_struct *mm = vma->vm_mm;
+	int err;
+
+	BUG_ON(addr >= end);
+	pgd = pgd_offset(mm, addr);
+	flush_cache_range(vma, addr, end);
+	do {
+		next = pgd_addr_end(addr, end);
+		err = zeromap_pud_range(mm, pgd, addr, next, prot);
+		if (err)
+			break;
+	} while (pgd++, addr = next, addr != end);
+	return err;
+}
+
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+{
+	pgd_t * pgd = pgd_offset(mm, addr);
+	pud_t * pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		pmd_t * pmd = pmd_alloc(mm, pud, addr);
+		if (pmd)
+			return pte_alloc_map_lock(mm, pmd, addr, ptl);
+	}
+	return NULL;
+}
+
+/*
+ * This is the old fallback for page remapping.
+ *
+ * For historical reasons, it only allows reserved pages. Only
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+{
+	int retval;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	retval = -EINVAL;
+	if (PageAnon(page))
+		goto out;
+	retval = -ENOMEM;
+	flush_dcache_page(page);
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		goto out;
+	retval = -EBUSY;
+	if (!pte_none(*pte))
+		goto out_unlock;
+
+	/* Ok, finally just insert the thing.. */
+	get_page(page);
+	inc_mm_counter(mm, file_rss);
+	page_add_file_rmap(page);
+	set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+	retval = 0;
+out_unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return retval;
+}
+
+/*
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
+ */
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
+{
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return -EFAULT;
+	if (!page_count(page))
+		return -EINVAL;
+	vma->vm_flags |= VM_INSERTPAGE;
+	return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_page);
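+
+/*
+ * Illustrative sketch, not part of this patch: a hypothetical character
+ * driver's mmap() method handing a single page (allocated earlier with
+ * alloc_page(GFP_KERNEL) and kept in the made-up "dev->page") to userspace:
+ *
+ *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
+ *	{
+ *		struct foo_dev *dev = file->private_data;
+ *
+ *		if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ *			return -EINVAL;
+ *		return vm_insert_page(vma, vma->vm_start, dev->page);
+ *	}
+ */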
+
+/*
+ * Maps a range of physical memory into the requested pages.  The old
+ * mappings are removed.  Any references to nonexistent pages result
+ * in null mappings (currently treated as "copy-on-access").
+ */
+static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
+			unsigned long addr, unsigned long end,
+			unsigned long pfn, pgprot_t prot)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+	if (!pte)
+		return -ENOMEM;
+	do {
+		BUG_ON(!pte_none(*pte));
+		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+		pfn++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(pte - 1, ptl);
+	return 0;
+}
+
+static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
+			unsigned long addr, unsigned long end,
+			unsigned long pfn, pgprot_t prot)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pfn -= addr >> PAGE_SHIFT;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return -ENOMEM;
+	do {
+		next = pmd_addr_end(addr, end);
+		if (remap_pte_range(mm, pmd, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
+			unsigned long addr, unsigned long end,
+			unsigned long pfn, pgprot_t prot)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pfn -= addr >> PAGE_SHIFT;
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return -ENOMEM;
+	do {
+		next = pud_addr_end(addr, end);
+		if (remap_pmd_range(mm, pud, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+/*  Note: this is only safe if the mm semaphore is held when called. */
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
+		    unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long end = addr + PAGE_ALIGN(size);
+	struct mm_struct *mm = vma->vm_mm;
+	int err;
+
+	/*
+	 * Physically remapped pages are special. Tell the
+	 * rest of the world about it:
+	 *   VM_IO tells people not to look at these pages
+	 *	(accesses can have side effects).
+	 *   VM_RESERVED is specified all over the place, because
+	 *	in 2.4 it kept swapout's vma scan off this vma; but
+	 *	in 2.6 the LRU scan won't even find its pages, so this
+	 *	flag means no more than count its pages in reserved_vm,
+	 * 	and omit it from core dump, even when VM_IO turned off.
+	 *   VM_PFNMAP tells the core MM that the base pages are just
+	 *	raw PFN mappings, and do not have a "struct page" associated
+	 *	with them.
+	 *
+	 * There's a horrible special case to handle copy-on-write
+	 * behaviour that some programs depend on. We mark the "original"
+	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
+	 */
+	if (is_cow_mapping(vma->vm_flags)) {
+		if (addr != vma->vm_start || end != vma->vm_end)
+			return -EINVAL;
+		vma->vm_pgoff = pfn;
+	}
+
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+
+	BUG_ON(addr >= end);
+	pfn -= addr >> PAGE_SHIFT;
+	pgd = pgd_offset(mm, addr);
+	flush_cache_range(vma, addr, end);
+	do {
+		next = pgd_addr_end(addr, end);
+		err = remap_pud_range(mm, pgd, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			break;
+	} while (pgd++, addr = next, addr != end);
+	return err;
+}
+EXPORT_SYMBOL(remap_pfn_range);
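+
+/*
+ * Illustrative sketch, not part of this patch: the usual way a driver's
+ * mmap() method uses remap_pfn_range() to expose a physical region to
+ * userspace ("dev->phys_base" is an assumed field of the example device):
+ *
+ *	static int foo_mmap(struct file *file, struct vm_area_struct *vma)
+ *	{
+ *		struct foo_dev *dev = file->private_data;
+ *		unsigned long size = vma->vm_end - vma->vm_start;
+ *
+ *		if (remap_pfn_range(vma, vma->vm_start,
+ *				    dev->phys_base >> PAGE_SHIFT,
+ *				    size, vma->vm_page_prot))
+ *			return -EAGAIN;
+ *		return 0;
+ *	}
+ */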
+
+/*
+ * handle_pte_fault chooses page fault handler according to an entry
+ * which was read non-atomically.  Before making any commitment, on
+ * those architectures or configurations (e.g. i386 with PAE) which
+ * might give a mix of unmatched parts, do_swap_page and do_file_page
+ * must check under lock before unmapping the pte and proceeding
+ * (but do_wp_page is only called after already making such a check;
+ * and do_anonymous_page and do_no_page can safely check later on).
+ */
+static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
+				pte_t *page_table, pte_t orig_pte)
+{
+	int same = 1;
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+	if (sizeof(pte_t) > sizeof(unsigned long)) {
+		spinlock_t *ptl = pte_lockptr(mm, pmd);
+		spin_lock(ptl);
+		same = pte_same(*page_table, orig_pte);
+		spin_unlock(ptl);
+	}
+#endif
+	pte_unmap(page_table);
+	return same;
+}
+
+/*
+ * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
+ * servicing faults for write access.  In the normal case, we always want
+ * pte_mkwrite.  But get_user_pages can cause write faults for mappings
+ * that do not have writing enabled, when used by access_process_vm.
+ */
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+	if (likely(vma->vm_flags & VM_WRITE))
+		pte = pte_mkwrite(pte);
+	return pte;
+}
+
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
+{
+	/*
+	 * If the source page was a PFN mapping, we don't have
+	 * a "struct page" for it. We do a best-effort copy by
+	 * just copying from the original user address. If that
+	 * fails, we just zero-fill it. Live with it.
+	 */
+	if (unlikely(!src)) {
+		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+			memset(kaddr, 0, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		return;
+
+	}
+	copy_user_highpage(dst, src, va);
+}
+
+/*
+ * This routine handles present pages, when users try to write
+ * to a shared page. It is done by copying the page to a new address
+ * and decrementing the shared-page counter for the old page.
+ *
+ * Note that this routine assumes that the protection checks have been
+ * done by the caller (the low-level page fault routine in most cases).
+ * Thus we can safely just mark it writable once we've done any necessary
+ * COW.
+ *
+ * We also mark the page dirty at this point even though the page will
+ * change only once the write actually happens. This avoids a few races,
+ * and potentially makes it more efficient.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), with pte both mapped and locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		spinlock_t *ptl, pte_t orig_pte)
+{
+	struct page *old_page, *new_page;
+	pte_t entry;
+	int ret = VM_FAULT_MINOR;
+
+	old_page = vm_normal_page(vma, address, orig_pte);
+	if (!old_page)
+		goto gotten;
+
+	if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
+		int reuse = can_share_swap_page(old_page);
+		unlock_page(old_page);
+		if (reuse) {
+			flush_cache_page(vma, address, pte_pfn(orig_pte));
+			entry = pte_mkyoung(orig_pte);
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+			ptep_set_access_flags(vma, address, page_table, entry, 1);
+			update_mmu_cache(vma, address, entry);
+			lazy_mmu_prot_update(entry);
+			ret |= VM_FAULT_WRITE;
+			goto unlock;
+		}
+	}
+
+	/*
+	 * Ok, we need to copy. Oh, well..
+	 */
+	page_cache_get(old_page);
+gotten:
+	pte_unmap_unlock(page_table, ptl);
+
+	if (unlikely(anon_vma_prepare(vma)))
+		goto oom;
+	if (old_page == ZERO_PAGE(address)) {
+		new_page = alloc_zeroed_user_highpage(vma, address);
+		if (!new_page)
+			goto oom;
+	} else {
+		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+		if (!new_page)
+			goto oom;
+		cow_user_page(new_page, old_page, address);
+	}
+
+	/*
+	 * Re-check the pte - we dropped the lock
+	 */
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (likely(pte_same(*page_table, orig_pte))) {
+		if (old_page) {
+			page_remove_rmap(old_page);
+			if (!PageAnon(old_page)) {
+				dec_mm_counter(mm, file_rss);
+				inc_mm_counter(mm, anon_rss);
+			}
+		} else
+			inc_mm_counter(mm, anon_rss);
+		flush_cache_page(vma, address, pte_pfn(orig_pte));
+		entry = mk_pte(new_page, vma->vm_page_prot);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		ptep_establish(vma, address, page_table, entry);
+		update_mmu_cache(vma, address, entry);
+		lazy_mmu_prot_update(entry);
+		lru_cache_add_active(new_page);
+		page_add_new_anon_rmap(new_page, vma, address);
+
+		/* Free the old page.. */
+		new_page = old_page;
+		ret |= VM_FAULT_WRITE;
+	}
+	if (new_page)
+		page_cache_release(new_page);
+	if (old_page)
+		page_cache_release(old_page);
+unlock:
+	pte_unmap_unlock(page_table, ptl);
+	return ret;
+oom:
+	if (old_page)
+		page_cache_release(old_page);
+	return VM_FAULT_OOM;
+}
+
+/*
+ * Helper functions for unmap_mapping_range().
+ *
+ * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __
+ *
+ * We have to restart searching the prio_tree whenever we drop the lock,
+ * since the iterator is only valid while the lock is held, and anyway
+ * a later vma might be split and reinserted earlier while lock dropped.
+ *
+ * The list of nonlinear vmas could be handled more efficiently, using
+ * a placeholder, but handle it in the same way until a need is shown.
+ * It is important to search the prio_tree before nonlinear list: a vma
+ * may become nonlinear and be shifted from prio_tree to nonlinear list
+ * while the lock is dropped; but never shifted from list to prio_tree.
+ *
+ * In order to make forward progress despite restarting the search,
+ * vm_truncate_count is used to mark a vma as now dealt with, so we can
+ * quickly skip it next time around.  Since the prio_tree search only
+ * shows us those vmas affected by unmapping the range in question, we
+ * can't efficiently keep all vmas in step with mapping->truncate_count:
+ * so instead reset them all whenever it wraps back to 0 (then go to 1).
+ * mapping->truncate_count and vma->vm_truncate_count are protected by
+ * i_mmap_lock.
+ *
+ * In order to make forward progress despite repeatedly restarting some
+ * large vma, note the restart_addr from unmap_vmas when it breaks out:
+ * and restart from that address when we reach that vma again.  It might
+ * have been split or merged, shrunk or extended, but never shifted: so
+ * restart_addr remains valid so long as it remains in the vma's range.
+ * unmap_mapping_range forces truncate_count to leap over page-aligned
+ * values so we can save vma's restart_addr in its truncate_count field.
+ */
+#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK))
+
+static void reset_vma_truncate_counts(struct address_space *mapping)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
+		vma->vm_truncate_count = 0;
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+		vma->vm_truncate_count = 0;
+}
+
+static int unmap_mapping_range_vma(struct vm_area_struct *vma,
+		unsigned long start_addr, unsigned long end_addr,
+		struct zap_details *details)
+{
+	unsigned long restart_addr;
+	int need_break;
+
+again:
+	restart_addr = vma->vm_truncate_count;
+	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
+		start_addr = restart_addr;
+		if (start_addr >= end_addr) {
+			/* Top of vma has been split off since last time */
+			vma->vm_truncate_count = details->truncate_count;
+			return 0;
+		}
+	}
+
+	restart_addr = zap_page_range(vma, start_addr,
+					end_addr - start_addr, details);
+	need_break = need_resched() ||
+			need_lockbreak(details->i_mmap_lock);
+
+	if (restart_addr >= end_addr) {
+		/* We have now completed this vma: mark it so */
+		vma->vm_truncate_count = details->truncate_count;
+		if (!need_break)
+			return 0;
+	} else {
+		/* Note restart_addr in vma's truncate_count field */
+		vma->vm_truncate_count = restart_addr;
+		if (!need_break)
+			goto again;
+	}
+
+	spin_unlock(details->i_mmap_lock);
+	cond_resched();
+	spin_lock(details->i_mmap_lock);
+	return -EINTR;
+}
+
+static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
+					    struct zap_details *details)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	pgoff_t vba, vea, zba, zea;
+
+restart:
+	vma_prio_tree_foreach(vma, &iter, root,
+			details->first_index, details->last_index) {
+		/* Skip quickly over those we have already dealt with */
+		if (vma->vm_truncate_count == details->truncate_count)
+			continue;
+
+		vba = vma->vm_pgoff;
+		vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;
+		/* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */
+		zba = details->first_index;
+		if (zba < vba)
+			zba = vba;
+		zea = details->last_index;
+		if (zea > vea)
+			zea = vea;
+
+		if (unmap_mapping_range_vma(vma,
+			((zba - vba) << PAGE_SHIFT) + vma->vm_start,
+			((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start,
+				details) < 0)
+			goto restart;
+	}
+}
+
+static inline void unmap_mapping_range_list(struct list_head *head,
+					    struct zap_details *details)
+{
+	struct vm_area_struct *vma;
+
+	/*
+	 * In nonlinear VMAs there is no correspondence between virtual address
+	 * offset and file offset.  So we must perform an exhaustive search
+	 * across *all* the pages in each nonlinear VMA, not just the pages
+	 * whose virtual address lies outside the file truncation point.
+	 */
+restart:
+	list_for_each_entry(vma, head, shared.vm_set.list) {
+		/* Skip quickly over those we have already dealt with */
+		if (vma->vm_truncate_count == details->truncate_count)
+			continue;
+		details->nonlinear_vma = vma;
+		if (unmap_mapping_range_vma(vma, vma->vm_start,
+					vma->vm_end, details) < 0)
+			goto restart;
+	}
+}
+
+/**
+ * unmap_mapping_range - unmap the portion of all mmaps
+ * in the specified address_space corresponding to the specified
+ * page range in the underlying file.
+ * @mapping: the address space containing mmaps to be unmapped.
+ * @holebegin: byte in first page to unmap, relative to the start of
+ * the underlying file.  This will be rounded down to a PAGE_SIZE
+ * boundary.  Note that this is different from vmtruncate(), which
+ * must keep the partial page.  In contrast, we must get rid of
+ * partial pages.
+ * @holelen: size of prospective hole in bytes.  This will be rounded
+ * up to a PAGE_SIZE boundary.  A holelen of zero truncates to the
+ * end of the file.
+ * @even_cows: 1 when truncating a file, unmap even private COWed pages;
+ * but 0 when invalidating pagecache, don't throw away private data.
+ */
+void unmap_mapping_range(struct address_space *mapping,
+		loff_t const holebegin, loff_t const holelen, int even_cows)
+{
+	struct zap_details details;
+	pgoff_t hba = holebegin >> PAGE_SHIFT;
+	pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	/* Check for overflow. */
+	if (sizeof(holelen) > sizeof(hlen)) {
+		long long holeend =
+			(holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		if (holeend & ~(long long)ULONG_MAX)
+			hlen = ULONG_MAX - hba + 1;
+	}
+
+	details.check_mapping = even_cows? NULL: mapping;
+	details.nonlinear_vma = NULL;
+	details.first_index = hba;
+	details.last_index = hba + hlen - 1;
+	if (details.last_index < details.first_index)
+		details.last_index = ULONG_MAX;
+	details.i_mmap_lock = &mapping->i_mmap_lock;
+
+	spin_lock(&mapping->i_mmap_lock);
+
+	/* serialize i_size write against truncate_count write */
+	smp_wmb();
+	/* Protect against page faults, and endless unmapping loops */
+	mapping->truncate_count++;
+	/*
+	 * On archs where spin_lock has inclusive semantics, such as ia64,
+	 * this smp_mb() prevents page table contents from being read
+	 * before the truncate_count increment is visible to other CPUs.
+	 */
+	smp_mb();
+	if (unlikely(is_restart_addr(mapping->truncate_count))) {
+		if (mapping->truncate_count == 0)
+			reset_vma_truncate_counts(mapping);
+		mapping->truncate_count++;
+	}
+	details.truncate_count = mapping->truncate_count;
+
+	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+		unmap_mapping_range_tree(&mapping->i_mmap, &details);
+	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
+		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
+	spin_unlock(&mapping->i_mmap_lock);
+}
+EXPORT_SYMBOL(unmap_mapping_range);
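+
+/*
+ * Illustrative sketch, not part of this patch: invalidating mappings of a
+ * byte range of a file without discarding private COWed copies (even_cows
+ * == 0) before dropping the matching pagecache; compare vmtruncate() below,
+ * which passes even_cows == 1 for a real truncate.
+ *
+ *	unmap_mapping_range(inode->i_mapping, start, len, 0);
+ *	truncate_inode_pages_range(inode->i_mapping, start, start + len - 1);
+ */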
+
+/*
+ * Handle all mappings that got truncated by a "truncate()"
+ * system call.
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ */
+int vmtruncate(struct inode * inode, loff_t offset)
+{
+	struct address_space *mapping = inode->i_mapping;
+	unsigned long limit;
+
+	if (inode->i_size < offset)
+		goto do_expand;
+	/*
+	 * truncation of in-use swapfiles is disallowed - it would cause
+	 * subsequent swapout to scribble on the now-freed blocks.
+	 */
+	if (IS_SWAPFILE(inode))
+		goto out_busy;
+	i_size_write(inode, offset);
+	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+	truncate_inode_pages(mapping, offset);
+	goto out_truncate;
+
+do_expand:
+	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	if (limit != RLIM_INFINITY && offset > limit)
+		goto out_sig;
+	if (offset > inode->i_sb->s_maxbytes)
+		goto out_big;
+	i_size_write(inode, offset);
+
+out_truncate:
+	if (inode->i_op && inode->i_op->truncate)
+		inode->i_op->truncate(inode);
+	return 0;
+out_sig:
+	send_sig(SIGXFSZ, current, 0);
+out_big:
+	return -EFBIG;
+out_busy:
+	return -ETXTBSY;
+}
+EXPORT_SYMBOL(vmtruncate);
+
+int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
+{
+	struct address_space *mapping = inode->i_mapping;
+
+	/*
+	 * If the underlying filesystem is not going to provide
+	 * a way to truncate a range of blocks (punch a hole) -
+	 * we should return failure right now.
+	 */
+	if (!inode->i_op || !inode->i_op->truncate_range)
+		return -ENOSYS;
+
+	mutex_lock(&inode->i_mutex);
+	down_write(&inode->i_alloc_sem);
+	unmap_mapping_range(mapping, offset, (end - offset), 1);
+	truncate_inode_pages_range(mapping, offset, end);
+	inode->i_op->truncate_range(inode, offset, end);
+	up_write(&inode->i_alloc_sem);
+	mutex_unlock(&inode->i_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(vmtruncate_range);
+
+/* 
+ * Primitive swap readahead code. We simply read an aligned block of
+ * (1 << page_cluster) entries in the swap area. This method is chosen
+ * because it doesn't cost us any seek time.  We also make sure to queue
+ * the 'original' request together with the readahead ones...  
+ *
+ * This has been extended to use the NUMA policies from the mm triggering
+ * the readahead.
+ *
+ * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
+ */
+void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma)
+{
+#ifdef CONFIG_NUMA
+	struct vm_area_struct *next_vma = vma ? vma->vm_next : NULL;
+#endif
+	int i, num;
+	struct page *new_page;
+	unsigned long offset;
+
+	/*
+	 * Get the number of swap entries we should do readahead I/O for.
+	 */
+	num = valid_swaphandles(entry, &offset);
+	for (i = 0; i < num; offset++, i++) {
+		/* Ok, do the async read-ahead now */
+		new_page = read_swap_cache_async(swp_entry(swp_type(entry),
+							   offset), vma, addr);
+		if (!new_page)
+			break;
+		page_cache_release(new_page);
+#ifdef CONFIG_NUMA
+		/*
+		 * Find the next applicable VMA for the NUMA policy.
+		 */
+		addr += PAGE_SIZE;
+		if (addr == 0)
+			vma = NULL;
+		if (vma) {
+			if (addr >= vma->vm_end) {
+				vma = next_vma;
+				next_vma = vma ? vma->vm_next : NULL;
+			}
+			if (vma && addr < vma->vm_start)
+				vma = NULL;
+		} else {
+			if (next_vma && addr >= next_vma->vm_start) {
+				vma = next_vma;
+				next_vma = vma->vm_next;
+			}
+		}
+#endif
+	}
+	lru_add_drain();	/* Push any new pages onto the LRU now */
+}
+
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access, pte_t orig_pte)
+{
+	spinlock_t *ptl;
+	struct page *page;
+	swp_entry_t entry;
+	pte_t pte;
+	int ret = VM_FAULT_MINOR;
+
+	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+		goto out;
+
+	entry = pte_to_swp_entry(orig_pte);
+again:
+	page = lookup_swap_cache(entry);
+	if (!page) {
+		swapin_readahead(entry, address, vma);
+		page = read_swap_cache_async(entry, vma, address);
+		if (!page) {
+			/*
+			 * Back out if somebody else faulted in this pte
+			 * while we released the pte lock.
+			 */
+			page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+			if (likely(pte_same(*page_table, orig_pte)))
+				ret = VM_FAULT_OOM;
+			goto unlock;
+		}
+
+		/* Had to read the page from swap area: Major fault */
+		ret = VM_FAULT_MAJOR;
+		inc_page_state(pgmajfault);
+		grab_swap_token();
+	}
+
+	mark_page_accessed(page);
+	lock_page(page);
+	if (!PageSwapCache(page)) {
+		/* Page migration has occurred */
+		unlock_page(page);
+		page_cache_release(page);
+		goto again;
+	}
+
+	/*
+	 * Back out if somebody else already faulted in this pte.
+	 */
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (unlikely(!pte_same(*page_table, orig_pte)))
+		goto out_nomap;
+
+	if (unlikely(!PageUptodate(page))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_nomap;
+	}
+
+	/* The page isn't present yet, go ahead with the fault. */
+
+	inc_mm_counter(mm, anon_rss);
+	pte = mk_pte(page, vma->vm_page_prot);
+	if (write_access && can_share_swap_page(page)) {
+		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+		write_access = 0;
+	}
+
+	flush_icache_page(vma, page);
+	set_pte_at(mm, address, page_table, pte);
+	page_add_anon_rmap(page, vma, address);
+
+	swap_free(entry);
+	if (vm_swap_full())
+		remove_exclusive_swap_page(page);
+	unlock_page(page);
+
+	if (write_access) {
+		if (do_wp_page(mm, vma, address,
+				page_table, pmd, ptl, pte) == VM_FAULT_OOM)
+			ret = VM_FAULT_OOM;
+		goto out;
+	}
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(vma, address, pte);
+	lazy_mmu_prot_update(pte);
+unlock:
+	pte_unmap_unlock(page_table, ptl);
+out:
+	return ret;
+out_nomap:
+	pte_unmap_unlock(page_table, ptl);
+	unlock_page(page);
+	page_cache_release(page);
+	return ret;
+}
+
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access)
+{
+	struct page *page;
+	spinlock_t *ptl;
+	pte_t entry;
+
+	if (write_access) {
+		/* Allocate our own private page. */
+		pte_unmap(page_table);
+
+		if (unlikely(anon_vma_prepare(vma)))
+			goto oom;
+		page = alloc_zeroed_user_highpage(vma, address);
+		if (!page)
+			goto oom;
+
+		entry = mk_pte(page, vma->vm_page_prot);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+
+		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+		if (!pte_none(*page_table))
+			goto release;
+		inc_mm_counter(mm, anon_rss);
+		lru_cache_add_active(page);
+		page_add_new_anon_rmap(page, vma, address);
+	} else {
+		/* Map the ZERO_PAGE - vm_page_prot is readonly */
+		page = ZERO_PAGE(address);
+		page_cache_get(page);
+		entry = mk_pte(page, vma->vm_page_prot);
+
+		ptl = pte_lockptr(mm, pmd);
+		spin_lock(ptl);
+		if (!pte_none(*page_table))
+			goto release;
+		inc_mm_counter(mm, file_rss);
+		page_add_file_rmap(page);
+	}
+
+	set_pte_at(mm, address, page_table, entry);
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
+unlock:
+	pte_unmap_unlock(page_table, ptl);
+	return VM_FAULT_MINOR;
+release:
+	page_cache_release(page);
+	goto unlock;
+oom:
+	return VM_FAULT_OOM;
+}
+
+/*
+ * do_no_page() tries to create a new page mapping. It aggressively
+ * tries to share with existing pages, but makes a separate copy if
+ * the "write_access" parameter is true in order to avoid the next
+ * page fault.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access)
+{
+	spinlock_t *ptl;
+	struct page *new_page;
+	struct address_space *mapping = NULL;
+	pte_t entry;
+	unsigned int sequence = 0;
+	int ret = VM_FAULT_MINOR;
+	int anon = 0;
+
+	pte_unmap(page_table);
+	BUG_ON(vma->vm_flags & VM_PFNMAP);
+
+	if (vma->vm_file) {
+		mapping = vma->vm_file->f_mapping;
+		sequence = mapping->truncate_count;
+		smp_rmb(); /* serializes i_size against truncate_count */
+	}
+retry:
+	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
+	/*
+	 * No smp_rmb is needed here as long as there's a full
+	 * spin_lock/unlock sequence inside the ->nopage callback
+	 * (for the pagecache lookup) that acts as an implicit
+	 * smp_mb() and prevents the i_size read to happen
+	 * after the next truncate_count read.
+	 */
+
+	/* no page was available -- either SIGBUS or OOM */
+	if (new_page == NOPAGE_SIGBUS)
+		return VM_FAULT_SIGBUS;
+	if (new_page == NOPAGE_OOM)
+		return VM_FAULT_OOM;
+
+	/*
+	 * Should we do an early C-O-W break?
+	 */
+	if (write_access && !(vma->vm_flags & VM_SHARED)) {
+		struct page *page;
+
+		if (unlikely(anon_vma_prepare(vma)))
+			goto oom;
+		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+		if (!page)
+			goto oom;
+		copy_user_highpage(page, new_page, address);
+		page_cache_release(new_page);
+		new_page = page;
+		anon = 1;
+	}
+
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+	/*
+	 * For a file-backed vma, someone could have truncated or otherwise
+	 * invalidated this page.  If unmap_mapping_range got called,
+	 * retry getting the page.
+	 */
+	if (mapping && unlikely(sequence != mapping->truncate_count)) {
+		pte_unmap_unlock(page_table, ptl);
+		page_cache_release(new_page);
+		cond_resched();
+		sequence = mapping->truncate_count;
+		smp_rmb();
+		goto retry;
+	}
+
+	/*
+	 * This silly early PAGE_DIRTY setting removes a race
+	 * due to the bad i386 page protection. But it's valid
+	 * for other architectures too.
+	 *
+	 * Note that if write_access is true, we either now have
+	 * an exclusive copy of the page, or this is a shared mapping,
+	 * so we can make it writable and dirty to avoid having to
+	 * handle that later.
+	 */
+	/* Only go through if we didn't race with anybody else... */
+	if (pte_none(*page_table)) {
+		flush_icache_page(vma, new_page);
+		entry = mk_pte(new_page, vma->vm_page_prot);
+		if (write_access)
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		set_pte_at(mm, address, page_table, entry);
+		if (anon) {
+			inc_mm_counter(mm, anon_rss);
+			lru_cache_add_active(new_page);
+			page_add_new_anon_rmap(new_page, vma, address);
+		} else {
+			inc_mm_counter(mm, file_rss);
+			page_add_file_rmap(new_page);
+		}
+	} else {
+		/* One of our sibling threads was faster, back out. */
+		page_cache_release(new_page);
+		goto unlock;
+	}
+
+	/* no need to invalidate: a not-present page shouldn't be cached */
+	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
+unlock:
+	pte_unmap_unlock(page_table, ptl);
+	return ret;
+oom:
+	page_cache_release(new_page);
+	return VM_FAULT_OOM;
+}
+
+/*
+ * Fault of a previously existing named mapping. Repopulate the pte
+ * from the encoded file_pte if possible. This enables swappable
+ * nonlinear vmas.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access, pte_t orig_pte)
+{
+	pgoff_t pgoff;
+	int err;
+
+	if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+		return VM_FAULT_MINOR;
+
+	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
+		/*
+		 * Page table corrupted: show pte and kill process.
+		 */
+		print_bad_pte(vma, orig_pte, address);
+		return VM_FAULT_OOM;
+	}
+	/* We can then assume vma->vm_ops && vma->vm_ops->populate */
+
+	pgoff = pte_to_pgoff(orig_pte);
+	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+					vma->vm_page_prot, pgoff, 0);
+	if (err == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (err)
+		return VM_FAULT_SIGBUS;
+	return VM_FAULT_MAJOR;
+}
+
+/*
+ * These routines also need to handle stuff like marking pages dirty
+ * and/or accessed for architectures that don't do it in hardware (most
+ * RISC architectures).  The early dirtying is also good on the i386.
+ *
+ * There is also a hook called "update_mmu_cache()" that architectures
+ * with external mmu caches can use to update those (ie the Sparc or
+ * PowerPC hashed page tables that act as extended TLBs).
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+ */
+static inline int handle_pte_fault(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long address,
+		pte_t *pte, pmd_t *pmd, int write_access)
+{
+	pte_t entry;
+	pte_t old_entry;
+	spinlock_t *ptl;
+
+	old_entry = entry = *pte;
+	if (!pte_present(entry)) {
+		if (pte_none(entry)) {
+			if (!vma->vm_ops || !vma->vm_ops->nopage)
+				return do_anonymous_page(mm, vma, address,
+					pte, pmd, write_access);
+			return do_no_page(mm, vma, address,
+					pte, pmd, write_access);
+		}
+		if (pte_file(entry))
+			return do_file_page(mm, vma, address,
+					pte, pmd, write_access, entry);
+		return do_swap_page(mm, vma, address,
+					pte, pmd, write_access, entry);
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+	if (unlikely(!pte_same(*pte, entry)))
+		goto unlock;
+	if (write_access) {
+		if (!pte_write(entry))
+			return do_wp_page(mm, vma, address,
+					pte, pmd, ptl, entry);
+		entry = pte_mkdirty(entry);
+	}
+	entry = pte_mkyoung(entry);
+	if (!pte_same(old_entry, entry)) {
+		ptep_set_access_flags(vma, address, pte, entry, write_access);
+		update_mmu_cache(vma, address, entry);
+		lazy_mmu_prot_update(entry);
+	} else {
+		/*
+		 * This is needed only for protection faults but the arch code
+		 * is not yet telling us if this is a protection fault or not.
+		 * This still avoids useless tlb flushes for .text page faults
+		 * with threads.
+		 */
+		if (write_access)
+			flush_tlb_page(vma, address);
+	}
+unlock:
+	pte_unmap_unlock(pte, ptl);
+	return VM_FAULT_MINOR;
+}
+
+/*
+ * By the time we get here, we already hold the mm semaphore
+ */
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, int write_access)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	__set_current_state(TASK_RUNNING);
+
+	inc_page_state(pgfault);
+
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		return hugetlb_fault(mm, vma, address, write_access);
+
+	pgd = pgd_offset(mm, address);
+	pud = pud_alloc(mm, pgd, address);
+	if (!pud)
+		return VM_FAULT_OOM;
+	pmd = pmd_alloc(mm, pud, address);
+	if (!pmd)
+		return VM_FAULT_OOM;
+	pte = pte_alloc_map(mm, pmd, address);
+	if (!pte)
+		return VM_FAULT_OOM;
+
+	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+}
+
+EXPORT_SYMBOL_GPL(__handle_mm_fault);
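+
+/*
+ * Illustrative sketch, not part of this patch: roughly how an architecture's
+ * page fault handler drives the code above (through the handle_mm_fault()
+ * wrapper), accounting the fault and reacting to the result (do_sigbus and
+ * out_of_memory are the handler's own error paths):
+ *
+ *	switch (handle_mm_fault(mm, vma, address, write)) {
+ *	case VM_FAULT_MINOR:
+ *		tsk->min_flt++;
+ *		break;
+ *	case VM_FAULT_MAJOR:
+ *		tsk->maj_flt++;
+ *		break;
+ *	case VM_FAULT_SIGBUS:
+ *		goto do_sigbus;
+ *	case VM_FAULT_OOM:
+ *		goto out_of_memory;
+ *	default:
+ *		BUG();
+ *	}
+ */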
+
+#ifndef __PAGETABLE_PUD_FOLDED
+/*
+ * Allocate page upper directory.
+ * We've already handled the fast-path in-line.
+ */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+	pud_t *new = pud_alloc_one(mm, address);
+	if (!new)
+		return -ENOMEM;
+
+	spin_lock(&mm->page_table_lock);
+	if (pgd_present(*pgd))		/* Another has populated it */
+		pud_free(new);
+	else
+		pgd_populate(mm, pgd, new);
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+	return 0;
+}
+#endif /* __PAGETABLE_PUD_FOLDED */
+
+#ifndef __PAGETABLE_PMD_FOLDED
+/*
+ * Allocate page middle directory.
+ * We've already handled the fast-path in-line.
+ */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+	pmd_t *new = pmd_alloc_one(mm, address);
+	if (!new)
+		return -ENOMEM;
+
+	spin_lock(&mm->page_table_lock);
+#ifndef __ARCH_HAS_4LEVEL_HACK
+	if (pud_present(*pud))		/* Another has populated it */
+		pmd_free(new);
+	else
+		pud_populate(mm, pud, new);
+#else
+	if (pgd_present(*pud))		/* Another has populated it */
+		pmd_free(new);
+	else
+		pgd_populate(mm, pud, new);
+#endif /* __ARCH_HAS_4LEVEL_HACK */
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+	return 0;
+}
+#endif /* __PAGETABLE_PMD_FOLDED */
+
+int make_pages_present(unsigned long addr, unsigned long end)
+{
+	int ret, len, write;
+	struct vm_area_struct * vma;
+
+	vma = find_vma(current->mm, addr);
+	if (!vma)
+		return -1;
+	write = (vma->vm_flags & VM_WRITE) != 0;
+	if (addr >= end)
+		BUG();
+	if (end > vma->vm_end)
+		BUG();
+	len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
+	ret = get_user_pages(current, current->mm, addr,
+			len, write, 0, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	return ret == len ? 0 : -1;
+}
+
+/* 
+ * Map a vmalloc()-space virtual address to the physical page.
+ */
+struct page * vmalloc_to_page(void * vmalloc_addr)
+{
+	unsigned long addr = (unsigned long) vmalloc_addr;
+	struct page *page = NULL;
+	pgd_t *pgd = pgd_offset_k(addr);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+
+	if (!pgd_none(*pgd)) {
+		pud = pud_offset(pgd, addr);
+		if (!pud_none(*pud)) {
+			pmd = pmd_offset(pud, addr);
+			if (!pmd_none(*pmd)) {
+				ptep = pte_offset_map(pmd, addr);
+				pte = *ptep;
+				if (pte_present(pte))
+					page = pte_page(pte);
+				pte_unmap(ptep);
+			}
+		}
+	}
+	return page;
+}
+
+EXPORT_SYMBOL(vmalloc_to_page);
+
+/*
+ * Map a vmalloc()-space virtual address to the physical page frame number.
+ */
+unsigned long vmalloc_to_pfn(void * vmalloc_addr)
+{
+	return page_to_pfn(vmalloc_to_page(vmalloc_addr));
+}
+
+EXPORT_SYMBOL(vmalloc_to_pfn);
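+
+/*
+ * Illustrative sketch, not part of this patch: walking a vmalloc()ed buffer
+ * page by page, e.g. to build a scatter/gather list ("buf", "len" and
+ * "offset" are assumed locals of the hypothetical caller):
+ *
+ *	for (offset = 0; offset < len; offset += PAGE_SIZE) {
+ *		struct page *pg = vmalloc_to_page(buf + offset);
+ *		// ... add pg to the scatterlist ...
+ *	}
+ */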
+
+#if !defined(__HAVE_ARCH_GATE_AREA)
+
+#if defined(AT_SYSINFO_EHDR)
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+	gate_vma.vm_mm = NULL;
+	gate_vma.vm_start = FIXADDR_USER_START;
+	gate_vma.vm_end = FIXADDR_USER_END;
+	gate_vma.vm_page_prot = PAGE_READONLY;
+	gate_vma.vm_flags = 0;
+	return 0;
+}
+__initcall(gate_vma_init);
+#endif
+
+struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
+{
+#ifdef AT_SYSINFO_EHDR
+	return &gate_vma;
+#else
+	return NULL;
+#endif
+}
+
+int in_gate_area_no_task(unsigned long addr)
+{
+#ifdef AT_SYSINFO_EHDR
+	if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
+		return 1;
+#endif
+	return 0;
+}
+
+#endif	/* __HAVE_ARCH_GATE_AREA */
diff -urN oldtree/mm/mmap.c newtree/mm/mmap.c
--- oldtree/mm/mmap.c	2006-02-18 15:18:30.121738048 +0000
+++ newtree/mm/mmap.c	2006-02-18 15:28:25.328252904 +0000
@@ -1991,6 +1991,7 @@
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(insert_vm_struct);
 
 /*
  * Copy the vma structure to a new location in the same mm,
diff -urN oldtree/mm/mmap.c.orig newtree/mm/mmap.c.orig
--- oldtree/mm/mmap.c.orig	1970-01-01 00:00:00.000000000 +0000
+++ newtree/mm/mmap.c.orig	2006-02-18 15:18:30.000000000 +0000
@@ -0,0 +1,2063 @@
+/*
+ * mm/mmap.c
+ *
+ * Written by obz.
+ *
+ * Address space accounting code	<alan@redhat.com>
+ */
+
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/mman.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/syscalls.h>
+#include <linux/capability.h>
+#include <linux/init.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/personality.h>
+#include <linux/security.h>
+#include <linux/hugetlb.h>
+#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/mempolicy.h>
+#include <linux/rmap.h>
+
+#include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/tlb.h>
+
+static void unmap_region(struct mm_struct *mm,
+		struct vm_area_struct *vma, struct vm_area_struct *prev,
+		unsigned long start, unsigned long end);
+
+/*
+ * WARNING: the debugging will use recursive algorithms so never enable this
+ * unless you know what you are doing.
+ */
+#undef DEBUG_MM_RB
+
+/* description of effects of mapping type and prot in current implementation.
+ * this is due to the limited x86 page protection hardware.  The expected
+ * behavior is in parens:
+ *
+ * map_type	prot
+ *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
+ * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
+ *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
+ *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
+ *		
+ * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
+ *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
+ *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
+ *
+ */
+pgprot_t protection_map[16] = {
+	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
+	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
+};
+
+int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
+int sysctl_overcommit_ratio = 50;	/* default is 50% */
+int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
+atomic_t vm_committed_space = ATOMIC_INIT(0);
+
+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping. 0 means there is enough memory for the allocation to
+ * succeed and -ENOMEM implies there is not.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ *
+ * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
+ *
+ * Note this is a helper function intended to be used by LSMs which
+ * wish to use this logic.
+ */
+int __vm_enough_memory(long pages, int cap_sys_admin)
+{
+	unsigned long free, allowed;
+
+	vm_acct_memory(pages);
+
+	/*
+	 * Sometimes we want to use more memory than we have
+	 */
+	if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
+		return 0;
+
+	if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
+		unsigned long n;
+
+		free = get_page_cache_size();
+		free += nr_swap_pages;
+
+		/*
+		 * Any slabs which are created with the
+		 * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+		 * which are reclaimable, under pressure.  The dentry
+		 * cache and most inode caches should fall into this category.
+		 */
+		free += atomic_read(&slab_reclaim_pages);
+
+		/*
+		 * Leave the last 3% for root
+		 */
+		if (!cap_sys_admin)
+			free -= free / 32;
+
+		if (free > pages)
+			return 0;
+
+		/*
+		 * nr_free_pages() is very expensive on large systems,
+		 * only call if we're about to fail.
+		 */
+		n = nr_free_pages();
+		if (!cap_sys_admin)
+			n -= n / 32;
+		free += n;
+
+		if (free > pages)
+			return 0;
+		vm_unacct_memory(pages);
+		return -ENOMEM;
+	}
+
+	allowed = (totalram_pages - hugetlb_total_pages())
+	       	* sysctl_overcommit_ratio / 100;
+	/*
+	 * Leave the last 3% for root
+	 */
+	if (!cap_sys_admin)
+		allowed -= allowed / 32;
+	allowed += total_swap_pages;
+
+	/* Don't let a single process grow too big:
+	   leave 3% of the size of this process for other processes */
+	allowed -= current->mm->total_vm / 32;
+
+	/*
+	 * cast `allowed' as a signed long because vm_committed_space
+	 * sometimes has a negative value
+	 */
+	if (atomic_read(&vm_committed_space) < (long)allowed)
+		return 0;
+
+	vm_unacct_memory(pages);
+
+	return -ENOMEM;
+}
+
+EXPORT_SYMBOL(__vm_enough_memory);
+
+/*
+ * Requires inode->i_mapping->i_mmap_lock
+ */
+static void __remove_shared_vm_struct(struct vm_area_struct *vma,
+		struct file *file, struct address_space *mapping)
+{
+	if (vma->vm_flags & VM_DENYWRITE)
+		atomic_inc(&file->f_dentry->d_inode->i_writecount);
+	if (vma->vm_flags & VM_SHARED)
+		mapping->i_mmap_writable--;
+
+	flush_dcache_mmap_lock(mapping);
+	if (unlikely(vma->vm_flags & VM_NONLINEAR))
+		list_del_init(&vma->shared.vm_set.list);
+	else
+		vma_prio_tree_remove(vma, &mapping->i_mmap);
+	flush_dcache_mmap_unlock(mapping);
+}
+
+/*
+ * Unlink a file-based vm structure from its prio_tree, to hide
+ * vma from rmap and vmtruncate before freeing its page tables.
+ */
+void unlink_file_vma(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+
+	if (file) {
+		struct address_space *mapping = file->f_mapping;
+		spin_lock(&mapping->i_mmap_lock);
+		__remove_shared_vm_struct(vma, file, mapping);
+		spin_unlock(&mapping->i_mmap_lock);
+	}
+}
+
+/*
+ * Close a vm structure and free it, returning the next.
+ */
+static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+{
+	struct vm_area_struct *next = vma->vm_next;
+
+	might_sleep();
+	if (vma->vm_ops && vma->vm_ops->close)
+		vma->vm_ops->close(vma);
+	if (vma->vm_file)
+		fput(vma->vm_file);
+	mpol_free(vma_policy(vma));
+	kmem_cache_free(vm_area_cachep, vma);
+	return next;
+}
+
+asmlinkage unsigned long sys_brk(unsigned long brk)
+{
+	unsigned long rlim, retval;
+	unsigned long newbrk, oldbrk;
+	struct mm_struct *mm = current->mm;
+
+	down_write(&mm->mmap_sem);
+
+	if (brk < mm->end_code)
+		goto out;
+	newbrk = PAGE_ALIGN(brk);
+	oldbrk = PAGE_ALIGN(mm->brk);
+	if (oldbrk == newbrk)
+		goto set_brk;
+
+	/* Always allow shrinking brk. */
+	if (brk <= mm->brk) {
+		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
+			goto set_brk;
+		goto out;
+	}
+
+	/* Check against rlimit.. */
+	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+		goto out;
+
+	/* Check against existing mmap mappings. */
+	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
+		goto out;
+
+	/* Ok, looks good - let it rip. */
+	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
+		goto out;
+set_brk:
+	mm->brk = brk;
+out:
+	retval = mm->brk;
+	up_write(&mm->mmap_sem);
+	return retval;
+}
+
+#ifdef DEBUG_MM_RB
+static int browse_rb(struct rb_root *root)
+{
+	int i = 0, j;
+	struct rb_node *nd, *pn = NULL;
+	unsigned long prev = 0, pend = 0;
+
+	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+		struct vm_area_struct *vma;
+		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
+		if (vma->vm_start < prev)
+			printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
+		if (vma->vm_start < pend)
+			printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+		if (vma->vm_start > vma->vm_end)
+			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
+		i++;
+		pn = nd;
+	}
+	j = 0;
+	for (nd = pn; nd; nd = rb_prev(nd)) {
+		j++;
+	}
+	if (i != j)
+		printk("backwards %d, forwards %d\n", j, i), i = 0;
+	return i;
+}
+
+void validate_mm(struct mm_struct *mm)
+{
+	int bug = 0;
+	int i = 0;
+	struct vm_area_struct *tmp = mm->mmap;
+	while (tmp) {
+		tmp = tmp->vm_next;
+		i++;
+	}
+	if (i != mm->map_count)
+		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
+	i = browse_rb(&mm->mm_rb);
+	if (i != mm->map_count)
+		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
+	if (bug)
+		BUG();
+}
+#else
+#define validate_mm(mm) do { } while (0)
+#endif
+
+static struct vm_area_struct *
+find_vma_prepare(struct mm_struct *mm, unsigned long addr,
+		struct vm_area_struct **pprev, struct rb_node ***rb_link,
+		struct rb_node ** rb_parent)
+{
+	struct vm_area_struct * vma;
+	struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
+
+	__rb_link = &mm->mm_rb.rb_node;
+	rb_prev = __rb_parent = NULL;
+	vma = NULL;
+
+	while (*__rb_link) {
+		struct vm_area_struct *vma_tmp;
+
+		__rb_parent = *__rb_link;
+		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
+
+		if (vma_tmp->vm_end > addr) {
+			vma = vma_tmp;
+			if (vma_tmp->vm_start <= addr)
+				return vma;
+			__rb_link = &__rb_parent->rb_left;
+		} else {
+			rb_prev = __rb_parent;
+			__rb_link = &__rb_parent->rb_right;
+		}
+	}
+
+	*pprev = NULL;
+	if (rb_prev)
+		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
+	*rb_link = __rb_link;
+	*rb_parent = __rb_parent;
+	return vma;
+}
+
+static inline void
+__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
+		struct vm_area_struct *prev, struct rb_node *rb_parent)
+{
+	if (prev) {
+		vma->vm_next = prev->vm_next;
+		prev->vm_next = vma;
+	} else {
+		mm->mmap = vma;
+		if (rb_parent)
+			vma->vm_next = rb_entry(rb_parent,
+					struct vm_area_struct, vm_rb);
+		else
+			vma->vm_next = NULL;
+	}
+}
+
+void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
+		struct rb_node **rb_link, struct rb_node *rb_parent)
+{
+	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
+	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+}
+
+static inline void __vma_link_file(struct vm_area_struct *vma)
+{
+	struct file * file;
+
+	file = vma->vm_file;
+	if (file) {
+		struct address_space *mapping = file->f_mapping;
+
+		if (vma->vm_flags & VM_DENYWRITE)
+			atomic_dec(&file->f_dentry->d_inode->i_writecount);
+		if (vma->vm_flags & VM_SHARED)
+			mapping->i_mmap_writable++;
+
+		flush_dcache_mmap_lock(mapping);
+		if (unlikely(vma->vm_flags & VM_NONLINEAR))
+			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
+		else
+			vma_prio_tree_insert(vma, &mapping->i_mmap);
+		flush_dcache_mmap_unlock(mapping);
+	}
+}
+
+static void
+__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
+	struct vm_area_struct *prev, struct rb_node **rb_link,
+	struct rb_node *rb_parent)
+{
+	__vma_link_list(mm, vma, prev, rb_parent);
+	__vma_link_rb(mm, vma, rb_link, rb_parent);
+	__anon_vma_link(vma);
+}
+
+static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
+			struct vm_area_struct *prev, struct rb_node **rb_link,
+			struct rb_node *rb_parent)
+{
+	struct address_space *mapping = NULL;
+
+	if (vma->vm_file)
+		mapping = vma->vm_file->f_mapping;
+
+	if (mapping) {
+		spin_lock(&mapping->i_mmap_lock);
+		vma->vm_truncate_count = mapping->truncate_count;
+	}
+	anon_vma_lock(vma);
+
+	__vma_link(mm, vma, prev, rb_link, rb_parent);
+	__vma_link_file(vma);
+
+	anon_vma_unlock(vma);
+	if (mapping)
+		spin_unlock(&mapping->i_mmap_lock);
+
+	mm->map_count++;
+	validate_mm(mm);
+}
+
+/*
+ * Helper for vma_adjust in the split_vma insert case:
+ * insert vm structure into list and rbtree and anon_vma,
+ * but it has already been inserted into prio_tree earlier.
+ */
+static void
+__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+	struct vm_area_struct * __vma, * prev;
+	struct rb_node ** rb_link, * rb_parent;
+
+	__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
+	if (__vma && __vma->vm_start < vma->vm_end)
+		BUG();
+	__vma_link(mm, vma, prev, rb_link, rb_parent);
+	mm->map_count++;
+}
+
+static inline void
+__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
+		struct vm_area_struct *prev)
+{
+	prev->vm_next = vma->vm_next;
+	rb_erase(&vma->vm_rb, &mm->mm_rb);
+	if (mm->mmap_cache == vma)
+		mm->mmap_cache = prev;
+}
+
+/*
+ * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
+ * is already present in an i_mmap tree without adjusting the tree.
+ * The following helper function should be used when such adjustments
+ * are necessary.  The "insert" vma (if any) is to be inserted
+ * before we drop the necessary locks.
+ */
+void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *next = vma->vm_next;
+	struct vm_area_struct *importer = NULL;
+	struct address_space *mapping = NULL;
+	struct prio_tree_root *root = NULL;
+	struct file *file = vma->vm_file;
+	struct anon_vma *anon_vma = NULL;
+	long adjust_next = 0;
+	int remove_next = 0;
+
+	if (next && !insert) {
+		if (end >= next->vm_end) {
+			/*
+			 * vma expands, overlapping all the next, and
+			 * perhaps the one after too (mprotect case 6).
+			 */
+again:			remove_next = 1 + (end > next->vm_end);
+			end = next->vm_end;
+			anon_vma = next->anon_vma;
+			importer = vma;
+		} else if (end > next->vm_start) {
+			/*
+			 * vma expands, overlapping part of the next:
+			 * mprotect case 5 shifting the boundary up.
+			 */
+			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
+			anon_vma = next->anon_vma;
+			importer = vma;
+		} else if (end < vma->vm_end) {
+			/*
+			 * vma shrinks, and !insert tells it's not
+			 * split_vma inserting another: so it must be
+			 * mprotect case 4 shifting the boundary down.
+			 */
+			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
+			anon_vma = next->anon_vma;
+			importer = next;
+		}
+	}
+
+	if (file) {
+		mapping = file->f_mapping;
+		if (!(vma->vm_flags & VM_NONLINEAR))
+			root = &mapping->i_mmap;
+		spin_lock(&mapping->i_mmap_lock);
+		if (importer &&
+		    vma->vm_truncate_count != next->vm_truncate_count) {
+			/*
+			 * unmap_mapping_range might be in progress:
+			 * ensure that the expanding vma is rescanned.
+			 */
+			importer->vm_truncate_count = 0;
+		}
+		if (insert) {
+			insert->vm_truncate_count = vma->vm_truncate_count;
+			/*
+			 * Put into prio_tree now, so instantiated pages
+			 * are visible to arm/parisc __flush_dcache_page
+			 * throughout; but we cannot insert into address
+			 * space until vma start or end is updated.
+			 */
+			__vma_link_file(insert);
+		}
+	}
+
+	/*
+	 * When changing only vma->vm_end, we don't really need
+	 * anon_vma lock: but is that case worth optimizing out?
+	 */
+	if (vma->anon_vma)
+		anon_vma = vma->anon_vma;
+	if (anon_vma) {
+		spin_lock(&anon_vma->lock);
+		/*
+		 * Easily overlooked: when mprotect shifts the boundary,
+		 * make sure the expanding vma has anon_vma set if the
+		 * shrinking vma had, to cover any anon pages imported.
+		 */
+		if (importer && !importer->anon_vma) {
+			importer->anon_vma = anon_vma;
+			__anon_vma_link(importer);
+		}
+	}
+
+	if (root) {
+		flush_dcache_mmap_lock(mapping);
+		vma_prio_tree_remove(vma, root);
+		if (adjust_next)
+			vma_prio_tree_remove(next, root);
+	}
+
+	vma->vm_start = start;
+	vma->vm_end = end;
+	vma->vm_pgoff = pgoff;
+	if (adjust_next) {
+		next->vm_start += adjust_next << PAGE_SHIFT;
+		next->vm_pgoff += adjust_next;
+	}
+
+	if (root) {
+		if (adjust_next)
+			vma_prio_tree_insert(next, root);
+		vma_prio_tree_insert(vma, root);
+		flush_dcache_mmap_unlock(mapping);
+	}
+
+	if (remove_next) {
+		/*
+		 * vma_merge has merged next into vma, and needs
+		 * us to remove next before dropping the locks.
+		 */
+		__vma_unlink(mm, next, vma);
+		if (file)
+			__remove_shared_vm_struct(next, file, mapping);
+		if (next->anon_vma)
+			__anon_vma_merge(vma, next);
+	} else if (insert) {
+		/*
+		 * split_vma has split insert from vma, and needs
+		 * us to insert it before dropping the locks
+		 * (it may either follow vma or precede it).
+		 */
+		__insert_vm_struct(mm, insert);
+	}
+
+	if (anon_vma)
+		spin_unlock(&anon_vma->lock);
+	if (mapping)
+		spin_unlock(&mapping->i_mmap_lock);
+
+	if (remove_next) {
+		if (file)
+			fput(file);
+		mm->map_count--;
+		mpol_free(vma_policy(next));
+		kmem_cache_free(vm_area_cachep, next);
+		/*
+		 * In mprotect's case 6 (see comments on vma_merge),
+		 * we must remove another next too. It would clutter
+		 * up the code too much to do both in one go.
+		 */
+		if (remove_next == 2) {
+			next = vma->vm_next;
+			goto again;
+		}
+	}
+
+	validate_mm(mm);
+}
+
+/*
+ * If the vma has a ->close operation then the driver probably needs to release
+ * per-vma resources, so we don't attempt to merge those.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+
+static inline int is_mergeable_vma(struct vm_area_struct *vma,
+			struct file *file, unsigned long vm_flags)
+{
+	if (vma->vm_flags != vm_flags)
+		return 0;
+	if (vma->vm_file != file)
+		return 0;
+	if (vma->vm_ops && vma->vm_ops->close)
+		return 0;
+	return 1;
+}
+
+static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
+					struct anon_vma *anon_vma2)
+{
+	return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
+}
+
+/*
+ * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * in front of (at a lower virtual address and file offset than) the vma.
+ *
+ * We cannot merge two vmas if they have differently assigned (non-NULL)
+ * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
+ *
+ * We don't check here for the merged mmap wrapping around the end of pagecache
+ * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
+ * wrap, nor mmaps which cover the final page at index -1UL.
+ */
+static int
+can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
+	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+{
+	if (is_mergeable_vma(vma, file, vm_flags) &&
+	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+		if (vma->vm_pgoff == vm_pgoff)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
+ * beyond (at a higher virtual address and file offset than) the vma.
+ *
+ * We cannot merge two vmas if they have differently assigned (non-NULL)
+ * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
+ */
+static int
+can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
+	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
+{
+	if (is_mergeable_vma(vma, file, vm_flags) &&
+	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+		pgoff_t vm_pglen;
+		vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
+ * whether that can be merged with its predecessor or its successor.
+ * Or both (it neatly fills a hole).
+ *
+ * In most cases - when called for mmap, brk or mremap - [addr,end) is
+ * certain not to be mapped by the time vma_merge is called; but when
+ * called for mprotect, it is certain to be already mapped (either at
+ * an offset within prev, or at the start of next), and the flags of
+ * this area are about to be changed to vm_flags - and the no-change
+ * case has already been eliminated.
+ *
+ * The following mprotect cases have to be considered, where AAAA is
+ * the area passed down from mprotect_fixup, never extending beyond one
+ * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
+ *
+ *     AAAA             AAAA                AAAA          AAAA
+ *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
+ *    cannot merge    might become    might become    might become
+ *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
+ *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
+ *    mremap move:                                    PPPPNNNNNNNN 8
+ *        AAAA
+ *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
+ *    might become    case 1 below    case 2 below    case 3 below
+ *
+ * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
+ * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
+ */
+struct vm_area_struct *vma_merge(struct mm_struct *mm,
+			struct vm_area_struct *prev, unsigned long addr,
+			unsigned long end, unsigned long vm_flags,
+			struct anon_vma *anon_vma, struct file *file,
+			pgoff_t pgoff, struct mempolicy *policy)
+{
+	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
+	struct vm_area_struct *area, *next;
+
+	/*
+	 * We later require that vma->vm_flags == vm_flags,
+	 * so this tests vma->vm_flags & VM_SPECIAL, too.
+	 */
+	if (vm_flags & VM_SPECIAL)
+		return NULL;
+
+	if (prev)
+		next = prev->vm_next;
+	else
+		next = mm->mmap;
+	area = next;
+	if (next && next->vm_end == end)		/* cases 6, 7, 8 */
+		next = next->vm_next;
+
+	/*
+	 * Can it merge with the predecessor?
+	 */
+	if (prev && prev->vm_end == addr &&
+			mpol_equal(vma_policy(prev), policy) &&
+			can_vma_merge_after(prev, vm_flags,
+						anon_vma, file, pgoff)) {
+		/*
+		 * OK, it can.  Can we now merge in the successor as well?
+		 */
+		if (next && end == next->vm_start &&
+				mpol_equal(policy, vma_policy(next)) &&
+				can_vma_merge_before(next, vm_flags,
+					anon_vma, file, pgoff+pglen) &&
+				is_mergeable_anon_vma(prev->anon_vma,
+						      next->anon_vma)) {
+							/* cases 1, 6 */
+			vma_adjust(prev, prev->vm_start,
+				next->vm_end, prev->vm_pgoff, NULL);
+		} else					/* cases 2, 5, 7 */
+			vma_adjust(prev, prev->vm_start,
+				end, prev->vm_pgoff, NULL);
+		return prev;
+	}
+
+	/*
+	 * Can this new request be merged in front of next?
+	 */
+	if (next && end == next->vm_start &&
+			mpol_equal(policy, vma_policy(next)) &&
+			can_vma_merge_before(next, vm_flags,
+					anon_vma, file, pgoff+pglen)) {
+		if (prev && addr < prev->vm_end)	/* case 4 */
+			vma_adjust(prev, prev->vm_start,
+				addr, prev->vm_pgoff, NULL);
+		else					/* cases 3, 8 */
+			vma_adjust(area, addr, next->vm_end,
+				next->vm_pgoff - pglen, NULL);
+		return area;
+	}
+
+	return NULL;
+}
+
+/*
+ * find_mergeable_anon_vma is used by anon_vma_prepare, to check
+ * neighbouring vmas for a suitable anon_vma, before it goes off
+ * to allocate a new anon_vma.  It checks because a repetitive
+ * sequence of mprotects and faults may otherwise lead to distinct
+ * anon_vmas being allocated, preventing vma merge in subsequent
+ * mprotect.
+ */
+struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
+{
+	struct vm_area_struct *near;
+	unsigned long vm_flags;
+
+	near = vma->vm_next;
+	if (!near)
+		goto try_prev;
+
+	/*
+	 * Since only mprotect tries to remerge vmas, match flags
+	 * which might be mprotected into each other later on.
+	 * Neither mlock nor madvise tries to remerge at present,
+	 * so leave their flags as obstructing a merge.
+	 */
+	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+	if (near->anon_vma && vma->vm_end == near->vm_start &&
+			mpol_equal(vma_policy(vma), vma_policy(near)) &&
+			can_vma_merge_before(near, vm_flags,
+				NULL, vma->vm_file, vma->vm_pgoff +
+				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
+		return near->anon_vma;
+try_prev:
+	/*
+	 * It is potentially slow to have to call find_vma_prev here.
+	 * But it's only on the first write fault on the vma, not
+	 * every time, and we could devise a way to avoid it later
+	 * (e.g. stash info in next's anon_vma_node when assigning
+	 * an anon_vma, or when trying vma_merge).  Another time.
+	 */
+	if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
+		BUG();
+	if (!near)
+		goto none;
+
+	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
+	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
+
+	if (near->anon_vma && near->vm_end == vma->vm_start &&
+			mpol_equal(vma_policy(near), vma_policy(vma)) &&
+			can_vma_merge_after(near, vm_flags,
+				NULL, vma->vm_file, vma->vm_pgoff))
+		return near->anon_vma;
+none:
+	/*
+	 * There's no absolute need to look only at touching neighbours:
+	 * we could search further afield for "compatible" anon_vmas.
+	 * But it would probably just be a waste of time searching,
+	 * or lead to too many vmas hanging off the same anon_vma.
+	 * We're trying to allow mprotect remerging later on,
+	 * not trying to minimize memory used for anon_vmas.
+	 */
+	return NULL;
+}
+
+#ifdef CONFIG_PROC_FS
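+/*
+ * Keep the per-mm shared/exec/stack/reserved page counters (reported
+ * through /proc) up to date when 'pages' pages with the given flags are
+ * mapped (or unmapped, with a negative count).
+ */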
+void vm_stat_account(struct mm_struct *mm, unsigned long flags,
+						struct file *file, long pages)
+{
+	const unsigned long stack_flags
+		= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
+
+#ifdef CONFIG_HUGETLB
+	if (flags & VM_HUGETLB) {
+		if (!(flags & VM_DONTCOPY))
+			mm->shared_vm += pages;
+		return;
+	}
+#endif /* CONFIG_HUGETLB */
+
+	if (file) {
+		mm->shared_vm += pages;
+		if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
+			mm->exec_vm += pages;
+	} else if (flags & stack_flags)
+		mm->stack_vm += pages;
+	if (flags & (VM_RESERVED|VM_IO))
+		mm->reserved_vm += pages;
+}
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * The caller must hold down_write(current->mm->mmap_sem).
+ */
+
+unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+			unsigned long len, unsigned long prot,
+			unsigned long flags, unsigned long pgoff)
+{
+	struct mm_struct * mm = current->mm;
+	struct vm_area_struct * vma, * prev;
+	struct inode *inode;
+	unsigned int vm_flags;
+	int correct_wcount = 0;
+	int error;
+	struct rb_node ** rb_link, * rb_parent;
+	int accountable = 1;
+	unsigned long charged = 0, reqprot = prot;
+
+	if (file) {
+		if (is_file_hugepages(file))
+			accountable = 0;
+
+		if (!file->f_op || !file->f_op->mmap)
+			return -ENODEV;
+
+		if ((prot & PROT_EXEC) &&
+		    (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
+			return -EPERM;
+	}
+	/*
+	 * Does the application expect PROT_READ to imply PROT_EXEC?
+	 *
+	 * (the exception is when the underlying filesystem is mounted
+	 *  noexec, in which case we don't add PROT_EXEC.)
+	 */
+	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
+		if (!(file && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC)))
+			prot |= PROT_EXEC;
+
+	if (!len)
+		return -EINVAL;
+
+	/* Careful about overflows.. */
+	len = PAGE_ALIGN(len);
+	if (!len || len > TASK_SIZE)
+		return -ENOMEM;
+
+	/* offset overflow? */
+	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
+		return -EOVERFLOW;
+
+	/* Too many mappings? */
+	if (mm->map_count > sysctl_max_map_count)
+		return -ENOMEM;
+
+	/* Obtain the address to map to.  We verify (or select) it and ensure
+	 * that it represents a valid section of the address space.
+	 */
+	addr = get_unmapped_area(file, addr, len, pgoff, flags);
+	if (addr & ~PAGE_MASK)
+		return addr;
+
+	/* Do simple checking here so the lower-level routines won't have
+	 * to.  We assume access permissions have been handled by the open
+	 * of the memory object, so we don't do any here.
+	 */
+	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
+			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+
+	if (flags & MAP_LOCKED) {
+		if (!can_do_mlock())
+			return -EPERM;
+		vm_flags |= VM_LOCKED;
+	}
+	/* mlock MCL_FUTURE? */
+	if (vm_flags & VM_LOCKED) {
+		unsigned long locked, lock_limit;
+		locked = len >> PAGE_SHIFT;
+		locked += mm->locked_vm;
+		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit >>= PAGE_SHIFT;
+		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+			return -EAGAIN;
+	}
+
+	inode = file ? file->f_dentry->d_inode : NULL;
+
+	if (file) {
+		switch (flags & MAP_TYPE) {
+		case MAP_SHARED:
+			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
+				return -EACCES;
+
+			/*
+			 * Make sure we don't allow writing to an append-only
+			 * file..
+			 */
+			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
+				return -EACCES;
+
+			/*
+			 * Make sure there are no mandatory locks on the file.
+			 */
+			if (locks_verify_locked(inode))
+				return -EAGAIN;
+
+			vm_flags |= VM_SHARED | VM_MAYSHARE;
+			if (!(file->f_mode & FMODE_WRITE))
+				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
+
+			/* fall through */
+		case MAP_PRIVATE:
+			if (!(file->f_mode & FMODE_READ))
+				return -EACCES;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	} else {
+		switch (flags & MAP_TYPE) {
+		case MAP_SHARED:
+			vm_flags |= VM_SHARED | VM_MAYSHARE;
+			break;
+		case MAP_PRIVATE:
+			/*
+			 * Set pgoff according to addr for anon_vma.
+			 */
+			pgoff = addr >> PAGE_SHIFT;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	error = security_file_mmap(file, reqprot, prot, flags);
+	if (error)
+		return error;
+
+	/* Clear old maps */
+	error = -ENOMEM;
+munmap_back:
+	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+	if (vma && vma->vm_start < addr + len) {
+		if (do_munmap(mm, addr, len))
+			return -ENOMEM;
+		goto munmap_back;
+	}
+
+	/* Check against address space limit. */
+	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
+		return -ENOMEM;
+
+	if (accountable && (!(flags & MAP_NORESERVE) ||
+			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
+		if (vm_flags & VM_SHARED) {
+			/* Check memory availability in shmem_file_setup? */
+			vm_flags |= VM_ACCOUNT;
+		} else if (vm_flags & VM_WRITE) {
+			/*
+			 * Private writable mapping: check memory availability
+			 */
+			charged = len >> PAGE_SHIFT;
+			if (security_vm_enough_memory(charged))
+				return -ENOMEM;
+			vm_flags |= VM_ACCOUNT;
+		}
+	}
+
+	/*
+	 * Can we just expand an old private anonymous mapping?
+	 * The VM_SHARED test is necessary because shmem_zero_setup
+	 * will create the file object for a shared anonymous map below.
+	 */
+	if (!file && !(vm_flags & VM_SHARED) &&
+	    vma_merge(mm, prev, addr, addr + len, vm_flags,
+					NULL, NULL, pgoff, NULL))
+		goto out;
+
+	/*
+	 * Determine the object being mapped and call the appropriate
+	 * specific mapper.  The address has already been validated, but
+	 * not unmapped, and the old maps have been removed from the list.
+	 */
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma) {
+		error = -ENOMEM;
+		goto unacct_error;
+	}
+	memset(vma, 0, sizeof(*vma));
+
+	vma->vm_mm = mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + len;
+	vma->vm_flags = vm_flags;
+	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
+	vma->vm_pgoff = pgoff;
+
+	if (file) {
+		error = -EINVAL;
+		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+			goto free_vma;
+		if (vm_flags & VM_DENYWRITE) {
+			error = deny_write_access(file);
+			if (error)
+				goto free_vma;
+			correct_wcount = 1;
+		}
+		vma->vm_file = file;
+		get_file(file);
+		error = file->f_op->mmap(file, vma);
+		if (error)
+			goto unmap_and_free_vma;
+	} else if (vm_flags & VM_SHARED) {
+		error = shmem_zero_setup(vma);
+		if (error)
+			goto free_vma;
+	}
+
+	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
+	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
+	 * that memory reservation must be checked; but that reservation
+	 * belongs to shared memory object, not to vma: so now clear it.
+	 */
+	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
+		vma->vm_flags &= ~VM_ACCOUNT;
+
+	/* Can addr have changed??
+	 *
+	 * Answer: Yes, several device drivers can do it in their
+	 *         f_op->mmap method. -DaveM
+	 */
+	addr = vma->vm_start;
+	pgoff = vma->vm_pgoff;
+	vm_flags = vma->vm_flags;
+
+	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
+			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
+		file = vma->vm_file;
+		vma_link(mm, vma, prev, rb_link, rb_parent);
+		if (correct_wcount)
+			atomic_inc(&inode->i_writecount);
+	} else {
+		if (file) {
+			if (correct_wcount)
+				atomic_inc(&inode->i_writecount);
+			fput(file);
+		}
+		mpol_free(vma_policy(vma));
+		kmem_cache_free(vm_area_cachep, vma);
+	}
+out:
+	mm->total_vm += len >> PAGE_SHIFT;
+	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+	if (vm_flags & VM_LOCKED) {
+		mm->locked_vm += len >> PAGE_SHIFT;
+		make_pages_present(addr, addr + len);
+	}
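+	/*
+	 * For MAP_POPULATE, prefault the mapping via remap_file_pages;
+	 * mmap_sem is dropped across the call because sys_remap_file_pages
+	 * takes it for reading itself.
+	 */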
+	if (flags & MAP_POPULATE) {
+		up_write(&mm->mmap_sem);
+		sys_remap_file_pages(addr, len, 0,
+					pgoff, flags & MAP_NONBLOCK);
+		down_write(&mm->mmap_sem);
+	}
+	return addr;
+
+unmap_and_free_vma:
+	if (correct_wcount)
+		atomic_inc(&inode->i_writecount);
+	vma->vm_file = NULL;
+	fput(file);
+
+	/* Undo any partial mapping done by a device driver. */
+	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+	charged = 0;
+free_vma:
+	kmem_cache_free(vm_area_cachep, vma);
+unacct_error:
+	if (charged)
+		vm_unacct_memory(charged);
+	return error;
+}
+
+EXPORT_SYMBOL(do_mmap_pgoff);
+
+/* Get an address range which is currently unmapped.
+ * For shmat() with addr=0.
+ *
+ * Ugly calling convention alert:
+ * Return value with the low bits set means error value,
+ * ie
+ *	if (ret & ~PAGE_MASK)
+ *		error = ret;
+ *
+ * This function "knows" that -ENOMEM has the bits set.
+ */
+#ifndef HAVE_ARCH_UNMAPPED_AREA
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start_addr;
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+	if (len > mm->cached_hole_size) {
+		start_addr = addr = mm->free_area_cache;
+	} else {
+		start_addr = addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
+
+full_search:
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr) {
+			/*
+			 * Start a new search - just in case we missed
+			 * some holes.
+			 */
+			if (start_addr != TASK_UNMAPPED_BASE) {
+				addr = TASK_UNMAPPED_BASE;
+				start_addr = addr;
+				mm->cached_hole_size = 0;
+				goto full_search;
+			}
+			return -ENOMEM;
+		}
+		if (!vma || addr + len <= vma->vm_start) {
+			/*
+			 * Remember the place where we stopped the search:
+			 */
+			mm->free_area_cache = addr + len;
+			return addr;
+		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
+		addr = vma->vm_end;
+	}
+}
+#endif
+
+void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
+{
+	/*
+	 * Is this a new hole at the lowest possible address?
+	 */
+	if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
+		mm->free_area_cache = addr;
+		mm->cached_hole_size = ~0UL;
+	}
+}
+
+/*
+ * This mmap-allocator allocates new areas top-down from below the
+ * stack's low limit (the base):
+ */
+#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0;
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	/* requesting a specific address */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+				(!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	/* check if free_area_cache is useful for us */
+	if (len <= mm->cached_hole_size) {
+		mm->cached_hole_size = 0;
+		mm->free_area_cache = mm->mmap_base;
+	}
+
+	/* either no address requested or can't fit in requested address hole */
+	addr = mm->free_area_cache;
+
+	/* make sure it can fit in the remaining address space */
+	if (addr > len) {
+		vma = find_vma(mm, addr-len);
+		if (!vma || addr <= vma->vm_start)
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr-len);
+	}
+
+	if (mm->mmap_base < len)
+		goto bottomup;
+
+	addr = mm->mmap_base-len;
+
+	do {
+		/*
+		 * Lookup failure means no vma is above this address,
+		 * else if new region fits below vma->vm_start,
+		 * return with success:
+		 */
+		vma = find_vma(mm, addr);
+		if (!vma || addr+len <= vma->vm_start)
+			/* remember the address as a hint for next time */
+			return (mm->free_area_cache = addr);
+
+		/* remember the largest hole we saw so far */
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
+
+		/* try just below the current vma->vm_start */
+		addr = vma->vm_start-len;
+	} while (len < vma->vm_start);
+
+bottomup:
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	mm->cached_hole_size = ~0UL;
+	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
+	/*
+	 * Restore the topdown base:
+	 */
+	mm->free_area_cache = mm->mmap_base;
+	mm->cached_hole_size = ~0UL;
+
+	return addr;
+}
+#endif
+
+void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
+{
+	/*
+	 * Is this a new hole at the highest possible address?
+	 */
+	if (addr > mm->free_area_cache)
+		mm->free_area_cache = addr;
+
+	/* don't allow allocations above the current base */
+	if (mm->free_area_cache > mm->mmap_base)
+		mm->free_area_cache = mm->mmap_base;
+}
+
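+/*
+ * Pick an address for a new mapping: unless MAP_FIXED was given, defer to
+ * the file's or the mm's ->get_unmapped_area helper, then sanity-check the
+ * result against TASK_SIZE, page alignment and hugepage constraints.
+ */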
+unsigned long
+get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+		unsigned long pgoff, unsigned long flags)
+{
+	unsigned long ret;
+
+	if (!(flags & MAP_FIXED)) {
+		unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+
+		get_area = current->mm->get_unmapped_area;
+		if (file && file->f_op && file->f_op->get_unmapped_area)
+			get_area = file->f_op->get_unmapped_area;
+		addr = get_area(file, addr, len, pgoff, flags);
+		if (IS_ERR_VALUE(addr))
+			return addr;
+	}
+
+	if (addr > TASK_SIZE - len)
+		return -ENOMEM;
+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+	if (file && is_file_hugepages(file))  {
+		/*
+		 * Check if the given range is hugepage aligned, and
+		 * can be made suitable for hugepages.
+		 */
+		ret = prepare_hugepage_range(addr, len);
+	} else {
+		/*
+		 * Ensure that a normal request is not falling in a
+		 * reserved hugepage range.  For some archs like IA-64,
+		 * there is a separate region for hugepages.
+		 */
+		ret = is_hugepage_only_range(current->mm, addr, len);
+	}
+	if (ret)
+		return -EINVAL;
+	return addr;
+}
+
+EXPORT_SYMBOL(get_unmapped_area);
+
+/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
+struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
+{
+	struct vm_area_struct *vma = NULL;
+
+	if (mm) {
+		/* Check the cache first. */
+		/* (Cache hit rate is typically around 35%.) */
+		vma = mm->mmap_cache;
+		if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
+			struct rb_node * rb_node;
+
+			rb_node = mm->mm_rb.rb_node;
+			vma = NULL;
+
+			while (rb_node) {
+				struct vm_area_struct * vma_tmp;
+
+				vma_tmp = rb_entry(rb_node,
+						struct vm_area_struct, vm_rb);
+
+				if (vma_tmp->vm_end > addr) {
+					vma = vma_tmp;
+					if (vma_tmp->vm_start <= addr)
+						break;
+					rb_node = rb_node->rb_left;
+				} else
+					rb_node = rb_node->rb_right;
+			}
+			if (vma)
+				mm->mmap_cache = vma;
+		}
+	}
+	return vma;
+}
+
+EXPORT_SYMBOL(find_vma);
+
+/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
+struct vm_area_struct *
+find_vma_prev(struct mm_struct *mm, unsigned long addr,
+			struct vm_area_struct **pprev)
+{
+	struct vm_area_struct *vma = NULL, *prev = NULL;
+	struct rb_node * rb_node;
+	if (!mm)
+		goto out;
+
+	/* Guard against addr being lower than the first VMA */
+	vma = mm->mmap;
+
+	/* Go through the RB tree quickly. */
+	rb_node = mm->mm_rb.rb_node;
+
+	while (rb_node) {
+		struct vm_area_struct *vma_tmp;
+		vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+		if (addr < vma_tmp->vm_end) {
+			rb_node = rb_node->rb_left;
+		} else {
+			prev = vma_tmp;
+			if (!prev->vm_next || (addr < prev->vm_next->vm_end))
+				break;
+			rb_node = rb_node->rb_right;
+		}
+	}
+
+out:
+	*pprev = prev;
+	return prev ? prev->vm_next : vma;
+}
+
+/*
+ * Verify that the stack growth is acceptable and
+ * update accounting. This is shared with both the
+ * grow-up and grow-down cases.
+ */
+static int acct_stack_growth(struct vm_area_struct * vma, unsigned long size, unsigned long grow)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct rlimit *rlim = current->signal->rlim;
+
+	/* address space limit tests */
+	if (!may_expand_vm(mm, grow))
+		return -ENOMEM;
+
+	/* Stack limit test */
+	if (size > rlim[RLIMIT_STACK].rlim_cur)
+		return -ENOMEM;
+
+	/* mlock limit tests */
+	if (vma->vm_flags & VM_LOCKED) {
+		unsigned long locked;
+		unsigned long limit;
+		locked = mm->locked_vm + grow;
+		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+		if (locked > limit && !capable(CAP_IPC_LOCK))
+			return -ENOMEM;
+	}
+
+	/*
+	 * Overcommit..  This must be the final test, as it will
+	 * update security statistics.
+	 */
+	if (security_vm_enough_memory(grow))
+		return -ENOMEM;
+
+	/* Ok, everything looks good - let it rip */
+	mm->total_vm += grow;
+	if (vma->vm_flags & VM_LOCKED)
+		mm->locked_vm += grow;
+	vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+	return 0;
+}
+
+#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
+/*
+ * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+ * vma is the last one with address > vma->vm_end.  Have to extend vma.
+ */
+#ifndef CONFIG_IA64
+static inline
+#endif
+int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+{
+	int error;
+
+	if (!(vma->vm_flags & VM_GROWSUP))
+		return -EFAULT;
+
+	/*
+	 * We must make sure the anon_vma is allocated
+	 * so that the anon_vma locking is not a noop.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return -ENOMEM;
+	anon_vma_lock(vma);
+
+	/*
+	 * vma->vm_start/vm_end cannot change under us because the caller
+	 * is required to hold the mmap_sem in read mode.  We need the
+	 * anon_vma lock to serialize against concurrent expand_stacks.
+	 */
+	address += 4 + PAGE_SIZE - 1;
+	address &= PAGE_MASK;
+	error = 0;
+
+	/* Somebody else might have raced and expanded it already */
+	if (address > vma->vm_end) {
+		unsigned long size, grow;
+
+		size = address - vma->vm_start;
+		grow = (address - vma->vm_end) >> PAGE_SHIFT;
+
+		error = acct_stack_growth(vma, size, grow);
+		if (!error)
+			vma->vm_end = address;
+	}
+	anon_vma_unlock(vma);
+	return error;
+}
+#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
+
+#ifdef CONFIG_STACK_GROWSUP
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+	return expand_upwards(vma, address);
+}
+
+struct vm_area_struct *
+find_extend_vma(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma, *prev;
+
+	addr &= PAGE_MASK;
+	vma = find_vma_prev(mm, addr, &prev);
+	if (vma && (vma->vm_start <= addr))
+		return vma;
+	if (!prev || expand_stack(prev, addr))
+		return NULL;
+	if (prev->vm_flags & VM_LOCKED) {
+		make_pages_present(addr, prev->vm_end);
+	}
+	return prev;
+}
+#else
+/*
+ * vma is the first one with address < vma->vm_start.  Have to extend vma.
+ */
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+	int error;
+
+	/*
+	 * We must make sure the anon_vma is allocated
+	 * so that the anon_vma locking is not a noop.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return -ENOMEM;
+	anon_vma_lock(vma);
+
+	/*
+	 * vma->vm_start/vm_end cannot change under us because the caller
+	 * is required to hold the mmap_sem in read mode.  We need the
+	 * anon_vma lock to serialize against concurrent expand_stacks.
+	 */
+	address &= PAGE_MASK;
+	error = 0;
+
+	/* Somebody else might have raced and expanded it already */
+	if (address < vma->vm_start) {
+		unsigned long size, grow;
+
+		size = vma->vm_end - address;
+		grow = (vma->vm_start - address) >> PAGE_SHIFT;
+
+		error = acct_stack_growth(vma, size, grow);
+		if (!error) {
+			vma->vm_start = address;
+			vma->vm_pgoff -= grow;
+		}
+	}
+	anon_vma_unlock(vma);
+	return error;
+}
+
+struct vm_area_struct *
+find_extend_vma(struct mm_struct * mm, unsigned long addr)
+{
+	struct vm_area_struct * vma;
+	unsigned long start;
+
+	addr &= PAGE_MASK;
+	vma = find_vma(mm,addr);
+	if (!vma)
+		return NULL;
+	if (vma->vm_start <= addr)
+		return vma;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		return NULL;
+	start = vma->vm_start;
+	if (expand_stack(vma, addr))
+		return NULL;
+	if (vma->vm_flags & VM_LOCKED) {
+		make_pages_present(addr, start);
+	}
+	return vma;
+}
+#endif
+
+/*
+ * Ok - we have the memory areas we should free on the vma list,
+ * so release them, and do the vma updates.
+ *
+ * Called with the mm semaphore held.
+ */
+static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	/* Update high watermark before we lower total_vm */
+	update_hiwater_vm(mm);
+	do {
+		long nrpages = vma_pages(vma);
+
+		mm->total_vm -= nrpages;
+		if (vma->vm_flags & VM_LOCKED)
+			mm->locked_vm -= nrpages;
+		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
+		vma = remove_vma(vma);
+	} while (vma);
+	validate_mm(mm);
+}
+
+/*
+ * Get rid of page table information in the indicated region.
+ *
+ * Called with the mm semaphore held.
+ */
+static void unmap_region(struct mm_struct *mm,
+		struct vm_area_struct *vma, struct vm_area_struct *prev,
+		unsigned long start, unsigned long end)
+{
+	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
+	struct mmu_gather *tlb;
+	unsigned long nr_accounted = 0;
+
+	lru_add_drain();
+	tlb = tlb_gather_mmu(mm, 0);
+	update_hiwater_rss(mm);
+	unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
+	vm_unacct_memory(nr_accounted);
+	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
+				 next? next->vm_start: 0);
+	tlb_finish_mmu(tlb, start, end);
+}
+
+/*
+ * Create a list of vma's touched by the unmap, removing them from the mm's
+ * vma list as we go..
+ */
+static void
+detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
+	struct vm_area_struct *prev, unsigned long end)
+{
+	struct vm_area_struct **insertion_point;
+	struct vm_area_struct *tail_vma = NULL;
+	unsigned long addr;
+
+	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+	do {
+		rb_erase(&vma->vm_rb, &mm->mm_rb);
+		mm->map_count--;
+		tail_vma = vma;
+		vma = vma->vm_next;
+	} while (vma && vma->vm_start < end);
+	*insertion_point = vma;
+	tail_vma->vm_next = NULL;
+	if (mm->unmap_area == arch_unmap_area)
+		addr = prev ? prev->vm_end : mm->mmap_base;
+	else
+		addr = vma ?  vma->vm_start : mm->mmap_base;
+	mm->unmap_area(mm, addr);
+	mm->mmap_cache = NULL;		/* Kill the cache. */
+}
+
+/*
+ * Split a vma into two pieces at address 'addr'; a new vma is allocated
+ * either for the first part or for the tail.
+ */
+int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+	      unsigned long addr, int new_below)
+{
+	struct mempolicy *pol;
+	struct vm_area_struct *new;
+
+	if (is_vm_hugetlb_page(vma) && (addr & ~HPAGE_MASK))
+		return -EINVAL;
+
+	if (mm->map_count >= sysctl_max_map_count)
+		return -ENOMEM;
+
+	new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	/* most fields are the same, copy all, and then fixup */
+	*new = *vma;
+
+	if (new_below)
+		new->vm_end = addr;
+	else {
+		new->vm_start = addr;
+		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
+	}
+
+	pol = mpol_copy(vma_policy(vma));
+	if (IS_ERR(pol)) {
+		kmem_cache_free(vm_area_cachep, new);
+		return PTR_ERR(pol);
+	}
+	vma_set_policy(new, pol);
+
+	if (new->vm_file)
+		get_file(new->vm_file);
+
+	if (new->vm_ops && new->vm_ops->open)
+		new->vm_ops->open(new);
+
+	if (new_below)
+		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+			((addr - new->vm_start) >> PAGE_SHIFT), new);
+	else
+		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+
+	return 0;
+}
+
+/* Munmap is split into 2 main parts -- this part which finds
+ * what needs doing, and the areas themselves, which do the
+ * work.  This now handles partial unmappings.
+ * Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
+{
+	unsigned long end;
+	struct vm_area_struct *vma, *prev, *last;
+
+	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
+		return -EINVAL;
+
+	if ((len = PAGE_ALIGN(len)) == 0)
+		return -EINVAL;
+
+	/* Find the first overlapping VMA */
+	vma = find_vma_prev(mm, start, &prev);
+	if (!vma)
+		return 0;
+	/* we have  start < vma->vm_end  */
+
+	/* if it doesn't overlap, we have nothing.. */
+	end = start + len;
+	if (vma->vm_start >= end)
+		return 0;
+
+	/*
+	 * If we need to split any vma, do it now to save pain later.
+	 *
+	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
+	 * unmapped vm_area_struct will remain in use: so lower split_vma
+	 * places tmp vma above, and higher split_vma places tmp vma below.
+	 */
+	if (start > vma->vm_start) {
+		int error = split_vma(mm, vma, start, 0);
+		if (error)
+			return error;
+		prev = vma;
+	}
+
+	/* Does it split the last one? */
+	last = find_vma(mm, end);
+	if (last && end > last->vm_start) {
+		int error = split_vma(mm, last, end, 1);
+		if (error)
+			return error;
+	}
+	vma = prev? prev->vm_next: mm->mmap;
+
+	/*
+	 * Remove the vma's, and unmap the actual pages
+	 */
+	detach_vmas_to_be_unmapped(mm, vma, prev, end);
+	unmap_region(mm, vma, prev, start, end);
+
+	/* Fix up all other VM information */
+	remove_vma_list(mm, vma);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(do_munmap);
+
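+/* munmap(2) entry point: takes mmap_sem for writing around do_munmap(). */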
+asmlinkage long sys_munmap(unsigned long addr, size_t len)
+{
+	int ret;
+	struct mm_struct *mm = current->mm;
+
+	profile_munmap(addr);
+
+	down_write(&mm->mmap_sem);
+	ret = do_munmap(mm, addr, len);
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
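+/*
+ * Debug check that mmap_sem is held for writing: if a read trylock
+ * succeeds here, the caller cannot be holding the semaphore for write,
+ * so warn and drop the lock again.
+ */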
+static inline void verify_mm_writelocked(struct mm_struct *mm)
+{
+#ifdef CONFIG_DEBUG_VM
+	if (unlikely(down_read_trylock(&mm->mmap_sem))) {
+		WARN_ON(1);
+		up_read(&mm->mmap_sem);
+	}
+#endif
+}
+
+/*
+ *  This is really a simplified "do_mmap".  It only handles
+ *  anonymous maps.  Eventually we may be able to do some
+ *  brk-specific accounting here.
+ */
+unsigned long do_brk(unsigned long addr, unsigned long len)
+{
+	struct mm_struct * mm = current->mm;
+	struct vm_area_struct * vma, * prev;
+	unsigned long flags;
+	struct rb_node ** rb_link, * rb_parent;
+	pgoff_t pgoff = addr >> PAGE_SHIFT;
+
+	len = PAGE_ALIGN(len);
+	if (!len)
+		return addr;
+
+	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
+		return -EINVAL;
+
+	/*
+	 * mlock MCL_FUTURE?
+	 */
+	if (mm->def_flags & VM_LOCKED) {
+		unsigned long locked, lock_limit;
+		locked = len >> PAGE_SHIFT;
+		locked += mm->locked_vm;
+		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit >>= PAGE_SHIFT;
+		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+			return -EAGAIN;
+	}
+
+	/*
+	 * mm->mmap_sem is required to protect against another thread
+	 * changing the mappings in case we sleep.
+	 */
+	verify_mm_writelocked(mm);
+
+	/*
+	 * Clear old maps.  This also does some error checking for us.
+	 */
+ munmap_back:
+	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+	if (vma && vma->vm_start < addr + len) {
+		if (do_munmap(mm, addr, len))
+			return -ENOMEM;
+		goto munmap_back;
+	}
+
+	/* Check against address space limits *after* clearing old maps... */
+	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
+		return -ENOMEM;
+
+	if (mm->map_count > sysctl_max_map_count)
+		return -ENOMEM;
+
+	if (security_vm_enough_memory(len >> PAGE_SHIFT))
+		return -ENOMEM;
+
+	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
+	/* Can we just expand an old private anonymous mapping? */
+	if (vma_merge(mm, prev, addr, addr + len, flags,
+					NULL, NULL, pgoff, NULL))
+		goto out;
+
+	/*
+	 * create a vma struct for an anonymous mapping
+	 */
+	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+	if (!vma) {
+		vm_unacct_memory(len >> PAGE_SHIFT);
+		return -ENOMEM;
+	}
+	memset(vma, 0, sizeof(*vma));
+
+	vma->vm_mm = mm;
+	vma->vm_start = addr;
+	vma->vm_end = addr + len;
+	vma->vm_pgoff = pgoff;
+	vma->vm_flags = flags;
+	vma->vm_page_prot = protection_map[flags & 0x0f];
+	vma_link(mm, vma, prev, rb_link, rb_parent);
+out:
+	mm->total_vm += len >> PAGE_SHIFT;
+	if (flags & VM_LOCKED) {
+		mm->locked_vm += len >> PAGE_SHIFT;
+		make_pages_present(addr, addr + len);
+	}
+	return addr;
+}
+
+EXPORT_SYMBOL(do_brk);
+
+/* Release all mmaps. */
+void exit_mmap(struct mm_struct *mm)
+{
+	struct mmu_gather *tlb;
+	struct vm_area_struct *vma = mm->mmap;
+	unsigned long nr_accounted = 0;
+	unsigned long end;
+
+	lru_add_drain();
+	flush_cache_mm(mm);
+	tlb = tlb_gather_mmu(mm, 1);
+	/* Don't update_hiwater_rss(mm) here, do_exit already did */
+	/* Use -1 here to ensure all VMAs in the mm are unmapped */
+	end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
+	vm_unacct_memory(nr_accounted);
+	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+	tlb_finish_mmu(tlb, 0, end);
+
+	/*
+	 * Walk the list again, actually closing and freeing it,
+	 * with preemption enabled, without holding any MM locks.
+	 */
+	while (vma)
+		vma = remove_vma(vma);
+
+	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
+}
+
+/* Insert vm structure into process list sorted by address
+ * and into the inode's i_mmap tree.  If vm_file is non-NULL
+ * then i_mmap_lock is taken here.
+ */
+int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
+{
+	struct vm_area_struct * __vma, * prev;
+	struct rb_node ** rb_link, * rb_parent;
+
+	/*
+	 * The vm_pgoff of a purely anonymous vma should be irrelevant
+	 * until its first write fault, when page's anon_vma and index
+	 * are set.  But now set the vm_pgoff it will almost certainly
+	 * end up with (unless mremap moves it elsewhere before that
+	 * first write fault), so /proc/pid/maps tells a consistent story.
+	 *
+	 * By setting it to reflect the virtual start address of the
+	 * vma, merges and splits can happen in a seamless way, just
+	 * using the existing file pgoff checks and manipulations.
+	 * Similarly in do_mmap_pgoff and in do_brk.
+	 */
+	if (!vma->vm_file) {
+		BUG_ON(vma->anon_vma);
+		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
+	}
+	__vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
+	if (__vma && __vma->vm_start < vma->vm_end)
+		return -ENOMEM;
+	if ((vma->vm_flags & VM_ACCOUNT) &&
+	     security_vm_enough_memory(vma_pages(vma)))
+		return -ENOMEM;
+	vma_link(mm, vma, prev, rb_link, rb_parent);
+	return 0;
+}
+
+/*
+ * Copy the vma structure to a new location in the same mm,
+ * prior to moving page table entries, to effect an mremap move.
+ */
+struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
+	unsigned long addr, unsigned long len, pgoff_t pgoff)
+{
+	struct vm_area_struct *vma = *vmap;
+	unsigned long vma_start = vma->vm_start;
+	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *new_vma, *prev;
+	struct rb_node **rb_link, *rb_parent;
+	struct mempolicy *pol;
+
+	/*
+	 * If anonymous vma has not yet been faulted, update new pgoff
+	 * to match new location, to increase its chance of merging.
+	 */
+	if (!vma->vm_file && !vma->anon_vma)
+		pgoff = addr >> PAGE_SHIFT;
+
+	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
+			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
+	if (new_vma) {
+		/*
+		 * Source vma may have been merged into new_vma
+		 */
+		if (vma_start >= new_vma->vm_start &&
+		    vma_start < new_vma->vm_end)
+			*vmap = new_vma;
+	} else {
+		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+		if (new_vma) {
+			*new_vma = *vma;
+			pol = mpol_copy(vma_policy(vma));
+			if (IS_ERR(pol)) {
+				kmem_cache_free(vm_area_cachep, new_vma);
+				return NULL;
+			}
+			vma_set_policy(new_vma, pol);
+			new_vma->vm_start = addr;
+			new_vma->vm_end = addr + len;
+			new_vma->vm_pgoff = pgoff;
+			if (new_vma->vm_file)
+				get_file(new_vma->vm_file);
+			if (new_vma->vm_ops && new_vma->vm_ops->open)
+				new_vma->vm_ops->open(new_vma);
+			vma_link(mm, new_vma, prev, rb_link, rb_parent);
+		}
+	}
+	return new_vma;
+}
+
+/*
+ * Return true if the calling process may expand its vm space by the passed
+ * number of pages
+ */
+int may_expand_vm(struct mm_struct *mm, unsigned long npages)
+{
+	unsigned long cur = mm->total_vm;	/* pages */
+	unsigned long lim;
+
+	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+
+	if (cur + npages > lim)
+		return 0;
+	return 1;
+}
