Blame - kernel/kexec.c - android_kernel_htc_msm8960

blob: de1441656efdacf2d86a5db5226fb6e7f1469655 [file] [log] [blame]

Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1	/*
				2	* kexec.c - kexec system call
				3	* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
				4	*
				5	* This source code is licensed under the GNU General Public License,
				6	* Version 2. See the file COPYING for more details.
				7	*/
				8
				9	#include <linux/mm.h>
				10	#include <linux/file.h>
				11	#include <linux/slab.h>
				12	#include <linux/fs.h>
				13	#include <linux/kexec.h>
				14	#include <linux/spinlock.h>
				15	#include <linux/list.h>
				16	#include <linux/highmem.h>
				17	#include <linux/syscalls.h>
				18	#include <linux/reboot.h>
				19	#include <linux/syscalls.h>
				20	#include <linux/ioport.h>
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	21	#include <linux/hardirq.h>
				22
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	23	#include <asm/page.h>
				24	#include <asm/uaccess.h>
				25	#include <asm/io.h>
				26	#include <asm/system.h>
				27	#include <asm/semaphore.h>
				28
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	29	/* Per cpu memory for storing cpu states in case of system crash. */
				30	note_buf_t* crash_notes;
				31
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	32	/* Location of the reserved area for the crash kernel */
				33	struct resource crashk_res = {
				34	.name = "Crash kernel",
				35	.start = 0,
				36	.end = 0,
				37	.flags = IORESOURCE_BUSY \| IORESOURCE_MEM
				38	};
				39
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	40	int kexec_should_crash(struct task_struct *p)
				41	{
				42	if (in_interrupt() \|\| !p->pid \|\| p->pid == 1 \|\| panic_on_oops)
				43	return 1;
				44	return 0;
				45	}
				46
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	47	/*
				48	* When kexec transitions to the new kernel there is a one-to-one
				49	* mapping between physical and virtual addresses. On processors
				50	* where you can disable the MMU this is trivial, and easy. For
				51	* others it is still a simple predictable page table to setup.
				52	*
				53	* In that environment kexec copies the new kernel to its final
				54	* resting place. This means I can only support memory whose
				55	* physical address can fit in an unsigned long. In particular
				56	* addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
				57	* If the assembly stub has more restrictive requirements
				58	* KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
				59	* defined more restrictively in <asm/kexec.h>.
				60	*
				61	* The code for the transition from the current kernel to the
				62	* new kernel is placed in the control_code_buffer, whose size
				63	* is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
				64	* page of memory is necessary, but some architectures require more.
				65	* Because this memory must be identity mapped in the transition from
				66	* virtual to physical addresses it must live in the range
				67	* 0 - TASK_SIZE, as only the user space mappings are arbitrarily
				68	* modifiable.
				69	*
				70	* The assembly stub in the control code buffer is passed a linked list
				71	* of descriptor pages detailing the source pages of the new kernel,
				72	* and the destination addresses of those source pages. As this data
				73	* structure is not used in the context of the current OS, it must
				74	* be self-contained.
				75	*
				76	* The code has been made to work with highmem pages and will use a
				77	* destination page in its final resting place (if it happens
				78	* to allocate it). The end product of this is that most of the
				79	* physical address space, and most of RAM can be used.
				80	*
				81	* Future directions include:
				82	* - allocating a page table with the control code buffer identity
				83	* mapped, to simplify machine_kexec and make kexec_on_panic more
				84	* reliable.
				85	*/
				86
				87	/*
				88	* KIMAGE_NO_DEST is an impossible destination address..., for
				89	* allocating pages whose destination address we do not care about.
				90	*/
				91	#define KIMAGE_NO_DEST (-1UL)
				92
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	93	static int kimage_is_destination_range(struct kimage *image,
				94	unsigned long start, unsigned long end);
				95	static struct page kimage_alloc_page(struct kimage image,
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	96	gfp_t gfp_mask,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	97	unsigned long dest);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	98
				99	static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	100	unsigned long nr_segments,
				101	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	102	{
				103	size_t segment_bytes;
				104	struct kimage *image;
				105	unsigned long i;
				106	int result;
				107
				108	/* Allocate a controlling structure */
				109	result = -ENOMEM;
				110	image = kmalloc(sizeof(*image), GFP_KERNEL);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	111	if (!image)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	112	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	113
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	114	memset(image, 0, sizeof(*image));
				115	image->head = 0;
				116	image->entry = &image->head;
				117	image->last_entry = &image->head;
				118	image->control_page = ~0; /* By default this does not apply */
				119	image->start = entry;
				120	image->type = KEXEC_TYPE_DEFAULT;
				121
				122	/* Initialize the list of control pages */
				123	INIT_LIST_HEAD(&image->control_pages);
				124
				125	/* Initialize the list of destination pages */
				126	INIT_LIST_HEAD(&image->dest_pages);
				127
				128	/* Initialize the list of unuseable pages */
				129	INIT_LIST_HEAD(&image->unuseable_pages);
				130
				131	/* Read in the segments */
				132	image->nr_segments = nr_segments;
				133	segment_bytes = nr_segments * sizeof(*segments);
				134	result = copy_from_user(image->segment, segments, segment_bytes);
				135	if (result)
				136	goto out;
				137
				138	/*
				139	* Verify we have good destination addresses. The caller is
				140	* responsible for making certain we don't attempt to load
				141	* the new image into invalid or reserved areas of RAM. This
				142	* just verifies it is an address we can use.
				143	*
				144	* Since the kernel does everything in page size chunks ensure
				145	* the destination addreses are page aligned. Too many
				146	* special cases crop of when we don't do this. The most
				147	* insidious is getting overlapping destination addresses
				148	* simply because addresses are changed to page size
				149	* granularity.
				150	*/
				151	result = -EADDRNOTAVAIL;
				152	for (i = 0; i < nr_segments; i++) {
				153	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	154
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	155	mstart = image->segment[i].mem;
				156	mend = mstart + image->segment[i].memsz;
				157	if ((mstart & ~PAGE_MASK) \|\| (mend & ~PAGE_MASK))
				158	goto out;
				159	if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
				160	goto out;
				161	}
				162
				163	/* Verify our destination addresses do not overlap.
				164	* If we alloed overlapping destination addresses
				165	* through very weird things can happen with no
				166	* easy explanation as one segment stops on another.
				167	*/
				168	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	169	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	170	unsigned long mstart, mend;
				171	unsigned long j;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	172
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	173	mstart = image->segment[i].mem;
				174	mend = mstart + image->segment[i].memsz;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	175	for (j = 0; j < i; j++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	176	unsigned long pstart, pend;
				177	pstart = image->segment[j].mem;
				178	pend = pstart + image->segment[j].memsz;
				179	/* Do the segments overlap ? */
				180	if ((mend > pstart) && (mstart < pend))
				181	goto out;
				182	}
				183	}
				184
				185	/* Ensure our buffer sizes are strictly less than
				186	* our memory sizes. This should always be the case,
				187	* and it is easier to check up front than to be surprised
				188	* later on.
				189	*/
				190	result = -EINVAL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	191	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	192	if (image->segment[i].bufsz > image->segment[i].memsz)
				193	goto out;
				194	}
				195
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	196	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	197	out:
				198	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	199	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	200	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	201	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	202
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	203	return result;
				204
				205	}
				206
				207	static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	208	unsigned long nr_segments,
				209	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	210	{
				211	int result;
				212	struct kimage *image;
				213
				214	/* Allocate and initialize a controlling structure */
				215	image = NULL;
				216	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	217	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	218	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	219
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	220	*rimage = image;
				221
				222	/*
				223	* Find a location for the control code buffer, and add it
				224	* the vector of segments so that it's pages will also be
				225	* counted as destination pages.
				226	*/
				227	result = -ENOMEM;
				228	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	229	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	230	if (!image->control_code_page) {
				231	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				232	goto out;
				233	}
				234
				235	result = 0;
				236	out:
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	237	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	238	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	239	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	240	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	241
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	242	return result;
				243	}
				244
				245	static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	246	unsigned long nr_segments,
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	247	struct kexec_segment __user *segments)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	248	{
				249	int result;
				250	struct kimage *image;
				251	unsigned long i;
				252
				253	image = NULL;
				254	/* Verify we have a valid entry point */
				255	if ((entry < crashk_res.start) \|\| (entry > crashk_res.end)) {
				256	result = -EADDRNOTAVAIL;
				257	goto out;
				258	}
				259
				260	/* Allocate and initialize a controlling structure */
				261	result = do_kimage_alloc(&image, entry, nr_segments, segments);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	262	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	263	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	264
				265	/* Enable the special crash kernel control page
				266	* allocation policy.
				267	*/
				268	image->control_page = crashk_res.start;
				269	image->type = KEXEC_TYPE_CRASH;
				270
				271	/*
				272	* Verify we have good destination addresses. Normally
				273	* the caller is responsible for making certain we don't
				274	* attempt to load the new image into invalid or reserved
				275	* areas of RAM. But crash kernels are preloaded into a
				276	* reserved area of ram. We must ensure the addresses
				277	* are in the reserved area otherwise preloading the
				278	* kernel could corrupt things.
				279	*/
				280	result = -EADDRNOTAVAIL;
				281	for (i = 0; i < nr_segments; i++) {
				282	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	283
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	284	mstart = image->segment[i].mem;
Vivek Goyal	50cccc6	2005-06-25 14:57:55 -0700	[diff] [blame]	285	mend = mstart + image->segment[i].memsz - 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	286	/* Ensure we are within the crash kernel limits */
				287	if ((mstart < crashk_res.start) \|\| (mend > crashk_res.end))
				288	goto out;
				289	}
				290
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	291	/*
				292	* Find a location for the control code buffer, and add
				293	* the vector of segments so that it's pages will also be
				294	* counted as destination pages.
				295	*/
				296	result = -ENOMEM;
				297	image->control_code_page = kimage_alloc_control_pages(image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	298	get_order(KEXEC_CONTROL_CODE_SIZE));
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	299	if (!image->control_code_page) {
				300	printk(KERN_ERR "Could not allocate control_code_buffer\n");
				301	goto out;
				302	}
				303
				304	result = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	305	out:
				306	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	307	*rimage = image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	308	else
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	309	kfree(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	310
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	311	return result;
				312	}
				313
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	314	static int kimage_is_destination_range(struct kimage *image,
				315	unsigned long start,
				316	unsigned long end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	317	{
				318	unsigned long i;
				319
				320	for (i = 0; i < image->nr_segments; i++) {
				321	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	322
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	323	mstart = image->segment[i].mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	324	mend = mstart + image->segment[i].memsz;
				325	if ((end > mstart) && (start < mend))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	326	return 1;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	327	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	328
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	329	return 0;
				330	}
				331
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	332	static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	333	{
				334	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	335
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	336	pages = alloc_pages(gfp_mask, order);
				337	if (pages) {
				338	unsigned int count, i;
				339	pages->mapping = NULL;
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	340	set_page_private(pages, order);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	341	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	342	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	343	SetPageReserved(pages + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	344	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	345
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	346	return pages;
				347	}
				348
				349	static void kimage_free_pages(struct page *page)
				350	{
				351	unsigned int order, count, i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	352
Hugh Dickins	4c21e2f	2005-10-29 18:16:40 -0700	[diff] [blame]	353	order = page_private(page);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	354	count = 1 << order;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	355	for (i = 0; i < count; i++)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	356	ClearPageReserved(page + i);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	357	__free_pages(page, order);
				358	}
				359
				360	static void kimage_free_page_list(struct list_head *list)
				361	{
				362	struct list_head pos, next;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	363
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	364	list_for_each_safe(pos, next, list) {
				365	struct page *page;
				366
				367	page = list_entry(pos, struct page, lru);
				368	list_del(&page->lru);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	369	kimage_free_pages(page);
				370	}
				371	}
				372
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	373	static struct page kimage_alloc_normal_control_pages(struct kimage image,
				374	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	375	{
				376	/* Control pages are special, they are the intermediaries
				377	* that are needed while we copy the rest of the pages
				378	* to their final resting place. As such they must
				379	* not conflict with either the destination addresses
				380	* or memory the kernel is already using.
				381	*
				382	* The only case where we really need more than one of
				383	* these are for architectures where we cannot disable
				384	* the MMU and must instead generate an identity mapped
				385	* page table for all of the memory.
				386	*
				387	* At worst this runs in O(N) of the image size.
				388	*/
				389	struct list_head extra_pages;
				390	struct page *pages;
				391	unsigned int count;
				392
				393	count = 1 << order;
				394	INIT_LIST_HEAD(&extra_pages);
				395
				396	/* Loop while I can allocate a page and the page allocated
				397	* is a destination page.
				398	*/
				399	do {
				400	unsigned long pfn, epfn, addr, eaddr;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	401
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	402	pages = kimage_alloc_pages(GFP_KERNEL, order);
				403	if (!pages)
				404	break;
				405	pfn = page_to_pfn(pages);
				406	epfn = pfn + count;
				407	addr = pfn << PAGE_SHIFT;
				408	eaddr = epfn << PAGE_SHIFT;
				409	if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) \|\|
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	410	kimage_is_destination_range(image, addr, eaddr)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	411	list_add(&pages->lru, &extra_pages);
				412	pages = NULL;
				413	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	414	} while (!pages);
				415
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	416	if (pages) {
				417	/* Remember the allocated page... */
				418	list_add(&pages->lru, &image->control_pages);
				419
				420	/* Because the page is already in it's destination
				421	* location we will never allocate another page at
				422	* that address. Therefore kimage_alloc_pages
				423	* will not return it (again) and we don't need
				424	* to give it an entry in image->segment[].
				425	*/
				426	}
				427	/* Deal with the destination pages I have inadvertently allocated.
				428	*
				429	* Ideally I would convert multi-page allocations into single
				430	* page allocations, and add everyting to image->dest_pages.
				431	*
				432	* For now it is simpler to just free the pages.
				433	*/
				434	kimage_free_page_list(&extra_pages);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	435
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	436	return pages;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	437	}
				438
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	439	static struct page kimage_alloc_crash_control_pages(struct kimage image,
				440	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	441	{
				442	/* Control pages are special, they are the intermediaries
				443	* that are needed while we copy the rest of the pages
				444	* to their final resting place. As such they must
				445	* not conflict with either the destination addresses
				446	* or memory the kernel is already using.
				447	*
				448	* Control pages are also the only pags we must allocate
				449	* when loading a crash kernel. All of the other pages
				450	* are specified by the segments and we just memcpy
				451	* into them directly.
				452	*
				453	* The only case where we really need more than one of
				454	* these are for architectures where we cannot disable
				455	* the MMU and must instead generate an identity mapped
				456	* page table for all of the memory.
				457	*
				458	* Given the low demand this implements a very simple
				459	* allocator that finds the first hole of the appropriate
				460	* size in the reserved memory region, and allocates all
				461	* of the memory up to and including the hole.
				462	*/
				463	unsigned long hole_start, hole_end, size;
				464	struct page *pages;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	465
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	466	pages = NULL;
				467	size = (1 << order) << PAGE_SHIFT;
				468	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
				469	hole_end = hole_start + size - 1;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	470	while (hole_end <= crashk_res.end) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	471	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	472
				473	if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	474	break;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	475	if (hole_end > crashk_res.end)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	476	break;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	477	/* See if I overlap any of the segments */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	478	for (i = 0; i < image->nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	479	unsigned long mstart, mend;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	480
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	481	mstart = image->segment[i].mem;
				482	mend = mstart + image->segment[i].memsz - 1;
				483	if ((hole_end >= mstart) && (hole_start <= mend)) {
				484	/* Advance the hole to the end of the segment */
				485	hole_start = (mend + (size - 1)) & ~(size - 1);
				486	hole_end = hole_start + size - 1;
				487	break;
				488	}
				489	}
				490	/* If I don't overlap any segments I have found my hole! */
				491	if (i == image->nr_segments) {
				492	pages = pfn_to_page(hole_start >> PAGE_SHIFT);
				493	break;
				494	}
				495	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	496	if (pages)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	497	image->control_page = hole_end;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	498
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	499	return pages;
				500	}
				501
				502
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	503	struct page kimage_alloc_control_pages(struct kimage image,
				504	unsigned int order)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	505	{
				506	struct page *pages = NULL;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	507
				508	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	509	case KEXEC_TYPE_DEFAULT:
				510	pages = kimage_alloc_normal_control_pages(image, order);
				511	break;
				512	case KEXEC_TYPE_CRASH:
				513	pages = kimage_alloc_crash_control_pages(image, order);
				514	break;
				515	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	516
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	517	return pages;
				518	}
				519
				520	static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
				521	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	522	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	523	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	524
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	525	if (image->entry == image->last_entry) {
				526	kimage_entry_t *ind_page;
				527	struct page *page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	528
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	529	page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	530	if (!page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	531	return -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	532
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	533	ind_page = page_address(page);
				534	*image->entry = virt_to_phys(ind_page) \| IND_INDIRECTION;
				535	image->entry = ind_page;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	536	image->last_entry = ind_page +
				537	((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	538	}
				539	*image->entry = entry;
				540	image->entry++;
				541	*image->entry = 0;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	542
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	543	return 0;
				544	}
				545
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	546	static int kimage_set_destination(struct kimage *image,
				547	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	548	{
				549	int result;
				550
				551	destination &= PAGE_MASK;
				552	result = kimage_add_entry(image, destination \| IND_DESTINATION);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	553	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	554	image->destination = destination;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	555
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	556	return result;
				557	}
				558
				559
				560	static int kimage_add_page(struct kimage *image, unsigned long page)
				561	{
				562	int result;
				563
				564	page &= PAGE_MASK;
				565	result = kimage_add_entry(image, page \| IND_SOURCE);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	566	if (result == 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	567	image->destination += PAGE_SIZE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	568
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	569	return result;
				570	}
				571
				572
				573	static void kimage_free_extra_pages(struct kimage *image)
				574	{
				575	/* Walk through and free any extra destination pages I may have */
				576	kimage_free_page_list(&image->dest_pages);
				577
				578	/* Walk through and free any unuseable pages I have cached */
				579	kimage_free_page_list(&image->unuseable_pages);
				580
				581	}
				582	static int kimage_terminate(struct kimage *image)
				583	{
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	584	if (*image->entry != 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	585	image->entry++;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	586
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	587	*image->entry = IND_DONE;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	588
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	589	return 0;
				590	}
				591
				592	#define for_each_kimage_entry(image, ptr, entry) \
				593	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
				594	ptr = (entry & IND_INDIRECTION)? \
				595	phys_to_virt((entry & PAGE_MASK)): ptr +1)
				596
				597	static void kimage_free_entry(kimage_entry_t entry)
				598	{
				599	struct page *page;
				600
				601	page = pfn_to_page(entry >> PAGE_SHIFT);
				602	kimage_free_pages(page);
				603	}
				604
				605	static void kimage_free(struct kimage *image)
				606	{
				607	kimage_entry_t *ptr, entry;
				608	kimage_entry_t ind = 0;
				609
				610	if (!image)
				611	return;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	612
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	613	kimage_free_extra_pages(image);
				614	for_each_kimage_entry(image, ptr, entry) {
				615	if (entry & IND_INDIRECTION) {
				616	/* Free the previous indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	617	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	618	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	619	/* Save this indirection page until we are
				620	* done with it.
				621	*/
				622	ind = entry;
				623	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	624	else if (entry & IND_SOURCE)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	625	kimage_free_entry(entry);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	626	}
				627	/* Free the final indirection page */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	628	if (ind & IND_INDIRECTION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	629	kimage_free_entry(ind);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	630
				631	/* Handle any machine specific cleanup */
				632	machine_kexec_cleanup(image);
				633
				634	/* Free the kexec control pages... */
				635	kimage_free_page_list(&image->control_pages);
				636	kfree(image);
				637	}
				638
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	639	static kimage_entry_t kimage_dst_used(struct kimage image,
				640	unsigned long page)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	641	{
				642	kimage_entry_t *ptr, entry;
				643	unsigned long destination = 0;
				644
				645	for_each_kimage_entry(image, ptr, entry) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	646	if (entry & IND_DESTINATION)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	647	destination = entry & PAGE_MASK;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	648	else if (entry & IND_SOURCE) {
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	649	if (page == destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	650	return ptr;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	651	destination += PAGE_SIZE;
				652	}
				653	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	654
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	655	return NULL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	656	}
				657
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	658	static struct page kimage_alloc_page(struct kimage image,
Al Viro	9796fdd	2005-10-21 03:22:03 -0400	[diff] [blame]	659	gfp_t gfp_mask,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	660	unsigned long destination)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	661	{
				662	/*
				663	* Here we implement safeguards to ensure that a source page
				664	* is not copied to its destination page before the data on
				665	* the destination page is no longer useful.
				666	*
				667	* To do this we maintain the invariant that a source page is
				668	* either its own destination page, or it is not a
				669	* destination page at all.
				670	*
				671	* That is slightly stronger than required, but the proof
				672	* that no problems will not occur is trivial, and the
				673	* implementation is simply to verify.
				674	*
				675	* When allocating all pages normally this algorithm will run
				676	* in O(N) time, but in the worst case it will run in O(N^2)
				677	* time. If the runtime is a problem the data structures can
				678	* be fixed.
				679	*/
				680	struct page *page;
				681	unsigned long addr;
				682
				683	/*
				684	* Walk through the list of destination pages, and see if I
				685	* have a match.
				686	*/
				687	list_for_each_entry(page, &image->dest_pages, lru) {
				688	addr = page_to_pfn(page) << PAGE_SHIFT;
				689	if (addr == destination) {
				690	list_del(&page->lru);
				691	return page;
				692	}
				693	}
				694	page = NULL;
				695	while (1) {
				696	kimage_entry_t *old;
				697
				698	/* Allocate a page, if we run out of memory give up */
				699	page = kimage_alloc_pages(gfp_mask, 0);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	700	if (!page)
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	701	return NULL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	702	/* If the page cannot be used file it away */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	703	if (page_to_pfn(page) >
				704	(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	705	list_add(&page->lru, &image->unuseable_pages);
				706	continue;
				707	}
				708	addr = page_to_pfn(page) << PAGE_SHIFT;
				709
				710	/* If it is the destination page we want use it */
				711	if (addr == destination)
				712	break;
				713
				714	/* If the page is not a destination page use it */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	715	if (!kimage_is_destination_range(image, addr,
				716	addr + PAGE_SIZE))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	717	break;
				718
				719	/*
				720	* I know that the page is someones destination page.
				721	* See if there is already a source page for this
				722	* destination page. And if so swap the source pages.
				723	*/
				724	old = kimage_dst_used(image, addr);
				725	if (old) {
				726	/* If so move it */
				727	unsigned long old_addr;
				728	struct page *old_page;
				729
				730	old_addr = *old & PAGE_MASK;
				731	old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
				732	copy_highpage(page, old_page);
				733	old = addr \| (old & ~PAGE_MASK);
				734
				735	/* The old page I have found cannot be a
				736	* destination page, so return it.
				737	*/
				738	addr = old_addr;
				739	page = old_page;
				740	break;
				741	}
				742	else {
				743	/* Place the page on the destination list I
				744	* will use it later.
				745	*/
				746	list_add(&page->lru, &image->dest_pages);
				747	}
				748	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	749
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	750	return page;
				751	}
				752
				753	static int kimage_load_normal_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	754	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	755	{
				756	unsigned long maddr;
				757	unsigned long ubytes, mbytes;
				758	int result;
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	759	unsigned char __user *buf;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	760
				761	result = 0;
				762	buf = segment->buf;
				763	ubytes = segment->bufsz;
				764	mbytes = segment->memsz;
				765	maddr = segment->mem;
				766
				767	result = kimage_set_destination(image, maddr);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	768	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	769	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	770
				771	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	772	struct page *page;
				773	char *ptr;
				774	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	775
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	776	page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
				777	if (page == 0) {
				778	result = -ENOMEM;
				779	goto out;
				780	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	781	result = kimage_add_page(image, page_to_pfn(page)
				782	<< PAGE_SHIFT);
				783	if (result < 0)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	784	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	785
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	786	ptr = kmap(page);
				787	/* Start with a clear page */
				788	memset(ptr, 0, PAGE_SIZE);
				789	ptr += maddr & ~PAGE_MASK;
				790	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	791	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	792	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	793
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	794	uchunk = mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	795	if (uchunk > ubytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	796	uchunk = ubytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	797
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	798	result = copy_from_user(ptr, buf, uchunk);
				799	kunmap(page);
				800	if (result) {
				801	result = (result < 0) ? result : -EIO;
				802	goto out;
				803	}
				804	ubytes -= uchunk;
				805	maddr += mchunk;
				806	buf += mchunk;
				807	mbytes -= mchunk;
				808	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	809	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	810	return result;
				811	}
				812
				813	static int kimage_load_crash_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	814	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	815	{
				816	/* For crash dumps kernels we simply copy the data from
				817	* user space to it's destination.
				818	* We do things a page at a time for the sake of kmap.
				819	*/
				820	unsigned long maddr;
				821	unsigned long ubytes, mbytes;
				822	int result;
Alexey Dobriyan	314b6a4	2005-06-27 22:29:33 -0700	[diff] [blame]	823	unsigned char __user *buf;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	824
				825	result = 0;
				826	buf = segment->buf;
				827	ubytes = segment->bufsz;
				828	mbytes = segment->memsz;
				829	maddr = segment->mem;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	830	while (mbytes) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	831	struct page *page;
				832	char *ptr;
				833	size_t uchunk, mchunk;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	834
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	835	page = pfn_to_page(maddr >> PAGE_SHIFT);
				836	if (page == 0) {
				837	result = -ENOMEM;
				838	goto out;
				839	}
				840	ptr = kmap(page);
				841	ptr += maddr & ~PAGE_MASK;
				842	mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	843	if (mchunk > mbytes)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	844	mchunk = mbytes;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	845
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	846	uchunk = mchunk;
				847	if (uchunk > ubytes) {
				848	uchunk = ubytes;
				849	/* Zero the trailing part of the page */
				850	memset(ptr + uchunk, 0, mchunk - uchunk);
				851	}
				852	result = copy_from_user(ptr, buf, uchunk);
				853	kunmap(page);
				854	if (result) {
				855	result = (result < 0) ? result : -EIO;
				856	goto out;
				857	}
				858	ubytes -= uchunk;
				859	maddr += mchunk;
				860	buf += mchunk;
				861	mbytes -= mchunk;
				862	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	863	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	864	return result;
				865	}
				866
				867	static int kimage_load_segment(struct kimage *image,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	868	struct kexec_segment *segment)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	869	{
				870	int result = -ENOMEM;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	871
				872	switch (image->type) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	873	case KEXEC_TYPE_DEFAULT:
				874	result = kimage_load_normal_segment(image, segment);
				875	break;
				876	case KEXEC_TYPE_CRASH:
				877	result = kimage_load_crash_segment(image, segment);
				878	break;
				879	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	880
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	881	return result;
				882	}
				883
/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down.  Preventing on-going dmas, and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination.  And
 *   jumps into the image at entry.
 *
 * kexec does not sync, or unmount filesystems so if you need
 * that to happen you need to do that yourself.
 */
				904	struct kimage *kexec_image = NULL;
				905	static struct kimage *kexec_crash_image = NULL;
				906	/*
				907	* A home grown binary mutex.
				908	* Nothing can wait so this mutex is safe to use
				909	* in interrupt context :)
				910	*/
				911	static int kexec_lock = 0;
				912
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	913	asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
				914	struct kexec_segment __user *segments,
				915	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	916	{
				917	struct kimage *dest_image, image;
				918	int locked;
				919	int result;
				920
				921	/* We only trust the superuser with rebooting the system. */
				922	if (!capable(CAP_SYS_BOOT))
				923	return -EPERM;
				924
				925	/*
				926	* Verify we have a legal set of flags
				927	* This leaves us room for future extensions.
				928	*/
				929	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
				930	return -EINVAL;
				931
				932	/* Verify we are on the appropriate architecture */
				933	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
				934	((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	935	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	936
				937	/* Put an artificial cap on the number
				938	* of segments passed to kexec_load.
				939	*/
				940	if (nr_segments > KEXEC_SEGMENT_MAX)
				941	return -EINVAL;
				942
				943	image = NULL;
				944	result = 0;
				945
				946	/* Because we write directly to the reserved memory
				947	* region when loading crash kernels we need a mutex here to
				948	* prevent multiple crash kernels from attempting to load
				949	* simultaneously, and to prevent a crash kernel from loading
				950	* over the top of a in use crash kernel.
				951	*
				952	* KISS: always take the mutex.
				953	*/
				954	locked = xchg(&kexec_lock, 1);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	955	if (locked)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	956	return -EBUSY;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	957
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	958	dest_image = &kexec_image;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	959	if (flags & KEXEC_ON_CRASH)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	960	dest_image = &kexec_crash_image;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	961	if (nr_segments > 0) {
				962	unsigned long i;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	963
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	964	/* Loading another kernel to reboot into */
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	965	if ((flags & KEXEC_ON_CRASH) == 0)
				966	result = kimage_normal_alloc(&image, entry,
				967	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	968	/* Loading another kernel to switch to if this one crashes */
				969	else if (flags & KEXEC_ON_CRASH) {
				970	/* Free any current crash dump kernel before
				971	* we corrupt it.
				972	*/
				973	kimage_free(xchg(&kexec_crash_image, NULL));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	974	result = kimage_crash_alloc(&image, entry,
				975	nr_segments, segments);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	976	}
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	977	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	978	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	979
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	980	result = machine_kexec_prepare(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	981	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	982	goto out;
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	983
				984	for (i = 0; i < nr_segments; i++) {
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	985	result = kimage_load_segment(image, &image->segment[i]);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	986	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	987	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	988	}
				989	result = kimage_terminate(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	990	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	991	goto out;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	992	}
				993	/* Install the new kernel, and Uninstall the old */
				994	image = xchg(dest_image, image);
				995
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	996	out:
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	997	xchg(&kexec_lock, 0); /* Release the mutex */
				998	kimage_free(image);
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	999
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1000	return result;
				1001	}
				1002
				1003	#ifdef CONFIG_COMPAT
				1004	asmlinkage long compat_sys_kexec_load(unsigned long entry,
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1005	unsigned long nr_segments,
				1006	struct compat_kexec_segment __user *segments,
				1007	unsigned long flags)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1008	{
				1009	struct compat_kexec_segment in;
				1010	struct kexec_segment out, __user *ksegments;
				1011	unsigned long i, result;
				1012
				1013	/* Don't allow clients that don't understand the native
				1014	* architecture to do anything.
				1015	*/
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1016	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1017	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1018
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1019	if (nr_segments > KEXEC_SEGMENT_MAX)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1020	return -EINVAL;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1021
				1022	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
				1023	for (i=0; i < nr_segments; i++) {
				1024	result = copy_from_user(&in, &segments[i], sizeof(in));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1025	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1026	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1027
				1028	out.buf = compat_ptr(in.buf);
				1029	out.bufsz = in.bufsz;
				1030	out.mem = in.mem;
				1031	out.memsz = in.memsz;
				1032
				1033	result = copy_to_user(&ksegments[i], &out, sizeof(out));
Maneesh Soni	72414d3	2005-06-25 14:58:28 -0700	[diff] [blame]	1034	if (result)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1035	return -EFAULT;
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1036	}
				1037
				1038	return sys_kexec_load(entry, nr_segments, ksegments, flags);
				1039	}
				1040	#endif
				1041
Alexander Nyberg	6e274d1	2005-06-25 14:58:26 -0700	[diff] [blame]	1042	void crash_kexec(struct pt_regs *regs)
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1043	{
				1044	struct kimage *image;
				1045	int locked;
				1046
				1047
				1048	/* Take the kexec_lock here to prevent sys_kexec_load
				1049	* running on one cpu from replacing the crash kernel
				1050	* we are using after a panic on a different cpu.
				1051	*
				1052	* If the crash kernel was not located in a fixed area
				1053	* of memory the xchg(&kexec_crash_image) would be
				1054	* sufficient. But since I reuse the memory...
				1055	*/
				1056	locked = xchg(&kexec_lock, 1);
				1057	if (!locked) {
				1058	image = xchg(&kexec_crash_image, NULL);
				1059	if (image) {
Vivek Goyal	e996e58	2006-01-09 20:51:44 -0800	[diff] [blame^]	1060	struct pt_regs fixed_regs;
				1061	crash_setup_regs(&fixed_regs, regs);
				1062	machine_crash_shutdown(&fixed_regs);
Eric W. Biederman	dc009d9	2005-06-25 14:57:52 -0700	[diff] [blame]	1063	machine_kexec(image);
				1064	}
				1065	xchg(&kexec_lock, 0);
				1066	}
				1067	}
Vivek Goyal	cc57165	2006-01-09 20:51:41 -0800	[diff] [blame]	1068
				1069	static int __init crash_notes_memory_init(void)
				1070	{
				1071	/* Allocate memory for saving cpu registers. */
				1072	crash_notes = alloc_percpu(note_buf_t);
				1073	if (!crash_notes) {
				1074	printk("Kexec: Memory allocation for saving cpu register"
				1075	" states failed\n");
				1076	return -ENOMEM;
				1077	}
				1078	return 0;
				1079	}
				1080	module_init(crash_notes_memory_init)