/*
 *    Copyright IBM Corp. 2007,2009
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

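/*
 * Layout note (derived from the constants below): a 4K page is split
 * into TABLES_PER_PAGE page-table fragments of 256 entries each; the
 * FRAG_MASK bits in page->flags track which fragments are in use and
 * SECOND_HALVES marks the fragments reserved as shadow/pgste halves.
 * Region and segment (crst) tables occupy 1 << ALLOC_ORDER pages.
 * clear_table_pgstes() prepares a fresh page-table page for a mm with
 * pgstes: the pte fragments are set to empty, the pgste halves are zeroed.
 */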
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define TABLES_PER_PAGE	4
#define FRAG_MASK	15UL
#define SECOND_HALVES	10UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 256, 0, PAGE_SIZE/4);
	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 768, 0, PAGE_SIZE/4);
}

#else
#define ALLOC_ORDER	2
#define TABLES_PER_PAGE	2
#define FRAG_MASK	3UL
#define SECOND_HALVES	2UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	memset(table + 256, 0, PAGE_SIZE/2);
}

#endif

unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

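/*
 * Handle the "vmalloc=<size>" early parameter: size the vmalloc area
 * downwards from VMALLOC_END.
 */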
static int __init parse_vmalloc(char *arg)
{
	if (!arg)
		return -EINVAL;
	VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
	return 0;
}
early_param("vmalloc", parse_vmalloc);

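/*
 * Allocate a region/segment (crst) table for @mm.  If @noexec is set a
 * shadow table is allocated as well and its physical address is kept in
 * page->index.  The backing page is linked into the per-mm crst_list.
 */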
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	page->index = 0;
	if (noexec) {
		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
		if (!shadow) {
			__free_pages(page, ALLOC_ORDER);
			return NULL;
		}
		page->index = page_to_phys(shadow);
	}
	spin_lock(&mm->context.list_lock);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->context.list_lock);
	return (unsigned long *) page_to_phys(page);
}

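/* Unlink a crst table from the mm and free it together with its shadow. */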
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);

	spin_lock(&mm->context.list_lock);
	list_del(&page->lru);
	spin_unlock(&mm->context.list_lock);
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
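/*
 * Grow the page table hierarchy of @mm until it covers @limit: allocate
 * a new top-level table, make the old top level its first entry and
 * switch the ASCE to the region-third resp. region-second table type.
 */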
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

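/*
 * Shrink the address space of @mm: drop top-level tables until asce_limit
 * is down to @limit and adjust the ASCE type accordingly.
 */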
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

/*
 * page table allocation/free routines.
 */
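/*
 * page_table_alloc: hand out one page-table fragment.  Reuse a partially
 * filled page from pgtable_list if possible, otherwise allocate and
 * initialize a new page.  Each fragment holds 256 pte entries; mms with
 * pgstes or a noexec shadow take two adjacent fragments at once, the
 * upper one serving as the pgste/shadow half.
 */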
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	spin_lock(&mm->context.list_lock);
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
			page = NULL;
	}
	if (!page) {
		spin_unlock(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		page->flags &= ~FRAG_MASK;
		table = (unsigned long *) page_to_phys(page);
		if (mm->context.has_pgste)
			clear_table_pgstes(table);
		else
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	table = (unsigned long *) page_to_phys(page);
	while (page->flags & bits) {
		table += 256;
		bits <<= 1;
	}
	page->flags |= bits;
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->context.list_lock);
	return table;
}

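/*
 * page_table_free: clear the fragment's bits in page->flags.  If other
 * fragments of the page are still in use, requeue the page on
 * pgtable_list; otherwise free the whole page.
 */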
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->context.list_lock);
	page->flags ^= bits;
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		list_move(&page->lru, &mm->context.pgtable_list);
		page = NULL;
	} else
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
	spin_unlock(&mm->context.list_lock);
	if (page) {
		pgtable_page_dtor(page);
		__free_page(page);
	}
}

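/*
 * Switch off the no-exec shadow handling for @mm: free the shadow
 * region/segment tables and mark the second page-table halves as
 * available again, then reload the mm context of @tsk.
 */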
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
	struct page *page;

	spin_lock(&mm->context.list_lock);
	/* Free shadow region and segment tables. */
	list_for_each_entry(page, &mm->context.crst_list, lru)
		if (page->index) {
			free_pages((unsigned long) page->index, ALLOC_ORDER);
			page->index = 0;
		}
	/* "Free" second halves of page tables. */
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
		page->flags &= ~SECOND_HALVES;
	spin_unlock(&mm->context.list_lock);
	mm->context.noexec = 0;
	update_mm(mm, tsk);
}

/*
 * Switch on pgstes for the calling userspace process (needed for KVM).
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Has the user address space mode been switched away from home space?
	 * If not, we cannot do SIE. */
	if (user_mode == HOME_SPACE_MODE)
		return -EINVAL;

	/* Do we already have pgstes? If yes, we are done. */
	if (tsk->mm->context.has_pgste)
		return 0;

	/* Let's check whether we are allowed to replace the mm. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* We copy the mm and let dup_mm create the page tables with pgstes. */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again whether something happened in the meantime. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
	    !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
	    tsk->mm != tsk->active_mm) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* OK, we are alone. No ptrace, no threads, etc. Switch to the new mm. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

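/*
 * kernel_page_present - used by hibernation with DEBUG_PAGEALLOC: probe
 * with "lra" whether the kernel mapping of @page is currently present
 * (condition code 0 means the address translated successfully).
 */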
#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
bool kernel_page_present(struct page *page)
{
	unsigned long addr;
	int cc;

	addr = page_to_phys(page);
	asm volatile(
		"	lra	%1,0(%1)\n"
		"	ipm	%0\n"
		"	srl	%0,28"
		: "=d" (cc), "+a" (addr) : : "cc");
	return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */