/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

static inline pte_t *lookup_address(unsigned long address)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, address);
        if (!pud_present(*pud))
                return NULL;
        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                return NULL;
        if (pmd_large(*pmd))
                return (pte_t *)pmd;
        pte = pte_offset_kernel(pmd, address);
        if (pte && !pte_present(*pte))
                pte = NULL;
        return pte;
}

static struct page *split_large_page(unsigned long address, pgprot_t prot,
                                     pgprot_t ref_prot)
{
        int i;
        unsigned long addr;
        struct page *base = alloc_pages(GFP_KERNEL, 0);
        pte_t *pbase;

        if (!base)
                return NULL;
        address = __pa(address);
        addr = address & LARGE_PAGE_MASK;
        pbase = (pte_t *)page_address(base);
        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
                pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
                                   addr == address ? prot : ref_prot);
        }
        return base;
}

static void flush_kernel_map(void *address)
{
        if (0 && address && cpu_has_clflush) {
                /* is this worth it? */
                int i;
                for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
                        asm volatile("clflush (%0)" :: "r" (address + i));
        } else
                asm volatile("wbinvd":::"memory");
        if (address)
                __flush_tlb_one(address);
        else
                __flush_tlb_all();
}

static inline void flush_map(unsigned long address)
{
        on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
}

struct deferred_page {
        struct deferred_page *next;
        struct page *fpage;
        unsigned long address;
};
static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */

static inline void save_page(unsigned long address, struct page *fpage)
{
        struct deferred_page *df;

        df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
        if (!df) {
                flush_map(address);
                __free_page(fpage);
        } else {
                df->next = df_list;
                df->fpage = fpage;
                df->address = address;
                df_list = df;
        }
}

/*
 * No more special protections in this 2/4MB area - revert to a
 * large page again.
 */
static void revert_page(unsigned long address, pgprot_t ref_prot)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t large_pte;

        pgd = pgd_offset_k(address);
        BUG_ON(pgd_none(*pgd));
        pud = pud_offset(pgd, address);
        BUG_ON(pud_none(*pud));
        pmd = pmd_offset(pud, address);
        BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
        pgprot_val(ref_prot) |= _PAGE_PSE;
        large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
        set_pte((pte_t *)pmd, large_pte);
}

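/*
 * Reference-counting scheme used below: the page backing a split PTE
 * table starts with one reference from alloc_pages(); every 4k entry
 * that carries a non-default protection takes an extra reference, and
 * reverting an entry back to ref_prot drops one.  When the count falls
 * back to 1 no special protections remain in the 2/4MB range, so the
 * large page is restored and the PTE page is queued on df_list to be
 * freed after the next global_flush_tlb().
 */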
static int
__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
                   pgprot_t ref_prot)
{
        pte_t *kpte;
        struct page *kpte_page;
        unsigned kpte_flags;

        kpte = lookup_address(address);
        if (!kpte)
                return 0;
        kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
        kpte_flags = pte_val(*kpte);
        if (pgprot_val(prot) != pgprot_val(ref_prot)) {
                if ((kpte_flags & _PAGE_PSE) == 0) {
                        set_pte(kpte, pfn_pte(pfn, prot));
                } else {
                        /*
                         * split_large_page will take the reference for this
                         * change_page_attr on the split page.
                         */
                        struct page *split = split_large_page(address, prot, ref_prot);

                        if (!split)
                                return -ENOMEM;
                        set_pte(kpte, mk_pte(split, ref_prot));
                        kpte_page = split;
                }
                get_page(kpte_page);
        } else if ((kpte_flags & _PAGE_PSE) == 0) {
                set_pte(kpte, pfn_pte(pfn, ref_prot));
                __put_page(kpte_page);
        } else
                BUG();

        /* On x86-64 the direct mapping set up at boot does not use 4k pages. */
        BUG_ON(PageReserved(kpte_page));

        switch (page_count(kpte_page)) {
        case 1:
                save_page(address, kpte_page);
                revert_page(address, ref_prot);
                break;
        case 0:
                BUG(); /* memleak and failed 2M page regeneration */
        }
        return 0;
}

/*
 * Change the page attributes of a page in the linear mapping.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere.
 * This function only deals with the kernel linear map.
 *
 * Caller must call global_flush_tlb() after this.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
        int err = 0;
        int i;

        down_write(&init_mm.mmap_sem);
        for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
                unsigned long pfn = __pa(address) >> PAGE_SHIFT;

                err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
                if (err)
                        break;
                /*
                 * Handle the kernel text mapping too, which aliases part of
                 * lowmem.
                 */
                if (__pa(address) < KERNEL_TEXT_SIZE) {
                        unsigned long addr2;
                        pgprot_t prot2 = prot;

                        addr2 = __START_KERNEL_map + __pa(address);
                        pgprot_val(prot2) &= ~_PAGE_NX;
                        err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
                }
        }
        up_write(&init_mm.mmap_sem);
        return err;
}

/* Don't call this for MMIO areas that may not have a mem_map entry. */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
        unsigned long addr = (unsigned long)page_address(page);

        return change_page_attr_addr(addr, numpages, prot);
}
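
/*
 * Typical usage (illustrative sketch only, not taken from this file): a
 * driver that needs an uncached view of a page in the direct mapping would
 * do something like
 *
 *	change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
 *	global_flush_tlb();
 *
 * and restore the default attributes when done:
 *
 *	change_page_attr(page, 1, PAGE_KERNEL);
 *	global_flush_tlb();
 *
 * PAGE_KERNEL_NOCACHE is assumed here as the example protection; the exact
 * pgprot depends on the caller's needs.
 */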

void global_flush_tlb(void)
{
        struct deferred_page *df, *next_df;

        down_read(&init_mm.mmap_sem);
        df = xchg(&df_list, NULL);
        up_read(&init_mm.mmap_sem);
        if (!df)
                return;
        flush_map((df && !df->next) ? df->address : 0);
        for (; df; df = next_df) {
                next_df = df->next;
                if (df->fpage)
                        __free_page(df->fpage);
                kfree(df);
        }
}

EXPORT_SYMBOL(change_page_attr);
EXPORT_SYMBOL(global_flush_tlb);