| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * mm/prio_tree.c - priority search tree for mapping->i_mmap | 
 | 3 |  * | 
 | 4 |  * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu> | 
 | 5 |  * | 
 | 6 |  * This file is released under the GPL v2. | 
 | 7 |  * | 
 | 8 |  * Based on the radix priority search tree proposed by Edward M. McCreight | 
 | 9 |  * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985 | 
 | 10 |  * | 
 | 11 |  * 02Feb2004	Initial version | 
 | 12 |  */ | 
 | 13 |  | 
 | 14 | #include <linux/mm.h> | 
 | 15 | #include <linux/prio_tree.h> | 
| Linus Torvalds | 268bb0c | 2011-05-20 12:50:29 -0700 | [diff] [blame] | 16 | #include <linux/prefetch.h> | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 17 |  | 
 | 18 | /* | 
 | 19 |  * See lib/prio_tree.c for details on the general radix priority search tree | 
 | 20 |  * code. | 
 | 21 |  */ | 
 | 22 |  | 
 | 23 | /* | 
 | 24 |  * The following #defines are mirrored from lib/prio_tree.c. They're only used | 
 | 25 |  * for debugging, and should be removed (along with the debugging code using | 
 | 26 |  * them) when switching also VMAs to the regular prio_tree code. | 
 | 27 |  */ | 
 | 28 |  | 
/* Radix index of a vma: the file page offset where the mapping starts. */
#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
/* Size of the mapping in pages: (vm_end - vm_start) converted from bytes. */
#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
/* Heap index: last file page offset covered by the mapping.  Written as
 * pgoff + (size - 1) — subtracting 1 from the size first — to avoid overflow
 * when pgoff + size would wrap. */
#define HEAP_INDEX(vma)   ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
 | 33 |  | 
 | 34 | /* | 
 | 35 |  * Radix priority search tree for address_space->i_mmap | 
 | 36 |  * | 
 * For each vma that maps a unique set of file pages, i.e., a unique [radix_index,
| Simon Arlott | 183ff22 | 2007-10-20 01:27:18 +0200 | [diff] [blame] | 38 |  * heap_index] value, we have a corresponding priority search tree node. If | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 39 |  * multiple vmas have identical [radix_index, heap_index] value, then one of | 
 | 40 |  * them is used as a tree node and others are stored in a vm_set list. The tree | 
 | 41 |  * node points to the first vma (head) of the list using vm_set.head. | 
 | 42 |  * | 
 | 43 |  * prio_tree_root | 
 | 44 |  *      | | 
 | 45 |  *      A       vm_set.head | 
 | 46 |  *     / \      / | 
 | 47 |  *    L   R -> H-I-J-K-M-N-O-P-Q-S | 
 | 48 |  *    ^   ^    <-- vm_set.list --> | 
 | 49 |  *  tree nodes | 
 | 50 |  * | 
 | 51 |  * We need some way to identify whether a vma is a tree node, head of a vm_set | 
 | 52 |  * list, or just a member of a vm_set list. We cannot use vm_flags to store | 
 | 53 |  * such information. The reason is, in the above figure, it is possible that | 
 | 54 |  * vm_flags' of R and H are covered by the different mmap_sems. When R is | 
 | 55 |  * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold | 
 | 56 |  * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now. | 
 | 57 |  * That's why some trick involving shared.vm_set.parent is used for identifying | 
 | 58 |  * tree nodes and list head nodes. | 
 | 59 |  * | 
 | 60 |  * vma radix priority search tree node rules: | 
 | 61 |  * | 
 | 62 |  * vma->shared.vm_set.parent != NULL    ==> a tree node | 
 | 63 |  *      vma->shared.vm_set.head != NULL ==> list of others mapping same range | 
 | 64 |  *      vma->shared.vm_set.head == NULL ==> no others map the same range | 
 | 65 |  * | 
 | 66 |  * vma->shared.vm_set.parent == NULL | 
 | 67 |  * 	vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range | 
 | 68 |  * 	vma->shared.vm_set.head == NULL ==> a list node | 
 | 69 |  */ | 
 | 70 |  | 
 | 71 | /* | 
 | 72 |  * Add a new vma known to map the same set of pages as the old vma: | 
 | 73 |  * useful for fork's dup_mmap as well as vma_prio_tree_insert below. | 
 | 74 |  * Note that it just happens to work correctly on i_mmap_nonlinear too. | 
 | 75 |  */ | 
 | 76 | void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old) | 
 | 77 | { | 
 | 78 | 	/* Leave these BUG_ONs till prio_tree patch stabilizes */ | 
 | 79 | 	BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old)); | 
 | 80 | 	BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old)); | 
 | 81 |  | 
 | 82 | 	vma->shared.vm_set.head = NULL; | 
 | 83 | 	vma->shared.vm_set.parent = NULL; | 
 | 84 |  | 
 | 85 | 	if (!old->shared.vm_set.parent) | 
 | 86 | 		list_add(&vma->shared.vm_set.list, | 
 | 87 | 				&old->shared.vm_set.list); | 
 | 88 | 	else if (old->shared.vm_set.head) | 
 | 89 | 		list_add_tail(&vma->shared.vm_set.list, | 
 | 90 | 				&old->shared.vm_set.head->shared.vm_set.list); | 
 | 91 | 	else { | 
 | 92 | 		INIT_LIST_HEAD(&vma->shared.vm_set.list); | 
 | 93 | 		vma->shared.vm_set.head = old; | 
 | 94 | 		old->shared.vm_set.head = vma; | 
 | 95 | 	} | 
 | 96 | } | 
 | 97 |  | 
 | 98 | void vma_prio_tree_insert(struct vm_area_struct *vma, | 
 | 99 | 			  struct prio_tree_root *root) | 
 | 100 | { | 
 | 101 | 	struct prio_tree_node *ptr; | 
 | 102 | 	struct vm_area_struct *old; | 
 | 103 |  | 
 | 104 | 	vma->shared.vm_set.head = NULL; | 
 | 105 |  | 
 | 106 | 	ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node); | 
 | 107 | 	if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) { | 
 | 108 | 		old = prio_tree_entry(ptr, struct vm_area_struct, | 
 | 109 | 					shared.prio_tree_node); | 
 | 110 | 		vma_prio_tree_add(vma, old); | 
 | 111 | 	} | 
 | 112 | } | 
 | 113 |  | 
/*
 * Remove vma from the priority search tree rooted at root, fixing up the
 * vm_set list / tree-node bookkeeping as needed.  Three shapes to handle
 * (see the node rules at the top of this file): a lone entry, a tree node
 * backed by a vm_set list, and a vm_set list head or member.
 */
void vma_prio_tree_remove(struct vm_area_struct *vma,
			  struct prio_tree_root *root)
{
	struct vm_area_struct *node, *head, *new_head;

	if (!vma->shared.vm_set.head) {
		/* No vm_set peers attached to vma. */
		if (!vma->shared.vm_set.parent)
			/* Plain vm_set list member: just unlink it. */
			list_del_init(&vma->shared.vm_set.list);
		else
			/* Lone tree node: drop it from the tree. */
			raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
	} else {
		/* Leave this BUG_ON till prio_tree patch stabilizes */
		BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
		if (vma->shared.vm_set.parent) {
			/*
			 * vma is a tree node with a vm_set list: promote the
			 * list head into vma's tree position, and make the
			 * next list member (if any) the new list head.
			 */
			head = vma->shared.vm_set.head;
			if (!list_empty(&head->shared.vm_set.list)) {
				new_head = list_entry(
					head->shared.vm_set.list.next,
					struct vm_area_struct,
					shared.vm_set.list);
				list_del_init(&head->shared.vm_set.list);
			} else
				new_head = NULL;

			raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
					&head->shared.prio_tree_node);
			/* Re-link head <-> new_head through vm_set.head. */
			head->shared.vm_set.head = new_head;
			if (new_head)
				new_head->shared.vm_set.head = head;

		} else {
			/*
			 * vma is the head of a vm_set list; its vm_set.head
			 * points back at the tree node.  Hand the head role
			 * to the next list member, or clear the tree node's
			 * head pointer if the list becomes empty.
			 */
			node = vma->shared.vm_set.head;
			if (!list_empty(&vma->shared.vm_set.list)) {
				new_head = list_entry(
					vma->shared.vm_set.list.next,
					struct vm_area_struct,
					shared.vm_set.list);
				list_del_init(&vma->shared.vm_set.list);
				node->shared.vm_set.head = new_head;
				new_head->shared.vm_set.head = node;
			} else
				node->shared.vm_set.head = NULL;
		}
	}
}
 | 159 |  | 
 | 160 | /* | 
 | 161 |  * Helper function to enumerate vmas that map a given file page or a set of | 
 | 162 |  * contiguous file pages. The function returns vmas that at least map a single | 
 | 163 |  * page in the given range of contiguous file pages. | 
 | 164 |  */ | 
 | 165 | struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma, | 
 | 166 | 					struct prio_tree_iter *iter) | 
 | 167 | { | 
 | 168 | 	struct prio_tree_node *ptr; | 
 | 169 | 	struct vm_area_struct *next; | 
 | 170 |  | 
 | 171 | 	if (!vma) { | 
 | 172 | 		/* | 
 | 173 | 		 * First call is with NULL vma | 
 | 174 | 		 */ | 
 | 175 | 		ptr = prio_tree_next(iter); | 
 | 176 | 		if (ptr) { | 
 | 177 | 			next = prio_tree_entry(ptr, struct vm_area_struct, | 
 | 178 | 						shared.prio_tree_node); | 
 | 179 | 			prefetch(next->shared.vm_set.head); | 
 | 180 | 			return next; | 
 | 181 | 		} else | 
 | 182 | 			return NULL; | 
 | 183 | 	} | 
 | 184 |  | 
 | 185 | 	if (vma->shared.vm_set.parent) { | 
 | 186 | 		if (vma->shared.vm_set.head) { | 
 | 187 | 			next = vma->shared.vm_set.head; | 
 | 188 | 			prefetch(next->shared.vm_set.list.next); | 
 | 189 | 			return next; | 
 | 190 | 		} | 
 | 191 | 	} else { | 
 | 192 | 		next = list_entry(vma->shared.vm_set.list.next, | 
 | 193 | 				struct vm_area_struct, shared.vm_set.list); | 
 | 194 | 		if (!next->shared.vm_set.head) { | 
 | 195 | 			prefetch(next->shared.vm_set.list.next); | 
 | 196 | 			return next; | 
 | 197 | 		} | 
 | 198 | 	} | 
 | 199 |  | 
 | 200 | 	ptr = prio_tree_next(iter); | 
 | 201 | 	if (ptr) { | 
 | 202 | 		next = prio_tree_entry(ptr, struct vm_area_struct, | 
 | 203 | 					shared.prio_tree_node); | 
 | 204 | 		prefetch(next->shared.vm_set.head); | 
 | 205 | 		return next; | 
 | 206 | 	} else | 
 | 207 | 		return NULL; | 
 | 208 | } |