#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/pagemap.h>
#include <linux/ptrace.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/seq_file.h>

#include <asm/elf.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include "internal.h"

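/*
 * Report memory usage for an mm: the VmPeak/VmSize/VmLck/VmHWM/VmRSS/
 * VmData/VmStk/VmExe/VmLib/VmPTE lines, all in kB.
 */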
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
	unsigned long data, text, lib;
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * worth the effort, such snapshots can always be inconsistent.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = get_mm_rss(mm);
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;

	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
	seq_printf(m,
		"VmPeak:\t%8lu kB\n"
		"VmSize:\t%8lu kB\n"
		"VmLck:\t%8lu kB\n"
		"VmHWM:\t%8lu kB\n"
		"VmRSS:\t%8lu kB\n"
		"VmData:\t%8lu kB\n"
		"VmStk:\t%8lu kB\n"
		"VmExe:\t%8lu kB\n"
		"VmLib:\t%8lu kB\n"
		"VmPTE:\t%8lu kB\n",
		hiwater_vm << (PAGE_SHIFT-10),
		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
		mm->locked_vm << (PAGE_SHIFT-10),
		hiwater_rss << (PAGE_SHIFT-10),
		total_rss << (PAGE_SHIFT-10),
		data << (PAGE_SHIFT-10),
		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
}

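/* Total virtual address space size, in bytes. */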
unsigned long task_vsize(struct mm_struct *mm)
{
	return PAGE_SIZE * mm->total_vm;
}

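/*
 * Page counts for /proc/<pid>/statm: shared (file-backed rss), text,
 * data and resident pages.  Returns the total mapping size in pages.
 */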
int task_statm(struct mm_struct *mm, int *shared, int *text,
	       int *data, int *resident)
{
	*shared = get_mm_counter(mm, file_rss);
	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
								>> PAGE_SHIFT;
	*data = mm->total_vm - mm->shared_vm;
	*resident = *shared + get_mm_counter(mm, anon_rss);
	return mm->total_vm;
}

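/*
 * Resolve /proc/<pid>/exe: find the first executable, file-backed
 * mapping and return a reference to its path.
 */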
int proc_exe_link(struct inode *inode, struct path *path)
{
	struct vm_area_struct * vma;
	int result = -ENOENT;
	struct task_struct *task = get_proc_task(inode);
	struct mm_struct * mm = NULL;

	if (task) {
		mm = get_task_mm(task);
		put_task_struct(task);
	}
	if (!mm)
		goto out;
	down_read(&mm->mmap_sem);

	vma = mm->mmap;
	while (vma) {
		if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
			break;
		vma = vma->vm_next;
	}

	if (vma) {
		*path = vma->vm_file->f_path;
		path_get(&vma->vm_file->f_path);
		result = 0;
	}

	up_read(&mm->mmap_sem);
	mmput(mm);
out:
	return result;
}

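/* Pad the output so the mapping name starts in a fixed column. */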
static void pad_len_spaces(struct seq_file *m, int len)
{
	len = 25 + sizeof(void*) * 6 - len;
	if (len < 1)
		len = 1;
	seq_printf(m, "%*c", len, ' ');
}

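/*
 * Drop the mmap_sem and mm reference taken by m_start(), unless we are
 * looking at the tail (gate) vma.
 */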
static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
{
	if (vma && vma != priv->tail_vma) {
		struct mm_struct *mm = vma->vm_mm;
		up_read(&mm->mmap_sem);
		mmput(mm);
	}
}

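/*
 * seq_file iterator over a task's vmas.  m_start() takes a reference on
 * the task and its mm, grabs mmap_sem and returns the vma at *pos; the
 * gate vma (if any) is handed out last as priv->tail_vma.  m->version
 * caches the last address shown, so a subsequent read can restart with
 * find_vma() instead of rescanning the whole list.
 */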
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_maps_private *priv = m->private;
	unsigned long last_addr = m->version;
	struct mm_struct *mm;
	struct vm_area_struct *vma, *tail_vma = NULL;
	loff_t l = *pos;

	/* Clear the per syscall fields in priv */
	priv->task = NULL;
	priv->tail_vma = NULL;

	/*
	 * We remember last_addr rather than next_addr to hit with
	 * mmap_cache most of the time. We have zero last_addr at
	 * the beginning and also after lseek. We will have -1 last_addr
	 * after the end of the vmas.
	 */

	if (last_addr == -1UL)
		return NULL;

	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
	if (!priv->task)
		return NULL;

	mm = mm_for_maps(priv->task);
	if (!mm)
		return NULL;

	tail_vma = get_gate_vma(priv->task);
	priv->tail_vma = tail_vma;

	/* Start with last addr hint */
	vma = find_vma(mm, last_addr);
	if (last_addr && vma) {
		vma = vma->vm_next;
		goto out;
	}

	/*
	 * Check that the vma index is within range and scan
	 * the list sequentially until we reach *pos.
	 */
	vma = NULL;
	if ((unsigned long)l < mm->map_count) {
		vma = mm->mmap;
		while (l-- && vma)
			vma = vma->vm_next;
		goto out;
	}

	if (l != mm->map_count)
		tail_vma = NULL; /* After gate vma */

out:
	if (vma)
		return vma;

	/* End of vmas has been reached */
	m->version = (tail_vma != NULL)? 0: -1UL;
	up_read(&mm->mmap_sem);
	mmput(mm);
	return tail_vma;
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *vma = v;
	struct vm_area_struct *tail_vma = priv->tail_vma;

	(*pos)++;
	if (vma && (vma != tail_vma) && vma->vm_next)
		return vma->vm_next;
	vma_stop(priv, vma);
	return (vma != tail_vma)? tail_vma: NULL;
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *vma = v;

	vma_stop(priv, vma);
	if (priv->task)
		put_task_struct(priv->task);
}

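/*
 * Common open routine for the maps-style files: allocate the per-file
 * proc_maps_private, record the target pid and attach it to the
 * seq_file created by seq_open().
 */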
static int do_maps_open(struct inode *inode, struct file *file,
			const struct seq_operations *ops)
{
	struct proc_maps_private *priv;
	int ret = -ENOMEM;
	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (priv) {
		priv->pid = proc_pid(inode);
		ret = seq_open(file, ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = priv;
		} else {
			kfree(priv);
		}
	}
	return ret;
}

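/*
 * Emit one /proc/<pid>/maps line: address range, permissions, offset,
 * device, inode and the mapping name ([heap], [stack], [vdso] or the
 * backing file's path).
 */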
static int show_map(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct task_struct *task = priv->task;
	struct vm_area_struct *vma = v;
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	int flags = vma->vm_flags;
	unsigned long ino = 0;
	dev_t dev = 0;
	int len;

	if (maps_protect && !ptrace_may_attach(task))
		return -EACCES;

	if (file) {
		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
	}

	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
			vma->vm_start,
			vma->vm_end,
			flags & VM_READ ? 'r' : '-',
			flags & VM_WRITE ? 'w' : '-',
			flags & VM_EXEC ? 'x' : '-',
			flags & VM_MAYSHARE ? 's' : 'p',
			vma->vm_pgoff << PAGE_SHIFT,
			MAJOR(dev), MINOR(dev), ino, &len);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
	if (file) {
		pad_len_spaces(m, len);
		seq_path(m, &file->f_path, "\n");
	} else {
		const char *name = arch_vma_name(vma);
		if (!name) {
			if (mm) {
				if (vma->vm_start <= mm->start_brk &&
						vma->vm_end >= mm->brk) {
					name = "[heap]";
				} else if (vma->vm_start <= mm->start_stack &&
					   vma->vm_end >= mm->start_stack) {
					name = "[stack]";
				}
			} else {
				name = "[vdso]";
			}
		}
		if (name) {
			pad_len_spaces(m, len);
			seq_puts(m, name);
		}
	}
	seq_putc(m, '\n');

	if (m->count < m->size)  /* vma is copied successfully */
		m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
	return 0;
}

static const struct seq_operations proc_pid_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_map
};

static int maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_maps_op);
}

const struct file_operations proc_maps_operations = {
	.open		= maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};

/*
 * Proportional Set Size (PSS): my share of RSS.
 *
 * PSS of a process is the count of pages it has in memory, where each
 * page is divided by the number of processes sharing it.  So if a
 * process has 1000 pages all to itself, and 1000 shared with one other
 * process, its PSS will be 1500.
 *
 * To keep (accumulated) division errors low, we use a 64-bit
 * fixed-point pss counter: (pss >> PSS_SHIFT) is the real byte count.
 *
 * A shift of 12 before division means (assuming 4K page size):
 * 	- 1M 3-user-pages add up to 8KB errors;
 * 	- supports mapcount up to 2^24, or 16M;
 * 	- supports PSS up to 2^52 bytes, or 4PB.
 */
#define PSS_SHIFT 12
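/*
 * Illustrative example of the fixed-point arithmetic, assuming 4K pages
 * (PAGE_SHIFT == 12): a page mapped by three tasks contributes
 * (4096 << PSS_SHIFT) / 3 = 5592405 to pss, i.e. 5592405 >> PSS_SHIFT
 * ~= 1365 bytes of real share; show_smap() later prints
 * pss >> (10 + PSS_SHIFT) to convert the accumulated value to kB.
 */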

#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats
{
	struct vm_area_struct *vma;
	unsigned long resident;
	unsigned long shared_clean;
	unsigned long shared_dirty;
	unsigned long private_clean;
	unsigned long private_dirty;
	unsigned long referenced;
	u64 pss;
};

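/*
 * Accumulate smaps statistics for one pmd's worth of ptes: resident,
 * shared/private clean/dirty, referenced and the fixed-point pss.
 */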
static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			   void *private)
{
	struct mem_size_stats *mss = private;
	struct vm_area_struct *vma = mss->vma;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;
	int mapcount;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;
		if (!pte_present(ptent))
			continue;

		mss->resident += PAGE_SIZE;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/* Accumulate the size in pages that have been accessed. */
		if (pte_young(ptent) || PageReferenced(page))
			mss->referenced += PAGE_SIZE;
		mapcount = page_mapcount(page);
		if (mapcount >= 2) {
			if (pte_dirty(ptent))
				mss->shared_dirty += PAGE_SIZE;
			else
				mss->shared_clean += PAGE_SIZE;
			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
		} else {
			if (pte_dirty(ptent))
				mss->private_dirty += PAGE_SIZE;
			else
				mss->private_clean += PAGE_SIZE;
			mss->pss += (PAGE_SIZE << PSS_SHIFT);
		}
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };

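/*
 * /proc/<pid>/smaps: print the maps line for the vma, then the
 * Size/Rss/Pss/Shared/Private/Referenced breakdown gathered by walking
 * its page tables.
 */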
static int show_smap(struct seq_file *m, void *v)
{
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss;
	int ret;

	memset(&mss, 0, sizeof mss);
	mss.vma = vma;
	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
		walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
				&smaps_walk, &mss);

	ret = show_map(m, v);
	if (ret)
		return ret;

	seq_printf(m,
		   "Size:           %8lu kB\n"
		   "Rss:            %8lu kB\n"
		   "Pss:            %8lu kB\n"
		   "Shared_Clean:   %8lu kB\n"
		   "Shared_Dirty:   %8lu kB\n"
		   "Private_Clean:  %8lu kB\n"
		   "Private_Dirty:  %8lu kB\n"
		   "Referenced:     %8lu kB\n",
		   (vma->vm_end - vma->vm_start) >> 10,
		   mss.resident >> 10,
		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
		   mss.shared_clean  >> 10,
		   mss.shared_dirty  >> 10,
		   mss.private_clean >> 10,
		   mss.private_dirty >> 10,
		   mss.referenced >> 10);

	return ret;
}

static const struct seq_operations proc_pid_smaps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_smap
};

static int smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_smaps_op);
}

const struct file_operations proc_smaps_operations = {
	.open		= smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};

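/*
 * Clear the accessed/young bit in each pte and the referenced bit on
 * each mapped page, so future references can be detected.
 */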
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
				unsigned long end, void *private)
{
	struct vm_area_struct *vma = private;
	pte_t *pte, ptent;
	spinlock_t *ptl;
	struct page *page;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		ptent = *pte;
		if (!pte_present(ptent))
			continue;

		page = vm_normal_page(vma, addr, ptent);
		if (!page)
			continue;

		/* Clear accessed and referenced bits. */
		ptep_test_and_clear_young(vma, addr, pte);
		ClearPageReferenced(page);
	}
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	return 0;
}

static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };

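/*
 * /proc/<pid>/clear_refs: writing a non-zero value clears the
 * referenced bits in every non-hugetlb vma of the task, then flushes
 * the TLB for the whole mm.
 */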
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
				size_t count, loff_t *ppos)
{
	struct task_struct *task;
	char buffer[PROC_NUMBUF], *end;
	struct mm_struct *mm;
	struct vm_area_struct *vma;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;
	if (!simple_strtol(buffer, &end, 0))
		return -EINVAL;
	if (*end == '\n')
		end++;
	task = get_proc_task(file->f_path.dentry->d_inode);
	if (!task)
		return -ESRCH;
	mm = get_task_mm(task);
	if (mm) {
		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next)
			if (!is_vm_hugetlb_page(vma))
				walk_page_range(mm, vma->vm_start, vma->vm_end,
						&clear_refs_walk, vma);
		flush_tlb_mm(mm);
		up_read(&mm->mmap_sem);
		mmput(mm);
	}
	put_task_struct(task);
	if (end - buffer == 0)
		return -EIO;
	return end - buffer;
}

const struct file_operations proc_clear_refs_operations = {
	.write		= clear_refs_write,
};

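/* Tracks the unfilled remainder of the user buffer during a pagemap read. */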
struct pagemapread {
	char __user *out, *end;
};

#define PM_ENTRY_BYTES      sizeof(u64)
#define PM_STATUS_BITS      3
#define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr)       (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS      6
#define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x)        (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)

#define PM_PRESENT          PM_STATUS(4LL)
#define PM_SWAP             PM_STATUS(2LL)
#define PM_NOT_PRESENT      PM_PSHIFT(PAGE_SHIFT)
#define PM_END_OF_BUFFER    1
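/*
 * Illustrative example, assuming 4K pages (PAGE_SHIFT == 12): a present
 * page with page frame number 0x1234 is reported as
 * PM_PFRAME(0x1234) | PM_PSHIFT(12) | PM_PRESENT, i.e.
 * 0x1234 | (12ULL << 55) | (1ULL << 63).  Userspace can recover the pfn
 * with entry & PM_PFRAME_MASK, the page shift with (entry >> 55) & 0x3f,
 * and test bit 63 (present) or bit 62 (swapped).
 */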

static int add_to_pagemap(unsigned long addr, u64 pfn,
			  struct pagemapread *pm)
{
	/*
	 * Make sure there's room in the buffer for an
	 * entire entry.  Otherwise, only copy part of
	 * the pfn.
	 */
	if (pm->out + PM_ENTRY_BYTES >= pm->end) {
		if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
			return -EFAULT;
		pm->out = pm->end;
		return PM_END_OF_BUFFER;
	}

	if (put_user(pfn, pm->out))
		return -EFAULT;
	pm->out += PM_ENTRY_BYTES;
	return 0;
}

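/* Report a hole in the page tables as a run of PM_NOT_PRESENT entries. */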
static int pagemap_pte_hole(unsigned long start, unsigned long end,
				void *private)
{
	struct pagemapread *pm = private;
	unsigned long addr;
	int err = 0;
	for (addr = start; addr < end; addr += PAGE_SIZE) {
		err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
		if (err)
			break;
	}
	return err;
}

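/* Pack swap type (low MAX_SWAPFILES_SHIFT bits) and swap offset into one value. */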
u64 swap_pte_to_pagemap_entry(pte_t pte)
{
	swp_entry_t e = pte_to_swp_entry(pte);
	return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
}

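/*
 * Emit one pagemap entry per page in the pmd range: PM_SWAP for swapped
 * ptes, PM_PRESENT with the pfn for present ptes, PM_NOT_PRESENT otherwise.
 */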
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			     void *private)
{
	struct pagemapread *pm = private;
	pte_t *pte;
	int err = 0;

	for (; addr != end; addr += PAGE_SIZE) {
		u64 pfn = PM_NOT_PRESENT;
		pte = pte_offset_map(pmd, addr);
		if (is_swap_pte(*pte))
			pfn = PM_PFRAME(swap_pte_to_pagemap_entry(*pte))
				| PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
		else if (pte_present(*pte))
			pfn = PM_PFRAME(pte_pfn(*pte))
				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
		/* unmap so we're not in atomic when we copy to userspace */
		pte_unmap(pte);
		err = add_to_pagemap(addr, pfn, pm);
		if (err)
			return err;
	}

	cond_resched();

	return err;
}

static struct mm_walk pagemap_walk = {
	.pmd_entry = pagemap_pte_range,
	.pte_hole = pagemap_pte_hole
};

/*
 * /proc/pid/pagemap - an array mapping virtual pages to pfns
 *
 * For each page in the address space, this file contains one 64-bit entry
 * consisting of the following:
 *
 * Bits 0-54  page frame number (PFN) if present
 * Bits 0-4   swap type if swapped
 * Bits 5-54  swap offset if swapped
 * Bits 55-60 page shift (page size = 1<<page shift)
 * Bit  61    reserved for future use
 * Bit  62    page swapped
 * Bit  63    page present
 *
 * If the page is not present but in swap, then the PFN contains an
 * encoding of the swap file number and the page's offset into the
 * swap. Unmapped pages return a null PFN. This allows determining
 * precisely which pages are mapped (or in swap) and comparing mapped
 * pages between processes.
 *
 * Efficient users of this interface will use /proc/pid/maps to
 * determine which areas of memory are actually mapped and llseek to
 * skip over unmapped regions.
 */
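/*
 * Implementation note: pagemap_read() pins the pages backing the user
 * buffer with get_user_pages() before walking the target's page tables,
 * marks them dirty and releases them when done; *ppos is interpreted as
 * an index (in PM_ENTRY_BYTES units) into the task's virtual address
 * space.
 */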
static ssize_t pagemap_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
	struct page **pages, *page;
	unsigned long uaddr, uend;
	struct mm_struct *mm;
	struct pagemapread pm;
	int pagecount;
	int ret = -ESRCH;

	if (!task)
		goto out;

	ret = -EACCES;
	if (!ptrace_may_attach(task))
		goto out_task;

	ret = -EINVAL;
	/* file position must be aligned */
	if (*ppos % PM_ENTRY_BYTES)
		goto out_task;

	ret = 0;
	mm = get_task_mm(task);
	if (!mm)
		goto out_task;

	ret = -ENOMEM;
	uaddr = (unsigned long)buf & PAGE_MASK;
	uend = (unsigned long)(buf + count);
	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		goto out_mm;

	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm, uaddr, pagecount,
			     1, 0, pages, NULL);
	up_read(&current->mm->mmap_sem);

	if (ret < 0)
		goto out_free;

	if (ret != pagecount) {
		pagecount = ret;
		ret = -EFAULT;
		goto out_pages;
	}

	pm.out = buf;
	pm.end = buf + count;

	if (!ptrace_may_attach(task)) {
		ret = -EIO;
	} else {
		unsigned long src = *ppos;
		unsigned long svpfn = src / PM_ENTRY_BYTES;
		unsigned long start_vaddr = svpfn << PAGE_SHIFT;
		unsigned long end_vaddr = TASK_SIZE_OF(task);

		/* watch out for wraparound */
		if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
			start_vaddr = end_vaddr;

		/*
		 * The odds are that this will stop walking way
		 * before end_vaddr, because the length of the
		 * user buffer is tracked in "pm", and the walk
		 * will stop when we hit the end of the buffer.
		 */
		ret = walk_page_range(mm, start_vaddr, end_vaddr,
					&pagemap_walk, &pm);
		if (ret == PM_END_OF_BUFFER)
			ret = 0;
		/* don't need mmap_sem for these, but this looks cleaner */
		*ppos += pm.out - buf;
		if (!ret)
			ret = pm.out - buf;
	}

out_pages:
	for (; pagecount; pagecount--) {
		page = pages[pagecount-1];
		if (!PageReserved(page))
			SetPageDirty(page);
		page_cache_release(page);
	}
out_free:
	kfree(pages);
out_mm:
	mmput(mm);
out_task:
	put_task_struct(task);
out:
	return ret;
}

const struct file_operations proc_pagemap_operations = {
	.llseek		= mem_lseek, /* borrow this */
	.read		= pagemap_read,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */

#ifdef CONFIG_NUMA
extern int show_numa_map(struct seq_file *m, void *v);

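/* Enforce the maps_protect ptrace check before delegating to show_numa_map(). */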
static int show_numa_map_checked(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct task_struct *task = priv->task;

	if (maps_protect && !ptrace_may_attach(task))
		return -EACCES;

	return show_numa_map(m, v);
}

static const struct seq_operations proc_pid_numa_maps_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_numa_map_checked
};

static int numa_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_numa_maps_op);
}

const struct file_operations proc_numa_maps_operations = {
	.open		= numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};
#endif