| Andy Lutomirski | 98eedc3 | 2011-07-13 09:24:16 -0400 | [diff] [blame] | 1 | /* | 
 | 2 |  * parse_vdso.c: Linux reference vDSO parser | 
 | 3 |  * Written by Andrew Lutomirski, 2011. | 
 | 4 |  * | 
 | 5 |  * This code is meant to be linked in to various programs that run on Linux. | 
 | 6 |  * As such, it is available with as few restrictions as possible.  This file | 
 | 7 |  * is licensed under the Creative Commons Zero License, version 1.0, | 
 | 8 |  * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode | 
 | 9 |  * | 
 | 10 |  * The vDSO is a regular ELF DSO that the kernel maps into user space when | 
 | 11 |  * it starts a program.  It works equally well in statically and dynamically | 
 | 12 |  * linked binaries. | 
 | 13 |  * | 
 | 14 |  * This code is tested on x86_64.  In principle it should work on any 64-bit | 
 | 15 |  * architecture that has a vDSO. | 
 | 16 |  */ | 
 | 17 |  | 
 | 18 | #include <stdbool.h> | 
 | 19 | #include <stdint.h> | 
 | 20 | #include <string.h> | 
 | 21 | #include <elf.h> | 
 | 22 |  | 
 | 23 | /* | 
 | 24 |  * To use this vDSO parser, first call one of the vdso_init_* functions. | 
 | 25 |  * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR | 
 | 26 |  * to vdso_init_from_sysinfo_ehdr.  Otherwise pass auxv to vdso_init_from_auxv. | 
 | 27 |  * Then call vdso_sym for each symbol you want.  For example, to look up | 
 | 28 |  * gettimeofday on x86_64, use: | 
 | 29 |  * | 
 | 30 |  *     <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday"); | 
 | 31 |  * or | 
 | 32 |  *     <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); | 
 | 33 |  * | 
 | 34 |  * vdso_sym will return 0 if the symbol doesn't exist or if the init function | 
 | 35 |  * failed or was not called.  vdso_sym is a little slow, so its return value | 
 | 36 |  * should be cached. | 
 | 37 |  * | 
 | 38 |  * vdso_sym is threadsafe; the init functions are not. | 
 | 39 |  * | 
 | 40 |  * These are the prototypes: | 
 | 41 |  */ | 
 | 42 | extern void vdso_init_from_auxv(void *auxv); | 
 | 43 | extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); | 
 | 44 | extern void *vdso_sym(const char *version, const char *name); | 
 | 45 |  | 
 | 46 |  | 
 | 47 | /* And here's the code. */ | 
 | 48 |  | 
 | 49 | #ifndef __x86_64__ | 
 | 50 | # error Not yet ported to non-x86_64 architectures | 
 | 51 | #endif | 
 | 52 |  | 
 | 53 | static struct vdso_info | 
 | 54 | { | 
 | 55 | 	bool valid; | 
 | 56 |  | 
 | 57 | 	/* Load information */ | 
 | 58 | 	uintptr_t load_addr; | 
 | 59 | 	uintptr_t load_offset;  /* load_addr - recorded vaddr */ | 
 | 60 |  | 
 | 61 | 	/* Symbol table */ | 
 | 62 | 	Elf64_Sym *symtab; | 
 | 63 | 	const char *symstrings; | 
 | 64 | 	Elf64_Word *bucket, *chain; | 
 | 65 | 	Elf64_Word nbucket, nchain; | 
 | 66 |  | 
 | 67 | 	/* Version table */ | 
 | 68 | 	Elf64_Versym *versym; | 
 | 69 | 	Elf64_Verdef *verdef; | 
 | 70 | } vdso_info; | 
 | 71 |  | 
 | 72 | /* Straight from the ELF specification. */ | 
 | 73 | static unsigned long elf_hash(const unsigned char *name) | 
 | 74 | { | 
 | 75 | 	unsigned long h = 0, g; | 
 | 76 | 	while (*name) | 
 | 77 | 	{ | 
 | 78 | 		h = (h << 4) + *name++; | 
 | 79 | 		if (g = h & 0xf0000000) | 
 | 80 | 			h ^= g >> 24; | 
 | 81 | 		h &= ~g; | 
 | 82 | 	} | 
 | 83 | 	return h; | 
 | 84 | } | 
 | 85 |  | 
 | 86 | void vdso_init_from_sysinfo_ehdr(uintptr_t base) | 
 | 87 | { | 
 | 88 | 	size_t i; | 
 | 89 | 	bool found_vaddr = false; | 
 | 90 |  | 
 | 91 | 	vdso_info.valid = false; | 
 | 92 |  | 
 | 93 | 	vdso_info.load_addr = base; | 
 | 94 |  | 
 | 95 | 	Elf64_Ehdr *hdr = (Elf64_Ehdr*)base; | 
 | 96 | 	Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff); | 
 | 97 | 	Elf64_Dyn *dyn = 0; | 
 | 98 |  | 
 | 99 | 	/* | 
 | 100 | 	 * We need two things from the segment table: the load offset | 
 | 101 | 	 * and the dynamic table. | 
 | 102 | 	 */ | 
 | 103 | 	for (i = 0; i < hdr->e_phnum; i++) | 
 | 104 | 	{ | 
 | 105 | 		if (pt[i].p_type == PT_LOAD && !found_vaddr) { | 
 | 106 | 			found_vaddr = true; | 
 | 107 | 			vdso_info.load_offset =	base | 
 | 108 | 				+ (uintptr_t)pt[i].p_offset | 
 | 109 | 				- (uintptr_t)pt[i].p_vaddr; | 
 | 110 | 		} else if (pt[i].p_type == PT_DYNAMIC) { | 
 | 111 | 			dyn = (Elf64_Dyn*)(base + pt[i].p_offset); | 
 | 112 | 		} | 
 | 113 | 	} | 
 | 114 |  | 
 | 115 | 	if (!found_vaddr || !dyn) | 
 | 116 | 		return;  /* Failed */ | 
 | 117 |  | 
 | 118 | 	/* | 
 | 119 | 	 * Fish out the useful bits of the dynamic table. | 
 | 120 | 	 */ | 
 | 121 | 	Elf64_Word *hash = 0; | 
 | 122 | 	vdso_info.symstrings = 0; | 
 | 123 | 	vdso_info.symtab = 0; | 
 | 124 | 	vdso_info.versym = 0; | 
 | 125 | 	vdso_info.verdef = 0; | 
 | 126 | 	for (i = 0; dyn[i].d_tag != DT_NULL; i++) { | 
 | 127 | 		switch (dyn[i].d_tag) { | 
 | 128 | 		case DT_STRTAB: | 
 | 129 | 			vdso_info.symstrings = (const char *) | 
 | 130 | 				((uintptr_t)dyn[i].d_un.d_ptr | 
 | 131 | 				 + vdso_info.load_offset); | 
 | 132 | 			break; | 
 | 133 | 		case DT_SYMTAB: | 
 | 134 | 			vdso_info.symtab = (Elf64_Sym *) | 
 | 135 | 				((uintptr_t)dyn[i].d_un.d_ptr | 
 | 136 | 				 + vdso_info.load_offset); | 
 | 137 | 			break; | 
 | 138 | 		case DT_HASH: | 
 | 139 | 			hash = (Elf64_Word *) | 
 | 140 | 				((uintptr_t)dyn[i].d_un.d_ptr | 
 | 141 | 				 + vdso_info.load_offset); | 
 | 142 | 			break; | 
 | 143 | 		case DT_VERSYM: | 
 | 144 | 			vdso_info.versym = (Elf64_Versym *) | 
 | 145 | 				((uintptr_t)dyn[i].d_un.d_ptr | 
 | 146 | 				 + vdso_info.load_offset); | 
 | 147 | 			break; | 
 | 148 | 		case DT_VERDEF: | 
 | 149 | 			vdso_info.verdef = (Elf64_Verdef *) | 
 | 150 | 				((uintptr_t)dyn[i].d_un.d_ptr | 
 | 151 | 				 + vdso_info.load_offset); | 
 | 152 | 			break; | 
 | 153 | 		} | 
 | 154 | 	} | 
 | 155 | 	if (!vdso_info.symstrings || !vdso_info.symtab || !hash) | 
 | 156 | 		return;  /* Failed */ | 
 | 157 |  | 
 | 158 | 	if (!vdso_info.verdef) | 
 | 159 | 		vdso_info.versym = 0; | 
 | 160 |  | 
 | 161 | 	/* Parse the hash table header. */ | 
 | 162 | 	vdso_info.nbucket = hash[0]; | 
 | 163 | 	vdso_info.nchain = hash[1]; | 
 | 164 | 	vdso_info.bucket = &hash[2]; | 
 | 165 | 	vdso_info.chain = &hash[vdso_info.nbucket + 2]; | 
 | 166 |  | 
 | 167 | 	/* That's all we need. */ | 
 | 168 | 	vdso_info.valid = true; | 
 | 169 | } | 
 | 170 |  | 
 | 171 | static bool vdso_match_version(Elf64_Versym ver, | 
 | 172 | 			       const char *name, Elf64_Word hash) | 
 | 173 | { | 
 | 174 | 	/* | 
 | 175 | 	 * This is a helper function to check if the version indexed by | 
 | 176 | 	 * ver matches name (which hashes to hash). | 
 | 177 | 	 * | 
 | 178 | 	 * The version definition table is a mess, and I don't know how | 
 | 179 | 	 * to do this in better than linear time without allocating memory | 
 | 180 | 	 * to build an index.  I also don't know why the table has | 
 | 181 | 	 * variable size entries in the first place. | 
 | 182 | 	 * | 
 | 183 | 	 * For added fun, I can't find a comprehensible specification of how | 
 | 184 | 	 * to parse all the weird flags in the table. | 
 | 185 | 	 * | 
 | 186 | 	 * So I just parse the whole table every time. | 
 | 187 | 	 */ | 
 | 188 |  | 
 | 189 | 	/* First step: find the version definition */ | 
 | 190 | 	ver &= 0x7fff;  /* Apparently bit 15 means "hidden" */ | 
 | 191 | 	Elf64_Verdef *def = vdso_info.verdef; | 
 | 192 | 	while(true) { | 
 | 193 | 		if ((def->vd_flags & VER_FLG_BASE) == 0 | 
 | 194 | 		    && (def->vd_ndx & 0x7fff) == ver) | 
 | 195 | 			break; | 
 | 196 |  | 
 | 197 | 		if (def->vd_next == 0) | 
 | 198 | 			return false;  /* No definition. */ | 
 | 199 |  | 
 | 200 | 		def = (Elf64_Verdef *)((char *)def + def->vd_next); | 
 | 201 | 	} | 
 | 202 |  | 
 | 203 | 	/* Now figure out whether it matches. */ | 
 | 204 | 	Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux); | 
 | 205 | 	return def->vd_hash == hash | 
 | 206 | 		&& !strcmp(name, vdso_info.symstrings + aux->vda_name); | 
 | 207 | } | 
 | 208 |  | 
 | 209 | void *vdso_sym(const char *version, const char *name) | 
 | 210 | { | 
 | 211 | 	unsigned long ver_hash; | 
 | 212 | 	if (!vdso_info.valid) | 
 | 213 | 		return 0; | 
 | 214 |  | 
 | 215 | 	ver_hash = elf_hash(version); | 
 | 216 | 	Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; | 
 | 217 |  | 
 | 218 | 	for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { | 
 | 219 | 		Elf64_Sym *sym = &vdso_info.symtab[chain]; | 
 | 220 |  | 
 | 221 | 		/* Check for a defined global or weak function w/ right name. */ | 
 | 222 | 		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) | 
 | 223 | 			continue; | 
 | 224 | 		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && | 
 | 225 | 		    ELF64_ST_BIND(sym->st_info) != STB_WEAK) | 
 | 226 | 			continue; | 
 | 227 | 		if (sym->st_shndx == SHN_UNDEF) | 
 | 228 | 			continue; | 
 | 229 | 		if (strcmp(name, vdso_info.symstrings + sym->st_name)) | 
 | 230 | 			continue; | 
 | 231 |  | 
 | 232 | 		/* Check symbol version. */ | 
 | 233 | 		if (vdso_info.versym | 
 | 234 | 		    && !vdso_match_version(vdso_info.versym[chain], | 
 | 235 | 					   version, ver_hash)) | 
 | 236 | 			continue; | 
 | 237 |  | 
 | 238 | 		return (void *)(vdso_info.load_offset + sym->st_value); | 
 | 239 | 	} | 
 | 240 |  | 
 | 241 | 	return 0; | 
 | 242 | } | 
 | 243 |  | 
 | 244 | void vdso_init_from_auxv(void *auxv) | 
 | 245 | { | 
 | 246 | 	Elf64_auxv_t *elf_auxv = auxv; | 
 | 247 | 	for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) | 
 | 248 | 	{ | 
 | 249 | 		if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { | 
 | 250 | 			vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val); | 
 | 251 | 			return; | 
 | 252 | 		} | 
 | 253 | 	} | 
 | 254 |  | 
 | 255 | 	vdso_info.valid = false; | 
 | 256 | } |