Blame - scripts/kallsyms.c - android_kernel_htc_msm8960

blob: d3d2e5341051809b01d13b75b352ed3d41f01d5c [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/* Generate assembler source containing symbol information
				2	*
				3	* Copyright 2002 by Kai Germaschewski
				4	*
				5	* This software may be used and distributed according to the terms
				6	* of the GNU General Public License, incorporated herein by reference.
				7	*
				8	* Usage: nm -n vmlinux \| scripts/kallsyms [--all-symbols] > symbols.S
				9	*
				10	* ChangeLog:
				11	*
				12	* (25/Aug/2004) Paulo Marques <pmarques@grupopie.com>
				13	* Changed the compression method from stem compression to "table lookup"
				14	* compression
				15	*
				16	* Table compression uses all the unused char codes on the symbols and
				17	* maps these to the most used substrings (tokens). For instance, it might
				18	* map char code 0xF7 to represent "write_" and then in every symbol where
				19	* "write_" appears it can be replaced by 0xF7, saving 5 bytes.
				20	* The used codes themselves are also placed in the table so that the
				21	* decompression can work without "special cases".
				22	* Applied to kernel symbols, this usually produces a compression ratio
				23	* of about 50%.
				24	*
				25	*/
				26
				27	#include <stdio.h>
				28	#include <stdlib.h>
				29	#include <string.h>
				30	#include <ctype.h>
				31
				32	/* maximum token length used. It doesn't pay to increase it a lot, because
				33	* very long substrings probably don't repeat themselves too often. */
				34	#define MAX_TOK_SIZE 11
				35	#define KSYM_NAME_LEN 127
				36
				37	/* we use only a subset of the complete symbol table to gather the token count,
				38	* to speed up compression, at the expense of a little compression ratio */
				39	#define WORKING_SET 1024
				40
				41	/* first find the best token only on the list of tokens that would profit more
				42	* than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list.
				43	* Increasing this value will put less tokens on the "good" list, so the search
				44	* is faster. However, if the good list runs out of tokens, we must painfully
				45	* search the bad list. */
				46	#define GOOD_BAD_THRESHOLD 10
				47
				48	/* token hash parameters */
				49	#define HASH_BITS 18
				50	#define HASH_TABLE_SIZE (1 << HASH_BITS)
				51	#define HASH_MASK (HASH_TABLE_SIZE - 1)
				52	#define HASH_BASE_OFFSET 2166136261U
				53	#define HASH_FOLD(a) ((a)&(HASH_MASK))
				54
				55	/* flags to mark symbols */
				56	#define SYM_FLAG_VALID 1
				57	#define SYM_FLAG_SAMPLED 2
				58
				59	struct sym_entry {
				60	unsigned long long addr;
				61	char type;
				62	unsigned char flags;
				63	unsigned char len;
				64	unsigned char *sym;
				65	};
				66
				67
				68	static struct sym_entry *table;
				69	static int size, cnt;
David Woodhouse	075d6eb	2005-05-05 16:15:09 -0700	[diff] [blame]	70	static unsigned long long _stext, _etext, _sinittext, _einittext, _sextratext, _eextratext;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	71	static int all_symbols = 0;
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	72	static char symbol_prefix_char = '\0';
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	73
				74	struct token {
				75	unsigned char data[MAX_TOK_SIZE];
				76	unsigned char len;
				77	/* profit: the number of bytes that could be saved by inserting this
				78	* token into the table */
				79	int profit;
				80	struct token next; / next token on the hash list */
				81	struct token right; / next token on the good/bad list */
				82	struct token left; / previous token on the good/bad list */
				83	struct token smaller; / token that is less one letter than this one */
				84	};
				85
				86	struct token bad_head, good_head;
				87	struct token *hash_table[HASH_TABLE_SIZE];
				88
				89	/* the table that holds the result of the compression */
				90	unsigned char best_table[256][MAX_TOK_SIZE+1];
				91	unsigned char best_table_len[256];
				92
				93
				94	static void
				95	usage(void)
				96	{
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	97	fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=<prefix char>] < in.map > out.S\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	98	exit(1);
				99	}
				100
				101	/*
				102	* This ignores the intensely annoying "mapping symbols" found
				103	* in ARM ELF files: $a, $t and $d.
				104	*/
				105	static inline int
				106	is_arm_mapping_symbol(const char *str)
				107	{
				108	return str[0] == '$' && strchr("atd", str[1])
				109	&& (str[2] == '\0' \|\| str[2] == '.');
				110	}
				111
				112	static int
				113	read_symbol(FILE in, struct sym_entry s)
				114	{
				115	char str[500];
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	116	char *sym;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	117	int rc;
				118
				119	rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str);
				120	if (rc != 3) {
				121	if (rc != EOF) {
				122	/* skip line */
				123	fgets(str, 500, in);
				124	}
				125	return -1;
				126	}
				127
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	128	sym = str;
				129	/* skip prefix char */
				130	if (symbol_prefix_char && str[0] == symbol_prefix_char)
				131	sym++;
				132
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	133	/* Ignore most absolute/undefined (?) symbols. */
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	134	if (strcmp(sym, "_stext") == 0)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	135	_stext = s->addr;
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	136	else if (strcmp(sym, "_etext") == 0)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	137	_etext = s->addr;
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	138	else if (strcmp(sym, "_sinittext") == 0)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	139	_sinittext = s->addr;
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	140	else if (strcmp(sym, "_einittext") == 0)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	141	_einittext = s->addr;
David Woodhouse	075d6eb	2005-05-05 16:15:09 -0700	[diff] [blame]	142	else if (strcmp(sym, "_sextratext") == 0)
				143	_sextratext = s->addr;
				144	else if (strcmp(sym, "_eextratext") == 0)
				145	_eextratext = s->addr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	146	else if (toupper(s->type) == 'A')
				147	{
				148	/* Keep these useful absolute symbols */
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	149	if (strcmp(sym, "__kernel_syscall_via_break") &&
				150	strcmp(sym, "__kernel_syscall_via_epc") &&
				151	strcmp(sym, "__kernel_sigtramp") &&
				152	strcmp(sym, "__gp"))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	153	return -1;
				154
				155	}
				156	else if (toupper(s->type) == 'U' \|\|
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	157	is_arm_mapping_symbol(sym))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	158	return -1;
				159
				160	/* include the type field in the symbol name, so that it gets
				161	* compressed together */
				162	s->len = strlen(str) + 1;
				163	s->sym = (char *) malloc(s->len + 1);
				164	strcpy(s->sym + 1, str);
				165	s->sym[0] = s->type;
				166
				167	return 0;
				168	}
				169
				170	static int
				171	symbol_valid(struct sym_entry *s)
				172	{
				173	/* Symbols which vary between passes. Passes 1 and 2 must have
				174	* identical symbol lists. The kallsyms_* symbols below are only added
				175	* after pass 1, they would be included in pass 2 when --all-symbols is
				176	* specified so exclude them to get a stable symbol list.
				177	*/
				178	static char *special_symbols[] = {
				179	"kallsyms_addresses",
				180	"kallsyms_num_syms",
				181	"kallsyms_names",
				182	"kallsyms_markers",
				183	"kallsyms_token_table",
				184	"kallsyms_token_index",
				185
				186	/* Exclude linker generated symbols which vary between passes */
				187	"_SDA_BASE_", /* ppc */
				188	"_SDA2_BASE_", /* ppc */
				189	NULL };
				190	int i;
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	191	int offset = 1;
				192
				193	/* skip prefix char */
				194	if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char)
				195	offset++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	196
				197	/* if --all-symbols is not specified, then symbols outside the text
				198	* and inittext sections are discarded */
				199	if (!all_symbols) {
				200	if ((s->addr < _stext \|\| s->addr > _etext)
David Woodhouse	075d6eb	2005-05-05 16:15:09 -0700	[diff] [blame]	201	&& (s->addr < _sinittext \|\| s->addr > _einittext)
				202	&& (s->addr < _sextratext \|\| s->addr > _eextratext))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	203	return 0;
				204	/* Corner case. Discard any symbols with the same value as
David Woodhouse	075d6eb	2005-05-05 16:15:09 -0700	[diff] [blame]	205	* _etext _einittext or _eextratext; they can move between pass
				206	* 1 and 2 when the kallsyms data are added. If these symbols
				207	* move then they may get dropped in pass 2, which breaks the
				208	* kallsyms rules.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	209	*/
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	210	if ((s->addr == _etext && strcmp(s->sym + offset, "_etext")) \|\|
David Woodhouse	075d6eb	2005-05-05 16:15:09 -0700	[diff] [blame]	211	(s->addr == _einittext && strcmp(s->sym + offset, "_einittext")) \|\|
				212	(s->addr == _eextratext && strcmp(s->sym + offset, "_eextratext")))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	213	return 0;
				214	}
				215
				216	/* Exclude symbols which vary between passes. */
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	217	if (strstr(s->sym + offset, "_compiled."))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	218	return 0;
				219
				220	for (i = 0; special_symbols[i]; i++)
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	221	if( strcmp(s->sym + offset, special_symbols[i]) == 0 )
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	222	return 0;
				223
				224	return 1;
				225	}
				226
				227	static void
				228	read_map(FILE *in)
				229	{
				230	while (!feof(in)) {
				231	if (cnt >= size) {
				232	size += 10000;
				233	table = realloc(table, sizeof(table) size);
				234	if (!table) {
				235	fprintf(stderr, "out of memory\n");
				236	exit (1);
				237	}
				238	}
				239	if (read_symbol(in, &table[cnt]) == 0)
				240	cnt++;
				241	}
				242	}
				243
				244	static void output_label(char *label)
				245	{
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	246	if (symbol_prefix_char)
				247	printf(".globl %c%s\n", symbol_prefix_char, label);
				248	else
				249	printf(".globl %s\n", label);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	250	printf("\tALGN\n");
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	251	if (symbol_prefix_char)
				252	printf("%c%s:\n", symbol_prefix_char, label);
				253	else
				254	printf("%s:\n", label);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	255	}
				256
				257	/* uncompress a compressed symbol. When this function is called, the best table
				258	* might still be compressed itself, so the function needs to be recursive */
				259	static int expand_symbol(unsigned char data, int len, char result)
				260	{
				261	int c, rlen, total=0;
				262
				263	while (len) {
				264	c = *data;
				265	/* if the table holds a single char that is the same as the one
				266	* we are looking for, then end the search */
				267	if (best_table[c][0]==c && best_table_len[c]==1) {
				268	*result++ = c;
				269	total++;
				270	} else {
				271	/* if not, recurse and expand */
				272	rlen = expand_symbol(best_table[c], best_table_len[c], result);
				273	total += rlen;
				274	result += rlen;
				275	}
				276	data++;
				277	len--;
				278	}
				279	*result=0;
				280
				281	return total;
				282	}
				283
				284	static void
				285	write_src(void)
				286	{
				287	int i, k, off, valid;
				288	unsigned int best_idx[256];
				289	unsigned int *markers;
				290	char buf[KSYM_NAME_LEN+1];
				291
				292	printf("#include <asm/types.h>\n");
				293	printf("#if BITS_PER_LONG == 64\n");
				294	printf("#define PTR .quad\n");
				295	printf("#define ALGN .align 8\n");
				296	printf("#else\n");
				297	printf("#define PTR .long\n");
				298	printf("#define ALGN .align 4\n");
				299	printf("#endif\n");
				300
				301	printf(".data\n");
				302
				303	output_label("kallsyms_addresses");
				304	valid = 0;
				305	for (i = 0; i < cnt; i++) {
				306	if (table[i].flags & SYM_FLAG_VALID) {
				307	printf("\tPTR\t%#llx\n", table[i].addr);
				308	valid++;
				309	}
				310	}
				311	printf("\n");
				312
				313	output_label("kallsyms_num_syms");
				314	printf("\tPTR\t%d\n", valid);
				315	printf("\n");
				316
				317	/* table of offset markers, that give the offset in the compressed stream
				318	* every 256 symbols */
				319	markers = (unsigned int ) malloc(sizeof(unsigned int)((valid + 255) / 256));
				320
				321	output_label("kallsyms_names");
				322	valid = 0;
				323	off = 0;
				324	for (i = 0; i < cnt; i++) {
				325
				326	if (!table[i].flags & SYM_FLAG_VALID)
				327	continue;
				328
				329	if ((valid & 0xFF) == 0)
				330	markers[valid >> 8] = off;
				331
				332	printf("\t.byte 0x%02x", table[i].len);
				333	for (k = 0; k < table[i].len; k++)
				334	printf(", 0x%02x", table[i].sym[k]);
				335	printf("\n");
				336
				337	off += table[i].len + 1;
				338	valid++;
				339	}
				340	printf("\n");
				341
				342	output_label("kallsyms_markers");
				343	for (i = 0; i < ((valid + 255) >> 8); i++)
				344	printf("\tPTR\t%d\n", markers[i]);
				345	printf("\n");
				346
				347	free(markers);
				348
				349	output_label("kallsyms_token_table");
				350	off = 0;
				351	for (i = 0; i < 256; i++) {
				352	best_idx[i] = off;
				353	expand_symbol(best_table[i],best_table_len[i],buf);
				354	printf("\t.asciz\t\"%s\"\n", buf);
				355	off += strlen(buf) + 1;
				356	}
				357	printf("\n");
				358
				359	output_label("kallsyms_token_index");
				360	for (i = 0; i < 256; i++)
				361	printf("\t.short\t%d\n", best_idx[i]);
				362	printf("\n");
				363	}
				364
				365
				366	/* table lookup compression functions */
				367
				368	static inline unsigned int rehash_token(unsigned int hash, unsigned char data)
				369	{
				370	return ((hash * 16777619) ^ data);
				371	}
				372
				373	static unsigned int hash_token(unsigned char *data, int len)
				374	{
				375	unsigned int hash=HASH_BASE_OFFSET;
				376	int i;
				377
				378	for (i = 0; i < len; i++)
				379	hash = rehash_token(hash, data[i]);
				380
				381	return HASH_FOLD(hash);
				382	}
				383
				384	/* find a token given its data and hash value */
				385	static struct token find_token_hash(unsigned char data, int len, unsigned int hash)
				386	{
				387	struct token *ptr;
				388
				389	ptr = hash_table[hash];
				390
				391	while (ptr) {
				392	if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0))
				393	return ptr;
				394	ptr=ptr->next;
				395	}
				396
				397	return NULL;
				398	}
				399
				400	static inline void insert_token_in_group(struct token head, struct token ptr)
				401	{
				402	ptr->right = head->right;
				403	ptr->right->left = ptr;
				404	head->right = ptr;
				405	ptr->left = head;
				406	}
				407
				408	static inline void remove_token_from_group(struct token *ptr)
				409	{
				410	ptr->left->right = ptr->right;
				411	ptr->right->left = ptr->left;
				412	}
				413
				414
				415	/* build the counts for all the tokens that start with "data", and have lenghts
				416	* from 2 to "len" */
				417	static void learn_token(unsigned char *data, int len)
				418	{
				419	struct token ptr,last_ptr;
				420	int i, newprofit;
				421	unsigned int hash = HASH_BASE_OFFSET;
				422	unsigned int hashes[MAX_TOK_SIZE + 1];
				423
				424	if (len > MAX_TOK_SIZE)
				425	len = MAX_TOK_SIZE;
				426
				427	/* calculate and store the hash values for all the sub-tokens */
				428	hash = rehash_token(hash, data[0]);
				429	for (i = 2; i <= len; i++) {
				430	hash = rehash_token(hash, data[i-1]);
				431	hashes[i] = HASH_FOLD(hash);
				432	}
				433
				434	last_ptr = NULL;
				435	ptr = NULL;
				436
				437	for (i = len; i >= 2; i--) {
				438	hash = hashes[i];
				439
				440	if (!ptr) ptr = find_token_hash(data, i, hash);
				441
				442	if (!ptr) {
				443	/* create a new token entry */
				444	ptr = (struct token ) malloc(sizeof(ptr));
				445
				446	memcpy(ptr->data, data, i);
				447	ptr->len = i;
				448
				449	/* when we create an entry, it's profit is 0 because
				450	* we also take into account the size of the token on
				451	* the compressed table. We then subtract GOOD_BAD_THRESHOLD
				452	* so that the test to see if this token belongs to
				453	* the good or bad list, is a comparison to zero */
				454	ptr->profit = -GOOD_BAD_THRESHOLD;
				455
				456	ptr->next = hash_table[hash];
				457	hash_table[hash] = ptr;
				458
				459	insert_token_in_group(&bad_head, ptr);
				460
				461	ptr->smaller = NULL;
				462	} else {
				463	newprofit = ptr->profit + (ptr->len - 1);
				464	/* check to see if this token needs to be moved to a
				465	* different list */
				466	if((ptr->profit < 0) && (newprofit >= 0)) {
				467	remove_token_from_group(ptr);
				468	insert_token_in_group(&good_head,ptr);
				469	}
				470	ptr->profit = newprofit;
				471	}
				472
				473	if (last_ptr) last_ptr->smaller = ptr;
				474	last_ptr = ptr;
				475
				476	ptr = ptr->smaller;
				477	}
				478	}
				479
				480	/* decrease the counts for all the tokens that start with "data", and have lenghts
				481	* from 2 to "len". This function is much simpler than learn_token because we have
				482	* more guarantees (tho tokens exist, the ->smaller pointer is set, etc.)
				483	* The two separate functions exist only because of compression performance */
				484	static void forget_token(unsigned char *data, int len)
				485	{
				486	struct token *ptr;
				487	int i, newprofit;
				488	unsigned int hash=0;
				489
				490	if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE;
				491
				492	hash = hash_token(data, len);
				493	ptr = find_token_hash(data, len, hash);
				494
				495	for (i = len; i >= 2; i--) {
				496
				497	newprofit = ptr->profit - (ptr->len - 1);
				498	if ((ptr->profit >= 0) && (newprofit < 0)) {
				499	remove_token_from_group(ptr);
				500	insert_token_in_group(&bad_head, ptr);
				501	}
				502	ptr->profit=newprofit;
				503
				504	ptr=ptr->smaller;
				505	}
				506	}
				507
				508	/* count all the possible tokens in a symbol */
				509	static void learn_symbol(unsigned char *symbol, int len)
				510	{
				511	int i;
				512
				513	for (i = 0; i < len - 1; i++)
				514	learn_token(symbol + i, len - i);
				515	}
				516
				517	/* decrease the count for all the possible tokens in a symbol */
				518	static void forget_symbol(unsigned char *symbol, int len)
				519	{
				520	int i;
				521
				522	for (i = 0; i < len - 1; i++)
				523	forget_token(symbol + i, len - i);
				524	}
				525
				526	/* set all the symbol flags and do the initial token count */
				527	static void build_initial_tok_table(void)
				528	{
				529	int i, use_it, valid;
				530
				531	valid = 0;
				532	for (i = 0; i < cnt; i++) {
				533	table[i].flags = 0;
				534	if ( symbol_valid(&table[i]) ) {
				535	table[i].flags \|= SYM_FLAG_VALID;
				536	valid++;
				537	}
				538	}
				539
				540	use_it = 0;
				541	for (i = 0; i < cnt; i++) {
				542
				543	/* subsample the available symbols. This method is almost like
				544	* a Bresenham's algorithm to get uniformly distributed samples
				545	* across the symbol table */
				546	if (table[i].flags & SYM_FLAG_VALID) {
				547
				548	use_it += WORKING_SET;
				549
				550	if (use_it >= valid) {
				551	table[i].flags \|= SYM_FLAG_SAMPLED;
				552	use_it -= valid;
				553	}
				554	}
				555	if (table[i].flags & SYM_FLAG_SAMPLED)
				556	learn_symbol(table[i].sym, table[i].len);
				557	}
				558	}
				559
				560	/* replace a given token in all the valid symbols. Use the sampled symbols
				561	* to update the counts */
				562	static void compress_symbols(unsigned char *str, int tlen, int idx)
				563	{
				564	int i, len, learn, size;
				565	unsigned char *p;
				566
				567	for (i = 0; i < cnt; i++) {
				568
				569	if (!(table[i].flags & SYM_FLAG_VALID)) continue;
				570
				571	len = table[i].len;
				572	learn = 0;
				573	p = table[i].sym;
				574
				575	do {
				576	/* find the token on the symbol */
				577	p = (unsigned char ) strstr((char ) p, (char *) str);
				578	if (!p) break;
				579
				580	if (!learn) {
				581	/* if this symbol was used to count, decrease it */
				582	if (table[i].flags & SYM_FLAG_SAMPLED)
				583	forget_symbol(table[i].sym, len);
				584	learn = 1;
				585	}
				586
				587	*p = idx;
				588	size = (len - (p - table[i].sym)) - tlen + 1;
				589	memmove(p + 1, p + tlen, size);
				590	p++;
				591	len -= tlen - 1;
				592
				593	} while (size >= tlen);
				594
				595	if(learn) {
				596	table[i].len = len;
				597	/* if this symbol was used to count, learn it again */
				598	if(table[i].flags & SYM_FLAG_SAMPLED)
				599	learn_symbol(table[i].sym, len);
				600	}
				601	}
				602	}
				603
				604	/* search the token with the maximum profit */
				605	static struct token *find_best_token(void)
				606	{
				607	struct token ptr,best,*head;
				608	int bestprofit;
				609
				610	bestprofit=-10000;
				611
				612	/* failsafe: if the "good" list is empty search from the "bad" list */
				613	if(good_head.right == &good_head) head = &bad_head;
				614	else head = &good_head;
				615
				616	ptr = head->right;
				617	best = NULL;
				618	while (ptr != head) {
				619	if (ptr->profit > bestprofit) {
				620	bestprofit = ptr->profit;
				621	best = ptr;
				622	}
				623	ptr = ptr->right;
				624	}
				625
				626	return best;
				627	}
				628
				629	/* this is the core of the algorithm: calculate the "best" table */
				630	static void optimize_result(void)
				631	{
				632	struct token *best;
				633	int i;
				634
				635	/* using the '\0' symbol last allows compress_symbols to use standard
				636	* fast string functions */
				637	for (i = 255; i >= 0; i--) {
				638
				639	/* if this table slot is empty (it is not used by an actual
				640	* original char code */
				641	if (!best_table_len[i]) {
				642
				643	/* find the token with the breates profit value */
				644	best = find_best_token();
				645
				646	/* place it in the "best" table */
				647	best_table_len[i] = best->len;
				648	memcpy(best_table[i], best->data, best_table_len[i]);
				649	/* zero terminate the token so that we can use strstr
				650	in compress_symbols */
				651	best_table[i][best_table_len[i]]='\0';
				652
				653	/* replace this token in all the valid symbols */
				654	compress_symbols(best_table[i], best_table_len[i], i);
				655	}
				656	}
				657	}
				658
				659	/* start by placing the symbols that are actually used on the table */
				660	static void insert_real_symbols_in_table(void)
				661	{
				662	int i, j, c;
				663
				664	memset(best_table, 0, sizeof(best_table));
				665	memset(best_table_len, 0, sizeof(best_table_len));
				666
				667	for (i = 0; i < cnt; i++) {
				668	if (table[i].flags & SYM_FLAG_VALID) {
				669	for (j = 0; j < table[i].len; j++) {
				670	c = table[i].sym[j];
				671	best_table[c][0]=c;
				672	best_table_len[c]=1;
				673	}
				674	}
				675	}
				676	}
				677
				678	static void optimize_token_table(void)
				679	{
				680	memset(hash_table, 0, sizeof(hash_table));
				681
				682	good_head.left = &good_head;
				683	good_head.right = &good_head;
				684
				685	bad_head.left = &bad_head;
				686	bad_head.right = &bad_head;
				687
				688	build_initial_tok_table();
				689
				690	insert_real_symbols_in_table();
				691
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	692	/* When valid symbol is not registered, exit to error */
				693	if (good_head.left == good_head.right &&
				694	bad_head.left == bad_head.right) {
				695	fprintf(stderr, "No valid symbol.\n");
				696	exit(1);
				697	}
				698
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	699	optimize_result();
				700	}
				701
				702
				703	int
				704	main(int argc, char **argv)
				705	{
Yoshinori Sato	41f11a4	2005-05-01 08:59:06 -0700	[diff] [blame]	706	if (argc >= 2) {
				707	int i;
				708	for (i = 1; i < argc; i++) {
				709	if(strcmp(argv[i], "--all-symbols") == 0)
				710	all_symbols = 1;
				711	else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
				712	char *p = &argv[i][16];
				713	/* skip quote */
				714	if ((p == '"' && (p+2) == '"') \|\| (p == '\'' && (p+2) == '\''))
				715	p++;
				716	symbol_prefix_char = *p;
				717	} else
				718	usage();
				719	}
				720	} else if (argc != 1)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	721	usage();
				722
				723	read_map(stdin);
				724	optimize_token_table();
				725	write_src();
				726
				727	return 0;
				728	}