| /* | 
 |  * net/sched/estimator.c	Simple rate estimator. | 
 |  * | 
 |  *		This program is free software; you can redistribute it and/or | 
 |  *		modify it under the terms of the GNU General Public License | 
 |  *		as published by the Free Software Foundation; either version | 
 |  *		2 of the License, or (at your option) any later version. | 
 |  * | 
 |  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> | 
 |  */ | 
 |  | 
 | #include <asm/uaccess.h> | 
 | #include <asm/system.h> | 
 | #include <linux/bitops.h> | 
 | #include <linux/module.h> | 
 | #include <linux/types.h> | 
 | #include <linux/kernel.h> | 
 | #include <linux/jiffies.h> | 
 | #include <linux/string.h> | 
 | #include <linux/mm.h> | 
 | #include <linux/socket.h> | 
 | #include <linux/sockios.h> | 
 | #include <linux/in.h> | 
 | #include <linux/errno.h> | 
 | #include <linux/interrupt.h> | 
 | #include <linux/netdevice.h> | 
 | #include <linux/skbuff.h> | 
 | #include <linux/rtnetlink.h> | 
 | #include <linux/init.h> | 
 | #include <net/sock.h> | 
 | #include <net/pkt_sched.h> | 
 |  | 
 | /* | 
 |    This code is NOT intended to be used for statistics collection, | 
 |    its purpose is to provide a base for statistical multiplexing | 
 |    for controlled load service. | 
 |    If you need only statistics, run a user level daemon which | 
 |    periodically reads byte counters. | 
 |  | 
 |    Unfortunately, rate estimation is not a very easy task. | 
 |    F.e. I did not find a simple way to estimate the current peak rate | 
 |    and even failed to formulate the problem 8)8) | 
 |  | 
   So I preferred not to build an estimator into the scheduler,
   but to run this task separately.
   Ideally, it should be kernel thread(s), but for now it runs
   from timers, which puts an apparent upper bound on the number of
   rated flows and has minimal overhead when that number is small,
   but is enough to handle controlled load service and sets of
   aggregates.
 |  | 
   We measure rate over A=2^interval seconds (interval is -2..3) and evaluate EWMA:
 |  | 
 |    avrate = avrate*(1-W) + rate*W | 
 |  | 
 |    where W is chosen as negative power of 2: W = 2^(-ewma_log) | 
 |  | 
 |    The resulting time constant is: | 
 |  | 
 |    T = A/(-ln(1-W)) | 
 |  | 
 |  | 
 |    NOTES. | 
 |  | 
 |    * The stored value for avbps is scaled by 2^5, so that maximal | 
 |      rate is ~1Gbit, avpps is scaled by 2^10. | 
 |  | 
 |    * Minimal interval is HZ/4=250msec (it is the greatest common divisor | 
 |      for HZ=100 and HZ=1024 8)), maximal interval | 
 |      is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals | 
 |      are too expensive, longer ones can be implemented | 
 |      at user level painlessly. | 
 |  */ | 
 |  | 
 | #define EST_MAX_INTERVAL	5 | 
 |  | 
 | struct qdisc_estimator | 
 | { | 
 | 	struct qdisc_estimator	*next; | 
 | 	struct tc_stats		*stats; | 
 | 	spinlock_t		*stats_lock; | 
 | 	unsigned		interval; | 
 | 	int			ewma_log; | 
 | 	u64			last_bytes; | 
 | 	u32			last_packets; | 
 | 	u32			avpps; | 
 | 	u32			avbps; | 
 | }; | 
 |  | 
 | struct qdisc_estimator_head | 
 | { | 
 | 	struct timer_list	timer; | 
 | 	struct qdisc_estimator	*list; | 
 | }; | 
 |  | 
 | static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1]; | 
 |  | 
 | /* Estimator array lock */ | 
 | static DEFINE_RWLOCK(est_lock); | 
 |  | 
 | static void est_timer(unsigned long arg) | 
 | { | 
 | 	int idx = (int)arg; | 
 | 	struct qdisc_estimator *e; | 
 |  | 
 | 	read_lock(&est_lock); | 
 | 	for (e = elist[idx].list; e; e = e->next) { | 
 | 		struct tc_stats *st = e->stats; | 
 | 		u64 nbytes; | 
 | 		u32 npackets; | 
 | 		u32 rate; | 
 |  | 
 | 		spin_lock(e->stats_lock); | 
 | 		nbytes = st->bytes; | 
 | 		npackets = st->packets; | 
 | 		rate = (nbytes - e->last_bytes)<<(7 - idx); | 
 | 		e->last_bytes = nbytes; | 
 | 		e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; | 
 | 		st->bps = (e->avbps+0xF)>>5; | 
 |  | 
 | 		rate = (npackets - e->last_packets)<<(12 - idx); | 
 | 		e->last_packets = npackets; | 
 | 		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; | 
 | 		e->stats->pps = (e->avpps+0x1FF)>>10; | 
 | 		spin_unlock(e->stats_lock); | 
 | 	} | 
 |  | 
 | 	mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); | 
 | 	read_unlock(&est_lock); | 
 | } | 
 |  | 
 | int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt) | 
 | { | 
 | 	struct qdisc_estimator *est; | 
 | 	struct tc_estimator *parm = RTA_DATA(opt); | 
 |  | 
 | 	if (RTA_PAYLOAD(opt) < sizeof(*parm)) | 
 | 		return -EINVAL; | 
 |  | 
 | 	if (parm->interval < -2 || parm->interval > 3) | 
 | 		return -EINVAL; | 
 |  | 
 | 	est = kmalloc(sizeof(*est), GFP_KERNEL); | 
 | 	if (est == NULL) | 
 | 		return -ENOBUFS; | 
 |  | 
 | 	memset(est, 0, sizeof(*est)); | 
 | 	est->interval = parm->interval + 2; | 
 | 	est->stats = stats; | 
 | 	est->stats_lock = stats_lock; | 
 | 	est->ewma_log = parm->ewma_log; | 
 | 	est->last_bytes = stats->bytes; | 
 | 	est->avbps = stats->bps<<5; | 
 | 	est->last_packets = stats->packets; | 
 | 	est->avpps = stats->pps<<10; | 
 |  | 
 | 	est->next = elist[est->interval].list; | 
 | 	if (est->next == NULL) { | 
 | 		init_timer(&elist[est->interval].timer); | 
 | 		elist[est->interval].timer.data = est->interval; | 
 | 		elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4); | 
 | 		elist[est->interval].timer.function = est_timer; | 
 | 		add_timer(&elist[est->interval].timer); | 
 | 	} | 
 | 	write_lock_bh(&est_lock); | 
 | 	elist[est->interval].list = est; | 
 | 	write_unlock_bh(&est_lock); | 
 | 	return 0; | 
 | } | 
 |  | 
 | void qdisc_kill_estimator(struct tc_stats *stats) | 
 | { | 
 | 	int idx; | 
 | 	struct qdisc_estimator *est, **pest; | 
 |  | 
 | 	for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { | 
 | 		int killed = 0; | 
 | 		pest = &elist[idx].list; | 
 | 		while ((est=*pest) != NULL) { | 
 | 			if (est->stats != stats) { | 
 | 				pest = &est->next; | 
 | 				continue; | 
 | 			} | 
 |  | 
 | 			write_lock_bh(&est_lock); | 
 | 			*pest = est->next; | 
 | 			write_unlock_bh(&est_lock); | 
 |  | 
 | 			kfree(est); | 
 | 			killed++; | 
 | 		} | 
 | 		if (killed && elist[idx].list == NULL) | 
 | 			del_timer(&elist[idx].timer); | 
 | 	} | 
 | } | 
 |  | 
 | EXPORT_SYMBOL(qdisc_kill_estimator); | 
 | EXPORT_SYMBOL(qdisc_new_estimator); |