/*
 *      Copyright (C) 1993-1996 Bas Laarhoven.
 *
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2, or (at your option)
 *      any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; see the file COPYING.  If not, write to
 *      the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Source: /homes/cvs/ftape-stacked/ftape/lowlevel/ftape-calibr.c,v $
 * $Revision: 1.2 $
 * $Date: 1997/10/05 19:18:08 $
 *
 *      GP calibration routine for processor speed dependent
 *      functions.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <asm/system.h>
#include <asm/io.h>
#if defined(__alpha__)
# include <asm/hwrpb.h>
#elif defined(__x86_64__)
# include <asm/msr.h>
# include <asm/timex.h>
#elif defined(__i386__)
# include <linux/timex.h>
#endif
#include <linux/ftape.h>
#include "../lowlevel/ftape-tracing.h"
#include "../lowlevel/ftape-calibr.h"
#include "../lowlevel/fdc-io.h"

#undef DEBUG

#if !defined(__alpha__) && !defined(__i386__) && !defined(__x86_64__)
# error Ftape is not implemented for this architecture!
#endif

#if defined(__alpha__) || defined(__x86_64__)
static unsigned long ps_per_cycle = 0;
#endif

static DEFINE_SPINLOCK(calibr_lock);

/*
 * Note: On Intel PCs, the clock ticks at 100 Hz (HZ == 100) which is
 * too slow for certain timeouts (and that clock doesn't even tick
 * when interrupts are disabled).  For that reason, the 8254 timer is
 * used directly to implement fine-grained timeouts.  However, on
 * Alpha PCs, the 8254 is *not* used to implement the clock tick
 * (which is 1024 Hz, normally) and the 8254 timer runs at some
 * "random" frequency (it appears to run at 18 Hz, but it is not safe
 * to rely on this value).  Instead, we use the Alpha's "rpcc"
 * instruction to read cycle counts.  As this is a 32 bit counter,
 * it will overflow only about once per 21 seconds (on a 200 MHz
 * machine: 2^32 cycles / 200e6 Hz ~= 21.5 s), which is plenty.
 */
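
/*
 * For reference: on the i386 side, LATCH is the 8254 reload value
 * from <linux/timex.h>, LATCH = (CLOCK_TICK_RATE + HZ/2) / HZ, i.e.
 * roughly 1193182 / 100 ~= 11932 PIT counts per jiffy at HZ == 100.
 */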

unsigned int ftape_timestamp(void)
{
#if defined(__alpha__)
        unsigned long r;

        asm volatile ("rpcc %0" : "=r" (r));
        return r;
#elif defined(__x86_64__)
        unsigned long r;

        rdtscl(r);
        return r;
#elif defined(__i386__)
        /*
         * Note that there is some time between counter underflowing
         * and jiffies increasing, so the code below won't always give
         * correct output.
         * -Vojtech
         */
        unsigned long flags;
        __u16 lo;
        __u16 hi;

        spin_lock_irqsave(&calibr_lock, flags);
        outb_p(0x00, 0x43);     /* latch the count ASAP */
        lo = inb_p(0x40);       /* read the latched count */
        lo |= inb(0x40) << 8;
        hi = jiffies;
        spin_unlock_irqrestore(&calibr_lock, flags);
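        /*
         * The PIT counts down from LATCH to 0 once per jiffy, so the
         * number of PIT counts elapsed since "jiffy zero" is
         * hi * LATCH + (LATCH - lo), which is the expression below.
         */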
        return ((hi + 1) * (unsigned int) LATCH) - lo;  /* downcounter ! */
#endif
}

static unsigned int short_ftape_timestamp(void)
{
#if defined(__alpha__) || defined(__x86_64__)
        return ftape_timestamp();
#elif defined(__i386__)
        unsigned int count;
        unsigned long flags;

        spin_lock_irqsave(&calibr_lock, flags);
        outb_p(0x00, 0x43);     /* latch the count ASAP */
        count = inb_p(0x40);    /* read the latched count */
        count |= inb(0x40) << 8;
        spin_unlock_irqrestore(&calibr_lock, flags);
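        /*
         * This is only the position within the current jiffy, so two
         * short stamps may only be compared if they were taken less
         * than 1/HZ apart; diff() below handles the wrap-around.
         */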
        return (LATCH - count); /* normal: downcounter */
#endif
}

static unsigned int diff(unsigned int t0, unsigned int t1)
{
#if defined(__alpha__) || defined(__x86_64__)
        return (t1 - t0);
#elif defined(__i386__)
        /*
         * This is tricky: to work for both short and full
         * ftape_timestamps we have to discriminate between them.
         * If it _looks_ like a short stamp that has wrapped around,
         * we assume it is one.  This introduces a small error if it
         * really was a (very large) delta between full
         * ftape_timestamps.
         */
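        /*
         * Example (short stamps, LATCH ~= 11932): t0 == 11000 taken
         * late in one jiffy and t1 == 100 early in the next yields
         * t1 + LATCH - t0 == 1032 counts, not a huge bogus delta.
         */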
        return (t1 <= t0 && t0 <= LATCH) ? t1 + LATCH - t0 : t1 - t0;
#endif
}

static unsigned int usecs(unsigned int count)
{
#if defined(__alpha__) || defined(__x86_64__)
        return (ps_per_cycle * count) / 1000000UL;
#elif defined(__i386__)
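        /*
         * usec = count * 1000000 / CLOCK_TICK_RATE, rearranged so the
         * 32 bit multiplication cannot overflow: the divisor is the
         * PIT clock in units of 100 Hz, rounded to nearest.
         */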
        return (10000 * count) / ((CLOCK_TICK_RATE + 50) / 100);
#endif
}

unsigned int ftape_timediff(unsigned int t0, unsigned int t1)
{
        /*
         *  Calculate difference in usec for ftape_timestamp results
         *  t0 & t1.  Note that on the i386 platform with short
         *  time-stamps, the maximum allowed timespan is 1/HZ or we'll
         *  lose ticks!
         */
        return usecs(diff(t0, t1));
}

/*
 *      To get an indication of the I/O performance, measure the
 *      duration of the inb() function.
 */
static void time_inb(void)
{
        int i;
        int t0, t1;
        unsigned long flags;
        int status;
        TRACE_FUN(ft_t_any);

        spin_lock_irqsave(&calibr_lock, flags);
        t0 = short_ftape_timestamp();
        for (i = 0; i < 1000; ++i) {
                status = inb(fdc.msr);
        }
        t1 = short_ftape_timestamp();
        spin_unlock_irqrestore(&calibr_lock, flags);
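        /*
         * The loop executes inb() 1000 times, so the total time in
         * usec happens to equal the per-call cost in nsec; hence the
         * "nsec" unit in the trace below.
         */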
        TRACE(ft_t_info, "inb() duration: %d nsec", ftape_timediff(t0, t1));
        TRACE_EXIT;
}

static void init_clock(void)
{
        TRACE_FUN(ft_t_any);

#if defined(__x86_64__)
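        /* cpu_khz is the kernel's measured TSC frequency in kHz, so
         * 1e9 / cpu_khz gives picoseconds per TSC cycle.
         */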
        ps_per_cycle = 1000000000UL / cpu_khz;
#elif defined(__alpha__)
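        /* hwrpb->cycle_freq is the Alpha's CPU clock in Hz, so
         * 1e12 / cycle_freq gives picoseconds per rpcc cycle.
         */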
        extern struct hwrpb_struct *hwrpb;
        ps_per_cycle = (1000*1000*1000*1000UL) / hwrpb->cycle_freq;
#endif
        TRACE_EXIT;
}

/*
 *      Input:  function taking an unsigned int count as parameter,
 *              plus pointers that receive the calculated calibration
 *              results.
 */
void ftape_calibrate(char *name,
                     void (*fun) (unsigned int),
                     unsigned int *calibr_count,
                     unsigned int *calibr_time)
{
        static int first_time = 1;
        int i;
        unsigned int tc = 0;
        unsigned int count;
        unsigned int time;
#if defined(__i386__)
        unsigned int old_tc = 0;
        unsigned int old_count = 1;
        unsigned int old_time = 1;
#endif
        TRACE_FUN(ft_t_flow);

        if (first_time) {             /* get idea of I/O performance */
                init_clock();
                time_inb();
                first_time = 0;
        }
        /*
         * The initial count must be chosen so that on very slow
         * systems the first pass takes less than one jiffy, while on
         * very fast systems it still gives reasonable precision.
         */

        count = 40;
        for (i = 0; i < 15; ++i) {
                unsigned int t0;
                unsigned int t1;
                unsigned int once;
                unsigned int multiple;
                unsigned long flags;

                *calibr_count =
                *calibr_time = count;   /* set TC to 1 */
                spin_lock_irqsave(&calibr_lock, flags);
                fun(0);         /* dummy, get code into cache */
                t0 = short_ftape_timestamp();
                fun(0);         /* overhead + one test */
                t1 = short_ftape_timestamp();
                once = diff(t0, t1);
                t0 = short_ftape_timestamp();
                fun(count);     /* overhead + count tests */
                t1 = short_ftape_timestamp();
                multiple = diff(t0, t1);
                spin_unlock_irqrestore(&calibr_lock, flags);
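                /*
                 * "multiple - once" cancels the fixed call overhead,
                 * leaving the cost of (count - 1) extra tests; tc is
                 * then the per-test cost in nsec (time is in usec).
                 */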
                time = ftape_timediff(0, multiple - once);
                tc = (1000 * time) / (count - 1);
                TRACE(ft_t_any, "once:%3d us,%6d times:%6d us, TC:%5d ns",
                      usecs(once), count - 1, usecs(multiple), tc);
#if defined(__alpha__) || defined(__x86_64__)
                /*
                 * Increase the calibration count exponentially until the
                 * calibration time exceeds 100 ms.
                 */
                if (time >= 100*1000) {
                        break;
                }
#elif defined(__i386__)
                /*
                 * Increase the count until the resulting time nears
                 * 2/HZ; then the tc will drop sharply because we lose
                 * LATCH counts.
                 */
                if (tc <= old_tc / 2) {
                        time = old_time;
                        count = old_count;
                        break;
                }
                old_tc = tc;
                old_count = count;
                old_time = time;
#endif
                count *= 2;
        }
        *calibr_count = count - 1;
        *calibr_time  = time;
        TRACE(ft_t_info, "TC for `%s()' = %d nsec (at %d counts)",
              name, (1000 * *calibr_time) / *calibr_count, *calibr_count);
        TRACE_EXIT;
}
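
/*
 * Usage sketch (hypothetical caller, for illustration only): a driver
 * calibrates a busy-wait delay loop roughly like this, where
 * ftape_udelay() is assumed to be a loop whose cost scales linearly
 * with its argument:
 *
 *      static unsigned int udelay_count;
 *      static unsigned int udelay_time;
 *
 *      ftape_calibrate("ftape_udelay", ftape_udelay,
 *                      &udelay_count, &udelay_time);
 *
 * Afterwards udelay_count iterations are known to take udelay_time
 * usec, from which a delay of any requested length can be scaled.
 */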