| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * linux/kernel/posix_timers.c | 
 | 3 |  * | 
 | 4 |  * | 
 | 5 |  * 2002-10-15  Posix Clocks & timers | 
 | 6 |  *                           by George Anzinger george@mvista.com | 
 | 7 |  * | 
 | 8 |  *			     Copyright (C) 2002 2003 by MontaVista Software. | 
 | 9 |  * | 
 | 10 |  * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug. | 
 | 11 |  *			     Copyright (C) 2004 Boris Hu | 
 | 12 |  * | 
 | 13 |  * This program is free software; you can redistribute it and/or modify | 
 | 14 |  * it under the terms of the GNU General Public License as published by | 
 | 15 |  * the Free Software Foundation; either version 2 of the License, or (at | 
 | 16 |  * your option) any later version. | 
 | 17 |  * | 
 | 18 |  * This program is distributed in the hope that it will be useful, but | 
 | 19 |  * WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 20 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 
 | 21 |  * General Public License for more details. | 
 | 22 |  | 
 | 23 |  * You should have received a copy of the GNU General Public License | 
 | 24 |  * along with this program; if not, write to the Free Software | 
 | 25 |  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
 | 26 |  * | 
 | 27 |  * MontaVista Software | 1237 East Arques Avenue | Sunnyvale | CA 94085 | USA | 
 | 28 |  */ | 
 | 29 |  | 
 | 30 | /* These are all the functions necessary to implement | 
 | 31 |  * POSIX clocks & timers | 
 | 32 |  */ | 
 | 33 | #include <linux/mm.h> | 
 | 34 | #include <linux/smp_lock.h> | 
 | 35 | #include <linux/interrupt.h> | 
 | 36 | #include <linux/slab.h> | 
 | 37 | #include <linux/time.h> | 
 | 38 |  | 
 | 39 | #include <asm/uaccess.h> | 
 | 40 | #include <asm/semaphore.h> | 
 | 41 | #include <linux/list.h> | 
 | 42 | #include <linux/init.h> | 
 | 43 | #include <linux/compiler.h> | 
 | 44 | #include <linux/idr.h> | 
 | 45 | #include <linux/posix-timers.h> | 
 | 46 | #include <linux/syscalls.h> | 
 | 47 | #include <linux/wait.h> | 
 | 48 | #include <linux/workqueue.h> | 
 | 49 | #include <linux/module.h> | 
 | 50 |  | 
 | 51 | #ifndef div_long_long_rem | 
 | 52 | #include <asm/div64.h> | 
 | 53 |  | 
 | 54 | #define div_long_long_rem(dividend,divisor,remainder) ({ \ | 
 | 55 | 		       u64 result = dividend;		\ | 
 | 56 | 		       *remainder = do_div(result,divisor); \ | 
 | 57 | 		       result; }) | 
 | 58 |  | 
 | 59 | #endif | 
 | 60 | #define CLOCK_REALTIME_RES TICK_NSEC  /* In nano seconds. */ | 
 | 61 |  | 
 | 62 | static inline u64  mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) | 
 | 63 | { | 
 | 64 | 	return (u64)mpy1 * mpy2; | 
 | 65 | } | 
 | 66 | /* | 
 | 67 |  * Management arrays for POSIX timers.	 Timers are kept in slab memory | 
 | 68 |  * Timer ids are allocated by an external routine that keeps track of the | 
 | 69 |  * id and the timer.  The external interface is: | 
 | 70 |  * | 
 | 71 |  * void *idr_find(struct idr *idp, int id);           to find timer_id <id> | 
 | 72 |  * int idr_get_new(struct idr *idp, void *ptr);       to get a new id and | 
 | 73 |  *                                                    related it to <ptr> | 
 | 74 |  * void idr_remove(struct idr *idp, int id);          to release <id> | 
 | 75 |  * void idr_init(struct idr *idp);                    to initialize <idp> | 
 | 76 |  *                                                    which we supply. | 
 | 77 |  * The idr_get_new *may* call slab for more memory so it must not be | 
 | 78 |  * called under a spin lock.  Likewise idr_remove may release memory | 
 | 79 |  * (but it may be ok to do this under a lock...). | 
 | 80 |  * idr_find is just a memory look up and is quite fast.  A NULL return | 
 | 81 |  * indicates that the requested id does not exist. | 
 | 82 |  */ | 
 | 83 |  | 
 | 84 | /* | 
 | 85 |  * Lets keep our timers in a slab cache :-) | 
 | 86 |  */ | 
 | 87 | static kmem_cache_t *posix_timers_cache; | 
 | 88 | static struct idr posix_timers_id; | 
 | 89 | static DEFINE_SPINLOCK(idr_lock); | 
 | 90 |  | 
 | 91 | /* | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 92 |  * we assume that the new SIGEV_THREAD_ID shares no bits with the other | 
 | 93 |  * SIGEV values.  Here we put out an error if this assumption fails. | 
 | 94 |  */ | 
 | 95 | #if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \ | 
 | 96 |                        ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD)) | 
 | 97 | #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" | 
 | 98 | #endif | 
 | 99 |  | 
 | 100 |  | 
 | 101 | /* | 
 | 102 |  * The timer ID is turned into a timer address by idr_find(). | 
 | 103 |  * Verifying a valid ID consists of: | 
 | 104 |  * | 
 | 105 |  * a) checking that idr_find() returns other than NULL. | 
 | 106 |  * b) checking that the timer id matches the one in the timer itself. | 
 | 107 |  * c) that the timer owner is in the callers thread group. | 
 | 108 |  */ | 
 | 109 |  | 
 | 110 | /* | 
 | 111 |  * CLOCKs: The POSIX standard calls for a couple of clocks and allows us | 
 | 112 |  *	    to implement others.  This structure defines the various | 
 | 113 |  *	    clocks and allows the possibility of adding others.	 We | 
 | 114 |  *	    provide an interface to add clocks to the table and expect | 
 | 115 |  *	    the "arch" code to add at least one clock that is high | 
 | 116 |  *	    resolution.	 Here we define the standard CLOCK_REALTIME as a | 
 | 117 |  *	    1/HZ resolution clock. | 
 | 118 |  * | 
 | 119 |  * RESOLUTION: Clock resolution is used to round up timer and interval | 
 | 120 |  *	    times, NOT to report clock times, which are reported with as | 
 | 121 |  *	    much resolution as the system can muster.  In some cases this | 
 | 122 |  *	    resolution may depend on the underlying clock hardware and | 
 | 123 |  *	    may not be quantifiable until run time, and only then is the | 
 | 124 |  *	    necessary code written.	The standard says we should say | 
 | 125 |  *	    something about this issue in the documentation... | 
 | 126 |  * | 
 | 127 |  * FUNCTIONS: The CLOCKs structure defines possible functions to handle | 
 | 128 |  *	    various clock functions.  For clocks that use the standard | 
 | 129 |  *	    system timer code these entries should be NULL.  This will | 
 | 130 |  *	    allow dispatch without the overhead of indirect function | 
 | 131 |  *	    calls.  CLOCKS that depend on other sources (e.g. WWV or GPS) | 
 | 132 |  *	    must supply functions here, even if the function just returns | 
 | 133 |  *	    ENOSYS.  The standard POSIX timer management code assumes the | 
 | 134 |  *	    following: 1.) The k_itimer struct (sched.h) is used for the | 
 | 135 |  *	    timer.  2.) The list, it_lock, it_clock, it_id and it_process | 
 | 136 |  *	    fields are not modified by timer code. | 
 | 137 |  * | 
 | 138 |  *          At this time all functions EXCEPT clock_nanosleep can be | 
 | 139 |  *          redirected by the CLOCKS structure.  Clock_nanosleep is in | 
 | 140 |  *          there, but the code ignores it. | 
 | 141 |  * | 
 | 142 |  * Permissions: It is assumed that the clock_settime() function defined | 
 | 143 |  *	    for each clock will take care of permission checks.	 Some | 
 | 144 |  *	    clocks may be set able by any user (i.e. local process | 
 | 145 |  *	    clocks) others not.	 Currently the only set able clock we | 
 | 146 |  *	    have is CLOCK_REALTIME and its high res counter part, both of | 
 | 147 |  *	    which we beg off on and pass to do_sys_settimeofday(). | 
 | 148 |  */ | 
 | 149 |  | 
 | 150 | static struct k_clock posix_clocks[MAX_CLOCKS]; | 
 | 151 | /* | 
 | 152 |  * We only have one real clock that can be set so we need only one abs list, | 
 | 153 |  * even if we should want to have several clocks with differing resolutions. | 
 | 154 |  */ | 
 | 155 | static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list), | 
 | 156 | 				      .lock = SPIN_LOCK_UNLOCKED}; | 
 | 157 |  | 
 | 158 | static void posix_timer_fn(unsigned long); | 
 | 159 | static u64 do_posix_clock_monotonic_gettime_parts( | 
 | 160 | 	struct timespec *tp, struct timespec *mo); | 
 | 161 | int do_posix_clock_monotonic_gettime(struct timespec *tp); | 
 | 162 | static int do_posix_clock_monotonic_get(clockid_t, struct timespec *tp); | 
 | 163 |  | 
 | 164 | static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); | 
 | 165 |  | 
 | 166 | static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) | 
 | 167 | { | 
 | 168 | 	spin_unlock_irqrestore(&timr->it_lock, flags); | 
 | 169 | } | 
 | 170 |  | 
 | 171 | /* | 
 | 172 |  * Call the k_clock hook function if non-null, or the default function. | 
 | 173 |  */ | 
 | 174 | #define CLOCK_DISPATCH(clock, call, arglist) \ | 
 | 175 |  	((clock) < 0 ? posix_cpu_##call arglist : \ | 
 | 176 |  	 (posix_clocks[clock].call != NULL \ | 
 | 177 |  	  ? (*posix_clocks[clock].call) arglist : common_##call arglist)) | 
 | 178 |  | 
 | 179 | /* | 
 | 180 |  * Default clock hook functions when the struct k_clock passed | 
 | 181 |  * to register_posix_clock leaves a function pointer null. | 
 | 182 |  * | 
 | 183 |  * The function common_CALL is the default implementation for | 
 | 184 |  * the function pointer CALL in struct k_clock. | 
 | 185 |  */ | 
 | 186 |  | 
 | 187 | static inline int common_clock_getres(clockid_t which_clock, | 
 | 188 | 				      struct timespec *tp) | 
 | 189 | { | 
 | 190 | 	tp->tv_sec = 0; | 
 | 191 | 	tp->tv_nsec = posix_clocks[which_clock].res; | 
 | 192 | 	return 0; | 
 | 193 | } | 
 | 194 |  | 
 | 195 | static inline int common_clock_get(clockid_t which_clock, struct timespec *tp) | 
 | 196 | { | 
 | 197 | 	getnstimeofday(tp); | 
 | 198 | 	return 0; | 
 | 199 | } | 
 | 200 |  | 
 | 201 | static inline int common_clock_set(clockid_t which_clock, struct timespec *tp) | 
 | 202 | { | 
 | 203 | 	return do_sys_settimeofday(tp, NULL); | 
 | 204 | } | 
 | 205 |  | 
 | 206 | static inline int common_timer_create(struct k_itimer *new_timer) | 
 | 207 | { | 
 | 208 | 	INIT_LIST_HEAD(&new_timer->it.real.abs_timer_entry); | 
 | 209 | 	init_timer(&new_timer->it.real.timer); | 
 | 210 | 	new_timer->it.real.timer.data = (unsigned long) new_timer; | 
 | 211 | 	new_timer->it.real.timer.function = posix_timer_fn; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 212 | 	return 0; | 
 | 213 | } | 
 | 214 |  | 
 | 215 | /* | 
 | 216 |  * These ones are defined below. | 
 | 217 |  */ | 
 | 218 | static int common_nsleep(clockid_t, int flags, struct timespec *t); | 
 | 219 | static void common_timer_get(struct k_itimer *, struct itimerspec *); | 
 | 220 | static int common_timer_set(struct k_itimer *, int, | 
 | 221 | 			    struct itimerspec *, struct itimerspec *); | 
 | 222 | static int common_timer_del(struct k_itimer *timer); | 
 | 223 |  | 
 | 224 | /* | 
 | 225 |  * Return nonzero iff we know a priori this clockid_t value is bogus. | 
 | 226 |  */ | 
 | 227 | static inline int invalid_clockid(clockid_t which_clock) | 
 | 228 | { | 
 | 229 | 	if (which_clock < 0)	/* CPU clock, posix_cpu_* will check it */ | 
 | 230 | 		return 0; | 
 | 231 | 	if ((unsigned) which_clock >= MAX_CLOCKS) | 
 | 232 | 		return 1; | 
 | 233 | 	if (posix_clocks[which_clock].clock_getres != NULL) | 
 | 234 | 		return 0; | 
 | 235 | #ifndef CLOCK_DISPATCH_DIRECT | 
 | 236 | 	if (posix_clocks[which_clock].res != 0) | 
 | 237 | 		return 0; | 
 | 238 | #endif | 
 | 239 | 	return 1; | 
 | 240 | } | 
 | 241 |  | 
 | 242 |  | 
 | 243 | /* | 
 | 244 |  * Initialize everything, well, just everything in Posix clocks/timers ;) | 
 | 245 |  */ | 
 | 246 | static __init int init_posix_timers(void) | 
 | 247 | { | 
 | 248 | 	struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES, | 
 | 249 | 					 .abs_struct = &abs_list | 
 | 250 | 	}; | 
 | 251 | 	struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES, | 
 | 252 | 		.abs_struct = NULL, | 
 | 253 | 		.clock_get = do_posix_clock_monotonic_get, | 
 | 254 | 		.clock_set = do_posix_clock_nosettime | 
 | 255 | 	}; | 
 | 256 |  | 
 | 257 | 	register_posix_clock(CLOCK_REALTIME, &clock_realtime); | 
 | 258 | 	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); | 
 | 259 |  | 
 | 260 | 	posix_timers_cache = kmem_cache_create("posix_timers_cache", | 
 | 261 | 					sizeof (struct k_itimer), 0, 0, NULL, NULL); | 
 | 262 | 	idr_init(&posix_timers_id); | 
 | 263 | 	return 0; | 
 | 264 | } | 
 | 265 |  | 
 | 266 | __initcall(init_posix_timers); | 
 | 267 |  | 
 | 268 | static void tstojiffie(struct timespec *tp, int res, u64 *jiff) | 
 | 269 | { | 
 | 270 | 	long sec = tp->tv_sec; | 
 | 271 | 	long nsec = tp->tv_nsec + res - 1; | 
 | 272 |  | 
 | 273 | 	if (nsec > NSEC_PER_SEC) { | 
 | 274 | 		sec++; | 
 | 275 | 		nsec -= NSEC_PER_SEC; | 
 | 276 | 	} | 
 | 277 |  | 
 | 278 | 	/* | 
 | 279 | 	 * The scaling constants are defined in <linux/time.h> | 
 | 280 | 	 * The difference between there and here is that we do the | 
 | 281 | 	 * res rounding and compute a 64-bit result (well so does that | 
 | 282 | 	 * but it then throws away the high bits). | 
 | 283 |   	 */ | 
 | 284 | 	*jiff =  (mpy_l_X_l_ll(sec, SEC_CONVERSION) + | 
 | 285 | 		  (mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >>  | 
 | 286 | 		   (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; | 
 | 287 | } | 
 | 288 |  | 
 | 289 | /* | 
 | 290 |  * This function adjusts the timer as needed as a result of the clock | 
 | 291 |  * being set.  It should only be called for absolute timers, and then | 
 | 292 |  * under the abs_list lock.  It computes the time difference and sets | 
 | 293 |  * the new jiffies value in the timer.  It also updates the timers | 
 | 294 |  * reference wall_to_monotonic value.  It is complicated by the fact | 
 | 295 |  * that tstojiffies() only handles positive times and it needs to work | 
 | 296 |  * with both positive and negative times.  Also, for negative offsets, | 
 | 297 |  * we need to defeat the res round up. | 
 | 298 |  * | 
 | 299 |  * Return is true if there is a new time, else false. | 
 | 300 |  */ | 
 | 301 | static long add_clockset_delta(struct k_itimer *timr, | 
 | 302 | 			       struct timespec *new_wall_to) | 
 | 303 | { | 
 | 304 | 	struct timespec delta; | 
 | 305 | 	int sign = 0; | 
 | 306 | 	u64 exp; | 
 | 307 |  | 
 | 308 | 	set_normalized_timespec(&delta, | 
 | 309 | 				new_wall_to->tv_sec - | 
 | 310 | 				timr->it.real.wall_to_prev.tv_sec, | 
 | 311 | 				new_wall_to->tv_nsec - | 
 | 312 | 				timr->it.real.wall_to_prev.tv_nsec); | 
 | 313 | 	if (likely(!(delta.tv_sec | delta.tv_nsec))) | 
 | 314 | 		return 0; | 
 | 315 | 	if (delta.tv_sec < 0) { | 
 | 316 | 		set_normalized_timespec(&delta, | 
 | 317 | 					-delta.tv_sec, | 
 | 318 | 					1 - delta.tv_nsec - | 
 | 319 | 					posix_clocks[timr->it_clock].res); | 
 | 320 | 		sign++; | 
 | 321 | 	} | 
 | 322 | 	tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp); | 
 | 323 | 	timr->it.real.wall_to_prev = *new_wall_to; | 
 | 324 | 	timr->it.real.timer.expires += (sign ? -exp : exp); | 
 | 325 | 	return 1; | 
 | 326 | } | 
 | 327 |  | 
 | 328 | static void remove_from_abslist(struct k_itimer *timr) | 
 | 329 | { | 
 | 330 | 	if (!list_empty(&timr->it.real.abs_timer_entry)) { | 
 | 331 | 		spin_lock(&abs_list.lock); | 
 | 332 | 		list_del_init(&timr->it.real.abs_timer_entry); | 
 | 333 | 		spin_unlock(&abs_list.lock); | 
 | 334 | 	} | 
 | 335 | } | 
 | 336 |  | 
 | 337 | static void schedule_next_timer(struct k_itimer *timr) | 
 | 338 | { | 
 | 339 | 	struct timespec new_wall_to; | 
 | 340 | 	struct now_struct now; | 
 | 341 | 	unsigned long seq; | 
 | 342 |  | 
 | 343 | 	/* | 
 | 344 | 	 * Set up the timer for the next interval (if there is one). | 
 | 345 | 	 * Note: this code uses the abs_timer_lock to protect | 
 | 346 | 	 * it.real.wall_to_prev and must hold it until exp is set, not exactly | 
 | 347 | 	 * obvious... | 
 | 348 |  | 
 | 349 | 	 * This function is used for CLOCK_REALTIME* and | 
 | 350 | 	 * CLOCK_MONOTONIC* timers.  If we ever want to handle other | 
 | 351 | 	 * CLOCKs, the calling code (do_schedule_next_timer) would need | 
 | 352 | 	 * to pull the "clock" info from the timer and dispatch the | 
 | 353 | 	 * "other" CLOCKs "next timer" code (which, I suppose should | 
 | 354 | 	 * also be added to the k_clock structure). | 
 | 355 | 	 */ | 
 | 356 | 	if (!timr->it.real.incr) | 
 | 357 | 		return; | 
 | 358 |  | 
 | 359 | 	do { | 
 | 360 | 		seq = read_seqbegin(&xtime_lock); | 
 | 361 | 		new_wall_to =	wall_to_monotonic; | 
 | 362 | 		posix_get_now(&now); | 
 | 363 | 	} while (read_seqretry(&xtime_lock, seq)); | 
 | 364 |  | 
 | 365 | 	if (!list_empty(&timr->it.real.abs_timer_entry)) { | 
 | 366 | 		spin_lock(&abs_list.lock); | 
 | 367 | 		add_clockset_delta(timr, &new_wall_to); | 
 | 368 |  | 
 | 369 | 		posix_bump_timer(timr, now); | 
 | 370 |  | 
 | 371 | 		spin_unlock(&abs_list.lock); | 
 | 372 | 	} else { | 
 | 373 | 		posix_bump_timer(timr, now); | 
 | 374 | 	} | 
 | 375 | 	timr->it_overrun_last = timr->it_overrun; | 
 | 376 | 	timr->it_overrun = -1; | 
 | 377 | 	++timr->it_requeue_pending; | 
 | 378 | 	add_timer(&timr->it.real.timer); | 
 | 379 | } | 
 | 380 |  | 
 | 381 | /* | 
 | 382 |  * This function is exported for use by the signal deliver code.  It is | 
 | 383 |  * called just prior to the info block being released and passes that | 
 | 384 |  * block to us.  It's function is to update the overrun entry AND to | 
 | 385 |  * restart the timer.  It should only be called if the timer is to be | 
 | 386 |  * restarted (i.e. we have flagged this in the sys_private entry of the | 
 | 387 |  * info block). | 
 | 388 |  * | 
 | 389 |  * To protect aginst the timer going away while the interrupt is queued, | 
 | 390 |  * we require that the it_requeue_pending flag be set. | 
 | 391 |  */ | 
 | 392 | void do_schedule_next_timer(struct siginfo *info) | 
 | 393 | { | 
 | 394 | 	struct k_itimer *timr; | 
 | 395 | 	unsigned long flags; | 
 | 396 |  | 
 | 397 | 	timr = lock_timer(info->si_tid, &flags); | 
 | 398 |  | 
 | 399 | 	if (!timr || timr->it_requeue_pending != info->si_sys_private) | 
 | 400 | 		goto exit; | 
 | 401 |  | 
 | 402 | 	if (timr->it_clock < 0)	/* CPU clock */ | 
 | 403 | 		posix_cpu_timer_schedule(timr); | 
 | 404 | 	else | 
 | 405 | 		schedule_next_timer(timr); | 
 | 406 | 	info->si_overrun = timr->it_overrun_last; | 
 | 407 | exit: | 
 | 408 | 	if (timr) | 
 | 409 | 		unlock_timer(timr, flags); | 
 | 410 | } | 
 | 411 |  | 
 | 412 | int posix_timer_event(struct k_itimer *timr,int si_private) | 
 | 413 | { | 
 | 414 | 	memset(&timr->sigq->info, 0, sizeof(siginfo_t)); | 
 | 415 | 	timr->sigq->info.si_sys_private = si_private; | 
 | 416 | 	/* | 
 | 417 | 	 * Send signal to the process that owns this timer. | 
 | 418 |  | 
 | 419 | 	 * This code assumes that all the possible abs_lists share the | 
 | 420 | 	 * same lock (there is only one list at this time). If this is | 
 | 421 | 	 * not the case, the CLOCK info would need to be used to find | 
 | 422 | 	 * the proper abs list lock. | 
 | 423 | 	 */ | 
 | 424 |  | 
 | 425 | 	timr->sigq->info.si_signo = timr->it_sigev_signo; | 
 | 426 | 	timr->sigq->info.si_errno = 0; | 
 | 427 | 	timr->sigq->info.si_code = SI_TIMER; | 
 | 428 | 	timr->sigq->info.si_tid = timr->it_id; | 
 | 429 | 	timr->sigq->info.si_value = timr->it_sigev_value; | 
 | 430 | 	if (timr->it_sigev_notify & SIGEV_THREAD_ID) { | 
 | 431 | 		if (unlikely(timr->it_process->flags & PF_EXITING)) { | 
 | 432 | 			timr->it_sigev_notify = SIGEV_SIGNAL; | 
 | 433 | 			put_task_struct(timr->it_process); | 
 | 434 | 			timr->it_process = timr->it_process->group_leader; | 
 | 435 | 			goto group; | 
 | 436 | 		} | 
 | 437 | 		return send_sigqueue(timr->it_sigev_signo, timr->sigq, | 
 | 438 | 			timr->it_process); | 
 | 439 | 	} | 
 | 440 | 	else { | 
 | 441 | 	group: | 
 | 442 | 		return send_group_sigqueue(timr->it_sigev_signo, timr->sigq, | 
 | 443 | 			timr->it_process); | 
 | 444 | 	} | 
 | 445 | } | 
 | 446 | EXPORT_SYMBOL_GPL(posix_timer_event); | 
 | 447 |  | 
 | 448 | /* | 
 | 449 |  * This function gets called when a POSIX.1b interval timer expires.  It | 
 | 450 |  * is used as a callback from the kernel internal timer.  The | 
 | 451 |  * run_timer_list code ALWAYS calls with interrupts on. | 
 | 452 |  | 
 | 453 |  * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. | 
 | 454 |  */ | 
 | 455 | static void posix_timer_fn(unsigned long __data) | 
 | 456 | { | 
 | 457 | 	struct k_itimer *timr = (struct k_itimer *) __data; | 
 | 458 | 	unsigned long flags; | 
 | 459 | 	unsigned long seq; | 
 | 460 | 	struct timespec delta, new_wall_to; | 
 | 461 | 	u64 exp = 0; | 
 | 462 | 	int do_notify = 1; | 
 | 463 |  | 
 | 464 | 	spin_lock_irqsave(&timr->it_lock, flags); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 465 | 	if (!list_empty(&timr->it.real.abs_timer_entry)) { | 
 | 466 | 		spin_lock(&abs_list.lock); | 
 | 467 | 		do { | 
 | 468 | 			seq = read_seqbegin(&xtime_lock); | 
 | 469 | 			new_wall_to =	wall_to_monotonic; | 
 | 470 | 		} while (read_seqretry(&xtime_lock, seq)); | 
 | 471 | 		set_normalized_timespec(&delta, | 
 | 472 | 					new_wall_to.tv_sec - | 
 | 473 | 					timr->it.real.wall_to_prev.tv_sec, | 
 | 474 | 					new_wall_to.tv_nsec - | 
 | 475 | 					timr->it.real.wall_to_prev.tv_nsec); | 
 | 476 | 		if (likely((delta.tv_sec | delta.tv_nsec ) == 0)) { | 
 | 477 | 			/* do nothing, timer is on time */ | 
 | 478 | 		} else if (delta.tv_sec < 0) { | 
 | 479 | 			/* do nothing, timer is already late */ | 
 | 480 | 		} else { | 
 | 481 | 			/* timer is early due to a clock set */ | 
 | 482 | 			tstojiffie(&delta, | 
 | 483 | 				   posix_clocks[timr->it_clock].res, | 
 | 484 | 				   &exp); | 
 | 485 | 			timr->it.real.wall_to_prev = new_wall_to; | 
 | 486 | 			timr->it.real.timer.expires += exp; | 
 | 487 | 			add_timer(&timr->it.real.timer); | 
 | 488 | 			do_notify = 0; | 
 | 489 | 		} | 
 | 490 | 		spin_unlock(&abs_list.lock); | 
 | 491 |  | 
 | 492 | 	} | 
 | 493 | 	if (do_notify)  { | 
 | 494 | 		int si_private=0; | 
 | 495 |  | 
 | 496 | 		if (timr->it.real.incr) | 
 | 497 | 			si_private = ++timr->it_requeue_pending; | 
 | 498 | 		else { | 
 | 499 | 			remove_from_abslist(timr); | 
 | 500 | 		} | 
 | 501 |  | 
 | 502 | 		if (posix_timer_event(timr, si_private)) | 
 | 503 | 			/* | 
 | 504 | 			 * signal was not sent because of sig_ignor | 
 | 505 | 			 * we will not get a call back to restart it AND | 
 | 506 | 			 * it should be restarted. | 
 | 507 | 			 */ | 
 | 508 | 			schedule_next_timer(timr); | 
 | 509 | 	} | 
 | 510 | 	unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */ | 
 | 511 | } | 
 | 512 |  | 
 | 513 |  | 
 | 514 | static inline struct task_struct * good_sigevent(sigevent_t * event) | 
 | 515 | { | 
 | 516 | 	struct task_struct *rtn = current->group_leader; | 
 | 517 |  | 
 | 518 | 	if ((event->sigev_notify & SIGEV_THREAD_ID ) && | 
 | 519 | 		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) || | 
 | 520 | 		 rtn->tgid != current->tgid || | 
 | 521 | 		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) | 
 | 522 | 		return NULL; | 
 | 523 |  | 
 | 524 | 	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && | 
 | 525 | 	    ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) | 
 | 526 | 		return NULL; | 
 | 527 |  | 
 | 528 | 	return rtn; | 
 | 529 | } | 
 | 530 |  | 
 | 531 | void register_posix_clock(clockid_t clock_id, struct k_clock *new_clock) | 
 | 532 | { | 
 | 533 | 	if ((unsigned) clock_id >= MAX_CLOCKS) { | 
 | 534 | 		printk("POSIX clock register failed for clock_id %d\n", | 
 | 535 | 		       clock_id); | 
 | 536 | 		return; | 
 | 537 | 	} | 
 | 538 |  | 
 | 539 | 	posix_clocks[clock_id] = *new_clock; | 
 | 540 | } | 
 | 541 | EXPORT_SYMBOL_GPL(register_posix_clock); | 
 | 542 |  | 
 | 543 | static struct k_itimer * alloc_posix_timer(void) | 
 | 544 | { | 
 | 545 | 	struct k_itimer *tmr; | 
 | 546 | 	tmr = kmem_cache_alloc(posix_timers_cache, GFP_KERNEL); | 
 | 547 | 	if (!tmr) | 
 | 548 | 		return tmr; | 
 | 549 | 	memset(tmr, 0, sizeof (struct k_itimer)); | 
 | 550 | 	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { | 
 | 551 | 		kmem_cache_free(posix_timers_cache, tmr); | 
 | 552 | 		tmr = NULL; | 
 | 553 | 	} | 
 | 554 | 	return tmr; | 
 | 555 | } | 
 | 556 |  | 
 | 557 | #define IT_ID_SET	1 | 
 | 558 | #define IT_ID_NOT_SET	0 | 
 | 559 | static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | 
 | 560 | { | 
 | 561 | 	if (it_id_set) { | 
 | 562 | 		unsigned long flags; | 
 | 563 | 		spin_lock_irqsave(&idr_lock, flags); | 
 | 564 | 		idr_remove(&posix_timers_id, tmr->it_id); | 
 | 565 | 		spin_unlock_irqrestore(&idr_lock, flags); | 
 | 566 | 	} | 
 | 567 | 	sigqueue_free(tmr->sigq); | 
 | 568 | 	if (unlikely(tmr->it_process) && | 
 | 569 | 	    tmr->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 
 | 570 | 		put_task_struct(tmr->it_process); | 
 | 571 | 	kmem_cache_free(posix_timers_cache, tmr); | 
 | 572 | } | 
 | 573 |  | 
 | 574 | /* Create a POSIX.1b interval timer. */ | 
 | 575 |  | 
 | 576 | asmlinkage long | 
 | 577 | sys_timer_create(clockid_t which_clock, | 
 | 578 | 		 struct sigevent __user *timer_event_spec, | 
 | 579 | 		 timer_t __user * created_timer_id) | 
 | 580 | { | 
 | 581 | 	int error = 0; | 
 | 582 | 	struct k_itimer *new_timer = NULL; | 
 | 583 | 	int new_timer_id; | 
 | 584 | 	struct task_struct *process = NULL; | 
 | 585 | 	unsigned long flags; | 
 | 586 | 	sigevent_t event; | 
 | 587 | 	int it_id_set = IT_ID_NOT_SET; | 
 | 588 |  | 
 | 589 | 	if (invalid_clockid(which_clock)) | 
 | 590 | 		return -EINVAL; | 
 | 591 |  | 
 | 592 | 	new_timer = alloc_posix_timer(); | 
 | 593 | 	if (unlikely(!new_timer)) | 
 | 594 | 		return -EAGAIN; | 
 | 595 |  | 
 | 596 | 	spin_lock_init(&new_timer->it_lock); | 
 | 597 |  retry: | 
 | 598 | 	if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) { | 
 | 599 | 		error = -EAGAIN; | 
 | 600 | 		goto out; | 
 | 601 | 	} | 
 | 602 | 	spin_lock_irq(&idr_lock); | 
 | 603 | 	error = idr_get_new(&posix_timers_id, | 
 | 604 | 			    (void *) new_timer, | 
 | 605 | 			    &new_timer_id); | 
 | 606 | 	spin_unlock_irq(&idr_lock); | 
 | 607 | 	if (error == -EAGAIN) | 
 | 608 | 		goto retry; | 
 | 609 | 	else if (error) { | 
 | 610 | 		/* | 
 | 611 | 		 * Wierd looking, but we return EAGAIN if the IDR is | 
 | 612 | 		 * full (proper POSIX return value for this) | 
 | 613 | 		 */ | 
 | 614 | 		error = -EAGAIN; | 
 | 615 | 		goto out; | 
 | 616 | 	} | 
 | 617 |  | 
 | 618 | 	it_id_set = IT_ID_SET; | 
 | 619 | 	new_timer->it_id = (timer_t) new_timer_id; | 
 | 620 | 	new_timer->it_clock = which_clock; | 
 | 621 | 	new_timer->it_overrun = -1; | 
 | 622 | 	error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); | 
 | 623 | 	if (error) | 
 | 624 | 		goto out; | 
 | 625 |  | 
 | 626 | 	/* | 
 | 627 | 	 * return the timer_id now.  The next step is hard to | 
 | 628 | 	 * back out if there is an error. | 
 | 629 | 	 */ | 
 | 630 | 	if (copy_to_user(created_timer_id, | 
 | 631 | 			 &new_timer_id, sizeof (new_timer_id))) { | 
 | 632 | 		error = -EFAULT; | 
 | 633 | 		goto out; | 
 | 634 | 	} | 
 | 635 | 	if (timer_event_spec) { | 
 | 636 | 		if (copy_from_user(&event, timer_event_spec, sizeof (event))) { | 
 | 637 | 			error = -EFAULT; | 
 | 638 | 			goto out; | 
 | 639 | 		} | 
 | 640 | 		new_timer->it_sigev_notify = event.sigev_notify; | 
 | 641 | 		new_timer->it_sigev_signo = event.sigev_signo; | 
 | 642 | 		new_timer->it_sigev_value = event.sigev_value; | 
 | 643 |  | 
 | 644 | 		read_lock(&tasklist_lock); | 
 | 645 | 		if ((process = good_sigevent(&event))) { | 
 | 646 | 			/* | 
 | 647 | 			 * We may be setting up this process for another | 
 | 648 | 			 * thread.  It may be exiting.  To catch this | 
 | 649 | 			 * case the we check the PF_EXITING flag.  If | 
 | 650 | 			 * the flag is not set, the siglock will catch | 
 | 651 | 			 * him before it is too late (in exit_itimers). | 
 | 652 | 			 * | 
 | 653 | 			 * The exec case is a bit more invloved but easy | 
 | 654 | 			 * to code.  If the process is in our thread | 
 | 655 | 			 * group (and it must be or we would not allow | 
 | 656 | 			 * it here) and is doing an exec, it will cause | 
 | 657 | 			 * us to be killed.  In this case it will wait | 
 | 658 | 			 * for us to die which means we can finish this | 
 | 659 | 			 * linkage with our last gasp. I.e. no code :) | 
 | 660 | 			 */ | 
 | 661 | 			spin_lock_irqsave(&process->sighand->siglock, flags); | 
 | 662 | 			if (!(process->flags & PF_EXITING)) { | 
 | 663 | 				new_timer->it_process = process; | 
 | 664 | 				list_add(&new_timer->list, | 
 | 665 | 					 &process->signal->posix_timers); | 
 | 666 | 				spin_unlock_irqrestore(&process->sighand->siglock, flags); | 
 | 667 | 				if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 
 | 668 | 					get_task_struct(process); | 
 | 669 | 			} else { | 
 | 670 | 				spin_unlock_irqrestore(&process->sighand->siglock, flags); | 
 | 671 | 				process = NULL; | 
 | 672 | 			} | 
 | 673 | 		} | 
 | 674 | 		read_unlock(&tasklist_lock); | 
 | 675 | 		if (!process) { | 
 | 676 | 			error = -EINVAL; | 
 | 677 | 			goto out; | 
 | 678 | 		} | 
 | 679 | 	} else { | 
 | 680 | 		new_timer->it_sigev_notify = SIGEV_SIGNAL; | 
 | 681 | 		new_timer->it_sigev_signo = SIGALRM; | 
 | 682 | 		new_timer->it_sigev_value.sival_int = new_timer->it_id; | 
 | 683 | 		process = current->group_leader; | 
 | 684 | 		spin_lock_irqsave(&process->sighand->siglock, flags); | 
 | 685 | 		new_timer->it_process = process; | 
 | 686 | 		list_add(&new_timer->list, &process->signal->posix_timers); | 
 | 687 | 		spin_unlock_irqrestore(&process->sighand->siglock, flags); | 
 | 688 | 	} | 
 | 689 |  | 
 | 690 |  	/* | 
 | 691 | 	 * In the case of the timer belonging to another task, after | 
 | 692 | 	 * the task is unlocked, the timer is owned by the other task | 
 | 693 | 	 * and may cease to exist at any time.  Don't use or modify | 
 | 694 | 	 * new_timer after the unlock call. | 
 | 695 | 	 */ | 
 | 696 |  | 
 | 697 | out: | 
 | 698 | 	if (error) | 
 | 699 | 		release_posix_timer(new_timer, it_id_set); | 
 | 700 |  | 
 | 701 | 	return error; | 
 | 702 | } | 
 | 703 |  | 
 | 704 | /* | 
 | 705 |  * good_timespec | 
 | 706 |  * | 
 | 707 |  * This function checks the elements of a timespec structure. | 
 | 708 |  * | 
 | 709 |  * Arguments: | 
 | 710 |  * ts	     : Pointer to the timespec structure to check | 
 | 711 |  * | 
 | 712 |  * Return value: | 
 | 713 |  * If a NULL pointer was passed in, or the tv_nsec field was less than 0 | 
 | 714 |  * or greater than NSEC_PER_SEC, or the tv_sec field was less than 0, | 
 | 715 |  * this function returns 0. Otherwise it returns 1. | 
 | 716 |  */ | 
 | 717 | static int good_timespec(const struct timespec *ts) | 
 | 718 | { | 
 | 719 | 	if ((!ts) || (ts->tv_sec < 0) || | 
 | 720 | 			((unsigned) ts->tv_nsec >= NSEC_PER_SEC)) | 
 | 721 | 		return 0; | 
 | 722 | 	return 1; | 
 | 723 | } | 
 | 724 |  | 
 | 725 | /* | 
 | 726 |  * Locking issues: We need to protect the result of the id look up until | 
 | 727 |  * we get the timer locked down so it is not deleted under us.  The | 
 | 728 |  * removal is done under the idr spinlock so we use that here to bridge | 
 | 729 |  * the find to the timer lock.  To avoid a dead lock, the timer id MUST | 
 | 730 |  * be release with out holding the timer lock. | 
 | 731 |  */ | 
 | 732 | static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags) | 
 | 733 | { | 
 | 734 | 	struct k_itimer *timr; | 
 | 735 | 	/* | 
 | 736 | 	 * Watch out here.  We do a irqsave on the idr_lock and pass the | 
 | 737 | 	 * flags part over to the timer lock.  Must not let interrupts in | 
 | 738 | 	 * while we are moving the lock. | 
 | 739 | 	 */ | 
 | 740 |  | 
 | 741 | 	spin_lock_irqsave(&idr_lock, *flags); | 
 | 742 | 	timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id); | 
 | 743 | 	if (timr) { | 
 | 744 | 		spin_lock(&timr->it_lock); | 
 | 745 | 		spin_unlock(&idr_lock); | 
 | 746 |  | 
 | 747 | 		if ((timr->it_id != timer_id) || !(timr->it_process) || | 
 | 748 | 				timr->it_process->tgid != current->tgid) { | 
 | 749 | 			unlock_timer(timr, *flags); | 
 | 750 | 			timr = NULL; | 
 | 751 | 		} | 
 | 752 | 	} else | 
 | 753 | 		spin_unlock_irqrestore(&idr_lock, *flags); | 
 | 754 |  | 
 | 755 | 	return timr; | 
 | 756 | } | 
 | 757 |  | 
 | 758 | /* | 
 | 759 |  * Get the time remaining on a POSIX.1b interval timer.  This function | 
 | 760 |  * is ALWAYS called with spin_lock_irq on the timer, thus it must not | 
 | 761 |  * mess with irq. | 
 | 762 |  * | 
 | 763 |  * We have a couple of messes to clean up here.  First there is the case | 
 | 764 |  * of a timer that has a requeue pending.  These timers should appear to | 
 | 765 |  * be in the timer list with an expiry as if we were to requeue them | 
 | 766 |  * now. | 
 | 767 |  * | 
 | 768 |  * The second issue is the SIGEV_NONE timer which may be active but is | 
 | 769 |  * not really ever put in the timer list (to save system resources). | 
 | 770 |  * This timer may be expired, and if so, we will do it here.  Otherwise | 
 | 771 |  * it is the same as a requeue pending timer WRT to what we should | 
 | 772 |  * report. | 
 | 773 |  */ | 
 | 774 | static void | 
 | 775 | common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) | 
 | 776 | { | 
 | 777 | 	unsigned long expires; | 
 | 778 | 	struct now_struct now; | 
 | 779 |  | 
 | 780 | 	do | 
 | 781 | 		expires = timr->it.real.timer.expires; | 
 | 782 | 	while ((volatile long) (timr->it.real.timer.expires) != expires); | 
 | 783 |  | 
 | 784 | 	posix_get_now(&now); | 
 | 785 |  | 
 | 786 | 	if (expires && | 
 | 787 | 	    ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) && | 
 | 788 | 	    !timr->it.real.incr && | 
 | 789 | 	    posix_time_before(&timr->it.real.timer, &now)) | 
 | 790 | 		timr->it.real.timer.expires = expires = 0; | 
 | 791 | 	if (expires) { | 
 | 792 | 		if (timr->it_requeue_pending & REQUEUE_PENDING || | 
 | 793 | 		    (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { | 
 | 794 | 			posix_bump_timer(timr, now); | 
 | 795 | 			expires = timr->it.real.timer.expires; | 
 | 796 | 		} | 
 | 797 | 		else | 
 | 798 | 			if (!timer_pending(&timr->it.real.timer)) | 
 | 799 | 				expires = 0; | 
 | 800 | 		if (expires) | 
 | 801 | 			expires -= now.jiffies; | 
 | 802 | 	} | 
 | 803 | 	jiffies_to_timespec(expires, &cur_setting->it_value); | 
 | 804 | 	jiffies_to_timespec(timr->it.real.incr, &cur_setting->it_interval); | 
 | 805 |  | 
 | 806 | 	if (cur_setting->it_value.tv_sec < 0) { | 
 | 807 | 		cur_setting->it_value.tv_nsec = 1; | 
 | 808 | 		cur_setting->it_value.tv_sec = 0; | 
 | 809 | 	} | 
 | 810 | } | 
 | 811 |  | 
 | 812 | /* Get the time remaining on a POSIX.1b interval timer. */ | 
 | 813 | asmlinkage long | 
 | 814 | sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) | 
 | 815 | { | 
 | 816 | 	struct k_itimer *timr; | 
 | 817 | 	struct itimerspec cur_setting; | 
 | 818 | 	unsigned long flags; | 
 | 819 |  | 
 | 820 | 	timr = lock_timer(timer_id, &flags); | 
 | 821 | 	if (!timr) | 
 | 822 | 		return -EINVAL; | 
 | 823 |  | 
 | 824 | 	CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting)); | 
 | 825 |  | 
 | 826 | 	unlock_timer(timr, flags); | 
 | 827 |  | 
 | 828 | 	if (copy_to_user(setting, &cur_setting, sizeof (cur_setting))) | 
 | 829 | 		return -EFAULT; | 
 | 830 |  | 
 | 831 | 	return 0; | 
 | 832 | } | 
 | 833 | /* | 
 | 834 |  * Get the number of overruns of a POSIX.1b interval timer.  This is to | 
 | 835 |  * be the overrun of the timer last delivered.  At the same time we are | 
 | 836 |  * accumulating overruns on the next timer.  The overrun is frozen when | 
 | 837 |  * the signal is delivered, either at the notify time (if the info block | 
 | 838 |  * is not queued) or at the actual delivery time (as we are informed by | 
 | 839 |  * the call back to do_schedule_next_timer().  So all we need to do is | 
 | 840 |  * to pick up the frozen overrun. | 
 | 841 |  */ | 
 | 842 |  | 
 | 843 | asmlinkage long | 
 | 844 | sys_timer_getoverrun(timer_t timer_id) | 
 | 845 | { | 
 | 846 | 	struct k_itimer *timr; | 
 | 847 | 	int overrun; | 
 | 848 | 	long flags; | 
 | 849 |  | 
 | 850 | 	timr = lock_timer(timer_id, &flags); | 
 | 851 | 	if (!timr) | 
 | 852 | 		return -EINVAL; | 
 | 853 |  | 
 | 854 | 	overrun = timr->it_overrun_last; | 
 | 855 | 	unlock_timer(timr, flags); | 
 | 856 |  | 
 | 857 | 	return overrun; | 
 | 858 | } | 
 | 859 | /* | 
 | 860 |  * Adjust for absolute time | 
 | 861 |  * | 
 | 862 |  * If absolute time is given and it is not CLOCK_MONOTONIC, we need to | 
 | 863 |  * adjust for the offset between the timer clock (CLOCK_MONOTONIC) and | 
 | 864 |  * what ever clock he is using. | 
 | 865 |  * | 
 | 866 |  * If it is relative time, we need to add the current (CLOCK_MONOTONIC) | 
 | 867 |  * time to it to get the proper time for the timer. | 
 | 868 |  */ | 
 | 869 | static int adjust_abs_time(struct k_clock *clock, struct timespec *tp,  | 
 | 870 | 			   int abs, u64 *exp, struct timespec *wall_to) | 
 | 871 | { | 
 | 872 | 	struct timespec now; | 
 | 873 | 	struct timespec oc = *tp; | 
 | 874 | 	u64 jiffies_64_f; | 
 | 875 | 	int rtn =0; | 
 | 876 |  | 
 | 877 | 	if (abs) { | 
 | 878 | 		/* | 
 | 879 | 		 * The mask pick up the 4 basic clocks  | 
 | 880 | 		 */ | 
 | 881 | 		if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) { | 
 | 882 | 			jiffies_64_f = do_posix_clock_monotonic_gettime_parts( | 
 | 883 | 				&now,  wall_to); | 
 | 884 | 			/* | 
 | 885 | 			 * If we are doing a MONOTONIC clock | 
 | 886 | 			 */ | 
 | 887 | 			if((clock - &posix_clocks[0]) & CLOCKS_MONO){ | 
 | 888 | 				now.tv_sec += wall_to->tv_sec; | 
 | 889 | 				now.tv_nsec += wall_to->tv_nsec; | 
 | 890 | 			} | 
 | 891 | 		} else { | 
 | 892 | 			/* | 
 | 893 | 			 * Not one of the basic clocks | 
 | 894 | 			 */ | 
 | 895 | 			clock->clock_get(clock - posix_clocks, &now); | 
 | 896 | 			jiffies_64_f = get_jiffies_64(); | 
 | 897 | 		} | 
 | 898 | 		/* | 
 | 899 | 		 * Take away now to get delta | 
 | 900 | 		 */ | 
 | 901 | 		oc.tv_sec -= now.tv_sec; | 
 | 902 | 		oc.tv_nsec -= now.tv_nsec; | 
 | 903 | 		/* | 
 | 904 | 		 * Normalize... | 
 | 905 | 		 */ | 
 | 906 | 		while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) { | 
 | 907 | 			oc.tv_nsec -= NSEC_PER_SEC; | 
 | 908 | 			oc.tv_sec++; | 
 | 909 | 		} | 
 | 910 | 		while ((oc.tv_nsec) < 0) { | 
 | 911 | 			oc.tv_nsec += NSEC_PER_SEC; | 
 | 912 | 			oc.tv_sec--; | 
 | 913 | 		} | 
 | 914 | 	}else{ | 
 | 915 | 		jiffies_64_f = get_jiffies_64(); | 
 | 916 | 	} | 
 | 917 | 	/* | 
 | 918 | 	 * Check if the requested time is prior to now (if so set now) | 
 | 919 | 	 */ | 
 | 920 | 	if (oc.tv_sec < 0) | 
 | 921 | 		oc.tv_sec = oc.tv_nsec = 0; | 
 | 922 |  | 
 | 923 | 	if (oc.tv_sec | oc.tv_nsec) | 
 | 924 | 		set_normalized_timespec(&oc, oc.tv_sec, | 
 | 925 | 					oc.tv_nsec + clock->res); | 
 | 926 | 	tstojiffie(&oc, clock->res, exp); | 
 | 927 |  | 
 | 928 | 	/* | 
 | 929 | 	 * Check if the requested time is more than the timer code | 
 | 930 | 	 * can handle (if so we error out but return the value too). | 
 | 931 | 	 */ | 
 | 932 | 	if (*exp > ((u64)MAX_JIFFY_OFFSET)) | 
 | 933 | 			/* | 
 | 934 | 			 * This is a considered response, not exactly in | 
 | 935 | 			 * line with the standard (in fact it is silent on | 
 | 936 | 			 * possible overflows).  We assume such a large  | 
 | 937 | 			 * value is ALMOST always a programming error and | 
 | 938 | 			 * try not to compound it by setting a really dumb | 
 | 939 | 			 * value. | 
 | 940 | 			 */ | 
 | 941 | 			rtn = -EINVAL; | 
 | 942 | 	/* | 
 | 943 | 	 * return the actual jiffies expire time, full 64 bits | 
 | 944 | 	 */ | 
 | 945 | 	*exp += jiffies_64_f; | 
 | 946 | 	return rtn; | 
 | 947 | } | 
 | 948 |  | 
 | 949 | /* Set a POSIX.1b interval timer. */ | 
 | 950 | /* timr->it_lock is taken. */ | 
 | 951 | static inline int | 
 | 952 | common_timer_set(struct k_itimer *timr, int flags, | 
 | 953 | 		 struct itimerspec *new_setting, struct itimerspec *old_setting) | 
 | 954 | { | 
 | 955 | 	struct k_clock *clock = &posix_clocks[timr->it_clock]; | 
 | 956 | 	u64 expire_64; | 
 | 957 |  | 
 | 958 | 	if (old_setting) | 
 | 959 | 		common_timer_get(timr, old_setting); | 
 | 960 |  | 
 | 961 | 	/* disable the timer */ | 
 | 962 | 	timr->it.real.incr = 0; | 
 | 963 | 	/* | 
 | 964 | 	 * careful here.  If smp we could be in the "fire" routine which will | 
 | 965 | 	 * be spinning as we hold the lock.  But this is ONLY an SMP issue. | 
 | 966 | 	 */ | 
| Oleg Nesterov | f972be3 | 2005-06-23 00:09:00 -0700 | [diff] [blame] | 967 | 	if (try_to_del_timer_sync(&timr->it.real.timer) < 0) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 968 | #ifdef CONFIG_SMP | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 969 | 		/* | 
 | 970 | 		 * It can only be active if on an other cpu.  Since | 
 | 971 | 		 * we have cleared the interval stuff above, it should | 
 | 972 | 		 * clear once we release the spin lock.  Of course once | 
 | 973 | 		 * we do that anything could happen, including the | 
 | 974 | 		 * complete melt down of the timer.  So return with | 
 | 975 | 		 * a "retry" exit status. | 
 | 976 | 		 */ | 
 | 977 | 		return TIMER_RETRY; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 978 | #endif | 
| Oleg Nesterov | f972be3 | 2005-06-23 00:09:00 -0700 | [diff] [blame] | 979 | 	} | 
 | 980 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 981 | 	remove_from_abslist(timr); | 
 | 982 |  | 
 | 983 | 	timr->it_requeue_pending = (timr->it_requeue_pending + 2) &  | 
 | 984 | 		~REQUEUE_PENDING; | 
 | 985 | 	timr->it_overrun_last = 0; | 
 | 986 | 	timr->it_overrun = -1; | 
 | 987 | 	/* | 
 | 988 | 	 *switch off the timer when it_value is zero | 
 | 989 | 	 */ | 
 | 990 | 	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) { | 
 | 991 | 		timr->it.real.timer.expires = 0; | 
 | 992 | 		return 0; | 
 | 993 | 	} | 
 | 994 |  | 
 | 995 | 	if (adjust_abs_time(clock, | 
 | 996 | 			    &new_setting->it_value, flags & TIMER_ABSTIME,  | 
 | 997 | 			    &expire_64, &(timr->it.real.wall_to_prev))) { | 
 | 998 | 		return -EINVAL; | 
 | 999 | 	} | 
 | 1000 | 	timr->it.real.timer.expires = (unsigned long)expire_64; | 
 | 1001 | 	tstojiffie(&new_setting->it_interval, clock->res, &expire_64); | 
 | 1002 | 	timr->it.real.incr = (unsigned long)expire_64; | 
 | 1003 |  | 
 | 1004 | 	/* | 
 | 1005 | 	 * We do not even queue SIGEV_NONE timers!  But we do put them | 
 | 1006 | 	 * in the abs list so we can do that right. | 
 | 1007 | 	 */ | 
 | 1008 | 	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) | 
 | 1009 | 		add_timer(&timr->it.real.timer); | 
 | 1010 |  | 
 | 1011 | 	if (flags & TIMER_ABSTIME && clock->abs_struct) { | 
 | 1012 | 		spin_lock(&clock->abs_struct->lock); | 
 | 1013 | 		list_add_tail(&(timr->it.real.abs_timer_entry), | 
 | 1014 | 			      &(clock->abs_struct->list)); | 
 | 1015 | 		spin_unlock(&clock->abs_struct->lock); | 
 | 1016 | 	} | 
 | 1017 | 	return 0; | 
 | 1018 | } | 
 | 1019 |  | 
 | 1020 | /* Set a POSIX.1b interval timer */ | 
 | 1021 | asmlinkage long | 
 | 1022 | sys_timer_settime(timer_t timer_id, int flags, | 
 | 1023 | 		  const struct itimerspec __user *new_setting, | 
 | 1024 | 		  struct itimerspec __user *old_setting) | 
 | 1025 | { | 
 | 1026 | 	struct k_itimer *timr; | 
 | 1027 | 	struct itimerspec new_spec, old_spec; | 
 | 1028 | 	int error = 0; | 
 | 1029 | 	long flag; | 
 | 1030 | 	struct itimerspec *rtn = old_setting ? &old_spec : NULL; | 
 | 1031 |  | 
 | 1032 | 	if (!new_setting) | 
 | 1033 | 		return -EINVAL; | 
 | 1034 |  | 
 | 1035 | 	if (copy_from_user(&new_spec, new_setting, sizeof (new_spec))) | 
 | 1036 | 		return -EFAULT; | 
 | 1037 |  | 
 | 1038 | 	if ((!good_timespec(&new_spec.it_interval)) || | 
 | 1039 | 	    (!good_timespec(&new_spec.it_value))) | 
 | 1040 | 		return -EINVAL; | 
 | 1041 | retry: | 
 | 1042 | 	timr = lock_timer(timer_id, &flag); | 
 | 1043 | 	if (!timr) | 
 | 1044 | 		return -EINVAL; | 
 | 1045 |  | 
 | 1046 | 	error = CLOCK_DISPATCH(timr->it_clock, timer_set, | 
 | 1047 | 			       (timr, flags, &new_spec, rtn)); | 
 | 1048 |  | 
 | 1049 | 	unlock_timer(timr, flag); | 
 | 1050 | 	if (error == TIMER_RETRY) { | 
 | 1051 | 		rtn = NULL;	// We already got the old time... | 
 | 1052 | 		goto retry; | 
 | 1053 | 	} | 
 | 1054 |  | 
 | 1055 | 	if (old_setting && !error && copy_to_user(old_setting, | 
 | 1056 | 						  &old_spec, sizeof (old_spec))) | 
 | 1057 | 		error = -EFAULT; | 
 | 1058 |  | 
 | 1059 | 	return error; | 
 | 1060 | } | 
 | 1061 |  | 
 | 1062 | static inline int common_timer_del(struct k_itimer *timer) | 
 | 1063 | { | 
 | 1064 | 	timer->it.real.incr = 0; | 
| Oleg Nesterov | f972be3 | 2005-06-23 00:09:00 -0700 | [diff] [blame] | 1065 |  | 
 | 1066 | 	if (try_to_del_timer_sync(&timer->it.real.timer) < 0) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1067 | #ifdef CONFIG_SMP | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1068 | 		/* | 
 | 1069 | 		 * It can only be active if on an other cpu.  Since | 
 | 1070 | 		 * we have cleared the interval stuff above, it should | 
 | 1071 | 		 * clear once we release the spin lock.  Of course once | 
 | 1072 | 		 * we do that anything could happen, including the | 
 | 1073 | 		 * complete melt down of the timer.  So return with | 
 | 1074 | 		 * a "retry" exit status. | 
 | 1075 | 		 */ | 
 | 1076 | 		return TIMER_RETRY; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1077 | #endif | 
| Oleg Nesterov | f972be3 | 2005-06-23 00:09:00 -0700 | [diff] [blame] | 1078 | 	} | 
 | 1079 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1080 | 	remove_from_abslist(timer); | 
 | 1081 |  | 
 | 1082 | 	return 0; | 
 | 1083 | } | 
 | 1084 |  | 
 | 1085 | static inline int timer_delete_hook(struct k_itimer *timer) | 
 | 1086 | { | 
 | 1087 | 	return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer)); | 
 | 1088 | } | 
 | 1089 |  | 
 | 1090 | /* Delete a POSIX.1b interval timer. */ | 
 | 1091 | asmlinkage long | 
 | 1092 | sys_timer_delete(timer_t timer_id) | 
 | 1093 | { | 
 | 1094 | 	struct k_itimer *timer; | 
 | 1095 | 	long flags; | 
 | 1096 |  | 
 | 1097 | #ifdef CONFIG_SMP | 
 | 1098 | 	int error; | 
 | 1099 | retry_delete: | 
 | 1100 | #endif | 
 | 1101 | 	timer = lock_timer(timer_id, &flags); | 
 | 1102 | 	if (!timer) | 
 | 1103 | 		return -EINVAL; | 
 | 1104 |  | 
 | 1105 | #ifdef CONFIG_SMP | 
 | 1106 | 	error = timer_delete_hook(timer); | 
 | 1107 |  | 
 | 1108 | 	if (error == TIMER_RETRY) { | 
 | 1109 | 		unlock_timer(timer, flags); | 
 | 1110 | 		goto retry_delete; | 
 | 1111 | 	} | 
 | 1112 | #else | 
 | 1113 | 	timer_delete_hook(timer); | 
 | 1114 | #endif | 
 | 1115 | 	spin_lock(¤t->sighand->siglock); | 
 | 1116 | 	list_del(&timer->list); | 
 | 1117 | 	spin_unlock(¤t->sighand->siglock); | 
 | 1118 | 	/* | 
 | 1119 | 	 * This keeps any tasks waiting on the spin lock from thinking | 
 | 1120 | 	 * they got something (see the lock code above). | 
 | 1121 | 	 */ | 
 | 1122 | 	if (timer->it_process) { | 
 | 1123 | 		if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 
 | 1124 | 			put_task_struct(timer->it_process); | 
 | 1125 | 		timer->it_process = NULL; | 
 | 1126 | 	} | 
 | 1127 | 	unlock_timer(timer, flags); | 
 | 1128 | 	release_posix_timer(timer, IT_ID_SET); | 
 | 1129 | 	return 0; | 
 | 1130 | } | 
 | 1131 | /* | 
 | 1132 |  * return timer owned by the process, used by exit_itimers | 
 | 1133 |  */ | 
 | 1134 | static inline void itimer_delete(struct k_itimer *timer) | 
 | 1135 | { | 
 | 1136 | 	unsigned long flags; | 
 | 1137 |  | 
 | 1138 | #ifdef CONFIG_SMP | 
 | 1139 | 	int error; | 
 | 1140 | retry_delete: | 
 | 1141 | #endif | 
 | 1142 | 	spin_lock_irqsave(&timer->it_lock, flags); | 
 | 1143 |  | 
 | 1144 | #ifdef CONFIG_SMP | 
 | 1145 | 	error = timer_delete_hook(timer); | 
 | 1146 |  | 
 | 1147 | 	if (error == TIMER_RETRY) { | 
 | 1148 | 		unlock_timer(timer, flags); | 
 | 1149 | 		goto retry_delete; | 
 | 1150 | 	} | 
 | 1151 | #else | 
 | 1152 | 	timer_delete_hook(timer); | 
 | 1153 | #endif | 
 | 1154 | 	list_del(&timer->list); | 
 | 1155 | 	/* | 
 | 1156 | 	 * This keeps any tasks waiting on the spin lock from thinking | 
 | 1157 | 	 * they got something (see the lock code above). | 
 | 1158 | 	 */ | 
 | 1159 | 	if (timer->it_process) { | 
 | 1160 | 		if (timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID)) | 
 | 1161 | 			put_task_struct(timer->it_process); | 
 | 1162 | 		timer->it_process = NULL; | 
 | 1163 | 	} | 
 | 1164 | 	unlock_timer(timer, flags); | 
 | 1165 | 	release_posix_timer(timer, IT_ID_SET); | 
 | 1166 | } | 
 | 1167 |  | 
 | 1168 | /* | 
 | 1169 |  * This is called by __exit_signal, only when there are no more | 
 | 1170 |  * references to the shared signal_struct. | 
 | 1171 |  */ | 
 | 1172 | void exit_itimers(struct signal_struct *sig) | 
 | 1173 | { | 
 | 1174 | 	struct k_itimer *tmr; | 
 | 1175 |  | 
 | 1176 | 	while (!list_empty(&sig->posix_timers)) { | 
 | 1177 | 		tmr = list_entry(sig->posix_timers.next, struct k_itimer, list); | 
 | 1178 | 		itimer_delete(tmr); | 
 | 1179 | 	} | 
| Ingo Molnar | caf2857 | 2005-06-17 11:36:36 +0200 | [diff] [blame] | 1180 | 	del_timer_sync(&sig->real_timer); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1181 | } | 
 | 1182 |  | 
 | 1183 | /* | 
 | 1184 |  * And now for the "clock" calls | 
 | 1185 |  * | 
 * These functions are called both from timer functions (with the timer
 * spin_lock_irq() held) and from clock calls with no locking.  They must
 * use the flag-saving (irqsave) versions of the locks.
 | 1189 |  */ | 
 | 1190 |  | 
 | 1191 | /* | 
 | 1192 |  * We do ticks here to avoid the irq lock ( they take sooo long). | 
 | 1193 |  * The seqlock is great here.  Since we a reader, we don't really care | 
 | 1194 |  * if we are interrupted since we don't take lock that will stall us or | 
 | 1195 |  * any other cpu. Voila, no irq lock is needed. | 
 | 1196 |  * | 
 | 1197 |  */ | 
 | 1198 |  | 
 | 1199 | static u64 do_posix_clock_monotonic_gettime_parts( | 
 | 1200 | 	struct timespec *tp, struct timespec *mo) | 
 | 1201 | { | 
 | 1202 | 	u64 jiff; | 
 | 1203 | 	unsigned int seq; | 
 | 1204 |  | 
 | 1205 | 	do { | 
 | 1206 | 		seq = read_seqbegin(&xtime_lock); | 
 | 1207 | 		getnstimeofday(tp); | 
 | 1208 | 		*mo = wall_to_monotonic; | 
 | 1209 | 		jiff = jiffies_64; | 
 | 1210 |  | 
 | 1211 | 	} while(read_seqretry(&xtime_lock, seq)); | 
 | 1212 |  | 
 | 1213 | 	return jiff; | 
 | 1214 | } | 
 | 1215 |  | 
 | 1216 | static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp) | 
 | 1217 | { | 
 | 1218 | 	struct timespec wall_to_mono; | 
 | 1219 |  | 
 | 1220 | 	do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); | 
 | 1221 |  | 
 | 1222 | 	tp->tv_sec += wall_to_mono.tv_sec; | 
 | 1223 | 	tp->tv_nsec += wall_to_mono.tv_nsec; | 
 | 1224 |  | 
 | 1225 | 	if ((tp->tv_nsec - NSEC_PER_SEC) > 0) { | 
 | 1226 | 		tp->tv_nsec -= NSEC_PER_SEC; | 
 | 1227 | 		tp->tv_sec++; | 
 | 1228 | 	} | 
 | 1229 | 	return 0; | 
 | 1230 | } | 
 | 1231 |  | 
 | 1232 | int do_posix_clock_monotonic_gettime(struct timespec *tp) | 
 | 1233 | { | 
 | 1234 | 	return do_posix_clock_monotonic_get(CLOCK_MONOTONIC, tp); | 
 | 1235 | } | 
 | 1236 |  | 
 | 1237 | int do_posix_clock_nosettime(clockid_t clockid, struct timespec *tp) | 
 | 1238 | { | 
 | 1239 | 	return -EINVAL; | 
 | 1240 | } | 
 | 1241 | EXPORT_SYMBOL_GPL(do_posix_clock_nosettime); | 
 | 1242 |  | 
 | 1243 | int do_posix_clock_notimer_create(struct k_itimer *timer) | 
 | 1244 | { | 
 | 1245 | 	return -EINVAL; | 
 | 1246 | } | 
 | 1247 | EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create); | 
 | 1248 |  | 
 | 1249 | int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t) | 
 | 1250 | { | 
 | 1251 | #ifndef ENOTSUP | 
 | 1252 | 	return -EOPNOTSUPP;	/* aka ENOTSUP in userland for POSIX */ | 
 | 1253 | #else  /*  parisc does define it separately.  */ | 
 | 1254 | 	return -ENOTSUP; | 
 | 1255 | #endif | 
 | 1256 | } | 
 | 1257 | EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); | 
 | 1258 |  | 
 | 1259 | asmlinkage long | 
 | 1260 | sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp) | 
 | 1261 | { | 
 | 1262 | 	struct timespec new_tp; | 
 | 1263 |  | 
 | 1264 | 	if (invalid_clockid(which_clock)) | 
 | 1265 | 		return -EINVAL; | 
 | 1266 | 	if (copy_from_user(&new_tp, tp, sizeof (*tp))) | 
 | 1267 | 		return -EFAULT; | 
 | 1268 |  | 
 | 1269 | 	return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); | 
 | 1270 | } | 
 | 1271 |  | 
 | 1272 | asmlinkage long | 
 | 1273 | sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp) | 
 | 1274 | { | 
 | 1275 | 	struct timespec kernel_tp; | 
 | 1276 | 	int error; | 
 | 1277 |  | 
 | 1278 | 	if (invalid_clockid(which_clock)) | 
 | 1279 | 		return -EINVAL; | 
 | 1280 | 	error = CLOCK_DISPATCH(which_clock, clock_get, | 
 | 1281 | 			       (which_clock, &kernel_tp)); | 
 | 1282 | 	if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) | 
 | 1283 | 		error = -EFAULT; | 
 | 1284 |  | 
 | 1285 | 	return error; | 
 | 1286 |  | 
 | 1287 | } | 
 | 1288 |  | 
 | 1289 | asmlinkage long | 
 | 1290 | sys_clock_getres(clockid_t which_clock, struct timespec __user *tp) | 
 | 1291 | { | 
 | 1292 | 	struct timespec rtn_tp; | 
 | 1293 | 	int error; | 
 | 1294 |  | 
 | 1295 | 	if (invalid_clockid(which_clock)) | 
 | 1296 | 		return -EINVAL; | 
 | 1297 |  | 
 | 1298 | 	error = CLOCK_DISPATCH(which_clock, clock_getres, | 
 | 1299 | 			       (which_clock, &rtn_tp)); | 
 | 1300 |  | 
 | 1301 | 	if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) { | 
 | 1302 | 		error = -EFAULT; | 
 | 1303 | 	} | 
 | 1304 |  | 
 | 1305 | 	return error; | 
 | 1306 | } | 
 | 1307 |  | 
 | 1308 | static void nanosleep_wake_up(unsigned long __data) | 
 | 1309 | { | 
 | 1310 | 	struct task_struct *p = (struct task_struct *) __data; | 
 | 1311 |  | 
 | 1312 | 	wake_up_process(p); | 
 | 1313 | } | 
 | 1314 |  | 
 | 1315 | /* | 
 | 1316 |  * The standard says that an absolute nanosleep call MUST wake up at | 
 | 1317 |  * the requested time in spite of clock settings.  Here is what we do: | 
 | 1318 |  * For each nanosleep call that needs it (only absolute and not on | 
 | 1319 |  * CLOCK_MONOTONIC* (as it can not be set)) we thread a little structure | 
 | 1320 |  * into the "nanosleep_abs_list".  All we need is the task_struct pointer. | 
 | 1321 |  * When ever the clock is set we just wake up all those tasks.	 The rest | 
 | 1322 |  * is done by the while loop in clock_nanosleep(). | 
 | 1323 |  * | 
 | 1324 |  * On locking, clock_was_set() is called from update_wall_clock which | 
 | 1325 |  * holds (or has held for it) a write_lock_irq( xtime_lock) and is | 
 | 1326 |  * called from the timer bh code.  Thus we need the irq save locks. | 
 | 1327 |  * | 
 | 1328 |  * Also, on the call from update_wall_clock, that is done as part of a | 
 | 1329 |  * softirq thing.  We don't want to delay the system that much (possibly | 
 | 1330 |  * long list of timers to fix), so we defer that work to keventd. | 
 | 1331 |  */ | 
 | 1332 |  | 
/*
 * Wait queue that common_nsleep() joins for absolute sleeps on clocks
 * other than CLOCK_MONOTONIC; clock_was_set() wakes everything on it.
 */
static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
/*
 * Work item that clock_was_set() schedules to re-run itself when it is
 * entered in interrupt context.
 */
static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL);

/* Taken with down()/up() around the abs-list fix-up in clock_was_set(). */
static DECLARE_MUTEX(clock_was_set_lock);
 | 1337 |  | 
 | 1338 | void clock_was_set(void) | 
 | 1339 | { | 
 | 1340 | 	struct k_itimer *timr; | 
 | 1341 | 	struct timespec new_wall_to; | 
 | 1342 | 	LIST_HEAD(cws_list); | 
 | 1343 | 	unsigned long seq; | 
 | 1344 |  | 
 | 1345 |  | 
 | 1346 | 	if (unlikely(in_interrupt())) { | 
 | 1347 | 		schedule_work(&clock_was_set_work); | 
 | 1348 | 		return; | 
 | 1349 | 	} | 
 | 1350 | 	wake_up_all(&nanosleep_abs_wqueue); | 
 | 1351 |  | 
 | 1352 | 	/* | 
 | 1353 | 	 * Check if there exist TIMER_ABSTIME timers to correct. | 
 | 1354 | 	 * | 
 | 1355 | 	 * Notes on locking: This code is run in task context with irq | 
 | 1356 | 	 * on.  We CAN be interrupted!  All other usage of the abs list | 
 | 1357 | 	 * lock is under the timer lock which holds the irq lock as | 
 | 1358 | 	 * well.  We REALLY don't want to scan the whole list with the | 
 | 1359 | 	 * interrupt system off, AND we would like a sequence lock on | 
 | 1360 | 	 * this code as well.  Since we assume that the clock will not | 
 | 1361 | 	 * be set often, it seems ok to take and release the irq lock | 
 | 1362 | 	 * for each timer.  In fact add_timer will do this, so this is | 
 | 1363 | 	 * not an issue.  So we know when we are done, we will move the | 
 | 1364 | 	 * whole list to a new location.  Then as we process each entry, | 
 | 1365 | 	 * we will move it to the actual list again.  This way, when our | 
 | 1366 | 	 * copy is empty, we are done.  We are not all that concerned | 
 | 1367 | 	 * about preemption so we will use a semaphore lock to protect | 
 | 1368 | 	 * aginst reentry.  This way we will not stall another | 
 | 1369 | 	 * processor.  It is possible that this may delay some timers | 
 | 1370 | 	 * that should have expired, given the new clock, but even this | 
 | 1371 | 	 * will be minimal as we will always update to the current time, | 
 | 1372 | 	 * even if it was set by a task that is waiting for entry to | 
 | 1373 | 	 * this code.  Timers that expire too early will be caught by | 
 | 1374 | 	 * the expire code and restarted. | 
 | 1375 |  | 
 | 1376 | 	 * Absolute timers that repeat are left in the abs list while | 
 | 1377 | 	 * waiting for the task to pick up the signal.  This means we | 
 | 1378 | 	 * may find timers that are not in the "add_timer" list, but are | 
 | 1379 | 	 * in the abs list.  We do the same thing for these, save | 
 | 1380 | 	 * putting them back in the "add_timer" list.  (Note, these are | 
 | 1381 | 	 * left in the abs list mainly to indicate that they are | 
 | 1382 | 	 * ABSOLUTE timers, a fact that is used by the re-arm code, and | 
 | 1383 | 	 * for which we have no other flag.) | 
 | 1384 |  | 
 | 1385 | 	 */ | 
 | 1386 |  | 
 | 1387 | 	down(&clock_was_set_lock); | 
 | 1388 | 	spin_lock_irq(&abs_list.lock); | 
 | 1389 | 	list_splice_init(&abs_list.list, &cws_list); | 
 | 1390 | 	spin_unlock_irq(&abs_list.lock); | 
 | 1391 | 	do { | 
 | 1392 | 		do { | 
 | 1393 | 			seq = read_seqbegin(&xtime_lock); | 
 | 1394 | 			new_wall_to =	wall_to_monotonic; | 
 | 1395 | 		} while (read_seqretry(&xtime_lock, seq)); | 
 | 1396 |  | 
 | 1397 | 		spin_lock_irq(&abs_list.lock); | 
 | 1398 | 		if (list_empty(&cws_list)) { | 
 | 1399 | 			spin_unlock_irq(&abs_list.lock); | 
 | 1400 | 			break; | 
 | 1401 | 		} | 
 | 1402 | 		timr = list_entry(cws_list.next, struct k_itimer, | 
 | 1403 | 				  it.real.abs_timer_entry); | 
 | 1404 |  | 
 | 1405 | 		list_del_init(&timr->it.real.abs_timer_entry); | 
 | 1406 | 		if (add_clockset_delta(timr, &new_wall_to) && | 
 | 1407 | 		    del_timer(&timr->it.real.timer))  /* timer run yet? */ | 
 | 1408 | 			add_timer(&timr->it.real.timer); | 
 | 1409 | 		list_add(&timr->it.real.abs_timer_entry, &abs_list.list); | 
 | 1410 | 		spin_unlock_irq(&abs_list.lock); | 
 | 1411 | 	} while (1); | 
 | 1412 |  | 
 | 1413 | 	up(&clock_was_set_lock); | 
 | 1414 | } | 
 | 1415 |  | 
 | 1416 | long clock_nanosleep_restart(struct restart_block *restart_block); | 
 | 1417 |  | 
 | 1418 | asmlinkage long | 
 | 1419 | sys_clock_nanosleep(clockid_t which_clock, int flags, | 
 | 1420 | 		    const struct timespec __user *rqtp, | 
 | 1421 | 		    struct timespec __user *rmtp) | 
 | 1422 | { | 
 | 1423 | 	struct timespec t; | 
 | 1424 | 	struct restart_block *restart_block = | 
 | 1425 | 	    &(current_thread_info()->restart_block); | 
 | 1426 | 	int ret; | 
 | 1427 |  | 
 | 1428 | 	if (invalid_clockid(which_clock)) | 
 | 1429 | 		return -EINVAL; | 
 | 1430 |  | 
 | 1431 | 	if (copy_from_user(&t, rqtp, sizeof (struct timespec))) | 
 | 1432 | 		return -EFAULT; | 
 | 1433 |  | 
 | 1434 | 	if ((unsigned) t.tv_nsec >= NSEC_PER_SEC || t.tv_sec < 0) | 
 | 1435 | 		return -EINVAL; | 
 | 1436 |  | 
 | 1437 | 	/* | 
 | 1438 | 	 * Do this here as nsleep function does not have the real address. | 
 | 1439 | 	 */ | 
 | 1440 | 	restart_block->arg1 = (unsigned long)rmtp; | 
 | 1441 |  | 
 | 1442 | 	ret = CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t)); | 
 | 1443 |  | 
 | 1444 | 	if ((ret == -ERESTART_RESTARTBLOCK) && rmtp && | 
 | 1445 | 					copy_to_user(rmtp, &t, sizeof (t))) | 
 | 1446 | 		return -EFAULT; | 
 | 1447 | 	return ret; | 
 | 1448 | } | 
 | 1449 |  | 
 | 1450 |  | 
 | 1451 | static int common_nsleep(clockid_t which_clock, | 
 | 1452 | 			 int flags, struct timespec *tsave) | 
 | 1453 | { | 
 | 1454 | 	struct timespec t, dum; | 
 | 1455 | 	struct timer_list new_timer; | 
 | 1456 | 	DECLARE_WAITQUEUE(abs_wqueue, current); | 
 | 1457 | 	u64 rq_time = (u64)0; | 
 | 1458 | 	s64 left; | 
 | 1459 | 	int abs; | 
 | 1460 | 	struct restart_block *restart_block = | 
 | 1461 | 	    ¤t_thread_info()->restart_block; | 
 | 1462 |  | 
 | 1463 | 	abs_wqueue.flags = 0; | 
 | 1464 | 	init_timer(&new_timer); | 
 | 1465 | 	new_timer.expires = 0; | 
 | 1466 | 	new_timer.data = (unsigned long) current; | 
 | 1467 | 	new_timer.function = nanosleep_wake_up; | 
 | 1468 | 	abs = flags & TIMER_ABSTIME; | 
 | 1469 |  | 
 | 1470 | 	if (restart_block->fn == clock_nanosleep_restart) { | 
 | 1471 | 		/* | 
 | 1472 | 		 * Interrupted by a non-delivered signal, pick up remaining | 
 | 1473 | 		 * time and continue.  Remaining time is in arg2 & 3. | 
 | 1474 | 		 */ | 
 | 1475 | 		restart_block->fn = do_no_restart_syscall; | 
 | 1476 |  | 
 | 1477 | 		rq_time = restart_block->arg3; | 
 | 1478 | 		rq_time = (rq_time << 32) + restart_block->arg2; | 
 | 1479 | 		if (!rq_time) | 
 | 1480 | 			return -EINTR; | 
 | 1481 | 		left = rq_time - get_jiffies_64(); | 
 | 1482 | 		if (left <= (s64)0) | 
 | 1483 | 			return 0;	/* Already passed */ | 
 | 1484 | 	} | 
 | 1485 |  | 
 | 1486 | 	if (abs && (posix_clocks[which_clock].clock_get != | 
 | 1487 | 			    posix_clocks[CLOCK_MONOTONIC].clock_get)) | 
 | 1488 | 		add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue); | 
 | 1489 |  | 
 | 1490 | 	do { | 
 | 1491 | 		t = *tsave; | 
 | 1492 | 		if (abs || !rq_time) { | 
 | 1493 | 			adjust_abs_time(&posix_clocks[which_clock], &t, abs, | 
 | 1494 | 					&rq_time, &dum); | 
 | 1495 | 		} | 
 | 1496 |  | 
 | 1497 | 		left = rq_time - get_jiffies_64(); | 
 | 1498 | 		if (left >= (s64)MAX_JIFFY_OFFSET) | 
 | 1499 | 			left = (s64)MAX_JIFFY_OFFSET; | 
 | 1500 | 		if (left < (s64)0) | 
 | 1501 | 			break; | 
 | 1502 |  | 
 | 1503 | 		new_timer.expires = jiffies + left; | 
 | 1504 | 		__set_current_state(TASK_INTERRUPTIBLE); | 
 | 1505 | 		add_timer(&new_timer); | 
 | 1506 |  | 
 | 1507 | 		schedule(); | 
 | 1508 |  | 
 | 1509 | 		del_timer_sync(&new_timer); | 
 | 1510 | 		left = rq_time - get_jiffies_64(); | 
 | 1511 | 	} while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING)); | 
 | 1512 |  | 
 | 1513 | 	if (abs_wqueue.task_list.next) | 
 | 1514 | 		finish_wait(&nanosleep_abs_wqueue, &abs_wqueue); | 
 | 1515 |  | 
 | 1516 | 	if (left > (s64)0) { | 
 | 1517 |  | 
 | 1518 | 		/* | 
 | 1519 | 		 * Always restart abs calls from scratch to pick up any | 
 | 1520 | 		 * clock shifting that happened while we are away. | 
 | 1521 | 		 */ | 
 | 1522 | 		if (abs) | 
 | 1523 | 			return -ERESTARTNOHAND; | 
 | 1524 |  | 
 | 1525 | 		left *= TICK_NSEC; | 
 | 1526 | 		tsave->tv_sec = div_long_long_rem(left,  | 
 | 1527 | 						  NSEC_PER_SEC,  | 
 | 1528 | 						  &tsave->tv_nsec); | 
 | 1529 | 		/* | 
 | 1530 | 		 * Restart works by saving the time remaing in  | 
 | 1531 | 		 * arg2 & 3 (it is 64-bits of jiffies).  The other | 
 | 1532 | 		 * info we need is the clock_id (saved in arg0).  | 
 | 1533 | 		 * The sys_call interface needs the users  | 
 | 1534 | 		 * timespec return address which _it_ saves in arg1. | 
 | 1535 | 		 * Since we have cast the nanosleep call to a clock_nanosleep | 
 | 1536 | 		 * both can be restarted with the same code. | 
 | 1537 | 		 */ | 
 | 1538 | 		restart_block->fn = clock_nanosleep_restart; | 
 | 1539 | 		restart_block->arg0 = which_clock; | 
 | 1540 | 		/* | 
 | 1541 | 		 * Caller sets arg1 | 
 | 1542 | 		 */ | 
 | 1543 | 		restart_block->arg2 = rq_time & 0xffffffffLL; | 
 | 1544 | 		restart_block->arg3 = rq_time >> 32; | 
 | 1545 |  | 
 | 1546 | 		return -ERESTART_RESTARTBLOCK; | 
 | 1547 | 	} | 
 | 1548 |  | 
 | 1549 | 	return 0; | 
 | 1550 | } | 
 | 1551 | /* | 
 | 1552 |  * This will restart clock_nanosleep. | 
 | 1553 |  */ | 
 | 1554 | long | 
 | 1555 | clock_nanosleep_restart(struct restart_block *restart_block) | 
 | 1556 | { | 
 | 1557 | 	struct timespec t; | 
 | 1558 | 	int ret = common_nsleep(restart_block->arg0, 0, &t); | 
 | 1559 |  | 
 | 1560 | 	if ((ret == -ERESTART_RESTARTBLOCK) && restart_block->arg1 && | 
 | 1561 | 	    copy_to_user((struct timespec __user *)(restart_block->arg1), &t, | 
 | 1562 | 			 sizeof (t))) | 
 | 1563 | 		return -EFAULT; | 
 | 1564 | 	return ret; | 
 | 1565 | } |