Blame - kernel/posix-timers.c - android_kernel_htc_msm8960

blob: fd316c2722604a866b359548db9659aa5c1eefbc [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/kernel/posix_timers.c
				3	*
				4	*
				5	* 2002-10-15 Posix Clocks & timers
				6	* by George Anzinger george@mvista.com
				7	*
				8	* Copyright (C) 2002 2003 by MontaVista Software.
				9	*
				10	* 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
				11	* Copyright (C) 2004 Boris Hu
				12	*
				13	* This program is free software; you can redistribute it and/or modify
				14	* it under the terms of the GNU General Public License as published by
				15	* the Free Software Foundation; either version 2 of the License, or (at
				16	* your option) any later version.
				17	*
				18	* This program is distributed in the hope that it will be useful, but
				19	* WITHOUT ANY WARRANTY; without even the implied warranty of
				20	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				21	* General Public License for more details.
				22
				23	* You should have received a copy of the GNU General Public License
				24	* along with this program; if not, write to the Free Software
				25	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
				26	*
				27	* MontaVista Software | 1237 East Arques Avenue | Sunnyvale | CA 94085 | USA
				28	*/
				29
				30	/* These are all the functions necessary to implement
				31	* POSIX clocks & timers
				32	*/
				33	#include <linux/mm.h>
				34	#include <linux/smp_lock.h>
				35	#include <linux/interrupt.h>
				36	#include <linux/slab.h>
				37	#include <linux/time.h>
				38
				39	#include <asm/uaccess.h>
				40	#include <asm/semaphore.h>
				41	#include <linux/list.h>
				42	#include <linux/init.h>
				43	#include <linux/compiler.h>
				44	#include <linux/idr.h>
				45	#include <linux/posix-timers.h>
				46	#include <linux/syscalls.h>
				47	#include <linux/wait.h>
				48	#include <linux/workqueue.h>
				49	#include <linux/module.h>
				50
				51	#ifndef div_long_long_rem
				52	#include <asm/div64.h>
				53
				54	#define div_long_long_rem(dividend,divisor,remainder) ({ \
				55	u64 result = dividend; \
				56	*remainder = do_div(result,divisor); \
				57	result; })
				58
				59	#endif
				60	#define CLOCK_REALTIME_RES TICK_NSEC /* In nano seconds. */
				61
				62	static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2)
				63	{
				64	return (u64)mpy1 * mpy2;
				65	}
				66	/*
				67	* Management arrays for POSIX timers. Timers are kept in slab memory
				68	* Timer ids are allocated by an external routine that keeps track of the
				69	* id and the timer. The external interface is:
				70	*
				71	* void *idr_find(struct idr *idp, int id); to find timer_id <id>
				72	* int idr_get_new(struct idr *idp, void *ptr, int *id); to get a new id
				73	* and relate it to <ptr>
				74	* void idr_remove(struct idr *idp, int id); to release <id>
				75	* void idr_init(struct idr *idp); to initialize <idp>
				76	* which we supply.
				77	* The idr_get_new may call slab for more memory so it must not be
				78	* called under a spin lock. Likewise idr_remove may release memory
				79	* (but it may be ok to do this under a lock...).
				80	* idr_find is just a memory look up and is quite fast. A NULL return
				81	* indicates that the requested id does not exist.
				82	*/
				83
				84	/*
				85	* Lets keep our timers in a slab cache :-)
				86	*/
				87	static kmem_cache_t *posix_timers_cache;
				88	static struct idr posix_timers_id;
				89	static DEFINE_SPINLOCK(idr_lock);
				90
				91	/*
				92	* Just because the timer is not in the timer list does NOT mean it is
				93	* inactive. It could be in the "fire" routine getting a new expire time.
				94	*/
				95	#define TIMER_INACTIVE 1
				96
				97	#ifdef CONFIG_SMP
				98	# define timer_active(tmr) \
				99	((tmr)->it.real.timer.entry.prev != (void *)TIMER_INACTIVE)
				100	# define set_timer_inactive(tmr) \
				101	do { \
				102	(tmr)->it.real.timer.entry.prev = (void *)TIMER_INACTIVE; \
				103	} while (0)
				104	#else
				105	# define timer_active(tmr) BARFY // error to use outside of SMP
				106	# define set_timer_inactive(tmr) do { } while (0)
				107	#endif
				108	/*
				109	* we assume that the new SIGEV_THREAD_ID shares no bits with the other
				110	* SIGEV values. Here we put out an error if this assumption fails.
				111	*/
				112	#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
				113	~(SIGEV_SIGNAL \| SIGEV_NONE \| SIGEV_THREAD))
				114	#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
				115	#endif
				116
				117
				118	/*
				119	* The timer ID is turned into a timer address by idr_find().
				120	* Verifying a valid ID consists of:
				121	*
				122	* a) checking that idr_find() returns a non-NULL pointer.
				123	* b) checking that the timer id matches the one in the timer itself.
				124	* c) that the timer owner is in the callers thread group.
				125	*/
				126
				127	/*
				128	* CLOCKs: The POSIX standard calls for a couple of clocks and allows us
				129	* to implement others. This structure defines the various
				130	* clocks and allows the possibility of adding others. We
				131	* provide an interface to add clocks to the table and expect
				132	* the "arch" code to add at least one clock that is high
				133	* resolution. Here we define the standard CLOCK_REALTIME as a
				134	* 1/HZ resolution clock.
				135	*
				136	* RESOLUTION: Clock resolution is used to round up timer and interval
				137	* times, NOT to report clock times, which are reported with as
				138	* much resolution as the system can muster. In some cases this
				139	* resolution may depend on the underlying clock hardware and
				140	* may not be quantifiable until run time, and only then is the
				141	* necessary code is written. The standard says we should say
				142	* something about this issue in the documentation...
				143	*
				144	* FUNCTIONS: The CLOCKs structure defines possible functions to handle
				145	* various clock functions. For clocks that use the standard
				146	* system timer code these entries should be NULL. This will
				147	* allow dispatch without the overhead of indirect function
				148	* calls. CLOCKS that depend on other sources (e.g. WWV or GPS)
				149	* must supply functions here, even if the function just returns
				150	* ENOSYS. The standard POSIX timer management code assumes the
				151	* following: 1.) The k_itimer struct (sched.h) is used for the
				152	* timer. 2.) The list, it_lock, it_clock, it_id and it_process
				153	* fields are not modified by timer code.
				154	*
				155	* At this time all functions EXCEPT clock_nanosleep can be
				156	* redirected by the CLOCKS structure. Clock_nanosleep is in
				157	* there, but the code ignores it.
				158	*
				159	* Permissions: It is assumed that the clock_settime() function defined
				160	* for each clock will take care of permission checks. Some
				161	* clocks may be set able by any user (i.e. local process
				162	* clocks) others not. Currently the only set able clock we
				163	* have is CLOCK_REALTIME and its high res counter part, both of
				164	* which we beg off on and pass to do_sys_settimeofday().
				165	*/
				166
				167	static struct k_clock posix_clocks[MAX_CLOCKS];
				168	/*
				169	* We only have one real clock that can be set so we need only one abs list,
				170	* even if we should want to have several clocks with differing resolutions.
				171	*/
				172	static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list),
				173	.lock = SPIN_LOCK_UNLOCKED};
				174
				175	static void posix_timer_fn(unsigned long);
				176	static u64 do_posix_clock_monotonic_gettime_parts(
				177	struct timespec tp, struct timespec mo);
				178	int do_posix_clock_monotonic_gettime(struct timespec *tp);
				179	static int do_posix_clock_monotonic_get(clockid_t, struct timespec *tp);
				180
				181	static struct k_itimer lock_timer(timer_t timer_id, unsigned long flags);
				182
				183	static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
				184	{
				185	spin_unlock_irqrestore(&timr->it_lock, flags);
				186	}
				187
				188	/*
				189	* Call the k_clock hook function if non-null, or the default function.
				190	*/
				191	#define CLOCK_DISPATCH(clock, call, arglist) \
				192	((clock) < 0 ? posix_cpu_##call arglist : \
				193	(posix_clocks[clock].call != NULL \
				194	? (*posix_clocks[clock].call) arglist : common_##call arglist))
				195
				196	/*
				197	* Default clock hook functions when the struct k_clock passed
				198	* to register_posix_clock leaves a function pointer null.
				199	*
				200	* The function common_CALL is the default implementation for
				201	* the function pointer CALL in struct k_clock.
				202	*/
				203
				204	static inline int common_clock_getres(clockid_t which_clock,
				205	struct timespec *tp)
				206	{
				207	tp->tv_sec = 0;
				208	tp->tv_nsec = posix_clocks[which_clock].res;
				209	return 0;
				210	}
				211
				212	static inline int common_clock_get(clockid_t which_clock, struct timespec *tp)
				213	{
				214	getnstimeofday(tp);
				215	return 0;
				216	}
				217
				218	static inline int common_clock_set(clockid_t which_clock, struct timespec *tp)
				219	{
				220	return do_sys_settimeofday(tp, NULL);
				221	}
				222
				223	static inline int common_timer_create(struct k_itimer *new_timer)
				224	{
				225	INIT_LIST_HEAD(&new_timer->it.real.abs_timer_entry);
				226	init_timer(&new_timer->it.real.timer);
				227	new_timer->it.real.timer.data = (unsigned long) new_timer;
				228	new_timer->it.real.timer.function = posix_timer_fn;
				229	set_timer_inactive(new_timer);
				230	return 0;
				231	}
				232
				233	/*
				234	* These ones are defined below.
				235	*/
				236	static int common_nsleep(clockid_t, int flags, struct timespec *t);
				237	static void common_timer_get(struct k_itimer , struct itimerspec );
				238	static int common_timer_set(struct k_itimer *, int,
				239	struct itimerspec , struct itimerspec );
				240	static int common_timer_del(struct k_itimer *timer);
				241
				242	/*
				243	* Return nonzero iff we know a priori this clockid_t value is bogus.
				244	*/
				245	static inline int invalid_clockid(clockid_t which_clock)
				246	{
				247	if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */
				248	return 0;
				249	if ((unsigned) which_clock >= MAX_CLOCKS)
				250	return 1;
				251	if (posix_clocks[which_clock].clock_getres != NULL)
				252	return 0;
				253	#ifndef CLOCK_DISPATCH_DIRECT
				254	if (posix_clocks[which_clock].res != 0)
				255	return 0;
				256	#endif
				257	return 1;
				258	}
				259
				260
				261	/*
				262	* Initialize everything, well, just everything in Posix clocks/timers ;)
				263	*/
				264	static __init int init_posix_timers(void)
				265	{
				266	struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES,
				267	.abs_struct = &abs_list
				268	};
				269	struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES,
				270	.abs_struct = NULL,
				271	.clock_get = do_posix_clock_monotonic_get,
				272	.clock_set = do_posix_clock_nosettime
				273	};
				274
				275	register_posix_clock(CLOCK_REALTIME, &clock_realtime);
				276	register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
				277
				278	posix_timers_cache = kmem_cache_create("posix_timers_cache",
				279	sizeof (struct k_itimer), 0, 0, NULL, NULL);
				280	idr_init(&posix_timers_id);
				281	return 0;
				282	}
				283
				284	__initcall(init_posix_timers);
				285
				286	static void tstojiffie(struct timespec tp, int res, u64 jiff)
				287	{
				288	long sec = tp->tv_sec;
				289	long nsec = tp->tv_nsec + res - 1;
				290
				291	if (nsec > NSEC_PER_SEC) {
				292	sec++;
				293	nsec -= NSEC_PER_SEC;
				294	}
				295
				296	/*
				297	* The scaling constants are defined in <linux/time.h>
				298	* The difference between there and here is that we do the
				299	* res rounding and compute a 64-bit result (well so does that
				300	* but it then throws away the high bits).
				301	*/
				302	*jiff = (mpy_l_X_l_ll(sec, SEC_CONVERSION) +
				303	(mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >>
				304	(NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
				305	}
				306
				307	/*
				308	* This function adjusts the timer as needed as a result of the clock
				309	* being set. It should only be called for absolute timers, and then
				310	* under the abs_list lock. It computes the time difference and sets
				311	* the new jiffies value in the timer. It also updates the timers
				312	* reference wall_to_monotonic value. It is complicated by the fact
				313	* that tstojiffies() only handles positive times and it needs to work
				314	* with both positive and negative times. Also, for negative offsets,
				315	* we need to defeat the res round up.
				316	*
				317	* Return is true if there is a new time, else false.
				318	*/
				319	static long add_clockset_delta(struct k_itimer *timr,
				320	struct timespec *new_wall_to)
				321	{
				322	struct timespec delta;
				323	int sign = 0;
				324	u64 exp;
				325
				326	set_normalized_timespec(&delta,
				327	new_wall_to->tv_sec -
				328	timr->it.real.wall_to_prev.tv_sec,
				329	new_wall_to->tv_nsec -
				330	timr->it.real.wall_to_prev.tv_nsec);
				331	if (likely(!(delta.tv_sec \| delta.tv_nsec)))
				332	return 0;
				333	if (delta.tv_sec < 0) {
				334	set_normalized_timespec(&delta,
				335	-delta.tv_sec,
				336	1 - delta.tv_nsec -
				337	posix_clocks[timr->it_clock].res);
				338	sign++;
				339	}
				340	tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp);
				341	timr->it.real.wall_to_prev = *new_wall_to;
				342	timr->it.real.timer.expires += (sign ? -exp : exp);
				343	return 1;
				344	}
				345
				346	static void remove_from_abslist(struct k_itimer *timr)
				347	{
				348	if (!list_empty(&timr->it.real.abs_timer_entry)) {
				349	spin_lock(&abs_list.lock);
				350	list_del_init(&timr->it.real.abs_timer_entry);
				351	spin_unlock(&abs_list.lock);
				352	}
				353	}
				354
				355	static void schedule_next_timer(struct k_itimer *timr)
				356	{
				357	struct timespec new_wall_to;
				358	struct now_struct now;
				359	unsigned long seq;
				360
				361	/*
				362	* Set up the timer for the next interval (if there is one).
				363	* Note: this code uses the abs_timer_lock to protect
				364	* it.real.wall_to_prev and must hold it until exp is set, not exactly
				365	* obvious...
				366
				367	* This function is used for CLOCK_REALTIME* and
				368	* CLOCK_MONOTONIC* timers. If we ever want to handle other
				369	* CLOCKs, the calling code (do_schedule_next_timer) would need
				370	* to pull the "clock" info from the timer and dispatch the
				371	* "other" CLOCKs "next timer" code (which, I suppose should
				372	* also be added to the k_clock structure).
				373	*/
				374	if (!timr->it.real.incr)
				375	return;
				376
				377	do {
				378	seq = read_seqbegin(&xtime_lock);
				379	new_wall_to = wall_to_monotonic;
				380	posix_get_now(&now);
				381	} while (read_seqretry(&xtime_lock, seq));
				382
				383	if (!list_empty(&timr->it.real.abs_timer_entry)) {
				384	spin_lock(&abs_list.lock);
				385	add_clockset_delta(timr, &new_wall_to);
				386
				387	posix_bump_timer(timr, now);
				388
				389	spin_unlock(&abs_list.lock);
				390	} else {
				391	posix_bump_timer(timr, now);
				392	}
				393	timr->it_overrun_last = timr->it_overrun;
				394	timr->it_overrun = -1;
				395	++timr->it_requeue_pending;
				396	add_timer(&timr->it.real.timer);
				397	}
				398
				399	/*
				400	* This function is exported for use by the signal deliver code. It is
				401	* called just prior to the info block being released and passes that
				402	* block to us. It's function is to update the overrun entry AND to
				403	* restart the timer. It should only be called if the timer is to be
				404	* restarted (i.e. we have flagged this in the sys_private entry of the
				405	* info block).
				406	*
				407	* To protect aginst the timer going away while the interrupt is queued,
				408	* we require that the it_requeue_pending flag be set.
				409	*/
				410	void do_schedule_next_timer(struct siginfo *info)
				411	{
				412	struct k_itimer *timr;
				413	unsigned long flags;
				414
				415	timr = lock_timer(info->si_tid, &flags);
				416
				417	if (!timr \|\| timr->it_requeue_pending != info->si_sys_private)
				418	goto exit;
				419
				420	if (timr->it_clock < 0) /* CPU clock */
				421	posix_cpu_timer_schedule(timr);
				422	else
				423	schedule_next_timer(timr);
				424	info->si_overrun = timr->it_overrun_last;
				425	exit:
				426	if (timr)
				427	unlock_timer(timr, flags);
				428	}
				429
				430	int posix_timer_event(struct k_itimer *timr,int si_private)
				431	{
				432	memset(&timr->sigq->info, 0, sizeof(siginfo_t));
				433	timr->sigq->info.si_sys_private = si_private;
				434	/*
				435	* Send signal to the process that owns this timer.
				436
				437	* This code assumes that all the possible abs_lists share the
				438	* same lock (there is only one list at this time). If this is
				439	* not the case, the CLOCK info would need to be used to find
				440	* the proper abs list lock.
				441	*/
				442
				443	timr->sigq->info.si_signo = timr->it_sigev_signo;
				444	timr->sigq->info.si_errno = 0;
				445	timr->sigq->info.si_code = SI_TIMER;
				446	timr->sigq->info.si_tid = timr->it_id;
				447	timr->sigq->info.si_value = timr->it_sigev_value;
				448	if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
				449	if (unlikely(timr->it_process->flags & PF_EXITING)) {
				450	timr->it_sigev_notify = SIGEV_SIGNAL;
				451	put_task_struct(timr->it_process);
				452	timr->it_process = timr->it_process->group_leader;
				453	goto group;
				454	}
				455	return send_sigqueue(timr->it_sigev_signo, timr->sigq,
				456	timr->it_process);
				457	}
				458	else {
				459	group:
				460	return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
				461	timr->it_process);
				462	}
				463	}
				464	EXPORT_SYMBOL_GPL(posix_timer_event);
				465
				466	/*
				467	* This function gets called when a POSIX.1b interval timer expires. It
				468	* is used as a callback from the kernel internal timer. The
				469	* run_timer_list code ALWAYS calls with interrupts on.
				470
				471	* This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
				472	*/
				473	static void posix_timer_fn(unsigned long __data)
				474	{
				475	struct k_itimer timr = (struct k_itimer ) __data;
				476	unsigned long flags;
				477	unsigned long seq;
				478	struct timespec delta, new_wall_to;
				479	u64 exp = 0;
				480	int do_notify = 1;
				481
				482	spin_lock_irqsave(&timr->it_lock, flags);
				483	set_timer_inactive(timr);
				484	if (!list_empty(&timr->it.real.abs_timer_entry)) {
				485	spin_lock(&abs_list.lock);
				486	do {
				487	seq = read_seqbegin(&xtime_lock);
				488	new_wall_to = wall_to_monotonic;
				489	} while (read_seqretry(&xtime_lock, seq));
				490	set_normalized_timespec(&delta,
				491	new_wall_to.tv_sec -
				492	timr->it.real.wall_to_prev.tv_sec,
				493	new_wall_to.tv_nsec -
				494	timr->it.real.wall_to_prev.tv_nsec);
				495	if (likely((delta.tv_sec \| delta.tv_nsec ) == 0)) {
				496	/* do nothing, timer is on time */
				497	} else if (delta.tv_sec < 0) {
				498	/* do nothing, timer is already late */
				499	} else {
				500	/* timer is early due to a clock set */
				501	tstojiffie(&delta,
				502	posix_clocks[timr->it_clock].res,
				503	&exp);
				504	timr->it.real.wall_to_prev = new_wall_to;
				505	timr->it.real.timer.expires += exp;
				506	add_timer(&timr->it.real.timer);
				507	do_notify = 0;
				508	}
				509	spin_unlock(&abs_list.lock);
				510
				511	}
				512	if (do_notify) {
				513	int si_private=0;
				514
				515	if (timr->it.real.incr)
				516	si_private = ++timr->it_requeue_pending;
				517	else {
				518	remove_from_abslist(timr);
				519	}
				520
				521	if (posix_timer_event(timr, si_private))
				522	/*
				523	* signal was not sent because of sig_ignor
				524	* we will not get a call back to restart it AND
				525	* it should be restarted.
				526	*/
				527	schedule_next_timer(timr);
				528	}
				529	unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */
				530	}
				531
				532
				533	static inline struct task_struct * good_sigevent(sigevent_t * event)
				534	{
				535	struct task_struct *rtn = current->group_leader;
				536
				537	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
				538	(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) \|\|
				539	rtn->tgid != current->tgid \|\|
				540	(event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
				541	return NULL;
				542
				543	if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
				544	((event->sigev_signo <= 0) \|\| (event->sigev_signo > SIGRTMAX)))
				545	return NULL;
				546
				547	return rtn;
				548	}
				549
				550	void register_posix_clock(clockid_t clock_id, struct k_clock *new_clock)
				551	{
				552	if ((unsigned) clock_id >= MAX_CLOCKS) {
				553	printk("POSIX clock register failed for clock_id %d\n",
				554	clock_id);
				555	return;
				556	}
				557
				558	posix_clocks[clock_id] = *new_clock;
				559	}
				560	EXPORT_SYMBOL_GPL(register_posix_clock);
				561
				562	static struct k_itimer * alloc_posix_timer(void)
				563	{
				564	struct k_itimer *tmr;
				565	tmr = kmem_cache_alloc(posix_timers_cache, GFP_KERNEL);
				566	if (!tmr)
				567	return tmr;
				568	memset(tmr, 0, sizeof (struct k_itimer));
				569	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
				570	kmem_cache_free(posix_timers_cache, tmr);
				571	tmr = NULL;
				572	}
				573	return tmr;
				574	}
				575
				576	#define IT_ID_SET 1
				577	#define IT_ID_NOT_SET 0
				578	static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
				579	{
				580	if (it_id_set) {
				581	unsigned long flags;
				582	spin_lock_irqsave(&idr_lock, flags);
				583	idr_remove(&posix_timers_id, tmr->it_id);
				584	spin_unlock_irqrestore(&idr_lock, flags);
				585	}
				586	sigqueue_free(tmr->sigq);
				587	if (unlikely(tmr->it_process) &&
				588	tmr->it_sigev_notify == (SIGEV_SIGNAL\|SIGEV_THREAD_ID))
				589	put_task_struct(tmr->it_process);
				590	kmem_cache_free(posix_timers_cache, tmr);
				591	}
				592
				593	/* Create a POSIX.1b interval timer. */
				594
				595	asmlinkage long
				596	sys_timer_create(clockid_t which_clock,
				597	struct sigevent __user *timer_event_spec,
				598	timer_t __user * created_timer_id)
				599	{
				600	int error = 0;
				601	struct k_itimer *new_timer = NULL;
				602	int new_timer_id;
				603	struct task_struct *process = NULL;
				604	unsigned long flags;
				605	sigevent_t event;
				606	int it_id_set = IT_ID_NOT_SET;
				607
				608	if (invalid_clockid(which_clock))
				609	return -EINVAL;
				610
				611	new_timer = alloc_posix_timer();
				612	if (unlikely(!new_timer))
				613	return -EAGAIN;
				614
				615	spin_lock_init(&new_timer->it_lock);
				616	retry:
				617	if (unlikely(!idr_pre_get(&posix_timers_id, GFP_KERNEL))) {
				618	error = -EAGAIN;
				619	goto out;
				620	}
				621	spin_lock_irq(&idr_lock);
				622	error = idr_get_new(&posix_timers_id,
				623	(void *) new_timer,
				624	&new_timer_id);
				625	spin_unlock_irq(&idr_lock);
				626	if (error == -EAGAIN)
				627	goto retry;
				628	else if (error) {
				629	/*
				630	* Wierd looking, but we return EAGAIN if the IDR is
				631	* full (proper POSIX return value for this)
				632	*/
				633	error = -EAGAIN;
				634	goto out;
				635	}
				636
				637	it_id_set = IT_ID_SET;
				638	new_timer->it_id = (timer_t) new_timer_id;
				639	new_timer->it_clock = which_clock;
				640	new_timer->it_overrun = -1;
				641	error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
				642	if (error)
				643	goto out;
				644
				645	/*
				646	* return the timer_id now. The next step is hard to
				647	* back out if there is an error.
				648	*/
				649	if (copy_to_user(created_timer_id,
				650	&new_timer_id, sizeof (new_timer_id))) {
				651	error = -EFAULT;
				652	goto out;
				653	}
				654	if (timer_event_spec) {
				655	if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
				656	error = -EFAULT;
				657	goto out;
				658	}
				659	new_timer->it_sigev_notify = event.sigev_notify;
				660	new_timer->it_sigev_signo = event.sigev_signo;
				661	new_timer->it_sigev_value = event.sigev_value;
				662
				663	read_lock(&tasklist_lock);
				664	if ((process = good_sigevent(&event))) {
				665	/*
				666	* We may be setting up this process for another
				667	* thread. It may be exiting. To catch this
				668	* case the we check the PF_EXITING flag. If
				669	* the flag is not set, the siglock will catch
				670	* him before it is too late (in exit_itimers).
				671	*
				672	* The exec case is a bit more invloved but easy
				673	* to code. If the process is in our thread
				674	* group (and it must be or we would not allow
				675	* it here) and is doing an exec, it will cause
				676	* us to be killed. In this case it will wait
				677	* for us to die which means we can finish this
				678	* linkage with our last gasp. I.e. no code :)
				679	*/
				680	spin_lock_irqsave(&process->sighand->siglock, flags);
				681	if (!(process->flags & PF_EXITING)) {
				682	new_timer->it_process = process;
				683	list_add(&new_timer->list,
				684	&process->signal->posix_timers);
				685	spin_unlock_irqrestore(&process->sighand->siglock, flags);
				686	if (new_timer->it_sigev_notify == (SIGEV_SIGNAL\|SIGEV_THREAD_ID))
				687	get_task_struct(process);
				688	} else {
				689	spin_unlock_irqrestore(&process->sighand->siglock, flags);
				690	process = NULL;
				691	}
				692	}
				693	read_unlock(&tasklist_lock);
				694	if (!process) {
				695	error = -EINVAL;
				696	goto out;
				697	}
				698	} else {
				699	new_timer->it_sigev_notify = SIGEV_SIGNAL;
				700	new_timer->it_sigev_signo = SIGALRM;
				701	new_timer->it_sigev_value.sival_int = new_timer->it_id;
				702	process = current->group_leader;
				703	spin_lock_irqsave(&process->sighand->siglock, flags);
				704	new_timer->it_process = process;
				705	list_add(&new_timer->list, &process->signal->posix_timers);
				706	spin_unlock_irqrestore(&process->sighand->siglock, flags);
				707	}
				708
				709	/*
				710	* In the case of the timer belonging to another task, after
				711	* the task is unlocked, the timer is owned by the other task
				712	* and may cease to exist at any time. Don't use or modify
				713	* new_timer after the unlock call.
				714	*/
				715
				716	out:
				717	if (error)
				718	release_posix_timer(new_timer, it_id_set);
				719
				720	return error;
				721	}
				722
				723	/*
				724	* good_timespec
				725	*
				726	* This function checks the elements of a timespec structure.
				727	*
				728	* Arguments:
				729	* ts : Pointer to the timespec structure to check
				730	*
				731	* Return value:
				732	* If a NULL pointer was passed in, or the tv_nsec field was less than 0
				733	* or greater than NSEC_PER_SEC, or the tv_sec field was less than 0,
				734	* this function returns 0. Otherwise it returns 1.
				735	*/
				736	static int good_timespec(const struct timespec *ts)
				737	{
				738	if ((!ts) \|\| (ts->tv_sec < 0) \|\|
				739	((unsigned) ts->tv_nsec >= NSEC_PER_SEC))
				740	return 0;
				741	return 1;
				742	}
				743
				744	/*
				745	* Locking issues: We need to protect the result of the id look up until
				746	* we get the timer locked down so it is not deleted under us. The
				747	* removal is done under the idr spinlock so we use that here to bridge
				748	* the find to the timer lock. To avoid a dead lock, the timer id MUST
				749	* be release with out holding the timer lock.
				750	*/
				751	static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
				752	{
				753	struct k_itimer *timr;
				754	/*
				755	* Watch out here. We do a irqsave on the idr_lock and pass the
				756	* flags part over to the timer lock. Must not let interrupts in
				757	* while we are moving the lock.
				758	*/
				759
				760	spin_lock_irqsave(&idr_lock, *flags);
				761	timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
				762	if (timr) {
				763	spin_lock(&timr->it_lock);
				764	spin_unlock(&idr_lock);
				765
				766	if ((timr->it_id != timer_id) \|\| !(timr->it_process) \|\|
				767	timr->it_process->tgid != current->tgid) {
				768	unlock_timer(timr, *flags);
				769	timr = NULL;
				770	}
				771	} else
				772	spin_unlock_irqrestore(&idr_lock, *flags);
				773
				774	return timr;
				775	}
				776
				777	/*
				778	* Get the time remaining on a POSIX.1b interval timer. This function
				779	* is ALWAYS called with spin_lock_irq on the timer, thus it must not
				780	* mess with irq.
				781	*
				782	* We have a couple of messes to clean up here. First there is the case
				783	* of a timer that has a requeue pending. These timers should appear to
				784	* be in the timer list with an expiry as if we were to requeue them
				785	* now.
				786	*
				787	* The second issue is the SIGEV_NONE timer which may be active but is
				788	* not really ever put in the timer list (to save system resources).
				789	* This timer may be expired, and if so, we will do it here. Otherwise
				790	* it is the same as a requeue pending timer WRT to what we should
				791	* report.
				792	*/
				793	static void
				794	common_timer_get(struct k_itimer timr, struct itimerspec cur_setting)
				795	{
				796	unsigned long expires;
				797	struct now_struct now;
				798
				799	do
				800	expires = timr->it.real.timer.expires;
				801	while ((volatile long) (timr->it.real.timer.expires) != expires);
				802
				803	posix_get_now(&now);
				804
				805	if (expires &&
				806	((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) &&
				807	!timr->it.real.incr &&
				808	posix_time_before(&timr->it.real.timer, &now))
				809	timr->it.real.timer.expires = expires = 0;
				810	if (expires) {
				811	if (timr->it_requeue_pending & REQUEUE_PENDING \|\|
				812	(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
				813	posix_bump_timer(timr, now);
				814	expires = timr->it.real.timer.expires;
				815	}
				816	else
				817	if (!timer_pending(&timr->it.real.timer))
				818	expires = 0;
				819	if (expires)
				820	expires -= now.jiffies;
				821	}
				822	jiffies_to_timespec(expires, &cur_setting->it_value);
				823	jiffies_to_timespec(timr->it.real.incr, &cur_setting->it_interval);
				824
				825	if (cur_setting->it_value.tv_sec < 0) {
				826	cur_setting->it_value.tv_nsec = 1;
				827	cur_setting->it_value.tv_sec = 0;
				828	}
				829	}
				830
				831	/* Get the time remaining on a POSIX.1b interval timer. */
				832	asmlinkage long
				833	sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting)
				834	{
				835	struct k_itimer *timr;
				836	struct itimerspec cur_setting;
				837	unsigned long flags;
				838
				839	timr = lock_timer(timer_id, &flags);
				840	if (!timr)
				841	return -EINVAL;
				842
				843	CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting));
				844
				845	unlock_timer(timr, flags);
				846
				847	if (copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
				848	return -EFAULT;
				849
				850	return 0;
				851	}
				852	/*
				853	* Get the number of overruns of a POSIX.1b interval timer. This is to
				854	* be the overrun of the timer last delivered. At the same time we are
				855	* accumulating overruns on the next timer. The overrun is frozen when
				856	* the signal is delivered, either at the notify time (if the info block
				857	* is not queued) or at the actual delivery time (as we are informed by
				858	* the call back to do_schedule_next_timer(). So all we need to do is
				859	* to pick up the frozen overrun.
				860	*/
				861
				862	asmlinkage long
				863	sys_timer_getoverrun(timer_t timer_id)
				864	{
				865	struct k_itimer *timr;
				866	int overrun;
				867	long flags;
				868
				869	timr = lock_timer(timer_id, &flags);
				870	if (!timr)
				871	return -EINVAL;
				872
				873	overrun = timr->it_overrun_last;
				874	unlock_timer(timr, flags);
				875
				876	return overrun;
				877	}
				878	/*
				879	* Adjust for absolute time
				880	*
				881	* If absolute time is given and it is not CLOCK_MONOTONIC, we need to
				882	* adjust for the offset between the timer clock (CLOCK_MONOTONIC) and
				883	* what ever clock he is using.
				884	*
				885	* If it is relative time, we need to add the current (CLOCK_MONOTONIC)
				886	* time to it to get the proper time for the timer.
				887	*/
				888	static int adjust_abs_time(struct k_clock clock, struct timespec tp,
				889	int abs, u64 exp, struct timespec wall_to)
				890	{
				891	struct timespec now;
				892	struct timespec oc = *tp;
				893	u64 jiffies_64_f;
				894	int rtn =0;
				895
				896	if (abs) {
				897	/*
				898	* The mask pick up the 4 basic clocks
				899	*/
				900	if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) {
				901	jiffies_64_f = do_posix_clock_monotonic_gettime_parts(
				902	&now, wall_to);
				903	/*
				904	* If we are doing a MONOTONIC clock
				905	*/
				906	if((clock - &posix_clocks[0]) & CLOCKS_MONO){
				907	now.tv_sec += wall_to->tv_sec;
				908	now.tv_nsec += wall_to->tv_nsec;
				909	}
				910	} else {
				911	/*
				912	* Not one of the basic clocks
				913	*/
				914	clock->clock_get(clock - posix_clocks, &now);
				915	jiffies_64_f = get_jiffies_64();
				916	}
				917	/*
				918	* Take away now to get delta
				919	*/
				920	oc.tv_sec -= now.tv_sec;
				921	oc.tv_nsec -= now.tv_nsec;
				922	/*
				923	* Normalize...
				924	*/
				925	while ((oc.tv_nsec - NSEC_PER_SEC) >= 0) {
				926	oc.tv_nsec -= NSEC_PER_SEC;
				927	oc.tv_sec++;
				928	}
				929	while ((oc.tv_nsec) < 0) {
				930	oc.tv_nsec += NSEC_PER_SEC;
				931	oc.tv_sec--;
				932	}
				933	}else{
				934	jiffies_64_f = get_jiffies_64();
				935	}
				936	/*
				937	* Check if the requested time is prior to now (if so set now)
				938	*/
				939	if (oc.tv_sec < 0)
				940	oc.tv_sec = oc.tv_nsec = 0;
				941
				942	if (oc.tv_sec \| oc.tv_nsec)
				943	set_normalized_timespec(&oc, oc.tv_sec,
				944	oc.tv_nsec + clock->res);
				945	tstojiffie(&oc, clock->res, exp);
				946
				947	/*
				948	* Check if the requested time is more than the timer code
				949	* can handle (if so we error out but return the value too).
				950	*/
				951	if (*exp > ((u64)MAX_JIFFY_OFFSET))
				952	/*
				953	* This is a considered response, not exactly in
				954	* line with the standard (in fact it is silent on
				955	* possible overflows). We assume such a large
				956	* value is ALMOST always a programming error and
				957	* try not to compound it by setting a really dumb
				958	* value.
				959	*/
				960	rtn = -EINVAL;
				961	/*
				962	* return the actual jiffies expire time, full 64 bits
				963	*/
				964	*exp += jiffies_64_f;
				965	return rtn;
				966	}
				967
				968	/* Set a POSIX.1b interval timer. */
				969	/* timr->it_lock is taken. */
				970	static inline int
				971	common_timer_set(struct k_itimer *timr, int flags,
				972	struct itimerspec new_setting, struct itimerspec old_setting)
				973	{
				974	struct k_clock *clock = &posix_clocks[timr->it_clock];
				975	u64 expire_64;
				976
				977	if (old_setting)
				978	common_timer_get(timr, old_setting);
				979
				980	/* disable the timer */
				981	timr->it.real.incr = 0;
				982	/*
				983	* careful here. If smp we could be in the "fire" routine which will
				984	* be spinning as we hold the lock. But this is ONLY an SMP issue.
				985	*/
				986	#ifdef CONFIG_SMP
				987	if (timer_active(timr) && !del_timer(&timr->it.real.timer))
				988	/*
				989	* It can only be active if on an other cpu. Since
				990	* we have cleared the interval stuff above, it should
				991	* clear once we release the spin lock. Of course once
				992	* we do that anything could happen, including the
				993	* complete melt down of the timer. So return with
				994	* a "retry" exit status.
				995	*/
				996	return TIMER_RETRY;
				997
				998	set_timer_inactive(timr);
				999	#else
				1000	del_timer(&timr->it.real.timer);
				1001	#endif
				1002	remove_from_abslist(timr);
				1003
				1004	timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
				1005	~REQUEUE_PENDING;
				1006	timr->it_overrun_last = 0;
				1007	timr->it_overrun = -1;
				1008	/*
				1009	*switch off the timer when it_value is zero
				1010	*/
				1011	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) {
				1012	timr->it.real.timer.expires = 0;
				1013	return 0;
				1014	}
				1015
				1016	if (adjust_abs_time(clock,
				1017	&new_setting->it_value, flags & TIMER_ABSTIME,
				1018	&expire_64, &(timr->it.real.wall_to_prev))) {
				1019	return -EINVAL;
				1020	}
				1021	timr->it.real.timer.expires = (unsigned long)expire_64;
				1022	tstojiffie(&new_setting->it_interval, clock->res, &expire_64);
				1023	timr->it.real.incr = (unsigned long)expire_64;
				1024
				1025	/*
				1026	* We do not even queue SIGEV_NONE timers! But we do put them
				1027	* in the abs list so we can do that right.
				1028	*/
				1029	if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE))
				1030	add_timer(&timr->it.real.timer);
				1031
				1032	if (flags & TIMER_ABSTIME && clock->abs_struct) {
				1033	spin_lock(&clock->abs_struct->lock);
				1034	list_add_tail(&(timr->it.real.abs_timer_entry),
				1035	&(clock->abs_struct->list));
				1036	spin_unlock(&clock->abs_struct->lock);
				1037	}
				1038	return 0;
				1039	}
				1040
				1041	/* Set a POSIX.1b interval timer */
				1042	asmlinkage long
				1043	sys_timer_settime(timer_t timer_id, int flags,
				1044	const struct itimerspec __user *new_setting,
				1045	struct itimerspec __user *old_setting)
				1046	{
				1047	struct k_itimer *timr;
				1048	struct itimerspec new_spec, old_spec;
				1049	int error = 0;
				1050	long flag;
				1051	struct itimerspec *rtn = old_setting ? &old_spec : NULL;
				1052
				1053	if (!new_setting)
				1054	return -EINVAL;
				1055
				1056	if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
				1057	return -EFAULT;
				1058
				1059	if ((!good_timespec(&new_spec.it_interval)) \|\|
				1060	(!good_timespec(&new_spec.it_value)))
				1061	return -EINVAL;
				1062	retry:
				1063	timr = lock_timer(timer_id, &flag);
				1064	if (!timr)
				1065	return -EINVAL;
				1066
				1067	error = CLOCK_DISPATCH(timr->it_clock, timer_set,
				1068	(timr, flags, &new_spec, rtn));
				1069
				1070	unlock_timer(timr, flag);
				1071	if (error == TIMER_RETRY) {
				1072	rtn = NULL; // We already got the old time...
				1073	goto retry;
				1074	}
				1075
				1076	if (old_setting && !error && copy_to_user(old_setting,
				1077	&old_spec, sizeof (old_spec)))
				1078	error = -EFAULT;
				1079
				1080	return error;
				1081	}
				1082
				1083	static inline int common_timer_del(struct k_itimer *timer)
				1084	{
				1085	timer->it.real.incr = 0;
				1086	#ifdef CONFIG_SMP
				1087	if (timer_active(timer) && !del_timer(&timer->it.real.timer))
				1088	/*
				1089	* It can only be active if on an other cpu. Since
				1090	* we have cleared the interval stuff above, it should
				1091	* clear once we release the spin lock. Of course once
				1092	* we do that anything could happen, including the
				1093	* complete melt down of the timer. So return with
				1094	* a "retry" exit status.
				1095	*/
				1096	return TIMER_RETRY;
				1097	#else
				1098	del_timer(&timer->it.real.timer);
				1099	#endif
				1100	remove_from_abslist(timer);
				1101
				1102	return 0;
				1103	}
				1104
				1105	static inline int timer_delete_hook(struct k_itimer *timer)
				1106	{
					/*
					 * Route the deletion through CLOCK_DISPATCH using the timer's
					 * own clock id and hand back the timer_del hook's result
					 * (callers in this file compare it against TIMER_RETRY).
					 * NOTE(review): CLOCK_DISPATCH is defined outside this chunk;
					 * presumably it falls back to common_timer_del() - confirm.
					 */
				1107	return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer));
				1108	}
				1109
				1110	/* Delete a POSIX.1b interval timer. */
				1111	asmlinkage long
				1112	sys_timer_delete(timer_t timer_id)
				1113	{
				1114	struct k_itimer *timer;
				1115	long flags;
				1116
				1117	#ifdef CONFIG_SMP
				1118	int error;
				1119	retry_delete:
				1120	#endif
				1121	timer = lock_timer(timer_id, &flags);
				1122	if (!timer)
				1123	return -EINVAL;
				1124
				1125	#ifdef CONFIG_SMP
				1126	error = timer_delete_hook(timer);
				1127
				1128	if (error == TIMER_RETRY) {
				1129	unlock_timer(timer, flags);
				1130	goto retry_delete;
				1131	}
				1132	#else
				1133	timer_delete_hook(timer);
				1134	#endif
				1135	spin_lock(&current->sighand->siglock);
				1136	list_del(&timer->list);
				1137	spin_unlock(&current->sighand->siglock);
				1138	/*
				1139	* This keeps any tasks waiting on the spin lock from thinking
				1140	* they got something (see the lock code above).
				1141	*/
				1142	if (timer->it_process) {
				1143	if (timer->it_sigev_notify == (SIGEV_SIGNAL\|SIGEV_THREAD_ID))
				1144	put_task_struct(timer->it_process);
				1145	timer->it_process = NULL;
				1146	}
				1147	unlock_timer(timer, flags);
				1148	release_posix_timer(timer, IT_ID_SET);
				1149	return 0;
				1150	}
				1151	/*
				1152	* return timer owned by the process, used by exit_itimers
				1153	*/
				1154	static inline void itimer_delete(struct k_itimer *timer)
				1155	{
				1156	unsigned long flags;
				1157
				1158	#ifdef CONFIG_SMP
				1159	int error;
				1160	retry_delete:
				1161	#endif
				1162	spin_lock_irqsave(&timer->it_lock, flags);
				1163
				1164	#ifdef CONFIG_SMP
				1165	error = timer_delete_hook(timer);
				1166
				1167	if (error == TIMER_RETRY) {
				1168	unlock_timer(timer, flags);
				1169	goto retry_delete;
				1170	}
				1171	#else
				1172	timer_delete_hook(timer);
				1173	#endif
				1174	list_del(&timer->list);
				1175	/*
				1176	* This keeps any tasks waiting on the spin lock from thinking
				1177	* they got something (see the lock code above).
				1178	*/
				1179	if (timer->it_process) {
				1180	if (timer->it_sigev_notify == (SIGEV_SIGNAL\|SIGEV_THREAD_ID))
				1181	put_task_struct(timer->it_process);
				1182	timer->it_process = NULL;
				1183	}
				1184	unlock_timer(timer, flags);
				1185	release_posix_timer(timer, IT_ID_SET);
				1186	}
				1187
				1188	/*
				1189	* This is called by __exit_signal, only when there are no more
				1190	* references to the shared signal_struct.
				1191	*/
				1192	void exit_itimers(struct signal_struct *sig)
				1193	{
				1194	struct k_itimer *tmr;
				1195
				1196	while (!list_empty(&sig->posix_timers)) {
				1197	tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
				1198	itimer_delete(tmr);
				1199	}
				1200	}
				1201
				1202	/*
				1203	* And now for the "clock" calls
				1204	*
				1205	* These functions are called both from timer functions (with the timer
				1206	* spin_lock_irq() held) and from clock calls with no locking. They must
				1207	* use the save flags versions of locks.
				1208	*/
				1209
				1210	/*
				1211	* We do ticks here to avoid the irq lock ( they take sooo long).
				1212	* The seqlock is great here. Since we a reader, we don't really care
				1213	* if we are interrupted since we don't take lock that will stall us or
				1214	* any other cpu. Voila, no irq lock is needed.
				1215	*
				1216	*/
				1217
				1218	static u64 do_posix_clock_monotonic_gettime_parts(
				1219	struct timespec tp, struct timespec mo)
				1220	{
				1221	u64 jiff;
				1222	unsigned int seq;
				1223
				1224	do {
				1225	seq = read_seqbegin(&xtime_lock);
				1226	getnstimeofday(tp);
				1227	*mo = wall_to_monotonic;
				1228	jiff = jiffies_64;
				1229
				1230	} while(read_seqretry(&xtime_lock, seq));
				1231
				1232	return jiff;
				1233	}
				1234
				1235	static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp)
				1236	{
				1237	struct timespec wall_to_mono;
				1238
				1239	do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono);
				1240
				1241	tp->tv_sec += wall_to_mono.tv_sec;
				1242	tp->tv_nsec += wall_to_mono.tv_nsec;
				1243
				1244	if ((tp->tv_nsec - NSEC_PER_SEC) > 0) {
				1245	tp->tv_nsec -= NSEC_PER_SEC;
				1246	tp->tv_sec++;
				1247	}
				1248	return 0;
				1249	}
				1250
				1251	int do_posix_clock_monotonic_gettime(struct timespec *tp)
				1252	{
					/*
					 * Convenience wrapper: store the CLOCK_MONOTONIC time in *tp and
					 * pass on do_posix_clock_monotonic_get()'s return value.
					 */
				1253	return do_posix_clock_monotonic_get(CLOCK_MONOTONIC, tp);
				1254	}
				1255
				1256	int do_posix_clock_nosettime(clockid_t clockid, struct timespec *tp)
				1257	{
					/*
					 * Stub: ignores both arguments and always fails with -EINVAL.
					 * NOTE(review): presumably meant as the clock_set hook of clocks
					 * that cannot be set - confirm at the registration sites.
					 */
				1258	return -EINVAL;
				1259	}
				1260	EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
				1261
				1262	int do_posix_clock_notimer_create(struct k_itimer *timer)
				1263	{
					/*
					 * Stub: ignores the timer and always fails with -EINVAL.
					 * NOTE(review): presumably meant as the timer_create hook of
					 * clocks that do not support timers - confirm at the
					 * registration sites.
					 */
				1264	return -EINVAL;
				1265	}
				1266	EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create);
				1267
				1268	int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t)
				1269	{
					/*
					 * Stub: ignores all arguments and always fails.  The error is
					 * -ENOTSUP where that macro is defined, -EOPNOTSUPP elsewhere.
					 * NOTE(review): presumably meant as the nsleep hook of clocks
					 * that cannot be slept on - confirm at the registration sites.
					 */
				1270	#ifndef ENOTSUP
				1271	return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */
				1272	#else /* parisc does define it separately. */
				1273	return -ENOTSUP;
				1274	#endif
				1275	}
				1276	EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
				1277
				1278	asmlinkage long
				1279	sys_clock_settime(clockid_t which_clock, const struct timespec __user *tp)
				1280	{
				1281	struct timespec new_tp;
				1282
				1283	if (invalid_clockid(which_clock))
				1284	return -EINVAL;
				1285	if (copy_from_user(&new_tp, tp, sizeof (*tp)))
				1286	return -EFAULT;
				1287
				1288	return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp));
				1289	}
				1290
				1291	asmlinkage long
				1292	sys_clock_gettime(clockid_t which_clock, struct timespec __user *tp)
				1293	{
				1294	struct timespec kernel_tp;
				1295	int error;
				1296
				1297	if (invalid_clockid(which_clock))
				1298	return -EINVAL;
				1299	error = CLOCK_DISPATCH(which_clock, clock_get,
				1300	(which_clock, &kernel_tp));
				1301	if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
				1302	error = -EFAULT;
				1303
				1304	return error;
				1305
				1306	}
				1307
				1308	asmlinkage long
				1309	sys_clock_getres(clockid_t which_clock, struct timespec __user *tp)
				1310	{
				1311	struct timespec rtn_tp;
				1312	int error;
				1313
				1314	if (invalid_clockid(which_clock))
				1315	return -EINVAL;
				1316
				1317	error = CLOCK_DISPATCH(which_clock, clock_getres,
				1318	(which_clock, &rtn_tp));
				1319
				1320	if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) {
				1321	error = -EFAULT;
				1322	}
				1323
				1324	return error;
				1325	}
				1326
				static void nanosleep_wake_up(unsigned long __data)
				{
					/*
					 * Timer callback installed by common_nsleep(): __data carries the
					 * sleeping task's task_struct pointer (stored there as an
					 * unsigned long), and the task is woken up.
					 */
					struct task_struct *p = (struct task_struct *) __data;

					wake_up_process(p);
				}
				1333
				1334	/*
				1335	* The standard says that an absolute nanosleep call MUST wake up at
				1336	* the requested time in spite of clock settings. Here is what we do:
				1337	* For each nanosleep call that needs it (only absolute and not on
				1338	* CLOCK_MONOTONIC* (as it can not be set)) we thread a little structure
				1339	* into the "nanosleep_abs_list". All we need is the task_struct pointer.
				1340	* When ever the clock is set we just wake up all those tasks. The rest
				1341	* is done by the while loop in clock_nanosleep().
				1342	*
				1343	* On locking, clock_was_set() is called from update_wall_clock which
				1344	* holds (or has held for it) a write_lock_irq( xtime_lock) and is
				1345	* called from the timer bh code. Thus we need the irq save locks.
				1346	*
				1347	* Also, on the call from update_wall_clock, that is done as part of a
				1348	* softirq thing. We don't want to delay the system that much (possibly
				1349	* long list of timers to fix), so we defer that work to keventd.
				1350	*/
				1351
				1352	static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue);
				1353	static DECLARE_WORK(clock_was_set_work, (void()(void))clock_was_set, NULL);
				1354
				1355	static DECLARE_MUTEX(clock_was_set_lock);
				1356
				1357	void clock_was_set(void)
				1358	{
				1359	struct k_itimer *timr;
				1360	struct timespec new_wall_to;
				1361	LIST_HEAD(cws_list);
				1362	unsigned long seq;
				1363
				1364
				1365	if (unlikely(in_interrupt())) {
				1366	schedule_work(&clock_was_set_work);
				1367	return;
				1368	}
				1369	wake_up_all(&nanosleep_abs_wqueue);
				1370
				1371	/*
				1372	* Check if there exist TIMER_ABSTIME timers to correct.
				1373	*
				1374	* Notes on locking: This code is run in task context with irq
				1375	* on. We CAN be interrupted! All other usage of the abs list
				1376	* lock is under the timer lock which holds the irq lock as
				1377	* well. We REALLY don't want to scan the whole list with the
				1378	* interrupt system off, AND we would like a sequence lock on
				1379	* this code as well. Since we assume that the clock will not
				1380	* be set often, it seems ok to take and release the irq lock
				1381	* for each timer. In fact add_timer will do this, so this is
				1382	* not an issue. So we know when we are done, we will move the
				1383	* whole list to a new location. Then as we process each entry,
				1384	* we will move it to the actual list again. This way, when our
				1385	* copy is empty, we are done. We are not all that concerned
				1386	* about preemption so we will use a semaphore lock to protect
				1387	* aginst reentry. This way we will not stall another
				1388	* processor. It is possible that this may delay some timers
				1389	* that should have expired, given the new clock, but even this
				1390	* will be minimal as we will always update to the current time,
				1391	* even if it was set by a task that is waiting for entry to
				1392	* this code. Timers that expire too early will be caught by
				1393	* the expire code and restarted.
				1394
				1395	* Absolute timers that repeat are left in the abs list while
				1396	* waiting for the task to pick up the signal. This means we
				1397	* may find timers that are not in the "add_timer" list, but are
				1398	* in the abs list. We do the same thing for these, save
				1399	* putting them back in the "add_timer" list. (Note, these are
				1400	* left in the abs list mainly to indicate that they are
				1401	* ABSOLUTE timers, a fact that is used by the re-arm code, and
				1402	* for which we have no other flag.)
				1403
				1404	*/
				1405
				1406	down(&clock_was_set_lock);
				1407	spin_lock_irq(&abs_list.lock);
				1408	list_splice_init(&abs_list.list, &cws_list);
				1409	spin_unlock_irq(&abs_list.lock);
				1410	do {
				1411	do {
				1412	seq = read_seqbegin(&xtime_lock);
				1413	new_wall_to = wall_to_monotonic;
				1414	} while (read_seqretry(&xtime_lock, seq));
				1415
				1416	spin_lock_irq(&abs_list.lock);
				1417	if (list_empty(&cws_list)) {
				1418	spin_unlock_irq(&abs_list.lock);
				1419	break;
				1420	}
				1421	timr = list_entry(cws_list.next, struct k_itimer,
				1422	it.real.abs_timer_entry);
				1423
				1424	list_del_init(&timr->it.real.abs_timer_entry);
				1425	if (add_clockset_delta(timr, &new_wall_to) &&
				1426	del_timer(&timr->it.real.timer)) /* timer run yet? */
				1427	add_timer(&timr->it.real.timer);
				1428	list_add(&timr->it.real.abs_timer_entry, &abs_list.list);
				1429	spin_unlock_irq(&abs_list.lock);
				1430	} while (1);
				1431
				1432	up(&clock_was_set_lock);
				1433	}
				1434
				1435	long clock_nanosleep_restart(struct restart_block *restart_block);
				1436
				1437	asmlinkage long
				1438	sys_clock_nanosleep(clockid_t which_clock, int flags,
				1439	const struct timespec __user *rqtp,
				1440	struct timespec __user *rmtp)
				1441	{
				1442	struct timespec t;
				1443	struct restart_block *restart_block =
				1444	&(current_thread_info()->restart_block);
				1445	int ret;
				1446
				1447	if (invalid_clockid(which_clock))
				1448	return -EINVAL;
				1449
				1450	if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
				1451	return -EFAULT;
				1452
				1453	if ((unsigned) t.tv_nsec >= NSEC_PER_SEC \|\| t.tv_sec < 0)
				1454	return -EINVAL;
				1455
				1456	/*
				1457	* Do this here as nsleep function does not have the real address.
				1458	*/
				1459	restart_block->arg1 = (unsigned long)rmtp;
				1460
				1461	ret = CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t));
				1462
				1463	if ((ret == -ERESTART_RESTARTBLOCK) && rmtp &&
				1464	copy_to_user(rmtp, &t, sizeof (t)))
				1465	return -EFAULT;
				1466	return ret;
				1467	}
				1468
				1469
				1470	static int common_nsleep(clockid_t which_clock,
				1471	int flags, struct timespec *tsave)
				1472	{
				1473	struct timespec t, dum;
				1474	struct timer_list new_timer;
				1475	DECLARE_WAITQUEUE(abs_wqueue, current);
				1476	u64 rq_time = (u64)0;
				1477	s64 left;
				1478	int abs;
				1479	struct restart_block *restart_block =
				1480	&current_thread_info()->restart_block;
				1481
				1482	abs_wqueue.flags = 0;
				1483	init_timer(&new_timer);
				1484	new_timer.expires = 0;
				1485	new_timer.data = (unsigned long) current;
				1486	new_timer.function = nanosleep_wake_up;
				1487	abs = flags & TIMER_ABSTIME;
				1488
				1489	if (restart_block->fn == clock_nanosleep_restart) {
				1490	/*
				1491	* Interrupted by a non-delivered signal, pick up remaining
				1492	* time and continue. Remaining time is in arg2 & 3.
				1493	*/
				1494	restart_block->fn = do_no_restart_syscall;
				1495
				1496	rq_time = restart_block->arg3;
				1497	rq_time = (rq_time << 32) + restart_block->arg2;
				1498	if (!rq_time)
				1499	return -EINTR;
				1500	left = rq_time - get_jiffies_64();
				1501	if (left <= (s64)0)
				1502	return 0; /* Already passed */
				1503	}
				1504
				1505	if (abs && (posix_clocks[which_clock].clock_get !=
				1506	posix_clocks[CLOCK_MONOTONIC].clock_get))
				1507	add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue);
				1508
				1509	do {
				1510	t = *tsave;
				1511	if (abs \|\| !rq_time) {
				1512	adjust_abs_time(&posix_clocks[which_clock], &t, abs,
				1513	&rq_time, &dum);
				1514	}
				1515
				1516	left = rq_time - get_jiffies_64();
				1517	if (left >= (s64)MAX_JIFFY_OFFSET)
				1518	left = (s64)MAX_JIFFY_OFFSET;
				1519	if (left < (s64)0)
				1520	break;
				1521
				1522	new_timer.expires = jiffies + left;
				1523	__set_current_state(TASK_INTERRUPTIBLE);
				1524	add_timer(&new_timer);
				1525
				1526	schedule();
				1527
				1528	del_timer_sync(&new_timer);
				1529	left = rq_time - get_jiffies_64();
				1530	} while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING));
				1531
				1532	if (abs_wqueue.task_list.next)
				1533	finish_wait(&nanosleep_abs_wqueue, &abs_wqueue);
				1534
				1535	if (left > (s64)0) {
				1536
				1537	/*
				1538	* Always restart abs calls from scratch to pick up any
				1539	* clock shifting that happened while we are away.
				1540	*/
				1541	if (abs)
				1542	return -ERESTARTNOHAND;
				1543
				1544	left *= TICK_NSEC;
				1545	tsave->tv_sec = div_long_long_rem(left,
				1546	NSEC_PER_SEC,
				1547	&tsave->tv_nsec);
				1548	/*
				1549	* Restart works by saving the time remaing in
				1550	* arg2 & 3 (it is 64-bits of jiffies). The other
				1551	* info we need is the clock_id (saved in arg0).
				1552	* The sys_call interface needs the users
				1553	* timespec return address which _it_ saves in arg1.
				1554	* Since we have cast the nanosleep call to a clock_nanosleep
				1555	* both can be restarted with the same code.
				1556	*/
				1557	restart_block->fn = clock_nanosleep_restart;
				1558	restart_block->arg0 = which_clock;
				1559	/*
				1560	* Caller sets arg1
				1561	*/
				1562	restart_block->arg2 = rq_time & 0xffffffffLL;
				1563	restart_block->arg3 = rq_time >> 32;
				1564
				1565	return -ERESTART_RESTARTBLOCK;
				1566	}
				1567
				1568	return 0;
				1569	}
				1570	/*
				1571	* This will restart clock_nanosleep.
				1572	*/
				1573	long
				1574	clock_nanosleep_restart(struct restart_block *restart_block)
				1575	{
				1576	struct timespec t;
				1577	int ret = common_nsleep(restart_block->arg0, 0, &t);
				1578
				1579	if ((ret == -ERESTART_RESTARTBLOCK) && restart_block->arg1 &&
				1580	copy_to_user((struct timespec __user *)(restart_block->arg1), &t,
				1581	sizeof (t)))
				1582	return -EFAULT;
				1583	return ret;
				1584	}