Blame - kernel/sched/sched.h - android_kernel_oneplus_msm8996

blob: cc03cfdf469f6af1bda58d936539c9348b7e9501 [file] [log] [blame]

Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	1
				2	#include <linux/sched.h>
Clark Williams	cf4aebc2	2013-02-07 09:46:59 -0600	[diff] [blame]	3	#include <linux/sched/sysctl.h>
Clark Williams	8bd75c7	2013-02-07 09:47:07 -0600	[diff] [blame^]	4	#include <linux/sched/rt.h>
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	5	#include <linux/mutex.h>
				6	#include <linux/spinlock.h>
				7	#include <linux/stop_machine.h>
				8
Peter Zijlstra	391e43d	2011-11-15 17:14:39 +0100	[diff] [blame]	9	#include "cpupri.h"
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	10
				11	extern __read_mostly int scheduler_running;
				12
				13	/*
				14	* Convert user-nice values [ -20 ... 0 ... 19 ]
				15	* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
				16	* and back.
				17	*/
				18	#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20)
				19	#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20)
				20	#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio)
				21
				22	/*
				23	* 'User priority' is the nice value converted to something we
				24	* can work with better when scaling various scheduler parameters,
				25	* it's a [ 0 ... 39 ] range.
				26	*/
				27	#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
				28	#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
				29	#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
				30
				31	/*
				32	* Helpers for converting nanosecond timing to jiffy resolution
				33	*/
				34	#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
				35
				36	#define NICE_0_LOAD SCHED_LOAD_SCALE
				37	#define NICE_0_SHIFT SCHED_LOAD_SHIFT
				38
				39	/*
				40	* These are the 'tuning knobs' of the scheduler:
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	41	*/
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	42
				43	/*
				44	* single value that denotes runtime == period, ie unlimited time.
				45	*/
				46	#define RUNTIME_INF ((u64)~0ULL)
				47
				48	static inline int rt_policy(int policy)
				49	{
				50	if (policy == SCHED_FIFO \|\| policy == SCHED_RR)
				51	return 1;
				52	return 0;
				53	}
				54
				55	static inline int task_has_rt_policy(struct task_struct *p)
				56	{
				57	return rt_policy(p->policy);
				58	}
				59
				60	/*
				61	* This is the priority-queue data structure of the RT scheduling class:
				62	*/
				63	struct rt_prio_array {
				64	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
				65	struct list_head queue[MAX_RT_PRIO];
				66	};
				67
				68	struct rt_bandwidth {
				69	/* nests inside the rq lock: */
				70	raw_spinlock_t rt_runtime_lock;
				71	ktime_t rt_period;
				72	u64 rt_runtime;
				73	struct hrtimer rt_period_timer;
				74	};
				75
				76	extern struct mutex sched_domains_mutex;
				77
				78	#ifdef CONFIG_CGROUP_SCHED
				79
				80	#include <linux/cgroup.h>
				81
				82	struct cfs_rq;
				83	struct rt_rq;
				84
Mike Galbraith	35cf4e5	2012-08-07 05:00:13 +0200	[diff] [blame]	85	extern struct list_head task_groups;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	86
				87	struct cfs_bandwidth {
				88	#ifdef CONFIG_CFS_BANDWIDTH
				89	raw_spinlock_t lock;
				90	ktime_t period;
				91	u64 quota, runtime;
				92	s64 hierarchal_quota;
				93	u64 runtime_expires;
				94
				95	int idle, timer_active;
				96	struct hrtimer period_timer, slack_timer;
				97	struct list_head throttled_cfs_rq;
				98
				99	/* statistics */
				100	int nr_periods, nr_throttled;
				101	u64 throttled_time;
				102	#endif
				103	};
				104
				105	/* task group related information */
				106	struct task_group {
				107	struct cgroup_subsys_state css;
				108
				109	#ifdef CONFIG_FAIR_GROUP_SCHED
				110	/* schedulable entities of this group on each cpu */
				111	struct sched_entity **se;
				112	/* runqueue "owned" by this group on each cpu */
				113	struct cfs_rq **cfs_rq;
				114	unsigned long shares;
				115
				116	atomic_t load_weight;
Paul Turner	c566e8e	2012-10-04 13:18:30 +0200	[diff] [blame]	117	atomic64_t load_avg;
Paul Turner	bb17f65	2012-10-04 13:18:31 +0200	[diff] [blame]	118	atomic_t runnable_avg;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	119	#endif
				120
				121	#ifdef CONFIG_RT_GROUP_SCHED
				122	struct sched_rt_entity **rt_se;
				123	struct rt_rq **rt_rq;
				124
				125	struct rt_bandwidth rt_bandwidth;
				126	#endif
				127
				128	struct rcu_head rcu;
				129	struct list_head list;
				130
				131	struct task_group *parent;
				132	struct list_head siblings;
				133	struct list_head children;
				134
				135	#ifdef CONFIG_SCHED_AUTOGROUP
				136	struct autogroup *autogroup;
				137	#endif
				138
				139	struct cfs_bandwidth cfs_bandwidth;
				140	};
				141
				142	#ifdef CONFIG_FAIR_GROUP_SCHED
				143	#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
				144
				145	/*
				146	* A weight of 0 or 1 can cause arithmetics problems.
				147	* A weight of a cfs_rq is the sum of weights of which entities
				148	* are queued on this cfs_rq, so a weight of a entity should not be
				149	* too large, so as the shares value of a task group.
				150	* (The default weight is 1024 - so there's no practical
				151	* limitation from this.)
				152	*/
				153	#define MIN_SHARES (1UL << 1)
				154	#define MAX_SHARES (1UL << 18)
				155	#endif
				156
				157	/* Default task group.
				158	* Every task in the system belongs to this group at bootup.
				159	*/
				160	extern struct task_group root_task_group;
				161
				162	typedef int (tg_visitor)(struct task_group , void *);
				163
				164	extern int walk_tg_tree_from(struct task_group *from,
				165	tg_visitor down, tg_visitor up, void *data);
				166
				167	/*
				168	* Iterate the full tree, calling @down when first entering a node and @up when
				169	* leaving it for the final time.
				170	*
				171	* Caller must hold rcu_lock or sufficient equivalent.
				172	*/
				173	static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
				174	{
				175	return walk_tg_tree_from(&root_task_group, down, up, data);
				176	}
				177
				178	extern int tg_nop(struct task_group tg, void data);
				179
				180	extern void free_fair_sched_group(struct task_group *tg);
				181	extern int alloc_fair_sched_group(struct task_group tg, struct task_group parent);
				182	extern void unregister_fair_sched_group(struct task_group *tg, int cpu);
				183	extern void init_tg_cfs_entry(struct task_group tg, struct cfs_rq cfs_rq,
				184	struct sched_entity *se, int cpu,
				185	struct sched_entity *parent);
				186	extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
				187	extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
				188
				189	extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
				190	extern void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
				191	extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
				192
				193	extern void free_rt_sched_group(struct task_group *tg);
				194	extern int alloc_rt_sched_group(struct task_group tg, struct task_group parent);
				195	extern void init_tg_rt_entry(struct task_group tg, struct rt_rq rt_rq,
				196	struct sched_rt_entity *rt_se, int cpu,
				197	struct sched_rt_entity *parent);
				198
				199	#else /* CONFIG_CGROUP_SCHED */
				200
				201	struct cfs_bandwidth { };
				202
				203	#endif /* CONFIG_CGROUP_SCHED */
				204
				205	/* CFS-related fields in a runqueue */
				206	struct cfs_rq {
				207	struct load_weight load;
Peter Zijlstra	c82513e	2012-04-26 13:12:27 +0200	[diff] [blame]	208	unsigned int nr_running, h_nr_running;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	209
				210	u64 exec_clock;
				211	u64 min_vruntime;
				212	#ifndef CONFIG_64BIT
				213	u64 min_vruntime_copy;
				214	#endif
				215
				216	struct rb_root tasks_timeline;
				217	struct rb_node *rb_leftmost;
				218
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	219	/*
				220	* 'curr' points to currently running entity on this cfs_rq.
				221	* It is set to NULL otherwise (i.e when none are currently running).
				222	*/
				223	struct sched_entity curr, next, last, skip;
				224
				225	#ifdef CONFIG_SCHED_DEBUG
				226	unsigned int nr_spread_over;
				227	#endif
				228
Paul Turner	2dac754	2012-10-04 13:18:30 +0200	[diff] [blame]	229	#ifdef CONFIG_SMP
Paul Turner	f4e26b1	2012-10-04 13:18:32 +0200	[diff] [blame]	230	/*
				231	* Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
				232	* removed when useful for applications beyond shares distribution (e.g.
				233	* load-balance).
				234	*/
				235	#ifdef CONFIG_FAIR_GROUP_SCHED
Paul Turner	2dac754	2012-10-04 13:18:30 +0200	[diff] [blame]	236	/*
				237	* CFS Load tracking
				238	* Under CFS, load is tracked on a per-entity basis and aggregated up.
				239	* This allows for the description of both thread and group usage (in
				240	* the FAIR_GROUP_SCHED case).
				241	*/
Paul Turner	9ee474f	2012-10-04 13:18:30 +0200	[diff] [blame]	242	u64 runnable_load_avg, blocked_load_avg;
Paul Turner	aff3e49	2012-10-04 13:18:30 +0200	[diff] [blame]	243	atomic64_t decay_counter, removed_load;
Paul Turner	9ee474f	2012-10-04 13:18:30 +0200	[diff] [blame]	244	u64 last_decay;
Paul Turner	f4e26b1	2012-10-04 13:18:32 +0200	[diff] [blame]	245	#endif /* CONFIG_FAIR_GROUP_SCHED */
				246	/* These always depend on CONFIG_FAIR_GROUP_SCHED */
Paul Turner	c566e8e	2012-10-04 13:18:30 +0200	[diff] [blame]	247	#ifdef CONFIG_FAIR_GROUP_SCHED
Paul Turner	bb17f65	2012-10-04 13:18:31 +0200	[diff] [blame]	248	u32 tg_runnable_contrib;
Paul Turner	c566e8e	2012-10-04 13:18:30 +0200	[diff] [blame]	249	u64 tg_load_contrib;
Paul Turner	8295836	2012-10-04 13:18:31 +0200	[diff] [blame]	250	#endif /* CONFIG_FAIR_GROUP_SCHED */
				251
				252	/*
				253	* h_load = weight * f(tg)
				254	*
				255	* Where f(tg) is the recursive weight fraction assigned to
				256	* this group.
				257	*/
				258	unsigned long h_load;
				259	#endif /* CONFIG_SMP */
				260
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	261	#ifdef CONFIG_FAIR_GROUP_SCHED
				262	struct rq rq; / cpu runqueue to which this cfs_rq is attached */
				263
				264	/*
				265	* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
				266	* a hierarchy). Non-leaf lrqs hold other higher schedulable entities
				267	* (like users, containers etc.)
				268	*
				269	* leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This
				270	* list is used during load balance.
				271	*/
				272	int on_list;
				273	struct list_head leaf_cfs_rq_list;
				274	struct task_group tg; / group that "owns" this runqueue */
				275
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	276	#ifdef CONFIG_CFS_BANDWIDTH
				277	int runtime_enabled;
				278	u64 runtime_expires;
				279	s64 runtime_remaining;
				280
Paul Turner	f1b1728	2012-10-04 13:18:31 +0200	[diff] [blame]	281	u64 throttled_clock, throttled_clock_task;
				282	u64 throttled_clock_task_time;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	283	int throttled, throttle_count;
				284	struct list_head throttled_list;
				285	#endif /* CONFIG_CFS_BANDWIDTH */
				286	#endif /* CONFIG_FAIR_GROUP_SCHED */
				287	};
				288
				289	static inline int rt_bandwidth_enabled(void)
				290	{
				291	return sysctl_sched_rt_runtime >= 0;
				292	}
				293
				294	/* Real-Time classes' related field in a runqueue: */
				295	struct rt_rq {
				296	struct rt_prio_array active;
Peter Zijlstra	c82513e	2012-04-26 13:12:27 +0200	[diff] [blame]	297	unsigned int rt_nr_running;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	298	#if defined CONFIG_SMP \|\| defined CONFIG_RT_GROUP_SCHED
				299	struct {
				300	int curr; /* highest queued rt task prio */
				301	#ifdef CONFIG_SMP
				302	int next; /* next highest */
				303	#endif
				304	} highest_prio;
				305	#endif
				306	#ifdef CONFIG_SMP
				307	unsigned long rt_nr_migratory;
				308	unsigned long rt_nr_total;
				309	int overloaded;
				310	struct plist_head pushable_tasks;
				311	#endif
				312	int rt_throttled;
				313	u64 rt_time;
				314	u64 rt_runtime;
				315	/* Nests inside the rq lock: */
				316	raw_spinlock_t rt_runtime_lock;
				317
				318	#ifdef CONFIG_RT_GROUP_SCHED
				319	unsigned long rt_nr_boosted;
				320
				321	struct rq *rq;
				322	struct list_head leaf_rt_rq_list;
				323	struct task_group *tg;
				324	#endif
				325	};
				326
				327	#ifdef CONFIG_SMP
				328
				329	/*
				330	* We add the notion of a root-domain which will be used to define per-domain
				331	* variables. Each exclusive cpuset essentially defines an island domain by
				332	* fully partitioning the member cpus from any other cpuset. Whenever a new
				333	* exclusive cpuset is created, we also create and attach a new root-domain
				334	* object.
				335	*
				336	*/
				337	struct root_domain {
				338	atomic_t refcount;
				339	atomic_t rto_count;
				340	struct rcu_head rcu;
				341	cpumask_var_t span;
				342	cpumask_var_t online;
				343
				344	/*
				345	* The "RT overload" flag: it gets set if a CPU has more than
				346	* one runnable RT task.
				347	*/
				348	cpumask_var_t rto_mask;
				349	struct cpupri cpupri;
				350	};
				351
				352	extern struct root_domain def_root_domain;
				353
				354	#endif /* CONFIG_SMP */
				355
				356	/*
				357	* This is the main, per-CPU runqueue data structure.
				358	*
				359	* Locking rule: those places that want to lock multiple runqueues
				360	* (such as the load balancing or the thread migration code), lock
				361	* acquire operations must be ordered by ascending &runqueue.
				362	*/
				363	struct rq {
				364	/* runqueue lock: */
				365	raw_spinlock_t lock;
				366
				367	/*
				368	* nr_running and cpu_load should be in the same cacheline because
				369	* remote CPUs use both these fields when doing load calculation.
				370	*/
Peter Zijlstra	c82513e	2012-04-26 13:12:27 +0200	[diff] [blame]	371	unsigned int nr_running;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	372	#define CPU_LOAD_IDX_MAX 5
				373	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
				374	unsigned long last_load_update_tick;
				375	#ifdef CONFIG_NO_HZ
				376	u64 nohz_stamp;
Suresh Siddha	1c792db	2011-12-01 17:07:32 -0800	[diff] [blame]	377	unsigned long nohz_flags;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	378	#endif
				379	int skip_clock_update;
				380
				381	/* capture load from all tasks on this cpu: */
				382	struct load_weight load;
				383	unsigned long nr_load_updates;
				384	u64 nr_switches;
				385
				386	struct cfs_rq cfs;
				387	struct rt_rq rt;
				388
				389	#ifdef CONFIG_FAIR_GROUP_SCHED
				390	/* list of leaf cfs_rq on this cpu: */
				391	struct list_head leaf_cfs_rq_list;
Peter Zijlstra	a35b646	2012-08-08 21:46:40 +0200	[diff] [blame]	392	#ifdef CONFIG_SMP
				393	unsigned long h_load_throttle;
				394	#endif /* CONFIG_SMP */
				395	#endif /* CONFIG_FAIR_GROUP_SCHED */
				396
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	397	#ifdef CONFIG_RT_GROUP_SCHED
				398	struct list_head leaf_rt_rq_list;
				399	#endif
				400
				401	/*
				402	* This is part of a global counter where only the total sum
				403	* over all CPUs matters. A task can increase this counter on
				404	* one CPU and if it got migrated afterwards it may decrease
				405	* it on another CPU. Always updated under the runqueue lock:
				406	*/
				407	unsigned long nr_uninterruptible;
				408
				409	struct task_struct curr, idle, *stop;
				410	unsigned long next_balance;
				411	struct mm_struct *prev_mm;
				412
				413	u64 clock;
				414	u64 clock_task;
				415
				416	atomic_t nr_iowait;
				417
				418	#ifdef CONFIG_SMP
				419	struct root_domain *rd;
				420	struct sched_domain *sd;
				421
				422	unsigned long cpu_power;
				423
				424	unsigned char idle_balance;
				425	/* For active balancing */
				426	int post_schedule;
				427	int active_balance;
				428	int push_cpu;
				429	struct cpu_stop_work active_balance_work;
				430	/* cpu of this runqueue: */
				431	int cpu;
				432	int online;
				433
Peter Zijlstra	367456c	2012-02-20 21:49:09 +0100	[diff] [blame]	434	struct list_head cfs_tasks;
				435
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	436	u64 rt_avg;
				437	u64 age_stamp;
				438	u64 idle_stamp;
				439	u64 avg_idle;
				440	#endif
				441
				442	#ifdef CONFIG_IRQ_TIME_ACCOUNTING
				443	u64 prev_irq_time;
				444	#endif
				445	#ifdef CONFIG_PARAVIRT
				446	u64 prev_steal_time;
				447	#endif
				448	#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
				449	u64 prev_steal_time_rq;
				450	#endif
				451
				452	/* calc_load related fields */
				453	unsigned long calc_load_update;
				454	long calc_load_active;
				455
				456	#ifdef CONFIG_SCHED_HRTICK
				457	#ifdef CONFIG_SMP
				458	int hrtick_csd_pending;
				459	struct call_single_data hrtick_csd;
				460	#endif
				461	struct hrtimer hrtick_timer;
				462	#endif
				463
				464	#ifdef CONFIG_SCHEDSTATS
				465	/* latency stats */
				466	struct sched_info rq_sched_info;
				467	unsigned long long rq_cpu_time;
				468	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
				469
				470	/* sys_sched_yield() stats */
				471	unsigned int yld_count;
				472
				473	/* schedule() stats */
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	474	unsigned int sched_count;
				475	unsigned int sched_goidle;
				476
				477	/* try_to_wake_up() stats */
				478	unsigned int ttwu_count;
				479	unsigned int ttwu_local;
				480	#endif
				481
				482	#ifdef CONFIG_SMP
				483	struct llist_head wake_list;
				484	#endif
Ben Segall	18bf280	2012-10-04 12:51:20 +0200	[diff] [blame]	485
				486	struct sched_avg avg;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	487	};
				488
				489	static inline int cpu_of(struct rq *rq)
				490	{
				491	#ifdef CONFIG_SMP
				492	return rq->cpu;
				493	#else
				494	return 0;
				495	#endif
				496	}
				497
				498	DECLARE_PER_CPU(struct rq, runqueues);
				499
Peter Zijlstra	518cd62	2011-12-07 15:07:31 +0100	[diff] [blame]	500	#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
				501	#define this_rq() (&__get_cpu_var(runqueues))
				502	#define task_rq(p) cpu_rq(task_cpu(p))
				503	#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
				504	#define raw_rq() (&__raw_get_cpu_var(runqueues))
				505
				506	#ifdef CONFIG_SMP
				507
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	508	#define rcu_dereference_check_sched_domain(p) \
				509	rcu_dereference_check((p), \
				510	lockdep_is_held(&sched_domains_mutex))
				511
				512	/*
				513	* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
				514	* See detach_destroy_domains: synchronize_sched for details.
				515	*
				516	* The domain tree of any CPU may only be accessed from within
				517	* preempt-disabled sections.
				518	*/
				519	#define for_each_domain(cpu, __sd) \
Peter Zijlstra	518cd62	2011-12-07 15:07:31 +0100	[diff] [blame]	520	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
				521	__sd; __sd = __sd->parent)
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	522
Suresh Siddha	77e8136	2011-11-17 11:08:23 -0800	[diff] [blame]	523	#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
				524
Peter Zijlstra	518cd62	2011-12-07 15:07:31 +0100	[diff] [blame]	525	/**
				526	* highest_flag_domain - Return highest sched_domain containing flag.
				527	* @cpu: The cpu whose highest level of sched domain is to
				528	* be returned.
				529	* @flag: The flag to check for the highest sched_domain
				530	* for the given cpu.
				531	*
				532	* Returns the highest sched_domain of a cpu which contains the given flag.
				533	*/
				534	static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
				535	{
				536	struct sched_domain sd, hsd = NULL;
				537
				538	for_each_domain(cpu, sd) {
				539	if (!(sd->flags & flag))
				540	break;
				541	hsd = sd;
				542	}
				543
				544	return hsd;
				545	}
				546
				547	DECLARE_PER_CPU(struct sched_domain *, sd_llc);
				548	DECLARE_PER_CPU(int, sd_llc_id);
				549
Peter Zijlstra	c117487	2012-05-31 14:47:33 +0200	[diff] [blame]	550	extern int group_balance_cpu(struct sched_group *sg);
				551
Peter Zijlstra	518cd62	2011-12-07 15:07:31 +0100	[diff] [blame]	552	#endif /* CONFIG_SMP */
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	553
Peter Zijlstra	391e43d	2011-11-15 17:14:39 +0100	[diff] [blame]	554	#include "stats.h"
				555	#include "auto_group.h"
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	556
				557	#ifdef CONFIG_CGROUP_SCHED
				558
				559	/*
				560	* Return the group to which this task belongs.
				561	*
Peter Zijlstra	8323f26	2012-06-22 13:36:05 +0200	[diff] [blame]	562	* We cannot use task_subsys_state() and friends because the cgroup
				563	* subsystem changes that value before the cgroup_subsys::attach() method
				564	* is called, therefore we cannot pin it and might observe the wrong value.
				565	*
				566	* The same is true for autogroup's p->signal->autogroup->tg, the autogroup
				567	* core changes this before calling sched_move_task().
				568	*
				569	* Instead we use a 'copy' which is updated from sched_move_task() while
				570	* holding both task_struct::pi_lock and rq::lock.
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	571	*/
				572	static inline struct task_group task_group(struct task_struct p)
				573	{
Peter Zijlstra	8323f26	2012-06-22 13:36:05 +0200	[diff] [blame]	574	return p->sched_task_group;
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	575	}
				576
				577	/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
				578	static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
				579	{
				580	#if defined(CONFIG_FAIR_GROUP_SCHED) \|\| defined(CONFIG_RT_GROUP_SCHED)
				581	struct task_group *tg = task_group(p);
				582	#endif
				583
				584	#ifdef CONFIG_FAIR_GROUP_SCHED
				585	p->se.cfs_rq = tg->cfs_rq[cpu];
				586	p->se.parent = tg->se[cpu];
				587	#endif
				588
				589	#ifdef CONFIG_RT_GROUP_SCHED
				590	p->rt.rt_rq = tg->rt_rq[cpu];
				591	p->rt.parent = tg->rt_se[cpu];
				592	#endif
				593	}
				594
				595	#else /* CONFIG_CGROUP_SCHED */
				596
				597	static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
				598	static inline struct task_group task_group(struct task_struct p)
				599	{
				600	return NULL;
				601	}
				602
				603	#endif /* CONFIG_CGROUP_SCHED */
				604
				605	static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
				606	{
				607	set_task_rq(p, cpu);
				608	#ifdef CONFIG_SMP
				609	/*
				610	* After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
				611	* successfuly executed on another CPU. We must ensure that updates of
				612	* per-task data have been completed by this moment.
				613	*/
				614	smp_wmb();
				615	task_thread_info(p)->cpu = cpu;
				616	#endif
				617	}
				618
				619	/*
				620	* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
				621	*/
				622	#ifdef CONFIG_SCHED_DEBUG
Ingo Molnar	c5905af	2012-02-24 08:31:31 +0100	[diff] [blame]	623	# include <linux/static_key.h>
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	624	# define const_debug __read_mostly
				625	#else
				626	# define const_debug const
				627	#endif
				628
				629	extern const_debug unsigned int sysctl_sched_features;
				630
				631	#define SCHED_FEAT(name, enabled) \
				632	__SCHED_FEAT_##name ,
				633
				634	enum {
Peter Zijlstra	391e43d	2011-11-15 17:14:39 +0100	[diff] [blame]	635	#include "features.h"
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	636	__SCHED_FEAT_NR,
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	637	};
				638
				639	#undef SCHED_FEAT
				640
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	641	#if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL)
Ingo Molnar	c5905af	2012-02-24 08:31:31 +0100	[diff] [blame]	642	static __always_inline bool static_branch__true(struct static_key *key)
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	643	{
Ingo Molnar	c5905af	2012-02-24 08:31:31 +0100	[diff] [blame]	644	return static_key_true(key); /* Not out of line branch. */
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	645	}
				646
Ingo Molnar	c5905af	2012-02-24 08:31:31 +0100	[diff] [blame]	647	static __always_inline bool static_branch__false(struct static_key *key)
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	648	{
Ingo Molnar	c5905af	2012-02-24 08:31:31 +0100	[diff] [blame]	649	return static_key_false(key); /* Out of line branch. */
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	650	}
				651
				652	#define SCHED_FEAT(name, enabled) \
Ingo Molnar	c5905af	2012-02-24 08:31:31 +0100	[diff] [blame]	653	static __always_inline bool static_branch_##name(struct static_key *key) \
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	654	{ \
				655	return static_branch__##enabled(key); \
				656	}
				657
				658	#include "features.h"
				659
				660	#undef SCHED_FEAT
				661
Ingo Molnar	c5905af	2012-02-24 08:31:31 +0100	[diff] [blame]	662	extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	663	#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
				664	#else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	665	#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
Peter Zijlstra	f8b6d1c	2011-07-06 14:20:14 +0200	[diff] [blame]	666	#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	667
Peter Zijlstra	cbee9f8	2012-10-25 14:16:43 +0200	[diff] [blame]	668	#ifdef CONFIG_NUMA_BALANCING
				669	#define sched_feat_numa(x) sched_feat(x)
Mel Gorman	3105b86	2012-11-23 11:23:49 +0000	[diff] [blame]	670	#ifdef CONFIG_SCHED_DEBUG
				671	#define numabalancing_enabled sched_feat_numa(NUMA)
				672	#else
				673	extern bool numabalancing_enabled;
				674	#endif /* CONFIG_SCHED_DEBUG */
Peter Zijlstra	cbee9f8	2012-10-25 14:16:43 +0200	[diff] [blame]	675	#else
				676	#define sched_feat_numa(x) (0)
Mel Gorman	3105b86	2012-11-23 11:23:49 +0000	[diff] [blame]	677	#define numabalancing_enabled (0)
				678	#endif /* CONFIG_NUMA_BALANCING */
Peter Zijlstra	cbee9f8	2012-10-25 14:16:43 +0200	[diff] [blame]	679
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	680	static inline u64 global_rt_period(void)
				681	{
				682	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
				683	}
				684
				685	static inline u64 global_rt_runtime(void)
				686	{
				687	if (sysctl_sched_rt_runtime < 0)
				688	return RUNTIME_INF;
				689
				690	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
				691	}
				692
				693
				694
				695	static inline int task_current(struct rq rq, struct task_struct p)
				696	{
				697	return rq->curr == p;
				698	}
				699
				700	static inline int task_running(struct rq rq, struct task_struct p)
				701	{
				702	#ifdef CONFIG_SMP
				703	return p->on_cpu;
				704	#else
				705	return task_current(rq, p);
				706	#endif
				707	}
				708
				709
				710	#ifndef prepare_arch_switch
				711	# define prepare_arch_switch(next) do { } while (0)
				712	#endif
				713	#ifndef finish_arch_switch
				714	# define finish_arch_switch(prev) do { } while (0)
				715	#endif
Catalin Marinas	01f23e1	2011-11-27 21:43:10 +0000	[diff] [blame]	716	#ifndef finish_arch_post_lock_switch
				717	# define finish_arch_post_lock_switch() do { } while (0)
				718	#endif
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	719
				720	#ifndef __ARCH_WANT_UNLOCKED_CTXSW
				721	static inline void prepare_lock_switch(struct rq rq, struct task_struct next)
				722	{
				723	#ifdef CONFIG_SMP
				724	/*
				725	* We can optimise this out completely for !SMP, because the
				726	* SMP rebalancing from interrupt is the only thing that cares
				727	* here.
				728	*/
				729	next->on_cpu = 1;
				730	#endif
				731	}
				732
				733	static inline void finish_lock_switch(struct rq rq, struct task_struct prev)
				734	{
				735	#ifdef CONFIG_SMP
				736	/*
				737	* After ->on_cpu is cleared, the task can be moved to a different CPU.
				738	* We must ensure this doesn't happen until the switch is completely
				739	* finished.
				740	*/
				741	smp_wmb();
				742	prev->on_cpu = 0;
				743	#endif
				744	#ifdef CONFIG_DEBUG_SPINLOCK
				745	/* this is a valid case when another task releases the spinlock */
				746	rq->lock.owner = current;
				747	#endif
				748	/*
				749	* If we are tracking spinlock dependencies then we have to
				750	* fix up the runqueue lock - which gets 'carried over' from
				751	* prev into current:
				752	*/
				753	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
				754
				755	raw_spin_unlock_irq(&rq->lock);
				756	}
				757
				758	#else /* __ARCH_WANT_UNLOCKED_CTXSW */
				759	static inline void prepare_lock_switch(struct rq rq, struct task_struct next)
				760	{
				761	#ifdef CONFIG_SMP
				762	/*
				763	* We can optimise this out completely for !SMP, because the
				764	* SMP rebalancing from interrupt is the only thing that cares
				765	* here.
				766	*/
				767	next->on_cpu = 1;
				768	#endif
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	769	raw_spin_unlock(&rq->lock);
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	770	}
				771
				772	static inline void finish_lock_switch(struct rq rq, struct task_struct prev)
				773	{
				774	#ifdef CONFIG_SMP
				775	/*
				776	* After ->on_cpu is cleared, the task can be moved to a different CPU.
				777	* We must ensure this doesn't happen until the switch is completely
				778	* finished.
				779	*/
				780	smp_wmb();
				781	prev->on_cpu = 0;
				782	#endif
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	783	local_irq_enable();
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	784	}
				785	#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
				786
				787
				788	static inline void update_load_add(struct load_weight *lw, unsigned long inc)
				789	{
				790	lw->weight += inc;
				791	lw->inv_weight = 0;
				792	}
				793
				794	static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
				795	{
				796	lw->weight -= dec;
				797	lw->inv_weight = 0;
				798	}
				799
				800	static inline void update_load_set(struct load_weight *lw, unsigned long w)
				801	{
				802	lw->weight = w;
				803	lw->inv_weight = 0;
				804	}
				805
				806	/*
				807	* To aid in avoiding the subversion of "niceness" due to uneven distribution
				808	* of tasks with abnormal "nice" values across CPUs the contribution that
				809	* each task makes to its run queue's load is weighted according to its
				810	* scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
				811	* scaled version of the new time slice allocation that they receive on time
				812	* slice expiry etc.
				813	*/
				814
				815	#define WEIGHT_IDLEPRIO 3
				816	#define WMULT_IDLEPRIO 1431655765
				817
				818	/*
				819	* Nice levels are multiplicative, with a gentle 10% change for every
				820	* nice level changed. I.e. when a CPU-bound task goes from nice 0 to
				821	* nice 1, it will get ~10% less CPU time than another CPU-bound task
				822	* that remained on nice 0.
				823	*
				824	* The "10% effect" is relative and cumulative: from _any_ nice level,
				825	* if you go up 1 level, it's -10% CPU usage, if you go down 1 level
				826	* it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
				827	* If a task goes up by ~10% and another task goes down by ~10% then
				828	* the relative distance between them is ~25%.)
				829	*/
				830	static const int prio_to_weight[40] = {
				831	/* -20 */ 88761, 71755, 56483, 46273, 36291,
				832	/* -15 */ 29154, 23254, 18705, 14949, 11916,
				833	/* -10 */ 9548, 7620, 6100, 4904, 3906,
				834	/* -5 */ 3121, 2501, 1991, 1586, 1277,
				835	/* 0 */ 1024, 820, 655, 526, 423,
				836	/* 5 */ 335, 272, 215, 172, 137,
				837	/* 10 */ 110, 87, 70, 56, 45,
				838	/* 15 */ 36, 29, 23, 18, 15,
				839	};
				840
				841	/*
				842	* Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
				843	*
				844	* In cases where the weight does not change often, we can use the
				845	* precalculated inverse to speed up arithmetics by turning divisions
				846	* into multiplications:
				847	*/
				848	static const u32 prio_to_wmult[40] = {
				849	/* -20 */ 48388, 59856, 76040, 92818, 118348,
				850	/* -15 */ 147320, 184698, 229616, 287308, 360437,
				851	/* -10 */ 449829, 563644, 704093, 875809, 1099582,
				852	/* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
				853	/* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
				854	/* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
				855	/* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
				856	/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
				857	};
				858
				859	/* Time spent by the tasks of the cpu accounting group executing in ... */
				860	enum cpuacct_stat_index {
				861	CPUACCT_STAT_USER, /* ... user mode */
				862	CPUACCT_STAT_SYSTEM, /* ... kernel mode */
				863
				864	CPUACCT_STAT_NSTATS,
				865	};
				866
				867
				868	#define sched_class_highest (&stop_sched_class)
				869	#define for_each_class(class) \
				870	for (class = sched_class_highest; class; class = class->next)
				871
				872	extern const struct sched_class stop_sched_class;
				873	extern const struct sched_class rt_sched_class;
				874	extern const struct sched_class fair_sched_class;
				875	extern const struct sched_class idle_sched_class;
				876
				877
				878	#ifdef CONFIG_SMP
				879
				880	extern void trigger_load_balance(struct rq *rq, int cpu);
				881	extern void idle_balance(int this_cpu, struct rq *this_rq);
				882
				883	#else /* CONFIG_SMP */
				884
				885	static inline void idle_balance(int cpu, struct rq *rq)
				886	{
				887	}
				888
				889	#endif
				890
				891	extern void sysrq_sched_debug_show(void);
				892	extern void sched_init_granularity(void);
				893	extern void update_max_interval(void);
				894	extern void update_group_power(struct sched_domain *sd, int cpu);
				895	extern int update_runtime(struct notifier_block nfb, unsigned long action, void hcpu);
				896	extern void init_sched_rt_class(void);
				897	extern void init_sched_fair_class(void);
				898
				899	extern void resched_task(struct task_struct *p);
				900	extern void resched_cpu(int cpu);
				901
				902	extern struct rt_bandwidth def_rt_bandwidth;
				903	extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
				904
Peter Zijlstra	556061b	2012-05-11 17:31:26 +0200	[diff] [blame]	905	extern void update_idle_cpu_load(struct rq *this_rq);
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	906
				907	#ifdef CONFIG_CGROUP_CPUACCT
Glauber Costa	54c707e	2011-11-28 14:45:19 -0200	[diff] [blame]	908	#include <linux/cgroup.h>
				909	/* track cpu usage of a group of tasks and its child groups */
				910	struct cpuacct {
				911	struct cgroup_subsys_state css;
				912	/* cpuusage holds pointer to a u64-type object on every cpu */
				913	u64 __percpu *cpuusage;
				914	struct kernel_cpustat __percpu *cpustat;
				915	};
				916
Frederic Weisbecker	73fbec6	2012-06-16 15:57:37 +0200	[diff] [blame]	917	extern struct cgroup_subsys cpuacct_subsys;
				918	extern struct cpuacct root_cpuacct;
				919
Glauber Costa	54c707e	2011-11-28 14:45:19 -0200	[diff] [blame]	920	/* return cpu accounting group corresponding to this container */
				921	static inline struct cpuacct cgroup_ca(struct cgroup cgrp)
				922	{
				923	return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
				924	struct cpuacct, css);
				925	}
				926
				927	/* return cpu accounting group to which this task belongs */
				928	static inline struct cpuacct task_ca(struct task_struct tsk)
				929	{
				930	return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
				931	struct cpuacct, css);
				932	}
				933
				934	static inline struct cpuacct parent_ca(struct cpuacct ca)
				935	{
				936	if (!ca \|\| !ca->css.cgroup->parent)
				937	return NULL;
				938	return cgroup_ca(ca->css.cgroup->parent);
				939	}
				940
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	941	extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	942	#else
				943	static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	944	#endif
				945
Frederic Weisbecker	73fbec6	2012-06-16 15:57:37 +0200	[diff] [blame]	946	#ifdef CONFIG_PARAVIRT
				947	static inline u64 steal_ticks(u64 steal)
				948	{
				949	if (unlikely(steal > NSEC_PER_SEC))
				950	return div_u64(steal, TICK_NSEC);
				951
				952	return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
				953	}
				954	#endif
				955
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	956	static inline void inc_nr_running(struct rq *rq)
				957	{
				958	rq->nr_running++;
				959	}
				960
				961	static inline void dec_nr_running(struct rq *rq)
				962	{
				963	rq->nr_running--;
				964	}
				965
				966	extern void update_rq_clock(struct rq *rq);
				967
				968	extern void activate_task(struct rq rq, struct task_struct p, int flags);
				969	extern void deactivate_task(struct rq rq, struct task_struct p, int flags);
				970
				971	extern void check_preempt_curr(struct rq rq, struct task_struct p, int flags);
				972
				973	extern const_debug unsigned int sysctl_sched_time_avg;
				974	extern const_debug unsigned int sysctl_sched_nr_migrate;
				975	extern const_debug unsigned int sysctl_sched_migration_cost;
				976
				977	static inline u64 sched_avg_period(void)
				978	{
				979	return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2;
				980	}
				981
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	982	#ifdef CONFIG_SCHED_HRTICK
				983
				984	/*
				985	* Use hrtick when:
				986	* - enabled by features
				987	* - hrtimer is actually high res
				988	*/
				989	static inline int hrtick_enabled(struct rq *rq)
				990	{
				991	if (!sched_feat(HRTICK))
				992	return 0;
				993	if (!cpu_active(cpu_of(rq)))
				994	return 0;
				995	return hrtimer_is_hres_active(&rq->hrtick_timer);
				996	}
				997
				998	void hrtick_start(struct rq *rq, u64 delay);
				999
Mike Galbraith	b39e66e	2011-11-22 15:20:07 +0100	[diff] [blame]	1000	#else
				1001
				1002	static inline int hrtick_enabled(struct rq *rq)
				1003	{
				1004	return 0;
				1005	}
				1006
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	1007	#endif /* CONFIG_SCHED_HRTICK */
				1008
				1009	#ifdef CONFIG_SMP
				1010	extern void sched_avg_update(struct rq *rq);
				1011	static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
				1012	{
				1013	rq->rt_avg += rt_delta;
				1014	sched_avg_update(rq);
				1015	}
				1016	#else
				1017	static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
				1018	static inline void sched_avg_update(struct rq *rq) { }
				1019	#endif
				1020
				1021	extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
				1022
				1023	#ifdef CONFIG_SMP
				1024	#ifdef CONFIG_PREEMPT
				1025
				1026	static inline void double_rq_lock(struct rq rq1, struct rq rq2);
				1027
				1028	/*
				1029	* fair double_lock_balance: Safely acquires both rq->locks in a fair
				1030	* way at the expense of forcing extra atomic operations in all
				1031	* invocations. This assures that the double_lock is acquired using the
				1032	* same underlying policy as the spinlock_t on this architecture, which
				1033	* reduces latency compared to the unfair variant below. However, it
				1034	* also adds more overhead and therefore may reduce throughput.
				1035	*/
				1036	static inline int _double_lock_balance(struct rq this_rq, struct rq busiest)
				1037	__releases(this_rq->lock)
				1038	__acquires(busiest->lock)
				1039	__acquires(this_rq->lock)
				1040	{
				1041	raw_spin_unlock(&this_rq->lock);
				1042	double_rq_lock(this_rq, busiest);
				1043
				1044	return 1;
				1045	}
				1046
				1047	#else
				1048	/*
				1049	* Unfair double_lock_balance: Optimizes throughput at the expense of
				1050	* latency by eliminating extra atomic operations when the locks are
				1051	* already in proper order on entry. This favors lower cpu-ids and will
				1052	* grant the double lock to lower cpus over higher ids under contention,
				1053	* regardless of entry order into the function.
				1054	*/
				1055	static inline int _double_lock_balance(struct rq this_rq, struct rq busiest)
				1056	__releases(this_rq->lock)
				1057	__acquires(busiest->lock)
				1058	__acquires(this_rq->lock)
				1059	{
				1060	int ret = 0;
				1061
				1062	if (unlikely(!raw_spin_trylock(&busiest->lock))) {
				1063	if (busiest < this_rq) {
				1064	raw_spin_unlock(&this_rq->lock);
				1065	raw_spin_lock(&busiest->lock);
				1066	raw_spin_lock_nested(&this_rq->lock,
				1067	SINGLE_DEPTH_NESTING);
				1068	ret = 1;
				1069	} else
				1070	raw_spin_lock_nested(&busiest->lock,
				1071	SINGLE_DEPTH_NESTING);
				1072	}
				1073	return ret;
				1074	}
				1075
				1076	#endif /* CONFIG_PREEMPT */
				1077
				1078	/*
				1079	* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
				1080	*/
				1081	static inline int double_lock_balance(struct rq this_rq, struct rq busiest)
				1082	{
				1083	if (unlikely(!irqs_disabled())) {
				1084	/* printk() doesn't work good under rq->lock */
				1085	raw_spin_unlock(&this_rq->lock);
				1086	BUG_ON(1);
				1087	}
				1088
				1089	return _double_lock_balance(this_rq, busiest);
				1090	}
				1091
				1092	static inline void double_unlock_balance(struct rq this_rq, struct rq busiest)
				1093	__releases(busiest->lock)
				1094	{
				1095	raw_spin_unlock(&busiest->lock);
				1096	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
				1097	}
				1098
				1099	/*
				1100	* double_rq_lock - safely lock two runqueues
				1101	*
				1102	* Note this does not disable interrupts like task_rq_lock,
				1103	* you need to do so manually before calling.
				1104	*/
				1105	static inline void double_rq_lock(struct rq rq1, struct rq rq2)
				1106	__acquires(rq1->lock)
				1107	__acquires(rq2->lock)
				1108	{
				1109	BUG_ON(!irqs_disabled());
				1110	if (rq1 == rq2) {
				1111	raw_spin_lock(&rq1->lock);
				1112	__acquire(rq2->lock); /* Fake it out ;) */
				1113	} else {
				1114	if (rq1 < rq2) {
				1115	raw_spin_lock(&rq1->lock);
				1116	raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
				1117	} else {
				1118	raw_spin_lock(&rq2->lock);
				1119	raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
				1120	}
				1121	}
				1122	}
				1123
				1124	/*
				1125	* double_rq_unlock - safely unlock two runqueues
				1126	*
				1127	* Note this does not restore interrupts like task_rq_unlock,
				1128	* you need to do so manually after calling.
				1129	*/
				1130	static inline void double_rq_unlock(struct rq rq1, struct rq rq2)
				1131	__releases(rq1->lock)
				1132	__releases(rq2->lock)
				1133	{
				1134	raw_spin_unlock(&rq1->lock);
				1135	if (rq1 != rq2)
				1136	raw_spin_unlock(&rq2->lock);
				1137	else
				1138	__release(rq2->lock);
				1139	}
				1140
				1141	#else /* CONFIG_SMP */
				1142
				1143	/*
				1144	* double_rq_lock - safely lock two runqueues
				1145	*
				1146	* Note this does not disable interrupts like task_rq_lock,
				1147	* you need to do so manually before calling.
				1148	*/
				1149	static inline void double_rq_lock(struct rq rq1, struct rq rq2)
				1150	__acquires(rq1->lock)
				1151	__acquires(rq2->lock)
				1152	{
				1153	BUG_ON(!irqs_disabled());
				1154	BUG_ON(rq1 != rq2);
				1155	raw_spin_lock(&rq1->lock);
				1156	__acquire(rq2->lock); /* Fake it out ;) */
				1157	}
				1158
				1159	/*
				1160	* double_rq_unlock - safely unlock two runqueues
				1161	*
				1162	* Note this does not restore interrupts like task_rq_unlock,
				1163	* you need to do so manually after calling.
				1164	*/
				1165	static inline void double_rq_unlock(struct rq rq1, struct rq rq2)
				1166	__releases(rq1->lock)
				1167	__releases(rq2->lock)
				1168	{
				1169	BUG_ON(rq1 != rq2);
				1170	raw_spin_unlock(&rq1->lock);
				1171	__release(rq2->lock);
				1172	}
				1173
				1174	#endif
				1175
				1176	extern struct sched_entity __pick_first_entity(struct cfs_rq cfs_rq);
				1177	extern struct sched_entity __pick_last_entity(struct cfs_rq cfs_rq);
				1178	extern void print_cfs_stats(struct seq_file *m, int cpu);
				1179	extern void print_rt_stats(struct seq_file *m, int cpu);
				1180
				1181	extern void init_cfs_rq(struct cfs_rq *cfs_rq);
				1182	extern void init_rt_rq(struct rt_rq rt_rq, struct rq rq);
Peter Zijlstra	029632f	2011-10-25 10:00:11 +0200	[diff] [blame]	1183
				1184	extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
Suresh Siddha	1c792db	2011-12-01 17:07:32 -0800	[diff] [blame]	1185
				1186	#ifdef CONFIG_NO_HZ
				1187	enum rq_nohz_flag_bits {
				1188	NOHZ_TICK_STOPPED,
				1189	NOHZ_BALANCE_KICK,
Suresh Siddha	69e1e81	2011-12-01 17:07:33 -0800	[diff] [blame]	1190	NOHZ_IDLE,
Suresh Siddha	1c792db	2011-12-01 17:07:32 -0800	[diff] [blame]	1191	};
				1192
				1193	#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
				1194	#endif
Frederic Weisbecker	73fbec6	2012-06-16 15:57:37 +0200	[diff] [blame]	1195
				1196	#ifdef CONFIG_IRQ_TIME_ACCOUNTING
				1197
				1198	DECLARE_PER_CPU(u64, cpu_hardirq_time);
				1199	DECLARE_PER_CPU(u64, cpu_softirq_time);
				1200
				1201	#ifndef CONFIG_64BIT
				1202	DECLARE_PER_CPU(seqcount_t, irq_time_seq);
				1203
				1204	static inline void irq_time_write_begin(void)
				1205	{
				1206	__this_cpu_inc(irq_time_seq.sequence);
				1207	smp_wmb();
				1208	}
				1209
				1210	static inline void irq_time_write_end(void)
				1211	{
				1212	smp_wmb();
				1213	__this_cpu_inc(irq_time_seq.sequence);
				1214	}
				1215
				1216	static inline u64 irq_time_read(int cpu)
				1217	{
				1218	u64 irq_time;
				1219	unsigned seq;
				1220
				1221	do {
				1222	seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
				1223	irq_time = per_cpu(cpu_softirq_time, cpu) +
				1224	per_cpu(cpu_hardirq_time, cpu);
				1225	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
				1226
				1227	return irq_time;
				1228	}
				1229	#else /* CONFIG_64BIT */
				1230	static inline void irq_time_write_begin(void)
				1231	{
				1232	}
				1233
				1234	static inline void irq_time_write_end(void)
				1235	{
				1236	}
				1237
				1238	static inline u64 irq_time_read(int cpu)
				1239	{
				1240	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
				1241	}
				1242	#endif /* CONFIG_64BIT */
				1243	#endif /* CONFIG_IRQ_TIME_ACCOUNTING */