| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (c) 2006-2007 Silicon Graphics, Inc. | 
|  | 3 | * All Rights Reserved. | 
|  | 4 | * | 
|  | 5 | * This program is free software; you can redistribute it and/or | 
|  | 6 | * modify it under the terms of the GNU General Public License as | 
|  | 7 | * published by the Free Software Foundation. | 
|  | 8 | * | 
|  | 9 | * This program is distributed in the hope that it would be useful, | 
|  | 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 12 | * GNU General Public License for more details. | 
|  | 13 | * | 
|  | 14 | * You should have received a copy of the GNU General Public License | 
|  | 15 | * along with this program; if not, write the Free Software Foundation, | 
|  | 16 | * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA | 
|  | 17 | */ | 
|  | 18 | #include "xfs.h" | 
|  | 19 | #include "xfs_mru_cache.h" | 
|  | 20 |  | 
|  | 21 | /* | 
|  | 22 | * The MRU Cache data structure consists of a data store, an array of lists and | 
|  | 23 | * a lock to protect its internal state.  At initialisation time, the client | 
|  | 24 | * supplies an element lifetime in milliseconds and a group count, as well as a | 
|  | 25 | * function pointer to call when deleting elements.  A data structure for | 
|  | 26 | * queueing up work in the form of timed callbacks is also included. | 
|  | 27 | * | 
|  | 28 | * The group count controls how many lists are created, and thereby how finely | 
|  | 29 | * the elements are grouped in time.  When reaping occurs, all the elements in | 
|  | 30 | * all the lists whose time has expired are deleted. | 
|  | 31 | * | 
|  | 32 | * To give an example of how this works in practice, consider a client that | 
|  | 33 | * initialises an MRU Cache with a lifetime of ten seconds and a group count of | 
|  | 34 | * five.  Five internal lists will be created, each representing a two second | 
|  | 35 | * period in time.  When the first element is added, time zero for the data | 
|  | 36 | * structure is initialised to the current time. | 
|  | 37 | * | 
|  | 38 | * All the elements added in the first two seconds are appended to the first | 
|  | 39 | * list.  Elements added in the third second go into the second list, and so on. | 
|  | 40 | * If an element is accessed at any point, it is removed from its list and | 
|  | 41 | * appended to the tail of the current most-recently-used list. | 
|  | 42 | * | 
|  | 43 | * The reaper function will have nothing to do until at least twelve seconds | 
|  | 44 | * have elapsed since the first element was added.  The reason for this is that | 
|  | 45 | * if it were called at t=11s, there could be elements in the first list that | 
|  | 46 | * have only been inactive for nine seconds, so it still does nothing.  If it is | 
|  | 47 | * called anywhere between t=12 and t=14 seconds, it will delete all the | 
|  | 48 | * elements that remain in the first list.  It's therefore possible for elements | 
|  | 49 | * to remain in the data store even after they've been inactive for up to | 
|  | 50 | * (t + t/g) seconds, where t is the inactive element lifetime and g is the | 
|  | 51 | * number of groups. | 
|  | 52 | * | 
|  | 53 | * The above example assumes that the reaper function gets called at least once | 
|  | 54 | * every (t/g) seconds.  If it is called less frequently, unused elements will | 
|  | 55 | * accumulate in the reap list until the reaper function is eventually called. | 
|  | 56 | * The current implementation uses work queue callbacks to carefully time the | 
|  | 57 | * reaper function calls, so this should happen rarely, if at all. | 
|  | 58 | * | 
|  | 59 | * From a design perspective, the primary reason for the choice of a list array | 
|  | 60 | * representing discrete time intervals is that it's only practical to reap | 
|  | 61 | * expired elements in groups of some appreciable size.  This automatically | 
|  | 62 | * introduces a granularity to element lifetimes, so there's no point storing an | 
|  | 63 | * individual timeout with each element that specifies a more precise reap time. | 
|  | 64 | * The bonus is a saving of sizeof(long) bytes of memory per element stored. | 
|  | 65 | * | 
|  | 66 | * The elements could have been stored in just one list, but an array of | 
|  | 67 | * counters or pointers would need to be maintained to allow them to be divided | 
|  | 68 | * up into discrete time groups.  More critically, the process of touching or | 
|  | 69 | * removing an element would involve walking large portions of the entire list, | 
|  | 70 | * which would have a detrimental effect on performance.  The additional memory | 
|  | 71 | * requirement for the array of list heads is minimal. | 
|  | 72 | * | 
|  | 73 | * When an element is touched or deleted, it needs to be removed from its | 
|  | 74 | * current list.  Doubly linked lists are used to make the list maintenance | 
|  | 75 | * portion of these operations O(1).  Since reaper timing can be imprecise, | 
|  | 76 | * inserts and lookups can occur when there are no free lists available.  When | 
|  | 77 | * this happens, all the elements on the LRU list need to be migrated to the end | 
|  | 78 | * of the reap list.  To keep the list maintenance portion of these operations | 
|  | 79 | * O(1) also, list tails need to be accessible without walking the entire list. | 
|  | 80 | * This is the reason why doubly linked list heads are used. | 
|  | 81 | */ | 
|  | 82 |  | 
|  | 83 | /* | 
|  | 84 | * An MRU Cache is a dynamic data structure that stores its elements in a way | 
|  | 85 | * that allows efficient lookups, but also groups them into discrete time | 
|  | 86 | * intervals based on insertion time.  This allows elements to be efficiently | 
|  | 87 | * and automatically reaped after a fixed period of inactivity. | 
|  | 88 | * | 
|  | 89 | * When a client data pointer is stored in the MRU Cache it needs to be added to | 
|  | 90 | * both the data store and to one of the lists.  It must also be possible to | 
|  | 91 | * access each of these entries via the other, i.e. to: | 
|  | 92 | * | 
|  | 93 | *    a) Walk a list, removing the corresponding data store entry for each item. | 
|  | 94 | *    b) Look up a data store entry, then access its list entry directly. | 
|  | 95 | * | 
|  | 96 | * To achieve both of these goals, each entry must contain both a list entry and | 
|  | 97 | * a key, in addition to the user's data pointer.  Note that it's not a good | 
|  | 98 | * idea to have the client embed one of these structures at the top of their own | 
|  | 99 | * data structure, because inserting the same item more than once would most | 
|  | 100 | * likely result in a loop in one of the lists.  That's a sure-fire recipe for | 
|  | 101 | * an infinite loop in the code. | 
|  | 102 | */ | 
|  | 103 | typedef struct xfs_mru_cache_elem | 
|  | 104 | { | 
|  | 105 | struct list_head list_node; | 
|  | 106 | unsigned long	key; | 
|  | 107 | void		*value; | 
|  | 108 | } xfs_mru_cache_elem_t; | 
|  | 109 |  | 
|  | 110 | static kmem_zone_t		*xfs_mru_elem_zone; | 
|  | 111 | static struct workqueue_struct	*xfs_mru_reap_wq; | 
|  | 112 |  | 
|  | 113 | /* | 
|  | 114 | * When inserting, destroying or reaping, it's first necessary to update the | 
|  | 115 | * lists relative to a particular time.  In the case of destroying, that time | 
|  | 116 | * will be well in the future to ensure that all items are moved to the reap | 
|  | 117 | * list.  In all other cases though, the time will be the current time. | 
|  | 118 | * | 
|  | 119 | * This function enters a loop, moving the contents of the LRU list to the reap | 
|  | 120 | * list again and again until either a) the lists are all empty, or b) time zero | 
|  | 121 | * has been advanced sufficiently to be within the immediate element lifetime. | 
|  | 122 | * | 
|  | 123 | * Case a) above is detected by counting how many groups are migrated and | 
|  | 124 | * stopping when they've all been moved.  Case b) is detected by monitoring the | 
|  | 125 | * time_zero field, which is updated as each group is migrated. | 
|  | 126 | * | 
|  | 127 | * The return value is the earliest time that more migration could be needed, or | 
|  | 128 | * zero if there's no need to schedule more work because the lists are empty. | 
|  | 129 | */ | 
|  | 130 | STATIC unsigned long | 
|  | 131 | _xfs_mru_cache_migrate( | 
|  | 132 | xfs_mru_cache_t	*mru, | 
|  | 133 | unsigned long	now) | 
|  | 134 | { | 
|  | 135 | unsigned int	grp; | 
|  | 136 | unsigned int	migrated = 0; | 
|  | 137 | struct list_head *lru_list; | 
|  | 138 |  | 
|  | 139 | /* Nothing to do if the data store is empty. */ | 
|  | 140 | if (!mru->time_zero) | 
|  | 141 | return 0; | 
|  | 142 |  | 
|  | 143 | /* While time zero is older than the time spanned by all the lists. */ | 
|  | 144 | while (mru->time_zero <= now - mru->grp_count * mru->grp_time) { | 
|  | 145 |  | 
|  | 146 | /* | 
|  | 147 | * If the LRU list isn't empty, migrate its elements to the tail | 
|  | 148 | * of the reap list. | 
|  | 149 | */ | 
|  | 150 | lru_list = mru->lists + mru->lru_grp; | 
|  | 151 | if (!list_empty(lru_list)) | 
|  | 152 | list_splice_init(lru_list, mru->reap_list.prev); | 
|  | 153 |  | 
|  | 154 | /* | 
|  | 155 | * Advance the LRU group number, freeing the old LRU list to | 
|  | 156 | * become the new MRU list; advance time zero accordingly. | 
|  | 157 | */ | 
|  | 158 | mru->lru_grp = (mru->lru_grp + 1) % mru->grp_count; | 
|  | 159 | mru->time_zero += mru->grp_time; | 
|  | 160 |  | 
|  | 161 | /* | 
|  | 162 | * If reaping is so far behind that all the elements on all the | 
|  | 163 | * lists have been migrated to the reap list, it's now empty. | 
|  | 164 | */ | 
|  | 165 | if (++migrated == mru->grp_count) { | 
|  | 166 | mru->lru_grp = 0; | 
|  | 167 | mru->time_zero = 0; | 
|  | 168 | return 0; | 
|  | 169 | } | 
|  | 170 | } | 
|  | 171 |  | 
|  | 172 | /* Find the first non-empty list from the LRU end. */ | 
|  | 173 | for (grp = 0; grp < mru->grp_count; grp++) { | 
|  | 174 |  | 
|  | 175 | /* Check the grp'th list from the LRU end. */ | 
|  | 176 | lru_list = mru->lists + ((mru->lru_grp + grp) % mru->grp_count); | 
|  | 177 | if (!list_empty(lru_list)) | 
|  | 178 | return mru->time_zero + | 
|  | 179 | (mru->grp_count + grp) * mru->grp_time; | 
|  | 180 | } | 
|  | 181 |  | 
|  | 182 | /* All the lists must be empty. */ | 
|  | 183 | mru->lru_grp = 0; | 
|  | 184 | mru->time_zero = 0; | 
|  | 185 | return 0; | 
|  | 186 | } | 
|  | 187 |  | 
|  | 188 | /* | 
|  | 189 | * When inserting or doing a lookup, an element needs to be inserted into the | 
|  | 190 | * MRU list.  The lists must be migrated first to ensure that they're | 
|  | 191 | * up-to-date, otherwise the new element could be given a shorter lifetime in | 
|  | 192 | * the cache than it should. | 
|  | 193 | */ | 
|  | 194 | STATIC void | 
|  | 195 | _xfs_mru_cache_list_insert( | 
|  | 196 | xfs_mru_cache_t		*mru, | 
|  | 197 | xfs_mru_cache_elem_t	*elem) | 
|  | 198 | { | 
|  | 199 | unsigned int	grp = 0; | 
|  | 200 | unsigned long	now = jiffies; | 
|  | 201 |  | 
|  | 202 | /* | 
|  | 203 | * If the data store is empty, initialise time zero, leave grp set to | 
|  | 204 | * zero and start the work queue timer if necessary.  Otherwise, set grp | 
|  | 205 | * to the number of group times that have elapsed since time zero. | 
|  | 206 | */ | 
|  | 207 | if (!_xfs_mru_cache_migrate(mru, now)) { | 
|  | 208 | mru->time_zero = now; | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 209 | if (!mru->queued) { | 
|  | 210 | mru->queued = 1; | 
|  | 211 | queue_delayed_work(xfs_mru_reap_wq, &mru->work, | 
|  | 212 | mru->grp_count * mru->grp_time); | 
|  | 213 | } | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 214 | } else { | 
|  | 215 | grp = (now - mru->time_zero) / mru->grp_time; | 
|  | 216 | grp = (mru->lru_grp + grp) % mru->grp_count; | 
|  | 217 | } | 
|  | 218 |  | 
|  | 219 | /* Insert the element at the tail of the corresponding list. */ | 
|  | 220 | list_add_tail(&elem->list_node, mru->lists + grp); | 
|  | 221 | } | 
|  | 222 |  | 
|  | 223 | /* | 
|  | 224 | * When destroying or reaping, all the elements that were migrated to the reap | 
|  | 225 | * list need to be deleted.  For each element this involves removing it from the | 
|  | 226 | * data store, removing it from the reap list, calling the client's free | 
|  | 227 | * function and deleting the element from the element zone. | 
| David Chinner | a8272ce | 2007-11-23 16:28:09 +1100 | [diff] [blame] | 228 | * | 
|  | 229 | * We get called holding the mru->lock, which we drop and then reacquire. | 
|  | 230 | * Sparse need special help with this to tell it we know what we are doing. | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 231 | */ | 
|  | 232 | STATIC void | 
|  | 233 | _xfs_mru_cache_clear_reap_list( | 
| David Chinner | a8272ce | 2007-11-23 16:28:09 +1100 | [diff] [blame] | 234 | xfs_mru_cache_t		*mru) __releases(mru->lock) __acquires(mru->lock) | 
|  | 235 |  | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 236 | { | 
|  | 237 | xfs_mru_cache_elem_t	*elem, *next; | 
|  | 238 | struct list_head	tmp; | 
|  | 239 |  | 
|  | 240 | INIT_LIST_HEAD(&tmp); | 
|  | 241 | list_for_each_entry_safe(elem, next, &mru->reap_list, list_node) { | 
|  | 242 |  | 
|  | 243 | /* Remove the element from the data store. */ | 
|  | 244 | radix_tree_delete(&mru->store, elem->key); | 
|  | 245 |  | 
|  | 246 | /* | 
|  | 247 | * remove to temp list so it can be freed without | 
|  | 248 | * needing to hold the lock | 
|  | 249 | */ | 
|  | 250 | list_move(&elem->list_node, &tmp); | 
|  | 251 | } | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 252 | spin_unlock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 253 |  | 
|  | 254 | list_for_each_entry_safe(elem, next, &tmp, list_node) { | 
|  | 255 |  | 
|  | 256 | /* Remove the element from the reap list. */ | 
|  | 257 | list_del_init(&elem->list_node); | 
|  | 258 |  | 
|  | 259 | /* Call the client's free function with the key and value pointer. */ | 
|  | 260 | mru->free_func(elem->key, elem->value); | 
|  | 261 |  | 
|  | 262 | /* Free the element structure. */ | 
|  | 263 | kmem_zone_free(xfs_mru_elem_zone, elem); | 
|  | 264 | } | 
|  | 265 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 266 | spin_lock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 267 | } | 
|  | 268 |  | 
|  | 269 | /* | 
|  | 270 | * We fire the reap timer every group expiry interval so | 
|  | 271 | * we always have a reaper ready to run. This makes shutdown | 
|  | 272 | * and flushing of the reaper easy to do. Hence we need to | 
|  | 273 | * keep when the next reap must occur so we can determine | 
|  | 274 | * at each interval whether there is anything we need to do. | 
|  | 275 | */ | 
|  | 276 | STATIC void | 
|  | 277 | _xfs_mru_cache_reap( | 
|  | 278 | struct work_struct	*work) | 
|  | 279 | { | 
|  | 280 | xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work); | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 281 | unsigned long		now, next; | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 282 |  | 
|  | 283 | ASSERT(mru && mru->lists); | 
|  | 284 | if (!mru || !mru->lists) | 
|  | 285 | return; | 
|  | 286 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 287 | spin_lock(&mru->lock); | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 288 | next = _xfs_mru_cache_migrate(mru, jiffies); | 
|  | 289 | _xfs_mru_cache_clear_reap_list(mru); | 
|  | 290 |  | 
|  | 291 | mru->queued = next; | 
|  | 292 | if ((mru->queued > 0)) { | 
|  | 293 | now = jiffies; | 
|  | 294 | if (next <= now) | 
|  | 295 | next = 0; | 
|  | 296 | else | 
|  | 297 | next -= now; | 
|  | 298 | queue_delayed_work(xfs_mru_reap_wq, &mru->work, next); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 299 | } | 
|  | 300 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 301 | spin_unlock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 302 | } | 
|  | 303 |  | 
|  | 304 | int | 
|  | 305 | xfs_mru_cache_init(void) | 
|  | 306 | { | 
|  | 307 | xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), | 
|  | 308 | "xfs_mru_cache_elem"); | 
|  | 309 | if (!xfs_mru_elem_zone) | 
| Christoph Hellwig | 9f8868f | 2008-07-18 17:11:46 +1000 | [diff] [blame] | 310 | goto out; | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 311 |  | 
|  | 312 | xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); | 
| Christoph Hellwig | 9f8868f | 2008-07-18 17:11:46 +1000 | [diff] [blame] | 313 | if (!xfs_mru_reap_wq) | 
|  | 314 | goto out_destroy_mru_elem_zone; | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 315 |  | 
|  | 316 | return 0; | 
| Christoph Hellwig | 9f8868f | 2008-07-18 17:11:46 +1000 | [diff] [blame] | 317 |  | 
|  | 318 | out_destroy_mru_elem_zone: | 
|  | 319 | kmem_zone_destroy(xfs_mru_elem_zone); | 
|  | 320 | out: | 
|  | 321 | return -ENOMEM; | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 322 | } | 
|  | 323 |  | 
|  | 324 | void | 
|  | 325 | xfs_mru_cache_uninit(void) | 
|  | 326 | { | 
|  | 327 | destroy_workqueue(xfs_mru_reap_wq); | 
|  | 328 | kmem_zone_destroy(xfs_mru_elem_zone); | 
|  | 329 | } | 
|  | 330 |  | 
|  | 331 | /* | 
|  | 332 | * To initialise a struct xfs_mru_cache pointer, call xfs_mru_cache_create() | 
|  | 333 | * with the address of the pointer, a lifetime value in milliseconds, a group | 
|  | 334 | * count and a free function to use when deleting elements.  This function | 
|  | 335 | * returns 0 if the initialisation was successful. | 
|  | 336 | */ | 
|  | 337 | int | 
|  | 338 | xfs_mru_cache_create( | 
|  | 339 | xfs_mru_cache_t		**mrup, | 
|  | 340 | unsigned int		lifetime_ms, | 
|  | 341 | unsigned int		grp_count, | 
|  | 342 | xfs_mru_cache_free_func_t free_func) | 
|  | 343 | { | 
|  | 344 | xfs_mru_cache_t	*mru = NULL; | 
|  | 345 | int		err = 0, grp; | 
|  | 346 | unsigned int	grp_time; | 
|  | 347 |  | 
|  | 348 | if (mrup) | 
|  | 349 | *mrup = NULL; | 
|  | 350 |  | 
|  | 351 | if (!mrup || !grp_count || !lifetime_ms || !free_func) | 
|  | 352 | return EINVAL; | 
|  | 353 |  | 
|  | 354 | if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count)) | 
|  | 355 | return EINVAL; | 
|  | 356 |  | 
|  | 357 | if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP))) | 
|  | 358 | return ENOMEM; | 
|  | 359 |  | 
|  | 360 | /* An extra list is needed to avoid reaping up to a grp_time early. */ | 
|  | 361 | mru->grp_count = grp_count + 1; | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 362 | mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 363 |  | 
|  | 364 | if (!mru->lists) { | 
|  | 365 | err = ENOMEM; | 
|  | 366 | goto exit; | 
|  | 367 | } | 
|  | 368 |  | 
|  | 369 | for (grp = 0; grp < mru->grp_count; grp++) | 
|  | 370 | INIT_LIST_HEAD(mru->lists + grp); | 
|  | 371 |  | 
|  | 372 | /* | 
|  | 373 | * We use GFP_KERNEL radix tree preload and do inserts under a | 
|  | 374 | * spinlock so GFP_ATOMIC is appropriate for the radix tree itself. | 
|  | 375 | */ | 
|  | 376 | INIT_RADIX_TREE(&mru->store, GFP_ATOMIC); | 
|  | 377 | INIT_LIST_HEAD(&mru->reap_list); | 
| Eric Sandeen | 007c61c | 2007-10-11 17:43:56 +1000 | [diff] [blame] | 378 | spin_lock_init(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 379 | INIT_DELAYED_WORK(&mru->work, _xfs_mru_cache_reap); | 
|  | 380 |  | 
|  | 381 | mru->grp_time  = grp_time; | 
|  | 382 | mru->free_func = free_func; | 
|  | 383 |  | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 384 | *mrup = mru; | 
|  | 385 |  | 
|  | 386 | exit: | 
|  | 387 | if (err && mru && mru->lists) | 
| Denys Vlasenko | f0e2d93 | 2008-05-19 16:31:57 +1000 | [diff] [blame] | 388 | kmem_free(mru->lists); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 389 | if (err && mru) | 
| Denys Vlasenko | f0e2d93 | 2008-05-19 16:31:57 +1000 | [diff] [blame] | 390 | kmem_free(mru); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 391 |  | 
|  | 392 | return err; | 
|  | 393 | } | 
|  | 394 |  | 
|  | 395 | /* | 
|  | 396 | * Call xfs_mru_cache_flush() to flush out all cached entries, calling their | 
|  | 397 | * free functions as they're deleted.  When this function returns, the caller is | 
|  | 398 | * guaranteed that all the free functions for all the elements have finished | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 399 | * executing and the reaper is not running. | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 400 | */ | 
| Dave Chinner | b657fc8 | 2010-01-11 11:47:47 +0000 | [diff] [blame] | 401 | static void | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 402 | xfs_mru_cache_flush( | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 403 | xfs_mru_cache_t		*mru) | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 404 | { | 
|  | 405 | if (!mru || !mru->lists) | 
|  | 406 | return; | 
|  | 407 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 408 | spin_lock(&mru->lock); | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 409 | if (mru->queued) { | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 410 | spin_unlock(&mru->lock); | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 411 | cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 412 | spin_lock(&mru->lock); | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 413 | } | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 414 |  | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 415 | _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time); | 
|  | 416 | _xfs_mru_cache_clear_reap_list(mru); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 417 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 418 | spin_unlock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 419 | } | 
|  | 420 |  | 
|  | 421 | void | 
|  | 422 | xfs_mru_cache_destroy( | 
|  | 423 | xfs_mru_cache_t		*mru) | 
|  | 424 | { | 
|  | 425 | if (!mru || !mru->lists) | 
|  | 426 | return; | 
|  | 427 |  | 
| David Chinner | 65de556 | 2007-08-16 15:21:11 +1000 | [diff] [blame] | 428 | xfs_mru_cache_flush(mru); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 429 |  | 
| Denys Vlasenko | f0e2d93 | 2008-05-19 16:31:57 +1000 | [diff] [blame] | 430 | kmem_free(mru->lists); | 
|  | 431 | kmem_free(mru); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 432 | } | 
|  | 433 |  | 
|  | 434 | /* | 
|  | 435 | * To insert an element, call xfs_mru_cache_insert() with the data store, the | 
|  | 436 | * element's key and the client data pointer.  This function returns 0 on | 
|  | 437 | * success or ENOMEM if memory for the data element couldn't be allocated. | 
|  | 438 | */ | 
|  | 439 | int | 
|  | 440 | xfs_mru_cache_insert( | 
|  | 441 | xfs_mru_cache_t	*mru, | 
|  | 442 | unsigned long	key, | 
|  | 443 | void		*value) | 
|  | 444 | { | 
|  | 445 | xfs_mru_cache_elem_t *elem; | 
|  | 446 |  | 
|  | 447 | ASSERT(mru && mru->lists); | 
|  | 448 | if (!mru || !mru->lists) | 
|  | 449 | return EINVAL; | 
|  | 450 |  | 
|  | 451 | elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP); | 
|  | 452 | if (!elem) | 
|  | 453 | return ENOMEM; | 
|  | 454 |  | 
|  | 455 | if (radix_tree_preload(GFP_KERNEL)) { | 
|  | 456 | kmem_zone_free(xfs_mru_elem_zone, elem); | 
|  | 457 | return ENOMEM; | 
|  | 458 | } | 
|  | 459 |  | 
|  | 460 | INIT_LIST_HEAD(&elem->list_node); | 
|  | 461 | elem->key = key; | 
|  | 462 | elem->value = value; | 
|  | 463 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 464 | spin_lock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 465 |  | 
|  | 466 | radix_tree_insert(&mru->store, key, elem); | 
|  | 467 | radix_tree_preload_end(); | 
|  | 468 | _xfs_mru_cache_list_insert(mru, elem); | 
|  | 469 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 470 | spin_unlock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 471 |  | 
|  | 472 | return 0; | 
|  | 473 | } | 
|  | 474 |  | 
|  | 475 | /* | 
|  | 476 | * To remove an element without calling the free function, call | 
|  | 477 | * xfs_mru_cache_remove() with the data store and the element's key.  On success | 
|  | 478 | * the client data pointer for the removed element is returned, otherwise this | 
|  | 479 | * function will return a NULL pointer. | 
|  | 480 | */ | 
|  | 481 | void * | 
|  | 482 | xfs_mru_cache_remove( | 
|  | 483 | xfs_mru_cache_t	*mru, | 
|  | 484 | unsigned long	key) | 
|  | 485 | { | 
|  | 486 | xfs_mru_cache_elem_t *elem; | 
|  | 487 | void		*value = NULL; | 
|  | 488 |  | 
|  | 489 | ASSERT(mru && mru->lists); | 
|  | 490 | if (!mru || !mru->lists) | 
|  | 491 | return NULL; | 
|  | 492 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 493 | spin_lock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 494 | elem = radix_tree_delete(&mru->store, key); | 
|  | 495 | if (elem) { | 
|  | 496 | value = elem->value; | 
|  | 497 | list_del(&elem->list_node); | 
|  | 498 | } | 
|  | 499 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 500 | spin_unlock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 501 |  | 
|  | 502 | if (elem) | 
|  | 503 | kmem_zone_free(xfs_mru_elem_zone, elem); | 
|  | 504 |  | 
|  | 505 | return value; | 
|  | 506 | } | 
|  | 507 |  | 
|  | 508 | /* | 
|  | 509 | * To remove and element and call the free function, call xfs_mru_cache_delete() | 
|  | 510 | * with the data store and the element's key. | 
|  | 511 | */ | 
|  | 512 | void | 
|  | 513 | xfs_mru_cache_delete( | 
|  | 514 | xfs_mru_cache_t	*mru, | 
|  | 515 | unsigned long	key) | 
|  | 516 | { | 
|  | 517 | void		*value = xfs_mru_cache_remove(mru, key); | 
|  | 518 |  | 
|  | 519 | if (value) | 
|  | 520 | mru->free_func(key, value); | 
|  | 521 | } | 
|  | 522 |  | 
|  | 523 | /* | 
|  | 524 | * To look up an element using its key, call xfs_mru_cache_lookup() with the | 
|  | 525 | * data store and the element's key.  If found, the element will be moved to the | 
|  | 526 | * head of the MRU list to indicate that it's been touched. | 
|  | 527 | * | 
|  | 528 | * The internal data structures are protected by a spinlock that is STILL HELD | 
|  | 529 | * when this function returns.  Call xfs_mru_cache_done() to release it.  Note | 
|  | 530 | * that it is not safe to call any function that might sleep in the interim. | 
|  | 531 | * | 
|  | 532 | * The implementation could have used reference counting to avoid this | 
|  | 533 | * restriction, but since most clients simply want to get, set or test a member | 
|  | 534 | * of the returned data structure, the extra per-element memory isn't warranted. | 
|  | 535 | * | 
|  | 536 | * If the element isn't found, this function returns NULL and the spinlock is | 
|  | 537 | * released.  xfs_mru_cache_done() should NOT be called when this occurs. | 
| David Chinner | a8272ce | 2007-11-23 16:28:09 +1100 | [diff] [blame] | 538 | * | 
|  | 539 | * Because sparse isn't smart enough to know about conditional lock return | 
|  | 540 | * status, we need to help it get it right by annotating the path that does | 
|  | 541 | * not release the lock. | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 542 | */ | 
|  | 543 | void * | 
|  | 544 | xfs_mru_cache_lookup( | 
|  | 545 | xfs_mru_cache_t	*mru, | 
|  | 546 | unsigned long	key) | 
|  | 547 | { | 
|  | 548 | xfs_mru_cache_elem_t *elem; | 
|  | 549 |  | 
|  | 550 | ASSERT(mru && mru->lists); | 
|  | 551 | if (!mru || !mru->lists) | 
|  | 552 | return NULL; | 
|  | 553 |  | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 554 | spin_lock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 555 | elem = radix_tree_lookup(&mru->store, key); | 
|  | 556 | if (elem) { | 
|  | 557 | list_del(&elem->list_node); | 
|  | 558 | _xfs_mru_cache_list_insert(mru, elem); | 
| David Chinner | a8272ce | 2007-11-23 16:28:09 +1100 | [diff] [blame] | 559 | __release(mru_lock); /* help sparse not be stupid */ | 
|  | 560 | } else | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 561 | spin_unlock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 562 |  | 
|  | 563 | return elem ? elem->value : NULL; | 
|  | 564 | } | 
|  | 565 |  | 
|  | 566 | /* | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 567 | * To release the internal data structure spinlock after having performed an | 
|  | 568 | * xfs_mru_cache_lookup() or an xfs_mru_cache_peek(), call xfs_mru_cache_done() | 
|  | 569 | * with the data store pointer. | 
|  | 570 | */ | 
|  | 571 | void | 
|  | 572 | xfs_mru_cache_done( | 
| David Chinner | a8272ce | 2007-11-23 16:28:09 +1100 | [diff] [blame] | 573 | xfs_mru_cache_t	*mru) __releases(mru->lock) | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 574 | { | 
| Eric Sandeen | ba74d0c | 2007-10-11 17:42:10 +1000 | [diff] [blame] | 575 | spin_unlock(&mru->lock); | 
| David Chinner | 2a82b8b | 2007-07-11 11:09:12 +1000 | [diff] [blame] | 576 | } |