/*
 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <joerg.roedel@amd.com>
 *         Leo Duran <leo.duran@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#ifdef CONFIG_IOMMU_API
#include <linux/iommu.h>
#endif
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/amd_iommu_types.h>
#include <asm/amd_iommu.h>

#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))

#define EXIT_LOOP_COUNT 10000000

static DEFINE_RWLOCK(amd_iommu_devtable_lock);

/* A list of preallocated protection domains */
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);

#ifdef CONFIG_IOMMU_API
static struct iommu_ops amd_iommu_ops;
#endif

/*
 * general struct to manage commands sent to an IOMMU
 */
struct iommu_cmd {
	u32 data[4];
};

static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
			     struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);


#ifdef CONFIG_AMD_IOMMU_STATS

/*
 * Initialization code for statistics collection
 */

DECLARE_STATS_COUNTER(compl_wait);
DECLARE_STATS_COUNTER(cnt_map_single);
DECLARE_STATS_COUNTER(cnt_unmap_single);
DECLARE_STATS_COUNTER(cnt_map_sg);
DECLARE_STATS_COUNTER(cnt_unmap_sg);
DECLARE_STATS_COUNTER(cnt_alloc_coherent);
DECLARE_STATS_COUNTER(cnt_free_coherent);
DECLARE_STATS_COUNTER(cross_page);
DECLARE_STATS_COUNTER(domain_flush_single);

static struct dentry *stats_dir;
static struct dentry *de_isolate;
static struct dentry *de_fflush;

static void amd_iommu_stats_add(struct __iommu_counter *cnt)
{
	if (stats_dir == NULL)
		return;

	cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
				       &cnt->value);
}

static void amd_iommu_stats_init(void)
{
	stats_dir = debugfs_create_dir("amd-iommu", NULL);
	if (stats_dir == NULL)
		return;

	de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
					 (u32 *)&amd_iommu_isolate);

	de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
					(u32 *)&amd_iommu_unmap_flush);

	amd_iommu_stats_add(&compl_wait);
	amd_iommu_stats_add(&cnt_map_single);
	amd_iommu_stats_add(&cnt_unmap_single);
	amd_iommu_stats_add(&cnt_map_sg);
	amd_iommu_stats_add(&cnt_unmap_sg);
	amd_iommu_stats_add(&cnt_alloc_coherent);
	amd_iommu_stats_add(&cnt_free_coherent);
	amd_iommu_stats_add(&cross_page);
	amd_iommu_stats_add(&domain_flush_single);
}

#endif

/* returns !0 if the IOMMU is caching non-present entries in its TLB */
static int iommu_has_npcache(struct amd_iommu *iommu)
{
	return iommu->cap & (1UL << IOMMU_CAP_NPCACHE);
}

/****************************************************************************
 *
 * Interrupt handling functions
 *
 ****************************************************************************/

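/*
 * Decode a single entry from the event log and print it. The entry
 * layout follows what the IOMMU hardware writes into the event buffer:
 * type, device id, domain id, flags and a 64 bit address.
 */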
static void iommu_print_event(void *__evt)
{
	u32 *event = __evt;
	int type  = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
	int devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
	int domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
	int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
	u64 address = (u64)(((u64)event[3]) << 32) | event[2];

	printk(KERN_ERR "AMD IOMMU: Event logged [");

	switch (type) {
	case EVENT_TYPE_ILL_DEV:
		printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		break;
	case EVENT_TYPE_IO_FAULT:
		printk("IO_PAGE_FAULT device=%02x:%02x.%x "
		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       domid, address, flags);
		break;
	case EVENT_TYPE_DEV_TAB_ERR:
		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		break;
	case EVENT_TYPE_PAGE_TAB_ERR:
		printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       domid, address, flags);
		break;
	case EVENT_TYPE_ILL_CMD:
		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
		break;
	case EVENT_TYPE_CMD_HARD_ERR:
		printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
		       "flags=0x%04x]\n", address, flags);
		break;
	case EVENT_TYPE_IOTLB_INV_TO:
		printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
		       "address=0x%016llx]\n",
		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address);
		break;
	case EVENT_TYPE_INV_DEV_REQ:
		printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
		       "address=0x%016llx flags=0x%04x]\n",
		       PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid),
		       address, flags);
		break;
	default:
		printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
	}
}

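/*
 * Drain the event log of one IOMMU: walk the ring buffer from the
 * current head to the tail pointer, print every entry and write the
 * new head pointer back to the hardware. Called from the interrupt
 * handler below.
 */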
static void iommu_poll_events(struct amd_iommu *iommu)
{
	u32 head, tail;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);

	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);

	while (head != tail) {
		iommu_print_event(iommu->evt_buf + head);
		head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
	}

	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

irqreturn_t amd_iommu_int_handler(int irq, void *data)
{
	struct amd_iommu *iommu;

	list_for_each_entry(iommu, &amd_iommu_list, list)
		iommu_poll_events(iommu);

	return IRQ_HANDLED;
}

/****************************************************************************
 *
 * IOMMU command queuing functions
 *
 ****************************************************************************/

/*
 * Writes the command to the IOMMU's command buffer and informs the
 * hardware about the new command. Must be called with iommu->lock held.
 */
static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
{
	u32 tail, head;
	u8 *target;

	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
	target = iommu->cmd_buf + tail;
	memcpy_toio(target, cmd, sizeof(*cmd));
	tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
	if (tail == head)
		return -ENOMEM;
	writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);

	return 0;
}

/*
 * General queuing function for commands. Takes iommu->lock and calls
 * __iommu_queue_command().
 */
static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&iommu->lock, flags);
	ret = __iommu_queue_command(iommu, cmd);
	if (!ret)
		iommu->need_sync = true;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return ret;
}

/*
 * This function waits until an IOMMU has completed a completion
 * wait command
 */
static void __iommu_wait_for_completion(struct amd_iommu *iommu)
{
	int ready = 0;
	unsigned status = 0;
	unsigned long i = 0;

	INC_STATS_COUNTER(compl_wait);

	while (!ready && (i < EXIT_LOOP_COUNT)) {
		++i;
		/* wait for the bit to become one */
		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
		ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
	}

	/* set bit back to zero */
	status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
	writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);

	if (unlikely(i == EXIT_LOOP_COUNT))
		panic("AMD IOMMU: Completion wait loop failed\n");
}

/*
 * This function queues a completion wait command into the command
 * buffer of an IOMMU
 */
static int __iommu_completion_wait(struct amd_iommu *iommu)
{
	struct iommu_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);

	return __iommu_queue_command(iommu, &cmd);
}

/*
 * This function is called whenever we need to ensure that the IOMMU has
 * completed execution of all commands we sent. It sends a
 * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
 * us about that by writing a value to a physical address we pass with
 * the command.
 */
static int iommu_completion_wait(struct amd_iommu *iommu)
{
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);

	if (!iommu->need_sync)
		goto out;

	ret = __iommu_completion_wait(iommu);

	iommu->need_sync = false;

	if (ret)
		goto out;

	__iommu_wait_for_completion(iommu);

out:
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

/*
 * Command send function for invalidating a device table entry
 */
static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
{
	struct iommu_cmd cmd;
	int ret;

	BUG_ON(iommu == NULL);

	memset(&cmd, 0, sizeof(cmd));
	CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
	cmd.data[0] = devid;

	ret = iommu_queue_command(iommu, &cmd);

	return ret;
}

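/*
 * Build an INVALIDATE_IOMMU_PAGES command. The caller decides whether
 * the size bit (flush a range instead of a single 4kb page) and the
 * PDE bit (also flush page directory entries) are set.
 */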
static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
					  u16 domid, int pde, int s)
{
	memset(cmd, 0, sizeof(*cmd));
	address &= PAGE_MASK;
	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
	cmd->data[1] |= domid;
	cmd->data[2] = lower_32_bits(address);
	cmd->data[3] = upper_32_bits(address);
	if (s) /* size bit - we flush more than one 4kb page */
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
}

/*
 * Generic command send function for invalidating TLB entries
 */
static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
				       u64 address, u16 domid, int pde, int s)
{
	struct iommu_cmd cmd;
	int ret;

	__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);

	ret = iommu_queue_command(iommu, &cmd);

	return ret;
}

/*
 * TLB invalidation function which is called from the mapping functions.
 * It invalidates a single PTE if the range to flush is within a single
 * page. Otherwise it flushes the whole TLB of the IOMMU.
 */
static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
			     u64 address, size_t size)
{
	int s = 0;
	unsigned pages = iommu_num_pages(address, size, PAGE_SIZE);

	address &= PAGE_MASK;

	if (pages > 1) {
		/*
		 * If we have to flush more than one page, flush all
		 * TLB entries for this domain
		 */
		address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
		s = 1;
	}

	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);

	return 0;
}

/* Flush the whole IO/TLB for a given protection domain */
static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
{
	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;

	INC_STATS_COUNTER(domain_flush_single);

	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
}

#ifdef CONFIG_IOMMU_API
/*
 * This function is used to flush the IO/TLB for a given protection domain
 * on every IOMMU in the system
 */
static void iommu_flush_domain(u16 domid)
{
	unsigned long flags;
	struct amd_iommu *iommu;
	struct iommu_cmd cmd;

	__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
				      domid, 1, 1);

	list_for_each_entry(iommu, &amd_iommu_list, list) {
		spin_lock_irqsave(&iommu->lock, flags);
		__iommu_queue_command(iommu, &cmd);
		__iommu_completion_wait(iommu);
		__iommu_wait_for_completion(iommu);
		spin_unlock_irqrestore(&iommu->lock, flags);
	}
}
#endif

/****************************************************************************
 *
 * The functions below are used to create the page table mappings for
 * unity mapped regions.
 *
 ****************************************************************************/

/*
 * Generic mapping function. It maps a physical address into a DMA
 * address space. It allocates the page table pages if necessary.
 * In the future it can be extended to a generic mapping function
 * supporting all features of AMD IOMMU page tables like level skipping
 * and full 64 bit address spaces.
 */
static int iommu_map_page(struct protection_domain *dom,
			  unsigned long bus_addr,
			  unsigned long phys_addr,
			  int prot)
{
	u64 __pte, *pte, *page;

	bus_addr  = PAGE_ALIGN(bus_addr);
	phys_addr = PAGE_ALIGN(phys_addr);

	/* only support 512GB address spaces for now */
	if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
		return -EINVAL;

	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];

	if (!IOMMU_PTE_PRESENT(*pte)) {
		page = (u64 *)get_zeroed_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;
		*pte = IOMMU_L2_PDE(virt_to_phys(page));
	}

	pte = IOMMU_PTE_PAGE(*pte);
	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];

	if (!IOMMU_PTE_PRESENT(*pte)) {
		page = (u64 *)get_zeroed_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;
		*pte = IOMMU_L1_PDE(virt_to_phys(page));
	}

	pte = IOMMU_PTE_PAGE(*pte);
	pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];

	if (IOMMU_PTE_PRESENT(*pte))
		return -EBUSY;

	__pte = phys_addr | IOMMU_PTE_P;
	if (prot & IOMMU_PROT_IR)
		__pte |= IOMMU_PTE_IR;
	if (prot & IOMMU_PROT_IW)
		__pte |= IOMMU_PTE_IW;

	*pte = __pte;

	return 0;
}

#ifdef CONFIG_IOMMU_API
static void iommu_unmap_page(struct protection_domain *dom,
			     unsigned long bus_addr)
{
	u64 *pte;

	pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];

	if (!IOMMU_PTE_PRESENT(*pte))
		return;

	pte = IOMMU_PTE_PAGE(*pte);
	pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];

	if (!IOMMU_PTE_PRESENT(*pte))
		return;

	pte = IOMMU_PTE_PAGE(*pte);
	pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];

	*pte = 0;
}
#endif

/*
 * This function checks if a specific unity mapping entry is needed for
 * this specific IOMMU.
 */
static int iommu_for_unity_map(struct amd_iommu *iommu,
			       struct unity_map_entry *entry)
{
	u16 bdf, i;

	for (i = entry->devid_start; i <= entry->devid_end; ++i) {
		bdf = amd_iommu_alias_table[i];
		if (amd_iommu_rlookup_table[bdf] == iommu)
			return 1;
	}

	return 0;
}

/*
 * Init the unity mappings for a specific IOMMU in the system
 *
 * Basically iterates over all unity mapping entries and applies them to
 * the default DMA domain of that IOMMU if necessary.
 */
static int iommu_init_unity_mappings(struct amd_iommu *iommu)
{
	struct unity_map_entry *entry;
	int ret;

	list_for_each_entry(entry, &amd_iommu_unity_map, list) {
		if (!iommu_for_unity_map(iommu, entry))
			continue;
		ret = dma_ops_unity_map(iommu->default_dom, entry);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * This function actually applies the mapping to the page table of the
 * dma_ops domain.
 */
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
			     struct unity_map_entry *e)
{
	u64 addr;
	int ret;

	for (addr = e->address_start; addr < e->address_end;
	     addr += PAGE_SIZE) {
		ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
		if (ret)
			return ret;
		/*
		 * if unity mapping is in aperture range mark the page
		 * as allocated in the aperture
		 */
		if (addr < dma_dom->aperture_size)
			__set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
	}

	return 0;
}

/*
 * Inits the unity mappings required for a specific device
 */
static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
					  u16 devid)
{
	struct unity_map_entry *e;
	int ret;

	list_for_each_entry(e, &amd_iommu_unity_map, list) {
		if (!(devid >= e->devid_start && devid <= e->devid_end))
			continue;
		ret = dma_ops_unity_map(dma_dom, e);
		if (ret)
			return ret;
	}

	return 0;
}

/****************************************************************************
 *
 * The next functions belong to the address allocator for the dma_ops
 * interface functions. They work like the allocators in the other IOMMU
 * drivers. It's basically a bitmap which marks the allocated pages in
 * the aperture. Maybe it could be enhanced in the future to a more
 * efficient allocator.
 *
 ****************************************************************************/

/*
 * The address allocator core function.
 *
 * called with domain->lock held
 */
static unsigned long dma_ops_alloc_addresses(struct device *dev,
					     struct dma_ops_domain *dom,
					     unsigned int pages,
					     unsigned long align_mask,
					     u64 dma_mask)
{
	unsigned long limit;
	unsigned long address;
	unsigned long boundary_size;

	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;
	limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
				       dma_mask >> PAGE_SHIFT);

	if (dom->next_bit >= limit) {
		dom->next_bit = 0;
		dom->need_flush = true;
	}

	address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
				   0, boundary_size, align_mask);
	if (address == -1) {
		address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
					   0, boundary_size, align_mask);
		dom->need_flush = true;
	}

	if (likely(address != -1)) {
		dom->next_bit = address + pages;
		address <<= PAGE_SHIFT;
	} else
		address = bad_dma_address;

	WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);

	return address;
}

/*
 * The address free function.
 *
 * called with domain->lock held
 */
static void dma_ops_free_addresses(struct dma_ops_domain *dom,
				   unsigned long address,
				   unsigned int pages)
{
	address >>= PAGE_SHIFT;
	iommu_area_free(dom->bitmap, address, pages);

	if (address >= dom->next_bit)
		dom->need_flush = true;
}

/****************************************************************************
 *
 * The next functions belong to the domain allocation. A domain is
 * allocated for every IOMMU as the default domain. If device isolation
 * is enabled, every device gets its own domain. The most important thing
 * about domains is the page table mapping the DMA address space they
 * contain.
 *
 ****************************************************************************/

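/*
 * Allocate a new protection domain id from the global bitmap. Domain id
 * 0 is reserved, so 0 is returned when the allocation fails.
 */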
static u16 domain_id_alloc(void)
{
	unsigned long flags;
	int id;

	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
	id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
	BUG_ON(id == 0);
	if (id > 0 && id < MAX_DOMAIN_ID)
		__set_bit(id, amd_iommu_pd_alloc_bitmap);
	else
		id = 0;
	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);

	return id;
}

#ifdef CONFIG_IOMMU_API
static void domain_id_free(int id)
{
	unsigned long flags;

	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
	if (id > 0 && id < MAX_DOMAIN_ID)
		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
#endif

/*
 * Used to reserve address ranges in the aperture (e.g. for exclusion
 * ranges).
 */
static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
				      unsigned long start_page,
				      unsigned int pages)
{
	unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;

	if (start_page + pages > last_page)
		pages = last_page - start_page;

	iommu_area_reserve(dom->bitmap, start_page, pages);
}

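/*
 * Free all page table pages of a protection domain by walking the
 * three levels of the page table and releasing every present page.
 */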
static void free_pagetable(struct protection_domain *domain)
{
	int i, j;
	u64 *p1, *p2, *p3;

	p1 = domain->pt_root;

	if (!p1)
		return;

	for (i = 0; i < 512; ++i) {
		if (!IOMMU_PTE_PRESENT(p1[i]))
			continue;

		p2 = IOMMU_PTE_PAGE(p1[i]);
		for (j = 0; j < 512; ++j) {
			if (!IOMMU_PTE_PRESENT(p2[j]))
				continue;
			p3 = IOMMU_PTE_PAGE(p2[j]);
			free_page((unsigned long)p3);
		}

		free_page((unsigned long)p2);
	}

	free_page((unsigned long)p1);

	domain->pt_root = NULL;
}

/*
 * Free a domain, only used if something went wrong in the
 * allocation path and we need to free an already allocated page table
 */
static void dma_ops_domain_free(struct dma_ops_domain *dom)
{
	if (!dom)
		return;

	free_pagetable(&dom->domain);

	kfree(dom->pte_pages);

	kfree(dom->bitmap);

	kfree(dom);
}

/*
 * Allocates a new protection domain usable for the dma_ops functions.
 * It also initializes the page table and the address allocator data
 * structures required for the dma_ops interface
 */
static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
						   unsigned order)
{
	struct dma_ops_domain *dma_dom;
	unsigned i, num_pte_pages;
	u64 *l2_pde;
	u64 address;

	/*
	 * Currently the DMA aperture must be between 32 MB and 1GB in size
	 */
	if ((order < 25) || (order > 30))
		return NULL;

	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
	if (!dma_dom)
		return NULL;

	spin_lock_init(&dma_dom->domain.lock);

	dma_dom->domain.id = domain_id_alloc();
	if (dma_dom->domain.id == 0)
		goto free_dma_dom;
	dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
	dma_dom->domain.flags = PD_DMA_OPS_MASK;
	dma_dom->domain.priv = dma_dom;
	if (!dma_dom->domain.pt_root)
		goto free_dma_dom;
	dma_dom->aperture_size = (1ULL << order);
	dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
				  GFP_KERNEL);
	if (!dma_dom->bitmap)
		goto free_dma_dom;
	/*
	 * mark the first page as allocated so we never return 0 as
	 * a valid dma-address. So we can use 0 as error value
	 */
	dma_dom->bitmap[0] = 1;
	dma_dom->next_bit = 0;

	dma_dom->need_flush = false;
	dma_dom->target_dev = 0xffff;

	/* Initialize the exclusion range if necessary */
	if (iommu->exclusion_start &&
	    iommu->exclusion_start < dma_dom->aperture_size) {
		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
		int pages = iommu_num_pages(iommu->exclusion_start,
					    iommu->exclusion_length,
					    PAGE_SIZE);
		dma_ops_reserve_addresses(dma_dom, startpage, pages);
	}

	/*
	 * At the last step, build the page tables so we don't need to
	 * allocate page table pages in the dma_ops mapping/unmapping
	 * path.
	 */
	num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
	dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
				     GFP_KERNEL);
	if (!dma_dom->pte_pages)
		goto free_dma_dom;

	l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
	if (l2_pde == NULL)
		goto free_dma_dom;

	dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));

	for (i = 0; i < num_pte_pages; ++i) {
		dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
		if (!dma_dom->pte_pages[i])
			goto free_dma_dom;
		address = virt_to_phys(dma_dom->pte_pages[i]);
		l2_pde[i] = IOMMU_L1_PDE(address);
	}

	return dma_dom;

free_dma_dom:
	dma_ops_domain_free(dma_dom);

	return NULL;
}

/*
 * little helper function to check whether a given protection domain is a
 * dma_ops domain
 */
static bool dma_ops_domain(struct protection_domain *domain)
{
	return domain->flags & PD_DMA_OPS_MASK;
}

/*
 * Find out the protection domain structure for a given PCI device. This
 * will give us the pointer to the page table root for example.
 */
static struct protection_domain *domain_for_device(u16 devid)
{
	struct protection_domain *dom;
	unsigned long flags;

	read_lock_irqsave(&amd_iommu_devtable_lock, flags);
	dom = amd_iommu_pd_table[devid];
	read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);

	return dom;
}

/*
 * If a device is not yet associated with a domain, this function
 * assigns it to a domain and makes it visible to the hardware
 */
static void attach_device(struct amd_iommu *iommu,
			  struct protection_domain *domain,
			  u16 devid)
{
	unsigned long flags;
	u64 pte_root = virt_to_phys(domain->pt_root);

	domain->dev_cnt += 1;

	pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
		    << DEV_ENTRY_MODE_SHIFT;
	pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;

	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
	amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
	amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
	amd_iommu_dev_table[devid].data[2] = domain->id;

	amd_iommu_pd_table[devid] = domain;
	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);

	iommu_queue_inv_dev_entry(iommu, devid);
}

/*
 * Removes a device from a protection domain (unlocked)
 */
static void __detach_device(struct protection_domain *domain, u16 devid)
{

	/* lock domain */
	spin_lock(&domain->lock);

	/* remove domain from the lookup table */
	amd_iommu_pd_table[devid] = NULL;

	/* remove entry from the device table seen by the hardware */
	amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
	amd_iommu_dev_table[devid].data[1] = 0;
	amd_iommu_dev_table[devid].data[2] = 0;

	/* decrease reference counter */
	domain->dev_cnt -= 1;

	/* ready */
	spin_unlock(&domain->lock);
}

/*
 * Removes a device from a protection domain (with devtable_lock held)
 */
static void detach_device(struct protection_domain *domain, u16 devid)
{
	unsigned long flags;

	/* lock device table */
	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
	__detach_device(domain, devid);
	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}

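/*
 * Notifier callback for PCI bus events: attach a device to a dma_ops
 * domain when a driver is bound, detach it again on unbind and
 * preallocate a protection domain when a new device shows up.
 */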
static int device_change_notifier(struct notifier_block *nb,
				  unsigned long action, void *data)
{
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
	struct protection_domain *domain;
	struct dma_ops_domain *dma_domain;
	struct amd_iommu *iommu;
	int order = amd_iommu_aperture_order;
	unsigned long flags;

	if (devid > amd_iommu_last_bdf)
		goto out;

	devid = amd_iommu_alias_table[devid];

	iommu = amd_iommu_rlookup_table[devid];
	if (iommu == NULL)
		goto out;

	domain = domain_for_device(devid);

	if (domain && !dma_ops_domain(domain))
		WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
			  "to a non-dma-ops domain\n", dev_name(dev));

	switch (action) {
	case BUS_NOTIFY_BOUND_DRIVER:
		if (domain)
			goto out;
		dma_domain = find_protection_domain(devid);
		if (!dma_domain)
			dma_domain = iommu->default_dom;
		attach_device(iommu, &dma_domain->domain, devid);
		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
		       "device %s\n", dma_domain->domain.id, dev_name(dev));
		break;
	case BUS_NOTIFY_UNBIND_DRIVER:
		if (!domain)
			goto out;
		detach_device(domain, devid);
		break;
	case BUS_NOTIFY_ADD_DEVICE:
		/* allocate a protection domain if a device is added */
		dma_domain = find_protection_domain(devid);
		if (dma_domain)
			goto out;
		dma_domain = dma_ops_domain_alloc(iommu, order);
		if (!dma_domain)
			goto out;
		dma_domain->target_dev = devid;

		spin_lock_irqsave(&iommu_pd_list_lock, flags);
		list_add_tail(&dma_domain->list, &iommu_pd_list);
		spin_unlock_irqrestore(&iommu_pd_list_lock, flags);

		break;
	default:
		goto out;
	}

	iommu_queue_inv_dev_entry(iommu, devid);
	iommu_completion_wait(iommu);

out:
	return 0;
}

struct notifier_block device_nb = {
	.notifier_call = device_change_notifier,
};

/*****************************************************************************
 *
 * The next functions belong to the dma_ops mapping/unmapping code.
 *
 *****************************************************************************/

/*
 * This function checks if the driver got a valid device from the caller to
 * avoid dereferencing invalid pointers.
 */
static bool check_device(struct device *dev)
{
	if (!dev || !dev->dma_mask)
		return false;

	return true;
}

/*
 * In this function the list of preallocated protection domains is traversed to
 * find the domain for a specific device
 */
static struct dma_ops_domain *find_protection_domain(u16 devid)
{
	struct dma_ops_domain *entry, *ret = NULL;
	unsigned long flags;

	if (list_empty(&iommu_pd_list))
		return NULL;

	spin_lock_irqsave(&iommu_pd_list_lock, flags);

	list_for_each_entry(entry, &iommu_pd_list, list) {
		if (entry->target_dev == devid) {
			ret = entry;
			break;
		}
	}

	spin_unlock_irqrestore(&iommu_pd_list_lock, flags);

	return ret;
}

/*
 * In the dma_ops path we only have the struct device. This function
 * finds the corresponding IOMMU, the protection domain and the
 * requestor id for a given device.
 * If the device is not yet associated with a domain this is also done
 * in this function.
 */
static int get_device_resources(struct device *dev,
				struct amd_iommu **iommu,
				struct protection_domain **domain,
				u16 *bdf)
{
	struct dma_ops_domain *dma_dom;
	struct pci_dev *pcidev;
	u16 _bdf;

	*iommu = NULL;
	*domain = NULL;
	*bdf = 0xffff;

	if (dev->bus != &pci_bus_type)
		return 0;

	pcidev = to_pci_dev(dev);
	_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);

	/* device not translated by any IOMMU in the system? */
	if (_bdf > amd_iommu_last_bdf)
		return 0;

	*bdf = amd_iommu_alias_table[_bdf];

	*iommu = amd_iommu_rlookup_table[*bdf];
	if (*iommu == NULL)
		return 0;
	*domain = domain_for_device(*bdf);
	if (*domain == NULL) {
		dma_dom = find_protection_domain(*bdf);
		if (!dma_dom)
			dma_dom = (*iommu)->default_dom;
		*domain = &dma_dom->domain;
		attach_device(*iommu, *domain, *bdf);
		printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
		       "device %s\n", (*domain)->id, dev_name(dev));
	}

	if (domain_for_device(_bdf) == NULL)
		attach_device(*iommu, *domain, _bdf);

	return 1;
}

/*
 * This is the generic map function. It maps one 4kb page at paddr to
 * the given address in the DMA address space for the domain.
 */
static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
				     struct dma_ops_domain *dom,
				     unsigned long address,
				     phys_addr_t paddr,
				     int direction)
{
	u64 *pte, __pte;

	WARN_ON(address > dom->aperture_size);

	paddr &= PAGE_MASK;

	pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
	pte += IOMMU_PTE_L0_INDEX(address);

	__pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;

	if (direction == DMA_TO_DEVICE)
		__pte |= IOMMU_PTE_IR;
	else if (direction == DMA_FROM_DEVICE)
		__pte |= IOMMU_PTE_IW;
	else if (direction == DMA_BIDIRECTIONAL)
		__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;

	WARN_ON(*pte);

	*pte = __pte;

	return (dma_addr_t)address;
}

/*
 * The generic unmapping function for one page in the DMA address space.
 */
static void dma_ops_domain_unmap(struct amd_iommu *iommu,
				 struct dma_ops_domain *dom,
				 unsigned long address)
{
	u64 *pte;

	if (address >= dom->aperture_size)
		return;

	WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);

	pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
	pte += IOMMU_PTE_L0_INDEX(address);

	WARN_ON(!*pte);

	*pte = 0ULL;
}

/*
 * This function contains common code for mapping of a physically
 * contiguous memory region into DMA address space. It is used by all
 * mapping functions provided with this IOMMU driver.
 * Must be called with the domain lock held.
 */
static dma_addr_t __map_single(struct device *dev,
			       struct amd_iommu *iommu,
			       struct dma_ops_domain *dma_dom,
			       phys_addr_t paddr,
			       size_t size,
			       int dir,
			       bool align,
			       u64 dma_mask)
{
	dma_addr_t offset = paddr & ~PAGE_MASK;
	dma_addr_t address, start;
	unsigned int pages;
	unsigned long align_mask = 0;
	int i;

	pages = iommu_num_pages(paddr, size, PAGE_SIZE);
	paddr &= PAGE_MASK;

	if (pages > 1)
		INC_STATS_COUNTER(cross_page);

	if (align)
		align_mask = (1UL << get_order(size)) - 1;

	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
					  dma_mask);
	if (unlikely(address == bad_dma_address))
		goto out;

	start = address;
	for (i = 0; i < pages; ++i) {
		dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
		paddr += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	address += offset;

	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
		iommu_flush_tlb(iommu, dma_dom->domain.id);
		dma_dom->need_flush = false;
	} else if (unlikely(iommu_has_npcache(iommu)))
		iommu_flush_pages(iommu, dma_dom->domain.id, address, size);

out:
	return address;
}

/*
 * Does the reverse of the __map_single function. Must be called with
 * the domain lock held too
 */
static void __unmap_single(struct amd_iommu *iommu,
			   struct dma_ops_domain *dma_dom,
			   dma_addr_t dma_addr,
			   size_t size,
			   int dir)
{
	dma_addr_t i, start;
	unsigned int pages;

	if ((dma_addr == bad_dma_address) ||
	    (dma_addr + size > dma_dom->aperture_size))
		return;

	pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr &= PAGE_MASK;
	start = dma_addr;

	for (i = 0; i < pages; ++i) {
		dma_ops_domain_unmap(iommu, dma_dom, start);
		start += PAGE_SIZE;
	}

	dma_ops_free_addresses(dma_dom, dma_addr, pages);

	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
		iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
		dma_dom->need_flush = false;
	}
}

/*
 * The exported map_single function for dma_ops.
 */
static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
			     size_t size, int dir)
{
	unsigned long flags;
	struct amd_iommu *iommu;
	struct protection_domain *domain;
	u16 devid;
	dma_addr_t addr;
	u64 dma_mask;

	INC_STATS_COUNTER(cnt_map_single);

	if (!check_device(dev))
		return bad_dma_address;

	dma_mask = *dev->dma_mask;

	get_device_resources(dev, &iommu, &domain, &devid);

	if (iommu == NULL || domain == NULL)
		/* device not handled by any AMD IOMMU */
		return (dma_addr_t)paddr;

	if (!dma_ops_domain(domain))
		return bad_dma_address;

	spin_lock_irqsave(&domain->lock, flags);
	addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
			    dma_mask);
	if (addr == bad_dma_address)
		goto out;

	iommu_completion_wait(iommu);

out:
	spin_unlock_irqrestore(&domain->lock, flags);

	return addr;
}

/*
 * The exported unmap_single function for dma_ops.
 */
static void unmap_single(struct device *dev, dma_addr_t dma_addr,
			 size_t size, int dir)
{
	unsigned long flags;
	struct amd_iommu *iommu;
	struct protection_domain *domain;
	u16 devid;

	INC_STATS_COUNTER(cnt_unmap_single);

	if (!check_device(dev) ||
	    !get_device_resources(dev, &iommu, &domain, &devid))
		/* device not handled by any AMD IOMMU */
		return;

	if (!dma_ops_domain(domain))
		return;

	spin_lock_irqsave(&domain->lock, flags);

	__unmap_single(iommu, domain->priv, dma_addr, size, dir);

	iommu_completion_wait(iommu);

	spin_unlock_irqrestore(&domain->lock, flags);
}

/*
 * This is a special map_sg function which is used if we should map a
 * device which is not handled by an AMD IOMMU in the system.
 */
static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
			   int nelems, int dir)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sglist, s, nelems, i) {
		s->dma_address = (dma_addr_t)sg_phys(s);
		s->dma_length = s->length;
	}

	return nelems;
}

/*
 * The exported map_sg function for dma_ops (handles scatter-gather
 * lists).
 */
static int map_sg(struct device *dev, struct scatterlist *sglist,
		  int nelems, int dir)
{
	unsigned long flags;
	struct amd_iommu *iommu;
	struct protection_domain *domain;
	u16 devid;
	int i;
	struct scatterlist *s;
	phys_addr_t paddr;
	int mapped_elems = 0;
	u64 dma_mask;

	INC_STATS_COUNTER(cnt_map_sg);

	if (!check_device(dev))
		return 0;

	dma_mask = *dev->dma_mask;

	get_device_resources(dev, &iommu, &domain, &devid);

	if (!iommu || !domain)
		return map_sg_no_iommu(dev, sglist, nelems, dir);

	if (!dma_ops_domain(domain))
		return 0;

	spin_lock_irqsave(&domain->lock, flags);

	for_each_sg(sglist, s, nelems, i) {
		paddr = sg_phys(s);

		s->dma_address = __map_single(dev, iommu, domain->priv,
					      paddr, s->length, dir, false,
					      dma_mask);

		if (s->dma_address) {
			s->dma_length = s->length;
			mapped_elems++;
		} else
			goto unmap;
	}

	iommu_completion_wait(iommu);

out:
	spin_unlock_irqrestore(&domain->lock, flags);

	return mapped_elems;
unmap:
	for_each_sg(sglist, s, mapped_elems, i) {
		if (s->dma_address)
			__unmap_single(iommu, domain->priv, s->dma_address,
				       s->dma_length, dir);
		s->dma_address = s->dma_length = 0;
	}

	mapped_elems = 0;

	goto out;
}

/*
 * The exported unmap_sg function for dma_ops (handles scatter-gather
 * lists).
 */
static void unmap_sg(struct device *dev, struct scatterlist *sglist,
		     int nelems, int dir)
{
	unsigned long flags;
	struct amd_iommu *iommu;
	struct protection_domain *domain;
	struct scatterlist *s;
	u16 devid;
	int i;

	INC_STATS_COUNTER(cnt_unmap_sg);

	if (!check_device(dev) ||
	    !get_device_resources(dev, &iommu, &domain, &devid))
		return;

	if (!dma_ops_domain(domain))
		return;

	spin_lock_irqsave(&domain->lock, flags);

	for_each_sg(sglist, s, nelems, i) {
		__unmap_single(iommu, domain->priv, s->dma_address,
			       s->dma_length, dir);
		s->dma_address = s->dma_length = 0;
	}

	iommu_completion_wait(iommu);

	spin_unlock_irqrestore(&domain->lock, flags);
}

/*
 * The exported alloc_coherent function for dma_ops.
 */
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001486static void *alloc_coherent(struct device *dev, size_t size,
1487 dma_addr_t *dma_addr, gfp_t flag)
1488{
1489 unsigned long flags;
1490 void *virt_addr;
1491 struct amd_iommu *iommu;
1492 struct protection_domain *domain;
1493 u16 devid;
1494 phys_addr_t paddr;
Joerg Roedel832a90c2008-09-18 15:54:23 +02001495 u64 dma_mask = dev->coherent_dma_mask;
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001496
Joerg Roedelc8f0fb32008-12-12 15:14:21 +01001497 INC_STATS_COUNTER(cnt_alloc_coherent);
1498
Joerg Roedeldbcc1122008-09-04 15:04:26 +02001499 if (!check_device(dev))
1500 return NULL;
1501
FUJITA Tomonori13d9fea2008-09-10 20:19:40 +09001502 if (!get_device_resources(dev, &iommu, &domain, &devid))
1503 flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
1504
Joerg Roedelc97ac532008-09-11 10:59:15 +02001505 flag |= __GFP_ZERO;
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001506 virt_addr = (void *)__get_free_pages(flag, get_order(size));
1507 if (!virt_addr)
1508 return 0;
1509
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001510 paddr = virt_to_phys(virt_addr);
1511
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001512 if (!iommu || !domain) {
1513 *dma_addr = (dma_addr_t)paddr;
1514 return virt_addr;
1515 }
1516
Joerg Roedel5b28df62008-12-02 17:49:42 +01001517 if (!dma_ops_domain(domain))
1518 goto out_free;
1519
Joerg Roedel832a90c2008-09-18 15:54:23 +02001520 if (!dma_mask)
1521 dma_mask = *dev->dma_mask;
1522
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001523 spin_lock_irqsave(&domain->lock, flags);
1524
1525 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
Joerg Roedel832a90c2008-09-18 15:54:23 +02001526 size, DMA_BIDIRECTIONAL, true, dma_mask);
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001527
Joerg Roedel5b28df62008-12-02 17:49:42 +01001528 if (*dma_addr == bad_dma_address)
1529 goto out_free;
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001530
Joerg Roedel09ee17e2008-12-03 12:19:27 +01001531 iommu_completion_wait(iommu);
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001532
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001533 spin_unlock_irqrestore(&domain->lock, flags);
1534
1535 return virt_addr;
Joerg Roedel5b28df62008-12-02 17:49:42 +01001536
1537out_free:
1538
1539 free_pages((unsigned long)virt_addr, get_order(size));
1540
1541 return NULL;
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001542}
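/*
 * Usage sketch (illustrative only): alloc_coherent()/free_coherent() back
 * dma_alloc_coherent()/dma_free_coherent().  The descriptor-ring size and
 * names are invented for the example.
 */
#if 0	/* not built */
static void *my_alloc_desc_ring(struct pci_dev *pdev, dma_addr_t *dma_handle)
{
	/* dispatches to alloc_coherent(); the memory comes back zeroed */
	return dma_alloc_coherent(&pdev->dev, 4096, dma_handle, GFP_KERNEL);
}

static void my_free_desc_ring(struct pci_dev *pdev, void *ring,
			      dma_addr_t dma_handle)
{
	/* dispatches to free_coherent() below */
	dma_free_coherent(&pdev->dev, 4096, ring, dma_handle);
}
#endif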
1543
Joerg Roedel431b2a22008-07-11 17:14:22 +02001544/*
1545 * The exported free_coherent function for dma_ops.
Joerg Roedel431b2a22008-07-11 17:14:22 +02001546 */
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001547static void free_coherent(struct device *dev, size_t size,
1548 void *virt_addr, dma_addr_t dma_addr)
1549{
1550 unsigned long flags;
1551 struct amd_iommu *iommu;
1552 struct protection_domain *domain;
1553 u16 devid;
1554
Joerg Roedel5d31ee72008-12-12 15:16:38 +01001555 INC_STATS_COUNTER(cnt_free_coherent);
1556
Joerg Roedeldbcc1122008-09-04 15:04:26 +02001557 if (!check_device(dev))
1558 return;
1559
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001560 get_device_resources(dev, &iommu, &domain, &devid);
1561
1562 if (!iommu || !domain)
1563 goto free_mem;
1564
Joerg Roedel5b28df62008-12-02 17:49:42 +01001565 if (!dma_ops_domain(domain))
1566 goto free_mem;
1567
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001568 spin_lock_irqsave(&domain->lock, flags);
1569
1570 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001571
Joerg Roedel09ee17e2008-12-03 12:19:27 +01001572 iommu_completion_wait(iommu);
Joerg Roedel5d8b53c2008-06-26 21:28:03 +02001573
1574 spin_unlock_irqrestore(&domain->lock, flags);
1575
1576free_mem:
1577 free_pages((unsigned long)virt_addr, get_order(size));
1578}
1579
Joerg Roedelc432f3d2008-06-26 21:28:04 +02001580/*
Joerg Roedelb39ba6a2008-09-09 18:40:46 +02001581 * This function is called by the DMA layer to find out if we can handle a
1582 * particular device. It is part of the dma_ops.
1583 */
1584static int amd_iommu_dma_supported(struct device *dev, u64 mask)
1585{
1586 u16 bdf;
1587 struct pci_dev *pcidev;
1588
1589 /* No device or no PCI device */
1590 if (!dev || dev->bus != &pci_bus_type)
1591 return 0;
1592
1593 pcidev = to_pci_dev(dev);
1594
1595 bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
1596
1597 /* Out of our scope? */
1598 if (bdf > amd_iommu_last_bdf)
1599 return 0;
1600
1601 return 1;
1602}
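/*
 * Worked example (assuming calc_devid() packs the ID as (bus << 8) | devfn,
 * as defined in amd_iommu_types.h): device 00:19.0 has
 * devfn == PCI_DEVFN(0x19, 0) == 0xc8, so its devid is 0x00c8.  Device IDs
 * above amd_iommu_last_bdf lie outside the range described by the IVRS
 * table and are therefore left to other DMA code.
 */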
1603
1604/*
Joerg Roedel431b2a22008-07-11 17:14:22 +02001605 * The function for pre-allocating protection domains.
1606 *
Joerg Roedelc432f3d2008-06-26 21:28:04 +02001607 * Once the driver core informs the DMA layer when a driver grabs a
1608 * device, we will no longer need to preallocate the protection
1609 * domains. For now we have to.
1610 */
1611void prealloc_protection_domains(void)
1612{
1613 struct pci_dev *dev = NULL;
1614 struct dma_ops_domain *dma_dom;
1615 struct amd_iommu *iommu;
1616 int order = amd_iommu_aperture_order;
1617 u16 devid;
1618
1619 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
Joerg Roedeledcb34d2008-12-10 20:01:45 +01001620 devid = calc_devid(dev->bus->number, dev->devfn);
Joerg Roedel3a61ec32008-07-25 13:07:50 +02001621 if (devid > amd_iommu_last_bdf)
Joerg Roedelc432f3d2008-06-26 21:28:04 +02001622 continue;
1623 devid = amd_iommu_alias_table[devid];
1624 if (domain_for_device(devid))
1625 continue;
1626 iommu = amd_iommu_rlookup_table[devid];
1627 if (!iommu)
1628 continue;
1629 dma_dom = dma_ops_domain_alloc(iommu, order);
1630 if (!dma_dom)
1631 continue;
1632 init_unity_mappings_for_device(dma_dom, devid);
Joerg Roedelbd60b732008-09-11 10:24:48 +02001633 dma_dom->target_dev = devid;
1634
1635 list_add_tail(&dma_dom->list, &iommu_pd_list);
Joerg Roedelc432f3d2008-06-26 21:28:04 +02001636 }
1637}
1638
Joerg Roedel6631ee92008-06-26 21:28:05 +02001639static struct dma_mapping_ops amd_iommu_dma_ops = {
1640 .alloc_coherent = alloc_coherent,
1641 .free_coherent = free_coherent,
1642 .map_single = map_single,
1643 .unmap_single = unmap_single,
1644 .map_sg = map_sg,
1645 .unmap_sg = unmap_sg,
Joerg Roedelb39ba6a2008-09-09 18:40:46 +02001646 .dma_supported = amd_iommu_dma_supported,
Joerg Roedel6631ee92008-06-26 21:28:05 +02001647};
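/*
 * Note (simplified sketch): drivers never call the handlers in this table
 * directly.  On x86 the generic DMA API looks up the installed
 * struct dma_mapping_ops -- set to &amd_iommu_dma_ops below -- and
 * dispatches through it, roughly like
 *
 *	struct dma_mapping_ops *ops = get_dma_ops(dev);
 *	dma_addr_t addr = ops->map_single(dev, virt_to_phys(ptr), size, dir);
 *
 * (the exact wrapper code lives in asm/dma-mapping.h).
 */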
1648
Joerg Roedel431b2a22008-07-11 17:14:22 +02001649/*
1650 * The function which hooks the AMD IOMMU driver into the dma_ops interface.
1651 */
Joerg Roedel6631ee92008-06-26 21:28:05 +02001652int __init amd_iommu_init_dma_ops(void)
1653{
1654 struct amd_iommu *iommu;
1655 int order = amd_iommu_aperture_order;
1656 int ret;
1657
Joerg Roedel431b2a22008-07-11 17:14:22 +02001658 /*
1659	 * First allocate a default protection domain for every IOMMU we
1660 * found in the system. Devices not assigned to any other
1661 * protection domain will be assigned to the default one.
1662 */
Joerg Roedel6631ee92008-06-26 21:28:05 +02001663 list_for_each_entry(iommu, &amd_iommu_list, list) {
1664 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
1665 if (iommu->default_dom == NULL)
1666 return -ENOMEM;
Joerg Roedele2dc14a2008-12-10 18:48:59 +01001667 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
Joerg Roedel6631ee92008-06-26 21:28:05 +02001668 ret = iommu_init_unity_mappings(iommu);
1669 if (ret)
1670 goto free_domains;
1671 }
1672
Joerg Roedel431b2a22008-07-11 17:14:22 +02001673 /*
1674 * If device isolation is enabled, pre-allocate the protection
1675 * domains for each device.
1676 */
Joerg Roedel6631ee92008-06-26 21:28:05 +02001677 if (amd_iommu_isolate)
1678 prealloc_protection_domains();
1679
1680 iommu_detected = 1;
1681 force_iommu = 1;
1682 bad_dma_address = 0;
Ingo Molnar92af4e22008-06-27 10:48:16 +02001683#ifdef CONFIG_GART_IOMMU
Joerg Roedel6631ee92008-06-26 21:28:05 +02001684 gart_iommu_aperture_disabled = 1;
1685 gart_iommu_aperture = 0;
Ingo Molnar92af4e22008-06-27 10:48:16 +02001686#endif
Joerg Roedel6631ee92008-06-26 21:28:05 +02001687
Joerg Roedel431b2a22008-07-11 17:14:22 +02001688	/* Finally make our dma_ops visible to the drivers */
Joerg Roedel6631ee92008-06-26 21:28:05 +02001689 dma_ops = &amd_iommu_dma_ops;
1690
Joerg Roedel26961ef2008-12-03 17:00:17 +01001691#ifdef CONFIG_IOMMU_API
1692 register_iommu(&amd_iommu_ops);
1693#endif
1694
Joerg Roedele275a2a2008-12-10 18:27:25 +01001695 bus_register_notifier(&pci_bus_type, &device_nb);
1696
Joerg Roedel7f265082008-12-12 13:50:21 +01001697 amd_iommu_stats_init();
1698
Joerg Roedel6631ee92008-06-26 21:28:05 +02001699 return 0;
1700
1701free_domains:
1702
1703 list_for_each_entry(iommu, &amd_iommu_list, list) {
1704 if (iommu->default_dom)
1705 dma_ops_domain_free(iommu->default_dom);
1706 }
1707
1708 return ret;
1709}
Joerg Roedel6d98cd82008-12-08 12:05:55 +01001710
1711/*****************************************************************************
1712 *
1713 * The following functions belong to the exported interface of AMD IOMMU
1714 *
1715 * This interface allows access to lower level functions of the IOMMU
1716 * like protection domain handling and assignment of devices to domains
1717 * which is not possible with the dma_ops interface.
1718 *
1719 *****************************************************************************/
1720
1721#ifdef CONFIG_IOMMU_API
1722
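/*
 * Detach every device that is still attached to the given protection
 * domain.  Called when a domain is destroyed while devices are still
 * bound to it.
 */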
1723static void cleanup_domain(struct protection_domain *domain)
1724{
1725 unsigned long flags;
1726 u16 devid;
1727
1728 write_lock_irqsave(&amd_iommu_devtable_lock, flags);
1729
1730 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
1731 if (amd_iommu_pd_table[devid] == domain)
1732 __detach_device(domain, devid);
1733
1734 write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
1735}
1736
Joerg Roedelc156e342008-12-02 18:13:27 +01001737static int amd_iommu_domain_init(struct iommu_domain *dom)
1738{
1739 struct protection_domain *domain;
1740
1741 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
1742 if (!domain)
1743 return -ENOMEM;
1744
1745 spin_lock_init(&domain->lock);
1746 domain->mode = PAGE_MODE_3_LEVEL;
1747 domain->id = domain_id_alloc();
1748 if (!domain->id)
1749 goto out_free;
1750 domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
1751 if (!domain->pt_root)
1752 goto out_free;
1753
1754 dom->priv = domain;
1755
1756 return 0;
1757
1758out_free:
1759 kfree(domain);
1760
1761 return -ENOMEM;
1762}
1763
Joerg Roedel98383fc2008-12-02 18:34:12 +01001764static void amd_iommu_domain_destroy(struct iommu_domain *dom)
1765{
1766 struct protection_domain *domain = dom->priv;
1767
1768 if (!domain)
1769 return;
1770
1771 if (domain->dev_cnt > 0)
1772 cleanup_domain(domain);
1773
1774 BUG_ON(domain->dev_cnt != 0);
1775
1776 free_pagetable(domain);
1777
1778 domain_id_free(domain->id);
1779
1780 kfree(domain);
1781
1782 dom->priv = NULL;
1783}
1784
Joerg Roedel684f2882008-12-08 12:07:44 +01001785static void amd_iommu_detach_device(struct iommu_domain *dom,
1786 struct device *dev)
1787{
1788 struct protection_domain *domain = dom->priv;
1789 struct amd_iommu *iommu;
1790 struct pci_dev *pdev;
1791 u16 devid;
1792
1793 if (dev->bus != &pci_bus_type)
1794 return;
1795
1796 pdev = to_pci_dev(dev);
1797
1798 devid = calc_devid(pdev->bus->number, pdev->devfn);
1799
1800 if (devid > 0)
1801 detach_device(domain, devid);
1802
1803 iommu = amd_iommu_rlookup_table[devid];
1804 if (!iommu)
1805 return;
1806
1807 iommu_queue_inv_dev_entry(iommu, devid);
1808 iommu_completion_wait(iommu);
1809}
1810
Joerg Roedel01106062008-12-02 19:34:11 +01001811static int amd_iommu_attach_device(struct iommu_domain *dom,
1812 struct device *dev)
1813{
1814 struct protection_domain *domain = dom->priv;
1815 struct protection_domain *old_domain;
1816 struct amd_iommu *iommu;
1817 struct pci_dev *pdev;
1818 u16 devid;
1819
1820 if (dev->bus != &pci_bus_type)
1821 return -EINVAL;
1822
1823 pdev = to_pci_dev(dev);
1824
1825 devid = calc_devid(pdev->bus->number, pdev->devfn);
1826
1827 if (devid >= amd_iommu_last_bdf ||
1828 devid != amd_iommu_alias_table[devid])
1829 return -EINVAL;
1830
1831 iommu = amd_iommu_rlookup_table[devid];
1832 if (!iommu)
1833 return -EINVAL;
1834
1835 old_domain = domain_for_device(devid);
1836 if (old_domain)
1837 return -EBUSY;
1838
1839 attach_device(iommu, domain, devid);
1840
1841 iommu_completion_wait(iommu);
1842
1843 return 0;
1844}
1845
Joerg Roedelc6229ca2008-12-02 19:48:43 +01001846static int amd_iommu_map_range(struct iommu_domain *dom,
1847 unsigned long iova, phys_addr_t paddr,
1848 size_t size, int iommu_prot)
1849{
1850 struct protection_domain *domain = dom->priv;
1851 unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
1852 int prot = 0;
1853 int ret;
1854
1855 if (iommu_prot & IOMMU_READ)
1856 prot |= IOMMU_PROT_IR;
1857 if (iommu_prot & IOMMU_WRITE)
1858 prot |= IOMMU_PROT_IW;
1859
1860 iova &= PAGE_MASK;
1861 paddr &= PAGE_MASK;
1862
1863 for (i = 0; i < npages; ++i) {
1864 ret = iommu_map_page(domain, iova, paddr, prot);
1865 if (ret)
1866 return ret;
1867
1868 iova += PAGE_SIZE;
1869 paddr += PAGE_SIZE;
1870 }
1871
1872 return 0;
1873}
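/*
 * Worked example: a call with iova 0x100000, paddr 0x200000 and size 8192
 * spans iommu_num_pages() == 2 pages, so the loop above installs two 4K
 * mappings, 0x100000 -> 0x200000 and 0x101000 -> 0x201000 (addresses
 * invented for illustration).
 */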
1874
Joerg Roedeleb74ff62008-12-02 19:59:10 +01001875static void amd_iommu_unmap_range(struct iommu_domain *dom,
1876 unsigned long iova, size_t size)
1877{
1879 struct protection_domain *domain = dom->priv;
1880 unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
1881
1882 iova &= PAGE_MASK;
1883
1884 for (i = 0; i < npages; ++i) {
1885 iommu_unmap_page(domain, iova);
1886 iova += PAGE_SIZE;
1887 }
1888
1889 iommu_flush_domain(domain->id);
1890}
1891
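/*
 * Translate an io-virtual address back to the physical address by walking
 * the three page-table levels of the domain.  Worked example (assuming the
 * usual layout of 9 index bits per level and 4K pages): an iova of
 * 0x12345000 uses L2 index 0x000, L1 index 0x091 and L0 index 0x145; the
 * low 12 bits of the iova carry over as the offset into the page.
 */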
Joerg Roedel645c4c82008-12-02 20:05:50 +01001892static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
1893 unsigned long iova)
1894{
1895 struct protection_domain *domain = dom->priv;
1896 unsigned long offset = iova & ~PAGE_MASK;
1897 phys_addr_t paddr;
1898 u64 *pte;
1899
1900 pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
1901
1902 if (!IOMMU_PTE_PRESENT(*pte))
1903 return 0;
1904
1905 pte = IOMMU_PTE_PAGE(*pte);
1906 pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
1907
1908 if (!IOMMU_PTE_PRESENT(*pte))
1909 return 0;
1910
1911 pte = IOMMU_PTE_PAGE(*pte);
1912 pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
1913
1914 if (!IOMMU_PTE_PRESENT(*pte))
1915 return 0;
1916
1917 paddr = *pte & IOMMU_PAGE_MASK;
1918 paddr |= offset;
1919
1920 return paddr;
1921}
1922
Joerg Roedel26961ef2008-12-03 17:00:17 +01001923static struct iommu_ops amd_iommu_ops = {
1924 .domain_init = amd_iommu_domain_init,
1925 .domain_destroy = amd_iommu_domain_destroy,
1926 .attach_dev = amd_iommu_attach_device,
1927 .detach_dev = amd_iommu_detach_device,
1928 .map = amd_iommu_map_range,
1929 .unmap = amd_iommu_unmap_range,
1930 .iova_to_phys = amd_iommu_iova_to_phys,
1931};
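/*
 * Usage sketch (illustrative only, assuming the linux/iommu.h wrappers of
 * this era: iommu_domain_alloc(), iommu_attach_device(), iommu_map_range()).
 * A user such as KVM device assignment would reach the callbacks above
 * roughly like this; names and addresses are invented for the example.
 */
#if 0	/* not built */
static int my_assign_device(struct pci_dev *pdev)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();		/* -> amd_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, &pdev->dev); /* -> amd_iommu_attach_device() */
	if (ret)
		return ret;

	/* -> amd_iommu_map_range(): map 64K at io-virtual address 0 */
	return iommu_map_range(dom, 0, 0x10000000ULL, 0x10000,
			       IOMMU_READ | IOMMU_WRITE);
}
#endif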
1932
Joerg Roedel6d98cd82008-12-08 12:05:55 +01001933#endif