From: ddugger@redhat.com <ddugger@redhat.com>
Date: Mon, 23 Mar 2009 10:23:18 -0600
Subject: [xen] VT-d2: support queue invalidation
Message-id: 200903231623.n2NGNIS7022099@sobek.n0ano.com
O-Subject: [RHEL5.4 PATCH 12/21 V2] VT-d2: support queue invalidation
Bugzilla: 484227
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Chris Lalancette <clalance@redhat.com>

Implement queued invalidation, a VT-d2 feature that allows software to
invalidate multiple translations at a time.  The queued invalidation
method is enabled when hardware support for it is detected; otherwise
the register-based invalidation method, which invalidates one entry at
a time, is used.

[Version 2 update: Remove unnecessary global variable from routine
  __iommu_flush_iec. ]
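
For illustration, the queued invalidation path boils down to the following
sequence (a minimal sketch using the routines added by this patch; the
locking and error handling done by the real callers are omitted here):

    /* Sketch only -- see qinval.c below for the full implementation. */
    int index = qinval_next_index(iommu);       /* read IQT to find the next slot  */
    gen_iotlb_inv_dsc(iommu, index, granu, dr,  /* fill a 128-bit descriptor in    */
                      dw, did, am, ih, addr);   /* the invalidation queue page     */
    qinval_update_qtail(iommu, index);          /* advance IQT; hardware consumes  */
    invalidate_sync(iommu);                     /* queue a wait descriptor with a
                                                   status write and poll until the
                                                   hardware updates the status     */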

Upstream Status: Accepted (CS 16775, 17099)

BZ: 484227

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>
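
Callers are unaffected by which method is in use: init_vtd_hw() first
registers the register-based flush routines, and qinval_setup() then
overrides flush->context/flush->iotlb with flush_context_qi/flush_iotlb_qi
when the hardware advertises queued invalidation.  A hypothetical caller
would look like this (the did value and the DMA_TLB_DSI_FLUSH granularity
are assumptions for the example, taken from the existing register-based
flush path):

    /* Hypothetical example: flush the IOTLB for one domain through the
     * flush hook.  This reaches flush_iotlb_qi when QI is enabled and
     * flush_iotlb_reg otherwise. */
    struct iommu_flush *flush = iommu_get_flush(iommu);
    flush->iotlb(iommu, did, 0 /* addr */, 0 /* size_order */,
                 DMA_TLB_DSI_FLUSH, 0 /* non_present_entry_flush */);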

diff --git a/drivers/passthrough/vtd/Makefile b/drivers/passthrough/vtd/Makefile
index 61bc48b..c508028 100644
--- a/drivers/passthrough/vtd/Makefile
+++ b/drivers/passthrough/vtd/Makefile
@@ -4,3 +4,4 @@ subdir-$(x86_64) += x86
 obj-y += iommu.o
 obj-y += dmar.o
 obj-y += utils.o
+obj-y += qinval.o
diff --git a/drivers/passthrough/vtd/extern.h b/drivers/passthrough/vtd/extern.h
index 6fec3e5..35090a6 100644
--- a/drivers/passthrough/vtd/extern.h
+++ b/drivers/passthrough/vtd/extern.h
@@ -30,4 +30,15 @@ void dump_iommu_info(unsigned char key);
 
 void clear_fault_bits(struct iommu *iommu);
 
+int qinval_setup(struct iommu *iommu);
+int queue_invalidate_context(struct iommu *iommu,
+    u16 did, u16 source_id, u8 function_mask, u8 granu);
+int queue_invalidate_iotlb(struct iommu *iommu,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr);
+int queue_invalidate_iec(struct iommu *iommu,
+    u8 granu, u8 im, u16 iidx);
+int invalidate_sync(struct iommu *iommu);
+int iommu_flush_iec_global(struct iommu *iommu);
+int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
+
 #endif // _VTD_EXTERN_H_
diff --git a/drivers/passthrough/vtd/iommu.c b/drivers/passthrough/vtd/iommu.c
index b78b80c..e297195 100644
--- a/drivers/passthrough/vtd/iommu.c
+++ b/drivers/passthrough/vtd/iommu.c
@@ -84,6 +84,9 @@ static struct intel_iommu *alloc_intel_iommu(void)
         return NULL;
     memset(intel, 0, sizeof(struct intel_iommu));
 
+    spin_lock_init(&intel->qi_ctrl.qinval_lock);
+    spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
+
     return intel;
 }
 
@@ -92,6 +95,11 @@ static void free_intel_iommu(struct intel_iommu *intel)
     xfree(intel);
 }
 
+struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
+{
+    return iommu ? &iommu->intel->qi_ctrl : NULL;
+}
+
 struct iommu_flush *iommu_get_flush(struct iommu *iommu)
 {
     return iommu ? &iommu->intel->flush : NULL;
@@ -1665,6 +1673,14 @@ static int init_vtd_hw(void)
         flush->iotlb = flush_iotlb_reg;
     }
 
+    for_each_drhd_unit ( drhd )
+    {
+        iommu = drhd->iommu;
+        if ( qinval_setup(iommu) != 0 )
+            dprintk(XENLOG_INFO VTDPREFIX,
+                    "Queued Invalidation hardware not found\n");
+    }
+
     return 0;
 }
 
diff --git a/drivers/passthrough/vtd/iommu.h b/drivers/passthrough/vtd/iommu.h
index 73c1657..5a7d69f 100644
--- a/drivers/passthrough/vtd/iommu.h
+++ b/drivers/passthrough/vtd/iommu.h
@@ -271,11 +271,119 @@ struct dma_pte {
             (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
 #define dma_pte_present(p) (((p).val & 3) != 0)
 
+/* queue invalidation entry */
+struct qinval_entry {
+    union {
+        struct {
+            u64 lo;
+            u64 hi;
+        }val;
+        struct {
+            struct {
+                u64 type    : 4,
+                    granu   : 2,
+                    res_1   : 10,
+                    did     : 16,
+                    sid     : 16,
+                    fm      : 2,
+                    res_2   : 14;
+            }lo;
+            struct {
+                u64 res;
+            }hi;
+        }cc_inv_dsc;
+        struct {
+            struct {
+                u64 type    : 4,
+                    granu   : 2,
+                    dw      : 1,
+                    dr      : 1,
+                    res_1   : 8,
+                    did     : 16,
+                    res_2   : 32;
+            }lo;
+            struct {
+                u64 am      : 6,
+                    ih      : 1,
+                    res_1   : 5,
+                    addr    : 52;
+            }hi;
+        }iotlb_inv_dsc;
+        struct {
+            struct {
+                u64 type    : 4,
+                    res_1   : 12,
+                    max_invs_pend: 5,
+                    res_2   : 11,
+                    sid     : 16,
+                    res_3   : 16;
+            }lo;
+            struct {
+                u64 size    : 1,
+                    res_1   : 11,
+                    addr    : 52;
+            }hi;
+        }dev_iotlb_inv_dsc;
+        struct {
+            struct {
+                u64 type    : 4,
+                    granu   : 1,
+                    res_1   : 22,
+                    im      : 5,
+                    iidx    : 16,
+                    res_2   : 16;
+            }lo;
+            struct {
+                u64 res;
+            }hi;
+        }iec_inv_dsc;
+        struct {
+            struct {
+                u64 type    : 4,
+                    iflag   : 1,
+                    sw      : 1,
+                    fn      : 1,
+                    res_1   : 25,
+                    sdata   : 32;
+            }lo;
+            struct {
+                u64 res_1   : 2,
+                    saddr   : 62;
+            }hi;
+        }inv_wait_dsc;
+    }q;
+};
+
 struct poll_info {
     u64 saddr;
     u32 udata;
 };
 
+#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry))
+#define qinval_present(v) ((v).lo & 1)
+#define qinval_fault_disable(v) (((v).lo >> 1) & 1)
+
+#define qinval_set_present(v) do {(v).lo |= 1;} while(0)
+#define qinval_clear_present(v) do {(v).lo &= ~1;} while(0)
+
+#define RESERVED_VAL        0
+
+#define TYPE_INVAL_CONTEXT      0x1
+#define TYPE_INVAL_IOTLB        0x2
+#define TYPE_INVAL_DEVICE_IOTLB 0x3
+#define TYPE_INVAL_IEC          0x4
+#define TYPE_INVAL_WAIT         0x5
+
+#define NOTIFY_TYPE_POLL        1
+#define NOTIFY_TYPE_INTR        1
+#define INTERRUTP_FLAG          1
+#define STATUS_WRITE            1
+#define FENCE_FLAG              1
+
+#define IEC_GLOBAL_INVL         0
+#define IEC_INDEX_INVL          1
+
+
 #define VTD_PAGE_TABLE_LEVEL_3  3
 #define VTD_PAGE_TABLE_LEVEL_4  4
 
@@ -293,8 +401,17 @@ struct iommu_flush {
                  u64 type, int non_present_entry_flush);
 };
 
+struct qi_ctrl {
+    u64 qinval_maddr;  /* queue invalidation page machine address */
+    int qinval_index;                    /* queue invalidation index */
+    spinlock_t qinval_lock;      /* lock for queue invalidation page */
+    spinlock_t qinval_poll_lock; /* lock for queue invalidation poll addr */
+    volatile u32 qinval_poll_status;     /* used by poll method to sync */
+};
+
 struct intel_iommu {
     struct iommu_flush flush;
+    struct qi_ctrl qi_ctrl;
 };
 
 #endif
diff --git a/drivers/passthrough/vtd/qinval.c b/drivers/passthrough/vtd/qinval.c
new file mode 100644
index 0000000..9c78def
--- /dev/null
+++ b/drivers/passthrough/vtd/qinval.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@intel.com>
+ * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
+ */
+
+
+#include <xen/sched.h>
+#include <xen/iommu.h>
+#include <xen/time.h>
+#include <xen/pci.h>
+#include <xen/pci_regs.h>
+#include "iommu.h"
+#include "dmar.h"
+#include "vtd.h"
+#include "extern.h"
+
+static void print_qi_regs(struct iommu *iommu)
+{
+    u64 val;
+
+    val = dmar_readq(iommu->reg, DMAR_IQA_REG);
+    printk("DMAR_IQA_REG = %"PRIx64"\n", val);
+
+    val = dmar_readq(iommu->reg, DMAR_IQH_REG);
+    printk("DMAR_IQH_REG = %"PRIx64"\n", val);
+
+    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
+    printk("DMAR_IQT_REG = %"PRIx64"\n", val);
+}
+
+static int qinval_next_index(struct iommu *iommu)
+{
+    u64 val;
+    val = dmar_readq(iommu->reg, DMAR_IQT_REG);
+    return (val >> 4);
+}
+
+static int qinval_update_qtail(struct iommu *iommu, int index)
+{
+    u64 val;
+
+    /* Need an ASSERT to ensure that we hold the register lock */
+    val = (index < (QINVAL_ENTRY_NR-1)) ? (index + 1) : 0;
+    dmar_writeq(iommu->reg, DMAR_IQT_REG, (val << 4));
+    return 0;
+}
+
+static int gen_cc_inv_dsc(struct iommu *iommu, int index,
+    u16 did, u16 source_id, u8 function_mask, u8 granu)
+{
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
+    qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
+    qinval_entry->q.cc_inv_dsc.lo.granu = granu;
+    qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.cc_inv_dsc.lo.did = did;
+    qinval_entry->q.cc_inv_dsc.lo.sid = source_id;
+    qinval_entry->q.cc_inv_dsc.lo.fm = function_mask;
+    qinval_entry->q.cc_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.cc_inv_dsc.hi.res = 0;
+
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+
+    return 0;
+}
+
+int queue_invalidate_context(struct iommu *iommu,
+    u16 did, u16 source_id, u8 function_mask, u8 granu)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    if ( index == -1 )
+        return -EBUSY;
+    ret = gen_cc_inv_dsc(iommu, index, did, source_id,
+                         function_mask, granu);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_iotlb_inv_dsc(struct iommu *iommu, int index,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
+{
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
+    qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
+    qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
+    qinval_entry->q.iotlb_inv_dsc.lo.dr = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.dw = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.iotlb_inv_dsc.lo.did = did;
+    qinval_entry->q.iotlb_inv_dsc.lo.res_2 = 0;
+
+    qinval_entry->q.iotlb_inv_dsc.hi.am = am;
+    qinval_entry->q.iotlb_inv_dsc.hi.ih = ih;
+    qinval_entry->q.iotlb_inv_dsc.hi.res_1 = 0;
+    qinval_entry->q.iotlb_inv_dsc.hi.addr = addr;
+
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_iotlb(struct iommu *iommu,
+    u8 granu, u8 dr, u8 dw, u16 did, u8 am, u8 ih, u64 addr)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+
+    index = qinval_next_index(iommu);
+    ret = gen_iotlb_inv_dsc(iommu, index, granu, dr, dw, did,
+                            am, ih, addr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_wait_dsc(struct iommu *iommu, int index,
+    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
+{
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
+    qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
+    qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
+    qinval_entry->q.inv_wait_dsc.lo.sw = sw;
+    qinval_entry->q.inv_wait_dsc.lo.fn = fn;
+    qinval_entry->q.inv_wait_dsc.lo.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.lo.sdata = sdata;
+    qinval_entry->q.inv_wait_dsc.hi.res_1 = 0;
+    qinval_entry->q.inv_wait_dsc.hi.saddr = virt_to_maddr(saddr) >> 2;
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+static int queue_invalidate_wait(struct iommu *iommu,
+    u8 iflag, u8 sw, u8 fn, u32 sdata, volatile u32 *saddr)
+{
+    unsigned long flags;
+    s_time_t start_time;
+    int index = -1;
+    int ret = -1;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    spin_lock_irqsave(&qi_ctrl->qinval_poll_lock, flags);
+    spin_lock(&iommu->register_lock);
+    index = qinval_next_index(iommu);
+    if ( *saddr == 1 )
+        *saddr = 0;
+    ret = gen_wait_dsc(iommu, index, iflag, sw, fn, sdata, saddr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock(&iommu->register_lock);
+
+    /* The interrupt-based completion method is not supported yet */
+    if ( sw )
+    {
+        /* All wait descriptors write the same data to the same address */
+        start_time = NOW();
+        while ( *saddr != 1 )
+        {
+            if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+            {
+                print_qi_regs(iommu);
+                panic("queue invalidate wait descriptor was not executed\n");
+            }
+            cpu_relax();
+        }
+    }
+    spin_unlock_irqrestore(&qi_ctrl->qinval_poll_lock, flags);
+    return ret;
+}
+
+int invalidate_sync(struct iommu *iommu)
+{
+    int ret = -1;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( qi_ctrl->qinval_maddr != 0 )
+    {
+        ret = queue_invalidate_wait(iommu,
+            0, 1, 1, 1, &qi_ctrl->qinval_poll_status);
+        return ret;
+    }
+    return 0;
+}
+
+static int gen_dev_iotlb_inv_dsc(struct iommu *iommu, int index,
+    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+{
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.sid = sid;
+    qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
+
+    qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
+    qinval_entry->q.dev_iotlb_inv_dsc.hi.res_1 = 0;
+    qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr >> PAGE_SHIFT_4K;
+
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int qinval_device_iotlb(struct iommu *iommu,
+    u32 max_invs_pend, u16 sid, u16 size, u64 addr)
+{
+    int ret = -1;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    ret = gen_dev_iotlb_inv_dsc(iommu, index, max_invs_pend,
+                                sid, size, addr);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+static int gen_iec_inv_dsc(struct iommu *iommu, int index,
+    u8 granu, u8 im, u16 iidx)
+{
+    unsigned long flags;
+    struct qinval_entry *qinval_entry = NULL, *qinval_entries;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    if ( index == -1 )
+        return -1;
+    spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
+
+    qinval_entries =
+        (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
+    qinval_entry = &qinval_entries[index];
+    qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
+    qinval_entry->q.iec_inv_dsc.lo.granu = granu;
+    qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
+    qinval_entry->q.iec_inv_dsc.lo.im = im;
+    qinval_entry->q.iec_inv_dsc.lo.iidx = iidx;
+    qinval_entry->q.iec_inv_dsc.lo.res_2 = 0;
+    qinval_entry->q.iec_inv_dsc.hi.res = 0;
+
+    unmap_vtd_domain_page(qinval_entries);
+    spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+    return 0;
+}
+
+int queue_invalidate_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
+{
+    int ret;
+    unsigned long flags;
+    int index = -1;
+
+    spin_lock_irqsave(&iommu->register_lock, flags);
+    index = qinval_next_index(iommu);
+    ret = gen_iec_inv_dsc(iommu, index, granu, im, iidx);
+    ret |= qinval_update_qtail(iommu, index);
+    spin_unlock_irqrestore(&iommu->register_lock, flags);
+    return ret;
+}
+
+int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
+{
+    int ret;
+    ret = queue_invalidate_iec(iommu, granu, im, iidx);
+    ret |= invalidate_sync(iommu);
+
+    /*
+     * Reading a VT-d architecture register ensures that draining
+     * happens in an implementation-independent way.
+     */
+    (void)dmar_readq(iommu->reg, DMAR_CAP_REG);
+    return ret;
+}
+
+int iommu_flush_iec_global(struct iommu *iommu)
+{
+    return __iommu_flush_iec(iommu, IEC_GLOBAL_INVL, 0, 0);
+}
+
+int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx)
+{
+   return __iommu_flush_iec(iommu, IEC_INDEX_INVL, im, iidx);
+}
+
+static int flush_context_qi(
+    void *_iommu, u16 did, u16 sid, u8 fm, u64 type,
+    int non_present_entry_flush)
+{
+    int ret = 0;
+    struct iommu *iommu = (struct iommu *)_iommu;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    /*
+     * In the non-present entry flush case: if the hardware does not
+     * cache non-present entries we do nothing; if it does, we flush
+     * the entries of domain 0 (domain id 0 is used to cache any
+     * non-present entries).
+     */
+    if ( non_present_entry_flush )
+    {
+        if ( !cap_caching_mode(iommu->cap) )
+            return 1;
+        else
+            did = 0;
+    }
+
+    if ( qi_ctrl->qinval_maddr != 0 )
+    {
+        ret = queue_invalidate_context(iommu, did, sid, fm,
+                                       type >> DMA_CCMD_INVL_GRANU_OFFSET);
+        ret |= invalidate_sync(iommu);
+    }
+    return ret;
+}
+
+static int flush_iotlb_qi(
+    void *_iommu, u16 did,
+    u64 addr, unsigned int size_order, u64 type,
+    int non_present_entry_flush)
+{
+    u8 dr = 0, dw = 0;
+    int ret = 0;
+    struct iommu *iommu = (struct iommu *)_iommu;
+    struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+
+    /*
+     * In the non-present entry flush case: if the hardware does not
+     * cache non-present entries we do nothing; if it does, we flush
+     * the entries of domain 0 (domain id 0 is used to cache any
+     * non-present entries).
+     */
+    if ( non_present_entry_flush )
+    {
+        if ( !cap_caching_mode(iommu->cap) )
+            return 1;
+        else
+            did = 0;
+    }
+
+    if ( qi_ctrl->qinval_maddr != 0 )
+    {
+        /* use queued invalidation */
+        if (cap_write_drain(iommu->cap))
+            dw = 1;
+        if (cap_read_drain(iommu->cap))
+            dr = 1;
+        /* Need to consider the ih bit later */
+        ret = queue_invalidate_iotlb(iommu,
+                  (type >> DMA_TLB_FLUSH_GRANU_OFFSET), dr,
+                  dw, did, (u8)size_order, 0, addr);
+        ret |= invalidate_sync(iommu);
+    }
+    return ret;
+}
+
+int qinval_setup(struct iommu *iommu)
+{
+    s_time_t start_time;
+    struct qi_ctrl *qi_ctrl;
+    struct iommu_flush *flush;
+
+    qi_ctrl = iommu_qi_ctrl(iommu);
+    flush = iommu_get_flush(iommu);
+
+    if ( !ecap_queued_inval(iommu->ecap) )
+        return -ENODEV;
+
+    if ( qi_ctrl->qinval_maddr == 0 )
+    {
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL);
+        if ( qi_ctrl->qinval_maddr == 0 )
+        {
+            dprintk(XENLOG_WARNING VTDPREFIX,
+                    "Cannot allocate memory for qi_ctrl->qinval_maddr\n");
+            return -ENOMEM;
+        }
+        flush->context = flush_context_qi;
+        flush->iotlb = flush_iotlb_qi;
+    }
+
+    /* Set up the Invalidation Queue Address (IQA) register with the
+     * address of the page we just allocated.  The QS field at
+     * bits[2:0] indicates the queue size; 0 selects one 4KB page,
+     * i.e. 256 entries.  The Queue Head (IQH) and Queue Tail (IQT)
+     * registers are automatically reset to 0 by a write to the
+     * IQA register.
+     */
+    dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
+
+    /* enable queued invalidation hardware */
+    iommu->gcmd |= DMA_GCMD_QIE;
+    dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+    /* Make sure the hardware completes it */
+    start_time = NOW();
+    while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_QIES) )
+    {
+        if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+            panic("Cannot set QIE field for queue invalidation\n");
+        cpu_relax();
+    }
+
+    return 0;
+}
diff --git a/include/xen/iommu.h b/include/xen/iommu.h
index 0c8612b..0be9909 100644
--- a/include/xen/iommu.h
+++ b/include/xen/iommu.h
@@ -79,6 +79,7 @@ void io_apic_write_remap_rte(unsigned int apic,
                              unsigned int reg, unsigned int value);
 
 struct iommu_flush *iommu_get_flush(struct iommu *iommu);
+struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu);
 void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq);
 struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain);
 int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci);