From: ddugger@redhat.com <ddugger@redhat.com>
Date: Mon, 23 Mar 2009 10:22:59 -0600
Subject: [xen] x86: IRQ injection changes for VT-d
Message-id: 200903231622.n2NGMx1v022051@sobek.n0ano.com
O-Subject: [RHEL5.4 PATCH 5/21 V2] IRQ injection changes for VT-d
Bugzilla: 484227
RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
RH-Acked-by: Chris Lalancette <clalance@redhat.com>

implement irq functions for VT-d, intercept interrupts for VT-d in
__do_IRQ_guest, and do EOI particularly.

Review changes relative to the upstream changesets, all in
drivers/passthrough/io.c:
 - pt_irq_create_bind_vtd: bump link_cnt[] only after the digl allocation
   succeeded (the -ENOMEM path used to leave it over-counted).
 - pt_irq_destroy_bind_vtd: range-check machine_irq like the create path.
 - pt_irq_time_out: drop the write-only 'vector' local.
 - hvm_dpci_eoi: drop the always-true guest_gsi >= NR_ISAIRQS test.
 - add function header comments.

Upstream Status: Accepted (CS 15903, 16268)

BZ: 484227

Signed-off-by: Weidong Han <weidong.han@intel.com>
Signed-off-by: Gerd Hoffman <kraxel@redhat.com>
Signed-off-by: Don Dugger <donald.d.dugger@intel.com>

diff --git a/arch/x86/hvm/vioapic.c b/arch/x86/hvm/vioapic.c
index d755caa..3266c39 100644
--- a/arch/x86/hvm/vioapic.c
+++ b/arch/x86/hvm/vioapic.c
@@ -458,6 +458,14 @@ void vioapic_update_EOI(struct domain *d, int vector)
     ent = &vioapic->redirtbl[gsi];
 
     ent->fields.remote_irr = 0;
+
+    if ( iommu_enabled )
+    {
+        spin_unlock(&d->arch.hvm_domain.irq_lock);
+        hvm_dpci_eoi(current->domain, gsi, ent);
+        spin_lock(&d->arch.hvm_domain.irq_lock);
+    }
+
     if ( (ent->fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
          !ent->fields.mask &&
          hvm_irq->gsi_assert_count[gsi] )
diff --git a/arch/x86/hvm/vmx/intr.c b/arch/x86/hvm/vmx/intr.c
index cdea237..f4d1e15 100644
--- a/arch/x86/hvm/vmx/intr.c
+++ b/arch/x86/hvm/vmx/intr.c
@@ -114,6 +114,7 @@ asmlinkage void vmx_intr_assist(void)
     unsigned long inst_len;
 
     pt_update_irq(v);
+    hvm_dirq_assist(v);
 
     update_tpr_threshold(vcpu_vlapic(v));
 
diff --git a/arch/x86/hvm/vpic.c b/arch/x86/hvm/vpic.c
index 19ef5d0..0835698 100644
--- a/arch/x86/hvm/vpic.c
+++ b/arch/x86/hvm/vpic.c
@@ -182,8 +182,7 @@ static void vpic_ioport_write(
 
     vpic_lock(vpic);
 
-    addr &= 1;
-    if ( addr == 0 )
+    if ( (addr & 1) == 0 )
     {
         if ( val & 0x10 )
         {
@@ -250,7 +249,13 @@ static void vpic_ioport_write(
                 vpic->isr &= ~(1 << irq);
                 if ( cmd == 7 )
                     vpic->priority_add = (irq + 1) & 7;
-                break;
+                /* Release lock and EOI the physical interrupt (if any). */
+                vpic_update_int_output(vpic);
+                vpic_unlock(vpic);
+                hvm_dpci_eoi(current->domain,
+                             hvm_isa_irq_to_gsi((addr >> 7) ? (irq|8) : irq),
+                             NULL);
+                return; /* bail immediately */
             case 6: /* Set Priority */
                 vpic->priority_add = (val + 1) & 7;
                 break;
diff --git a/arch/x86/irq.c b/arch/x86/irq.c
index b9d761c..423d451 100644
--- a/arch/x86/irq.c
+++ b/arch/x86/irq.c
@@ -236,7 +236,8 @@ static void __do_IRQ_guest(int vector)
         if ( (action->ack_type != ACKTYPE_NONE) &&
              !test_and_set_bit(irq, d->pirq_mask) )
             action->in_flight++;
-        send_guest_pirq(d, irq);
+        if ( !hvm_do_IRQ_dpci(d, irq) )
+            send_guest_pirq(d, irq);
     }
 }
 
diff --git a/drivers/passthrough/Makefile b/drivers/passthrough/Makefile
index 662f89c..259497f 100644
--- a/drivers/passthrough/Makefile
+++ b/drivers/passthrough/Makefile
@@ -3,3 +3,4 @@ subdir-$(x86_64) += vtd
 
 obj-y += iommu.o
 obj-y += pci.o
+obj-y += io.o
diff --git a/drivers/passthrough/io.c b/drivers/passthrough/io.c
new file mode 100644
index 0000000..0452e07
--- /dev/null
+++ b/drivers/passthrough/io.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@intel.com>
+ * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
+ */
+
+#include <xen/event.h>
+#include <xen/iommu.h>
+#include <asm/hvm/irq.h>
+#include <asm/hvm/iommu.h>
+#include <xen/hvm/irq.h>
+
+/*
+ * Timer handler installed by pt_irq_create_bind_vtd() and armed by
+ * hvm_do_IRQ_dpci()/hvm_dirq_assist().  Deasserts every guest INTx line
+ * linked to the machine IRQ in 'data', clears its pending state and EOIs
+ * the physical interrupt.  machine_gsi stays 0 if the link list is empty.
+ */
+static void pt_irq_time_out(void *data)
+{
+    struct hvm_mirq_dpci_mapping *irq_map = data;
+    unsigned int guest_gsi, machine_gsi = 0;
+    struct hvm_irq_dpci *dpci = NULL;
+    struct dev_intx_gsi_link *digl;
+    uint32_t device, intx;
+
+    spin_lock(&irq_map->dom->evtchn_lock);
+
+    dpci = domain_get_irq_dpci(irq_map->dom);
+    ASSERT(dpci);
+    list_for_each_entry ( digl, &irq_map->digl_list, list )
+    {
+        guest_gsi = digl->gsi;
+        machine_gsi = dpci->girq[guest_gsi].machine_gsi;
+        device = digl->device;
+        intx = digl->intx;
+        hvm_pci_intx_deassert(irq_map->dom, device, intx);
+    }
+
+    clear_bit(machine_gsi, dpci->dirq_mask);
+    dpci->mirq[machine_gsi].pending = 0;
+    spin_unlock(&irq_map->dom->evtchn_lock);
+    pirq_guest_eoi(irq_map->dom, machine_gsi);
+}
+
+/*
+ * Bind machine IRQ pt_irq_bind->machine_irq to the guest INTx line given by
+ * pt_irq_bind->u.pci.{device,intx} in domain d.  The per-domain dpci state
+ * is allocated on first use; the physical IRQ is bound to the guest (and
+ * its time-out timer initialised) only the first time the machine IRQ is
+ * mapped in this domain.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range machine IRQ or when
+ * domain_set_irq_dpci() returns 0, -ENOMEM on allocation failure, or the
+ * non-zero result of pirq_guest_bind().
+ */
+int pt_irq_create_bind_vtd(
+    struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = NULL;
+    uint32_t machine_gsi, guest_gsi;
+    uint32_t device, intx, link;
+    struct dev_intx_gsi_link *digl;
+    int rc, pirq = pt_irq_bind->machine_irq;
+
+    if ( pirq < 0 || pirq >= NR_IRQS )
+        return -EINVAL;
+
+    spin_lock(&d->evtchn_lock);
+
+    hvm_irq_dpci = domain_get_irq_dpci(d);
+    if ( hvm_irq_dpci == NULL )
+    {
+        hvm_irq_dpci = xmalloc(struct hvm_irq_dpci);
+        if ( hvm_irq_dpci == NULL )
+        {
+            spin_unlock(&d->evtchn_lock);
+            return -ENOMEM;
+        }
+        memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
+        for ( int i = 0; i < NR_IRQS; i++ )
+            INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
+    }
+
+    if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
+    {
+        xfree(hvm_irq_dpci);
+        spin_unlock(&d->evtchn_lock);
+        return -EINVAL;
+    }
+
+    machine_gsi = pt_irq_bind->machine_irq;
+    device = pt_irq_bind->u.pci.device;
+    intx = pt_irq_bind->u.pci.intx;
+    guest_gsi = hvm_pci_intx_gsi(device, intx);
+    link = hvm_pci_intx_link(device, intx);
+
+    digl = xmalloc(struct dev_intx_gsi_link);
+    if ( !digl )
+    {
+        spin_unlock(&d->evtchn_lock);
+        return -ENOMEM;
+    }
+
+    /*
+     * Count the link only after the allocation succeeded, so the -ENOMEM
+     * return above cannot leave link_cnt[] permanently over-counted.
+     */
+    hvm_irq_dpci->link_cnt[link]++;
+
+    digl->device = device;
+    digl->intx = intx;
+    digl->gsi = guest_gsi;
+    digl->link = link;
+    list_add_tail(&digl->list,
+                  &hvm_irq_dpci->mirq[machine_gsi].digl_list);
+
+    hvm_irq_dpci->girq[guest_gsi].valid = 1;
+    hvm_irq_dpci->girq[guest_gsi].device = device;
+    hvm_irq_dpci->girq[guest_gsi].intx = intx;
+    hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
+
+    /* Bind the same mirq once in the same domain */
+    if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
+    {
+        unsigned int vector = irq_to_vector(machine_gsi);
+
+        hvm_irq_dpci->mirq[machine_gsi].dom = d;
+
+        /* Init timer before binding */
+        init_timer(&hvm_irq_dpci->hvm_timer[vector],
+                   pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
+        /* Deal with gsi for legacy devices */
+        rc = pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
+        if ( unlikely(rc) )
+        {
+            kill_timer(&hvm_irq_dpci->hvm_timer[vector]);
+            hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
+            clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+            hvm_irq_dpci->girq[guest_gsi].machine_gsi = 0;
+            hvm_irq_dpci->girq[guest_gsi].intx = 0;
+            hvm_irq_dpci->girq[guest_gsi].device = 0;
+            hvm_irq_dpci->girq[guest_gsi].valid = 0;
+            list_del(&digl->list);
+            hvm_irq_dpci->link_cnt[link]--;
+            spin_unlock(&d->evtchn_lock);
+            xfree(digl);
+            return rc;
+        }
+    }
+
+    gdprintk(XENLOG_INFO VTDPREFIX,
+             "VT-d irq bind: m_irq = %x device = %x intx = %x\n",
+             machine_gsi, device, intx);
+
+    spin_unlock(&d->evtchn_lock);
+    return 0;
+}
+
+/*
+ * Remove the binding of machine IRQ pt_irq_bind->machine_irq to the guest
+ * INTx line pt_irq_bind->u.pci.{device,intx} in domain d.  Once no link is
+ * left on the machine IRQ it is unbound from the guest and its timer killed.
+ *
+ * Returns 0 on success, -EINVAL if the machine IRQ is out of range or the
+ * domain has no dpci state.
+ */
+int pt_irq_destroy_bind_vtd(
+    struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = NULL;
+    uint32_t machine_gsi, guest_gsi;
+    uint32_t device, intx, link;
+    struct list_head *digl_list, *tmp;
+    struct dev_intx_gsi_link *digl;
+
+    machine_gsi = pt_irq_bind->machine_irq;
+    device = pt_irq_bind->u.pci.device;
+    intx = pt_irq_bind->u.pci.intx;
+    guest_gsi = hvm_pci_intx_gsi(device, intx);
+    link = hvm_pci_intx_link(device, intx);
+
+    /* Mirror the range check done in pt_irq_create_bind_vtd(). */
+    if ( machine_gsi >= NR_IRQS )
+        return -EINVAL;
+
+    gdprintk(XENLOG_INFO,
+             "pt_irq_destroy_bind_vtd: machine_gsi=%d "
+             "guest_gsi=%d, device=%d, intx=%d.\n",
+             machine_gsi, guest_gsi, device, intx);
+    spin_lock(&d->evtchn_lock);
+
+    hvm_irq_dpci = domain_get_irq_dpci(d);
+
+    if ( hvm_irq_dpci == NULL )
+    {
+        spin_unlock(&d->evtchn_lock);
+        return -EINVAL;
+    }
+
+    hvm_irq_dpci->link_cnt[link]--;
+    memset(&hvm_irq_dpci->girq[guest_gsi], 0,
+           sizeof(struct hvm_girq_dpci_mapping));
+
+    /* clear the mirq info */
+    if ( test_bit(machine_gsi, hvm_irq_dpci->mapping))
+    {
+        list_for_each_safe ( digl_list, tmp,
+                &hvm_irq_dpci->mirq[machine_gsi].digl_list )
+        {
+            digl = list_entry(digl_list,
+                    struct dev_intx_gsi_link, list);
+            if ( digl->device == device &&
+                 digl->intx == intx &&
+                 digl->link == link &&
+                 digl->gsi == guest_gsi )
+            {
+                list_del(&digl->list);
+                xfree(digl);
+            }
+        }
+
+        if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
+        {
+            pirq_guest_unbind(d, machine_gsi);
+            kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]);
+            hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
+            hvm_irq_dpci->mirq[machine_gsi].flags = 0;
+            clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+        }
+    }
+    spin_unlock(&d->evtchn_lock);
+    gdprintk(XENLOG_INFO,
+             "XEN_DOMCTL_irq_unmapping: m_irq = %x device = %x intx = %x\n",
+             machine_gsi, device, intx);
+
+    return 0;
+}
+
+/*
+ * Hook for __do_IRQ_guest(): if mirq is mapped for pass-through in domain
+ * d, mark it in dirq_mask, arm the time-out timer (non-MSI only), kick vcpu 0
+ * and return 1.  Return 0 to let the caller deliver it via send_guest_pirq().
+ */
+int hvm_do_IRQ_dpci(struct domain *d, unsigned int mirq)
+{
+    struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
+
+    ASSERT(spin_is_locked(&irq_desc[irq_to_vector(mirq)].lock));
+    if ( !iommu_enabled || (d == dom0) || !dpci ||
+         !test_bit(mirq, dpci->mapping))
+        return 0;
+
+    /*
+     * Set a timer here to avoid situations where the IRQ line is shared, and
+     * the device belonging to the pass-through guest is not yet active. In
+     * this case the guest may not pick up the interrupt (e.g., masked at the
+     * PIC) and we need to detect that.
+     */
+    set_bit(mirq, dpci->dirq_mask);
+    if ( !test_bit(_HVM_IRQ_DPCI_MSI, &dpci->mirq[mirq].flags) )
+        set_timer(&dpci->hvm_timer[irq_to_vector(mirq)],
+                  NOW() + PT_IRQ_TIME_OUT);
+    vcpu_kick(d->vcpu[0]);
+
+    return 1;
+}
+
+/*
+ * Called from vmx_intr_assist().  On vcpu 0 only, for each machine IRQ
+ * flagged in dirq_mask: assert all guest INTx lines linked to it, count
+ * them in 'pending', and re-arm the time-out timer.
+ */
+void hvm_dirq_assist(struct vcpu *v)
+{
+    unsigned int irq;
+    uint32_t device, intx;
+    struct domain *d = v->domain;
+    struct hvm_irq_dpci *hvm_irq_dpci = domain_get_irq_dpci(d);
+    struct dev_intx_gsi_link *digl;
+
+    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+        return;
+
+    for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
+          irq < NR_IRQS;
+          irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
+    {
+        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+            continue;
+
+        spin_lock(&d->evtchn_lock);
+        stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
+
+        list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
+        {
+            device = digl->device;
+            intx = digl->intx;
+            hvm_pci_intx_assert(d, device, intx);
+            hvm_irq_dpci->mirq[irq].pending++;
+        }
+
+        /*
+         * Set a timer to see if the guest can finish the interrupt or not. For
+         * example, the guest OS may unmask the PIC during boot, before the
+         * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
+         * guest will never deal with the irq, then the physical interrupt line
+         * will never be deasserted.
+         */
+        set_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)],
+                  NOW() + PT_IRQ_TIME_OUT);
+        spin_unlock(&d->evtchn_lock);
+    }
+}
+
+/*
+ * Guest EOI for guest_gsi.  ISA IRQs are passed to hvm_dpci_isairq_eoi().
+ * Otherwise the bound INTx line is deasserted and, when the machine IRQ's
+ * pending count drops to zero, its timer is stopped and the physical
+ * interrupt EOIed -- unless 'ent' is non-NULL and has its mask bit set.
+ */
+void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
+                  union vioapic_redir_entry *ent)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = NULL;
+    uint32_t device, intx, machine_gsi;
+
+    if ( !iommu_enabled)
+        return;
+
+    if ( guest_gsi < NR_ISAIRQS )
+    {
+        hvm_dpci_isairq_eoi(d, guest_gsi);
+        return;
+    }
+
+    spin_lock(&d->evtchn_lock);
+    hvm_irq_dpci = domain_get_irq_dpci(d);
+
+    /* guest_gsi >= NR_ISAIRQS here: ISA IRQs returned above. */
+    if ( (hvm_irq_dpci == NULL) ||
+         !hvm_irq_dpci->girq[guest_gsi].valid )
+    {
+        spin_unlock(&d->evtchn_lock);
+        return;
+    }
+
+    device = hvm_irq_dpci->girq[guest_gsi].device;
+    intx = hvm_irq_dpci->girq[guest_gsi].intx;
+    hvm_pci_intx_deassert(d, device, intx);
+
+    machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
+    if ( --hvm_irq_dpci->mirq[machine_gsi].pending == 0 )
+    {
+        if ( (ent == NULL) || !ent->fields.mask )
+        {
+            /*
+             * No need to get vector lock for timer
+             * since interrupt is still not EOIed
+             */
+            stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]);
+            pirq_guest_eoi(d, machine_gsi);
+        }
+    }
+    spin_unlock(&d->evtchn_lock);
+}
diff --git a/include/asm-x86/hvm/io.h b/include/asm-x86/hvm/io.h
index 15981ff..c3b4d76 100644
--- a/include/asm-x86/hvm/io.h
+++ b/include/asm-x86/hvm/io.h
@@ -144,6 +144,9 @@ void send_invalidate_req(void);
 extern void handle_mmio(unsigned long gpa);
 extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
 extern void hvm_io_assist(void);
+void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
+                  union vioapic_redir_entry *ent);
+
 
 #endif /* __ASM_X86_HVM_IO_H__ */
 