From a70e8c7863df00207881a1f9a8fcffb4d09fe462 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 23 Jul 2010 21:42:27 -0300 Subject: [PATCH 1/2] device-assignment: Use PCI I/O port sysfs resource file when available RH-Author: Alex Williamson Message-id: <20100723214202.21146.4302.stgit@localhost6.localdomain6> Patchwork-id: 10958 O-Subject: [RHEL6 qemu-kvm PATCH] device-assignment: Use PCI I/O port sysfs resource file when available Bugzilla: 615214 RH-Acked-by: Jes Sorensen RH-Acked-by: Don Dutile RH-Acked-by: Chris Wright Bugzilla: 615214 Upstream status: posted When supported by the host kernel, we can use read/write on the PCI sysfs resource file for I/O port regions. This allows us to avoid raw in/out commands and works with deprivileged guests via libvirt. Signed-off-by: Alex Williamson --- hw/device-assignment.c | 201 +++++++++++++++++++++++++++--------------------- hw/device-assignment.h | 1 2 files changed, 116 insertions(+), 86 deletions(-) Signed-off-by: Eduardo Habkost --- hw/device-assignment.c | 201 +++++++++++++++++++++++++++-------------------- hw/device-assignment.h | 1 + 2 files changed, 116 insertions(+), 86 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index e8ede16..e8cfbd2 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -62,93 +62,100 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); -static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr) +static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, + uint32_t addr, int len, uint32_t *val) { - return region->u.r_baseport + (addr - region->e_physbase); + uint32_t ret = 0; + uint32_t offset = addr - dev_region->e_physbase; + int fd = dev_region->region->resource_fd; + + if (fd >= 0) { + if (val) { + DEBUG("pwrite val=%x, len=%d, e_phys=%x, offset=%x\n", + *val, len, addr, offset); + if (pwrite(fd, val, len, offset) != len) { + fprintf(stderr, "%s - pwrite failed %s\n", + __func__, strerror(errno)); + } + } else { + if (pread(fd, &ret, len, offset) != len) { + fprintf(stderr, "%s - pread failed %s\n", + __func__, strerror(errno)); + ret = (1UL << (len * 8)) - 1; + } + DEBUG("pread ret=%x, len=%d, e_phys=%x, offset=%x\n", + ret, len, addr, offset); + } + } else { + uint32_t port = offset + dev_region->u.r_baseport; + + if (val) { + DEBUG("out val=%x, len=%d, e_phys=%x, host=%x\n", + *val, len, addr, port); + switch (len) { + case 1: + outb(*val, port); + break; + case 2: + outw(*val, port); + break; + case 4: + outl(*val, port); + break; + } + } else { + switch (len) { + case 1: + ret = inb(port); + break; + case 2: + ret = inw(port); + break; + case 4: + ret = inl(port); + break; + } + DEBUG("in val=%x, len=%d, e_phys=%x, host=%x\n", + ret, len, addr, port); + } + } + return ret; } static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, uint32_t value) { - AssignedDevRegion *r_access = opaque; - uint32_t r_pio = guest_to_host_ioport(r_access, addr); - - DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", - r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->u.r_baseport, value); - - outb(value, r_pio); + assigned_dev_ioport_rw(opaque, addr, 1, &value); + return; } static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, uint32_t value) { - AssignedDevRegion *r_access = opaque; - uint32_t r_pio = guest_to_host_ioport(r_access, addr); - - DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", - r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->u.r_baseport, value); - - outw(value, r_pio); + assigned_dev_ioport_rw(opaque, addr, 2, &value); + return; } static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, uint32_t value) { - AssignedDevRegion *r_access = opaque; - uint32_t r_pio = guest_to_host_ioport(r_access, addr); - - DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", - r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->u.r_baseport, value); - - outl(value, r_pio); + assigned_dev_ioport_rw(opaque, addr, 4, &value); + return; } static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr) { - AssignedDevRegion *r_access = opaque; - uint32_t r_pio = guest_to_host_ioport(r_access, addr); - uint32_t value; - - value = inb(r_pio); - - DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n", - r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->u.r_baseport, value); - - return value; + return assigned_dev_ioport_rw(opaque, addr, 1, NULL); } static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr) { - AssignedDevRegion *r_access = opaque; - uint32_t r_pio = guest_to_host_ioport(r_access, addr); - uint32_t value; - - value = inw(r_pio); - - DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", - r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->u.r_baseport, value); - - return value; + return assigned_dev_ioport_rw(opaque, addr, 2, NULL); } static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr) { - AssignedDevRegion *r_access = opaque; - uint32_t r_pio = guest_to_host_ioport(r_access, addr); - uint32_t value; - - value = inl(r_pio); - - DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", - r_pio, (int)r_access->e_physbase, - (unsigned long)r_access->u.r_baseport, value); - - return value; + return assigned_dev_ioport_rw(opaque, addr, 4, NULL); } static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, @@ -242,7 +249,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, DEBUG("e_phys=0x%x r_baseport=%x type=0x%x len=%d region_num=%d \n", addr, region->u.r_baseport, type, size, region_num); - if (first_map) { + if (first_map && region->region->resource_fd < 0) { struct ioperm_data *data; data = qemu_mallocz(sizeof(struct ioperm_data)); @@ -504,19 +511,37 @@ static int assigned_dev_register_regions(PCIRegion *io_regions, cur_region->size, t, assigned_dev_iomem_map); continue; + } else { + /* handle port io regions */ + uint32_t val; + int ret; + + /* Test kernel support for ioport resource read/write. Old + * kernels return EIO. New kernels only allow 1/2/4 byte reads + * so should return EINVAL for a 3 byte read */ + ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0); + if (ret == 3) { + fprintf(stderr, "I/O port resource supports 3 byte read?!\n"); + abort(); + } else if (errno != EINVAL) { + fprintf(stderr, "Using raw in/out ioport access (sysfs - %s)\n", + strerror(errno)); + close(pci_dev->v_addrs[i].region->resource_fd); + pci_dev->v_addrs[i].region->resource_fd = -1; + } + + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; + pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; + + pci_register_bar((PCIDevice *) pci_dev, i, + cur_region->size, PCI_BASE_ADDRESS_SPACE_IO, + assigned_dev_ioport_map); + + /* not relevant for port io */ + pci_dev->v_addrs[i].memory_index = 0; } - /* handle port io regions */ - pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; - pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; - pci_dev->v_addrs[i].r_size = cur_region->size; - pci_dev->v_addrs[i].e_size = 0; - - pci_register_bar((PCIDevice *) pci_dev, i, - cur_region->size, PCI_BASE_ADDRESS_SPACE_IO, - assigned_dev_ioport_map); - - /* not relevant for port io */ - pci_dev->v_addrs[i].memory_index = 0; } /* success */ @@ -591,20 +616,22 @@ again: continue; if (flags & IORESOURCE_MEM) { flags &= ~IORESOURCE_IO; - if (r != PCI_ROM_SLOT) { - snprintf(name, sizeof(name), "%sresource%d", dir, r); - fd = open(name, O_RDWR); - if (fd == -1) - continue; - rp->resource_fd = fd; - } - } else + } else { flags &= ~IORESOURCE_PREFETCH; + } + if (r != PCI_ROM_SLOT) { + snprintf(name, sizeof(name), "%sresource%d", dir, r); + fd = open(name, O_RDWR); + if (fd == -1) + continue; + rp->resource_fd = fd; + } rp->type = flags; rp->valid = 1; rp->base_addr = start; rp->size = size; + pci_dev->v_addrs[r].region = rp; DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n", r, rp->size, start, rp->type, rp->resource_fd); } @@ -676,8 +703,10 @@ static void free_assigned_device(AssignedDevice *dev) continue; if (pci_region->type & IORESOURCE_IO) { - kvm_remove_ioperm_data(region->u.r_baseport, region->r_size); - continue; + if (pci_region->resource_fd < 0) { + kvm_remove_ioperm_data(region->u.r_baseport, + region->r_size); + } } else if (pci_region->type & IORESOURCE_MEM) { if (region->e_size == 0) continue; @@ -709,11 +738,11 @@ static void free_assigned_device(AssignedDevice *dev) fprintf(stderr, "Failed to unmap assigned device region: %s\n", strerror(errno)); - if (pci_region->resource_fd >= 0) { - close(pci_region->resource_fd); - } } - } + } + if (pci_region->resource_fd >= 0) { + close(pci_region->resource_fd); + } } if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) diff --git a/hw/device-assignment.h b/hw/device-assignment.h index b3ef4db..1223681 100644 --- a/hw/device-assignment.h +++ b/hw/device-assignment.h @@ -70,6 +70,7 @@ typedef struct { int num; /* our index within v_addrs[] */ pcibus_t e_size; /* emulated size of region in bytes */ pcibus_t r_size; /* real size of region in bytes */ + PCIRegion *region; } AssignedDevRegion; typedef struct AssignedDevice { -- 1.7.0.3