From 1a0b3cb14d45174cc1b2495df1c28a638c8ae743 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Wed, 15 Jun 2011 13:15:20 -0300 Subject: [RHEL6 qemu-kvm PATCH 1/2] raw-posix: Linearize direct I/O on Linux NFS RH-Author: Anthony Liguori Message-id: <1308143720-2821-1-git-send-email-aliguori@redhat.com> Patchwork-id: 27170 O-Subject: [PATCH] [PATCH RHEL6.2 qemu-kvm] raw-posix: Linearize direct I/O on Linux NFS Bugzilla: 711213 RH-Acked-by: Markus Armbruster RH-Acked-by: Christoph Hellwig RH-Acked-by: Kevin Wolf From: Stefan Hajnoczi BZ: 711213 Upstream-status: not appropriate for upstream The Linux NFS client issues separate NFS requests for vectored direct I/O writes. For example, a pwritev() with 8 elements results in 8 write requests to the server. This is very inefficient and a kernel-side fix is not trivial or likely to be available soon. This patch detects files on NFS and uses the QEMU_AIO_MISALIGNED flag to force requests to bounce through a linear buffer. This workaround is enabled by default but can be disabled by setting the QEMU_FORCE_LINEARIZE=off environment variable. It can be set on a per-VM basis using the following libvirt domain XML: ... Khoa Huynh reports the following ffsb benchmark results over 1 Gbit Ethernet: Test (threads=8) unpatched patched (MB/s) (MB/s) Large File Creates (bs=256 KB) 20.5 112.0 Sequential Reads (bs=256 KB) 58.7 112.0 Large File Creates (bs=8 KB) 5.2 5.8 Sequential Reads (bs=8 KB) 46.7 80.9 Random Reads (bs=8 KB) 8.7 23.4 Random Writes (bs=8 KB) 39.6 44.0 Mail Server (bs=8 KB) 10.2 23.6 Test (threads=1) unpatched patched (MB/s) (MB/s) Large File Creates (bs=256 KB) 14.5 49.8 Sequential Reads (bs=256 KB) 87.9 83.9 Large File Creates (bs=8 KB) 4.8 4.8 Sequential Reads (bs=8 KB) 23.2 23.1 Random Reads (bs=8 KB) 4.8 4.7 Random Writes (bs=8 KB) 9.4 12.8 Mail Server (bs=8 KB) 5.4 7.3 Signed-off-by: Stefan Hajnoczi Signed-off-by: Anthony Liguori --- block/raw-posix.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 51 insertions(+), 8 deletions(-) Signed-off-by: Eduardo Habkost --- block/raw-posix.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 51 insertions(+), 8 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index a38ae91..c966044 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -50,8 +50,10 @@ #endif #ifdef __linux__ #include +#include #include #include +#include #endif #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) #include @@ -121,6 +123,7 @@ typedef struct BDRVRawState { #endif uint8_t *aligned_buf; unsigned aligned_buf_size; + bool force_linearize; } BDRVRawState; static int fd_open(BlockDriverState *bs); @@ -130,6 +133,38 @@ static int64_t raw_getlength(BlockDriverState *bs); static int cdrom_reopen(BlockDriverState *bs); #endif +#if defined(__linux__) +static bool is_vectored_io_slow(int fd, int bdrv_flags) +{ + struct statfs stfs; + int ret; + char *env_flag = getenv("QEMU_FORCE_LINEARIZE"); + + if (env_flag && strcasecmp(env_flag, "off") == 0) { + return false; + } + + do { + ret = fstatfs(fd, &stfs); + } while (ret != 0 && errno == EINTR); + + /* + * Linux NFS client splits vectored direct I/O requests into separate NFS + * requests so it is faster to submit a single buffer instead. + */ + if (!ret && stfs.f_type == NFS_SUPER_MAGIC && + (bdrv_flags & BDRV_O_NOCACHE)) { + return true; + } + return false; +} +#else /* !defined(__linux__) */ +static bool is_vectored_io_slow(int fd, int bdrv_flags) +{ + return false; +} +#endif + static int raw_open_common(BlockDriverState *bs, const char *filename, int bdrv_flags, int open_flags) { @@ -162,6 +197,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, } s->fd = fd; s->aligned_buf = NULL; + s->force_linearize = is_vectored_io_slow(fd, bdrv_flags); if ((bdrv_flags & BDRV_O_NOCACHE)) { /* @@ -528,20 +564,27 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, return NULL; /* + * Check if buffers need to be copied into a single linear buffer. + */ + if (s->force_linearize && qiov->niov > 1) { + type |= QEMU_AIO_MISALIGNED; + } + + /* * If O_DIRECT is used the buffer needs to be aligned on a sector * boundary. Check if this is the case or telll the low-level * driver that it needs to copy the buffer. */ - if (s->aligned_buf) { - if (!qiov_is_aligned(bs, qiov)) { - type |= QEMU_AIO_MISALIGNED; + if (s->aligned_buf && !qiov_is_aligned(bs, qiov)) { + type |= QEMU_AIO_MISALIGNED; + } + #ifdef CONFIG_LINUX_AIO - } else if (s->use_aio) { - return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, - nb_sectors, cb, opaque, type); -#endif - } + if (s->use_aio && (type & QEMU_AIO_MISALIGNED) == 0) { + return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov, + nb_sectors, cb, opaque, type); } +#endif return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors, cb, opaque, type); -- 1.7.3.2