From 3b8c1761f0e1523622e008836d01a6544b1c21ab Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 25 Jul 2016 14:47:12 +0200 Subject: [PATCH 01/58] qtail: clean up direct access to tqe_prev field instead of accessing tqe_prev field dircetly outside of queue.h use macros to check if element is in list and make sure that afer element is removed from list tqe_prev field could be used to do the same check. Signed-off-by: Igor Mammedov Message-Id: <1469450832-84343-1-git-send-email-imammedo@redhat.com> Signed-off-by: Paolo Bonzini --- blockdev.c | 2 +- exec.c | 3 +-- include/qemu/queue.h | 2 ++ net/filter.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/blockdev.c b/blockdev.c index 97062e33c3..c3b05934d7 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3907,7 +3907,7 @@ void qmp_x_blockdev_del(bool has_id, const char *id, goto out; } - if (!blk && !bs->monitor_list.tqe_prev) { + if (!blk && !QTAILQ_IN_USE(bs, monitor_list)) { error_setg(errp, "Node %s is not owned by the monitor", bs->node_name); goto out; diff --git a/exec.c b/exec.c index 80398b038f..ce3fb9ec8e 100644 --- a/exec.c +++ b/exec.c @@ -617,7 +617,7 @@ void cpu_exec_exit(CPUState *cpu) CPUClass *cc = CPU_GET_CLASS(cpu); cpu_list_lock(); - if (cpu->node.tqe_prev == NULL) { + if (!QTAILQ_IN_USE(cpu, node)) { /* there is nothing to undo since cpu_exec_init() hasn't been called */ cpu_list_unlock(); return; @@ -626,7 +626,6 @@ void cpu_exec_exit(CPUState *cpu) assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ))); QTAILQ_REMOVE(&cpus, cpu, node); - cpu->node.tqe_prev = NULL; cpu->cpu_index = UNASSIGNED_CPU_INDEX; cpu_list_unlock(); diff --git a/include/qemu/queue.h b/include/qemu/queue.h index c2b6c8149d..342073fb4d 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -407,6 +407,7 @@ struct { \ else \ (head)->tqh_last = (elm)->field.tqe_prev; \ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ + (elm)->field.tqe_prev = NULL; \ } while (/*CONSTCOND*/0) #define QTAILQ_FOREACH(var, head, field) \ @@ -430,6 +431,7 @@ struct { \ #define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL) #define QTAILQ_FIRST(head) ((head)->tqh_first) #define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define QTAILQ_IN_USE(elm, field) ((elm)->field.tqe_prev != NULL) #define QTAILQ_LAST(head, headname) \ (*(((struct headname *)((head)->tqh_last))->tqh_last)) diff --git a/net/filter.c b/net/filter.c index 888fe6dd93..1dfd2caa23 100644 --- a/net/filter.c +++ b/net/filter.c @@ -239,7 +239,7 @@ static void netfilter_finalize(Object *obj) } if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters) && - nf->next.tqe_prev) { + QTAILQ_IN_USE(nf, next)) { QTAILQ_REMOVE(&nf->netdev->filters, nf, next); } g_free(nf->netdev_id); From 9cd1883c0d01813c47e0153459068f22895f4ade Mon Sep 17 00:00:00 2001 From: Cao jin Date: Thu, 28 Jul 2016 18:50:04 +0800 Subject: [PATCH 02/58] util/qemu-sockets: revert Yoda Conditions to normal Follow CODING_STYLE Cc: Daniel P. Berrange Cc: Gerd Hoffmann Cc: Paolo Bonzini Signed-off-by: Cao jin Message-Id: <1469703004-14800-1-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- util/qemu-sockets.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 2aed799e97..9fa01560ce 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -491,7 +491,7 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr, goto err; } - if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) { + if ((rc = getaddrinfo(addr, port, &ai, &peer)) != 0) { error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, gai_strerror(rc)); goto err; @@ -517,7 +517,7 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr, port = "0"; } - if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) { + if ((rc = getaddrinfo(addr, port, &ai, &local)) != 0) { error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, gai_strerror(rc)); goto err; @@ -573,20 +573,20 @@ InetSocketAddress *inet_parse(const char *str, Error **errp) if (str[0] == ':') { /* no host given */ host[0] = '\0'; - if (1 != sscanf(str, ":%32[^,]%n", port, &pos)) { + if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) { error_setg(errp, "error parsing port in address '%s'", str); goto fail; } } else if (str[0] == '[') { /* IPv6 addr */ - if (2 != sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos)) { + if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) { error_setg(errp, "error parsing IPv6 address '%s'", str); goto fail; } addr->ipv6 = addr->has_ipv6 = true; } else { /* hostname or IPv4 addr */ - if (2 != sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos)) { + if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) { error_setg(errp, "error parsing address '%s'", str); goto fail; } From 260150512156b475e923b853b39c354f6ddd764e Mon Sep 17 00:00:00 2001 From: Cao jin Date: Thu, 28 Jul 2016 16:54:33 +0800 Subject: [PATCH 03/58] util: fix some coding style issue Fix some coding style issues found in removing NonBlockingConnectHandler. Cc: Daniel P. Berrange Cc: Gerd Hoffmann Cc: Paolo Bonzini Reviwed-by: Daniel P. Berrange Signed-off-by: Cao jin Message-Id: <1469696074-12744-3-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- util/qemu-sockets.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 9fa01560ce..6db48b3f2e 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -494,7 +494,7 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr, if ((rc = getaddrinfo(addr, port, &ai, &peer)) != 0) { error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, gai_strerror(rc)); - goto err; + goto err; } /* lookup local addr */ @@ -548,12 +548,16 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr, return sock; err: - if (-1 != sock) + if (sock != -1) { closesocket(sock); - if (local) + } + if (local) { freeaddrinfo(local); - if (peer) + } + if (peer) { freeaddrinfo(peer); + } + return -1; } @@ -816,8 +820,10 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp) sock = unix_listen_saddr(saddr, true, errp); - if (sock != -1 && ostr) + if (sock != -1 && ostr) { snprintf(ostr, olen, "%s%s", saddr->path, optstr ? optstr : ""); + } + qapi_free_UnixSocketAddress(saddr); return sock; } From 4b7e69509df2fcbfdab8c62c294dbfcfdab8a6e1 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Fri, 15 Jul 2016 20:58:42 +0300 Subject: [PATCH 04/58] tcg: Pass last_tb by value to tb_find_fast() This is a small clean up. tb_find_fast() is a final consumer of this variable so no need to pass it by reference. 'last_tb' is always updated by subsequent cpu_loop_exec_tb() in cpu_exec(). This change also simplifies calling cpu_exec_nocache() in cpu_handle_exception(). Signed-off-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Message-Id: <20160715175852.30749-3-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- cpu-exec.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 5d9710a1ea..cf511f1b9a 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -320,7 +320,7 @@ found: } static inline TranslationBlock *tb_find_fast(CPUState *cpu, - TranslationBlock **last_tb, + TranslationBlock *last_tb, int tb_exit) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; @@ -342,7 +342,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, /* Ensure that no TB jump will be modified as the * translation buffer has been flushed. */ - *last_tb = NULL; + last_tb = NULL; cpu->tb_flushed = false; } #ifndef CONFIG_USER_ONLY @@ -351,12 +351,12 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, * spanning two pages because the mapping for the second page can change. */ if (tb->page_addr[1] != -1) { - *last_tb = NULL; + last_tb = NULL; } #endif /* See if we can patch the calling TB. */ - if (*last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - tb_add_jump(*last_tb, tb_exit, tb); + if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + tb_add_jump(last_tb, tb_exit, tb); } tb_unlock(); return tb; @@ -437,8 +437,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) } else if (replay_has_exception() && cpu->icount_decr.u16.low + cpu->icount_extra == 0) { /* try to cause an exception pending in the log */ - TranslationBlock *last_tb = NULL; /* Avoid chaining TBs */ - cpu_exec_nocache(cpu, 1, tb_find_fast(cpu, &last_tb, 0), true); + cpu_exec_nocache(cpu, 1, tb_find_fast(cpu, NULL, 0), true); *ret = -1; return true; #endif @@ -621,7 +620,7 @@ int cpu_exec(CPUState *cpu) cpu->tb_flushed = false; /* reset before first TB lookup */ for(;;) { cpu_handle_interrupt(cpu, &last_tb); - tb = tb_find_fast(cpu, &last_tb, tb_exit); + tb = tb_find_fast(cpu, last_tb, tb_exit); cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc); /* Try to align the host and virtual clocks if the guest is in advance */ From 89a16b1e4294e3664667a151c2f70c84dfac6fd9 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Fri, 15 Jul 2016 20:58:43 +0300 Subject: [PATCH 05/58] tcg: Prepare safe tb_jmp_cache lookup out of tb_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure atomicity of CPU's 'tb_jmp_cache' access for future translation block lookup out of 'tb_lock'. Note that this patch does *not* make CPU's TLB invalidation safe if it is done from some other thread while the CPU is in its execution loop. Signed-off-by: Alex Bennée Signed-off-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Reviewed-by: Alex Bennée Message-Id: <20160715175852.30749-4-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- cpu-exec.c | 4 ++-- translate-all.c | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index cf511f1b9a..32b58edb31 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -315,7 +315,7 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, found: /* we add the TB in the virtual pc hash table */ - cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; + atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); return tb; } @@ -333,7 +333,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, is executed. */ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); tb_lock(); - tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { tb = tb_find_slow(cpu, pc, cs_base, flags); diff --git a/translate-all.c b/translate-all.c index 0dd6466e07..77ae59d7e9 100644 --- a/translate-all.c +++ b/translate-all.c @@ -851,7 +851,11 @@ void tb_flush(CPUState *cpu) tcg_ctx.tb_ctx.nb_tbs = 0; CPU_FOREACH(cpu) { - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); + int i; + + for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) { + atomic_set(&cpu->tb_jmp_cache[i], NULL); + } cpu->tb_flushed = true; } @@ -1010,8 +1014,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) /* remove the TB from the hash list */ h = tb_jmp_cache_hash_func(tb->pc); CPU_FOREACH(cpu) { - if (cpu->tb_jmp_cache[h] == tb) { - cpu->tb_jmp_cache[h] = NULL; + if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) { + atomic_set(&cpu->tb_jmp_cache[h], NULL); } } From 118b07308a8cedc16ef63d7ab243a95f1701db40 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Fri, 15 Jul 2016 20:58:44 +0300 Subject: [PATCH 06/58] tcg: Prepare safe access to tb_flushed out of tb_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure atomicity and ordering of CPU's 'tb_flushed' access for future translation block lookup out of 'tb_lock'. This field can only be touched from another thread by tb_flush() in user mode emulation. So the only access to be sequential atomic is: * a single write in tb_flush(); * reads/writes out of 'tb_lock'. In future, before enabling MTTCG in system mode, tb_flush() must be safe and this field becomes unnecessary. Signed-off-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Reviewed-by: Alex Bennée Message-Id: <20160715175852.30749-5-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- cpu-exec.c | 16 +++++++--------- translate-all.c | 4 ++-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 32b58edb31..877ff8ed70 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -338,13 +338,6 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, tb->flags != flags)) { tb = tb_find_slow(cpu, pc, cs_base, flags); } - if (cpu->tb_flushed) { - /* Ensure that no TB jump will be modified as the - * translation buffer has been flushed. - */ - last_tb = NULL; - cpu->tb_flushed = false; - } #ifndef CONFIG_USER_ONLY /* We don't take care of direct jumps when address mapping changes in * system emulation. So it's not safe to make a direct jump to a TB @@ -356,7 +349,12 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, #endif /* See if we can patch the calling TB. */ if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - tb_add_jump(last_tb, tb_exit, tb); + /* Check if translation buffer has been flushed */ + if (cpu->tb_flushed) { + cpu->tb_flushed = false; + } else { + tb_add_jump(last_tb, tb_exit, tb); + } } tb_unlock(); return tb; @@ -617,7 +615,7 @@ int cpu_exec(CPUState *cpu) break; } - cpu->tb_flushed = false; /* reset before first TB lookup */ + atomic_mb_set(&cpu->tb_flushed, false); /* reset before first TB lookup */ for(;;) { cpu_handle_interrupt(cpu, &last_tb); tb = tb_find_fast(cpu, last_tb, tb_exit); diff --git a/translate-all.c b/translate-all.c index 77ae59d7e9..e753a50640 100644 --- a/translate-all.c +++ b/translate-all.c @@ -848,7 +848,6 @@ void tb_flush(CPUState *cpu) > tcg_ctx.code_gen_buffer_size) { cpu_abort(cpu, "Internal error: code buffer overflow\n"); } - tcg_ctx.tb_ctx.nb_tbs = 0; CPU_FOREACH(cpu) { int i; @@ -856,9 +855,10 @@ void tb_flush(CPUState *cpu) for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) { atomic_set(&cpu->tb_jmp_cache[i], NULL); } - cpu->tb_flushed = true; + atomic_mb_set(&cpu->tb_flushed, true); } + tcg_ctx.tb_ctx.nb_tbs = 0; qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE); page_flush_tb(); From 6d21e4208f382dd8ca1f7995a6dd9ea7ca281163 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 Jul 2016 08:36:18 +0200 Subject: [PATCH 07/58] tcg: Prepare TB invalidation for lockless TB lookup When invalidating a translation block, set an invalid flag into the TranslationBlock structure first. It is also necessary to check whether the target TB is still valid after acquiring 'tb_lock' but before calling tb_add_jump() since TB lookup is to be performed out of 'tb_lock' in future. Note that we don't have to check 'last_tb'; an already invalidated TB will not be executed anyway and it is thus safe to patch it. Suggested-by: Sergey Fedorov Signed-off-by: Paolo Bonzini --- cpu-exec.c | 5 +++-- include/exec/exec-all.h | 2 ++ translate-all.c | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 877ff8ed70..cdaab1dbcd 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -241,7 +241,8 @@ static bool tb_cmp(const void *p, const void *d) if (tb->pc == desc->pc && tb->page_addr[0] == desc->phys_page1 && tb->cs_base == desc->cs_base && - tb->flags == desc->flags) { + tb->flags == desc->flags && + !atomic_read(&tb->invalid)) { /* check next page if needed */ if (tb->page_addr[1] == -1) { return true; @@ -352,7 +353,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, /* Check if translation buffer has been flushed */ if (cpu->tb_flushed) { cpu->tb_flushed = false; - } else { + } else if (!tb->invalid) { tb_add_jump(last_tb, tb_exit, tb); } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d008296c1b..a0e87be88f 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -225,6 +225,8 @@ struct TranslationBlock { #define CF_USE_ICOUNT 0x20000 #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */ + uint16_t invalid; + void *tc_ptr; /* pointer to the translated code */ uint8_t *tc_search; /* pointer to search data */ /* original tb when cflags has CF_NOCACHE */ diff --git a/translate-all.c b/translate-all.c index e753a50640..5a5499ffb6 100644 --- a/translate-all.c +++ b/translate-all.c @@ -773,6 +773,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++]; tb->pc = pc; tb->cflags = 0; + tb->invalid = false; return tb; } @@ -994,6 +995,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) uint32_t h; tb_page_addr_t phys_pc; + atomic_set(&tb->invalid, true); + /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); h = tb_hash_func(phys_pc, tb->pc, tb->flags); From 2e1ae44a4f4a6149fbb9dc812243522f07284700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 15 Jul 2016 20:58:48 +0300 Subject: [PATCH 08/58] tcg: set up tb->page_addr before insertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This ensures that if we find the TB on the slow path that tb->page_addr is correctly set before being tested. Signed-off-by: Alex Bennée Reviewed-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Message-Id: <20160715175852.30749-9-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- translate-all.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/translate-all.c b/translate-all.c index 5a5499ffb6..b6663dc91d 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1131,10 +1131,6 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, { uint32_t h; - /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags); - qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); - /* add in the page list */ tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); if (phys_page2 != -1) { @@ -1143,6 +1139,10 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb->page_addr[1] = -1; } + /* add in the hash table */ + h = tb_hash_func(phys_pc, tb->pc, tb->flags); + qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); + #ifdef DEBUG_TB_CHECK tb_page_check(); #endif From 518615c6503ad78d3bb67ddf1cd848c4a41de02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 15 Jul 2016 20:58:49 +0300 Subject: [PATCH 09/58] tcg: cpu-exec: remove tb_lock from the hot-path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lock contention in the hot path of moving between existing patched TranslationBlocks is the main drag in multithreaded performance. This patch pushes the tb_lock() usage down to the two places that really need it: - code generation (tb_gen_code) - jump patching (tb_add_jump) The rest of the code doesn't really need to hold a lock as it is either using per-CPU structures, atomically updated or designed to be used in concurrent read situations (qht_lookup). To keep things simple I removed the #ifdef CONFIG_USER_ONLY stuff as the locks become NOPs anyway until the MTTCG work is completed. Signed-off-by: Alex Bennée Reviewed-by: Richard Henderson Reviewed-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Message-Id: <20160715175852.30749-10-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- cpu-exec.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index cdaab1dbcd..7ca2b71a89 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -287,35 +287,29 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, TranslationBlock *tb; tb = tb_find_physical(cpu, pc, cs_base, flags); - if (tb) { - goto found; - } + if (!tb) { -#ifdef CONFIG_USER_ONLY - /* mmap_lock is needed by tb_gen_code, and mmap_lock must be - * taken outside tb_lock. Since we're momentarily dropping - * tb_lock, there's a chance that our desired tb has been - * translated. - */ - tb_unlock(); - mmap_lock(); - tb_lock(); - tb = tb_find_physical(cpu, pc, cs_base, flags); - if (tb) { + /* mmap_lock is needed by tb_gen_code, and mmap_lock must be + * taken outside tb_lock. As system emulation is currently + * single threaded the locks are NOPs. + */ + mmap_lock(); + tb_lock(); + + /* There's a chance that our desired tb has been translated while + * taking the locks so we check again inside the lock. + */ + tb = tb_find_physical(cpu, pc, cs_base, flags); + if (!tb) { + /* if no translated code available, then translate it now */ + tb = tb_gen_code(cpu, pc, cs_base, flags, 0); + } + + tb_unlock(); mmap_unlock(); - goto found; } -#endif - /* if no translated code available, then translate it now */ - tb = tb_gen_code(cpu, pc, cs_base, flags, 0); - -#ifdef CONFIG_USER_ONLY - mmap_unlock(); -#endif - -found: - /* we add the TB in the virtual pc hash table */ + /* We add the TB in the virtual pc hash table for the fast lookup */ atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); return tb; } @@ -333,7 +327,6 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, always be the same before a given translated block is executed. */ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb_lock(); tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { @@ -350,14 +343,15 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, #endif /* See if we can patch the calling TB. */ if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + tb_lock(); /* Check if translation buffer has been flushed */ if (cpu->tb_flushed) { cpu->tb_flushed = false; } else if (!tb->invalid) { tb_add_jump(last_tb, tb_exit, tb); } + tb_unlock(); } - tb_unlock(); return tb; } From 74d356dd48b64eaa2a6104ac1493ca64cb31fa16 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Fri, 15 Jul 2016 20:58:50 +0300 Subject: [PATCH 10/58] tcg: Avoid bouncing tb_lock between tb_gen_code() and tb_add_jump() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Reviewed-by: Alex Bennée Message-Id: <20160715175852.30749-11-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- cpu-exec.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 7ca2b71a89..bd9fa5a538 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -282,7 +282,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, static TranslationBlock *tb_find_slow(CPUState *cpu, target_ulong pc, target_ulong cs_base, - uint32_t flags) + uint32_t flags, + bool *have_tb_lock) { TranslationBlock *tb; @@ -295,6 +296,7 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, */ mmap_lock(); tb_lock(); + *have_tb_lock = true; /* There's a chance that our desired tb has been translated while * taking the locks so we check again inside the lock. @@ -305,7 +307,6 @@ static TranslationBlock *tb_find_slow(CPUState *cpu, tb = tb_gen_code(cpu, pc, cs_base, flags, 0); } - tb_unlock(); mmap_unlock(); } @@ -322,6 +323,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; + bool have_tb_lock = false; /* we record a subset of the CPU state. It will always be the same before a given translated block @@ -330,7 +332,7 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { - tb = tb_find_slow(cpu, pc, cs_base, flags); + tb = tb_find_slow(cpu, pc, cs_base, flags, &have_tb_lock); } #ifndef CONFIG_USER_ONLY /* We don't take care of direct jumps when address mapping changes in @@ -343,13 +345,18 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, #endif /* See if we can patch the calling TB. */ if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - tb_lock(); + if (!have_tb_lock) { + tb_lock(); + have_tb_lock = true; + } /* Check if translation buffer has been flushed */ if (cpu->tb_flushed) { cpu->tb_flushed = false; } else if (!tb->invalid) { tb_add_jump(last_tb, tb_exit, tb); } + } + if (have_tb_lock) { tb_unlock(); } return tb; From bd2710d5da06ad7706d4864f65b3f0c9f7cb4d7f Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Fri, 15 Jul 2016 20:58:51 +0300 Subject: [PATCH 11/58] tcg: Merge tb_find_slow() and tb_find_fast() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions are not too big and can be merged together. This makes locking scheme more clear and easier to follow. Signed-off-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Reviewed-by: Alex Bennée Message-Id: <20160715175852.30749-12-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- cpu-exec.c | 72 +++++++++++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index bd9fa5a538..f7f60b16b1 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -279,45 +279,9 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h); } -static TranslationBlock *tb_find_slow(CPUState *cpu, - target_ulong pc, - target_ulong cs_base, - uint32_t flags, - bool *have_tb_lock) -{ - TranslationBlock *tb; - - tb = tb_find_physical(cpu, pc, cs_base, flags); - if (!tb) { - - /* mmap_lock is needed by tb_gen_code, and mmap_lock must be - * taken outside tb_lock. As system emulation is currently - * single threaded the locks are NOPs. - */ - mmap_lock(); - tb_lock(); - *have_tb_lock = true; - - /* There's a chance that our desired tb has been translated while - * taking the locks so we check again inside the lock. - */ - tb = tb_find_physical(cpu, pc, cs_base, flags); - if (!tb) { - /* if no translated code available, then translate it now */ - tb = tb_gen_code(cpu, pc, cs_base, flags, 0); - } - - mmap_unlock(); - } - - /* We add the TB in the virtual pc hash table for the fast lookup */ - atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); - return tb; -} - -static inline TranslationBlock *tb_find_fast(CPUState *cpu, - TranslationBlock *last_tb, - int tb_exit) +static inline TranslationBlock *tb_find(CPUState *cpu, + TranslationBlock *last_tb, + int tb_exit) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; @@ -332,7 +296,31 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { - tb = tb_find_slow(cpu, pc, cs_base, flags, &have_tb_lock); + tb = tb_find_physical(cpu, pc, cs_base, flags); + if (!tb) { + + /* mmap_lock is needed by tb_gen_code, and mmap_lock must be + * taken outside tb_lock. As system emulation is currently + * single threaded the locks are NOPs. + */ + mmap_lock(); + tb_lock(); + have_tb_lock = true; + + /* There's a chance that our desired tb has been translated while + * taking the locks so we check again inside the lock. + */ + tb = tb_find_physical(cpu, pc, cs_base, flags); + if (!tb) { + /* if no translated code available, then translate it now */ + tb = tb_gen_code(cpu, pc, cs_base, flags, 0); + } + + mmap_unlock(); + } + + /* We add the TB in the virtual pc hash table for the fast lookup */ + atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); } #ifndef CONFIG_USER_ONLY /* We don't take care of direct jumps when address mapping changes in @@ -437,7 +425,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) } else if (replay_has_exception() && cpu->icount_decr.u16.low + cpu->icount_extra == 0) { /* try to cause an exception pending in the log */ - cpu_exec_nocache(cpu, 1, tb_find_fast(cpu, NULL, 0), true); + cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0), true); *ret = -1; return true; #endif @@ -620,7 +608,7 @@ int cpu_exec(CPUState *cpu) atomic_mb_set(&cpu->tb_flushed, false); /* reset before first TB lookup */ for(;;) { cpu_handle_interrupt(cpu, &last_tb); - tb = tb_find_fast(cpu, last_tb, tb_exit); + tb = tb_find(cpu, last_tb, tb_exit); cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc); /* Try to align the host and virtual clocks if the guest is in advance */ From b34de45fc40d01c14b31d3a682e284180a2ed8c5 Mon Sep 17 00:00:00 2001 From: Sergey Fedorov Date: Fri, 15 Jul 2016 20:58:52 +0300 Subject: [PATCH 12/58] tcg: rename tb_find_physical() In fact, this function does not exactly perform a lookup by physical address as it is descibed for comment on get_page_addr_code(). Thus it may be a bit confusing to have "physical" in it's name. So rename it to tb_htable_lookup() to better reflect its actual functionality. Signed-off-by: Sergey Fedorov Signed-off-by: Sergey Fedorov Message-Id: <20160715175852.30749-13-sergey.fedorov@linaro.org> Signed-off-by: Paolo Bonzini --- cpu-exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index f7f60b16b1..b240b9fa45 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -260,7 +260,7 @@ static bool tb_cmp(const void *p, const void *d) return false; } -static TranslationBlock *tb_find_physical(CPUState *cpu, +static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags) @@ -296,7 +296,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { - tb = tb_find_physical(cpu, pc, cs_base, flags); + tb = tb_htable_lookup(cpu, pc, cs_base, flags); if (!tb) { /* mmap_lock is needed by tb_gen_code, and mmap_lock must be @@ -310,7 +310,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, /* There's a chance that our desired tb has been translated while * taking the locks so we check again inside the lock. */ - tb = tb_find_physical(cpu, pc, cs_base, flags); + tb = tb_htable_lookup(cpu, pc, cs_base, flags); if (!tb) { /* if no translated code available, then translate it now */ tb = tb_gen_code(cpu, pc, cs_base, flags, 0); From 5b1b6dbd94e2e2e98920f886cb32fcf4a1520b50 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 27 Jul 2016 14:26:15 +0800 Subject: [PATCH 13/58] rules.mak: Don't extract libs from .mo-libs in link command For module build, .mo objects are passed to LINK and consumed in process-archive-undefs. The reason behind that is documented in the comment above process-archive-undefs. Similarly, extract-libs should be called with .mo filtered out too. Otherwise, the .mo-libs are added to the link command incorrectly, spoiling the purpose of modularization. Currently we don't have any .mo-libs usage, but it will be used soon when we modularize more multi-source objects, like sdl and gtk. Reported-by: Colin Lord Signed-off-by: Fam Zheng Message-Id: <1469600777-30413-2-git-send-email-famz@redhat.com> Signed-off-by: Paolo Bonzini --- rules.mak | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules.mak b/rules.mak index 55b0121ce1..5c82c19f73 100644 --- a/rules.mak +++ b/rules.mak @@ -51,7 +51,7 @@ process-archive-undefs = $(filter-out %.a %.mo,$1) \ $(call undefined-symbols,$(filter %.mo,$1)))) \ $(filter %.a,$1) -extract-libs = $(strip $(foreach o,$1,$($o-libs))) +extract-libs = $(strip $(foreach o,$(filter-out %.mo,$1),$($o-libs))) expand-objs = $(strip $(sort $(filter %.o,$1)) \ $(foreach o,$(filter %.mo,$1),$($o-objs)) \ $(filter-out %.o %.mo,$1)) From 490ab15a4948f9150fae781fc421becce9221b32 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Fri, 29 Jul 2016 19:05:36 +0800 Subject: [PATCH 14/58] timer: update comments The comments is outdated. The patch has following changes: 1. tense correction. 2. all clock time value is returned in nanoseconds, so, they are same in precision. 3. virtual clock doesn't use cpu cycles. Cc: Paolo Bonzini Cc: Peter Maydell Signed-off-by: Cao jin Message-Id: <1469790338-28990-2-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- include/qemu/timer.h | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 309f3d09e9..34650b2039 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -22,23 +22,20 @@ * @QEMU_CLOCK_REALTIME: Real time clock * * The real time clock should be used only for stuff which does not - * change the virtual machine state, as it is run even if the virtual - * machine is stopped. The real time clock has a frequency of 1000 - * Hz. + * change the virtual machine state, as it runs even if the virtual + * machine is stopped. * * @QEMU_CLOCK_VIRTUAL: virtual clock * - * The virtual clock is only run during the emulation. It is stopped - * when the virtual machine is stopped. Virtual timers use a high - * precision clock, usually cpu cycles (use ticks_per_sec). + * The virtual clock only runs during the emulation. It stops + * when the virtual machine is stopped. * * @QEMU_CLOCK_HOST: host clock * - * The host clock should be use for device models that emulate accurate + * The host clock should be used for device models that emulate accurate * real time sources. It will continue to run when the virtual machine * is suspended, and it will reflect system time changes the host may - * undergo (e.g. due to NTP). The host clock has the same precision as - * the virtual clock. + * undergo (e.g. due to NTP). * * @QEMU_CLOCK_VIRTUAL_RT: realtime clock used for icount warp * @@ -76,10 +73,6 @@ struct QEMUTimer { extern QEMUTimerListGroup main_loop_tlg; -/* - * QEMUClockType - */ - /* * qemu_clock_get_ns; * @type: the clock type From 1d45cea5496fe509f0b66f01b876067e68a5faa0 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Fri, 29 Jul 2016 19:05:37 +0800 Subject: [PATCH 15/58] cpus: rename local variable to meaningful one The function actually returns monotonic time value in nanosecond, the "ticks" is not suitable. Cc: Paolo Bonzini Cc Peter Crosthwaite Cc: Richard Henderson Signed-off-by: Cao jin Message-Id: <1469790338-28990-3-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- cpus.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpus.c b/cpus.c index 84c3520d44..e11caf7ced 100644 --- a/cpus.c +++ b/cpus.c @@ -219,14 +219,14 @@ int64_t cpu_get_ticks(void) static int64_t cpu_get_clock_locked(void) { - int64_t ticks; + int64_t time; - ticks = timers_state.cpu_clock_offset; + time = timers_state.cpu_clock_offset; if (timers_state.cpu_ticks_enabled) { - ticks += get_clock(); + time += get_clock(); } - return ticks; + return time; } /* return the host CPU monotonic timer and handle stop/restart */ From d90f3cca87844ea797a1c96b88758478ac575e1f Mon Sep 17 00:00:00 2001 From: Cao jin Date: Fri, 29 Jul 2016 19:05:38 +0800 Subject: [PATCH 16/58] cpus: update comments The returned value of cpu_get_clock() is plused with the offset, so it is the time elapsed in virtual machine when vm is active. Cc: Paolo Bonzini Cc Peter Crosthwaite Cc: Richard Henderson Signed-off-by: Cao jin Message-Id: <1469790338-28990-4-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- cpus.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cpus.c b/cpus.c index e11caf7ced..030843132f 100644 --- a/cpus.c +++ b/cpus.c @@ -191,8 +191,12 @@ int64_t cpu_icount_to_ns(int64_t icount) return icount << icount_time_shift; } -/* return the host CPU cycle counter and handle stop/restart */ -/* Caller must hold the BQL */ +/* return the time elapsed in VM between vm_start and vm_stop. Unless + * icount is active, cpu_get_ticks() uses units of the host CPU cycle + * counter. + * + * Caller must hold the BQL + */ int64_t cpu_get_ticks(void) { int64_t ticks; @@ -229,7 +233,8 @@ static int64_t cpu_get_clock_locked(void) return time; } -/* return the host CPU monotonic timer and handle stop/restart */ +/* Return the monotonic time elapsed in VM, i.e., + * the time between vm_start and vm_stop */ int64_t cpu_get_clock(void) { int64_t ti; From dc0a3e448c24d6811a5806058bdacc2c2775c92f Mon Sep 17 00:00:00 2001 From: Colin Lord Date: Fri, 12 Aug 2016 15:30:48 -0400 Subject: [PATCH 17/58] help: Update help to remove misleading display information Updates the help messages to remove misleading information about SDL being the normal display used. Signed-off-by: Colin Lord Message-Id: <1471030248-21637-1-git-send-email-cdlord2@illinois.edu> Reviewed-by: Markus Armbruster Signed-off-by: Paolo Bonzini --- qemu-options.hx | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/qemu-options.hx b/qemu-options.hx index 70dfe986a2..52096268b7 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -982,13 +982,14 @@ DEF("nographic", 0, QEMU_OPTION_nographic, STEXI @item -nographic @findex -nographic -Normally, QEMU uses SDL to display the VGA output. With this option, -you can totally disable graphical output so that QEMU is a simple -command line application. The emulated serial port is redirected on -the console and muxed with the monitor (unless redirected elsewhere -explicitly). Therefore, you can still use QEMU to debug a Linux kernel -with a serial console. Use @key{C-a h} for help on switching between -the console and monitor. +Normally, if QEMU is compiled with graphical window support, it displays +output such as guest graphics, guest console, and the QEMU monitor in a +window. With this option, you can totally disable graphical output so +that QEMU is a simple command line application. The emulated serial port +is redirected on the console and muxed with the monitor (unless +redirected elsewhere explicitly). Therefore, you can still use QEMU to +debug a Linux kernel with a serial console. Use @key{C-a h} for help on +switching between the console and monitor. ETEXI DEF("curses", 0, QEMU_OPTION_curses, @@ -997,9 +998,11 @@ DEF("curses", 0, QEMU_OPTION_curses, STEXI @item -curses @findex -curses -Normally, QEMU uses SDL to display the VGA output. With this option, -QEMU can display the VGA output when in text mode using a -curses/ncurses interface. Nothing is displayed in graphical mode. +Normally, if QEMU is compiled with graphical window support, it displays +output such as guest graphics, guest console, and the QEMU monitor in a +window. With this option, QEMU can display the VGA output when in text +mode using a curses/ncurses interface. Nothing is displayed in graphical +mode. ETEXI DEF("no-frame", 0, QEMU_OPTION_no_frame, @@ -1243,13 +1246,14 @@ DEF("vnc", HAS_ARG, QEMU_OPTION_vnc , STEXI @item -vnc @var{display}[,@var{option}[,@var{option}[,...]]] @findex -vnc -Normally, QEMU uses SDL to display the VGA output. With this option, -you can have QEMU listen on VNC display @var{display} and redirect the VGA -display over the VNC session. It is very useful to enable the usb -tablet device when using this option (option @option{-usbdevice -tablet}). When using the VNC display, you must use the @option{-k} -parameter to set the keyboard layout if you are not using en-us. Valid -syntax for the @var{display} is +Normally, if QEMU is compiled with graphical window support, it displays +output such as guest graphics, guest console, and the QEMU monitor in a +window. With this option, you can have QEMU listen on VNC display +@var{display} and redirect the VGA display over the VNC session. It is +very useful to enable the usb tablet device when using this option +(option @option{-usbdevice tablet}). When using the VNC display, you +must use the @option{-k} parameter to set the keyboard layout if you are +not using en-us. Valid syntax for the @var{display} is @table @option From 64eb7491d314dea6185c56d9b8feaa60bc3e1ce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Thu, 18 Aug 2016 09:31:26 +0200 Subject: [PATCH 18/58] lsi: print register names in debug prints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify lsi_reg_readb function to have a single exit point. Debug print can now contain the returned value. Signed-off-by: Hervé Poussineau Message-Id: <1471505489-1221-2-git-send-email-hpoussin@reactos.org> Signed-off-by: Paolo Bonzini Signed-off-by: Hervé Poussineau --- hw/scsi/lsi53c895a.c | 219 ++++++++++++++++++++++++++++--------------- 1 file changed, 146 insertions(+), 73 deletions(-) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index df205cdafe..07dc73ab56 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -34,6 +34,23 @@ do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__); exit(1);} while do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__);} while (0) #endif +#ifdef DEBUG_LSI_REG +static const char *names[] = { + "SCNTL0", "SCNTL1", "SCNTL2", "SCNTL3", "SCID", "SXFER", "SDID", "GPREG", + "SFBR", "SOCL", "SSID", "SBCL", "DSTAT", "SSTAT0", "SSTAT1", "SSTAT2", + "DSA0", "DSA1", "DSA2", "DSA3", "ISTAT", "0x15", "0x16", "0x17", + "CTEST0", "CTEST1", "CTEST2", "CTEST3", "TEMP0", "TEMP1", "TEMP2", "TEMP3", + "DFIFO", "CTEST4", "CTEST5", "CTEST6", "DBC0", "DBC1", "DBC2", "DCMD", + "DNAD0", "DNAD1", "DNAD2", "DNAD3", "DSP0", "DSP1", "DSP2", "DSP3", + "DSPS0", "DSPS1", "DSPS2", "DSPS3", "SCRATCHA0", "SCRATCHA1", "SCRATCHA2", "SCRATCHA3", + "DMODE", "DIEN", "SBR", "DCNTL", "ADDER0", "ADDER1", "ADDER2", "ADDER3", + "SIEN0", "SIEN1", "SIST0", "SIST1", "SLPAR", "0x45", "MACNTL", "GPCNTL", + "STIME0", "STIME1", "RESPID", "0x4b", "STEST0", "STEST1", "STEST2", "STEST3", + "SIDL", "0x51", "0x52", "0x53", "SODL", "0x55", "0x56", "0x57", + "SBDL", "0x59", "0x5a", "0x5b", "SCRATCHB0", "SCRATCHB1", "SCRATCHB2", "SCRATCHB3", +}; +#endif + #define LSI_MAX_DEVS 7 #define LSI_SCNTL0_TRG 0x01 @@ -1480,155 +1497,200 @@ again: static uint8_t lsi_reg_readb(LSIState *s, int offset) { - uint8_t tmp; + uint8_t ret; + #define CASE_GET_REG24(name, addr) \ - case addr: return s->name & 0xff; \ - case addr + 1: return (s->name >> 8) & 0xff; \ - case addr + 2: return (s->name >> 16) & 0xff; + case addr: ret = s->name & 0xff; break; \ + case addr + 1: ret = (s->name >> 8) & 0xff; break; \ + case addr + 2: ret = (s->name >> 16) & 0xff; break; #define CASE_GET_REG32(name, addr) \ - case addr: return s->name & 0xff; \ - case addr + 1: return (s->name >> 8) & 0xff; \ - case addr + 2: return (s->name >> 16) & 0xff; \ - case addr + 3: return (s->name >> 24) & 0xff; + case addr: ret = s->name & 0xff; break; \ + case addr + 1: ret = (s->name >> 8) & 0xff; break; \ + case addr + 2: ret = (s->name >> 16) & 0xff; break; \ + case addr + 3: ret = (s->name >> 24) & 0xff; break; -#ifdef DEBUG_LSI_REG - DPRINTF("Read reg %x\n", offset); -#endif switch (offset) { case 0x00: /* SCNTL0 */ - return s->scntl0; + ret = s->scntl0; + break; case 0x01: /* SCNTL1 */ - return s->scntl1; + ret = s->scntl1; + break; case 0x02: /* SCNTL2 */ - return s->scntl2; + ret = s->scntl2; + break; case 0x03: /* SCNTL3 */ - return s->scntl3; + ret = s->scntl3; + break; case 0x04: /* SCID */ - return s->scid; + ret = s->scid; + break; case 0x05: /* SXFER */ - return s->sxfer; + ret = s->sxfer; + break; case 0x06: /* SDID */ - return s->sdid; + ret = s->sdid; + break; case 0x07: /* GPREG0 */ - return 0x7f; + ret = 0x7f; + break; case 0x08: /* Revision ID */ - return 0x00; + ret = 0x00; + break; case 0x09: /* SOCL */ - return s->socl; + ret = s->socl; + break; case 0xa: /* SSID */ - return s->ssid; + ret = s->ssid; + break; case 0xb: /* SBCL */ /* ??? This is not correct. However it's (hopefully) only used for diagnostics, so should be ok. */ - return 0; + ret = 0; + break; case 0xc: /* DSTAT */ - tmp = s->dstat | LSI_DSTAT_DFE; + ret = s->dstat | LSI_DSTAT_DFE; if ((s->istat0 & LSI_ISTAT0_INTF) == 0) s->dstat = 0; lsi_update_irq(s); - return tmp; + break; case 0x0d: /* SSTAT0 */ - return s->sstat0; + ret = s->sstat0; + break; case 0x0e: /* SSTAT1 */ - return s->sstat1; + ret = s->sstat1; + break; case 0x0f: /* SSTAT2 */ - return s->scntl1 & LSI_SCNTL1_CON ? 0 : 2; + ret = s->scntl1 & LSI_SCNTL1_CON ? 0 : 2; + break; CASE_GET_REG32(dsa, 0x10) case 0x14: /* ISTAT0 */ - return s->istat0; + ret = s->istat0; + break; case 0x15: /* ISTAT1 */ - return s->istat1; + ret = s->istat1; + break; case 0x16: /* MBOX0 */ - return s->mbox0; + ret = s->mbox0; + break; case 0x17: /* MBOX1 */ - return s->mbox1; + ret = s->mbox1; + break; case 0x18: /* CTEST0 */ - return 0xff; + ret = 0xff; + break; case 0x19: /* CTEST1 */ - return 0; + ret = 0; + break; case 0x1a: /* CTEST2 */ - tmp = s->ctest2 | LSI_CTEST2_DACK | LSI_CTEST2_CM; + ret = s->ctest2 | LSI_CTEST2_DACK | LSI_CTEST2_CM; if (s->istat0 & LSI_ISTAT0_SIGP) { s->istat0 &= ~LSI_ISTAT0_SIGP; - tmp |= LSI_CTEST2_SIGP; + ret |= LSI_CTEST2_SIGP; } - return tmp; + break; case 0x1b: /* CTEST3 */ - return s->ctest3; + ret = s->ctest3; + break; CASE_GET_REG32(temp, 0x1c) case 0x20: /* DFIFO */ - return 0; + ret = 0; + break; case 0x21: /* CTEST4 */ - return s->ctest4; + ret = s->ctest4; + break; case 0x22: /* CTEST5 */ - return s->ctest5; + ret = s->ctest5; + break; case 0x23: /* CTEST6 */ - return 0; + ret = 0; + break; CASE_GET_REG24(dbc, 0x24) case 0x27: /* DCMD */ - return s->dcmd; + ret = s->dcmd; + break; CASE_GET_REG32(dnad, 0x28) CASE_GET_REG32(dsp, 0x2c) CASE_GET_REG32(dsps, 0x30) CASE_GET_REG32(scratch[0], 0x34) case 0x38: /* DMODE */ - return s->dmode; + ret = s->dmode; + break; case 0x39: /* DIEN */ - return s->dien; + ret = s->dien; + break; case 0x3a: /* SBR */ - return s->sbr; + ret = s->sbr; + break; case 0x3b: /* DCNTL */ - return s->dcntl; + ret = s->dcntl; + break; /* ADDER Output (Debug of relative jump address) */ CASE_GET_REG32(adder, 0x3c) case 0x40: /* SIEN0 */ - return s->sien0; + ret = s->sien0; + break; case 0x41: /* SIEN1 */ - return s->sien1; + ret = s->sien1; + break; case 0x42: /* SIST0 */ - tmp = s->sist0; + ret = s->sist0; s->sist0 = 0; lsi_update_irq(s); - return tmp; + break; case 0x43: /* SIST1 */ - tmp = s->sist1; + ret = s->sist1; s->sist1 = 0; lsi_update_irq(s); - return tmp; + break; case 0x46: /* MACNTL */ - return 0x0f; + ret = 0x0f; + break; case 0x47: /* GPCNTL0 */ - return 0x0f; + ret = 0x0f; + break; case 0x48: /* STIME0 */ - return s->stime0; + ret = s->stime0; + break; case 0x4a: /* RESPID0 */ - return s->respid0; + ret = s->respid0; + break; case 0x4b: /* RESPID1 */ - return s->respid1; + ret = s->respid1; + break; case 0x4d: /* STEST1 */ - return s->stest1; + ret = s->stest1; + break; case 0x4e: /* STEST2 */ - return s->stest2; + ret = s->stest2; + break; case 0x4f: /* STEST3 */ - return s->stest3; + ret = s->stest3; + break; case 0x50: /* SIDL */ /* This is needed by the linux drivers. We currently only update it during the MSG IN phase. */ - return s->sidl; + ret = s->sidl; + break; case 0x52: /* STEST4 */ - return 0xe0; + ret = 0xe0; + break; case 0x56: /* CCNTL0 */ - return s->ccntl0; + ret = s->ccntl0; + break; case 0x57: /* CCNTL1 */ - return s->ccntl1; + ret = s->ccntl1; + break; case 0x58: /* SBDL */ /* Some drivers peek at the data bus during the MSG IN phase. */ if ((s->sstat1 & PHASE_MASK) == PHASE_MI) return s->msg[0]; - return 0; + ret = 0; + break; case 0x59: /* SBDL high */ - return 0; + ret = 0; + break; CASE_GET_REG32(mmrs, 0xa0) CASE_GET_REG32(mmws, 0xa4) CASE_GET_REG32(sfs, 0xa8) @@ -1643,18 +1705,28 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset) CASE_GET_REG32(ia, 0xd4) CASE_GET_REG32(sbc, 0xd8) CASE_GET_REG32(csbc, 0xdc) - } - if (offset >= 0x5c && offset < 0xa0) { + case 0x5c ... 0x9f: + { int n; int shift; n = (offset - 0x58) >> 2; shift = (offset & 3) * 8; - return (s->scratch[n] >> shift) & 0xff; + ret = (s->scratch[n] >> shift) & 0xff; + break; + } + default: + BADF("readb 0x%x\n", offset); + exit(1); } - BADF("readb 0x%x\n", offset); - exit(1); #undef CASE_GET_REG24 #undef CASE_GET_REG32 + +#ifdef DEBUG_LSI_REG + DPRINTF("Read reg %s %x = %02x\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", offset, ret); +#endif + + return ret; } static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) @@ -1671,7 +1743,8 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) case addr + 3: s->name &= 0x00ffffff; s->name |= val << 24; break; #ifdef DEBUG_LSI_REG - DPRINTF("Write reg %x = %02x\n", offset, val); + DPRINTF("Write reg %s %x = %02x\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", offset, val); #endif switch (offset) { case 0x00: /* SCNTL0 */ From 85a20bc4206689dc27668a14b170837a9cbaa0c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Thu, 18 Aug 2016 09:31:27 +0200 Subject: [PATCH 19/58] lsi: do not exit QEMU if reading invalid register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When guest accesses invalid register, return 0xff instead of exiting. Also add a log when reading or writing invalid registers. Signed-off-by: Hervé Poussineau Message-Id: <1471505489-1221-3-git-send-email-hpoussin@reactos.org> Signed-off-by: Paolo Bonzini Signed-off-by: Hervé Poussineau --- hw/scsi/lsi53c895a.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index 07dc73ab56..9d2e3eb1ba 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -19,6 +19,7 @@ #include "hw/pci/pci.h" #include "hw/scsi/scsi.h" #include "sysemu/dma.h" +#include "qemu/log.h" //#define DEBUG_LSI //#define DEBUG_LSI_REG @@ -34,7 +35,6 @@ do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__); exit(1);} while do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__);} while (0) #endif -#ifdef DEBUG_LSI_REG static const char *names[] = { "SCNTL0", "SCNTL1", "SCNTL2", "SCNTL3", "SCID", "SXFER", "SDID", "GPREG", "SFBR", "SOCL", "SSID", "SBCL", "DSTAT", "SSTAT0", "SSTAT1", "SSTAT2", @@ -49,7 +49,6 @@ static const char *names[] = { "SIDL", "0x51", "0x52", "0x53", "SODL", "0x55", "0x56", "0x57", "SBDL", "0x59", "0x5a", "0x5b", "SCRATCHB0", "SCRATCHB1", "SCRATCHB2", "SCRATCHB3", }; -#endif #define LSI_MAX_DEVS 7 @@ -1715,8 +1714,14 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset) break; } default: - BADF("readb 0x%x\n", offset); - exit(1); + { + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: invalid read from reg %s %x\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", + offset); + ret = 0xff; + break; + } } #undef CASE_GET_REG24 #undef CASE_GET_REG32 @@ -1959,7 +1964,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) shift = (offset & 3) * 8; s->scratch[n] = deposit32(s->scratch[n], shift, 8, val); } else { - BADF("Unhandled writeb 0x%x = 0x%x\n", offset, val); + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: invalid write to reg %s %x (0x%02x)\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", + offset, val); } } #undef CASE_SET_REG24 From a8632434c7e998be1ffe77871a8fc7ab98049cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Thu, 18 Aug 2016 09:31:28 +0200 Subject: [PATCH 20/58] lsi: implement I/O memory space for Memory Move instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory Move instructions can read/write data either from PCI memory or from PCI I/O. Implement second case. Windows 98 now works with LSI 53C810A adapter. Signed-off-by: Hervé Poussineau Message-Id: <1471505489-1221-4-git-send-email-hpoussin@reactos.org> Signed-off-by: Paolo Bonzini Signed-off-by: Hervé Poussineau --- hw/scsi/lsi53c895a.c | 49 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index 9d2e3eb1ba..2e99d5e93b 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -210,6 +210,7 @@ typedef struct { MemoryRegion mmio_io; MemoryRegion ram_io; MemoryRegion io_io; + AddressSpace pci_io_as; int carry; /* ??? Should this be an a visible register somewhere? */ int status; @@ -407,6 +408,30 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val); static void lsi_execute_script(LSIState *s); static void lsi_reselect(LSIState *s, lsi_request *p); +static inline int lsi_mem_read(LSIState *s, dma_addr_t addr, + void *buf, dma_addr_t len) +{ + if (s->dmode & LSI_DMODE_SIOM) { + address_space_read(&s->pci_io_as, addr, MEMTXATTRS_UNSPECIFIED, + buf, len); + return 0; + } else { + return pci_dma_read(PCI_DEVICE(s), addr, buf, len); + } +} + +static inline int lsi_mem_write(LSIState *s, dma_addr_t addr, + const void *buf, dma_addr_t len) +{ + if (s->dmode & LSI_DMODE_DIOM) { + address_space_write(&s->pci_io_as, addr, MEMTXATTRS_UNSPECIFIED, + buf, len); + return 0; + } else { + return pci_dma_write(PCI_DEVICE(s), addr, buf, len); + } +} + static inline uint32_t read_dword(LSIState *s, uint32_t addr) { uint32_t buf; @@ -550,7 +575,6 @@ static void lsi_bad_selection(LSIState *s, uint32_t id) /* Initiate a SCSI layer data transfer. */ static void lsi_do_dma(LSIState *s, int out) { - PCIDevice *pci_dev; uint32_t count; dma_addr_t addr; SCSIDevice *dev; @@ -562,7 +586,6 @@ static void lsi_do_dma(LSIState *s, int out) return; } - pci_dev = PCI_DEVICE(s); dev = s->current->req->dev; assert(dev); @@ -588,9 +611,9 @@ static void lsi_do_dma(LSIState *s, int out) } /* ??? Set SFBR to first data byte. */ if (out) { - pci_dma_read(pci_dev, addr, s->current->dma_buf, count); + lsi_mem_read(s, addr, s->current->dma_buf, count); } else { - pci_dma_write(pci_dev, addr, s->current->dma_buf, count); + lsi_mem_write(s, addr, s->current->dma_buf, count); } s->current->dma_len -= count; if (s->current->dma_len == 0) { @@ -1022,15 +1045,14 @@ bad: #define LSI_BUF_SIZE 4096 static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count) { - PCIDevice *d = PCI_DEVICE(s); int n; uint8_t buf[LSI_BUF_SIZE]; DPRINTF("memcpy dest 0x%08x src 0x%08x count %d\n", dest, src, count); while (count) { n = (count > LSI_BUF_SIZE) ? LSI_BUF_SIZE : count; - pci_dma_read(d, src, buf, n); - pci_dma_write(d, dest, buf, n); + lsi_mem_read(s, src, buf, n); + lsi_mem_write(s, dest, buf, n); src += n; dest += n; count -= n; @@ -1877,9 +1899,6 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) CASE_SET_REG32(dsps, 0x30) CASE_SET_REG32(scratch[0], 0x34) case 0x38: /* DMODE */ - if (val & (LSI_DMODE_SIOM | LSI_DMODE_DIOM)) { - BADF("IO mappings not implemented\n"); - } s->dmode = val; break; case 0x39: /* DIEN */ @@ -2189,6 +2208,8 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, "lsi-io", 256); + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io_io); pci_register_bar(dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio_io); pci_register_bar(dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ram_io); @@ -2200,6 +2221,13 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) } } +static void lsi_scsi_unrealize(DeviceState *dev, Error **errp) +{ + LSIState *s = LSI53C895A(dev); + + address_space_destroy(&s->pci_io_as); +} + static void lsi_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -2210,6 +2238,7 @@ static void lsi_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_LSI_53C895A; k->class_id = PCI_CLASS_STORAGE_SCSI; k->subsystem_id = 0x1000; + dc->unrealize = lsi_scsi_unrealize; dc->reset = lsi_scsi_reset; dc->vmsd = &vmstate_lsi_scsi; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); From 98f62e3d5d7cd25b025f8410005cc3898dfa16e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Poussineau?= Date: Thu, 18 Aug 2016 09:31:29 +0200 Subject: [PATCH 21/58] lsi: never set DMA FIFO Empty (DFE) bit in DSTAT register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 53C895A datasheet says: "This bit (DFE) is a pure status bit and will not cause an interrupt" This bit is already auto-generated in lsi_read_reg when reading the DSTAT register. This fixes IBM RS/6000 7020 firmware, which is: - resetting the adapter - enabling all interrupt sources (including DIP, ie interrupts from DSTAT) - waiting for ISTAT0 to become 0 (including DIP=0, ie no interrupt coming from DSTAT) Signed-off-by: Hervé Poussineau Message-Id: <1471505489-1221-5-git-send-email-hpoussin@reactos.org> Signed-off-by: Paolo Bonzini Signed-off-by: Hervé Poussineau --- hw/scsi/lsi53c895a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index 2e99d5e93b..feb1191315 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -326,7 +326,7 @@ static void lsi_soft_reset(LSIState *s) s->istat0 = 0; s->istat1 = 0; s->dcmd = 0x40; - s->dstat = LSI_DSTAT_DFE; + s->dstat = 0; s->dien = 0; s->sist0 = 0; s->sist1 = 0; From b1ed728a61183cba0ed0c78885c8f7ee0c42d87b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 24 Aug 2016 16:04:56 +0200 Subject: [PATCH 22/58] MAINTAINERS: add myself as stubs maintainers Let's just remove some files from the pool of unmaintained files. I am obviously not going to send pull requests only for stubs/, but I will ack them if maintainers want that. Signed-off-by: Paolo Bonzini --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index b6fb84e826..c63671bcac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1257,6 +1257,11 @@ F: net/slirp.c F: include/net/slirp.h T: git git://git.kiszka.org/qemu.git queues/slirp +Stubs +M: Paolo Bonzini +S: Maintained +F: stubs/ + Tracing M: Stefan Hajnoczi S: Maintained From 48b6206305b8d56524ac2ee347b68e6e0a528559 Mon Sep 17 00:00:00 2001 From: Rony Weng Date: Mon, 29 Aug 2016 15:52:18 +0800 Subject: [PATCH 23/58] scsi-disk: change disk serial length from 20 to 36 Openstack Cinder assigns volume a 36 characters uuid as serial. QEMU will shrinks the uuid to 20 characters, which does not match the original uuid. Note that there is no limit to the length of the serial number in the SCSI spec. 20 was copy-pasted from virtio-blk which in turn was copy-pasted from ATA; 36 is even more arbitrary. However, bumping it up too much might cause issues (e.g. 252 seems to make sense because then the maximum amount of returned data is 256; but who knows there's no off-by-one somewhere for such a nicely rounded number). Signed-off-by: Rony Weng Message-Id: <1472457138-23386-1-git-send-email-ronyweng@synology.com> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 99c9d618da..77cba31e30 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -599,8 +599,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) } l = strlen(s->serial); - if (l > 20) { - l = 20; + if (l > 36) { + l = 36; } DPRINTF("Inquiry EVPD[Serial number] " From 7f61f4690dd153be98900a2a508b88989e692753 Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Wed, 31 Aug 2016 12:19:29 +0530 Subject: [PATCH 24/58] vmw_pvscsi: check page count while initialising descriptor rings Vmware Paravirtual SCSI emulation uses command descriptors to process SCSI commands. These descriptors come with their ring buffers. A guest could set the page count for these rings to an arbitrary value, leading to infinite loop or OOB access. Add check to avoid it. Reported-by: Tom Victor Signed-off-by: Prasad J Pandit Message-Id: <1472626169-12989-1-git-send-email-ppandit@redhat.com> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/vmw_pvscsi.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index 5116f4ad68..4245c15101 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -152,7 +152,7 @@ pvscsi_log2(uint32_t input) return log; } -static int +static void pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri) { int i; @@ -160,10 +160,6 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri) uint32_t req_ring_size, cmp_ring_size; m->rs_pa = ri->ringsStatePPN << VMW_PAGE_SHIFT; - if ((ri->reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES) - || (ri->cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)) { - return -1; - } req_ring_size = ri->reqRingNumPages * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE; txr_len_log2 = pvscsi_log2(req_ring_size - 1); @@ -195,8 +191,6 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri) /* Flush ring state page changes */ smp_wmb(); - - return 0; } static int @@ -746,7 +740,7 @@ pvscsi_dbg_dump_tx_rings_config(PVSCSICmdDescSetupRings *rc) trace_pvscsi_tx_rings_num_pages("Confirm Ring", rc->cmpRingNumPages); for (i = 0; i < rc->cmpRingNumPages; i++) { - trace_pvscsi_tx_rings_ppn("Confirm Ring", rc->reqRingPPNs[i]); + trace_pvscsi_tx_rings_ppn("Confirm Ring", rc->cmpRingPPNs[i]); } } @@ -779,11 +773,16 @@ pvscsi_on_cmd_setup_rings(PVSCSIState *s) trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_RINGS"); - pvscsi_dbg_dump_tx_rings_config(rc); - if (pvscsi_ring_init_data(&s->rings, rc) < 0) { + if (!rc->reqRingNumPages + || rc->reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES + || !rc->cmpRingNumPages + || rc->cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES) { return PVSCSI_COMMAND_PROCESSING_FAILED; } + pvscsi_dbg_dump_tx_rings_config(rc); + pvscsi_ring_init_data(&s->rings, rc); + s->rings_info_valid = TRUE; return PVSCSI_COMMAND_PROCESSING_SUCCEEDED; } From cf2bce203a45d7437029d108357fb23fea0967b6 Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Wed, 31 Aug 2016 17:36:07 +0530 Subject: [PATCH 25/58] scsi: mptconfig: fix an assert expression When LSI SAS1068 Host Bus emulator builds configuration page headers, mptsas_config_pack() should assert that the size fits in a byte. However, the size is expressed in 32-bit units, so up to 1020 bytes fit. The assertion was only allowing replies up to 252 bytes, so fix it. Suggested-by: Paolo Bonzini Signed-off-by: Prasad J Pandit Message-Id: <1472645167-30765-2-git-send-email-ppandit@redhat.com> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/mptconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/mptconfig.c b/hw/scsi/mptconfig.c index 707185469e..3e4f400115 100644 --- a/hw/scsi/mptconfig.c +++ b/hw/scsi/mptconfig.c @@ -158,7 +158,7 @@ static size_t mptsas_config_pack(uint8_t **data, const char *fmt, ...) va_end(ap); if (data) { - assert(ret < 256 && (ret % 4) == 0); + assert(ret / 4 < 256 && (ret % 4) == 0); stb_p(*data + 1, ret / 4); } return ret; From 65a8e1f6413a0f6f79894da710b5d6d43361d27d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Aug 2016 11:35:37 +0200 Subject: [PATCH 26/58] scsi: mptconfig: fix misuse of MPTSAS_CONFIG_PACK These issues cause respectively a QEMU crash and a leak of 2 bytes of stack. They were discovered by VictorV of 360 Marvel Team. Reported-by: Tom Victor Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/mptconfig.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/scsi/mptconfig.c b/hw/scsi/mptconfig.c index 3e4f400115..87a416a5cb 100644 --- a/hw/scsi/mptconfig.c +++ b/hw/scsi/mptconfig.c @@ -203,7 +203,7 @@ size_t mptsas_config_manufacturing_1(MPTSASState *s, uint8_t **data, int address { /* VPD - all zeros */ return MPTSAS_CONFIG_PACK(1, MPI_CONFIG_PAGETYPE_MANUFACTURING, 0x00, - "s256"); + "*s256"); } static @@ -328,7 +328,7 @@ size_t mptsas_config_ioc_0(MPTSASState *s, uint8_t **data, int address) return MPTSAS_CONFIG_PACK(0, MPI_CONFIG_PAGETYPE_IOC, 0x01, "*l*lwwb*b*b*blww", pcic->vendor_id, pcic->device_id, pcic->revision, - pcic->subsystem_vendor_id, + pcic->class_id, pcic->subsystem_vendor_id, pcic->subsystem_id); } From 6a7b47a786bb16d36ad6f1733138d4aeb233bb3d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Thu, 25 Aug 2016 23:10:24 -0400 Subject: [PATCH 27/58] vmxcap: Show raw MSR value This will be helpful to allow checking of bits that are not in the 'bits' table yet. Signed-off-by: Eduardo Habkost Message-Id: <1472181025-10889-2-git-send-email-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/vmxcap | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 8f0371f498..9af71ead1b 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -79,6 +79,7 @@ class Misc(object): def show(self): print self.name value = msr().read(self.msr, 0) + print ' Hex: 0x%x' % (value) def first_bit(key): if type(key) is tuple: return key[0] From 349cb2fbfdcbc9f6770389f3b69422e8c441042d Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Thu, 25 Aug 2016 23:10:25 -0400 Subject: [PATCH 28/58] vmxcap: Add TSC scaling bit Signed-off-by: Eduardo Habkost Message-Id: <1472181025-10889-3-git-send-email-ehabkost@redhat.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/vmxcap | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 9af71ead1b..222025525b 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -173,6 +173,7 @@ controls = [ 16: 'RDSEED exiting', 18: 'EPT-violation #VE', 20: 'Enable XSAVES/XRSTORS', + 25: 'TSC scaling', }, cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, ), From 173134467a585c66c43467c71b834cd0b488cf46 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Thu, 1 Sep 2016 10:21:19 +0800 Subject: [PATCH 29/58] doc/rcu: fix typo Signed-off-by: Cao jin Message-Id: <1472696479-3619-1-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- docs/rcu.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/rcu.txt b/docs/rcu.txt index 2f70954e82..a70b72c545 100644 --- a/docs/rcu.txt +++ b/docs/rcu.txt @@ -37,7 +37,7 @@ do not matter; as soon as all previous critical sections have finished, there cannot be any readers who hold references to the data structure, and these can now be safely reclaimed (e.g., freed or unref'ed). -Here is a picutre: +Here is a picture: thread 1 thread 2 thread 3 ------------------- ------------------------ ------------------- From 517b3d40166f18e321e964e5fe0d6260e92b2121 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 17 Aug 2016 01:13:52 +0800 Subject: [PATCH 30/58] chardev: Add 'help' option to print all available chardev backend types Signed-off-by: Lin Ma Message-Id: <20160816171352.17021-1-lma@suse.com> Signed-off-by: Paolo Bonzini --- qemu-char.c | 21 ++++++++++++++++----- qemu-options.hx | 3 +++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index 5f82ebb774..cf6a27a1bc 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -39,6 +39,7 @@ #include "io/channel-file.h" #include "io/channel-tls.h" #include "sysemu/replay.h" +#include "qemu/help_option.h" #include @@ -3879,16 +3880,26 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, const char *id = qemu_opts_id(opts); char *bid = NULL; - if (id == NULL) { - error_setg(errp, "chardev: no id specified"); - goto err; - } - if (qemu_opt_get(opts, "backend") == NULL) { error_setg(errp, "chardev: \"%s\" missing backend", qemu_opts_id(opts)); goto err; } + + if (is_help_option(qemu_opt_get(opts, "backend"))) { + fprintf(stderr, "Available chardev backend types:\n"); + for (i = backends; i; i = i->next) { + cd = i->data; + fprintf(stderr, "%s\n", cd->name); + } + exit(!is_help_option(qemu_opt_get(opts, "backend"))); + } + + if (id == NULL) { + error_setg(errp, "chardev: no id specified"); + goto err; + } + for (i = backends; i; i = i->next) { cd = i->data; diff --git a/qemu-options.hx b/qemu-options.hx index 52096268b7..4927939d5d 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2152,6 +2152,7 @@ The general form of a character device option is: ETEXI DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, + "-chardev help\n" "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n" " [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]\n" @@ -2217,6 +2218,8 @@ Backend is one of: @option{spiceport}. The specific backend will determine the applicable options. +Use "-chardev help" to print all available chardev backend types. + All devices must have an id, which can be any string up to 127 characters long. It is used to uniquely identify this device in other command line directives. From 74460f3431104a7c13c407e79aee18e64dffa91d Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 5 Sep 2016 22:11:41 +0200 Subject: [PATCH 31/58] MAINTAINERS: Fix wildcard for scsi headers get_maintainer.pl currently thinks that the scsi headers are currrently unmaintained. So let's fix the corresponding wildcard expression. Signed-off-by: Thomas Huth Message-Id: <1473106301-23102-1-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c63671bcac..ebde2a4e61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -831,7 +831,7 @@ T: git git://github.com/jasowang/qemu.git net SCSI M: Paolo Bonzini S: Supported -F: include/hw/scsi* +F: include/hw/scsi/* F: hw/scsi/* T: git git://github.com/bonzini/qemu.git scsi-next From a2feb3483e11a389bc93a5bbd40815d6cfdfb07f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 5 Sep 2016 23:31:15 +0200 Subject: [PATCH 32/58] MAINTAINERS: Add some header files to the PC chipset section These header files obviously belong to the PC chipset (since their names match the other .c files in this section). Signed-off-by: Thomas Huth Message-Id: <1473111075-25311-1-git-send-email-thuth@redhat.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ebde2a4e61..d48016653f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -715,6 +715,10 @@ F: hw/misc/pc-testdev.c F: hw/timer/hpet* F: hw/timer/i8254* F: hw/timer/mc146818rtc* +F: include/hw/i2c/pm_smbus.h +F: include/hw/timer/hpet.h +F: include/hw/timer/i8254* +F: include/hw/timer/mc146818rtc* Machine core M: Eduardo Habkost From 49adc5d3f8c6bb75e55ebfeab109c5c37dea65e8 Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Tue, 6 Sep 2016 02:20:43 +0530 Subject: [PATCH 33/58] scsi: pvscsi: limit loop to fetch SG list In PVSCSI paravirtual SCSI bus, pvscsi_convert_sglist can take a very long time or go into an infinite loop due to two different bugs: 1) the request descriptor data length is defined to be 64 bit. While building SG list from a request descriptor, it gets truncated to 32bit in routine 'pvscsi_convert_sglist'. This could lead to an infinite loop situation large 'dataLen' values when data_length is cast to uint32_t and chunk_size becomes always zero. Fix this by removing the incorrect cast. 2) pvscsi_get_next_sg_elem can be called arbitrarily many times if the element has a zero length. Get out of the loop early when this happens, by introducing an upper limit on the number of SG list elements. Reported-by: Li Qiang Signed-off-by: Prasad J Pandit Message-Id: <1473108643-12983-1-git-send-email-ppandit@redhat.com> Signed-off-by: Paolo Bonzini --- hw/scsi/vmw_pvscsi.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index 4245c15101..babac5a68a 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -40,6 +40,8 @@ #define PVSCSI_MAX_DEVS (64) #define PVSCSI_MSIX_NUM_VECTORS (1) +#define PVSCSI_MAX_SG_ELEM 2048 + #define PVSCSI_MAX_CMD_DATA_WORDS \ (sizeof(PVSCSICmdDescSetupRings)/sizeof(uint32_t)) @@ -628,17 +630,16 @@ pvscsi_queue_pending_descriptor(PVSCSIState *s, SCSIDevice **d, static void pvscsi_convert_sglist(PVSCSIRequest *r) { - int chunk_size; + uint32_t chunk_size, elmcnt = 0; uint64_t data_length = r->req.dataLen; PVSCSISGState sg = r->sg; - while (data_length) { - while (!sg.resid) { + while (data_length && elmcnt < PVSCSI_MAX_SG_ELEM) { + while (!sg.resid && elmcnt++ < PVSCSI_MAX_SG_ELEM) { pvscsi_get_next_sg_elem(&sg); trace_pvscsi_convert_sglist(r->req.context, r->sg.dataAddr, r->sg.resid); } - assert(data_length > 0); - chunk_size = MIN((unsigned) data_length, sg.resid); + chunk_size = MIN(data_length, sg.resid); if (chunk_size) { qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size); } From a3b6e2bb71e6495a44f24e2296ab4feb4b6d4818 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 5 Sep 2016 18:25:18 +0100 Subject: [PATCH 34/58] default-configs: remove CONFIG_PAM The CONFIG_PAM=y setting was added in commit c0907c9e6417cb959dfd9ef6873221536ec91351 Author: Paolo Bonzini Date: Tue Feb 5 15:06:20 2013 +0100 hw: move PCI bridges to hw/pci-* or hw/ARCH but nothing in that commit, nor anything pre-existing, ever referenced CONFIG_PAM. Signed-off-by: Daniel P. Berrange Message-Id: <1473096320-1638-2-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- default-configs/i386-softmmu.mak | 1 - default-configs/x86_64-softmmu.mak | 1 - 2 files changed, 2 deletions(-) diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index b177e52104..4a89631f28 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -37,7 +37,6 @@ CONFIG_I8259=y CONFIG_PFLASH_CFI01=y CONFIG_TPM_TIS=$(CONFIG_TPM) CONFIG_MC146818RTC=y -CONFIG_PAM=y CONFIG_PCI_PIIX=y CONFIG_WDT_IB700=y CONFIG_XEN_I386=$(CONFIG_XEN) diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 6e3b312c5f..b2bf736367 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -37,7 +37,6 @@ CONFIG_I8259=y CONFIG_PFLASH_CFI01=y CONFIG_TPM_TIS=$(CONFIG_TPM) CONFIG_MC146818RTC=y -CONFIG_PAM=y CONFIG_PCI_PIIX=y CONFIG_WDT_IB700=y CONFIG_XEN_I386=$(CONFIG_XEN) From dd32222b6ef3c9bd96fb7ea4b7a2029ab0c90620 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 5 Sep 2016 18:25:19 +0100 Subject: [PATCH 35/58] default-configs: removed obsolete CONFIG_ISA_MMIO The use of the CONFIG_ISA_MMIO setting was removed in commit 61fcb628627ea464dc1954f615ae13edfefd284f Author: Paolo Bonzini Date: Mon Jul 22 15:54:24 2013 +0200 isa_mmio: delete but this commit only removed it from some of the default config files. Signed-off-by: Daniel P. Berrange Message-Id: <1473096320-1638-3-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- default-configs/arm-softmmu.mak | 1 - default-configs/ppc-softmmu.mak | 1 - default-configs/ppc64-softmmu.mak | 1 - default-configs/sparc64-softmmu.mak | 1 - 4 files changed, 4 deletions(-) diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index 7a19863b18..e1243602e9 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -3,7 +3,6 @@ include pci.mak include usb.mak CONFIG_VGA=y -CONFIG_ISA_MMIO=y CONFIG_NAND=y CONFIG_ECC=y CONFIG_SERIAL=y diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak index 4befde3c7c..d4d0f9b192 100644 --- a/default-configs/ppc-softmmu.mak +++ b/default-configs/ppc-softmmu.mak @@ -3,7 +3,6 @@ include pci.mak include sound.mak include usb.mak -CONFIG_ISA_MMIO=y CONFIG_ESCC=y CONFIG_M48T59=y CONFIG_SERIAL=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak index c4be59f638..db5a4d6f5e 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -4,7 +4,6 @@ include pci.mak include sound.mak include usb.mak CONFIG_VIRTIO_VGA=y -CONFIG_ISA_MMIO=y CONFIG_ESCC=y CONFIG_M48T59=y CONFIG_SERIAL=y diff --git a/default-configs/sparc64-softmmu.mak b/default-configs/sparc64-softmmu.mak index 123bb993ef..c0cdd644c8 100644 --- a/default-configs/sparc64-softmmu.mak +++ b/default-configs/sparc64-softmmu.mak @@ -2,7 +2,6 @@ include pci.mak include usb.mak -CONFIG_ISA_MMIO=y CONFIG_M48T59=y CONFIG_PTIMER=y CONFIG_SERIAL=y From e270d00afa58c8a2903ec85db51407abc4e3269d Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Mon, 5 Sep 2016 18:25:20 +0100 Subject: [PATCH 36/58] default-configs: remove CONFIG_PIIX_PCI The CONFIG_PIIX_PCI=y setting was added in commit 70615c38ded2a20ad8282b7dcde95482fc0a7744 Author: Blue Swirl Date: Mon Mar 22 20:18:40 2010 +0000 Compile sound devices only once but nothing in that commit, nor anything pre-existing, ever referenced CONFIG_PIIX_PCI. Signed-off-by: Daniel P. Berrange Message-Id: <1473096320-1638-4-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- default-configs/i386-softmmu.mak | 1 - default-configs/x86_64-softmmu.mak | 1 - 2 files changed, 2 deletions(-) diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index 4a89631f28..0b513602c8 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -30,7 +30,6 @@ CONFIG_I8257=y CONFIG_IDE_ISA=y CONFIG_IDE_PIIX=y CONFIG_NE2000_ISA=y -CONFIG_PIIX_PCI=y CONFIG_HPET=y CONFIG_APPLESMC=y CONFIG_I8259=y diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index b2bf736367..7f89503f95 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -30,7 +30,6 @@ CONFIG_I8257=y CONFIG_IDE_ISA=y CONFIG_IDE_PIIX=y CONFIG_NE2000_ISA=y -CONFIG_PIIX_PCI=y CONFIG_HPET=y CONFIG_APPLESMC=y CONFIG_I8259=y From b72981b910097b31f4d0b9c111a2d2cfd9ee585b Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 6 Sep 2016 14:56:02 +0100 Subject: [PATCH 37/58] ipmi: check return of qemu_chr_fe_write() for errors The continue_send() method in ipmi_bmc_extern.c directly assigns the return value of qemu_chr_fe_write() to the variable tracking the I/O buffer offset. This ignores the possibility that the return value could be -1 and so will cause I/O go backwards on EAGAIN. Fortunately 'outpos' is unsigned, so can't go negative - it will become MAX_INT which will cause the loop to stop, and avoid an accidental out of bounds array access. Signed-off-by: Daniel P. Berrange Message-Id: <1473170165-540-2-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- hw/ipmi/ipmi_bmc_extern.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/ipmi/ipmi_bmc_extern.c b/hw/ipmi/ipmi_bmc_extern.c index 5b73983e7d..d93e3f3426 100644 --- a/hw/ipmi/ipmi_bmc_extern.c +++ b/hw/ipmi/ipmi_bmc_extern.c @@ -100,12 +100,16 @@ ipmb_checksum(const unsigned char *data, int size, unsigned char start) static void continue_send(IPMIBmcExtern *ibe) { + int ret; if (ibe->outlen == 0) { goto check_reset; } send: - ibe->outpos += qemu_chr_fe_write(ibe->chr, ibe->outbuf + ibe->outpos, - ibe->outlen - ibe->outpos); + ret = qemu_chr_fe_write(ibe->chr, ibe->outbuf + ibe->outpos, + ibe->outlen - ibe->outpos); + if (ret > 0) { + ibe->outpos += ret; + } if (ibe->outpos < ibe->outlen) { /* Not fully transmitted, try again in a 10ms */ timer_mod_ns(ibe->extern_timer, From 7983e829336f68b6df6952dd4b03493b1486fcf5 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 6 Sep 2016 14:56:03 +0100 Subject: [PATCH 38/58] sclpconsolelm: remove bogus check for -EAGAIN The write_console_data() method in sclpconsole-lm.c checks whether the return value of qemu_chr_fe_write() has the value of -EAGAIN and if so then increments the buffer offset by the value of EAGAIN. Fortunately qemu_chr_fe_write() will never return EAGAIN directly, rather it returns -1 with errno set to EAGAIN, so this broken code path was not reachable. The behaviour on EAGAIN was stil bad though, causing the write_console_data() to busy_wait repeatedly calling qemu_chr_fe_write() with no sleep between iters. Just remove all this loop logic and replace with a call to qemu_chr_fe_write_all(). Acked-by: Cornelia Huck Signed-off-by: Daniel P. Berrange Message-Id: <1473170165-540-3-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- hw/char/sclpconsole-lm.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c index a22ad8d016..dbe753147b 100644 --- a/hw/char/sclpconsole-lm.c +++ b/hw/char/sclpconsole-lm.c @@ -191,9 +191,6 @@ static int read_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr, */ static int write_console_data(SCLPEvent *event, const uint8_t *buf, int len) { - int ret = 0; - const uint8_t *buf_offset; - SCLPConsoleLM *scon = SCLPLM_CONSOLE(event); if (!scon->chr) { @@ -201,21 +198,9 @@ static int write_console_data(SCLPEvent *event, const uint8_t *buf, int len) return len; } - buf_offset = buf; - while (len > 0) { - ret = qemu_chr_fe_write(scon->chr, buf, len); - if (ret == 0) { - /* a pty doesn't seem to be connected - no error */ - len = 0; - } else if (ret == -EAGAIN || (ret > 0 && ret < len)) { - len -= ret; - buf_offset += ret; - } else { - len = 0; - } - } - - return ret; + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + return qemu_chr_fe_write_all(scon->chr, buf, len); } static int process_mdb(SCLPEvent *event, MDBO *mdbo) From 6ab3fc32ea640026726bc5f9f4db622d0954fb8a Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 6 Sep 2016 14:56:04 +0100 Subject: [PATCH 39/58] hw: replace most use of qemu_chr_fe_write with qemu_chr_fe_write_all The qemu_chr_fe_write method will return -1 on EAGAIN if the chardev backend write would block. Almost no callers of the qemu_chr_fe_write() method check the return value, instead blindly assuming data was successfully sent. In most cases this will lead to silent data loss on interactive consoles, but in some cases (eg RNG EGD) it'll just cause corruption of the protocol being spoken. We unfortunately can't fix the virtio-console code, due to a bug in the Linux guest drivers, which would cause the entire Linux kernel to hang if we delay processing of the incoming data in any way. Fixing this requires first fixing the guest driver to not hold spinlocks while writing to the hvc device backend. Fixes bug: https://bugs.launchpad.net/qemu/+bug/1586756 Signed-off-by: Daniel P. Berrange Message-Id: <1473170165-540-4-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- backends/rng-egd.c | 4 +++- gdbstub.c | 4 +++- hw/arm/omap2.c | 8 +++++--- hw/arm/pxa2xx.c | 4 +++- hw/arm/strongarm.c | 4 +++- hw/char/bcm2835_aux.c | 4 +++- hw/char/debugcon.c | 4 +++- hw/char/digic-uart.c | 2 ++ hw/char/escc.c | 4 +++- hw/char/etraxfs_ser.c | 4 +++- hw/char/exynos4210_uart.c | 4 +++- hw/char/grlib_apbuart.c | 4 +++- hw/char/imx_serial.c | 4 +++- hw/char/ipoctal232.c | 4 +++- hw/char/lm32_juart.c | 2 ++ hw/char/lm32_uart.c | 2 ++ hw/char/mcf_uart.c | 4 +++- hw/char/parallel.c | 4 +++- hw/char/pl011.c | 4 +++- hw/char/sclpconsole-lm.c | 4 +++- hw/char/sclpconsole.c | 2 ++ hw/char/sh_serial.c | 4 +++- hw/char/spapr_vty.c | 5 +++-- hw/char/stm32f2xx_usart.c | 2 ++ hw/char/virtio-console.c | 21 +++++++++++++++++++++ hw/char/xilinx_uartlite.c | 4 +++- hw/usb/ccid-card-passthru.c | 7 +++++-- hw/usb/dev-serial.c | 4 +++- slirp/slirp.c | 4 +++- 29 files changed, 104 insertions(+), 27 deletions(-) diff --git a/backends/rng-egd.c b/backends/rng-egd.c index 7a1b9242d8..ba17c075cf 100644 --- a/backends/rng-egd.c +++ b/backends/rng-egd.c @@ -41,7 +41,9 @@ static void rng_egd_request_entropy(RngBackend *b, RngRequest *req) header[0] = 0x02; header[1] = len; - qemu_chr_fe_write(s->chr, header, sizeof(header)); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, header, sizeof(header)); size -= len; } diff --git a/gdbstub.c b/gdbstub.c index 5da66f1794..ecea8c42cf 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -402,7 +402,9 @@ static void put_buffer(GDBState *s, const uint8_t *buf, int len) } } #else - qemu_chr_fe_write(s->chr, buf, len); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, buf, len); #endif } diff --git a/hw/arm/omap2.c b/hw/arm/omap2.c index 3a0d77714a..7e11c65cba 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -769,14 +769,16 @@ static void omap_sti_fifo_write(void *opaque, hwaddr addr, if (ch == STI_TRACE_CONTROL_CHANNEL) { /* Flush channel value. */ - qemu_chr_fe_write(s->chr, (const uint8_t *) "\r", 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, (const uint8_t *) "\r", 1); } else if (ch == STI_TRACE_CONSOLE_CHANNEL || 1) { if (value == 0xc0 || value == 0xc3) { /* Open channel ch. */ } else if (value == 0x00) - qemu_chr_fe_write(s->chr, (const uint8_t *) "\n", 1); + qemu_chr_fe_write_all(s->chr, (const uint8_t *) "\n", 1); else - qemu_chr_fe_write(s->chr, &byte, 1); + qemu_chr_fe_write_all(s->chr, &byte, 1); } } diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index cb55704687..0241e07d84 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -1903,7 +1903,9 @@ static void pxa2xx_fir_write(void *opaque, hwaddr addr, else ch = ~value; if (s->chr && s->enable && (s->control[0] & (1 << 3))) /* TXE */ - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); break; case ICSR0: s->status[0] &= ~(value & 0x66); diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c index f1b2c6c966..021cbf9a0f 100644 --- a/hw/arm/strongarm.c +++ b/hw/arm/strongarm.c @@ -1108,7 +1108,9 @@ static void strongarm_uart_tx(void *opaque) if (s->utcr3 & UTCR3_LBM) /* loopback */ { strongarm_uart_receive(s, &s->tx_fifo[s->tx_start], 1); } else if (s->chr) { - qemu_chr_fe_write(s->chr, &s->tx_fifo[s->tx_start], 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &s->tx_fifo[s->tx_start], 1); } s->tx_start = (s->tx_start + 1) % 8; diff --git a/hw/char/bcm2835_aux.c b/hw/char/bcm2835_aux.c index 319f1652f6..f7a845d3e2 100644 --- a/hw/char/bcm2835_aux.c +++ b/hw/char/bcm2835_aux.c @@ -169,7 +169,9 @@ static void bcm2835_aux_write(void *opaque, hwaddr offset, uint64_t value, /* "DLAB bit set means access baudrate register" is NYI */ ch = value; if (s->chr) { - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); } break; diff --git a/hw/char/debugcon.c b/hw/char/debugcon.c index e7f025ec67..4402033861 100644 --- a/hw/char/debugcon.c +++ b/hw/char/debugcon.c @@ -60,7 +60,9 @@ static void debugcon_ioport_write(void *opaque, hwaddr addr, uint64_t val, printf(" [debugcon: write addr=0x%04" HWADDR_PRIx " val=0x%02" PRIx64 "]\n", addr, val); #endif - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); } diff --git a/hw/char/digic-uart.c b/hw/char/digic-uart.c index c7604e6766..e96a9b2d8d 100644 --- a/hw/char/digic-uart.c +++ b/hw/char/digic-uart.c @@ -77,6 +77,8 @@ static void digic_uart_write(void *opaque, hwaddr addr, uint64_t value, switch (addr) { case R_TX: if (s->chr) { + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ qemu_chr_fe_write_all(s->chr, &ch, 1); } break; diff --git a/hw/char/escc.c b/hw/char/escc.c index 31a5f902f9..aa1739762b 100644 --- a/hw/char/escc.c +++ b/hw/char/escc.c @@ -557,7 +557,9 @@ static void escc_mem_write(void *opaque, hwaddr addr, s->tx = val; if (s->wregs[W_TXCTRL2] & TXCTRL2_TXEN) { // tx enabled if (s->chr) - qemu_chr_fe_write(s->chr, &s->tx, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &s->tx, 1); else if (s->type == kbd && !s->disabled) { handle_kbd_command(s, val); } diff --git a/hw/char/etraxfs_ser.c b/hw/char/etraxfs_ser.c index 04ca04fe2c..c99cc5d130 100644 --- a/hw/char/etraxfs_ser.c +++ b/hw/char/etraxfs_ser.c @@ -126,7 +126,9 @@ ser_write(void *opaque, hwaddr addr, switch (addr) { case RW_DOUT: - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); s->regs[R_INTR] |= 3; s->pending_tx = 1; s->regs[addr] = value; diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c index 885ecc027b..1107578138 100644 --- a/hw/char/exynos4210_uart.c +++ b/hw/char/exynos4210_uart.c @@ -387,7 +387,9 @@ static void exynos4210_uart_write(void *opaque, hwaddr offset, s->reg[I_(UTRSTAT)] &= ~(UTRSTAT_TRANSMITTER_EMPTY | UTRSTAT_Tx_BUFFER_EMPTY); ch = (uint8_t)val; - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); #if DEBUG_Tx_DATA fprintf(stderr, "%c", ch); #endif diff --git a/hw/char/grlib_apbuart.c b/hw/char/grlib_apbuart.c index 871524c82f..778148a15e 100644 --- a/hw/char/grlib_apbuart.c +++ b/hw/char/grlib_apbuart.c @@ -203,7 +203,9 @@ static void grlib_apbuart_write(void *opaque, hwaddr addr, /* Transmit when character device available and transmitter enabled */ if ((uart->chr) && (uart->control & UART_TRANSMIT_ENABLE)) { c = value & 0xFF; - qemu_chr_fe_write(uart->chr, &c, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(uart->chr, &c, 1); /* Generate interrupt */ if (uart->control & UART_TRANSMIT_INTERRUPT) { qemu_irq_pulse(uart->irq); diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c index 44856d671e..5c3fa61e4c 100644 --- a/hw/char/imx_serial.c +++ b/hw/char/imx_serial.c @@ -182,7 +182,9 @@ static void imx_serial_write(void *opaque, hwaddr offset, ch = value; if (s->ucr2 & UCR2_TXEN) { if (s->chr) { - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); } s->usr1 &= ~USR1_TRDY; imx_update(s); diff --git a/hw/char/ipoctal232.c b/hw/char/ipoctal232.c index 9ead32af60..2859fdd7fb 100644 --- a/hw/char/ipoctal232.c +++ b/hw/char/ipoctal232.c @@ -360,7 +360,9 @@ static void io_write(IPackDevice *ip, uint8_t addr, uint16_t val) DPRINTF("Write THR%c (0x%x)\n", channel + 'a', reg); if (ch->dev) { uint8_t thr = reg; - qemu_chr_fe_write(ch->dev, &thr, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(ch->dev, &thr, 1); } } else { DPRINTF("Write THR%c (0x%x), Tx disabled\n", channel + 'a', reg); diff --git a/hw/char/lm32_juart.c b/hw/char/lm32_juart.c index 28c2cf702d..cb1ac76731 100644 --- a/hw/char/lm32_juart.c +++ b/hw/char/lm32_juart.c @@ -76,6 +76,8 @@ void lm32_juart_set_jtx(DeviceState *d, uint32_t jtx) s->jtx = jtx; if (s->chr) { + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ qemu_chr_fe_write_all(s->chr, &ch, 1); } } diff --git a/hw/char/lm32_uart.c b/hw/char/lm32_uart.c index b5c760dda3..be93697a39 100644 --- a/hw/char/lm32_uart.c +++ b/hw/char/lm32_uart.c @@ -178,6 +178,8 @@ static void uart_write(void *opaque, hwaddr addr, switch (addr) { case R_RXTX: if (s->chr) { + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ qemu_chr_fe_write_all(s->chr, &ch, 1); } break; diff --git a/hw/char/mcf_uart.c b/hw/char/mcf_uart.c index 3c0438fd79..c184859c83 100644 --- a/hw/char/mcf_uart.c +++ b/hw/char/mcf_uart.c @@ -114,7 +114,9 @@ static void mcf_uart_do_tx(mcf_uart_state *s) { if (s->tx_enabled && (s->sr & MCF_UART_TxEMP) == 0) { if (s->chr) - qemu_chr_fe_write(s->chr, (unsigned char *)&s->tb, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, (unsigned char *)&s->tb, 1); s->sr |= MCF_UART_TxEMP; } if (s->tx_enabled) { diff --git a/hw/char/parallel.c b/hw/char/parallel.c index fa085667ff..da22e36356 100644 --- a/hw/char/parallel.c +++ b/hw/char/parallel.c @@ -129,7 +129,9 @@ parallel_ioport_write_sw(void *opaque, uint32_t addr, uint32_t val) if (val & PARA_CTR_STROBE) { s->status &= ~PARA_STS_BUSY; if ((s->control & PARA_CTR_STROBE) == 0) - qemu_chr_fe_write(s->chr, &s->dataw, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &s->dataw, 1); } else { if (s->control & PARA_CTR_INTEN) { s->irq_pending = 1; diff --git a/hw/char/pl011.c b/hw/char/pl011.c index c0fbf8a874..786e605fdd 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c @@ -146,7 +146,9 @@ static void pl011_write(void *opaque, hwaddr offset, /* ??? Check if transmitter is enabled. */ ch = value; if (s->chr) - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); s->int_level |= PL011_INT_TX; pl011_update(s); break; diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c index dbe753147b..9a563269e6 100644 --- a/hw/char/sclpconsole-lm.c +++ b/hw/char/sclpconsole-lm.c @@ -89,7 +89,9 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) scon->buf[scon->length] = *buf; scon->length += 1; if (scon->echo) { - qemu_chr_fe_write(scon->chr, buf, size); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(scon->chr, buf, size); } } diff --git a/hw/char/sclpconsole.c b/hw/char/sclpconsole.c index d22464826b..a75ad4f60a 100644 --- a/hw/char/sclpconsole.c +++ b/hw/char/sclpconsole.c @@ -168,6 +168,8 @@ static ssize_t write_console_data(SCLPEvent *event, const uint8_t *buf, return len; } + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ return qemu_chr_fe_write_all(scon->chr, buf, len); } diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c index 4c55dcb7dc..97ce5629a4 100644 --- a/hw/char/sh_serial.c +++ b/hw/char/sh_serial.c @@ -111,7 +111,9 @@ static void sh_serial_write(void *opaque, hwaddr offs, case 0x0c: /* FTDR / TDR */ if (s->chr) { ch = val; - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); } s->dr = val; s->flags &= ~SH_SERIAL_FLAG_TDE; diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 3498d7b052..9aeafc0c42 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -60,8 +60,9 @@ void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len) { VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(sdev); - /* FIXME: should check the qemu_chr_fe_write() return value */ - qemu_chr_fe_write(dev->chardev, buf, len); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(dev->chardev, buf, len); } static void spapr_vty_realize(VIOsPAPRDevice *sdev, Error **errp) diff --git a/hw/char/stm32f2xx_usart.c b/hw/char/stm32f2xx_usart.c index 15657abda9..4c6640dbe9 100644 --- a/hw/char/stm32f2xx_usart.c +++ b/hw/char/stm32f2xx_usart.c @@ -153,6 +153,8 @@ static void stm32f2xx_usart_write(void *opaque, hwaddr addr, if (value < 0xF000) { ch = value; if (s->chr) { + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ qemu_chr_fe_write_all(s->chr, &ch, 1); } s->usart_sr |= USART_SR_TC; diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c index 4f0e03d3b7..d44c18c128 100644 --- a/hw/char/virtio-console.c +++ b/hw/char/virtio-console.c @@ -68,6 +68,27 @@ static ssize_t flush_buf(VirtIOSerialPort *port, */ if (ret < 0) ret = 0; + + /* XXX we should be queuing data to send later for the + * console devices too rather than silently dropping + * console data on EAGAIN. The Linux virtio-console + * hvc driver though does sends with spinlocks held, + * so if we enable throttling that'll stall the entire + * guest kernel, not merely the process writing to the + * console. + * + * While we could queue data for later write without + * enabling throttling, this would result in the guest + * being able to trigger arbitrary memory usage in QEMU + * buffering data for later writes. + * + * So fixing this problem likely requires fixing the + * Linux virtio-console hvc driver to not hold spinlocks + * while writing, and instead merely block the process + * that's writing. QEMU would then need some way to detect + * if the guest had the fixed driver too, before we can + * use throttling on host side. + */ if (!k->is_console) { virtio_serial_throttle_port(port, true); if (!vcon->watch) { diff --git a/hw/char/xilinx_uartlite.c b/hw/char/xilinx_uartlite.c index 4847efb29f..3766dc2c5b 100644 --- a/hw/char/xilinx_uartlite.c +++ b/hw/char/xilinx_uartlite.c @@ -144,7 +144,9 @@ uart_write(void *opaque, hwaddr addr, case R_TX: if (s->chr) - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->chr, &ch, 1); s->regs[addr] = value; diff --git a/hw/usb/ccid-card-passthru.c b/hw/usb/ccid-card-passthru.c index c0e90e501c..2eacea72f3 100644 --- a/hw/usb/ccid-card-passthru.c +++ b/hw/usb/ccid-card-passthru.c @@ -75,8 +75,11 @@ static void ccid_card_vscard_send_msg(PassthruState *s, scr_msg_header.type = htonl(type); scr_msg_header.reader_id = htonl(reader_id); scr_msg_header.length = htonl(length); - qemu_chr_fe_write(s->cs, (uint8_t *)&scr_msg_header, sizeof(VSCMsgHeader)); - qemu_chr_fe_write(s->cs, payload, length); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->cs, (uint8_t *)&scr_msg_header, + sizeof(VSCMsgHeader)); + qemu_chr_fe_write_all(s->cs, payload, length); } static void ccid_card_vscard_send_apdu(PassthruState *s, diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index ba8538e60e..966ad84b90 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -366,7 +366,9 @@ static void usb_serial_handle_data(USBDevice *dev, USBPacket *p) goto fail; for (i = 0; i < p->iov.niov; i++) { iov = p->iov.iov + i; - qemu_chr_fe_write(s->cs, iov->iov_base, iov->iov_len); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s->cs, iov->iov_base, iov->iov_len); } p->actual_length = p->iov.size; break; diff --git a/slirp/slirp.c b/slirp/slirp.c index d67eda12f4..6e2b4e5a90 100644 --- a/slirp/slirp.c +++ b/slirp/slirp.c @@ -1072,7 +1072,9 @@ int slirp_add_exec(Slirp *slirp, int do_pty, const void *args, ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) { if (so->s == -1 && so->extra) { - qemu_chr_fe_write(so->extra, buf, len); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(so->extra, buf, len); return len; } From 90f998f5f4267a0c22e983f533d19b9de1849283 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 6 Sep 2016 14:56:05 +0100 Subject: [PATCH 40/58] char: convert qemu_chr_fe_write to qemu_chr_fe_write_all The mux chardev was not checking the return value of any qemu_chr_fe_write() call so would silently loose data on EAGAIN. Similarly the qemu_chr_fe_printf method would not check errors and was not in a position to retry even if it could check. Signed-off-by: Daniel P. Berrange Message-Id: <1473170165-540-5-git-send-email-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-char.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/qemu-char.c b/qemu-char.c index cf6a27a1bc..7fa87a8b6e 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -441,7 +441,9 @@ void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...) va_list ap; va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); - qemu_chr_fe_write(s, (uint8_t *)buf, strlen(buf)); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(s, (uint8_t *)buf, strlen(buf)); va_end(ap); } @@ -557,7 +559,9 @@ static int mux_chr_write(CharDriverState *chr, const uint8_t *buf, int len) (secs / 60) % 60, secs % 60, (int)(ti % 1000)); - qemu_chr_fe_write(d->drv, (uint8_t *)buf1, strlen(buf1)); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(d->drv, (uint8_t *)buf1, strlen(buf1)); d->linestart = 0; } ret += qemu_chr_fe_write(d->drv, buf+i, 1); @@ -595,13 +599,15 @@ static void mux_print_help(CharDriverState *chr) "\n\rEscape-Char set to Ascii: 0x%02x\n\r\n\r", term_escape_char); } - qemu_chr_fe_write(chr, (uint8_t *)cbuf, strlen(cbuf)); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(chr, (uint8_t *)cbuf, strlen(cbuf)); for (i = 0; mux_help[i] != NULL; i++) { for (j=0; mux_help[i][j] != '\0'; j++) { if (mux_help[i][j] == '%') - qemu_chr_fe_write(chr, (uint8_t *)ebuf, strlen(ebuf)); + qemu_chr_fe_write_all(chr, (uint8_t *)ebuf, strlen(ebuf)); else - qemu_chr_fe_write(chr, (uint8_t *)&mux_help[i][j], 1); + qemu_chr_fe_write_all(chr, (uint8_t *)&mux_help[i][j], 1); } } } @@ -626,7 +632,7 @@ static int mux_proc_byte(CharDriverState *chr, MuxDriver *d, int ch) case 'x': { const char *term = "QEMU: Terminated\n\r"; - qemu_chr_fe_write(chr, (uint8_t *)term, strlen(term)); + qemu_chr_fe_write_all(chr, (uint8_t *)term, strlen(term)); exit(0); break; } From 421cc3e7e89cb807d3c5f6de486abb2167c8e792 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 8 Sep 2016 17:42:53 +0200 Subject: [PATCH 41/58] Revert "megasas: remove useless check for cmd->frame" This reverts commit 8cc46787b5b58f01a11c919c7ff939ed009e27fc. It turns out that cmd->frame can be NULL and thus the commit can cause a SIGSEGV Reported-by: Holger Schranz Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- hw/scsi/megasas.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index e968302fdc..52a41239cf 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -1981,7 +1981,11 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, break; } if (frame_status != MFI_STAT_INVALID_STATUS) { - cmd->frame->header.cmd_status = frame_status; + if (cmd->frame) { + cmd->frame->header.cmd_status = frame_status; + } else { + megasas_frame_set_cmd_status(s, frame_addr, frame_status); + } megasas_unmap_frame(s, cmd); megasas_complete_frame(s, cmd->context); } From a952c18683bbfa67f6c508c7417001a0bd1b2b97 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Wed, 31 Aug 2016 18:15:51 +0200 Subject: [PATCH 42/58] i8257: Make device "i8257" unavailable with -device The ISA DMA controller needs to be wired up to the ISA bus by isa_bus_dma() to actually work. Signed-off-by: Markus Armbruster Message-Id: <1472660151-19517-1-git-send-email-armbru@redhat.com> Signed-off-by: Paolo Bonzini --- hw/dma/i8257.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hw/dma/i8257.c b/hw/dma/i8257.c index bffbdea0ca..8bd82e8bc8 100644 --- a/hw/dma/i8257.c +++ b/hw/dma/i8257.c @@ -600,6 +600,8 @@ static void i8257_class_init(ObjectClass *klass, void *data) idc->release_DREQ = i8257_dma_release_DREQ; idc->schedule = i8257_dma_schedule; idc->register_channel = i8257_dma_register_channel; + /* Reason: needs to be wired up by isa_bus_dma() to work */ + dc->cannot_instantiate_with_device_add_yet = true; } static const TypeInfo i8257_info = { From c2cd627ddb13f62557aaf66305edb03cc3d9612d Mon Sep 17 00:00:00 2001 From: Cao jin Date: Mon, 12 Sep 2016 14:34:56 +0800 Subject: [PATCH 43/58] kvm-all: drop kvm_setup_guest_memory kvm_setup_guest_memory only does "madvise to QEMU_MADV_DONTFORK" and is only called by ram_block_add, which actually is duplicate code. Bonus: add simple comment for kvm_has_sync_mmu to make life easier. Suggested-by: Paolo Bonzini Signed-off-by: Cao jin Message-Id: <1473662096-32598-1-git-send-email-caoj.fnst@cn.fujitsu.com> Signed-off-by: Paolo Bonzini --- exec.c | 4 +--- include/sysemu/kvm.h | 1 - kvm-all.c | 15 +-------------- kvm-stub.c | 4 ---- 4 files changed, 2 insertions(+), 22 deletions(-) diff --git a/exec.c b/exec.c index ce3fb9ec8e..c81d5ab981 100644 --- a/exec.c +++ b/exec.c @@ -1621,10 +1621,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) if (new_block->host) { qemu_ram_setup_dump(new_block->host, new_block->max_length); qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE); + /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */ qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK); - if (kvm_enabled()) { - kvm_setup_guest_memory(new_block->host, new_block->max_length); - } } } diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index c9c243631e..4938f65c7a 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -221,7 +221,6 @@ int kvm_destroy_vcpu(CPUState *cpu); #ifdef NEED_CPU_H #include "cpu.h" -void kvm_setup_guest_memory(void *start, size_t size); void kvm_flush_coalesced_mmio_buffer(void); int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr, diff --git a/kvm-all.c b/kvm-all.c index ebf35b0c5b..8a4382eed8 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -2148,6 +2148,7 @@ void kvm_device_access(int fd, int group, uint64_t attr, } } +/* Return 1 on success, 0 on failure */ int kvm_has_sync_mmu(void) { return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU); @@ -2190,20 +2191,6 @@ int kvm_has_intx_set_mask(void) return kvm_state->intx_set_mask; } -void kvm_setup_guest_memory(void *start, size_t size) -{ - if (!kvm_has_sync_mmu()) { - int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK); - - if (ret) { - perror("qemu_madvise"); - fprintf(stderr, - "Need MADV_DONTFORK in absence of synchronous KVM MMU\n"); - exit(1); - } - } -} - #ifdef KVM_CAP_SET_GUEST_DEBUG struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, target_ulong pc) diff --git a/kvm-stub.c b/kvm-stub.c index 64e23f6be0..322712764f 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -73,10 +73,6 @@ int kvm_has_many_ioeventfds(void) return 0; } -void kvm_setup_guest_memory(void *start, size_t size) -{ -} - int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) { return -ENOSYS; From 705ac1ca539ba05ab52c3290ccb45f9e47aa7c5e Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Wed, 24 Aug 2016 16:44:23 -0400 Subject: [PATCH 44/58] atomics: Remove redundant barrier()'s Remove the redundant barrier() after the fence as agreed in previous discussion here: https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg00489.html Signed-off-by: Pranith Kumar Message-Id: <20160824204424.14041-3-bobby.prani@gmail.com> Signed-off-by: Paolo Bonzini --- include/qemu/atomic.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 43b06458f1..8348eccadb 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -72,16 +72,16 @@ * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends(). */ -#define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); barrier(); }) -#define smp_wmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); }) -#define smp_rmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); }) +#define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); }) +#define smp_wmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); }) +#define smp_rmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); }) /* Most compilers currently treat consume and acquire the same, but really * no processors except Alpha need a barrier here. Leave it in if * using Thread Sanitizer to avoid warnings, otherwise optimize it away. */ #if defined(__SANITIZE_THREAD__) -#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); }) +#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); }) #elsif defined(__alpha__) #define smp_read_barrier_depends() asm volatile("mb":::"memory") #else From 89943de17c4e276f2c47f05b4604e8816a6a636c Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 29 Aug 2016 13:17:01 -0400 Subject: [PATCH 45/58] atomics: Use __atomic_*_n() variant primitives Use the __atomic_*_n() primitives which take the value as argument. It is not necessary to store the value locally before calling the primitive, hence saving us a stack store and load. Signed-off-by: Pranith Kumar Message-Id: <20160829171701.14025-1-bobby.prani@gmail.com> Signed-off-by: Paolo Bonzini --- include/qemu/atomic.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 8348eccadb..0cce246ea9 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -96,15 +96,12 @@ #define atomic_read(ptr) \ ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _val; \ - __atomic_load(ptr, &_val, __ATOMIC_RELAXED); \ - _val; \ + __atomic_load_n(ptr, __ATOMIC_RELAXED); \ }) #define atomic_set(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ - __atomic_store(ptr, &_val, __ATOMIC_RELAXED); \ + __atomic_store_n(ptr, i, __ATOMIC_RELAXED); \ } while(0) /* See above: most compilers currently treat consume and acquire the @@ -129,8 +126,7 @@ #define atomic_rcu_set(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ - __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \ + __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) /* atomic_mb_read/set semantics map Java volatile variables. They are @@ -153,9 +149,8 @@ #define atomic_mb_set(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ smp_wmb(); \ - __atomic_store(ptr, &_val, __ATOMIC_RELAXED); \ + __atomic_store_n(ptr, i, __ATOMIC_RELAXED); \ smp_mb(); \ } while(0) #else @@ -169,8 +164,7 @@ #define atomic_mb_set(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ - __atomic_store(ptr, &_val, __ATOMIC_SEQ_CST); \ + __atomic_store_n(ptr, i, __ATOMIC_SEQ_CST); \ } while(0) #endif @@ -179,17 +173,15 @@ #define atomic_xchg(ptr, i) ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _new = (i), _old; \ - __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \ - _old; \ + __atomic_exchange_n(ptr, i, __ATOMIC_SEQ_CST); \ }) /* Returns the eventual value, failed or not */ #define atomic_cmpxchg(ptr, old, new) \ ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _old = (old), _new = (new); \ - __atomic_compare_exchange(ptr, &_old, &_new, false, \ + typeof_strip_qual(*ptr) _old = (old); \ + __atomic_compare_exchange_n(ptr, &_old, new, false, \ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ _old; \ }) From 0cebabd5e8277864ef87b4526cb1c9b3f0c06ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Llu=C3=ADs=20Vilanova?= Date: Wed, 7 Sep 2016 14:49:04 +0200 Subject: [PATCH 46/58] checkpatch: Fix whitespace checks for documentation code blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent blank lines in documentation code blocks to be signalled as incorrect trailing whitespace. Code blocks in documentation are 4-column aligned, and blank lines in them should have exactly 4 columns of trailing whitespace to prevent QEMU's wiki to render them as separate code blocks. Signed-off-by: Lluís Vilanova Message-Id: <147325254382.22644.5531276787733455773.stgit@fimbulvetr.bsc.es> Signed-off-by: Paolo Bonzini Signed-off-by: Lluís Vilanova --- scripts/checkpatch.pl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b0096a4460..dde3f5f9d9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1320,6 +1320,16 @@ sub process { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; ERROR("DOS line endings\n" . $herevet); + } elsif ($realfile =~ /^docs\/.+\.txt/ || + $realfile =~ /^docs\/.+\.md/) { + if ($rawline =~ /^\+\s+$/ && $rawline !~ /^\+ {4}$/) { + # TODO: properly check we're in a code block + # (surrounding text is 4-column aligned) + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + ERROR("code blocks in documentation should have " . + "empty lines with exactly 4 columns of " . + "whitespace\n" . $herevet); + } } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; ERROR("trailing whitespace\n" . $herevet); From 0342454f8aa6fc55e515bad26425533e10b58085 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 2 Sep 2016 17:36:23 +0200 Subject: [PATCH 47/58] optionrom: do not rely on compiler's bswap optimization Recent compilers can detect and inline manually-written bswap code, but GCC 4.2.1 (the last GPLv2 version) cannot and generates really awful code. Depending on how the compiler is configured, it might also not want to generate bswap because it was not in i386. Using asm is fine because TCG knows about bswap and all processors with virtualization extensions also do. Reported-by: Peter Maydell Signed-off-by: Paolo Bonzini --- pc-bios/linuxboot_dma.bin | Bin 1536 -> 1536 bytes pc-bios/optionrom/linuxboot_dma.c | 18 ++++-------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/pc-bios/linuxboot_dma.bin b/pc-bios/linuxboot_dma.bin index 238a195d3869995067f158d243d852778d38a736..218d3ab4a29bfb5ab7125ec7a4d29dad1860c673 100644 GIT binary patch delta 339 zcmZqRY2cX5I!ZFabcw)paf5ZeoP33G zdVSzOsF@rvS$43jB}`lbCe8sCR|Ja#Es#jtB?z$$=!diyoG|q~F!jP<^&kI2&6k0R mi-5&%g2lUqTvT|{c8Nj^@==jVdvS2{A0}qz$py>_jEn%58-ITQ delta 431 zcmXwyy-Pw-7{<>{qpRJ}@2)#oPZ9?Sj_1IY+UVhKVTXm|=QOWBvzry@$#;^g|K?zbJ)xjp@E!I)% zF%~h@`lJl`7o^VbIAj;9Z$q}B-Z>JlP=hoQPpID~vBIc<{3Gh0E~VQP8$yK~wSk_R zBR@F~t1la>C#XLxLl%WiS`iNEzKD~?1>_q7KFwI;Y&GR#3p?1wHg5bDp@(cf6Gc%O zenF&h6{lGQOJfaxMq_?R0|6&MXOSY|$Edk%tz}QOOD)&5W_3$8E7r01{rkUZ8+S&# z&xUiuZOxVD^nfFnpFArF8{vh?J0_pe`G(;+!(Gh}COZoS;UHL?3>8HV;ojxRR4itB Ee=`xOo&W#< diff --git a/pc-bios/optionrom/linuxboot_dma.c b/pc-bios/optionrom/linuxboot_dma.c index 7549797732..4754282ad7 100644 --- a/pc-bios/optionrom/linuxboot_dma.c +++ b/pc-bios/optionrom/linuxboot_dma.c @@ -122,24 +122,14 @@ static inline void writel_es(uint16_t offset, uint32_t val) static inline uint32_t bswap32(uint32_t x) { - return - ((x & 0x000000ffU) << 24) | - ((x & 0x0000ff00U) << 8) | - ((x & 0x00ff0000U) >> 8) | - ((x & 0xff000000U) >> 24); + asm("bswapl %0" : "=r" (x) : "0" (x)); + return x; } static inline uint64_t bswap64(uint64_t x) { - return - ((x & 0x00000000000000ffULL) << 56) | - ((x & 0x000000000000ff00ULL) << 40) | - ((x & 0x0000000000ff0000ULL) << 24) | - ((x & 0x00000000ff000000ULL) << 8) | - ((x & 0x000000ff00000000ULL) >> 8) | - ((x & 0x0000ff0000000000ULL) >> 24) | - ((x & 0x00ff000000000000ULL) >> 40) | - ((x & 0xff00000000000000ULL) >> 56); + asm("bswapl %%eax; bswapl %%edx; xchg %%eax, %%edx" : "=A" (x) : "0" (x)); + return x; } static inline uint64_t cpu_to_be64(uint64_t x) From 78d6a05d2f69cbfa6e95f0a4a24a2c934969913b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 12 Sep 2016 18:18:35 +0100 Subject: [PATCH 48/58] x86/lapic: Load LAPIC state at post_load Load the LAPIC state during post_load (rather than when the CPU starts). This allows an interrupt to be delivered from the ioapic to the lapic prior to cpu loading, in particular the RTC that starts ticking as soon as we load it's state. Fixes a case where Windows hangs after migration due to RTC interrupts disappearing. Signed-off-by: Dr. David Alan Gilbert Suggested-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- hw/i386/kvm/apic.c | 27 +++++++++++++++++++++++++-- include/sysemu/kvm.h | 1 - target-i386/kvm.c | 17 ----------------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 2bd0de82b4..5d140b9341 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -28,9 +28,8 @@ static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, return *((uint32_t *)(kapic->regs + (reg_id << 4))); } -void kvm_put_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) +static void kvm_put_apic_state(APICCommonState *s, struct kvm_lapic_state *kapic) { - APICCommonState *s = APIC_COMMON(dev); int i; memset(kapic, 0, sizeof(*kapic)); @@ -125,6 +124,27 @@ static void kvm_apic_vapic_base_update(APICCommonState *s) } } +static void kvm_apic_put(void *data) +{ + APICCommonState *s = data; + struct kvm_lapic_state kapic; + int ret; + + kvm_put_apic_state(s, &kapic); + + ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_LAPIC, &kapic); + if (ret < 0) { + fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(ret)); + abort(); + } +} + +static void kvm_apic_post_load(APICCommonState *s) +{ + fprintf(stderr, "%s: Yeh\n", __func__); + run_on_cpu(CPU(s->cpu), kvm_apic_put, s); +} + static void do_inject_external_nmi(void *data) { APICCommonState *s = data; @@ -178,6 +198,8 @@ static void kvm_apic_reset(APICCommonState *s) { /* Not used by KVM, which uses the CPU mp_state instead. */ s->wait_for_sipi = 0; + + run_on_cpu(CPU(s->cpu), kvm_apic_put, s); } static void kvm_apic_realize(DeviceState *dev, Error **errp) @@ -206,6 +228,7 @@ static void kvm_apic_class_init(ObjectClass *klass, void *data) k->set_base = kvm_apic_set_base; k->set_tpr = kvm_apic_set_tpr; k->get_tpr = kvm_apic_get_tpr; + k->post_load = kvm_apic_post_load; k->enable_tpr_reporting = kvm_apic_enable_tpr_reporting; k->vapic_base_update = kvm_apic_vapic_base_update; k->external_nmi = kvm_apic_external_nmi; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 4938f65c7a..f2a7b3b8fe 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -371,7 +371,6 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); -void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); struct kvm_guest_debug; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index d1a25c5465..f1ad805665 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2416,19 +2416,6 @@ static int kvm_get_apic(X86CPU *cpu) return 0; } -static int kvm_put_apic(X86CPU *cpu) -{ - DeviceState *apic = cpu->apic_state; - struct kvm_lapic_state kapic; - - if (apic && kvm_irqchip_in_kernel()) { - kvm_put_apic_state(apic, &kapic); - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_LAPIC, &kapic); - } - return 0; -} - static int kvm_put_vcpu_events(X86CPU *cpu, int level) { CPUState *cs = CPU(cpu); @@ -2670,10 +2657,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) if (ret < 0) { return ret; } - ret = kvm_put_apic(x86_cpu); - if (ret < 0) { - return ret; - } } ret = kvm_put_tscdeadline_msr(x86_cpu); From 2286459d3ac19eb0697503282ce7830907d055ea Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 13 Sep 2016 18:42:38 +0200 Subject: [PATCH 49/58] ppc: do not redefine CPUPPCState Just include the file that is supposed to bring it in. Signed-off-by: Paolo Bonzini --- include/hw/ppc/fdt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/hw/ppc/fdt.h b/include/hw/ppc/fdt.h index 2c68d1616f..0cabb6af04 100644 --- a/include/hw/ppc/fdt.h +++ b/include/hw/ppc/fdt.h @@ -11,8 +11,7 @@ #define PPC_FDT_H #include "qemu/error-report.h" - -typedef struct CPUPPCState CPUPPCState; +#include "target-ppc/cpu-qom.h" #define _FDT(exp) \ do { \ From 88ca8e80defa4ec92c90054f151212cd32deb359 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:12 -0700 Subject: [PATCH 50/58] cutils: Move buffer_is_zero and subroutines to a new file Signed-off-by: Richard Henderson Message-Id: <1472496380-19706-2-git-send-email-rth@twiddle.net> Signed-off-by: Paolo Bonzini --- util/Makefile.objs | 1 + util/bufferiszero.c | 272 ++++++++++++++++++++++++++++++++++++++++++++ util/cutils.c | 244 --------------------------------------- 3 files changed, 273 insertions(+), 244 deletions(-) create mode 100644 util/bufferiszero.c diff --git a/util/Makefile.objs b/util/Makefile.objs index 96cb1e0e58..ffca8f3002 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -1,4 +1,5 @@ util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o +util-obj-y += bufferiszero.o util-obj-$(CONFIG_POSIX) += compatfd.o util-obj-$(CONFIG_POSIX) += event_notifier-posix.o util-obj-$(CONFIG_POSIX) += mmap-alloc.o diff --git a/util/bufferiszero.c b/util/bufferiszero.c new file mode 100644 index 0000000000..9bb1ae5738 --- /dev/null +++ b/util/bufferiszero.c @@ -0,0 +1,272 @@ +/* + * Simple C functions to supplement the C library + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/cutils.h" + + +/* vector definitions */ +#ifdef __ALTIVEC__ +#include +/* The altivec.h header says we're allowed to undef these for + * C++ compatibility. Here we don't care about C++, but we + * undef them anyway to avoid namespace pollution. + */ +#undef vector +#undef pixel +#undef bool +#define VECTYPE __vector unsigned char +#define SPLAT(p) vec_splat(vec_ld(0, p), 0) +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) +#define VEC_OR(v1, v2) ((v1) | (v2)) +/* altivec.h may redefine the bool macro as vector type. + * Reset it to POSIX semantics. */ +#define bool _Bool +#elif defined __SSE2__ +#include +#define VECTYPE __m128i +#define SPLAT(p) _mm_set1_epi8(*(p)) +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) +#define VEC_OR(v1, v2) (_mm_or_si128(v1, v2)) +#elif defined(__aarch64__) +#include "arm_neon.h" +#define VECTYPE uint64x2_t +#define ALL_EQ(v1, v2) \ + ((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \ + (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1))) +#define VEC_OR(v1, v2) ((v1) | (v2)) +#else +#define VECTYPE unsigned long +#define SPLAT(p) (*(p) * (~0UL / 255)) +#define ALL_EQ(v1, v2) ((v1) == (v2)) +#define VEC_OR(v1, v2) ((v1) | (v2)) +#endif + +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 + +static bool +can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); +} + +/* + * Searches for an area with non-zero content in a buffer + * + * Attention! The len must be a multiple of + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * and addr must be a multiple of sizeof(VECTYPE) due to + * restriction of optimizations in this function. + * + * can_use_buffer_find_nonzero_offset_inner() can be used to + * check these requirements. + * + * The return value is the offset of the non-zero area rounded + * down to a multiple of sizeof(VECTYPE) for the first + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * afterwards. + * + * If the buffer is all zero the return value is equal to len. + */ + +static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) +{ + const VECTYPE *p = buf; + const VECTYPE zero = (VECTYPE){0}; + size_t i; + + assert(can_use_buffer_find_nonzero_offset_inner(buf, len)); + + if (!len) { + return 0; + } + + for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { + if (!ALL_EQ(p[i], zero)) { + return i * sizeof(VECTYPE); + } + } + + for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; + i < len / sizeof(VECTYPE); + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { + VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]); + VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]); + VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]); + VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]); + VECTYPE tmp01 = VEC_OR(tmp0, tmp1); + VECTYPE tmp23 = VEC_OR(tmp2, tmp3); + if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) { + break; + } + } + + return i * sizeof(VECTYPE); +} + +#if defined CONFIG_AVX2_OPT +#pragma GCC push_options +#pragma GCC target("avx2") +#include +#include + +#define AVX2_VECTYPE __m256i +#define AVX2_SPLAT(p) _mm256_set1_epi8(*(p)) +#define AVX2_ALL_EQ(v1, v2) \ + (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) +#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) + +static bool +can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len) +{ + return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR + * sizeof(AVX2_VECTYPE)) == 0 + && ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0); +} + +static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len) +{ + const AVX2_VECTYPE *p = buf; + const AVX2_VECTYPE zero = (AVX2_VECTYPE){0}; + size_t i; + + assert(can_use_buffer_find_nonzero_offset_avx2(buf, len)); + + if (!len) { + return 0; + } + + for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { + if (!AVX2_ALL_EQ(p[i], zero)) { + return i * sizeof(AVX2_VECTYPE); + } + } + + for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; + i < len / sizeof(AVX2_VECTYPE); + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { + AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]); + AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]); + AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]); + AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]); + AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1); + AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3); + if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) { + break; + } + } + + return i * sizeof(AVX2_VECTYPE); +} + +static bool avx2_support(void) +{ + int a, b, c, d; + + if (__get_cpuid_max(0, NULL) < 7) { + return false; + } + + __cpuid_count(7, 0, a, b, c, d); + + return b & bit_AVX2; +} + +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ + __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc"))); +size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ + __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc"))); + +static void *buffer_find_nonzero_offset_ifunc(void) +{ + typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ? + buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner; + + return func; +} + +static void *can_use_buffer_find_nonzero_offset_ifunc(void) +{ + typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ? + can_use_buffer_find_nonzero_offset_avx2 : + can_use_buffer_find_nonzero_offset_inner; + + return func; +} +#pragma GCC pop_options +#else +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return can_use_buffer_find_nonzero_offset_inner(buf, len); +} + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ + return buffer_find_nonzero_offset_inner(buf, len); +} +#endif + +/* + * Checks if a buffer is all zeroes + * + * Attention! The len must be a multiple of 4 * sizeof(long) due to + * restriction of optimizations in this function. + */ +bool buffer_is_zero(const void *buf, size_t len) +{ + /* + * Use long as the biggest available internal data type that fits into the + * CPU register and unroll the loop to smooth out the effect of memory + * latency. + */ + + size_t i; + long d0, d1, d2, d3; + const long * const data = buf; + + /* use vector optimized zero check if possible */ + if (can_use_buffer_find_nonzero_offset(buf, len)) { + return buffer_find_nonzero_offset(buf, len) == len; + } + + assert(len % (4 * sizeof(long)) == 0); + len /= sizeof(long); + + for (i = 0; i < len; i += 4) { + d0 = data[i + 0]; + d1 = data[i + 1]; + d2 = data[i + 2]; + d3 = data[i + 3]; + + if (d0 || d1 || d2 || d3) { + return false; + } + } + + return true; +} + diff --git a/util/cutils.c b/util/cutils.c index 7505fdaa81..4fefcf3be3 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -161,250 +161,6 @@ int qemu_fdatasync(int fd) #endif } -/* vector definitions */ -#ifdef __ALTIVEC__ -#include -/* The altivec.h header says we're allowed to undef these for - * C++ compatibility. Here we don't care about C++, but we - * undef them anyway to avoid namespace pollution. - */ -#undef vector -#undef pixel -#undef bool -#define VECTYPE __vector unsigned char -#define SPLAT(p) vec_splat(vec_ld(0, p), 0) -#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) -#define VEC_OR(v1, v2) ((v1) | (v2)) -/* altivec.h may redefine the bool macro as vector type. - * Reset it to POSIX semantics. */ -#define bool _Bool -#elif defined __SSE2__ -#include -#define VECTYPE __m128i -#define SPLAT(p) _mm_set1_epi8(*(p)) -#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) -#define VEC_OR(v1, v2) (_mm_or_si128(v1, v2)) -#elif defined(__aarch64__) -#include "arm_neon.h" -#define VECTYPE uint64x2_t -#define ALL_EQ(v1, v2) \ - ((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \ - (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1))) -#define VEC_OR(v1, v2) ((v1) | (v2)) -#else -#define VECTYPE unsigned long -#define SPLAT(p) (*(p) * (~0UL / 255)) -#define ALL_EQ(v1, v2) ((v1) == (v2)) -#define VEC_OR(v1, v2) ((v1) | (v2)) -#endif - -#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 - -static bool -can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len) -{ - return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR - * sizeof(VECTYPE)) == 0 - && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); -} - -/* - * Searches for an area with non-zero content in a buffer - * - * Attention! The len must be a multiple of - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) - * and addr must be a multiple of sizeof(VECTYPE) due to - * restriction of optimizations in this function. - * - * can_use_buffer_find_nonzero_offset_inner() can be used to - * check these requirements. - * - * The return value is the offset of the non-zero area rounded - * down to a multiple of sizeof(VECTYPE) for the first - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) - * afterwards. - * - * If the buffer is all zero the return value is equal to len. - */ - -static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) -{ - const VECTYPE *p = buf; - const VECTYPE zero = (VECTYPE){0}; - size_t i; - - assert(can_use_buffer_find_nonzero_offset_inner(buf, len)); - - if (!len) { - return 0; - } - - for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { - if (!ALL_EQ(p[i], zero)) { - return i * sizeof(VECTYPE); - } - } - - for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; - i < len / sizeof(VECTYPE); - i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { - VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]); - VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]); - VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]); - VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]); - VECTYPE tmp01 = VEC_OR(tmp0, tmp1); - VECTYPE tmp23 = VEC_OR(tmp2, tmp3); - if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) { - break; - } - } - - return i * sizeof(VECTYPE); -} - -#if defined CONFIG_AVX2_OPT -#pragma GCC push_options -#pragma GCC target("avx2") -#include -#include - -#define AVX2_VECTYPE __m256i -#define AVX2_SPLAT(p) _mm256_set1_epi8(*(p)) -#define AVX2_ALL_EQ(v1, v2) \ - (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) -#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) - -static bool -can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len) -{ - return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR - * sizeof(AVX2_VECTYPE)) == 0 - && ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0); -} - -static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len) -{ - const AVX2_VECTYPE *p = buf; - const AVX2_VECTYPE zero = (AVX2_VECTYPE){0}; - size_t i; - - assert(can_use_buffer_find_nonzero_offset_avx2(buf, len)); - - if (!len) { - return 0; - } - - for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { - if (!AVX2_ALL_EQ(p[i], zero)) { - return i * sizeof(AVX2_VECTYPE); - } - } - - for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; - i < len / sizeof(AVX2_VECTYPE); - i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { - AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]); - AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]); - AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]); - AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]); - AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1); - AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3); - if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) { - break; - } - } - - return i * sizeof(AVX2_VECTYPE); -} - -static bool avx2_support(void) -{ - int a, b, c, d; - - if (__get_cpuid_max(0, NULL) < 7) { - return false; - } - - __cpuid_count(7, 0, a, b, c, d); - - return b & bit_AVX2; -} - -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ - __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc"))); -size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ - __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc"))); - -static void *buffer_find_nonzero_offset_ifunc(void) -{ - typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ? - buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner; - - return func; -} - -static void *can_use_buffer_find_nonzero_offset_ifunc(void) -{ - typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ? - can_use_buffer_find_nonzero_offset_avx2 : - can_use_buffer_find_nonzero_offset_inner; - - return func; -} -#pragma GCC pop_options -#else -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) -{ - return can_use_buffer_find_nonzero_offset_inner(buf, len); -} - -size_t buffer_find_nonzero_offset(const void *buf, size_t len) -{ - return buffer_find_nonzero_offset_inner(buf, len); -} -#endif - -/* - * Checks if a buffer is all zeroes - * - * Attention! The len must be a multiple of 4 * sizeof(long) due to - * restriction of optimizations in this function. - */ -bool buffer_is_zero(const void *buf, size_t len) -{ - /* - * Use long as the biggest available internal data type that fits into the - * CPU register and unroll the loop to smooth out the effect of memory - * latency. - */ - - size_t i; - long d0, d1, d2, d3; - const long * const data = buf; - - /* use vector optimized zero check if possible */ - if (can_use_buffer_find_nonzero_offset(buf, len)) { - return buffer_find_nonzero_offset(buf, len) == len; - } - - assert(len % (4 * sizeof(long)) == 0); - len /= sizeof(long); - - for (i = 0; i < len; i += 4) { - d0 = data[i + 0]; - d1 = data[i + 1]; - d2 = data[i + 2]; - d3 = data[i + 3]; - - if (d0 || d1 || d2 || d3) { - return false; - } - } - - return true; -} - #ifndef _WIN32 /* Sets a specific flag */ int fcntl_setfl(int fd, int flag) From 8c70c1b0c79cdfe9cc6e58c793b2b4e41aabede8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:13 -0700 Subject: [PATCH 51/58] cutils: Remove SPLAT macro This is unused and complicates the vector interface. Signed-off-by: Richard Henderson Message-Id: <1472496380-19706-3-git-send-email-rth@twiddle.net> Signed-off-by: Paolo Bonzini --- util/bufferiszero.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 9bb1ae5738..067d08f1ca 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -37,7 +37,6 @@ #undef pixel #undef bool #define VECTYPE __vector unsigned char -#define SPLAT(p) vec_splat(vec_ld(0, p), 0) #define ALL_EQ(v1, v2) vec_all_eq(v1, v2) #define VEC_OR(v1, v2) ((v1) | (v2)) /* altivec.h may redefine the bool macro as vector type. @@ -46,7 +45,6 @@ #elif defined __SSE2__ #include #define VECTYPE __m128i -#define SPLAT(p) _mm_set1_epi8(*(p)) #define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) #define VEC_OR(v1, v2) (_mm_or_si128(v1, v2)) #elif defined(__aarch64__) @@ -58,7 +56,6 @@ #define VEC_OR(v1, v2) ((v1) | (v2)) #else #define VECTYPE unsigned long -#define SPLAT(p) (*(p) * (~0UL / 255)) #define ALL_EQ(v1, v2) ((v1) == (v2)) #define VEC_OR(v1, v2) ((v1) | (v2)) #endif @@ -135,7 +132,6 @@ static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) #include #define AVX2_VECTYPE __m256i -#define AVX2_SPLAT(p) _mm256_set1_epi8(*(p)) #define AVX2_ALL_EQ(v1, v2) \ (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) #define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) From a1febc4950f2c6232c002f401d7cd409f6fa6a88 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:14 -0700 Subject: [PATCH 52/58] cutils: Export only buffer_is_zero Since the two users don't make use of the returned offset, beyond ensuring that the entire buffer is zero, consider the can_use_buffer_find_nonzero_offset and buffer_find_nonzero_offset functions internal. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Richard Henderson Message-Id: <1472496380-19706-4-git-send-email-rth@twiddle.net> Signed-off-by: Paolo Bonzini --- include/qemu/cutils.h | 2 -- migration/ram.c | 2 +- migration/rdma.c | 5 +---- util/bufferiszero.c | 8 ++++---- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h index 3e4ea236f0..ca58577565 100644 --- a/include/qemu/cutils.h +++ b/include/qemu/cutils.h @@ -168,8 +168,6 @@ int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, /* used to print char* safely */ #define STR_OR_NULL(str) ((str) ? (str) : "null") -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len); -size_t buffer_find_nonzero_offset(const void *buf, size_t len); bool buffer_is_zero(const void *buf, size_t len); /* diff --git a/migration/ram.c b/migration/ram.c index a3d70c4c62..a6e1c63a08 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -73,7 +73,7 @@ static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE]; static inline bool is_zero_range(uint8_t *p, uint64_t size) { - return buffer_find_nonzero_offset(p, size) == size; + return buffer_is_zero(p, size); } /* struct contains XBZRLE cache and a static page diff --git a/migration/rdma.c b/migration/rdma.c index 5110ec828d..88bdb64457 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -1934,10 +1934,7 @@ retry: * memset() + madvise() the entire chunk without RDMA. */ - if (can_use_buffer_find_nonzero_offset((void *)(uintptr_t)sge.addr, - length) - && buffer_find_nonzero_offset((void *)(uintptr_t)sge.addr, - length) == length) { + if (buffer_is_zero((void *)(uintptr_t)sge.addr, length)) { RDMACompress comp = { .offset = current_addr, .value = 0, diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 067d08f1ca..0cf8b6e7e4 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -192,9 +192,9 @@ static bool avx2_support(void) return b & bit_AVX2; } -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ +static bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc"))); -size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ +static size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc"))); static void *buffer_find_nonzero_offset_ifunc(void) @@ -215,12 +215,12 @@ static void *can_use_buffer_find_nonzero_offset_ifunc(void) } #pragma GCC pop_options #else -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +static bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) { return can_use_buffer_find_nonzero_offset_inner(buf, len); } -size_t buffer_find_nonzero_offset(const void *buf, size_t len) +static size_t buffer_find_nonzero_offset(const void *buf, size_t len) { return buffer_find_nonzero_offset_inner(buf, len); } From 5e33a8722254f99cbce6ede73adb4b735d94f58f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:15 -0700 Subject: [PATCH 53/58] cutils: Rearrange buffer_is_zero acceleration Allow selection of several acceleration functions based on the size and alignment of the buffer. Do not require ifunc support for AVX2 acceleration. Signed-off-by: Richard Henderson Message-Id: <1472496380-19706-5-git-send-email-rth@twiddle.net> Signed-off-by: Paolo Bonzini --- configure | 21 +-- util/bufferiszero.c | 382 ++++++++++++++++++++------------------------ 2 files changed, 180 insertions(+), 223 deletions(-) diff --git a/configure b/configure index 331c36fb84..7e09b79fe5 100755 --- a/configure +++ b/configure @@ -1794,28 +1794,19 @@ fi ########################################## # avx2 optimization requirement check - -if test "$static" = "no" ; then - cat > $TMPC << EOF +cat > $TMPC << EOF #pragma GCC push_options #pragma GCC target("avx2") #include #include - static int bar(void *a) { - return _mm256_movemask_epi8(_mm256_cmpeq_epi8(*(__m256i *)a, (__m256i){0})); + __m256i x = *(__m256i *)a; + return _mm256_testz_si256(x, x); } -static void *bar_ifunc(void) {return (void*) bar;} -int foo(void *a) __attribute__((ifunc("bar_ifunc"))); -int main(int argc, char *argv[]) { return foo(argv[0]);} +int main(int argc, char *argv[]) { return bar(argv[0]); } EOF - if compile_object "" ; then - if has readelf; then - if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then - avx2_opt="yes" - fi - fi - fi +if compile_object "" ; then + avx2_opt="yes" fi ######################################### diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 0cf8b6e7e4..025cb8fbd1 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -24,245 +24,211 @@ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/cutils.h" +#include "qemu/bswap.h" /* vector definitions */ -#ifdef __ALTIVEC__ + +extern void link_error(void); + +#define ACCEL_BUFFER_ZERO(NAME, SIZE, VECTYPE, NONZERO) \ +static bool NAME(const void *buf, size_t len) \ +{ \ + const void *end = buf + len; \ + do { \ + const VECTYPE *p = buf; \ + VECTYPE t; \ + if (SIZE == sizeof(VECTYPE) * 4) { \ + t = (p[0] | p[1]) | (p[2] | p[3]); \ + } else if (SIZE == sizeof(VECTYPE) * 8) { \ + t = p[0] | p[1]; \ + t |= p[2] | p[3]; \ + t |= p[4] | p[5]; \ + t |= p[6] | p[7]; \ + } else { \ + link_error(); \ + } \ + if (unlikely(NONZERO(t))) { \ + return false; \ + } \ + buf += SIZE; \ + } while (buf < end); \ + return true; \ +} + +static bool +buffer_zero_int(const void *buf, size_t len) +{ + if (unlikely(len < 8)) { + /* For a very small buffer, simply accumulate all the bytes. */ + const unsigned char *p = buf; + const unsigned char *e = buf + len; + unsigned char t = 0; + + do { + t |= *p++; + } while (p < e); + + return t == 0; + } else { + /* Otherwise, use the unaligned memory access functions to + handle the beginning and end of the buffer, with a couple + of loops handling the middle aligned section. */ + uint64_t t = ldq_he_p(buf); + const uint64_t *p = (uint64_t *)(((uintptr_t)buf + 8) & -8); + const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8); + + for (; p + 8 <= e; p += 8) { + __builtin_prefetch(p + 8); + if (t) { + return false; + } + t = p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7]; + } + while (p < e) { + t |= *p++; + } + t |= ldq_he_p(buf + len - 8); + + return t == 0; + } +} + +#if defined(__ALTIVEC__) #include /* The altivec.h header says we're allowed to undef these for * C++ compatibility. Here we don't care about C++, but we * undef them anyway to avoid namespace pollution. + * altivec.h may redefine the bool macro as vector type. + * Reset it to POSIX semantics. */ #undef vector #undef pixel #undef bool -#define VECTYPE __vector unsigned char -#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) -#define VEC_OR(v1, v2) ((v1) | (v2)) -/* altivec.h may redefine the bool macro as vector type. - * Reset it to POSIX semantics. */ #define bool _Bool -#elif defined __SSE2__ +#define DO_NONZERO(X) vec_any_ne(X, (__vector unsigned char){ 0 }) +ACCEL_BUFFER_ZERO(buffer_zero_ppc, 128, __vector unsigned char, DO_NONZERO) + +static bool select_accel_fn(const void *buf, size_t len) +{ + uintptr_t ibuf = (uintptr_t)buf; + if (len % 128 == 0 && ibuf % sizeof(__vector unsigned char) == 0) { + return buffer_zero_ppc(buf, len); + } + return buffer_zero_int(buf, len); +} + +#elif defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) && defined(__SSE2__)) +#include + +/* Do not use push_options pragmas unnecessarily, because clang + * does not support them. + */ +#ifndef __SSE2__ +#pragma GCC push_options +#pragma GCC target("sse2") +#endif #include -#define VECTYPE __m128i -#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) -#define VEC_OR(v1, v2) (_mm_or_si128(v1, v2)) -#elif defined(__aarch64__) -#include "arm_neon.h" -#define VECTYPE uint64x2_t -#define ALL_EQ(v1, v2) \ - ((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \ - (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1))) -#define VEC_OR(v1, v2) ((v1) | (v2)) -#else -#define VECTYPE unsigned long -#define ALL_EQ(v1, v2) ((v1) == (v2)) -#define VEC_OR(v1, v2) ((v1) | (v2)) +#define SSE2_NONZERO(X) \ + (_mm_movemask_epi8(_mm_cmpeq_epi8((X), _mm_setzero_si128())) != 0xFFFF) +ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO) +#ifndef __SSE2__ +#pragma GCC pop_options #endif -#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 - -static bool -can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len) -{ - return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR - * sizeof(VECTYPE)) == 0 - && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); -} - -/* - * Searches for an area with non-zero content in a buffer - * - * Attention! The len must be a multiple of - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) - * and addr must be a multiple of sizeof(VECTYPE) due to - * restriction of optimizations in this function. - * - * can_use_buffer_find_nonzero_offset_inner() can be used to - * check these requirements. - * - * The return value is the offset of the non-zero area rounded - * down to a multiple of sizeof(VECTYPE) for the first - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) - * afterwards. - * - * If the buffer is all zero the return value is equal to len. - */ - -static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) -{ - const VECTYPE *p = buf; - const VECTYPE zero = (VECTYPE){0}; - size_t i; - - assert(can_use_buffer_find_nonzero_offset_inner(buf, len)); - - if (!len) { - return 0; - } - - for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { - if (!ALL_EQ(p[i], zero)) { - return i * sizeof(VECTYPE); - } - } - - for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; - i < len / sizeof(VECTYPE); - i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { - VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]); - VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]); - VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]); - VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]); - VECTYPE tmp01 = VEC_OR(tmp0, tmp1); - VECTYPE tmp23 = VEC_OR(tmp2, tmp3); - if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) { - break; - } - } - - return i * sizeof(VECTYPE); -} - -#if defined CONFIG_AVX2_OPT +#ifdef CONFIG_AVX2_OPT #pragma GCC push_options #pragma GCC target("avx2") -#include #include - -#define AVX2_VECTYPE __m256i -#define AVX2_ALL_EQ(v1, v2) \ - (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) -#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) - -static bool -can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len) -{ - return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR - * sizeof(AVX2_VECTYPE)) == 0 - && ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0); -} - -static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len) -{ - const AVX2_VECTYPE *p = buf; - const AVX2_VECTYPE zero = (AVX2_VECTYPE){0}; - size_t i; - - assert(can_use_buffer_find_nonzero_offset_avx2(buf, len)); - - if (!len) { - return 0; - } - - for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { - if (!AVX2_ALL_EQ(p[i], zero)) { - return i * sizeof(AVX2_VECTYPE); - } - } - - for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; - i < len / sizeof(AVX2_VECTYPE); - i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { - AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]); - AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]); - AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]); - AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]); - AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1); - AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3); - if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) { - break; - } - } - - return i * sizeof(AVX2_VECTYPE); -} - -static bool avx2_support(void) -{ - int a, b, c, d; - - if (__get_cpuid_max(0, NULL) < 7) { - return false; - } - - __cpuid_count(7, 0, a, b, c, d); - - return b & bit_AVX2; -} - -static bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ - __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc"))); -static size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ - __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc"))); - -static void *buffer_find_nonzero_offset_ifunc(void) -{ - typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ? - buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner; - - return func; -} - -static void *can_use_buffer_find_nonzero_offset_ifunc(void) -{ - typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ? - can_use_buffer_find_nonzero_offset_avx2 : - can_use_buffer_find_nonzero_offset_inner; - - return func; -} +#define AVX2_NONZERO(X) !_mm256_testz_si256((X), (X)) +ACCEL_BUFFER_ZERO(buffer_zero_avx2, 128, __m256i, AVX2_NONZERO) #pragma GCC pop_options -#else -static bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) +#endif + +#define CACHE_AVX2 2 +#define CACHE_AVX1 4 +#define CACHE_SSE4 8 +#define CACHE_SSE2 16 + +static unsigned cpuid_cache; + +static void __attribute__((constructor)) init_cpuid_cache(void) { - return can_use_buffer_find_nonzero_offset_inner(buf, len); + int max = __get_cpuid_max(0, NULL); + int a, b, c, d; + unsigned cache = 0; + + if (max >= 1) { + __cpuid(1, a, b, c, d); + if (d & bit_SSE2) { + cache |= CACHE_SSE2; + } +#ifdef CONFIG_AVX2_OPT + if (c & bit_SSE4_1) { + cache |= CACHE_SSE4; + } + + /* We must check that AVX is not just available, but usable. */ + if ((c & bit_OSXSAVE) && (c & bit_AVX)) { + __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(0)); + if ((a & 6) == 6) { + cache |= CACHE_AVX1; + if (max >= 7) { + __cpuid_count(7, 0, a, b, c, d); + if (b & bit_AVX2) { + cache |= CACHE_AVX2; + } + } + } + } +#endif + } + cpuid_cache = cache; } -static size_t buffer_find_nonzero_offset(const void *buf, size_t len) +static bool select_accel_fn(const void *buf, size_t len) { - return buffer_find_nonzero_offset_inner(buf, len); + uintptr_t ibuf = (uintptr_t)buf; +#ifdef CONFIG_AVX2_OPT + if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) { + return buffer_zero_avx2(buf, len); + } +#endif + if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) { + return buffer_zero_sse2(buf, len); + } + return buffer_zero_int(buf, len); } + +#elif defined(__aarch64__) +#include "arm_neon.h" + +#define DO_NONZERO(X) (vgetq_lane_u64((X), 0) | vgetq_lane_u64((X), 1)) +ACCEL_BUFFER_ZERO(buffer_zero_neon, 128, uint64x2_t, DO_NONZERO) + +static bool select_accel_fn(const void *buf, size_t len) +{ + uintptr_t ibuf = (uintptr_t)buf; + if (len % 128 == 0 && ibuf % sizeof(uint64x2_t) == 0) { + return buffer_zero_neon(buf, len); + } + return buffer_zero_int(buf, len); +} + +#else +#define select_accel_fn buffer_zero_int #endif /* * Checks if a buffer is all zeroes - * - * Attention! The len must be a multiple of 4 * sizeof(long) due to - * restriction of optimizations in this function. */ bool buffer_is_zero(const void *buf, size_t len) { - /* - * Use long as the biggest available internal data type that fits into the - * CPU register and unroll the loop to smooth out the effect of memory - * latency. - */ - - size_t i; - long d0, d1, d2, d3; - const long * const data = buf; - - /* use vector optimized zero check if possible */ - if (can_use_buffer_find_nonzero_offset(buf, len)) { - return buffer_find_nonzero_offset(buf, len) == len; + if (unlikely(len == 0)) { + return true; } - assert(len % (4 * sizeof(long)) == 0); - len /= sizeof(long); - - for (i = 0; i < len; i += 4) { - d0 = data[i + 0]; - d1 = data[i + 1]; - d2 = data[i + 2]; - d3 = data[i + 3]; - - if (d0 || d1 || d2 || d3) { - return false; - } - } - - return true; + /* Use an optimized zero check if possible. Note that this also + includes a check for an unrolled loop over 64-bit integers. */ + return select_accel_fn(buf, len); } - From 2250d3a293d36ed9d8143d4c3d3e94086c429af4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:19 -0700 Subject: [PATCH 54/58] cutils: Remove aarch64 buffer zero checking The revised integer version is 4 times faster than the neon version on an AppliedMicro Mustang. Even with hand scheduling and additional unrolling I cannot make any neon version run as fast as the integer. Signed-off-by: Richard Henderson Signed-off-by: Paolo Bonzini --- util/bufferiszero.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 025cb8fbd1..e6679b3eea 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -200,21 +200,6 @@ static bool select_accel_fn(const void *buf, size_t len) return buffer_zero_int(buf, len); } -#elif defined(__aarch64__) -#include "arm_neon.h" - -#define DO_NONZERO(X) (vgetq_lane_u64((X), 0) | vgetq_lane_u64((X), 1)) -ACCEL_BUFFER_ZERO(buffer_zero_neon, 128, uint64x2_t, DO_NONZERO) - -static bool select_accel_fn(const void *buf, size_t len) -{ - uintptr_t ibuf = (uintptr_t)buf; - if (len % 128 == 0 && ibuf % sizeof(uint64x2_t) == 0) { - return buffer_zero_neon(buf, len); - } - return buffer_zero_int(buf, len); -} - #else #define select_accel_fn buffer_zero_int #endif From 43ff5e01ecba41628f68eb3c62fd49e0405fcca9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:20 -0700 Subject: [PATCH 55/58] cutils: Remove ppc buffer zero checking For ppc64le, gcc6 does extremely poorly with the Altivec code. Moreover, on POWER7 and POWER8, a hand-optimized Altivec version turns out to be no faster than the revised integer version, and therefore not worth the effort. Signed-off-by: Richard Henderson Signed-off-by: Paolo Bonzini --- util/bufferiszero.c | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/util/bufferiszero.c b/util/bufferiszero.c index e6679b3eea..a3a842f9fd 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -94,31 +94,7 @@ buffer_zero_int(const void *buf, size_t len) } } -#if defined(__ALTIVEC__) -#include -/* The altivec.h header says we're allowed to undef these for - * C++ compatibility. Here we don't care about C++, but we - * undef them anyway to avoid namespace pollution. - * altivec.h may redefine the bool macro as vector type. - * Reset it to POSIX semantics. - */ -#undef vector -#undef pixel -#undef bool -#define bool _Bool -#define DO_NONZERO(X) vec_any_ne(X, (__vector unsigned char){ 0 }) -ACCEL_BUFFER_ZERO(buffer_zero_ppc, 128, __vector unsigned char, DO_NONZERO) - -static bool select_accel_fn(const void *buf, size_t len) -{ - uintptr_t ibuf = (uintptr_t)buf; - if (len % 128 == 0 && ibuf % sizeof(__vector unsigned char) == 0) { - return buffer_zero_ppc(buf, len); - } - return buffer_zero_int(buf, len); -} - -#elif defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) && defined(__SSE2__)) +#if defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) && defined(__SSE2__)) #include /* Do not use push_options pragmas unnecessarily, because clang From efad6682452ec85a898609c885c2721ea12585db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:16 -0700 Subject: [PATCH 56/58] cutils: Add test for buffer_is_zero Signed-off-by: Richard Henderson Message-Id: <1472496380-19706-6-git-send-email-rth@twiddle.net> Signed-off-by: Paolo Bonzini --- include/qemu/cutils.h | 1 + tests/Makefile.include | 3 ++ tests/test-bufferiszero.c | 78 +++++++++++++++++++++++++++++++++++++++ util/bufferiszero.c | 20 ++++++++++ 4 files changed, 102 insertions(+) create mode 100644 tests/test-bufferiszero.c diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h index ca58577565..8033929139 100644 --- a/include/qemu/cutils.h +++ b/include/qemu/cutils.h @@ -169,6 +169,7 @@ int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, #define STR_OR_NULL(str) ((str) ? (str) : "null") bool buffer_is_zero(const void *buf, size_t len); +bool test_buffer_is_zero_next_accel(void); /* * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) diff --git a/tests/Makefile.include b/tests/Makefile.include index e3a3266370..bde274d31e 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -112,6 +112,8 @@ check-unit-y += tests/test-crypto-xts$(EXESUF) check-unit-y += tests/test-crypto-block$(EXESUF) gcov-files-test-logging-y = tests/test-logging.c check-unit-y += tests/test-logging$(EXESUF) +check-unit-y += tests/test-bufferiszero$(EXESUF) +gcov-files-check-bufferiszero-y = util/bufferiszero.c check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -484,6 +486,7 @@ tests/test-qdist$(EXESUF): tests/test-qdist.o $(test-util-obj-y) tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y) tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y) tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y) +tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y) tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\ diff --git a/tests/test-bufferiszero.c b/tests/test-bufferiszero.c new file mode 100644 index 0000000000..42d194cadf --- /dev/null +++ b/tests/test-bufferiszero.c @@ -0,0 +1,78 @@ +/* + * QEMU buffer_is_zero test + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" + +static char buffer[8 * 1024 * 1024]; + +static void test_1(void) +{ + size_t s, a, o; + + /* Basic positive test. */ + g_assert(buffer_is_zero(buffer, sizeof(buffer))); + + /* Basic negative test. */ + buffer[sizeof(buffer) - 1] = 1; + g_assert(!buffer_is_zero(buffer, sizeof(buffer))); + buffer[sizeof(buffer) - 1] = 0; + + /* Positive tests for size and alignment. */ + for (a = 1; a <= 64; a++) { + for (s = 1; s < 1024; s++) { + buffer[a - 1] = 1; + buffer[a + s] = 1; + g_assert(buffer_is_zero(buffer + a, s)); + buffer[a - 1] = 0; + buffer[a + s] = 0; + } + } + + /* Negative tests for size, alignment, and the offset of the marker. */ + for (a = 1; a <= 64; a++) { + for (s = 1; s < 1024; s++) { + for (o = 0; o < s; ++o) { + buffer[a + o] = 1; + g_assert(!buffer_is_zero(buffer + a, s)); + buffer[a + o] = 0; + } + } + } +} + +static void test_2(void) +{ + if (g_test_perf()) { + test_1(); + } else { + do { + test_1(); + } while (test_buffer_is_zero_next_accel()); + } +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/cutils/bufferiszero", test_2); + + return g_test_run(); +} diff --git a/util/bufferiszero.c b/util/bufferiszero.c index a3a842f9fd..4af3caa1b6 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -162,6 +162,19 @@ static void __attribute__((constructor)) init_cpuid_cache(void) cpuid_cache = cache; } +#define HAVE_NEXT_ACCEL +bool test_buffer_is_zero_next_accel(void) +{ + /* If no bits set, we just tested buffer_zero_int, and there + are no more acceleration options to test. */ + if (cpuid_cache == 0) { + return false; + } + /* Disable the accelerator we used before and select a new one. */ + cpuid_cache &= cpuid_cache - 1; + return true; +} + static bool select_accel_fn(const void *buf, size_t len) { uintptr_t ibuf = (uintptr_t)buf; @@ -180,6 +193,13 @@ static bool select_accel_fn(const void *buf, size_t len) #define select_accel_fn buffer_zero_int #endif +#ifndef HAVE_NEXT_ACCEL +bool test_buffer_is_zero_next_accel(void) +{ + return false; +} +#endif + /* * Checks if a buffer is all zeroes */ From 86444f084b23c967d556039b22a67d80a72725ca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 13 Sep 2016 17:04:52 +0200 Subject: [PATCH 57/58] cutils: Add SSE4 version Signed-off-by: Paolo Bonzini --- util/bufferiszero.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 4af3caa1b6..bafd3d159c 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -113,6 +113,13 @@ ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO) #endif #ifdef CONFIG_AVX2_OPT +#pragma GCC push_options +#pragma GCC target("sse4") +#include +#define SSE4_NONZERO(X) !_mm_testz_si128((X), (X)) +ACCEL_BUFFER_ZERO(buffer_zero_sse4, 64, __m128i, SSE4_NONZERO) +#pragma GCC pop_options + #pragma GCC push_options #pragma GCC target("avx2") #include @@ -182,6 +189,9 @@ static bool select_accel_fn(const void *buf, size_t len) if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) { return buffer_zero_avx2(buf, len); } + if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE4)) { + return buffer_zero_sse4(buf, len); + } #endif if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) { return buffer_zero_sse2(buf, len); From 083d012a388e7e2a8bfd9144c2c9bcceb29a78fc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 29 Aug 2016 11:46:17 -0700 Subject: [PATCH 58/58] cutils: Add generic prefetch There's no real knowledge of the cacheline size, just prefetching one loop ahead. Signed-off-by: Richard Henderson Message-Id: <1472496380-19706-7-git-send-email-rth@twiddle.net> Signed-off-by: Paolo Bonzini --- util/bufferiszero.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/util/bufferiszero.c b/util/bufferiszero.c index bafd3d159c..abe65f9d88 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -38,6 +38,8 @@ static bool NAME(const void *buf, size_t len) \ do { \ const VECTYPE *p = buf; \ VECTYPE t; \ + __builtin_prefetch(buf + SIZE); \ + barrier(); \ if (SIZE == sizeof(VECTYPE) * 4) { \ t = (p[0] | p[1]) | (p[2] | p[3]); \ } else if (SIZE == sizeof(VECTYPE) * 8) { \ @@ -219,6 +221,9 @@ bool buffer_is_zero(const void *buf, size_t len) return true; } + /* Fetch the beginning of the buffer while we select the accelerator. */ + __builtin_prefetch(buf); + /* Use an optimized zero check if possible. Note that this also includes a check for an unrolled loop over 64-bit integers. */ return select_accel_fn(buf, len);