diff --git a/Makefile b/Makefile index 8e2fc6624c..cfb18f1525 100644 --- a/Makefile +++ b/Makefile @@ -731,6 +731,7 @@ distclean: clean rm -rf .doctrees $(call clean-manual,devel) $(call clean-manual,interop) + $(call clean-manual,specs) for d in $(TARGET_DIRS); do \ rm -rf $$d || exit 1 ; \ done @@ -781,6 +782,7 @@ endef .PHONY: install-sphinxdocs install-sphinxdocs: sphinxdocs $(call install-manual,interop) + $(call install-manual,specs) install-doc: $(DOCS) install-sphinxdocs $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" @@ -962,7 +964,7 @@ docs/version.texi: $(SRC_PATH)/VERSION config-host.mak # and handles "don't rebuild things unless necessary" itself. # The '.doctrees' files are cached information to speed this up. .PHONY: sphinxdocs -sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html $(MANUAL_BUILDDIR)/interop/index.html +sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html $(MANUAL_BUILDDIR)/interop/index.html $(MANUAL_BUILDDIR)/specs/index.html # Canned command to build a single manual build-manual = $(call quiet-command,sphinx-build $(if $(V),,-q) -W -n -b html -D version=$(VERSION) -D release="$(FULL_VERSION)" -d .doctrees/$1 $(SRC_PATH)/docs/$1 $(MANUAL_BUILDDIR)/$1 ,"SPHINX","$(MANUAL_BUILDDIR)/$1") @@ -975,6 +977,9 @@ $(MANUAL_BUILDDIR)/devel/index.html: $(call manual-deps,devel) $(MANUAL_BUILDDIR)/interop/index.html: $(call manual-deps,interop) $(call build-manual,interop) +$(MANUAL_BUILDDIR)/specs/index.html: $(call manual-deps,specs) + $(call build-manual,specs) + qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") diff --git a/docs/devel/index.rst b/docs/devel/index.rst index 2a4ddf40ad..1ec61fcfed 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -21,3 +21,4 @@ Contents: testing decodetree secure-coding-practices + tcg diff --git a/docs/devel/tcg.rst b/docs/devel/tcg.rst new file mode 100644 index 0000000000..4956a30a4e --- /dev/null +++ b/docs/devel/tcg.rst @@ -0,0 +1,111 @@ +==================== +Translator Internals +==================== + +QEMU is a dynamic translator. When it first encounters a piece of code, +it converts it to the host instruction set. Usually dynamic translators +are very complicated and highly CPU dependent. QEMU uses some tricks +which make it relatively easily portable and simple while achieving good +performances. + +QEMU's dynamic translation backend is called TCG, for "Tiny Code +Generator". For more information, please take a look at ``tcg/README``. + +Some notable features of QEMU's dynamic translator are: + +CPU state optimisations +----------------------- + +The target CPUs have many internal states which change the way it +evaluates instructions. In order to achieve a good speed, the +translation phase considers that some state information of the virtual +CPU cannot change in it. The state is recorded in the Translation +Block (TB). If the state changes (e.g. privilege level), a new TB will +be generated and the previous TB won't be used anymore until the state +matches the state recorded in the previous TB. The same idea can be applied +to other aspects of the CPU state. For example, on x86, if the SS, +DS and ES segments have a zero base, then the translator does not even +generate an addition for the segment base. + +Direct block chaining +--------------------- + +After each translated basic block is executed, QEMU uses the simulated +Program Counter (PC) and other cpu state information (such as the CS +segment base value) to find the next basic block. + +In order to accelerate the most common cases where the new simulated PC +is known, QEMU can patch a basic block so that it jumps directly to the +next one. + +The most portable code uses an indirect jump. An indirect jump makes +it easier to make the jump target modification atomic. On some host +architectures (such as x86 or PowerPC), the ``JUMP`` opcode is +directly patched so that the block chaining has no overhead. + +Self-modifying code and translated code invalidation +---------------------------------------------------- + +Self-modifying code is a special challenge in x86 emulation because no +instruction cache invalidation is signaled by the application when code +is modified. + +User-mode emulation marks a host page as write-protected (if it is +not already read-only) every time translated code is generated for a +basic block. Then, if a write access is done to the page, Linux raises +a SEGV signal. QEMU then invalidates all the translated code in the page +and enables write accesses to the page. For system emulation, write +protection is achieved through the software MMU. + +Correct translated code invalidation is done efficiently by maintaining +a linked list of every translated block contained in a given page. Other +linked lists are also maintained to undo direct block chaining. + +On RISC targets, correctly written software uses memory barriers and +cache flushes, so some of the protection above would not be +necessary. However, QEMU still requires that the generated code always +matches the target instructions in memory in order to handle +exceptions correctly. + +Exception support +----------------- + +longjmp() is used when an exception such as division by zero is +encountered. + +The host SIGSEGV and SIGBUS signal handlers are used to get invalid +memory accesses. QEMU keeps a map from host program counter to +target program counter, and looks up where the exception happened +based on the host program counter at the exception point. + +On some targets, some bits of the virtual CPU's state are not flushed to the +memory until the end of the translation block. This is done for internal +emulation state that is rarely accessed directly by the program and/or changes +very often throughout the execution of a translation block---this includes +condition codes on x86, delay slots on SPARC, conditional execution on +ARM, and so on. This state is stored for each target instruction, and +looked up on exceptions. + +MMU emulation +------------- + +For system emulation QEMU uses a software MMU. In that mode, the MMU +virtual to physical address translation is done at every memory +access. + +QEMU uses an address translation cache (TLB) to speed up the translation. +In order to avoid flushing the translated code each time the MMU +mappings change, all caches in QEMU are physically indexed. This +means that each basic block is indexed with its physical address. + +In order to avoid invalidating the basic block chain when MMU mappings +change, chaining is only performed when the destination of the jump +shares a page with the basic block that is performing the jump. + +The MMU can also distinguish RAM and ROM memory areas from MMIO memory +areas. Access is faster for RAM and ROM because the translation cache also +hosts the offset between guest address and host memory. Accessing MMIO +memory areas instead calls out to C code for device emulation. +Finally, the MMU helps tracking dirty pages and pages pointed to by +translation blocks. + diff --git a/docs/specs/conf.py b/docs/specs/conf.py new file mode 100644 index 0000000000..4d56f3ae13 --- /dev/null +++ b/docs/specs/conf.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +# +# QEMU documentation build configuration file for the 'specs' manual. +# +# This includes the top level conf file and then makes any necessary tweaks. +import sys +import os + +qemu_docdir = os.path.abspath("..") +parent_config = os.path.join(qemu_docdir, "conf.py") +exec(compile(open(parent_config, "rb").read(), parent_config, 'exec')) + +# This slightly misuses the 'description', but is the best way to get +# the manual title to appear in the sidebar. +html_theme_options['description'] = \ + u'System Emulation Guest Hardware Specifications' diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 2e927519c2..40adb97c5e 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -1,8 +1,8 @@ -. This is the top level page for the 'specs' manual +.. This is the top level page for the 'specs' manual -QEMU full-system emulation guest hardware specifications -======================================================== +QEMU System Emulation Guest Hardware Specifications +=================================================== Contents: @@ -10,4 +10,5 @@ Contents: .. toctree:: :maxdepth: 2 - xive + ppc-xive + ppc-spapr-xive diff --git a/qemu-tech.texi b/qemu-tech.texi index 7c3d1f05e1..3451cfaa5b 100644 --- a/qemu-tech.texi +++ b/qemu-tech.texi @@ -161,160 +161,6 @@ may be created from overlay with minimal amount of hand-written code. @end itemize -@node Translator Internals -@section Translator Internals - -QEMU is a dynamic translator. When it first encounters a piece of code, -it converts it to the host instruction set. Usually dynamic translators -are very complicated and highly CPU dependent. QEMU uses some tricks -which make it relatively easily portable and simple while achieving good -performances. - -QEMU's dynamic translation backend is called TCG, for "Tiny Code -Generator". For more information, please take a look at @code{tcg/README}. - -Some notable features of QEMU's dynamic translator are: - -@table @strong - -@item CPU state optimisations: -The target CPUs have many internal states which change the way it -evaluates instructions. In order to achieve a good speed, the -translation phase considers that some state information of the virtual -CPU cannot change in it. The state is recorded in the Translation -Block (TB). If the state changes (e.g. privilege level), a new TB will -be generated and the previous TB won't be used anymore until the state -matches the state recorded in the previous TB. The same idea can be applied -to other aspects of the CPU state. For example, on x86, if the SS, -DS and ES segments have a zero base, then the translator does not even -generate an addition for the segment base. - -@item Direct block chaining: -After each translated basic block is executed, QEMU uses the simulated -Program Counter (PC) and other cpu state information (such as the CS -segment base value) to find the next basic block. - -In order to accelerate the most common cases where the new simulated PC -is known, QEMU can patch a basic block so that it jumps directly to the -next one. - -The most portable code uses an indirect jump. An indirect jump makes -it easier to make the jump target modification atomic. On some host -architectures (such as x86 or PowerPC), the @code{JUMP} opcode is -directly patched so that the block chaining has no overhead. - -@item Self-modifying code and translated code invalidation: -Self-modifying code is a special challenge in x86 emulation because no -instruction cache invalidation is signaled by the application when code -is modified. - -User-mode emulation marks a host page as write-protected (if it is -not already read-only) every time translated code is generated for a -basic block. Then, if a write access is done to the page, Linux raises -a SEGV signal. QEMU then invalidates all the translated code in the page -and enables write accesses to the page. For system emulation, write -protection is achieved through the software MMU. - -Correct translated code invalidation is done efficiently by maintaining -a linked list of every translated block contained in a given page. Other -linked lists are also maintained to undo direct block chaining. - -On RISC targets, correctly written software uses memory barriers and -cache flushes, so some of the protection above would not be -necessary. However, QEMU still requires that the generated code always -matches the target instructions in memory in order to handle -exceptions correctly. - -@item Exception support: -longjmp() is used when an exception such as division by zero is -encountered. - -The host SIGSEGV and SIGBUS signal handlers are used to get invalid -memory accesses. QEMU keeps a map from host program counter to -target program counter, and looks up where the exception happened -based on the host program counter at the exception point. - -On some targets, some bits of the virtual CPU's state are not flushed to the -memory until the end of the translation block. This is done for internal -emulation state that is rarely accessed directly by the program and/or changes -very often throughout the execution of a translation block---this includes -condition codes on x86, delay slots on SPARC, conditional execution on -ARM, and so on. This state is stored for each target instruction, and -looked up on exceptions. - -@item MMU emulation: -For system emulation QEMU uses a software MMU. In that mode, the MMU -virtual to physical address translation is done at every memory -access. - -QEMU uses an address translation cache (TLB) to speed up the translation. -In order to avoid flushing the translated code each time the MMU -mappings change, all caches in QEMU are physically indexed. This -means that each basic block is indexed with its physical address. - -In order to avoid invalidating the basic block chain when MMU mappings -change, chaining is only performed when the destination of the jump -shares a page with the basic block that is performing the jump. - -The MMU can also distinguish RAM and ROM memory areas from MMIO memory -areas. Access is faster for RAM and ROM because the translation cache also -hosts the offset between guest address and host memory. Accessing MMIO -memory areas instead calls out to C code for device emulation. -Finally, the MMU helps tracking dirty pages and pages pointed to by -translation blocks. -@end table - -@node QEMU compared to other emulators -@section QEMU compared to other emulators - -Like bochs [1], QEMU emulates an x86 CPU. But QEMU is much faster than -bochs as it uses dynamic compilation. Bochs is closely tied to x86 PC -emulation while QEMU can emulate several processors. - -Like Valgrind [2], QEMU does user space emulation and dynamic -translation. Valgrind is mainly a memory debugger while QEMU has no -support for it (QEMU could be used to detect out of bound memory -accesses as Valgrind, but it has no support to track uninitialised data -as Valgrind does). The Valgrind dynamic translator generates better code -than QEMU (in particular it does register allocation) but it is closely -tied to an x86 host and target and has no support for precise exceptions -and system emulation. - -EM86 [3] is the closest project to user space QEMU (and QEMU still uses -some of its code, in particular the ELF file loader). EM86 was limited -to an alpha host and used a proprietary and slow interpreter (the -interpreter part of the FX!32 Digital Win32 code translator [4]). - -TWIN from Willows Software was a Windows API emulator like Wine. It is less -accurate than Wine but includes a protected mode x86 interpreter to launch -x86 Windows executables. Such an approach has greater potential because most -of the Windows API is executed natively but it is far more difficult to -develop because all the data structures and function parameters exchanged -between the API and the x86 code must be converted. - -User mode Linux [5] was the only solution before QEMU to launch a -Linux kernel as a process while not needing any host kernel -patches. However, user mode Linux requires heavy kernel patches while -QEMU accepts unpatched Linux kernels. The price to pay is that QEMU is -slower. - -The Plex86 [6] PC virtualizer is done in the same spirit as the now -obsolete qemu-fast system emulator. It requires a patched Linux kernel -to work (you cannot launch the same kernel on your PC), but the -patches are really small. As it is a PC virtualizer (no emulation is -done except for some privileged instructions), it has the potential of -being faster than QEMU. The downside is that a complicated (and -potentially unsafe) host kernel patch is needed. - -The commercial PC Virtualizers (VMWare [7], VirtualPC [8]) are faster -than QEMU (without virtualization), but they all need specific, proprietary -and potentially unsafe host drivers. Moreover, they are unable to -provide cycle exact simulation as an emulator can. - -VirtualBox [9], Xen [10] and KVM [11] are based on QEMU. QEMU-SystemC -[12] uses QEMU to simulate a system where some hardware devices are -developed in SystemC. - @node Managed start up options @section Managed start up options @@ -350,59 +196,3 @@ depend on an initialized machine, including but not limited to: @item query-status @item x-exit-preconfig @end table - -@node Bibliography -@section Bibliography - -@table @asis - -@item [1] -@url{http://bochs.sourceforge.net/}, the Bochs IA-32 Emulator Project, -by Kevin Lawton et al. - -@item [2] -@url{http://www.valgrind.org/}, Valgrind, an open-source memory debugger -for GNU/Linux. - -@item [3] -@url{http://ftp.dreamtime.org/pub/linux/Linux-Alpha/em86/v0.2/docs/em86.html}, -the EM86 x86 emulator on Alpha-Linux. - -@item [4] -@url{http://www.usenix.org/publications/library/proceedings/usenix-nt97/@/full_papers/chernoff/chernoff.pdf}, -DIGITAL FX!32: Running 32-Bit x86 Applications on Alpha NT, by Anton -Chernoff and Ray Hookway. - -@item [5] -@url{http://user-mode-linux.sourceforge.net/}, -The User-mode Linux Kernel. - -@item [6] -@url{http://www.plex86.org/}, -The new Plex86 project. - -@item [7] -@url{http://www.vmware.com/}, -The VMWare PC virtualizer. - -@item [8] -@url{https://www.microsoft.com/download/details.aspx?id=3702}, -The VirtualPC PC virtualizer. - -@item [9] -@url{http://virtualbox.org/}, -The VirtualBox PC virtualizer. - -@item [10] -@url{http://www.xen.org/}, -The Xen hypervisor. - -@item [11] -@url{http://www.linux-kvm.org/}, -Kernel Based Virtual Machine (KVM). - -@item [12] -@url{http://www.greensocs.com/projects/QEMUSystemC}, -QEMU-SystemC, a hardware co-simulator. - -@end table