Merge remote-tracking branch 'stefanha/block' into staging

# By Stefan Hajnoczi (4) and others
# Via Stefan Hajnoczi
* stefanha/block:
  virtio-blk: do not relay a previous driver's WCE configuration to the current
  blockdev: do not default cache.no-flush to true
  block: don't lose data from last incomplete sector
  qcow2: Correct snapshots size for overlap check
  coroutine: fix /perf/nesting coroutine benchmark
  coroutine: add qemu_coroutine_yield benchmark
  qemu-timer: do not take the lock in timer_pending
  qemu-timer: make qemu_timer_mod_ns() and qemu_timer_del() thread-safe
  qemu-timer: drop outdated signal safety comments
  osdep: warn if open(O_DIRECT) on fails with EINVAL
  libcacard: link against qemu-error.o for error_report()

Message-id: 1379698931-946-1-git-send-email-stefanha@redhat.com
This commit is contained in:
Anthony Liguori 2013-09-23 11:53:05 -05:00
commit 16121fa39e
10 changed files with 155 additions and 40 deletions

View file

@ -2669,7 +2669,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
goto out;
}
total_sectors = len >> BDRV_SECTOR_BITS;
total_sectors = (len + BDRV_SECTOR_SIZE - 1) >> BDRV_SECTOR_BITS;
max_nb_sectors = MAX(0, total_sectors - sector_num);
if (max_nb_sectors > 0) {
ret = drv->bdrv_co_readv(bs, sector_num,

View file

@ -192,7 +192,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
/* The snapshot list position has not yet been updated, so these clusters
* must indeed be completely free */
ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
s->snapshots_size);
snapshots_size);
if (ret < 0) {
return ret;
}

View file

@ -443,7 +443,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts,
if (qemu_opt_get_bool(opts, "cache.direct", false)) {
bdrv_flags |= BDRV_O_NOCACHE;
}
if (qemu_opt_get_bool(opts, "cache.no-flush", true)) {
if (qemu_opt_get_bool(opts, "cache.no-flush", false)) {
bdrv_flags |= BDRV_O_NO_FLUSH;
}

View file

@ -460,9 +460,9 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running,
static void virtio_blk_reset(VirtIODevice *vdev)
{
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
VirtIOBlock *s = VIRTIO_BLK(vdev);
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
if (s->dataplane) {
virtio_blk_data_plane_stop(s->dataplane);
}
@ -473,6 +473,7 @@ static void virtio_blk_reset(VirtIODevice *vdev)
* are per-device request lists.
*/
bdrv_drain_all();
bdrv_set_enable_write_cache(s->bs, s->original_wce);
}
/* coalesce internal state, copy to pci i/o region 0
@ -564,7 +565,25 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
}
features = vdev->guest_features;
bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
/* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
* cache flushes. Thus, the "auto writethrough" behavior is never
* necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature.
* Leaving it enabled would break the following sequence:
*
* Guest started with "-drive cache=writethrough"
* Guest sets status to 0
* Guest sets DRIVER bit in status field
* Guest reads host features (WCE=0, CONFIG_WCE=1)
* Guest writes guest features (WCE=0, CONFIG_WCE=1)
* Guest writes 1 to the WCE configuration field (writeback mode)
* Guest sets DRIVER_OK bit in status field
*
* s->bs would erroneously be placed in writethrough mode.
*/
if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
bdrv_set_enable_write_cache(s->bs, !!(features & (1 << VIRTIO_BLK_F_WCE)));
}
}
static void virtio_blk_save(QEMUFile *f, void *opaque)
@ -674,6 +693,7 @@ static int virtio_blk_device_init(VirtIODevice *vdev)
}
blkconf_serial(&blk->conf, &blk->serial);
s->original_wce = bdrv_enable_write_cache(blk->conf.bs);
if (blkconf_geometry(&blk->conf, NULL, 65535, 255, 255) < 0) {
return -1;
}

View file

@ -123,6 +123,7 @@ typedef struct VirtIOBlock {
BlockConf *conf;
VirtIOBlkConf blk;
unsigned short sector_mask;
bool original_wce;
VMChangeStateEntry *change;
#ifdef CONFIG_VIRTIO_BLK_DATA_PLANE
Notifier migration_state_notifier;

View file

@ -115,6 +115,10 @@ static inline int64_t qemu_clock_get_us(QEMUClockType type)
* Determines whether a clock's default timer list
* has timers attached
*
* Note that this function should not be used when other threads also access
* the timer list. The return value may be outdated by the time it is acted
* upon.
*
* Returns: true if the clock's default timer list
* has timers attached
*/
@ -271,6 +275,10 @@ void timerlist_free(QEMUTimerList *timer_list);
*
* Determine whether a timer list has active timers
*
* Note that this function should not be used when other threads also access
* the timer list. The return value may be outdated by the time it is acted
* upon.
*
* Returns: true if the timer list has timers.
*/
bool timerlist_has_timers(QEMUTimerList *timer_list);
@ -512,6 +520,9 @@ void timer_free(QEMUTimer *ts);
* @ts: the timer
*
* Delete a timer from the active list.
*
* This function is thread-safe but the timer and its timer list must not be
* freed while this function is running.
*/
void timer_del(QEMUTimer *ts);
@ -521,6 +532,9 @@ void timer_del(QEMUTimer *ts);
* @expire_time: the expiry time in nanoseconds
*
* Modify a timer to expire at @expire_time
*
* This function is thread-safe but the timer and its timer list must not be
* freed while this function is running.
*/
void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
@ -531,6 +545,9 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
*
* Modify a timer to expiry at @expire_time, taking into
* account the scale associated with the timer.
*
* This function is thread-safe but the timer and its timer list must not be
* freed while this function is running.
*/
void timer_mod(QEMUTimer *ts, int64_t expire_timer);

View file

@ -4,7 +4,8 @@ TOOLS += vscclient$(EXESUF)
# objects linked into a shared library, built with libtool with -fPIC if required
libcacard-obj-y = $(stub-obj-y) $(libcacard-y)
libcacard-obj-y += util/osdep.o util/cutils.o util/qemu-timer-common.o util/error.o
libcacard-obj-y += util/osdep.o util/cutils.o util/qemu-timer-common.o
libcacard-obj-y += util/error.o util/qemu-error.o
libcacard-obj-$(CONFIG_WIN32) += util/oslib-win32.o util/qemu-thread-win32.o
libcacard-obj-$(CONFIG_POSIX) += util/oslib-posix.o util/qemu-thread-posix.o
libcacard-obj-y += $(filter trace/%, $(util-obj-y))

View file

@ -66,6 +66,7 @@ QEMUClock qemu_clocks[QEMU_CLOCK_MAX];
struct QEMUTimerList {
QEMUClock *clock;
QemuMutex active_timers_lock;
QEMUTimer *active_timers;
QLIST_ENTRY(QEMUTimerList) list;
QEMUTimerListNotifyCB *notify_cb;
@ -101,6 +102,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
timer_list->clock = clock;
timer_list->notify_cb = cb;
timer_list->notify_opaque = opaque;
qemu_mutex_init(&timer_list->active_timers_lock);
QLIST_INSERT_HEAD(&clock->timerlists, timer_list, list);
return timer_list;
}
@ -111,6 +113,7 @@ void timerlist_free(QEMUTimerList *timer_list)
if (timer_list->clock) {
QLIST_REMOVE(timer_list, list);
}
qemu_mutex_destroy(&timer_list->active_timers_lock);
g_free(timer_list);
}
@ -163,9 +166,17 @@ bool qemu_clock_has_timers(QEMUClockType type)
bool timerlist_expired(QEMUTimerList *timer_list)
{
return (timer_list->active_timers &&
timer_list->active_timers->expire_time <
qemu_clock_get_ns(timer_list->clock->type));
int64_t expire_time;
qemu_mutex_lock(&timer_list->active_timers_lock);
if (!timer_list->active_timers) {
qemu_mutex_unlock(&timer_list->active_timers_lock);
return false;
}
expire_time = timer_list->active_timers->expire_time;
qemu_mutex_unlock(&timer_list->active_timers_lock);
return expire_time < qemu_clock_get_ns(timer_list->clock->type);
}
bool qemu_clock_expired(QEMUClockType type)
@ -182,13 +193,25 @@ bool qemu_clock_expired(QEMUClockType type)
int64_t timerlist_deadline_ns(QEMUTimerList *timer_list)
{
int64_t delta;
int64_t expire_time;
if (!timer_list->clock->enabled || !timer_list->active_timers) {
if (!timer_list->clock->enabled) {
return -1;
}
delta = timer_list->active_timers->expire_time -
qemu_clock_get_ns(timer_list->clock->type);
/* The active timers list may be modified before the caller uses our return
* value but ->notify_cb() is called when the deadline changes. Therefore
* the caller should notice the change and there is no race condition.
*/
qemu_mutex_lock(&timer_list->active_timers_lock);
if (!timer_list->active_timers) {
qemu_mutex_unlock(&timer_list->active_timers_lock);
return -1;
}
expire_time = timer_list->active_timers->expire_time;
qemu_mutex_unlock(&timer_list->active_timers_lock);
delta = expire_time - qemu_clock_get_ns(timer_list->clock->type);
if (delta <= 0) {
return 0;
@ -289,6 +312,7 @@ void timer_init(QEMUTimer *ts,
ts->cb = cb;
ts->opaque = opaque;
ts->scale = scale;
ts->expire_time = -1;
}
void timer_free(QEMUTimer *ts)
@ -296,14 +320,12 @@ void timer_free(QEMUTimer *ts)
g_free(ts);
}
/* stop a timer, but do not dealloc it */
void timer_del(QEMUTimer *ts)
static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
{
QEMUTimer **pt, *t;
/* NOTE: this code must be signal safe because
timer_expired() can be called from a signal. */
pt = &ts->timer_list->active_timers;
ts->expire_time = -1;
pt = &timer_list->active_timers;
for(;;) {
t = *pt;
if (!t)
@ -316,18 +338,28 @@ void timer_del(QEMUTimer *ts)
}
}
/* stop a timer, but do not dealloc it */
void timer_del(QEMUTimer *ts)
{
QEMUTimerList *timer_list = ts->timer_list;
qemu_mutex_lock(&timer_list->active_timers_lock);
timer_del_locked(timer_list, ts);
qemu_mutex_unlock(&timer_list->active_timers_lock);
}
/* modify the current timer so that it will be fired when current_time
>= expire_time. The corresponding callback will be called. */
void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
{
QEMUTimerList *timer_list = ts->timer_list;
QEMUTimer **pt, *t;
timer_del(ts);
qemu_mutex_lock(&timer_list->active_timers_lock);
timer_del_locked(timer_list, ts);
/* add the timer in the sorted list */
/* NOTE: this code must be signal safe because
timer_expired() can be called from a signal. */
pt = &ts->timer_list->active_timers;
pt = &timer_list->active_timers;
for(;;) {
t = *pt;
if (!timer_expired_ns(t, expire_time)) {
@ -335,15 +367,16 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
}
pt = &t->next;
}
ts->expire_time = expire_time;
ts->expire_time = MAX(expire_time, 0);
ts->next = *pt;
*pt = ts;
qemu_mutex_unlock(&timer_list->active_timers_lock);
/* Rearm if necessary */
if (pt == &ts->timer_list->active_timers) {
if (pt == &timer_list->active_timers) {
/* Interrupt execution to force deadline recalculation. */
qemu_clock_warp(ts->timer_list->clock->type);
timerlist_notify(ts->timer_list);
qemu_clock_warp(timer_list->clock->type);
timerlist_notify(timer_list);
}
}
@ -354,13 +387,7 @@ void timer_mod(QEMUTimer *ts, int64_t expire_time)
bool timer_pending(QEMUTimer *ts)
{
QEMUTimer *t;
for (t = ts->timer_list->active_timers; t != NULL; t = t->next) {
if (t == ts) {
return true;
}
}
return false;
return ts->expire_time >= 0;
}
bool timer_expired(QEMUTimer *timer_head, int64_t current_time)
@ -373,23 +400,32 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
QEMUTimer *ts;
int64_t current_time;
bool progress = false;
QEMUTimerCB *cb;
void *opaque;
if (!timer_list->clock->enabled) {
return progress;
}
current_time = qemu_clock_get_ns(timer_list->clock->type);
for(;;) {
qemu_mutex_lock(&timer_list->active_timers_lock);
ts = timer_list->active_timers;
if (!timer_expired_ns(ts, current_time)) {
qemu_mutex_unlock(&timer_list->active_timers_lock);
break;
}
/* remove timer from the list before calling the callback */
timer_list->active_timers = ts->next;
ts->next = NULL;
ts->expire_time = -1;
cb = ts->cb;
opaque = ts->opaque;
qemu_mutex_unlock(&timer_list->active_timers_lock);
/* run the callback (the timer list can be modified) */
ts->cb(ts->opaque);
cb(opaque);
progress = true;
}
return progress;

View file

@ -182,17 +182,17 @@ static void perf_nesting(void)
unsigned int i, maxcycles, maxnesting;
double duration;
maxcycles = 100000000;
maxcycles = 10000;
maxnesting = 1000;
Coroutine *root;
NestData nd = {
.n_enter = 0,
.n_return = 0,
.max = maxnesting,
};
g_test_timer_start();
for (i = 0; i < maxcycles; i++) {
NestData nd = {
.n_enter = 0,
.n_return = 0,
.max = maxnesting,
};
root = qemu_coroutine_create(nest);
qemu_coroutine_enter(root, &nd);
}
@ -202,6 +202,38 @@ static void perf_nesting(void)
maxcycles, maxnesting, duration);
}
/*
* Yield benchmark
*/
static void coroutine_fn yield_loop(void *opaque)
{
unsigned int *counter = opaque;
while ((*counter) > 0) {
(*counter)--;
qemu_coroutine_yield();
}
}
static void perf_yield(void)
{
unsigned int i, maxcycles;
double duration;
maxcycles = 100000000;
i = maxcycles;
Coroutine *coroutine = qemu_coroutine_create(yield_loop);
g_test_timer_start();
while (i > 0) {
qemu_coroutine_enter(coroutine, &i);
}
duration = g_test_timer_elapsed();
g_test_message("Yield %u iterations: %f s\n",
maxcycles, duration);
}
int main(int argc, char **argv)
{
@ -214,6 +246,7 @@ int main(int argc, char **argv)
if (g_test_perf()) {
g_test_add_func("/perf/lifecycle", perf_lifecycle);
g_test_add_func("/perf/nesting", perf_nesting);
g_test_add_func("/perf/yield", perf_yield);
}
return g_test_run();
}

View file

@ -207,6 +207,13 @@ int qemu_open(const char *name, int flags, ...)
}
#endif
#ifdef O_DIRECT
if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
error_report("file system may not support O_DIRECT");
errno = EINVAL; /* in case it was clobbered */
}
#endif /* O_DIRECT */
return ret;
}