Fix a bug introduced between 1.1 and 1.1.5.  Loading the time was moved
outside the critical region.

Make it work with 2.0.  It wasn't designed to be called at splclock().

Make it work with prescaling.  The overflow threshold was bogus.

Make it work for any HZ.  Side effect of fixing prescaling.

Speed it up.  Allocate registers better.  Reduce multiplication and
division to multiplication and a shift.  Speed is now 5-6 usec on a
486DX/33, was about 3 usec more.

Optimize for the non-pentium case.  The pentium code got moved around
a bit and hasn't been tested.

Change #include's to 2.0 style.
This commit is contained in:
Bruce Evans 1994-11-05 23:53:46 +00:00
parent b2e3ee0a69
commit ae406484f0
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=4179

View file

@ -31,54 +31,34 @@
* SUCH DAMAGE.
*
* from: Steve McCanne's microtime code
* $Id: microtime.s,v 1.5 1994/08/11 00:28:17 wollman Exp $
* $Id: microtime.s,v 1.6 1994/08/13 17:45:09 wollman Exp $
*/
#include "machine/asmacros.h"
#include "../isa/isa.h"
#include "../isa/timerreg.h"
#include <machine/asmacros.h>
.extern _pentium_mhz
#include <i386/isa/icu.h>
#include <i386/isa/isa.h>
#include <i386/isa/timerreg.h>
/*
* Use a higher resolution version of microtime if HZ is not
* overridden (i.e. it is 100Hz).
*/
#ifndef HZ
ENTRY(microtime)
pushl %edi # save registers
pushl %esi
pushl %ebx
movl $_time, %ebx # get timeval ptr
#if defined(I586_CPU)
movl _pentium_mhz, %ecx
orl %ecx, %ecx
jz 0f
#ifdef I586_CPU
movl _pentium_mhz, %ecx
testl %ecx, %ecx
jne pentium_microtime
#else
xorl %ecx, %ecx # clear ecx
#endif
cli
.byte 0x0f, 0x31 /* RDTSC */
idivl _pentium_mhz /* get value in usec */
movl 4(%ebx), %esi
movl (%ebx), %edi
sti
jmp 4f
#endif /* Pentium code */
0:
movl (%ebx), %edi # sec = time.tv_sec
movl 4(%ebx), %esi # usec = time.tv_usec
movb $TIMER_SEL0|TIMER_LATCH, %al # prepare to latch
cli # disable interrupts
cli # disable interrupts
movl $(TIMER_SEL0|TIMER_LATCH), %eax
outb %al, $TIMER_MODE # latch timer 0's counter
xorl %ebx, %ebx # clear ebx
inb $TIMER_CNTR0, %al # Read counter value, LSB first
movb %al, %bl
inb $TIMER_CNTR0, %al # read counter value, LSB first
movb %al, %cl
inb $TIMER_CNTR0, %al
movb %al, %bh
movb %al, %ch
# Now check for counter overflow. This is tricky because the
# timer chip doesn't let us atomically read the current counter
@ -92,63 +72,110 @@ ENTRY(microtime)
# from the IRR, and mistakenly add a correction to the "close
# to zero" value.
#
# We compare the counter value to heuristic constant 11890.
# We compare the counter value to the prepared overflow threshold.
# If the counter value is less than this, we assume the counter
# didn't overflow between disabling interrupts above and latching
# the counter value. For example, we assume that the above 10 or so
# instructions take less than 11932 - 11890 = 42 microseconds to
# execute.
# didn't overflow between disabling timer interrupts and latching
# the counter value above. For example, we assume that interrupts
# are enabled when we are called (or were disabled just a few
# cycles before we are called and that the instructions before the
# "cli" are fast) and that the "cli" and "outb" instructions take
# less than 10 timer cycles to execute. The last assumption is
# very safe.
#
# Otherwise, the counter might have overflowed. We check for this
# condition by reading the interrupt request register out of the ICU.
# If it overflowed, we add in one clock period.
#
# The heuristic is "very accurate" because it works 100% if
# we're called from an ipl less than the clock. Otherwise,
# it might not work. Currently, only gettimeofday and bpf
# call microtime so it's not a problem.
# The heuristic is "very accurate" because it works 100% if we're
# called with interrupts enabled. Otherwise, it might not work.
# Currently, only siointrts() calls us with interrupts disabled, so
# the problem can be avoided at some cost to the general case. The
# costs are complications in callers to disable interrupts in
# IO_ICU1 and extra reads of the IRR forced by a conservative
# overflow threshold.
#
# In 2.0, we are called at splhigh() from mi_switch(), so we have
# to allow for the overflow bit being in ipending instead of in
# the IRR. Our caller may have executed many instructions since
# ipending was set, so the heuristic for the IRR is inappropriate
# for ipending. However, we don't need another heuristic, since
# the "cli" suffices to lock ipending.
movl _timer0_prescale, %eax # adjust value if timer is
addl _timer0_divisor, %eax # reprogrammed
addl $-11932, %eax
subl %eax, %ebx
movl _timer0_max_count, %edx # prepare for 2 uses
cmpl $11890, %ebx # do we have a possible overflow condition
jle 1f
testb $IRQ0, _ipending # is a soft timer interrupt pending?
jne overflow
# Do we have a possible overflow condition?
cmpl _timer0_overflow_threshold, %ecx
jbe 1f
inb $IO_ICU1, %al # read IRR in ICU
testb $1, %al # is a timer interrupt pending?
testb $IRQ0, %al # is a hard timer interrupt pending?
je 1f
addl $-11932, %ebx # yes, subtract one clock period
overflow:
subl %edx, %ecx # some intr pending, count timer down through 0
1:
# Subtract counter value from max count since it is a count-down value.
subl %ecx, %edx
# Adjust for partial ticks.
addl _timer0_prescaler_count, %edx
# To divide by 1.193200, we multiply by 27465 and shift right by 15.
#
# The multiplier was originally calculated to be
#
# 2^18 * 1000000 / 1193200 = 219698.
#
# The frequency is 1193200 to be compatible with rounding errors in
# the calculation of the usual maximum count. 2^18 is the largest
# power of 2 such that multiplying `i' by it doesn't overflow for i
# in the range of interest ([0, 11932 + 5)). We adjusted the
# multiplier a little to minimise the average of
#
# fabs(i / 1.193200 - ((multiplier * i) >> 18))
#
# for i in the range and then removed powers of 2 to speed up the
# multiplication and to avoid overflow for i outside the range
# (i may be as high as 2^17 if the timer is programmed to its
# largest possible maximum count). The absolute error is less than 1 for
# all i in the range.
#if 0
/*
 * Reference form of the scaling multiply, kept for documentation only.
 * It must use the same constant as the shift/add sequence below (27465)
 * and must leave the product in %eax, because the "shr $15, %eax" that
 * follows this conditional reads %eax.  %edx is left unchanged.
 */
imull $27465, %edx, %eax # eax = 27465 * edx 25 cycles on a 486
#else
/*
 * Build eax = 27465 * edx from lea/shift/add steps.  The running
 * multiple of edx held in each register is shown on every line.
 * %ecx is used as scratch; %edx is left unchanged.
 */
leal (%edx,%edx,2), %eax # a = 3 2 cycles on a 486
leal (%edx,%eax,4), %eax # a = 13 2
movl %eax, %ecx # c = 13 1
shl $5, %eax # a = 416 2
addl %ecx, %eax # a = 429 1
leal (%edx,%eax,8), %eax # a = 3433 2
leal (%edx,%eax,8), %eax # a = 27465 2 (total 12 cycles)
#endif /* 0 */
shr $15, %eax
/*
 * Shared tail for the timer-chip path and the Pentium path.  On entry
 * %eax holds the microsecond offset computed by the caller path and
 * interrupts are still disabled by that path's "cli", so both words of
 * _time are read before an interrupt can be taken.
 */
common_microtime:
addl _time+4, %eax # usec += time.tv_usec
movl _time, %edx # sec = time.tv_sec
sti # enable interrupts
movl $11932, %eax # subtract counter value from 11932 since
subl %ebx, %eax # it is a count-down value
cmpl $1000000, %eax # usec valid?
jb 1f
subl $1000000, %eax # adjust usec
incl %edx # bump sec
1:
movl 4(%esp), %ecx # load timeval pointer arg
movl %edx, (%ecx) # tvp->tv_sec = sec
movl %eax, 4(%ecx) # tvp->tv_usec = usec
movl %eax, %ebx # this really is a "imull $1000, %eax, %eax"
sall $10, %eax # instruction, but this saves us
sall $3, %ebx # 33/23 clocks on a 486/386 CPU
subl %ebx, %eax #
sall $1, %ebx # /sos
subl %ebx, %eax #
movl $0, %edx # zero extend eax into edx for div
movl $1193, %ecx
idivl %ecx # convert to usecs: mult by 1000/1193
4:
addl %eax, %esi # add counter usecs to time.tv_usec
cmpl $1000000, %esi # carry in timeval?
jl 2f
subl $1000000, %esi # adjust usec
incl %edi # bump sec
2:
movl 16(%esp), %ecx # load timeval pointer arg
movl %edi, (%ecx) # tvp->tv_sec = sec
movl %esi, 4(%ecx) # tvp->tv_usec = usec
popl %ebx # restore regs
popl %esi
popl %edi
ret
#endif /* normal value of HZ */
ALIGN_TEXT
/*
 * Pentium path.  Reached from microtime (I586_CPU builds only) when
 * _pentium_mhz, already loaded into %ecx, is nonzero.  Executes RDTSC
 * with interrupts disabled, divides the result by %ecx and jumps to
 * common_microtime, which adds time.tv_usec to %eax and re-enables
 * interrupts.
 *
 * NOTE(review): divl divides the 64-bit value in %edx:%eax by %ecx and
 * raises a divide error when the quotient does not fit in 32 bits, i.e.
 * when the high word left in %edx by RDTSC is >= %ecx.  Nothing visible
 * here bounds the counter value, and the commit log says this path has
 * not been tested; confirm before relying on it.
 */
pentium_microtime:
cli # interrupts stay off until the sti in common_microtime
.byte 0x0f, 0x31 # RDTSC
divl %ecx # get value in usec
jmp common_microtime