From ae406484f0c0b75714db6dc83b65f69181ea5ae1 Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Sat, 5 Nov 1994 23:53:46 +0000 Subject: [PATCH] Fix a bug introduced between 1.1 and 1.1.5. Loading the time was moved outside the critical region. Make it work with 2.0. It wasn't designed to be called at splclock(). Make it work with prescaling. The overflow threshold was bogus. Make it work for any HZ. Side effect of fixing prescaling. Speed it up. Allocate registers better. Reduce multiplication and division to multiplication and a shift. Speed is now 5-6 usec on a 486DX/33, was about 3 usec more. Optimize for the non-pentium case. The pentium code got moved around a bit and hasn't been tested. Change #include's to 2.0 style. --- sys/i386/i386/microtime.s | 189 ++++++++++++++++++++++---------------- 1 file changed, 108 insertions(+), 81 deletions(-) diff --git a/sys/i386/i386/microtime.s b/sys/i386/i386/microtime.s index 07bbba623215..19031c727c0b 100644 --- a/sys/i386/i386/microtime.s +++ b/sys/i386/i386/microtime.s @@ -31,54 +31,34 @@ * SUCH DAMAGE. * * from: Steve McCanne's microtime code - * $Id: microtime.s,v 1.5 1994/08/11 00:28:17 wollman Exp $ + * $Id: microtime.s,v 1.6 1994/08/13 17:45:09 wollman Exp $ */ -#include "machine/asmacros.h" -#include "../isa/isa.h" -#include "../isa/timerreg.h" +#include - .extern _pentium_mhz +#include +#include +#include -/* - * Use a higher resolution version of microtime if HZ is not - * overridden (i.e. it is 100Hz). 
- */ -#ifndef HZ ENTRY(microtime) - pushl %edi # save registers - pushl %esi - pushl %ebx - movl $_time, %ebx # get timeval ptr - -#if defined(I586_CPU) - movl _pentium_mhz, %ecx - orl %ecx, %ecx - jz 0f +#ifdef I586_CPU + movl _pentium_mhz, %ecx + testl %ecx, %ecx + jne pentium_microtime +#else + xorl %ecx, %ecx # clear ecx +#endif - cli - .byte 0x0f, 0x31 /* RDTSC */ - idivl _pentium_mhz /* get value in usec */ - movl 4(%ebx), %esi - movl (%ebx), %edi - sti - jmp 4f -#endif /* Pentium code */ -0: - movl (%ebx), %edi # sec = time.tv_sec - movl 4(%ebx), %esi # usec = time.tv_usec + movb $TIMER_SEL0|TIMER_LATCH, %al # prepare to latch - cli # disable interrupts + cli # disable interrupts - movl $(TIMER_SEL0|TIMER_LATCH), %eax outb %al, $TIMER_MODE # latch timer 0's counter - - xorl %ebx, %ebx # clear ebx - inb $TIMER_CNTR0, %al # Read counter value, LSB first - movb %al, %bl + inb $TIMER_CNTR0, %al # read counter value, LSB first + movb %al, %cl inb $TIMER_CNTR0, %al - movb %al, %bh + movb %al, %ch # Now check for counter overflow. This is tricky because the # timer chip doesn't let us atomically read the current counter @@ -92,63 +72,110 @@ ENTRY(microtime) # from the IRR, and mistakenly add a correction to the "close # to zero" value. # - # We compare the counter value to heuristic constant 11890. + # We compare the counter value to the prepared overflow threshold. # If the counter value is less than this, we assume the counter - # didn't overflow between disabling interrupts above and latching - # the counter value. For example, we assume that the above 10 or so - # instructions take less than 11932 - 11890 = 42 microseconds to - # execute. + # didn't overflow between disabling timer interrupts and latching + # the counter value above. 
For example, we assume that interrupts + # are enabled when we are called (or were disabled just a few + # cycles before we are called and that the instructions before the + # "cli" are fast) and that the "cli" and "outb" instructions take + # less than 10 timer cycles to execute. The last assumption is + # very safe. # # Otherwise, the counter might have overflowed. We check for this # condition by reading the interrupt request register out of the ICU. # If it overflowed, we add in one clock period. # - # The heuristic is "very accurate" because it works 100% if - # we're called from an ipl less than the clock. Otherwise, - # it might not work. Currently, only gettimeofday and bpf - # call microtime so it's not a problem. + # The heuristic is "very accurate" because it works 100% if we're + # called with interrupts enabled. Otherwise, it might not work. + # Currently, only siointrts() calls us with interrupts disabled, so + # the problem can be avoided at some cost to the general case. The + # costs are complications in callers to disable interrupts in + # IO_ICU1 and extra reads of the IRR forced by a conservative + # overflow threshold. + # + # In 2.0, we are called at splhigh() from mi_switch(), so we have + # to allow for the overflow bit being in ipending instead of in + # the IRR. Our caller may have executed many instructions since + # ipending was set, so the heuristic for the IRR is inappropriate + # for ipending. However, we don't need another heuristic, since + # the "cli" suffices to lock ipending. - movl _timer0_prescale, %eax # adjust value if timer is - addl _timer0_divisor, %eax # reprogrammed - addl $-11932, %eax - subl %eax, %ebx + movl _timer0_max_count, %edx # prepare for 2 uses - cmpl $11890, %ebx # do we have a possible overflow condition - jle 1f + testb $IRQ0, _ipending # is a soft timer interrupt pending? + jne overflow + + # Do we have a possible overflow condition? 
+ cmpl _timer0_overflow_threshold, %ecx + jbe 1f inb $IO_ICU1, %al # read IRR in ICU - testb $1, %al # is a timer interrupt pending? + testb $IRQ0, %al # is a hard timer interrupt pending? je 1f - addl $-11932, %ebx # yes, subtract one clock period +overflow: + subl %edx, %ecx # some intr pending, count timer down through 0 1: + + # Subtract counter value from max count since it is a count-down value. + subl %ecx, %edx + + # Adjust for partial ticks. + addl _timer0_prescaler_count, %edx + + # To divide by 1.193200, we multiply by 27465 and shift right by 15. + # + # The multiplier was originally calculated to be + # + # 2^18 * 1000000 / 1193200 = 219698. + # + # The frequency is 1193200 to be compatible with rounding errors in + # the calculation of the usual maximum count. 2^18 is the largest + # power of 2 such that multiplying `i' by it doesn't overflow for i + # in the range of interest ([0, 11932 + 5)). We adjusted the + # multiplier a little to minimise the average of + # + # fabs(i / 1.193200 - ((multiplier * i) >> 18)) + # + # for i in the range and then removed powers of 2 to speed up the + # multiplication and to avoid overflow for i outside the range + # (i may be as high as 2^17 if the timer is programmed to its + # maximum maximum count). The absolute error is less than 1 for + # all i in the range. + +#if 0 + imul $27645, %edx # 25 cycles on a 486 +#else + leal (%edx,%edx,2), %eax # a = 3 2 cycles on a 486 + leal (%edx,%eax,4), %eax # a = 13 2 + movl %eax, %ecx # c = 13 1 + shl $5, %eax # a = 416 2 + addl %ecx, %eax # a = 429 1 + leal (%edx,%eax,8), %eax # a = 3433 2 + leal (%edx,%eax,8), %eax # a = 27465 2 (total 12 cycles) +#endif /* 0 */ + shr $15, %eax + +common_microtime: + addl _time+4, %eax # usec += time.tv_usec + movl _time, %edx # sec = time.tv_sec + sti # enable interrupts - movl $11932, %eax # subtract counter value from 11932 since - subl %ebx, %eax # it is a count-down value + cmpl $1000000, %eax # usec valid? 
+ jb 1f + subl $1000000, %eax # adjust usec + incl %edx # bump sec +1: + movl 4(%esp), %ecx # load timeval pointer arg + movl %edx, (%ecx) # tvp->tv_sec = sec + movl %eax, 4(%ecx) # tvp->tv_usec = usec - movl %eax, %ebx # this really is a "imull $1000, %eax, %eax" - sall $10, %eax # instruction, but this saves us - sall $3, %ebx # 33/23 clocks on a 486/386 CPU - subl %ebx, %eax # - sall $1, %ebx # /sos - subl %ebx, %eax # - - movl $0, %edx # zero extend eax into edx for div - movl $1193, %ecx - idivl %ecx # convert to usecs: mult by 1000/1193 -4: - addl %eax, %esi # add counter usecs to time.tv_usec - cmpl $1000000, %esi # carry in timeval? - jl 2f - subl $1000000, %esi # adjust usec - incl %edi # bump sec -2: - movl 16(%esp), %ecx # load timeval pointer arg - movl %edi, (%ecx) # tvp->tv_sec = sec - movl %esi, 4(%ecx) # tvp->tv_usec = usec - - popl %ebx # restore regs - popl %esi - popl %edi ret -#endif /* normal value of HZ */ + + ALIGN_TEXT +pentium_microtime: + cli + .byte 0x0f, 0x31 # RDTSC + divl %ecx # get value in usec + jmp common_microtime