linux/arch/arm/lib/div64.S
Thomas Gleixner d2912cb15b treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 500
Based on 2 normalized pattern(s):

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license version 2 as
  published by the free software foundation

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license version 2 as
  published by the free software foundation #

extracted by the scancode license scanner the SPDX license identifier

  GPL-2.0-only

has been chosen to replace the boilerplate/reference in 4122 file(s).

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Enrico Weigelt <info@metux.net>
Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org>
Reviewed-by: Allison Randal <allison@lohutok.net>
Cc: linux-spdx@vger.kernel.org
Link: https://lkml.kernel.org/r/20190604081206.933168790@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-06-19 17:09:55 +02:00

210 lines
3.8 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* linux/arch/arm/lib/div64.S
*
* Optimized computation of 64-bit dividend / 32-bit divisor
*
* Author: Nicolas Pitre
* Created: Oct 5, 2003
* Copyright: Monta Vista Software, Inc.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif
/*
* __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
*
* Note: Calling convention is totally non standard for optimal code.
* This is meant to be used by do_div() from include/asm/div64.h only.
*
* Input parameters:
* xh-xl = dividend (clobbered)
* r4 = divisor (preserved)
*
* Output values:
* yh-yl = result
* xh = remainder
*
* Clobbered regs: xl, ip
*/
ENTRY(__do_div64)
UNWIND(.fnstart)
@ Test for easy paths first.
subs ip, r4, #1
bls 9f @ divisor is 0 or 1
tst ip, r4
beq 8f @ divisor is power of 2
@ See if we need to handle upper 32-bit result.
cmp xh, r4
mov yh, #0
blo 3f
@ Align divisor with upper part of dividend.
@ The aligned divisor is stored in yl preserving the original.
@ The bit position is stored in ip.
#if __LINUX_ARM_ARCH__ >= 5
clz yl, r4
clz ip, xh
sub yl, yl, ip
mov ip, #1
mov ip, ip, lsl yl
mov yl, r4, lsl yl
#else
mov yl, r4
mov ip, #1
1: cmp yl, #0x80000000
cmpcc yl, xh
movcc yl, yl, lsl #1
movcc ip, ip, lsl #1
bcc 1b
#endif
@ The division loop for needed upper bit positions.
@ Break out early if dividend reaches 0.
2: cmp xh, yl
orrcs yh, yh, ip
subscs xh, xh, yl
movsne ip, ip, lsr #1
mov yl, yl, lsr #1
bne 2b
@ See if we need to handle lower 32-bit result.
3: cmp xh, #0
mov yl, #0
cmpeq xl, r4
movlo xh, xl
retlo lr
@ The division loop for lower bit positions.
@ Here we shift remainer bits leftwards rather than moving the
@ divisor for comparisons, considering the carry-out bit as well.
mov ip, #0x80000000
4: movs xl, xl, lsl #1
adcs xh, xh, xh
beq 6f
cmpcc xh, r4
5: orrcs yl, yl, ip
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
ret lr
@ The top part of remainder became zero. If carry is set
@ (the 33th bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
reteq lr
@ We still have remainer bits in the low part. Bring them up.
#if __LINUX_ARM_ARCH__ >= 5
clz xh, xl @ we know xh is zero here so...
add xh, xh, #1
mov xl, xl, lsl xh
mov ip, ip, lsr xh
#else
7: movs xl, xl, lsl #1
mov ip, ip, lsr #1
bcc 7b
#endif
@ Current remainder is now 1. It is worthless to compare with
@ divisor at this point since divisor can not be smaller than 3 here.
@ If possible, branch for another shift in the division loop.
@ If no bit position left then we are done.
movs ip, ip, lsr #1
mov xh, #1
bne 4b
ret lr
8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
#if __LINUX_ARM_ARCH__ >= 5
clz ip, r4
rsb ip, ip, #31
#else
mov yl, r4
cmp r4, #(1 << 16)
mov ip, #0
movhs yl, yl, lsr #16
movhs ip, #16
cmp yl, #(1 << 8)
movhs yl, yl, lsr #8
addhs ip, ip, #8
cmp yl, #(1 << 4)
movhs yl, yl, lsr #4
addhs ip, ip, #4
cmp yl, #(1 << 2)
addhi ip, ip, #3
addls ip, ip, yl, lsr #1
#endif
mov yh, xh, lsr ip
mov yl, xl, lsr ip
rsb ip, ip, #32
ARM( orr yl, yl, xh, lsl ip )
THUMB( lsl xh, xh, ip )
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
ret lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
reteq lr
UNWIND(.fnend)
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
@ Division by 0:
str lr, [sp, #-8]!
bl __div0
@ as wrong as it could be...
mov yl, #0
mov yh, #0
mov xh, #0
ldr pc, [sp], #8
UNWIND(.fnend)
ENDPROC(__do_div64)