From 354e9778a3327e125ed99e86d7b6e648ed0ccd53 Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Tue, 29 Mar 2016 11:39:33 -0400 Subject: [PATCH] cmd/asm: add s390x support s390x doesn't introduce any new assembly syntax. There are a few instructions which require the operands to be reordered, notably the storage-storage instructions that put the length into From3 so that the memory operands can be put into From and To. The assembly test currently covers a subset of instructions but tries to hit edge cases as much as possible. Unlike the other ports it can be linked as an executable to make disassembling it easy. It would be nice to autogenerate it at some point in the future. Change-Id: I8dd542c34b9e450b8129d46693a5acb0ded791ce Reviewed-on: https://go-review.googlesource.com/21253 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/cmd/asm/internal/arch/arch.go | 58 ++++++ src/cmd/asm/internal/arch/s390x.go | 139 ++++++++++++++ src/cmd/asm/internal/asm/asm.go | 30 +++ src/cmd/asm/internal/asm/endtoend_test.go | 4 + src/cmd/asm/internal/asm/operand_test.go | 103 +++++++++++ src/cmd/asm/internal/asm/testdata/s390x.s | 215 ++++++++++++++++++++++ 6 files changed, 549 insertions(+) create mode 100644 src/cmd/asm/internal/arch/s390x.go create mode 100644 src/cmd/asm/internal/asm/testdata/s390x.s diff --git a/src/cmd/asm/internal/arch/arch.go b/src/cmd/asm/internal/arch/arch.go index 6159ede6c56..bd9658301cf 100644 --- a/src/cmd/asm/internal/arch/arch.go +++ b/src/cmd/asm/internal/arch/arch.go @@ -10,6 +10,7 @@ import ( "cmd/internal/obj/arm64" "cmd/internal/obj/mips" "cmd/internal/obj/ppc64" + "cmd/internal/obj/s390x" "cmd/internal/obj/x86" "fmt" "strings" @@ -74,6 +75,10 @@ func Set(GOARCH string) *Arch { a := archPPC64() a.LinkArch = &ppc64.Linkppc64le return a + case "s390x": + a := archS390x() + a.LinkArch = &s390x.Links390x + return a } return nil } @@ -416,3 +421,56 @@ func archMips64() *Arch { IsJump: jumpMIPS64, } } + +func archS390x() *Arch { + register := make(map[string]int16) + // Create maps for easy lookup of instruction names etc. + // Note that there is no list of names as there is for x86. + for i := s390x.REG_R0; i <= s390x.REG_R15; i++ { + register[obj.Rconv(i)] = int16(i) + } + for i := s390x.REG_F0; i <= s390x.REG_F15; i++ { + register[obj.Rconv(i)] = int16(i) + } + for i := s390x.REG_V0; i <= s390x.REG_V31; i++ { + register[obj.Rconv(i)] = int16(i) + } + for i := s390x.REG_AR0; i <= s390x.REG_AR15; i++ { + register[obj.Rconv(i)] = int16(i) + } + register["LR"] = s390x.REG_LR + // Pseudo-registers. + register["SB"] = RSB + register["FP"] = RFP + register["PC"] = RPC + // Avoid unintentionally clobbering g using R13. + delete(register, "R13") + register["g"] = s390x.REG_R13 + registerPrefix := map[string]bool{ + "AR": true, + "F": true, + "R": true, + } + + instructions := make(map[string]obj.As) + for i, s := range obj.Anames { + instructions[s] = obj.As(i) + } + for i, s := range s390x.Anames { + if obj.As(i) >= obj.A_ARCHSPECIFIC { + instructions[s] = obj.As(i) + obj.ABaseS390X + } + } + // Annoying aliases. + instructions["BR"] = s390x.ABR + instructions["BL"] = s390x.ABL + + return &Arch{ + LinkArch: &s390x.Links390x, + Instructions: instructions, + Register: register, + RegisterPrefix: registerPrefix, + RegisterNumber: s390xRegisterNumber, + IsJump: jumpS390x, + } +} diff --git a/src/cmd/asm/internal/arch/s390x.go b/src/cmd/asm/internal/arch/s390x.go new file mode 100644 index 00000000000..6fa0292759d --- /dev/null +++ b/src/cmd/asm/internal/arch/s390x.go @@ -0,0 +1,139 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file encapsulates some of the odd characteristics of the +// s390x instruction set, to minimize its interaction +// with the core of the assembler. + +package arch + +import ( + "cmd/internal/obj" + "cmd/internal/obj/s390x" +) + +func jumpS390x(word string) bool { + switch word { + case "BC", + "BCL", + "BEQ", + "BGE", + "BGT", + "BL", + "BLE", + "BLT", + "BNE", + "BR", + "BVC", + "BVS", + "CMPBEQ", + "CMPBGE", + "CMPBGT", + "CMPBLE", + "CMPBLT", + "CMPBNE", + "CMPUBEQ", + "CMPUBGE", + "CMPUBGT", + "CMPUBLE", + "CMPUBLT", + "CMPUBNE", + "CALL", + "JMP": + return true + } + return false +} + +// IsS390xRLD reports whether the op (as defined by an s390x.A* constant) is +// one of the RLD-like instructions that require special handling. +// The FMADD-like instructions behave similarly. +func IsS390xRLD(op obj.As) bool { + switch op { + case s390x.AFMADD, + s390x.AFMADDS, + s390x.AFMSUB, + s390x.AFMSUBS, + s390x.AFNMADD, + s390x.AFNMADDS, + s390x.AFNMSUB, + s390x.AFNMSUBS: + return true + } + return false +} + +// IsS390xCMP reports whether the op (as defined by an s390x.A* constant) is +// one of the CMP instructions that require special handling. +func IsS390xCMP(op obj.As) bool { + switch op { + case s390x.ACMP, s390x.ACMPU, s390x.ACMPW, s390x.ACMPWU: + return true + } + return false +} + +// IsS390xNEG reports whether the op (as defined by an s390x.A* constant) is +// one of the NEG-like instructions that require special handling. +func IsS390xNEG(op obj.As) bool { + switch op { + case s390x.AADDME, + s390x.AADDZE, + s390x.ANEG, + s390x.ASUBME, + s390x.ASUBZE: + return true + } + return false +} + +// IsS390xWithLength reports whether the op (as defined by an s390x.A* constant) +// refers to an instruction which takes a length as its first argument. +func IsS390xWithLength(op obj.As) bool { + switch op { + case s390x.AMVC, s390x.ACLC, s390x.AXC, s390x.AOC, s390x.ANC: + return true + case s390x.AVLL, s390x.AVSTL: + return true + } + return false +} + +// IsS390xWithIndex reports whether the op (as defined by an s390x.A* constant) +// refers to an instruction which takes an index as its first argument. +func IsS390xWithIndex(op obj.As) bool { + switch op { + case s390x.AVSCEG, s390x.AVSCEF, s390x.AVGEG, s390x.AVGEF: + return true + case s390x.AVGMG, s390x.AVGMF, s390x.AVGMH, s390x.AVGMB: + return true + case s390x.AVLEIG, s390x.AVLEIF, s390x.AVLEIH, s390x.AVLEIB: + return true + case s390x.AVPDI: + return true + } + return false +} + +func s390xRegisterNumber(name string, n int16) (int16, bool) { + switch name { + case "AR": + if 0 <= n && n <= 15 { + return s390x.REG_AR0 + n, true + } + case "F": + if 0 <= n && n <= 15 { + return s390x.REG_F0 + n, true + } + case "R": + if 0 <= n && n <= 15 { + return s390x.REG_R0 + n, true + } + case "V": + if 0 <= n && n <= 31 { + return s390x.REG_V0 + n, true + } + } + return 0, false +} diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go index 0c52c904f58..950fd735c99 100644 --- a/src/cmd/asm/internal/asm/asm.go +++ b/src/cmd/asm/internal/asm/asm.go @@ -386,6 +386,20 @@ func (p *Parser) asmJump(op obj.As, cond string, a []obj.Addr) { prog.Reg = p.getRegister(prog, op, &a[1]) break } + if p.arch.Thechar == 'z' { + // 3-operand jumps. + target = &a[2] + prog.From = a[0] + if a[1].Reg != 0 { + // Compare two registers and jump. + prog.Reg = p.getRegister(prog, op, &a[1]) + } else { + // Compare register with immediate and jump. + prog.From3 = newAddr(a[1]) + } + break + } + fallthrough default: p.errorf("wrong number of arguments to %s instruction", obj.Aconv(op)) @@ -598,6 +612,15 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { p.errorf("invalid addressing modes for %s instruction", obj.Aconv(op)) return } + case 'z': + if arch.IsS390xWithLength(op) || arch.IsS390xWithIndex(op) { + prog.From = a[1] + prog.From3 = newAddr(a[0]) + } else { + prog.Reg = p.getRegister(prog, op, &a[1]) + prog.From = a[0] + } + prog.To = a[2] default: p.errorf("TODO: implement three-operand instructions for this architecture") return @@ -633,6 +656,13 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { prog.To = a[3] break } + if p.arch.Thechar == 'z' { + prog.From = a[1] + prog.Reg = p.getRegister(prog, op, &a[2]) + prog.From3 = newAddr(a[0]) + prog.To = a[3] + break + } p.errorf("can't handle %s instruction with 4 operands", obj.Aconv(op)) return case 5: diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index 4a3e0ee2659..1307c4243f1 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -389,3 +389,7 @@ func TestMIPS64EndToEnd(t *testing.T) { func TestPPC64EndToEnd(t *testing.T) { testEndToEnd(t, "ppc64", "ppc64") } + +func TestS390XEndToEnd(t *testing.T) { + testEndToEnd(t, "s390x", "s390x") +} diff --git a/src/cmd/asm/internal/asm/operand_test.go b/src/cmd/asm/internal/asm/operand_test.go index bc6a495c987..066d55a3943 100644 --- a/src/cmd/asm/internal/asm/operand_test.go +++ b/src/cmd/asm/internal/asm/operand_test.go @@ -70,6 +70,11 @@ func TestMIPS64OperandParser(t *testing.T) { testOperandParser(t, parser, mips64OperandTests) } +func TestS390XOperandParser(t *testing.T) { + parser := newParser("s390x") + testOperandParser(t, parser, s390xOperandTests) +} + type operandTest struct { input, output string } @@ -526,3 +531,101 @@ var mips64OperandTests = []operandTest{ {"·trunc(SB)", "\"\".trunc(SB)"}, {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms. } + +var s390xOperandTests = []operandTest{ + {"$((1<<63)-1)", "$9223372036854775807"}, + {"$(-64*1024)", "$-65536"}, + {"$(1024 * 8)", "$8192"}, + {"$-1", "$-1"}, + {"$-24(R4)", "$-24(R4)"}, + {"$0", "$0"}, + {"$0(R1)", "$(R1)"}, + {"$0.5", "$(0.5)"}, + {"$0x7000", "$28672"}, + {"$0x88888eef", "$2290650863"}, + {"$1", "$1"}, + {"$_main<>(SB)", "$_main<>(SB)"}, + {"$argframe(FP)", "$argframe(FP)"}, + {"$~3", "$-4"}, + {"(-288-3*8)(R1)", "-312(R1)"}, + {"(16)(R7)", "16(R7)"}, + {"(8)(g)", "8(g)"}, + {"(R0)", "(R0)"}, + {"(R3)", "(R3)"}, + {"(R4)", "(R4)"}, + {"(R5)", "(R5)"}, + {"-1(R4)", "-1(R4)"}, + {"-1(R5)", "-1(R5)"}, + {"6(PC)", "6(PC)"}, + {"R0", "R0"}, + {"R1", "R1"}, + {"R2", "R2"}, + {"R3", "R3"}, + {"R4", "R4"}, + {"R5", "R5"}, + {"R6", "R6"}, + {"R7", "R7"}, + {"R8", "R8"}, + {"R9", "R9"}, + {"R10", "R10"}, + {"R11", "R11"}, + {"R12", "R12"}, + // {"R13", "R13"}, R13 is g + {"R14", "R14"}, + {"R15", "R15"}, + {"F0", "F0"}, + {"F1", "F1"}, + {"F2", "F2"}, + {"F3", "F3"}, + {"F4", "F4"}, + {"F5", "F5"}, + {"F6", "F6"}, + {"F7", "F7"}, + {"F8", "F8"}, + {"F9", "F9"}, + {"F10", "F10"}, + {"F11", "F11"}, + {"F12", "F12"}, + {"F13", "F13"}, + {"F14", "F14"}, + {"F15", "F15"}, + {"V0", "V0"}, + {"V1", "V1"}, + {"V2", "V2"}, + {"V3", "V3"}, + {"V4", "V4"}, + {"V5", "V5"}, + {"V6", "V6"}, + {"V7", "V7"}, + {"V8", "V8"}, + {"V9", "V9"}, + {"V10", "V10"}, + {"V11", "V11"}, + {"V12", "V12"}, + {"V13", "V13"}, + {"V14", "V14"}, + {"V15", "V15"}, + {"V16", "V16"}, + {"V17", "V17"}, + {"V18", "V18"}, + {"V19", "V19"}, + {"V20", "V20"}, + {"V21", "V21"}, + {"V22", "V22"}, + {"V23", "V23"}, + {"V24", "V24"}, + {"V25", "V25"}, + {"V26", "V26"}, + {"V27", "V27"}, + {"V28", "V28"}, + {"V29", "V29"}, + {"V30", "V30"}, + {"V31", "V31"}, + {"a(FP)", "a(FP)"}, + {"g", "g"}, + {"ret+8(FP)", "ret+8(FP)"}, + {"runtime·abort(SB)", "runtime.abort(SB)"}, + {"·AddUint32(SB)", "\"\".AddUint32(SB)"}, + {"·trunc(SB)", "\"\".trunc(SB)"}, + {"[):[o-FP", ""}, // Issue 12469 - asm hung parsing the o-FP range on non ARM platforms. +} diff --git a/src/cmd/asm/internal/asm/testdata/s390x.s b/src/cmd/asm/internal/asm/testdata/s390x.s new file mode 100644 index 00000000000..148cd2eaaed --- /dev/null +++ b/src/cmd/asm/internal/asm/testdata/s390x.s @@ -0,0 +1,215 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +TEXT main·foo(SB),7,$16-0 // TEXT main.foo(SB), 7, $16-0 + MOVD R1, R2 // b9040021 + MOVW R3, R4 // b9140043 + MOVH R5, R6 // b9070065 + MOVB R7, R8 // b9060087 + MOVWZ R1, R2 // b9160021 + MOVHZ R2, R3 // b9850032 + MOVBZ R4, R5 // b9840054 + MOVDBR R1, R2 // b90f0021 + MOVWBR R3, R4 // b91f0043 + + MOVD (R15), R1 // e310f0000004 + MOVW (R15), R2 // e320f0000014 + MOVH (R15), R3 // e330f0000015 + MOVB (R15), R4 // e340f0000077 + MOVWZ (R15), R5 // e350f0000016 + MOVHZ (R15), R6 // e360f0000091 + MOVBZ (R15), R7 // e370f0000090 + MOVDBR (R15), R8 // e380f000000f + MOVWBR (R15), R9 // e390f000001e + + MOVD R1, n-8(SP) // e310f0100024 + MOVW R2, n-8(SP) // e320f0100050 + MOVH R3, n-8(SP) // e330f0100070 + MOVB R4, n-8(SP) // e340f0100072 + MOVWZ R5, n-8(SP) // e350f0100050 + MOVHZ R6, n-8(SP) // e360f0100070 + MOVBZ R7, n-8(SP) // e370f0100072 + MOVDBR R8, n-8(SP) // e380f010002f + MOVWBR R9, n-8(SP) // e390f010003e + + MOVD $-8589934592, R1 // c01efffffffe + MOVW $-131072, R2 // c021fffe0000 + MOVH $-512, R3 // a739fe00 + MOVB $-1, R4 // a749ffff + + MOVD $-2147483648, n-8(SP) // c0b180000000e3b0f0100024 + MOVW $-131072, n-8(SP) // c0b1fffe0000e3b0f0100050 + MOVH $-512, n-8(SP) // e544f010fe00 + MOVB $-1, n-8(SP) // 92fff010 + + ADD R1, R2 // b9e81022 + ADD R1, R2, R3 // b9e81032 + ADD $8192, R1 // c21800002000 + ADD $8192, R1, R2 // ec21200000d9 + ADDC R1, R2 // b9ea1022 + ADDC $1, R1, R2 // b9040021c22a00000001 + ADDC R1, R2, R3 // b9ea1032 + SUB R3, R4 // b9090043 + SUB R3, R4, R5 // b9e93054 + SUB $8192, R3 // c238ffffe000 + SUB $8192, R3, R4 // ec43e00000d9 + SUBC R1, R2 // b90b0021 + SUBC $1, R1, R2 // b9040021c22affffffff + SUBC R2, R3, R4 // b9eb2043 + MULLW R6, R7 // b91c0076 + MULLW R6, R7, R8 // b9040087b91c0086 + MULLW $8192, R6 // c26000002000 + MULLW $8192, R6, R7 // b9040076c27000002000 + DIVD R1, R2 // b90400b2b90d00a1b904002b + DIVD R1, R2, R3 // b90400b2b90d00a1b904003b + DIVW R4, R5 // b90400b5b91d00a4b904005b + DIVW R4, R5, R6 // b90400b5b91d00a4b904006b + DIVDU R7, R8 // b90400a0b90400b8b98700a7b904008b + DIVDU R7, R8, R9 // b90400a0b90400b8b98700a7b904009b + DIVWU R1, R2 // b90400a0b90400b2b99700a1b904002b + DIVWU R1, R2, R3 // b90400a0b90400b2b99700a1b904003b + + XC $8, (R15), n-8(SP) // XC (R15), $8, n-8(SP) // d707f010f000 + NC $8, (R15), n-8(SP) // NC (R15), $8, n-8(SP) // d407f010f000 + OC $8, (R15), n-8(SP) // OC (R15), $8, n-8(SP) // d607f010f000 + MVC $8, (R15), n-8(SP) // MVC (R15), $8, n-8(SP) // d207f010f000 + CLC $8, (R15), n-8(SP) // CLC (R15), $8, n-8(SP) // d507f000f010 + XC $256, -8(R15), -8(R15) // XC -8(R15), $256, -8(R15) // b90400afc2a8fffffff8d7ffa000a000 + MVC $256, 8192(R1), 8192(R2) // MVC 8192(R1), $256, 8192(R2) // b90400a2c2a800002000b90400b1c2b800002000d2ffa000b000 + + CMP R1, R2 // b9200012 + CMP R3, $-2147483648 // c23c80000000 + CMPU R4, R5 // b9210045 + CMPU R6, $4294967295 // c26effffffff + CMPW R7, R8 // 1978 + CMPW R9, $-2147483648 // c29d80000000 + CMPWU R1, R2 // 1512 + CMPWU R3, $4294967295 // c23fffffffff + + BNE 0(PC) // a7740000 + BEQ 0(PC) // a7840000 + BLT 0(PC) // a7440000 + BLE 0(PC) // a7c40000 + BGT 0(PC) // a7240000 + BGE 0(PC) // a7a40000 + + CMPBNE R1, R2, 0(PC) // ec1200007064 + CMPBEQ R3, R4, 0(PC) // ec3400008064 + CMPBLT R5, R6, 0(PC) // ec5600004064 + CMPBLE R7, R8, 0(PC) // ec780000c064 + CMPBGT R9, R1, 0(PC) // ec9100002064 + CMPBGE R2, R3, 0(PC) // ec230000a064 + + CMPBNE R1, $-127, 0(PC) // ec170000817c + CMPBEQ R3, $0, 0(PC) // ec380000007c + CMPBLT R5, $128, 0(PC) // ec540000807c + CMPBLE R7, $127, 0(PC) // ec7c00007f7c + CMPBGT R9, $0, 0(PC) // ec920000007c + CMPBGE R2, $128, 0(PC) // ec2a0000807c + + CMPUBNE R1, R2, 0(PC) // ec1200007065 + CMPUBEQ R3, R4, 0(PC) // ec3400008065 + CMPUBLT R5, R6, 0(PC) // ec5600004065 + CMPUBLE R7, R8, 0(PC) // ec780000c065 + CMPUBGT R9, R1, 0(PC) // ec9100002065 + CMPUBGE R2, R3, 0(PC) // ec230000a065 + + CMPUBNE R1, $256, 0(PC) // ec170000007d + CMPUBEQ R3, $0, 0(PC) // ec380000007d + CMPUBLT R5, $256, 0(PC) // ec540000007d + CMPUBLE R7, $0, 0(PC) // ec7c0000007d + CMPUBGT R9, $256, 0(PC) // ec920000007d + CMPUBGE R2, $0, 0(PC) // ec2a0000007d + + CEFBRA R0, F15 // b39400f0 + CDFBRA R1, F14 // b39500e1 + CEGBRA R2, F13 // b3a400d2 + CDGBRA R3, F12 // b3a500c3 + + CELFBR R0, F15 // b39000f0 + CDLFBR R1, F14 // b39100e1 + CELGBR R2, F13 // b3a000d2 + CDLGBR R3, F12 // b3a100c3 + + CFEBRA F15, R1 // b398501f + CFDBRA F14, R2 // b399502e + CGEBRA F13, R3 // b3a8503d + CGDBRA F12, R4 // b3a9504c + + CLFEBR F15, R1 // b39c501f + CLFDBR F14, R2 // b39d502e + CLGEBR F13, R3 // b3ac503d + CLGDBR F12, R4 // b3ad504c + + FMOVS $0, F11 // b37400b0 + FMOVD $0, F12 // b37500c0 + FMOVS (R1)(R2*1), F0 // ed0210000064 + FMOVS n-8(SP), F15 // edf0f0100064 + FMOVD -9999999(R8)(R9*1), F8 // c0a1ff67698141aa9000ed8a80000065 + FMOVD F4, F5 // 2854 + FADDS F0, F15 // b30a00f0 + FADD F1, F14 // b31a00e1 + FSUBS F2, F13 // b30b00d2 + FSUB F3, F12 // b31b00c3 + FMULS F4, F11 // b31700b4 + FMUL F5, F10 // b31c00a5 + FDIVS F6, F9 // b30d0096 + FDIV F7, F8 // b31d0087 + FABS F1, F2 // b3100021 + FSQRTS F3, F4 // b3140043 + FSQRT F5, F15 // b31500f5 + + VL (R15), V1 // e710f0000006 + VST V1, (R15) // e710f000000e + VL (R15), V31 // e7f0f0000806 + VST V31, (R15) // e7f0f000080e + VESLB $5, V14 // e7ee00050030 + VESRAG $0, V15, V16 // e70f0000383a + VLM (R15), V8, V23 // e787f0000436 + VSTM V8, V23, (R15) // e787f000043e + VONE V1 // e710ffff0044 + VZERO V16 // e70000000844 + VGBM $52428, V31 // e7f0cccc0844 + VREPIB $255, V4 // e74000ff0045 + VREPG $1, V4, V16 // e7040001384d + VREPB $4, V31, V1 // e71f0004044d + VFTCIDB $4095, V1, V2 // e721fff0304a + WFTCIDB $3276, V15, V16 // e70fccc8384a + VPOPCT V8, V19 // e73800000850 + VFEEZBS V1, V2, V31 // e7f120300880 + WFCHDBS V22, V23, V4 // e746701836eb + VMNH V1, V2, V30 // e7e1200018fe + VO V2, V1, V0 // e7021000006a + VERLLVF V2, V30, V27 // e7be20002c73 + VSCBIB V0, V23, V24 // e78700000cf5 + VNOT V16, V1 // e7101000046b + VCLZF V16, V17 // e71000002c53 + VLVGP R3, R4, V8 // e78340000062 + + // Some vector instructions have their inputs reordered. + // Typically the reordering puts the length/index input into From3. + VGEG $1, 8(R15)(V30*1), V31 // VGEG 8(R15)(V30*1), $1, V31 // e7fef0081c12 + VSCEG $1, V31, 16(R15)(V30*1) // VSCEG V31, $1, 16(R15)(V30*1) // e7fef0101c1a + VGEF $0, 2048(R15)(V1*1), V2 // VGEF 2048(R15)(V1*1), $0, V2 // e721f8000013 + VSCEF $0, V2, 4095(R15)(V1*1) // VSCEF V2, $0, 4095(R15)(V1*1) // e721ffff001b + VLL R0, (R15), V1 // VLL (R15), R0, V1 // e710f0000037 + VSTL R0, V16, (R15) // VSTL V16, R0, (R15) // e700f000083f + VGMH $8, $16, V12 // VGMH $16, $8, V12 // e7c008101046 + VLEIF $2, $-43, V16 // VLEIF $-43, $2, V16 // e700ffd52843 + VSLDB $3, V1, V16, V18 // VSLDB V1, V16, $3, V18 // e72100030a77 + VERIMB $2, V31, V1, V2 // VERIMB V31, V1, $2, V2 // e72f10020472 + VSEL V1, V2, V3, V4 // VSEL V2, V3, V1, V4 // e7412000308d + VGFMAH V21, V31, V24, V0 // VGFMAH V31, V24, V21, V0 // e705f10087bc + WFMSDB V2, V25, V24, V31 // WFMSDB V25, V24, V2, V31 // e7f298038b8e + VPERM V31, V0, V2, V3 // VPERM V0, V2, V31, V3 // e73f0000248c + VPDI $1, V2, V31, V1 // VPDI V2, V31, $1, V1 // e712f0001284 + + RET + +TEXT main·init(SB),7,$0 // TEXT main.init(SB), 7, $0 + RET + +TEXT main·main(SB),7,$0 // TEXT main.main(SB), 7, $0 + BL main·foo(SB) // CALL main.foo(SB) + RET