mirror of
https://github.com/minio/minio
synced 2024-11-05 17:34:01 +00:00
Merge with Intel ISAL changes from github.com/minio-io/isal
- These changes bring in a much-needed Mac OS X port of the Intel ISA-L library - At the current stage the Mac OS X part of the code is considered beta - pkg/cpu now supports Mac OS X - pkg/checksum/crc32c is still WIP; the rest of the packages have been validated
This commit is contained in:
parent
c82d2b95d7
commit
f347a1e590
31 changed files with 864 additions and 153 deletions
5
Makefile
5
Makefile
|
@ -14,9 +14,6 @@ build-erasure:
|
|||
@$(MAKE) $(MAKE_OPTIONS) -C pkg/erasure/isal lib
|
||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/erasure
|
||||
|
||||
build-signify:
|
||||
@$(MAKE) $(MAKE_OPTIONS) -C pkg/signify
|
||||
|
||||
build-cpu:
|
||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/cpu
|
||||
|
||||
|
@ -53,7 +50,7 @@ build-storage-append:
|
|||
build-storage-encoded:
|
||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/storage/encodedstorage
|
||||
|
||||
cover: build-erasure build-signify build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512
|
||||
cover: build-erasure build-split build-crc32c build-cpu build-scsi build-storage build-md5 build-sha1 build-sha256 build-sha512
|
||||
@godep go test -race -coverprofile=cover.out github.com/minio-io/minio/pkg/gateway
|
||||
|
||||
install: build-erasure
|
||||
|
|
|
@ -168,7 +168,7 @@ continue_block:
|
|||
|
||||
## branch into array
|
||||
lea jump_table(%rip), bufp
|
||||
movzxw (bufp, %rax, 2), len
|
||||
movzwq (bufp, %rax, 2), len
|
||||
offset=crc_array-jump_table
|
||||
lea offset(bufp, len, 1), bufp
|
||||
jmp *bufp
|
||||
|
@ -194,18 +194,22 @@ full_block:
|
|||
crc_array:
|
||||
i=128
|
||||
.rept 128-1
|
||||
#if !defined(__clang__)
|
||||
.altmacro
|
||||
LABEL crc_ %i
|
||||
.noaltmacro
|
||||
#endif
|
||||
crc32q -i*8(block_0), crc_init
|
||||
crc32q -i*8(block_1), crc1
|
||||
crc32q -i*8(block_2), crc2
|
||||
i=(i-1)
|
||||
.endr
|
||||
|
||||
#if !defined(__clang__)
|
||||
.altmacro
|
||||
LABEL crc_ %i
|
||||
.noaltmacro
|
||||
#endif
|
||||
crc32q -i*8(block_0), crc_init
|
||||
crc32q -i*8(block_1), crc1
|
||||
# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
|
||||
|
|
|
@ -14,9 +14,18 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifdef __APPLE__
|
||||
#define HAS_SSE _has_sse41
|
||||
#define HAS_AVX _has_avx
|
||||
#define HAS_AVX2 _has_avx2
|
||||
#else
|
||||
#define HAS_SSE has_sse41
|
||||
#define HAS_AVX has_avx
|
||||
#define HAS_AVX2 has_avx2
|
||||
#endif
|
||||
|
||||
.file "cpufeatures.c"
|
||||
.text
|
||||
.type cpuid, @function
|
||||
cpuid:
|
||||
.LFB2:
|
||||
.cfi_startproc
|
||||
|
@ -56,10 +65,8 @@ cpuid:
|
|||
ret
|
||||
.cfi_endproc
|
||||
.LFE2:
|
||||
.size cpuid, .-cpuid
|
||||
.globl has_sse41
|
||||
.type has_sse41, @function
|
||||
has_sse41:
|
||||
.globl HAS_SSE
|
||||
HAS_SSE:
|
||||
.LFB3:
|
||||
.cfi_startproc
|
||||
pushq %rbp
|
||||
|
@ -82,10 +89,8 @@ has_sse41:
|
|||
ret
|
||||
.cfi_endproc
|
||||
.LFE3:
|
||||
.size has_sse41, .-has_sse41
|
||||
.globl has_avx
|
||||
.type has_avx, @function
|
||||
has_avx:
|
||||
.globl HAS_AVX
|
||||
HAS_AVX:
|
||||
.LFB4:
|
||||
.cfi_startproc
|
||||
pushq %rbp
|
||||
|
@ -108,10 +113,8 @@ has_avx:
|
|||
ret
|
||||
.cfi_endproc
|
||||
.LFE4:
|
||||
.size has_avx, .-has_avx
|
||||
.globl has_avx2
|
||||
.type has_avx2, @function
|
||||
has_avx2:
|
||||
.globl HAS_AVX2
|
||||
HAS_AVX2:
|
||||
.LFB5:
|
||||
.cfi_startproc
|
||||
pushq %rbp
|
||||
|
@ -133,7 +136,3 @@ has_avx2:
|
|||
.cfi_def_cfa 7, 8
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LFE5:
|
||||
.size has_avx2, .-has_avx2
|
||||
.ident "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
|
|
@ -1,12 +1,17 @@
|
|||
all: build test
|
||||
.PHONY: all
|
||||
|
||||
SYSTEM_NAME := $(shell uname -s)
|
||||
|
||||
test:
|
||||
@godep go test -race -coverprofile=cover.out
|
||||
|
||||
isal/isal-l.a:
|
||||
ifeq ($(SYSTEM_NAME), Darwin)
|
||||
@$(MAKE) -C isal arch=osx lib
|
||||
else
|
||||
@$(MAKE) -C isal lib
|
||||
|
||||
endif
|
||||
build: isal/isal-l.a
|
||||
@godep go build
|
||||
|
||||
|
|
|
@ -60,6 +60,8 @@ int32_t minio_get_source_target (int errs, int k, int m,
|
|||
|
||||
*source = tmp_source;
|
||||
*target = tmp_target;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
21
pkg/erasure/isal/.gitignore
vendored
21
pkg/erasure/isal/.gitignore
vendored
|
@ -1,3 +1,22 @@
|
|||
*.o
|
||||
*.a
|
||||
*.so
|
||||
*.so
|
||||
*~
|
||||
*.dSYM
|
||||
erasure-code-base-test
|
||||
erasure-code-sse-test
|
||||
erasure-code-test
|
||||
gf-2vect-dot-prod-sse-test
|
||||
gf-3vect-dot-prod-sse-test
|
||||
gf-4vect-dot-prod-sse-test
|
||||
gf-5vect-dot-prod-sse-test
|
||||
gf-6vect-dot-prod-sse-test
|
||||
gf-inverse-test
|
||||
gf-vect-dot-prod-avx-test
|
||||
gf-vect-dot-prod-base-test
|
||||
gf-vect-dot-prod-sse-test
|
||||
gf-vect-dot-prod-test
|
||||
gf-vect-mul-avx-test
|
||||
gf-vect-mul-base-test
|
||||
gf-vect-mul-sse-test
|
||||
gf-vect-mul-test
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
|
||||
units = src
|
||||
|
||||
default: slib
|
||||
default: lib
|
||||
|
||||
include $(foreach unit,$(units), $(unit)/Makefile)
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef __unix__
|
||||
#if !defined(__unix__) && !defined(__APPLE__)
|
||||
#ifdef __MINGW32__
|
||||
# include <_mingw.h>
|
||||
#endif
|
||||
|
@ -59,7 +59,7 @@ typedef unsigned char UINT8;
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef __unix__
|
||||
#if defined(__unix__) || defined(__APPLE__)
|
||||
# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
|
||||
# define __forceinline static inline
|
||||
#else
|
||||
|
|
|
@ -63,6 +63,11 @@ ASFLAGS_win64 = -f win64
|
|||
CFLAGS_icl = -Qstd=c99
|
||||
ARFLAGS_win64 = -out:$@
|
||||
|
||||
# arch=osx build options
|
||||
ASFLAGS_osx = -f macho64
|
||||
ARFLAGS_osx = -r $@
|
||||
STRIP_gcc = strip -d $@
|
||||
|
||||
# arch=mingw build options
|
||||
ASFLAGS_mingw = -f win64
|
||||
ARFLAGS_mingw = cr $@
|
||||
|
@ -101,9 +106,13 @@ ifeq ($(arch),win64)
|
|||
lib_name := $(basename $(lib_name)).lib
|
||||
endif
|
||||
lsrcwin64 = $(lsrc)
|
||||
lsrcosx = $(lsrc)
|
||||
unit_testswin64 = $(unit_tests)
|
||||
unit_testsosx = $(unit_tests)
|
||||
exampleswin64 = $(examples)
|
||||
examplesosx = $(examples)
|
||||
perf_testswin64 = $(perf_tests)
|
||||
perf_testsosx = $(perf_tests)
|
||||
|
||||
# Build and run unit tests, performance tests, etc.
|
||||
all_tests = $(sort $(perf_tests$(arch)) $(unit_tests$(arch)) $(examples$(arch)) $(other_tests))
|
||||
|
@ -199,7 +208,6 @@ perf_report:
|
|||
@echo Summary:
|
||||
-grep runtime $(rpt_name)
|
||||
|
||||
|
||||
clean:
|
||||
@echo Cleaning up
|
||||
@$(RM) -r $(O)/*.o *.a $(all_tests) $(lib_name) $(so_lib_name)
|
||||
|
|
|
@ -33,6 +33,42 @@
|
|||
%define WRT_OPT
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define EC_ENCODE_DATA_SSE _ec_encode_data_sse
|
||||
%define EC_ENCODE_DATA_AVX _ec_encode_data_avx
|
||||
%define EC_ENCODE_DATA_AVX2 _ec_encode_data_avx2
|
||||
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
|
||||
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
|
||||
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
|
||||
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
|
||||
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
|
||||
%define GF_VECT_MUL_BASE _gf_vect_mul_base
|
||||
%define EC_ENCODE_DATA_BASE _ec_encode_data_base
|
||||
%define GF_VECT_DOT_PROD_BASE _gf_vect_dot_prod_base
|
||||
|
||||
%define EC_ENCODE_DATA _ec_encode_data
|
||||
%define GF_VECT_MUL _gf_vect_mul
|
||||
%define GF_VECT_DOT_PROD _gf_vect_dot_prod
|
||||
|
||||
%else
|
||||
%define EC_ENCODE_DATA_SSE ec_encode_data_sse
|
||||
%define EC_ENCODE_DATA_AVX ec_encode_data_avx
|
||||
%define EC_ENCODE_DATA_AVX2 ec_encode_data_avx2
|
||||
%define GF_VECT_MUL_SSE gf_vect_mul_sse
|
||||
%define GF_VECT_MUL_AVX gf_vect_mul_avx
|
||||
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
|
||||
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
|
||||
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
|
||||
%define GF_VECT_MUL_BASE gf_vect_mul_base
|
||||
%define EC_ENCODE_DATA_BASE ec_encode_data_base
|
||||
%define GF_VECT_DOT_PROD_BASE gf_vect_dot_prod_base
|
||||
|
||||
%define EC_ENCODE_DATA ec_encode_data
|
||||
%define GF_VECT_MUL gf_vect_mul
|
||||
%define GF_VECT_DOT_PROD gf_vect_dot_prod
|
||||
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf32
|
||||
|
||||
[bits 32]
|
||||
|
@ -51,19 +87,19 @@ default rel
|
|||
%define wrd_sz qword
|
||||
%define arg1 rsi
|
||||
|
||||
extern ec_encode_data_sse
|
||||
extern ec_encode_data_avx
|
||||
extern ec_encode_data_avx2
|
||||
extern gf_vect_mul_sse
|
||||
extern gf_vect_mul_avx
|
||||
extern gf_vect_dot_prod_sse
|
||||
extern gf_vect_dot_prod_avx
|
||||
extern gf_vect_dot_prod_avx2
|
||||
extern EC_ENCODE_DATA_SSE
|
||||
extern EC_ENCODE_DATA_AVX
|
||||
extern EC_ENCODE_DATA_AVX2
|
||||
extern GF_VECT_MUL_SSE
|
||||
extern GF_VECT_MUL_AVX
|
||||
extern GF_VECT_DOT_PROD_SSE
|
||||
extern GF_VECT_DOT_PROD_AVX
|
||||
extern GF_VECT_DOT_PROD_AVX2
|
||||
%endif
|
||||
|
||||
extern gf_vect_mul_base
|
||||
extern ec_encode_data_base
|
||||
extern gf_vect_dot_prod_base
|
||||
extern GF_VECT_MUL_BASE
|
||||
extern EC_ENCODE_DATA_BASE
|
||||
extern GF_VECT_DOT_PROD_BASE
|
||||
|
||||
section .data
|
||||
;;; *_mbinit are initial values for *_dispatched; is updated on first call.
|
||||
|
@ -82,33 +118,33 @@ section .text
|
|||
;;;;
|
||||
; ec_encode_data multibinary function
|
||||
;;;;
|
||||
global ec_encode_data:function
|
||||
global EC_ENCODE_DATA:function
|
||||
ec_encode_data_mbinit:
|
||||
call ec_encode_data_dispatch_init
|
||||
|
||||
ec_encode_data:
|
||||
EC_ENCODE_DATA:
|
||||
jmp wrd_sz [ec_encode_data_dispatched]
|
||||
|
||||
ec_encode_data_dispatch_init:
|
||||
push arg1
|
||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||
lea arg1, [ec_encode_data_base]
|
||||
lea arg1, [EC_ENCODE_DATA_BASE]
|
||||
%else
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
lea arg1, [ec_encode_data_base WRT_OPT] ; Default
|
||||
lea arg1, [EC_ENCODE_DATA_BASE WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
lea rbx, [ec_encode_data_sse WRT_OPT]
|
||||
lea rbx, [EC_ENCODE_DATA_BASE WRT_OPT]
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||
cmovne arg1, rbx
|
||||
|
||||
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
lea rbx, [ec_encode_data_avx WRT_OPT]
|
||||
lea rbx, [EC_ENCODE_DATA_AVX WRT_OPT]
|
||||
|
||||
jne _done_ec_encode_data_init
|
||||
mov rsi, rbx
|
||||
|
@ -118,7 +154,7 @@ ec_encode_data_dispatch_init:
|
|||
mov eax, 7
|
||||
cpuid
|
||||
test ebx, FLAG_CPUID1_EBX_AVX2
|
||||
lea rbx, [ec_encode_data_avx2 WRT_OPT]
|
||||
lea rbx, [EC_ENCODE_DATA_AVX2 WRT_OPT]
|
||||
cmovne rsi, rbx
|
||||
|
||||
;; Does it have xmm and ymm support
|
||||
|
@ -127,7 +163,7 @@ ec_encode_data_dispatch_init:
|
|||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
je _done_ec_encode_data_init
|
||||
lea rsi, [ec_encode_data_sse WRT_OPT]
|
||||
lea rsi, [EC_ENCODE_DATA_SSE WRT_OPT]
|
||||
|
||||
_done_ec_encode_data_init:
|
||||
pop rdx
|
||||
|
@ -142,30 +178,30 @@ _done_ec_encode_data_init:
|
|||
;;;;
|
||||
; gf_vect_mul multibinary function
|
||||
;;;;
|
||||
global gf_vect_mul:function
|
||||
global GF_VECT_MUL:function
|
||||
gf_vect_mul_mbinit:
|
||||
call gf_vect_mul_dispatch_init
|
||||
|
||||
gf_vect_mul:
|
||||
GF_VECT_MUL:
|
||||
jmp wrd_sz [gf_vect_mul_dispatched]
|
||||
|
||||
gf_vect_mul_dispatch_init:
|
||||
push arg1
|
||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||
lea arg1, [gf_vect_mul_base]
|
||||
lea arg1, [GF_VECT_MUL_BASE]
|
||||
%else
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
lea arg1, [gf_vect_mul_base WRT_OPT] ; Default
|
||||
lea arg1, [GF_VECT_MUL_BASE WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_2
|
||||
lea rbx, [gf_vect_mul_sse WRT_OPT]
|
||||
je _done_gf_vect_mul_dispatch_init
|
||||
mov arg1, rbx
|
||||
lea rbx, [GF_VECT_MUL_SSE WRT_OPT]
|
||||
je _done_gf_vect_mul_dispatch_init
|
||||
mov arg1, rbx
|
||||
|
||||
;; Try for AVX
|
||||
and ecx, (FLAG_CPUID1_ECX_OSXSAVE | FLAG_CPUID1_ECX_AVX)
|
||||
|
@ -178,49 +214,49 @@ gf_vect_mul_dispatch_init:
|
|||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
jne _done_gf_vect_mul_dispatch_init
|
||||
lea arg1, [gf_vect_mul_avx WRT_OPT]
|
||||
lea arg1, [GF_VECT_MUL_AVX WRT_OPT]
|
||||
|
||||
_done_gf_vect_mul_dispatch_init:
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
pop rax
|
||||
%endif ;; END 32-bit check
|
||||
mov [gf_vect_mul_dispatched], arg1
|
||||
pop arg1
|
||||
ret
|
||||
pop rdx
|
||||
pop rcx
|
||||
pop rbx
|
||||
pop rax
|
||||
%endif ;; END 32-bit check
|
||||
mov [gf_vect_mul_dispatched], arg1
|
||||
pop arg1
|
||||
ret
|
||||
|
||||
|
||||
;;;;
|
||||
; gf_vect_dot_prod multibinary function
|
||||
;;;;
|
||||
global gf_vect_dot_prod:function
|
||||
global GF_VECT_DOT_PROD:function
|
||||
gf_vect_dot_prod_mbinit:
|
||||
call gf_vect_dot_prod_dispatch_init
|
||||
|
||||
gf_vect_dot_prod:
|
||||
GF_VECT_DOT_PROD:
|
||||
jmp wrd_sz [gf_vect_dot_prod_dispatched]
|
||||
|
||||
gf_vect_dot_prod_dispatch_init:
|
||||
push arg1
|
||||
%ifidn __OUTPUT_FORMAT__, elf32 ;; 32-bit check
|
||||
lea arg1, [gf_vect_dot_prod_base]
|
||||
lea arg1, [GF_VECT_DOT_PROD_BASE]
|
||||
%else
|
||||
push rax
|
||||
push rbx
|
||||
push rcx
|
||||
push rdx
|
||||
lea arg1, [gf_vect_dot_prod_base WRT_OPT] ; Default
|
||||
lea arg1, [GF_VECT_DOT_PROD_BASE WRT_OPT] ; Default
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
lea rbx, [gf_vect_dot_prod_sse WRT_OPT]
|
||||
lea rbx, [GF_VECT_DOT_PROD_SSE WRT_OPT]
|
||||
test ecx, FLAG_CPUID1_ECX_SSE4_1
|
||||
cmovne arg1, rbx
|
||||
|
||||
and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
|
||||
lea rbx, [gf_vect_dot_prod_avx WRT_OPT]
|
||||
lea rbx, [GF_VECT_DOT_PROD_AVX WRT_OPT]
|
||||
|
||||
jne _done_gf_vect_dot_prod_init
|
||||
mov rsi, rbx
|
||||
|
@ -230,7 +266,7 @@ gf_vect_dot_prod_dispatch_init:
|
|||
mov eax, 7
|
||||
cpuid
|
||||
test ebx, FLAG_CPUID1_EBX_AVX2
|
||||
lea rbx, [gf_vect_dot_prod_avx2 WRT_OPT]
|
||||
lea rbx, [GF_VECT_DOT_PROD_AVX2 WRT_OPT]
|
||||
cmovne rsi, rbx
|
||||
|
||||
;; Does it have xmm and ymm support
|
||||
|
@ -238,8 +274,8 @@ gf_vect_dot_prod_dispatch_init:
|
|||
xgetbv
|
||||
and eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
cmp eax, FLAG_XGETBV_EAX_XMM_YMM
|
||||
je _done_gf_vect_dot_prod_init
|
||||
lea rsi, [gf_vect_dot_prod_sse WRT_OPT]
|
||||
je _done_gf_vect_dot_prod_init
|
||||
lea rsi, [GF_VECT_DOT_PROD_SSE WRT_OPT]
|
||||
|
||||
_done_gf_vect_dot_prod_init:
|
||||
pop rdx
|
||||
|
@ -261,6 +297,6 @@ global %1_slver
|
|||
%endmacro
|
||||
|
||||
;;; func core, ver, snum
|
||||
slversion ec_encode_data, 00, 02, 0133
|
||||
slversion gf_vect_mul, 00, 02, 0134
|
||||
slversion gf_vect_dot_prod, 00, 01, 0138
|
||||
slversion EC_ENCODE_DATA, 00, 02, 0133
|
||||
slversion GF_VECT_MUL, 00, 02, 0134
|
||||
slversion GF_VECT_DOT_PROD, 00, 01, 0138
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_DOT_PROD_AVX _gf_2vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_2VECT_DOT_PROD_AVX gf_2vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -58,6 +63,31 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -143,9 +173,8 @@ section .text
|
|||
%define xp2 xmm3
|
||||
|
||||
align 16
|
||||
global gf_2vect_dot_prod_avx:function
|
||||
|
||||
func(gf_2vect_dot_prod_avx)
|
||||
global GF_2VECT_DOT_PROD_AVX:function
|
||||
func(GF_2VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -231,4 +260,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_2vect_dot_prod_avx, 02, 03, 0191
|
||||
slversion GF_2VECT_DOT_PROD_AVX, 02, 03, 0191
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_DOT_PROD_AVX2 _gf_2vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_2VECT_DOT_PROD_AVX2 gf_2vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -60,6 +65,33 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -149,9 +181,8 @@ section .text
|
|||
%define xp2 ymm3
|
||||
|
||||
align 16
|
||||
global gf_2vect_dot_prod_avx2:function
|
||||
|
||||
func(gf_2vect_dot_prod_avx2)
|
||||
global GF_2VECT_DOT_PROD_AVX2:function
|
||||
func(GF_2VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
|
@ -243,4 +274,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_2vect_dot_prod_avx2, 04, 03, 0196
|
||||
slversion GF_2VECT_DOT_PROD_AVX2, 04, 03, 0196
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_2VECT_DOT_PROD_SSE _gf_2vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_2VECT_DOT_PROD_SSE gf_2vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -58,6 +63,31 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -144,9 +174,8 @@ section .text
|
|||
%define xp2 xmm3
|
||||
|
||||
align 16
|
||||
global gf_2vect_dot_prod_sse:function
|
||||
|
||||
func(gf_2vect_dot_prod_sse)
|
||||
global GF_2VECT_DOT_PROD_SSE:function
|
||||
func(GF_2VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -233,4 +262,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_2vect_dot_prod_sse, 00, 02, 0062
|
||||
slversion GF_2VECT_DOT_PROD_SSE, 00, 02, 0062
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_DOT_PROD_AVX _gf_3vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_3VECT_DOT_PROD_AVX gf_3vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -60,6 +65,33 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -157,8 +189,8 @@ section .text
|
|||
%define xp3 xmm4
|
||||
|
||||
align 16
|
||||
global gf_3vect_dot_prod_avx:function
|
||||
func(gf_3vect_dot_prod_avx)
|
||||
global GF_3VECT_DOT_PROD_AVX:function
|
||||
func(GF_3VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -255,4 +287,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_3vect_dot_prod_avx, 02, 03, 0192
|
||||
slversion GF_3VECT_DOT_PROD_AVX, 02, 03, 0192
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_DOT_PROD_AVX2 _gf_3vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_3VECT_DOT_PROD_AVX2 gf_3vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -62,6 +67,35 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -162,8 +196,8 @@ section .text
|
|||
%define xp3 ymm4
|
||||
|
||||
align 16
|
||||
global gf_3vect_dot_prod_avx2:function
|
||||
func(gf_3vect_dot_prod_avx2)
|
||||
global GF_3VECT_DOT_PROD_AVX2:function
|
||||
func(GF_3VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
|
@ -268,4 +302,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_3vect_dot_prod_avx2, 04, 03, 0197
|
||||
slversion GF_3VECT_DOT_PROD_AVX2, 04, 03, 0197
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_3VECT_DOT_PROD_SSE _gf_3vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_3VECT_DOT_PROD_SSE gf_3vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -60,6 +65,33 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -157,8 +189,8 @@ section .text
|
|||
%define xp3 xmm4
|
||||
|
||||
align 16
|
||||
global gf_3vect_dot_prod_sse:function
|
||||
func(gf_3vect_dot_prod_sse)
|
||||
global GF_3VECT_DOT_PROD_SSE:function
|
||||
func(GF_3VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -256,4 +288,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_3vect_dot_prod_sse, 00, 03, 0063
|
||||
slversion GF_3VECT_DOT_PROD_SSE, 00, 03, 0063
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_DOT_PROD_AVX _gf_4vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_4VECT_DOT_PROD_AVX gf_4vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -66,6 +71,39 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -182,8 +220,8 @@ section .text
|
|||
%define xp4 xmm5
|
||||
|
||||
align 16
|
||||
global gf_4vect_dot_prod_avx:function
|
||||
func(gf_4vect_dot_prod_avx)
|
||||
global GF_4VECT_DOT_PROD_AVX:function
|
||||
func(GF_4VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -293,4 +331,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_4vect_dot_prod_avx, 00, 02, 0064
|
||||
slversion GF_4VECT_DOT_PROD_AVX, 00, 02, 0064
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_DOT_PROD_AVX2 _gf_4vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_4VECT_DOT_PROD_AVX2 gf_4vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -68,6 +73,41 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -187,8 +227,8 @@ section .text
|
|||
%define xp4 ymm5
|
||||
|
||||
align 16
|
||||
global gf_4vect_dot_prod_avx2:function
|
||||
func(gf_4vect_dot_prod_avx2)
|
||||
global GF_4VECT_DOT_PROD_AVX2:function
|
||||
func(GF_4VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
|
@ -302,4 +342,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_4vect_dot_prod_avx2, 04, 03, 0064
|
||||
slversion GF_4VECT_DOT_PROD_AVX2, 04, 03, 0064
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_4VECT_DOT_PROD_SSE _gf_4vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_4VECT_DOT_PROD_SSE gf_4vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -66,6 +71,39 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -182,8 +220,8 @@ section .text
|
|||
%define xp4 xmm5
|
||||
|
||||
align 16
|
||||
global gf_4vect_dot_prod_sse:function
|
||||
func(gf_4vect_dot_prod_sse)
|
||||
global GF_4VECT_DOT_PROD_SSE:function
|
||||
func(GF_4VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -293,4 +331,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_4vect_dot_prod_sse, 00, 03, 0064
|
||||
slversion GF_4VECT_DOT_PROD_SSE, 00, 03, 0064
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_DOT_PROD_AVX _gf_5vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_5VECT_DOT_PROD_AVX gf_5vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -66,6 +71,39 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_DOT_PROD_AVX2 _gf_5vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_5VECT_DOT_PROD_AVX2 gf_5vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -68,6 +73,41 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -189,8 +229,8 @@ section .text
|
|||
%define xp5 ymm6
|
||||
|
||||
align 16
|
||||
global gf_5vect_dot_prod_avx2:function
|
||||
func(gf_5vect_dot_prod_avx2)
|
||||
global GF_5VECT_DOT_PROD_AVX2:function
|
||||
func(GF_5VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
|
@ -320,4 +360,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_5vect_dot_prod_avx2, 04, 03, 0199
|
||||
slversion GF_5VECT_DOT_PROD_AVX2, 04, 03, 0199
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_5VECT_DOT_PROD_SSE _gf_5vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_5VECT_DOT_PROD_SSE gf_5vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -66,6 +71,39 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -184,8 +222,8 @@ section .text
|
|||
%define xp5 xmm6
|
||||
|
||||
align 16
|
||||
global gf_5vect_dot_prod_sse:function
|
||||
func(gf_5vect_dot_prod_sse)
|
||||
global GF_5VECT_DOT_PROD_SSE:function
|
||||
func(GF_5VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -309,4 +347,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_5vect_dot_prod_sse, 00, 03, 0065
|
||||
slversion GF_5VECT_DOT_PROD_SSE, 00, 03, 0065
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_DOT_PROD_AVX _gf_6vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_6VECT_DOT_PROD_AVX gf_6vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -66,6 +71,39 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -182,8 +220,8 @@ section .text
|
|||
%define xp6 xmm7
|
||||
|
||||
align 16
|
||||
global gf_6vect_dot_prod_avx:function
|
||||
func(gf_6vect_dot_prod_avx)
|
||||
global GF_6VECT_DOT_PROD_AVX:function
|
||||
func(GF_6VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -320,4 +358,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_6vect_dot_prod_avx, 02, 03, 0195
|
||||
slversion GF_6VECT_DOT_PROD_AVX, 02, 03, 0195
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_DOT_PROD_AVX2 _gf_6vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_6VECT_DOT_PROD_AVX2 gf_6vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -68,6 +73,41 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_6VECT_DOT_PROD_SSE _gf_6vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_6VECT_DOT_PROD_SSE gf_6vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -66,6 +71,39 @@
|
|||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r13 ; must be saved and restored
|
||||
%define tmp4 r12 ; must be saved and restored
|
||||
%define tmp5 r14 ; must be saved and restored
|
||||
%define tmp6 r15 ; must be saved and restored
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define LOG_PS 3
|
||||
|
||||
%define func(x) x:
|
||||
%macro FUNC_SAVE 0
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
%endmacro
|
||||
%macro FUNC_RESTORE 0
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
%endmacro
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -182,8 +220,8 @@ section .text
|
|||
%define xp6 xmm7
|
||||
|
||||
align 16
|
||||
global gf_6vect_dot_prod_sse:function
|
||||
func(gf_6vect_dot_prod_sse)
|
||||
global GF_6VECT_DOT_PROD_SSE:function
|
||||
func(GF_6VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -320,4 +358,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_6vect_dot_prod_sse, 00, 03, 0066
|
||||
slversion GF_6VECT_DOT_PROD_SSE, 00, 03, 0066
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_DOT_PROD_AVX _gf_vect_dot_prod_avx
|
||||
%else
|
||||
%define GF_VECT_DOT_PROD_AVX gf_vect_dot_prod_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -51,6 +56,24 @@
|
|||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -121,8 +144,8 @@ section .text
|
|||
%define xp xmm2
|
||||
|
||||
align 16
|
||||
global gf_vect_dot_prod_avx:function
|
||||
func(gf_vect_dot_prod_avx)
|
||||
global GF_VECT_DOT_PROD_AVX:function
|
||||
func(GF_VECT_DOT_PROD_AVX)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -195,4 +218,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_vect_dot_prod_avx, 02, 03, 0061
|
||||
slversion GF_VECT_DOT_PROD_AVX, 02, 03, 0061
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_DOT_PROD_AVX2 _gf_vect_dot_prod_avx2
|
||||
%else
|
||||
%define GF_VECT_DOT_PROD_AVX2 gf_vect_dot_prod_avx2
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -53,6 +58,26 @@
|
|||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
|
||||
%define tmp r11
|
||||
%define tmp.w r11d
|
||||
%define tmp.b r11b
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -126,8 +151,8 @@ section .text
|
|||
%define xp ymm2
|
||||
|
||||
align 16
|
||||
global gf_vect_dot_prod_avx2:function
|
||||
func(gf_vect_dot_prod_avx2)
|
||||
global GF_VECT_DOT_PROD_AVX2:function
|
||||
func(GF_VECT_DOT_PROD_AVX2)
|
||||
FUNC_SAVE
|
||||
sub len, 32
|
||||
jl .return_fail
|
||||
|
@ -200,4 +225,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_vect_dot_prod_avx2, 04, 03, 0190
|
||||
slversion GF_VECT_DOT_PROD_AVX2, 04, 03, 0190
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_DOT_PROD_SSE _gf_vect_dot_prod_sse
|
||||
%else
|
||||
%define GF_VECT_DOT_PROD_SSE gf_vect_dot_prod_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -50,6 +55,23 @@
|
|||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
|
||||
%define tmp r11
|
||||
%define tmp2 r10
|
||||
%define tmp3 r9
|
||||
%define return rax
|
||||
%define PS 8
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -121,8 +143,8 @@ section .text
|
|||
%define xp xmm2
|
||||
|
||||
align 16
|
||||
global gf_vect_dot_prod_sse:function
|
||||
func(gf_vect_dot_prod_sse)
|
||||
global GF_VECT_DOT_PROD_SSE:function
|
||||
func(GF_VECT_DOT_PROD_SSE)
|
||||
FUNC_SAVE
|
||||
sub len, 16
|
||||
jl .return_fail
|
||||
|
@ -192,4 +214,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_vect_dot_prod_sse, 00, 03, 0060
|
||||
slversion GF_VECT_DOT_PROD_SSE, 00, 03, 0060
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_MUL_AVX _gf_vect_mul_avx
|
||||
%else
|
||||
%define GF_VECT_MUL_AVX gf_vect_mul_avx
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -46,6 +51,19 @@
|
|||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -111,8 +129,8 @@ section .text
|
|||
%define xtmp2c xmm7
|
||||
|
||||
align 16
|
||||
global gf_vect_mul_avx:function
|
||||
func(gf_vect_mul_avx)
|
||||
global GF_VECT_MUL_AVX:function
|
||||
func(GF_VECT_MUL_AVX)
|
||||
FUNC_SAVE
|
||||
mov pos, 0
|
||||
vmovdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
@ -169,4 +187,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_vect_mul_avx, 01, 02, 0036
|
||||
slversion GF_VECT_MUL_AVX, 01, 02, 0036
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; Copyright(c) 2011-2014 Intel Corporation All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions
|
||||
; modification, are permitted provided that the following conditions
|
||||
; are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
|
@ -32,6 +32,11 @@
|
|||
;;;
|
||||
;;; Author: Gregory Tucker
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, macho64
|
||||
%define GF_VECT_MUL_SSE _gf_vect_mul_sse
|
||||
%else
|
||||
%define GF_VECT_MUL_SSE gf_vect_mul_sse
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, elf64
|
||||
%define arg0 rdi
|
||||
|
@ -46,6 +51,19 @@
|
|||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, macho64
|
||||
%define arg0 rdi
|
||||
%define arg1 rsi
|
||||
%define arg2 rdx
|
||||
%define arg3 rcx
|
||||
%define arg4 r8
|
||||
%define arg5 r9
|
||||
%define tmp r11
|
||||
%define return rax
|
||||
%define func(x) x:
|
||||
%define FUNC_SAVE
|
||||
%define FUNC_RESTORE
|
||||
|
||||
%elifidn __OUTPUT_FORMAT__, win64
|
||||
%define arg0 rcx
|
||||
%define arg1 rdx
|
||||
|
@ -112,8 +130,8 @@ section .text
|
|||
|
||||
|
||||
align 16
|
||||
global gf_vect_mul_sse:function
|
||||
func(gf_vect_mul_sse)
|
||||
global GF_VECT_MUL_SSE:function
|
||||
func(GF_VECT_MUL_SSE)
|
||||
FUNC_SAVE
|
||||
mov pos, 0
|
||||
movdqa xmask0f, [mask0f] ;Load mask of lower nibble in each byte
|
||||
|
@ -175,4 +193,4 @@ global %1_slver
|
|||
db 0x%3, 0x%2
|
||||
%endmacro
|
||||
;;; func core, ver, snum
|
||||
slversion gf_vect_mul_sse, 00, 02, 0034
|
||||
slversion GF_VECT_MUL_SSE, 00, 02, 0034
|
||||
|
|
|
@ -23,7 +23,7 @@ func (s *MySuite) TestPiping(c *C) {
|
|||
// Run the command on each directory
|
||||
for _, dir := range dirs {
|
||||
// find $DIR -type f # Find all files
|
||||
ls := exec.Command("ls", dir, "-l")
|
||||
ls := exec.Command("ls", "-l", dir)
|
||||
|
||||
// | sort -t. -k2 # Sort by file extension
|
||||
sort := exec.Command("sort", "-t.", "-k2")
|
||||
|
|
Loading…
Reference in a new issue