mirror of
https://github.com/archlinuxarm/PKGBUILDs.git
synced 2024-11-18 22:54:00 +00:00
317 lines
15 KiB
Diff
317 lines
15 KiB
Diff
From 7361ef732b432e153496c30da66081d7e530c7f6 Mon Sep 17 00:00:00 2001
|
|
From: Peter de Rivaz <peter.derivaz@argondesign.com>
|
|
Date: Mon, 14 Dec 2015 16:35:29 +0000
|
|
Subject: [PATCH] Fix for issue 1114 compile error
|
|
|
|
In 32-bit build with --enable-shared, there is a lot of
|
|
register pressure and register src_strideq is reused.
|
|
The code needs to use the stack based version of src_stride,
|
|
but this doesn't compile when used in an lea instruction.
|
|
|
|
This patch also fixes a related segmentation fault caused by the
|
|
implementation using src_strideq even though it has been
|
|
reused.
|
|
|
|
This patch also fixes the HBD subpel variance tests that fail
|
|
when compiled without --disable-optimizations.
|
|
These failures were caused by local variables in the assembler
|
|
routines colliding with the caller's stack frame.
|
|
|
|
Change-Id: Ice9d4dafdcbdc6038ad5ee7c1c09a8f06deca362
|
|
---
|
|
vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm | 18 +++----
|
|
vpx_dsp/x86/highbd_variance_sse2.c | 64 ++++++++++++++----------
|
|
2 files changed, 44 insertions(+), 38 deletions(-)
|
|
|
|
diff --git a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
|
|
index 22d52a2..30ee81b 100644
|
|
--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
|
|
+++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
|
|
@@ -79,20 +79,13 @@ SECTION .text
|
|
|
|
%macro INC_SRC_BY_SRC_STRIDE 0
|
|
%if ARCH_X86=1 && CONFIG_PIC=1
|
|
- lea srcq, [srcq + src_stridemp*2]
|
|
+ add srcq, src_stridemp
|
|
+ add srcq, src_stridemp
|
|
%else
|
|
lea srcq, [srcq + src_strideq*2]
|
|
%endif
|
|
%endmacro
|
|
|
|
-%macro INC_SRC_BY_SRC_2STRIDE 0
|
|
-%if ARCH_X86=1 && CONFIG_PIC=1
|
|
- lea srcq, [srcq + src_stridemp*4]
|
|
-%else
|
|
- lea srcq, [srcq + src_strideq*4]
|
|
-%endif
|
|
-%endmacro
|
|
-
|
|
%macro SUBPEL_VARIANCE 1-2 0 ; W
|
|
%define bilin_filter_m bilin_filter_m_sse2
|
|
%define filter_idx_shift 5
|
|
@@ -984,8 +977,9 @@ SECTION .text
|
|
.x_other_y_other_loop:
|
|
movu m2, [srcq]
|
|
movu m4, [srcq+2]
|
|
- movu m3, [srcq+src_strideq*2]
|
|
- movu m5, [srcq+src_strideq*2+2]
|
|
+ INC_SRC_BY_SRC_STRIDE
|
|
+ movu m3, [srcq]
|
|
+ movu m5, [srcq+2]
|
|
pmullw m2, filter_x_a
|
|
pmullw m4, filter_x_b
|
|
paddw m2, filter_rnd
|
|
@@ -1018,7 +1012,7 @@ SECTION .text
|
|
SUM_SSE m0, m2, m4, m3, m6, m7
|
|
mova m0, m5
|
|
|
|
- INC_SRC_BY_SRC_2STRIDE
|
|
+ INC_SRC_BY_SRC_STRIDE
|
|
lea dstq, [dstq + dst_strideq * 4]
|
|
%if %2 == 1 ; avg
|
|
add secq, sec_str
|
|
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
|
|
index b45331c..81ec5db 100644
|
|
--- a/vpx_dsp/x86/highbd_variance_sse2.c
|
|
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
|
|
@@ -243,13 +243,18 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
|
|
}
|
|
|
|
#if CONFIG_USE_X86INC
|
|
+// The 2 unused parameters are placeholders for PIC-enabled builds.
|
|
+// These definitions are for functions defined in
|
|
+// highbd_subpel_variance_impl_sse2.asm
|
|
#define DECL(w, opt) \
|
|
int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
|
|
ptrdiff_t src_stride, \
|
|
int x_offset, int y_offset, \
|
|
const uint16_t *dst, \
|
|
ptrdiff_t dst_stride, \
|
|
- int height, unsigned int *sse);
|
|
+ int height, \
|
|
+ unsigned int *sse, \
|
|
+ void *unused0, void *unused);
|
|
#define DECLS(opt1, opt2) \
|
|
DECL(8, opt1); \
|
|
DECL(16, opt1)
|
|
@@ -274,7 +279,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
|
|
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst, dst_stride, h, \
|
|
- &sse); \
|
|
+ &sse, NULL, NULL); \
|
|
if (w > wf) { \
|
|
unsigned int sse2; \
|
|
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
|
|
@@ -282,19 +287,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
|
|
x_offset, y_offset, \
|
|
dst + 16, \
|
|
dst_stride, \
|
|
- h, &sse2); \
|
|
+ h, &sse2, \
|
|
+ NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
if (w > wf * 2) { \
|
|
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 32, dst_stride, \
|
|
- h, &sse2); \
|
|
+ h, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
|
src + 48, src_stride, x_offset, y_offset, \
|
|
- dst + 48, dst_stride, h, &sse2); \
|
|
+ dst + 48, dst_stride, h, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
} \
|
|
@@ -312,7 +318,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
|
|
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst, dst_stride, \
|
|
- h, &sse); \
|
|
+ h, &sse, NULL, NULL); \
|
|
if (w > wf) { \
|
|
uint32_t sse2; \
|
|
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
|
|
@@ -320,20 +326,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
|
|
x_offset, y_offset, \
|
|
dst + 16, \
|
|
dst_stride, \
|
|
- h, &sse2); \
|
|
+ h, &sse2, \
|
|
+ NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
if (w > wf * 2) { \
|
|
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 32, dst_stride, \
|
|
- h, &sse2); \
|
|
+ h, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 48, dst_stride, \
|
|
- h, &sse2); \
|
|
+ h, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
} \
|
|
@@ -359,27 +366,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
|
|
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
|
src + (start_row * src_stride), src_stride, \
|
|
x_offset, y_offset, dst + (start_row * dst_stride), \
|
|
- dst_stride, height, &sse2); \
|
|
+ dst_stride, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
if (w > wf) { \
|
|
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
|
src + 16 + (start_row * src_stride), src_stride, \
|
|
x_offset, y_offset, dst + 16 + (start_row * dst_stride), \
|
|
- dst_stride, height, &sse2); \
|
|
+ dst_stride, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
if (w > wf * 2) { \
|
|
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
|
src + 32 + (start_row * src_stride), src_stride, \
|
|
x_offset, y_offset, dst + 32 + (start_row * dst_stride), \
|
|
- dst_stride, height, &sse2); \
|
|
+ dst_stride, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
|
src + 48 + (start_row * src_stride), src_stride, \
|
|
x_offset, y_offset, dst + 48 + (start_row * dst_stride), \
|
|
- dst_stride, height, &sse2); \
|
|
+ dst_stride, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
}\
|
|
@@ -410,6 +417,7 @@ FNS(sse2, sse);
|
|
#undef FNS
|
|
#undef FN
|
|
|
|
+// The 2 unused parameters are placeholders for PIC-enabled builds.
|
|
#define DECL(w, opt) \
|
|
int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
|
|
ptrdiff_t src_stride, \
|
|
@@ -419,7 +427,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
|
|
const uint16_t *sec, \
|
|
ptrdiff_t sec_stride, \
|
|
int height, \
|
|
- unsigned int *sse);
|
|
+ unsigned int *sse, \
|
|
+ void *unused0, void *unused);
|
|
#define DECLS(opt1) \
|
|
DECL(16, opt1) \
|
|
DECL(8, opt1)
|
|
@@ -439,23 +448,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
|
|
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src, src_stride, x_offset, \
|
|
- y_offset, dst, dst_stride, sec, w, h, &sse); \
|
|
+ y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \
|
|
if (w > wf) { \
|
|
uint32_t sse2; \
|
|
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src + 16, src_stride, x_offset, y_offset, \
|
|
- dst + 16, dst_stride, sec + 16, w, h, &sse2); \
|
|
+ dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
if (w > wf * 2) { \
|
|
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src + 32, src_stride, x_offset, y_offset, \
|
|
- dst + 32, dst_stride, sec + 32, w, h, &sse2); \
|
|
+ dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src + 48, src_stride, x_offset, y_offset, \
|
|
- dst + 48, dst_stride, sec + 48, w, h, &sse2); \
|
|
+ dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
} \
|
|
@@ -475,14 +484,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src, src_stride, x_offset, \
|
|
y_offset, dst, dst_stride, \
|
|
- sec, w, h, &sse); \
|
|
+ sec, w, h, &sse, NULL, NULL); \
|
|
if (w > wf) { \
|
|
uint32_t sse2; \
|
|
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src + 16, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 16, dst_stride, \
|
|
- sec + 16, w, h, &sse2); \
|
|
+ sec + 16, w, h, &sse2, \
|
|
+ NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
if (w > wf * 2) { \
|
|
@@ -490,14 +500,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|
src + 32, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 32, dst_stride, \
|
|
- sec + 32, w, h, &sse2); \
|
|
+ sec + 32, w, h, &sse2, \
|
|
+ NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src + 48, src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 48, dst_stride, \
|
|
- sec + 48, w, h, &sse2); \
|
|
+ sec + 48, w, h, &sse2, \
|
|
+ NULL, NULL); \
|
|
se += se2; \
|
|
sse += sse2; \
|
|
} \
|
|
@@ -525,7 +537,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src + (start_row * src_stride), src_stride, x_offset, \
|
|
y_offset, dst + (start_row * dst_stride), dst_stride, \
|
|
- sec + (start_row * w), w, height, &sse2); \
|
|
+ sec + (start_row * w), w, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
if (w > wf) { \
|
|
@@ -533,7 +545,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|
src + 16 + (start_row * src_stride), src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 16 + (start_row * dst_stride), dst_stride, \
|
|
- sec + 16 + (start_row * w), w, height, &sse2); \
|
|
+ sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
if (w > wf * 2) { \
|
|
@@ -541,14 +553,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|
src + 32 + (start_row * src_stride), src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 32 + (start_row * dst_stride), dst_stride, \
|
|
- sec + 32 + (start_row * w), w, height, &sse2); \
|
|
+ sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
|
src + 48 + (start_row * src_stride), src_stride, \
|
|
x_offset, y_offset, \
|
|
dst + 48 + (start_row * dst_stride), dst_stride, \
|
|
- sec + 48 + (start_row * w), w, height, &sse2); \
|
|
+ sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
|
|
se += se2; \
|
|
long_sse += sse2; \
|
|
} \
|
|
--
|
|
2.7.0
|
|
|