PKGBUILDs/extra/webkitgtk/ARM64-GNU-assembler-fails-in-TransformationMatrix-multiply.patch
2016-03-19 02:21:13 +00:00

112 lines
4.4 KiB
Diff

Index: trunk/Source/WebCore/platform/graphics/transforms/TransformationMatrix.cpp
===================================================================
--- trunk/Source/WebCore/platform/graphics/transforms/TransformationMatrix.cpp (revision 165676)
+++ trunk/Source/WebCore/platform/graphics/transforms/TransformationMatrix.cpp (revision 166233)
@@ -1041,59 +1041,59 @@
asm volatile (
// First, load the leftMatrix completely in memory. The leftMatrix is in v16-v23.
- "mov x4, %[leftMatrix]\n\t"
- "ld1.2d {v16, v17, v18, v19}, [%[leftMatrix]], #64\n\t"
- "ld1.2d {v20, v21, v22, v23}, [%[leftMatrix]]\n\t"
+ "mov x4, %[leftMatrix]\n\t"
+ "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%[leftMatrix]], #64\n\t"
+ "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%[leftMatrix]]\n\t"
// First row.
- "ld4r.2d {v24, v25, v26, v27}, [%[rightMatrix]], #32\n\t"
- "fmul.2d v28, v24, v16\n\t"
- "fmul.2d v29, v24, v17\n\t"
- "fmla.2d v28, v25, v18\n\t"
- "fmla.2d v29, v25, v19\n\t"
- "fmla.2d v28, v26, v20\n\t"
- "fmla.2d v29, v26, v21\n\t"
- "fmla.2d v28, v27, v22\n\t"
- "fmla.2d v29, v27, v23\n\t"
-
- "ld4r.2d {v0, v1, v2, v3}, [%[rightMatrix]], #32\n\t"
- "st1.2d {v28, v29}, [x4], #32\n\t"
+ "ld4r {v24.2d, v25.2d, v26.2d, v27.2d}, [%[rightMatrix]], #32\n\t"
+ "fmul v28.2d, v24.2d, v16.2d\n\t"
+ "fmul v29.2d, v24.2d, v17.2d\n\t"
+ "fmla v28.2d, v25.2d, v18.2d\n\t"
+ "fmla v29.2d, v25.2d, v19.2d\n\t"
+ "fmla v28.2d, v26.2d, v20.2d\n\t"
+ "fmla v29.2d, v26.2d, v21.2d\n\t"
+ "fmla v28.2d, v27.2d, v22.2d\n\t"
+ "fmla v29.2d, v27.2d, v23.2d\n\t"
+
+ "ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [%[rightMatrix]], #32\n\t"
+ "st1 {v28.2d, v29.2d}, [x4], #32\n\t"
// Second row.
- "fmul.2d v30, v0, v16\n\t"
- "fmul.2d v31, v0, v17\n\t"
- "fmla.2d v30, v1, v18\n\t"
- "fmla.2d v31, v1, v19\n\t"
- "fmla.2d v30, v2, v20\n\t"
- "fmla.2d v31, v2, v21\n\t"
- "fmla.2d v30, v3, v22\n\t"
- "fmla.2d v31, v3, v23\n\t"
-
- "ld4r.2d {v24, v25, v26, v27}, [%[rightMatrix]], #32\n\t"
- "st1.2d {v30, v31}, [x4], #32\n\t"
+ "fmul v30.2d, v0.2d, v16.2d\n\t"
+ "fmul v31.2d, v0.2d, v17.2d\n\t"
+ "fmla v30.2d, v1.2d, v18.2d\n\t"
+ "fmla v31.2d, v1.2d, v19.2d\n\t"
+ "fmla v30.2d, v2.2d, v20.2d\n\t"
+ "fmla v31.2d, v2.2d, v21.2d\n\t"
+ "fmla v30.2d, v3.2d, v22.2d\n\t"
+ "fmla v31.2d, v3.2d, v23.2d\n\t"
+
+ "ld4r {v24.2d, v25.2d, v26.2d, v27.2d}, [%[rightMatrix]], #32\n\t"
+ "st1 {v30.2d, v31.2d}, [x4], #32\n\t"
// Third row.
- "fmul.2d v28, v24, v16\n\t"
- "fmul.2d v29, v24, v17\n\t"
- "fmla.2d v28, v25, v18\n\t"
- "fmla.2d v29, v25, v19\n\t"
- "fmla.2d v28, v26, v20\n\t"
- "fmla.2d v29, v26, v21\n\t"
- "fmla.2d v28, v27, v22\n\t"
- "fmla.2d v29, v27, v23\n\t"
-
- "ld4r.2d {v0, v1, v2, v3}, [%[rightMatrix]], #32\n\t"
- "st1.2d {v28, v29}, [x4], #32\n\t"
+ "fmul v28.2d, v24.2d, v16.2d\n\t"
+ "fmul v29.2d, v24.2d, v17.2d\n\t"
+ "fmla v28.2d, v25.2d, v18.2d\n\t"
+ "fmla v29.2d, v25.2d, v19.2d\n\t"
+ "fmla v28.2d, v26.2d, v20.2d\n\t"
+ "fmla v29.2d, v26.2d, v21.2d\n\t"
+ "fmla v28.2d, v27.2d, v22.2d\n\t"
+ "fmla v29.2d, v27.2d, v23.2d\n\t"
+
+ "ld4r {v0.2d, v1.2d, v2.2d, v3.2d}, [%[rightMatrix]], #32\n\t"
+ "st1 {v28.2d, v29.2d}, [x4], #32\n\t"
// Fourth row.
- "fmul.2d v30, v0, v16\n\t"
- "fmul.2d v31, v0, v17\n\t"
- "fmla.2d v30, v1, v18\n\t"
- "fmla.2d v31, v1, v19\n\t"
- "fmla.2d v30, v2, v20\n\t"
- "fmla.2d v31, v2, v21\n\t"
- "fmla.2d v30, v3, v22\n\t"
- "fmla.2d v31, v3, v23\n\t"
-
- "st1.2d {v30, v31}, [x4]\n\t"
+ "fmul v30.2d, v0.2d, v16.2d\n\t"
+ "fmul v31.2d, v0.2d, v17.2d\n\t"
+ "fmla v30.2d, v1.2d, v18.2d\n\t"
+ "fmla v31.2d, v1.2d, v19.2d\n\t"
+ "fmla v30.2d, v2.2d, v20.2d\n\t"
+ "fmla v31.2d, v2.2d, v21.2d\n\t"
+ "fmla v30.2d, v3.2d, v22.2d\n\t"
+ "fmla v31.2d, v3.2d, v23.2d\n\t"
+
+ "st1 {v30.2d, v31.2d}, [x4]\n\t"
: [leftMatrix]"+r"(leftMatrix), [rightMatrix]"+r"(rightMatrix)