Inline memmove for small constant sizes and refactor memcpy and memset.
The memory intrinsics are only optimized at -O1 and higher unless the
-fmem-intrin-opt flag is set to force the optimization to take place.
This change also introduces the xchg instruction for two register operands. The
xchg instruction is no longer used in the memory intrinsic lowering (or by
anything else), but the implementation is left for future use.
BUG=
R=jvoung@chromium.org, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1278173009.
diff --git a/crosstest/crosstest.cfg b/crosstest/crosstest.cfg
index 620edb6..c92963c 100644
--- a/crosstest/crosstest.cfg
+++ b/crosstest/crosstest.cfg
@@ -5,6 +5,7 @@
[mem_intrin]
driver: mem_intrin_main.cpp
test: mem_intrin.cpp
+flags: --sz=-fmem-intrin-opt
[test_arith]
driver: test_arith_main.cpp
diff --git a/crosstest/mem_intrin.cpp b/crosstest/mem_intrin.cpp
index 0fe0387..b84cdb9 100644
--- a/crosstest/mem_intrin.cpp
+++ b/crosstest/mem_intrin.cpp
@@ -40,30 +40,6 @@
return (sum_of_sums << 8) | sum;
}
-#define NWORDS 32
-#define BYTE_LENGTH (NWORDS * sizeof(elem_t))
-
-int memcpy_test_fixed_len(uint8_t init) {
- elem_t buf[NWORDS];
- elem_t buf2[NWORDS];
- reset_buf((uint8_t *)buf, init, BYTE_LENGTH);
- memcpy((void *)buf2, (void *)buf, BYTE_LENGTH);
- return fletcher_checksum((uint8_t *)buf2, BYTE_LENGTH);
-}
-
-int memmove_test_fixed_len(uint8_t init) {
- elem_t buf[NWORDS];
- reset_buf((uint8_t *)buf, init, BYTE_LENGTH);
- memmove((void *)(buf + 4), (void *)buf, BYTE_LENGTH - (4 * sizeof(elem_t)));
- return fletcher_checksum((uint8_t *)buf + 4, BYTE_LENGTH - 4);
-}
-
-int memset_test_fixed_len(uint8_t init) {
- elem_t buf[NWORDS];
- memset((void *)buf, init, BYTE_LENGTH);
- return fletcher_checksum((uint8_t *)buf, BYTE_LENGTH);
-}
-
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length) {
reset_buf(buf, init, length);
memcpy((void *)buf2, (void *)buf, length);
@@ -94,3 +70,33 @@
memset((void *)buf2, init + 4, length);
return fletcher_checksum(buf, length) + fletcher_checksum(buf2, length);
}
+
+#define X(NBYTES) \
+ int memcpy_test_fixed_len_##NBYTES(uint8_t init) { \
+ uint8_t buf[NBYTES]; \
+ uint8_t buf2[NBYTES]; \
+ reset_buf(buf, init, NBYTES); \
+ memcpy((void *)buf2, (void *)buf, NBYTES); \
+ return fletcher_checksum(buf2, NBYTES); \
+ } \
+ \
+ int memmove_test_fixed_len_##NBYTES(uint8_t init) { \
+ uint8_t buf[NBYTES + 16]; \
+ uint8_t buf2[NBYTES + 16]; \
+ reset_buf(buf, init, NBYTES + 16); \
+ reset_buf(buf2, init, NBYTES + 16); \
+ /* Move up */ \
+ memmove((void *)(buf + 16), (void *)buf, NBYTES); \
+ /* Move down */ \
+ memmove((void *)buf2, (void *)(buf2 + 16), NBYTES); \
+ return fletcher_checksum(buf, NBYTES + 16) + \
+ fletcher_checksum(buf2, NBYTES + 16); \
+ } \
+ \
+ int memset_test_fixed_len_##NBYTES(uint8_t init) { \
+ uint8_t buf[NBYTES]; \
+ memset((void *)buf, init, NBYTES); \
+ return fletcher_checksum(buf, NBYTES); \
+ }
+MEMINTRIN_SIZE_TABLE
+#undef X
diff --git a/crosstest/mem_intrin.def b/crosstest/mem_intrin.def
new file mode 100644
index 0000000..16484ad
--- /dev/null
+++ b/crosstest/mem_intrin.def
@@ -0,0 +1,258 @@
+#define MEMINTRIN_SIZE_TABLE \
+ X(0) \
+ X(1) \
+ X(2) \
+ X(3) \
+ X(4) \
+ X(5) \
+ X(6) \
+ X(7) \
+ X(8) \
+ X(9) \
+ X(10) \
+ X(11) \
+ X(12) \
+ X(13) \
+ X(14) \
+ X(15) \
+ X(16) \
+ X(17) \
+ X(18) \
+ X(19) \
+ X(20) \
+ X(21) \
+ X(22) \
+ X(23) \
+ X(24) \
+ X(25) \
+ X(26) \
+ X(27) \
+ X(28) \
+ X(29) \
+ X(30) \
+ X(31) \
+ X(32) \
+ X(33) \
+ X(34) \
+ X(35) \
+ X(36) \
+ X(37) \
+ X(38) \
+ X(39) \
+ X(40) \
+ X(41) \
+ X(42) \
+ X(43) \
+ X(44) \
+ X(45) \
+ X(46) \
+ X(47) \
+ X(48) \
+ X(49) \
+ X(50) \
+ X(51) \
+ X(52) \
+ X(53) \
+ X(54) \
+ X(55) \
+ X(56) \
+ X(57) \
+ X(58) \
+ X(59) \
+ X(60) \
+ X(61) \
+ X(62) \
+ X(63) \
+ X(64) \
+ X(65) \
+ X(66) \
+ X(67) \
+ X(68) \
+ X(69) \
+ X(70) \
+ X(71) \
+ X(72) \
+ X(73) \
+ X(74) \
+ X(75) \
+ X(76) \
+ X(77) \
+ X(78) \
+ X(79) \
+ X(80) \
+ X(81) \
+ X(82) \
+ X(83) \
+ X(84) \
+ X(85) \
+ X(86) \
+ X(87) \
+ X(88) \
+ X(89) \
+ X(90) \
+ X(91) \
+ X(92) \
+ X(93) \
+ X(94) \
+ X(95) \
+ X(96) \
+ X(97) \
+ X(98) \
+ X(99) \
+ X(100) \
+ X(101) \
+ X(102) \
+ X(103) \
+ X(104) \
+ X(105) \
+ X(106) \
+ X(107) \
+ X(108) \
+ X(109) \
+ X(110) \
+ X(111) \
+ X(112) \
+ X(113) \
+ X(114) \
+ X(115) \
+ X(116) \
+ X(117) \
+ X(118) \
+ X(119) \
+ X(120) \
+ X(121) \
+ X(122) \
+ X(123) \
+ X(124) \
+ X(125) \
+ X(126) \
+ X(127) \
+ X(128) \
+ X(129) \
+ X(130) \
+ X(131) \
+ X(132) \
+ X(133) \
+ X(134) \
+ X(135) \
+ X(136) \
+ X(137) \
+ X(138) \
+ X(139) \
+ X(140) \
+ X(141) \
+ X(142) \
+ X(143) \
+ X(144) \
+ X(145) \
+ X(146) \
+ X(147) \
+ X(148) \
+ X(149) \
+ X(150) \
+ X(151) \
+ X(152) \
+ X(153) \
+ X(154) \
+ X(155) \
+ X(156) \
+ X(157) \
+ X(158) \
+ X(159) \
+ X(160) \
+ X(161) \
+ X(162) \
+ X(163) \
+ X(164) \
+ X(165) \
+ X(166) \
+ X(167) \
+ X(168) \
+ X(169) \
+ X(170) \
+ X(171) \
+ X(172) \
+ X(173) \
+ X(174) \
+ X(175) \
+ X(176) \
+ X(177) \
+ X(178) \
+ X(179) \
+ X(180) \
+ X(181) \
+ X(182) \
+ X(183) \
+ X(184) \
+ X(185) \
+ X(186) \
+ X(187) \
+ X(188) \
+ X(189) \
+ X(190) \
+ X(191) \
+ X(192) \
+ X(193) \
+ X(194) \
+ X(195) \
+ X(196) \
+ X(197) \
+ X(198) \
+ X(199) \
+ X(200) \
+ X(201) \
+ X(202) \
+ X(203) \
+ X(204) \
+ X(205) \
+ X(206) \
+ X(207) \
+ X(208) \
+ X(209) \
+ X(210) \
+ X(211) \
+ X(212) \
+ X(213) \
+ X(214) \
+ X(215) \
+ X(216) \
+ X(217) \
+ X(218) \
+ X(219) \
+ X(220) \
+ X(221) \
+ X(222) \
+ X(223) \
+ X(224) \
+ X(225) \
+ X(226) \
+ X(227) \
+ X(228) \
+ X(229) \
+ X(230) \
+ X(231) \
+ X(232) \
+ X(233) \
+ X(234) \
+ X(235) \
+ X(236) \
+ X(237) \
+ X(238) \
+ X(239) \
+ X(240) \
+ X(241) \
+ X(242) \
+ X(243) \
+ X(244) \
+ X(245) \
+ X(246) \
+ X(247) \
+ X(248) \
+ X(249) \
+ X(250) \
+ X(251) \
+ X(252) \
+ X(253) \
+ X(254) \
+ X(255) \
+ X(256)
diff --git a/crosstest/mem_intrin.h b/crosstest/mem_intrin.h
index f04e1b2..8b13fa2 100644
--- a/crosstest/mem_intrin.h
+++ b/crosstest/mem_intrin.h
@@ -6,10 +6,15 @@
*/
#include "xdefs.h"
+#include "mem_intrin.def"
+
int memcpy_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memmove_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
int memset_test(uint8_t *buf, uint8_t *buf2, uint8_t init, SizeT length);
-int memcpy_test_fixed_len(uint8_t init);
-int memmove_test_fixed_len(uint8_t init);
-int memset_test_fixed_len(uint8_t init);
+#define X(NBYTES) \
+ int memcpy_test_fixed_len_##NBYTES(uint8_t init); \
+ int memmove_test_fixed_len_##NBYTES(uint8_t init); \
+ int memset_test_fixed_len_##NBYTES(uint8_t init);
+MEMINTRIN_SIZE_TABLE
+#undef X
diff --git a/crosstest/mem_intrin_main.cpp b/crosstest/mem_intrin_main.cpp
index e1102ec..3b5135d 100644
--- a/crosstest/mem_intrin_main.cpp
+++ b/crosstest/mem_intrin_main.cpp
@@ -14,27 +14,6 @@
#define XSTR(s) STR(s)
#define STR(s) #s
-void testFixedLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
-#define do_test_fixed(test_func) \
- for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
- ++TotalTests; \
- int llc_result = test_func(init_val); \
- int sz_result = Subzero_::test_func(init_val); \
- if (llc_result == sz_result) { \
- ++Passes; \
- } else { \
- ++Failures; \
- printf("Failure (%s): init_val=%d, llc=%d, sz=%d\n", STR(test_func), \
- init_val, llc_result, sz_result); \
- } \
- }
-
- do_test_fixed(memcpy_test_fixed_len);
- do_test_fixed(memmove_test_fixed_len);
- do_test_fixed(memset_test_fixed_len)
-#undef do_test_fixed
-}
-
void testVariableLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
uint8_t buf[256];
uint8_t buf2[256];
@@ -60,6 +39,30 @@
#undef do_test_variable
}
+void testFixedLen(SizeT &TotalTests, SizeT &Passes, SizeT &Failures) {
+#define do_test_fixed(test_func, NBYTES) \
+ for (uint8_t init_val = 0; init_val < 100; ++init_val) { \
+ ++TotalTests; \
+ int llc_result = test_func##_##NBYTES(init_val); \
+ int sz_result = Subzero_::test_func##_##NBYTES(init_val); \
+ if (llc_result == sz_result) { \
+ ++Passes; \
+ } else { \
+ ++Failures; \
+ printf("Failure (%s): init_val=%d, len=%d, llc=%d, sz=%d\n", \
+ STR(test_func), init_val, NBYTES, llc_result, sz_result); \
+ } \
+ }
+
+#define X(NBYTES) \
+ do_test_fixed(memcpy_test_fixed_len, NBYTES); \
+ do_test_fixed(memmove_test_fixed_len, NBYTES); \
+ do_test_fixed(memset_test_fixed_len, NBYTES);
+ MEMINTRIN_SIZE_TABLE
+#undef X
+#undef do_test_fixed
+}
+
#ifdef X8664_STACK_HACK
extern "C" int wrapped_main(int argc, char *argv[]) {
#else // !defined(X8664_STACK_HACK)