/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
335129044dSC.J. Collier
345129044dSC.J. Collier#include <stdint.h>
355129044dSC.J. Collier#include <stdio.h>
365129044dSC.J. Collier#include <string.h>
375129044dSC.J. Collier#include <stdlib.h>
385129044dSC.J. Collier
395129044dSC.J. Collier#include <rte_common.h>
405129044dSC.J. Collier#include <rte_cycles.h>
415129044dSC.J. Collier#include <rte_random.h>
425129044dSC.J. Collier#include <rte_malloc.h>
435129044dSC.J. Collier
445129044dSC.J. Collier#include <rte_memcpy.h>
455129044dSC.J. Collier
465129044dSC.J. Collier#include "test.h"
475129044dSC.J. Collier
/*
 * Upper bound of the buffer-size sweep. When left at 0 the hand-picked
 * sizes listed in buf_sizes[] are used instead of a contiguous range.
 */
#define TEST_VALUE_RANGE        0

#if TEST_VALUE_RANGE == 0

/* Hand-picked copy lengths exercised by the variable-size tests. */
static size_t buf_sizes[] = {
	/* Very small copies */
	1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 15, 16, 17,
	/* Boundary triples (N - 1, N, N + 1) */
	31, 32, 33, 63, 64, 65, 127, 128, 129,
	191, 192, 193, 255, 256, 257,
	319, 320, 321, 383, 384, 385,
	447, 448, 449, 511, 512, 513,
	767, 768, 769, 1023, 1024, 1025,
	/* Assorted sizes between 1.5K and 1.6K */
	1518, 1522, 1536, 1600,
	/* 512-byte steps from 2K up to 8K */
	2048, 2560, 3072, 3584, 4096, 4608, 5120,
	5632, 6144, 6656, 7168, 7680, 8192
};

/* MUST be at least as large as the largest entry in buf_sizes[] */
#define SMALL_BUFFER_SIZE       8192

#else /* TEST_VALUE_RANGE != 0 */

/* Populated at run time with every size in [0, TEST_VALUE_RANGE) */
static size_t buf_sizes[TEST_VALUE_RANGE];
#define SMALL_BUFFER_SIZE       TEST_VALUE_RANGE

#endif /* TEST_VALUE_RANGE == 0 */
685129044dSC.J. Collier
695129044dSC.J. Collier
/*
 * Size of each "uncached" buffer. A random location inside such a buffer is
 * picked for every copy so that accesses miss the cache. Reduce this value
 * if the allocations fail.
 */
#define LARGE_BUFFER_SIZE       (100 * 1024 * 1024)

/*
 * Total number of copies timed per measurement, and the number of copies
 * performed between two timestamp reads.
 */
#define TEST_ITERATIONS         1000000
#define TEST_BATCH_SIZE         100

/*
 * Alignment (a power of 2, in bytes) applied to buffers and random offsets:
 * 64 with AVX512F, 32 with AVX2, 16 otherwise.
 */
#if defined(RTE_MACHINE_CPUFLAG_AVX512F)
#define ALIGNMENT_UNIT          64
#elif defined(RTE_MACHINE_CPUFLAG_AVX2)
#define ALIGNMENT_UNIT          32
#else /* neither AVX512F nor AVX2 */
#define ALIGNMENT_UNIT          16
#endif /* RTE_MACHINE_CPUFLAG */
895129044dSC.J. Collier
/*
 * Buffers shared by all performance tests.
 *
 * The "large" pair is used for uncached access: each memcpy targets a random
 * address inside the buffer. The "small" pair is used for cached access.
 */
static uint8_t *large_buf_read;
static uint8_t *large_buf_write;
static uint8_t *small_buf_read;
static uint8_t *small_buf_write;
975129044dSC.J. Collier
985129044dSC.J. Collier/* Initialise data buffers. */
995129044dSC.J. Collierstatic int
1005129044dSC.J. Collierinit_buffers(void)
1015129044dSC.J. Collier{
1025129044dSC.J. Collier	unsigned i;
1035129044dSC.J. Collier
1045129044dSC.J. Collier	large_buf_read = rte_malloc("memcpy", LARGE_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
1055129044dSC.J. Collier	if (large_buf_read == NULL)
1065129044dSC.J. Collier		goto error_large_buf_read;
1075129044dSC.J. Collier
1085129044dSC.J. Collier	large_buf_write = rte_malloc("memcpy", LARGE_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
1095129044dSC.J. Collier	if (large_buf_write == NULL)
1105129044dSC.J. Collier		goto error_large_buf_write;
1115129044dSC.J. Collier
1125129044dSC.J. Collier	small_buf_read = rte_malloc("memcpy", SMALL_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
1135129044dSC.J. Collier	if (small_buf_read == NULL)
1145129044dSC.J. Collier		goto error_small_buf_read;
1155129044dSC.J. Collier
1165129044dSC.J. Collier	small_buf_write = rte_malloc("memcpy", SMALL_BUFFER_SIZE + ALIGNMENT_UNIT, ALIGNMENT_UNIT);
1175129044dSC.J. Collier	if (small_buf_write == NULL)
1185129044dSC.J. Collier		goto error_small_buf_write;
1195129044dSC.J. Collier
1205129044dSC.J. Collier	for (i = 0; i < LARGE_BUFFER_SIZE; i++)
1215129044dSC.J. Collier		large_buf_read[i] = rte_rand();
1225129044dSC.J. Collier	for (i = 0; i < SMALL_BUFFER_SIZE; i++)
1235129044dSC.J. Collier		small_buf_read[i] = rte_rand();
1245129044dSC.J. Collier
1255129044dSC.J. Collier	return 0;
1265129044dSC.J. Collier
1275129044dSC.J. Colliererror_small_buf_write:
1285129044dSC.J. Collier	rte_free(small_buf_read);
1295129044dSC.J. Colliererror_small_buf_read:
1305129044dSC.J. Collier	rte_free(large_buf_write);
1315129044dSC.J. Colliererror_large_buf_write:
1325129044dSC.J. Collier	rte_free(large_buf_read);
1335129044dSC.J. Colliererror_large_buf_read:
1345129044dSC.J. Collier	printf("ERROR: not enough memory\n");
1355129044dSC.J. Collier	return -1;
1365129044dSC.J. Collier}
1375129044dSC.J. Collier
1385129044dSC.J. Collier/* Cleanup data buffers */
1395129044dSC.J. Collierstatic void
1405129044dSC.J. Collierfree_buffers(void)
1415129044dSC.J. Collier{
1425129044dSC.J. Collier	rte_free(large_buf_read);
1435129044dSC.J. Collier	rte_free(large_buf_write);
1445129044dSC.J. Collier	rte_free(small_buf_read);
1455129044dSC.J. Collier	rte_free(small_buf_write);
1465129044dSC.J. Collier}
1475129044dSC.J. Collier
1485129044dSC.J. Collier/*
1495129044dSC.J. Collier * Get a random offset into large array, with enough space needed to perform
1505129044dSC.J. Collier * max copy size. Offset is aligned, uoffset is used for unalignment setting.
1515129044dSC.J. Collier */
1525129044dSC.J. Collierstatic inline size_t
1535129044dSC.J. Collierget_rand_offset(size_t uoffset)
1545129044dSC.J. Collier{
1555129044dSC.J. Collier	return ((rte_rand() % (LARGE_BUFFER_SIZE - SMALL_BUFFER_SIZE)) &
1565129044dSC.J. Collier			~(ALIGNMENT_UNIT - 1)) + uoffset;
1575129044dSC.J. Collier}
1585129044dSC.J. Collier
1595129044dSC.J. Collier/* Fill in source and destination addresses. */
1605129044dSC.J. Collierstatic inline void
1615129044dSC.J. Collierfill_addr_arrays(size_t *dst_addr, int is_dst_cached, size_t dst_uoffset,
1625129044dSC.J. Collier				 size_t *src_addr, int is_src_cached, size_t src_uoffset)
1635129044dSC.J. Collier{
1645129044dSC.J. Collier	unsigned int i;
1655129044dSC.J. Collier
1665129044dSC.J. Collier	for (i = 0; i < TEST_BATCH_SIZE; i++) {
1675129044dSC.J. Collier		dst_addr[i] = (is_dst_cached) ? dst_uoffset : get_rand_offset(dst_uoffset);
1685129044dSC.J. Collier		src_addr[i] = (is_src_cached) ? src_uoffset : get_rand_offset(src_uoffset);
1695129044dSC.J. Collier	}
1705129044dSC.J. Collier}
1715129044dSC.J. Collier
1725129044dSC.J. Collier/*
1735129044dSC.J. Collier * WORKAROUND: For some reason the first test doing an uncached write
1745129044dSC.J. Collier * takes a very long time (~25 times longer than is expected). So we do
1755129044dSC.J. Collier * it once without timing.
1765129044dSC.J. Collier */
1775129044dSC.J. Collierstatic void
1785129044dSC.J. Collierdo_uncached_write(uint8_t *dst, int is_dst_cached,
1795129044dSC.J. Collier				  const uint8_t *src, int is_src_cached, size_t size)
1805129044dSC.J. Collier{
1815129044dSC.J. Collier	unsigned i, j;
1825129044dSC.J. Collier	size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE];
1835129044dSC.J. Collier
1845129044dSC.J. Collier	for (i = 0; i < (TEST_ITERATIONS / TEST_BATCH_SIZE); i++) {
1855129044dSC.J. Collier		fill_addr_arrays(dst_addrs, is_dst_cached, 0,
1865129044dSC.J. Collier						 src_addrs, is_src_cached, 0);
1875129044dSC.J. Collier		for (j = 0; j < TEST_BATCH_SIZE; j++) {
1885129044dSC.J. Collier			rte_memcpy(dst+dst_addrs[j], src+src_addrs[j], size);
1895129044dSC.J. Collier		}
1905129044dSC.J. Collier	}
1915129044dSC.J. Collier}
1925129044dSC.J. Collier
/*
 * Run a single memcpy performance test and print the result.
 *
 * The same workload (TEST_ITERATIONS copies, issued in batches of
 * TEST_BATCH_SIZE between two rte_rdtsc() reads) is timed first with
 * rte_memcpy() and then with memcpy(). The average number of ticks per copy
 * is printed for each, as "<rte_memcpy> -<memcpy>".
 *
 * This is a macro rather than a function so that a compile-time constant
 * "size" argument stays a constant at the copy call sites.
 *
 * Every parameter is parenthesised on use, so compound expressions are safe
 * as arguments. Arguments are evaluated many times and must therefore be
 * free of side effects; they must also not refer to the identifiers declared
 * locally by the macro (iter, t, dst_addrs, src_addrs, start_time,
 * total_time, total_time2).
 */
#define SINGLE_PERF_TEST(dst, is_dst_cached, dst_uoffset,                   \
                         src, is_src_cached, src_uoffset, size)             \
do {                                                                        \
    unsigned int iter, t;                                                   \
    size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE];          \
    uint64_t start_time, total_time = 0;                                    \
    uint64_t total_time2 = 0;                                               \
    for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {    \
        fill_addr_arrays(dst_addrs, (is_dst_cached), (dst_uoffset),         \
                         src_addrs, (is_src_cached), (src_uoffset));        \
        start_time = rte_rdtsc();                                           \
        for (t = 0; t < TEST_BATCH_SIZE; t++)                               \
            rte_memcpy((dst) + dst_addrs[t], (src) + src_addrs[t], (size)); \
        total_time += rte_rdtsc() - start_time;                             \
    }                                                                       \
    for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {    \
        fill_addr_arrays(dst_addrs, (is_dst_cached), (dst_uoffset),         \
                         src_addrs, (is_src_cached), (src_uoffset));        \
        start_time = rte_rdtsc();                                           \
        for (t = 0; t < TEST_BATCH_SIZE; t++)                               \
            memcpy((dst) + dst_addrs[t], (src) + src_addrs[t], (size));     \
        total_time2 += rte_rdtsc() - start_time;                            \
    }                                                                       \
    printf("%8.0f -", (double)total_time / TEST_ITERATIONS);                \
    printf("%5.0f", (double)total_time2 / TEST_ITERATIONS);                 \
} while (0)
2235129044dSC.J. Collier
/*
 * Run the aligned memcpy tests for copy length n, once for each
 * cached/uncached combination of destination and source.
 *
 * The row starts with the size, prefixed by 'C' when n is a compile-time
 * constant. n is parenthesised wherever it is used, so a compound expression
 * is cast and forwarded as a whole; it is evaluated many times and must not
 * have side effects.
 */
#define ALL_PERF_TESTS_FOR_SIZE(n)                                       \
do {                                                                     \
    if (__builtin_constant_p(n))                                         \
        printf("\nC%6u", (unsigned)(n));                                 \
    else                                                                 \
        printf("\n%7u", (unsigned)(n));                                  \
    SINGLE_PERF_TEST(small_buf_write, 1, 0, small_buf_read, 1, 0, (n));  \
    SINGLE_PERF_TEST(large_buf_write, 0, 0, small_buf_read, 1, 0, (n));  \
    SINGLE_PERF_TEST(small_buf_write, 1, 0, large_buf_read, 0, 0, (n));  \
    SINGLE_PERF_TEST(large_buf_write, 0, 0, large_buf_read, 0, 0, (n));  \
} while (0)
2365129044dSC.J. Collier
/*
 * Run the unaligned memcpy tests for copy length n, once for each
 * cached/uncached combination of destination and source. The destination is
 * offset by 1 byte and the source by 5 bytes from the aligned position.
 *
 * The row starts with the size, prefixed by 'C' when n is a compile-time
 * constant. n is parenthesised wherever it is used, so a compound expression
 * is cast and forwarded as a whole; it is evaluated many times and must not
 * have side effects.
 */
#define ALL_PERF_TESTS_FOR_SIZE_UNALIGNED(n)                             \
do {                                                                     \
    if (__builtin_constant_p(n))                                         \
        printf("\nC%6u", (unsigned)(n));                                 \
    else                                                                 \
        printf("\n%7u", (unsigned)(n));                                  \
    SINGLE_PERF_TEST(small_buf_write, 1, 1, small_buf_read, 1, 5, (n));  \
    SINGLE_PERF_TEST(large_buf_write, 0, 1, small_buf_read, 1, 5, (n));  \
    SINGLE_PERF_TEST(small_buf_write, 1, 1, large_buf_read, 0, 5, (n));  \
    SINGLE_PERF_TEST(large_buf_write, 0, 1, large_buf_read, 0, 5, (n));  \
} while (0)
2495129044dSC.J. Collier
/*
 * Invoke TEST_CONSTANT() once for each compile-time constant copy length.
 * TEST_CONSTANT must be defined by the user of this macro.
 */
#define ALL_PERF_TEST_FOR_CONSTANT                                      \
do {                                                                    \
    TEST_CONSTANT(6U);                                                  \
    TEST_CONSTANT(64U);                                                 \
    TEST_CONSTANT(128U);                                                \
    TEST_CONSTANT(192U);                                                \
    TEST_CONSTANT(256U);                                                \
    TEST_CONSTANT(512U);                                                \
    TEST_CONSTANT(768U);                                                \
    TEST_CONSTANT(1024U);                                               \
    TEST_CONSTANT(1536U);                                               \
} while (0)
2575129044dSC.J. Collier
2585129044dSC.J. Collier/* Run all memcpy tests for aligned constant cases */
2595129044dSC.J. Collierstatic inline void
2605129044dSC.J. Collierperf_test_constant_aligned(void)
2615129044dSC.J. Collier{
2625129044dSC.J. Collier#define TEST_CONSTANT ALL_PERF_TESTS_FOR_SIZE
2635129044dSC.J. Collier	ALL_PERF_TEST_FOR_CONSTANT;
2645129044dSC.J. Collier#undef TEST_CONSTANT
2655129044dSC.J. Collier}
2665129044dSC.J. Collier
2675129044dSC.J. Collier/* Run all memcpy tests for unaligned constant cases */
2685129044dSC.J. Collierstatic inline void
2695129044dSC.J. Collierperf_test_constant_unaligned(void)
2705129044dSC.J. Collier{
2715129044dSC.J. Collier#define TEST_CONSTANT ALL_PERF_TESTS_FOR_SIZE_UNALIGNED
2725129044dSC.J. Collier	ALL_PERF_TEST_FOR_CONSTANT;
2735129044dSC.J. Collier#undef TEST_CONSTANT
2745129044dSC.J. Collier}
2755129044dSC.J. Collier
2765129044dSC.J. Collier/* Run all memcpy tests for aligned variable cases */
2775129044dSC.J. Collierstatic inline void
2785129044dSC.J. Collierperf_test_variable_aligned(void)
2795129044dSC.J. Collier{
2805129044dSC.J. Collier	unsigned n = sizeof(buf_sizes) / sizeof(buf_sizes[0]);
2815129044dSC.J. Collier	unsigned i;
2825129044dSC.J. Collier	for (i = 0; i < n; i++) {
2835129044dSC.J. Collier		ALL_PERF_TESTS_FOR_SIZE((size_t)buf_sizes[i]);
2845129044dSC.J. Collier	}
2855129044dSC.J. Collier}
2865129044dSC.J. Collier
2875129044dSC.J. Collier/* Run all memcpy tests for unaligned variable cases */
2885129044dSC.J. Collierstatic inline void
2895129044dSC.J. Collierperf_test_variable_unaligned(void)
2905129044dSC.J. Collier{
2915129044dSC.J. Collier	unsigned n = sizeof(buf_sizes) / sizeof(buf_sizes[0]);
2925129044dSC.J. Collier	unsigned i;
2935129044dSC.J. Collier	for (i = 0; i < n; i++) {
2945129044dSC.J. Collier		ALL_PERF_TESTS_FOR_SIZE_UNALIGNED((size_t)buf_sizes[i]);
2955129044dSC.J. Collier	}
2965129044dSC.J. Collier}
2975129044dSC.J. Collier
2985129044dSC.J. Collier/* Run all memcpy tests */
2995129044dSC.J. Collierstatic int
3005129044dSC.J. Collierperf_test(void)
3015129044dSC.J. Collier{
3025129044dSC.J. Collier	int ret;
3035129044dSC.J. Collier
3045129044dSC.J. Collier	ret = init_buffers();
3055129044dSC.J. Collier	if (ret != 0)
3065129044dSC.J. Collier		return ret;
3075129044dSC.J. Collier
3085129044dSC.J. Collier#if TEST_VALUE_RANGE != 0
3095129044dSC.J. Collier	/* Set up buf_sizes array, if required */
3105129044dSC.J. Collier	unsigned i;
3115129044dSC.J. Collier	for (i = 0; i < TEST_VALUE_RANGE; i++)
3125129044dSC.J. Collier		buf_sizes[i] = i;
3135129044dSC.J. Collier#endif
3145129044dSC.J. Collier
3155129044dSC.J. Collier	/* See function comment */
3165129044dSC.J. Collier	do_uncached_write(large_buf_write, 0, small_buf_read, 1, SMALL_BUFFER_SIZE);
3175129044dSC.J. Collier
3185129044dSC.J. Collier	printf("\n** rte_memcpy() - memcpy perf. tests (C = compile-time constant) **\n"
3195129044dSC.J. Collier		   "======= ============== ============== ============== ==============\n"
3205129044dSC.J. Collier		   "   Size Cache to cache   Cache to mem   Mem to cache     Mem to mem\n"
3215129044dSC.J. Collier		   "(bytes)        (ticks)        (ticks)        (ticks)        (ticks)\n"
3225129044dSC.J. Collier		   "------- -------------- -------------- -------------- --------------");
3235129044dSC.J. Collier
3245129044dSC.J. Collier	printf("\n========================== %2dB aligned ============================", ALIGNMENT_UNIT);
3255129044dSC.J. Collier	/* Do aligned tests where size is a variable */
3265129044dSC.J. Collier	perf_test_variable_aligned();
3275129044dSC.J. Collier	printf("\n------- -------------- -------------- -------------- --------------");
3285129044dSC.J. Collier	/* Do aligned tests where size is a compile-time constant */
3295129044dSC.J. Collier	perf_test_constant_aligned();
3305129044dSC.J. Collier	printf("\n=========================== Unaligned =============================");
3315129044dSC.J. Collier	/* Do unaligned tests where size is a variable */
3325129044dSC.J. Collier	perf_test_variable_unaligned();
3335129044dSC.J. Collier	printf("\n------- -------------- -------------- -------------- --------------");
3345129044dSC.J. Collier	/* Do unaligned tests where size is a compile-time constant */
3355129044dSC.J. Collier	perf_test_constant_unaligned();
3365129044dSC.J. Collier	printf("\n======= ============== ============== ============== ==============\n\n");
3375129044dSC.J. Collier
3385129044dSC.J. Collier	free_buffers();
3395129044dSC.J. Collier
3405129044dSC.J. Collier	return 0;
3415129044dSC.J. Collier}
3425129044dSC.J. Collier
/*
 * Test entry point: runs the memcpy benchmark and maps any failure of
 * perf_test() to -1. Returns 0 on success.
 */
static int
test_memcpy_perf(void)
{
	return (perf_test() == 0) ? 0 : -1;
}

/* Register the benchmark as the "memcpy_perf_autotest" test command */
REGISTER_TEST_COMMAND(memcpy_perf_autotest, test_memcpy_perf);
355