Index: linux/tests/memcpy_test.c
===================================================================
--- /dev/null
+++ linux/tests/memcpy_test.c
@@ -0,0 +1,358 @@
+/* memcpy_test.c
+ *
+ * Test module for synthetic in-kernel memory copy operations testing.
+ *
+ * The test is triggered by loading the module. The init function
+ * deliberately returns an error so the module unloads again after
+ * printing its results.
+ *
+ * (C) 2023 Ampere Computing LLC, Christoph Lameter
+ */
+
+/*
+ * NOTE(review): the original include list was garbled (the <...>
+ * header names were stripped). Reconstructed from the symbols used
+ * below -- verify against the original submission.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/uaccess.h>
+#include <linux/cpufreq.h>
+#include <linux/smp.h>
+#include <linux/numa.h>
+#include <linux/nodemask.h>
+#include <linux/gfp.h>
+
+#include "cycles.h"
+
+/*
+ * Time 'reps' kernel memcpy() operations of 'size' bytes from a to b,
+ * verify the result and free both buffers (ownership of a and b is
+ * taken regardless of outcome).
+ *
+ * Returns the elapsed cycle count, or 0 if a buffer allocation failed.
+ */
+unsigned long __memcpy_test(int size, int reps, u8 *a, u8 *b)
+{
+	unsigned long start = 0, stop = 0;
+	int i;
+
+	if (!a || !b) {
+		printk(KERN_ERR "memcpy_test: buffer allocation failed\n");
+		goto out;
+	}
+
+	for (i = 0; i < size; i++)
+		a[i] = i % 256;
+
+	CYCLES_ENABLE;
+
+	start = CYCLES;
+	for (i = 0; i < reps; i++)
+		memcpy(b, a, size);
+
+	stop = CYCLES;
+	CYCLES_DISABLE;
+
+	/* Verify that the last copy arrived intact */
+	for (i = 0; i < size; i++)
+		if (b[i] != i % 256) {
+			printk(KERN_ERR "Copy failed at offset %d\n", i);
+			break;
+		}
+
+out:
+	/* vfree(NULL) is a no-op, so the error path is safe */
+	vfree(a);
+	vfree(b);
+
+	return stop - start;
+}
+
+/* Kernel-to-kernel copy using base page size vmalloc memory */
+unsigned long memcpy_test(int size, int reps)
+{
+	u8 *a = vzalloc(size);
+	u8 *b = vzalloc(size);
+
+	return __memcpy_test(size, reps, a, b);
+}
+
+/* Kernel-to-kernel copy backed by huge pages where possible */
+unsigned long memcpy_test_huge(int size, int reps)
+{
+	u8 *a = vmalloc_huge(size, GFP_KERNEL | __GFP_ZERO);
+	u8 *b = vmalloc_huge(size, GFP_KERNEL | __GFP_ZERO);
+
+	return __memcpy_test(size, reps, a, b);
+}
+
+/* Copy from memory on node 0 to memory on node 1 */
+unsigned long memcpy_test_node_to_node(int size, int reps)
+{
+	u8 *a = vzalloc_node(size, 0);
+	u8 *b = vzalloc_node(size, 1);
+
+	return __memcpy_test(size, reps, a, b);
+}
+
+/*
+ * Time 'reps' copy_to_user() operations of 'size' bytes from kernel
+ * buffer a to user address b, verify the result and free a.
+ *
+ * Returns the elapsed cycle count, or 0 if copy_to_user() failed.
+ */
+unsigned long __copy_to_user_test(int size, int reps, u8 *a, u8 __user *b)
+{
+	unsigned long start = 0, stop = 0;
+	int i;
+
+	for (i = 0; i < size; i++)
+		a[i] = i % 256;
+
+	CYCLES_ENABLE;
+
+	start = CYCLES;
+	for (i = 0; i < reps; i++) {
+		/* copy_to_user() returns the number of bytes NOT copied */
+		unsigned long left = copy_to_user(b, a, size);
+
+		if (left) {
+			printk(KERN_ERR "copy_to_user failed, %lu bytes left\n", left);
+			CYCLES_DISABLE;
+			goto out;	/* early return here used to leak 'a' */
+		}
+	}
+
+	stop = CYCLES;
+	CYCLES_DISABLE;
+
+	/* Read the data back via get_user() to verify the last copy */
+	for (i = 0; i < size; i++) {
+		u8 byte;
+		int status;
+
+		status = get_user(byte, b + i);
+
+		if (status) {
+			printk(KERN_ERR "Copy access failed at offset %d\n", i);
+			goto out;
+		}
+
+		if (byte != i % 256) {
+			printk(KERN_ERR "Copy failed at offset %d\n", i);
+			goto out;
+		}
+	}
+
+out:
+	vfree(a);
+
+	return stop - start;
+}
+
+/* copy_to_user() into a regular populated anonymous mapping */
+unsigned long memcpy_test_user(int size, int reps)
+{
+	u8 *a = vzalloc(size);
+	unsigned long addr;
+	unsigned long cycles;
+
+	addr = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_POPULATE, 0);
+
+	if (addr >= (unsigned long)(TASK_SIZE)) {
+		printk(KERN_ERR "Failed to allocate user memory addr=%lx\n", addr);
+		vfree(a);	/* was leaked on this error path */
+		return 0;
+	}
+
+	cycles = __copy_to_user_test(size, reps, a, (u8 __user *)addr);
+
+	vm_munmap(addr, size);
+	return cycles;
+}
+
+/*
+ * copy_to_user() into a hugetlb mapping. Mappings smaller than the
+ * huge page size fail, so those rows will report 0.
+ */
+unsigned long memcpy_test_user_huge(int size, int reps)
+{
+	u8 *a = vmalloc_huge(size, GFP_KERNEL | __GFP_ZERO);
+	unsigned long addr;
+	unsigned long cycles;
+
+	addr = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_HUGETLB, 0);
+
+	if (addr >= (unsigned long)(TASK_SIZE)) {
+		printk(KERN_ERR "Failed to allocate user memory addr=%lx\n", addr);
+		vfree(a);	/* was leaked on this error path */
+		return 0;
+	}
+
+	cycles = __copy_to_user_test(size, reps, a, (u8 __user *)addr);
+
+	vm_munmap(addr, size);
+	return cycles;
+}
+
+/* copy_to_user() from a kernel buffer on the other node */
+unsigned long memcpy_test_user_n2n(int size, int reps)
+{
+	u8 *a = vzalloc_node(size, numa_node_id() ^ 1);
+	unsigned long addr;
+	unsigned long cycles;
+
+	addr = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_POPULATE, 0);
+
+	if (addr >= (unsigned long)(TASK_SIZE)) {
+		printk(KERN_ERR "Failed to allocate user memory addr=%lx\n", addr);
+		vfree(a);	/* was leaked on this error path */
+		return 0;
+	}
+
+	cycles = __copy_to_user_test(size, reps, a, (u8 __user *)addr);
+
+	vm_munmap(addr, size);
+	return cycles;
+}
+
+/*
+ * Label for size 1 << index. The original table was missing the comma
+ * between "8" and "16", which shifted every label from 16 upward off
+ * by one and left "4G" (index 32) outside the [32] bound.
+ */
+const char *size_shift_text[33] = {
+	"1", "2", "4", "8", "16", "32", "64", "128", "256", "512",
+	"1K", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K",
+	"1M", "2M", "4M", "8M", "16M", "32M", "64M", "128M", "256M", "512M",
+	"1G", "2G", "4G"
+};
+
+unsigned long test_matrix[20][32];
+
+const char *coltext[20];
+
+/*
+ * Current CPU frequency in kHz. Defaults to 3 GHz when cpufreq is not
+ * available; warns once in that case.
+ */
+static unsigned get_current_freq(void)
+{
+	struct cpufreq_policy *x = cpufreq_cpu_get(smp_processor_id());
+	unsigned long frequency = 3000000; /* 3 GHz default if nothing is set */
+	static bool warning = false;
+
+	if (x) {
+		frequency = x->cur;
+
+		cpufreq_cpu_put(x);
+	} else {
+		if (!warning) {
+			printk("Cpufreq subsystem not available. Assuming processor runs at 3 GHz\n");
+			warning = true;
+		}
+	}
+	return frequency;
+}
+
+/*
+ * Run one test function over sizes 16 bytes .. 1GB, print a table and
+ * record MByte/sec in column 'testnr' of the summary matrix.
+ */
+static void run_test(unsigned long (*test)(int size, int reps),
+		const char *text, const char *brief, int testnr)
+{
+	int i;
+
+	printk(KERN_INFO "\n%s\n", text);
+	printk(KERN_INFO "--Size----Cycles-----Bytes per cycle---MByte per sec\n");
+	for (i = 4; i < 31; i++) {
+		unsigned long size = 1UL << i;
+		unsigned long reps = (1UL << 31) / size;
+		unsigned long cycles = test(size, reps);
+		unsigned long cycles_per, throughput_millibyte, throughput_byte;
+		unsigned long freq_khz, time_in_nanoseconds, mbyte_per_sec;
+
+		if (!cycles) {
+			/* Sub-test failed; avoid dividing by zero below */
+			test_matrix[testnr][i] = 0;
+			continue;
+		}
+
+		cycles_per = cycles / reps;
+		throughput_millibyte = ((1UL << 31) * 1000UL) / cycles;
+		throughput_byte = throughput_millibyte / 1000;
+		freq_khz = get_current_freq();
+		time_in_nanoseconds = cycles * 1000 * 1000 / freq_khz;
+		mbyte_per_sec = (1000 * (1UL << 31)) / time_in_nanoseconds;
+
+		printk(KERN_INFO "%5s\t%10lu\t%5lu.%03lu\t%lu\n",
+			size_shift_text[i], cycles_per, throughput_byte,
+			throughput_millibyte - throughput_byte * 1000,
+			mbyte_per_sec);
+
+		test_matrix[testnr][i] = mbyte_per_sec;
+	}
+	coltext[testnr] = brief;
+}
+
+static int memcpy_test_init(void)
+{
+	int col = 0;
+	int i;
+
+	printk(KERN_INFO "Memory copy tests\n");
+	printk(KERN_INFO "===============================\n");
+
+	run_test(memcpy_test, "Default page size memcpy", "Default", col++);
+	run_test(memcpy_test_huge, "Huge page size memcpy", "Huge", col++);
+
+	if (nr_node_ids > 1)
+		run_test(memcpy_test_node_to_node, "Node to node memcpy", "n2n", col++);
+
+	run_test(memcpy_test_user, "Copy to user memcpy", "User", col++);
+	run_test(memcpy_test_user_huge, "Copy to huge user memcpy", "HugeUsr", col++);
+
+	if (nr_node_ids > 1)
+		run_test(memcpy_test_user_n2n, "Copy to user memcpy node to node", "User-n2n", col++);
+
+	printk("Test Matrix Memcpy operations\n");
+	printk("=============================\n");
+	printk("Test\t");
+
+	for (i = 0; i < col; i++)
+		printk(KERN_CONT "\t%s", coltext[i]);
+
+	printk(KERN_CONT "\n");
+
+	for (i = 4; i < 31; i++) {
+		int j;
+
+		printk(KERN_INFO "%s\t", size_shift_text[i]);
+
+		for (j = 0; j < col; j++)
+			printk(KERN_CONT "\t%lu", test_matrix[j][i]);
+
+		printk(KERN_CONT "\n");
+	}
+	return -EAGAIN; /* Failing init directly unloads the module */
+}
+
+static void memcpy_test_exit(void)
+{
+	printk(KERN_INFO "test exit\n");
+}
+
+module_init(memcpy_test_init)
+module_exit(memcpy_test_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christopher Lameter");
+MODULE_DESCRIPTION("Memcpy test");
Index: linux/tests/Kconfig
===================================================================
--- linux.orig/tests/Kconfig
+++ linux/tests/Kconfig
@@ -22,5 +22,12 @@ config BENCHMARK_ATOMIC
 	help
 	  A benchmark that measures cycle count of atomic operations.
 
+config BENCHMARK_MEMCPY
+	tristate "Memcpy Operations Benchmark"
+	depends on m
+	default m
+	help
+	  A benchmark that measures cycle count of memcpy operations.
+
 endif # BENCHMARKS
Index: linux/tests/Makefile
===================================================================
--- linux.orig/tests/Makefile
+++ linux/tests/Makefile
@@ -4,4 +4,6 @@ obj-$(CONFIG_BENCHMARK_SLAB) += slab_tes
 
 obj-$(CONFIG_BENCHMARK_ATOMIC) += atomic_test.o
 
+obj-$(CONFIG_BENCHMARK_MEMCPY) += memcpy_test.o
+