Index: linux/tests/atomic_test.c
===================================================================
--- /dev/null
+++ linux/tests/atomic_test.c
@@ -0,0 +1,695 @@
+/* test-atomic.c
+ *
+ * Test module for synthetic in-kernel testing of atomic operations.
+ *
+ * The tests are triggered by loading the module. Module initialization
+ * intentionally fails afterwards so that the module does not stay loaded.
+ *
+ * (C) 2023 Ampere Computing LLC, Christoph Lameter
+ */
+
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/atomic.h>
+#include "cycles.h"
+
+#define TEST_COUNT 30000
+
+#include <linux/sched.h>
+#include <linux/completion.h>
+#include <linux/cpumask.h>
+#include <linux/numa.h>
+#include <linux/topology.h>
+#include <linux/string.h>
+
+static int cpus;
+static bool validation_failed;
+
+static struct test_struct {
+	struct task_struct *task;
+	int cpu;
+	int count;
+	char **v;
+	void (*test_i)(struct test_struct *);
+	void (*test_p)(struct test_struct *);
+	void (*test_v)(struct test_struct *);
+	unsigned long start;
+	unsigned long stop;
+	bool unreliable;
+} test[NR_CPUS];
+
+
+/*
+ * Operations on a plain global variable
+ */
+
+static unsigned long x_normal;
+
+static void init_normal_increment(struct test_struct *t)
+{
+	x_normal = 0;
+}
+
+static void test_normal_increment(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		barrier();	/* The compiler optimizes the loop away if we don't stop it */
+		x_normal++;
+	}
+	t->stop = CYCLES;
+}
+
+static void validate_normal_increment(struct test_struct *t)
+{
+	if (x_normal != cpus * TEST_COUNT) {
+		printk(KERN_ERR "x++ %d times on %d cpus must result in %d and not %lu\n",
+			TEST_COUNT, cpus, cpus * TEST_COUNT, x_normal);
+		validation_failed = true;
+	}
+}
+
+/*
+ * Operations on an unsigned long that is per cpu
+ */
+
+static DEFINE_PER_CPU_ALIGNED(unsigned long, x_percpu);
+
+static void init_this_cpu(struct test_struct *t)
+{
+	this_cpu_write(x_percpu, 0);
+}
+
+static void test_this_cpu_inc(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		this_cpu_inc(x_percpu);
+	}
+	t->stop = CYCLES;
+}
+
+static void test_this_cpu_inc_return(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		if (this_cpu_inc_return(x_percpu) != i + 1)
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void test_this_cpu_xchg(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		if (this_cpu_xchg(x_percpu, i + 1) != i)
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void test_this_cpu_cmpxchg(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		if (this_cpu_cmpxchg(x_percpu, i, i + 1) != i)
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void validate_this_cpu(struct test_struct *t)
+{
+	if (this_cpu_read(x_percpu) != TEST_COUNT)
+		validation_failed = true;
+}
+
+static DEFINE_PER_CPU_ALIGNED(u128, x128_percpu);
+
+static void init_this_cpu128(struct test_struct *t)
+{
+	/* This should be this_cpu_write(x128_percpu, 0) but there is no u128 support */
+	memset(this_cpu_ptr(&x128_percpu), 0, sizeof(u128));
+}
+
+static void test_this_cpu_cmpxchg128(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		this_cpu_cmpxchg128(x128_percpu, i, i + 1);
+	}
+	t->stop = CYCLES;
+}
+
+static void validate_this_cpu128(struct test_struct *t)
+{
+	/* This should be this_cpu_read(x128_percpu) but there is no u128 support */
+	u128 *p = this_cpu_ptr(&x128_percpu);
+
+	if (*p != TEST_COUNT)
+		validation_failed = true;
+}
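+/*
+ * Sketch only, assuming an API that does not exist yet: if the generic
+ * this_cpu_read()/this_cpu_write() accessors grew u128 support, the two
+ * workaround helpers above would reduce to:
+ *
+ *	static void init_this_cpu128(struct test_struct *t)
+ *	{
+ *		this_cpu_write(x128_percpu, (u128)0);
+ *	}
+ *
+ *	static void validate_this_cpu128(struct test_struct *t)
+ *	{
+ *		if (this_cpu_read(x128_percpu) != (u128)TEST_COUNT)
+ *			validation_failed = true;
+ *	}
+ */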
+
+/* Atomics */
+
+static atomic_long_t x_atomic;
+
+static void init_atomic(struct test_struct *t)
+{
+	atomic_long_set(&x_atomic, 0);
+}
+
+static void init_atomic_1(struct test_struct *t)
+{
+	atomic_long_set(&x_atomic, 1);
+}
+
+static void test_atomic_inc(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		atomic_long_inc(&x_atomic);
+	}
+	t->stop = CYCLES;
+}
+
+static void test_atomic_inc_return(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		/* The return value check only holds when a single cpu runs the test */
+		if (atomic_long_inc_return(&x_atomic) != i + 1 && cpus == 1)
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void test_atomic_fetchadd(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		/* The return value check only holds when a single cpu runs the test */
+		if (atomic_long_fetch_add(1, &x_atomic) != i && cpus == 1)
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void test_atomic_inc_not_zero(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		/* The counter starts at 1 and only grows, so this must always succeed */
+		if (!atomic_long_inc_not_zero(&x_atomic))
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void test_atomic_sub_test(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		/* Starting from zero the counter can never reach zero again */
+		if (atomic_long_sub_and_test(1, &x_atomic))
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void validate_atomic(struct test_struct *t)
+{
+	if (atomic_long_read(&x_atomic) != cpus * TEST_COUNT) {
+		validation_failed = true;
+	}
+}
+
+static void test_xchg(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		atomic_long_xchg(&x_atomic, i + 1);
+		/* The return value cannot be checked reliably here */
+	}
+	t->stop = CYCLES;
+}
+
+static void test_cmpxchg(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		if (atomic_long_cmpxchg(&x_atomic, i, i + 1) != i)
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void validate_cmpxchg(struct test_struct *t)
+{
+	if (atomic_long_read(&x_atomic) != TEST_COUNT) {
+		printk(KERN_ERR "cpu %d: atomic_long op %d times must result in %d and not %ld\n",
+			t->cpu, TEST_COUNT, TEST_COUNT, atomic_long_read(&x_atomic));
+		validation_failed = true;
+	}
+}
+
+static u128 x128;
+
+static void init_atomic128(struct test_struct *t)
+{
+	x128 = 0;
+}
+
+static void test_cmpxchg128(struct test_struct *t)
+{
+	int i;
+
+	t->start = CYCLES;
+	for (i = 0; i < TEST_COUNT; i++) {
+		if (cmpxchg128(&x128, i, i + 1) != i)
+			validation_failed = true;
+	}
+	t->stop = CYCLES;
+}
+
+static void validate_atomic128(struct test_struct *t)
+{
+	if (x128 != (u128)cpus * TEST_COUNT)
+		validation_failed = true;
+}
+
+
+static atomic_t tests_running;
+static atomic_t phase1_complete;
+static DECLARE_COMPLETION(completion1);
+static DECLARE_COMPLETION(completion2);
+static DECLARE_COMPLETION(completion3);
+
+static int test_func(void *private)
+{
+	struct test_struct *t = private;
+	struct cpumask newmask = CPU_MASK_NONE;
+
+	/* Bind this thread to the cpu it is testing */
+	cpumask_set_cpu(t->cpu, &newmask);
+	set_cpus_allowed_ptr(current, &newmask);
+	t->v = kzalloc(t->count * sizeof(void *), GFP_KERNEL);
+
+	t->test_i(t);
+
+	CYCLES_ENABLE;
+	atomic_inc(&tests_running);
+	wait_for_completion(&completion1);
+
+	t->test_p(t);
+	atomic_inc(&phase1_complete);
+	wait_for_completion(&completion2);
+
+	atomic_dec(&tests_running);
+	wait_for_completion(&completion3);
+
+	msleep(100);
+	if (t->test_v && !validation_failed && !t->unreliable)
+		t->test_v(t);
+
+	kfree(t->v);
+	t->v = NULL;
+
+	CYCLES_DISABLE;
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule();
+	return 0;
+}
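+/*
+ * Rendezvous protocol between do_concurrent_test() below and the
+ * test_func() worker threads above:
+ *
+ * Phase 0: each worker binds to its cpu, runs the init callback and bumps
+ *	    tests_running. The driver waits until every worker has checked
+ *	    in and then fires completion1.
+ * Phase 1: the timed test body runs on all cpus concurrently. Each worker
+ *	    bumps phase1_complete; the driver then fires completion2.
+ * Phase 2: the workers drop tests_running. When it reaches zero the
+ *	    driver fires completion3 and the workers run the validation
+ *	    callback before parking themselves for kthread_stop().
+ */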
+static unsigned do_concurrent_test(
+	void (*init)(struct test_struct *),
+	void (*p)(struct test_struct *),
+	void (*validate)(struct test_struct *),
+	struct cpumask *mask, const char *name, const char *subtext,
+	bool unreliable)
+{
+	int cpu;
+	unsigned long time;
+	unsigned long sum = 0;
+	unsigned long cycles;
+
+	validation_failed = false;
+
+	atomic_set(&tests_running, 0);
+	atomic_set(&phase1_complete, 0);
+	init_completion(&completion1);
+	init_completion(&completion2);
+	init_completion(&completion3);
+
+	cpus = 0;
+	for_each_cpu(cpu, mask) {
+		struct test_struct *t = test + cpu;
+
+		t->cpu = cpu;
+		t->count = TEST_COUNT;
+		t->test_i = init;
+		t->test_p = p;
+		t->test_v = validate;
+		t->unreliable = unreliable;
+		t->task = kthread_run(test_func, t, "test%d", cpu);
+		if (IS_ERR(t->task)) {
+			printk(KERN_ERR "Failed to start test thread on cpu %d\n", cpu);
+			return 0;
+		} else
+			cpus++;
+	}
+
+	/* Wait till all threads are running */
+	while (atomic_read(&tests_running) < cpus) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(10);
+	}
+	complete_all(&completion1);
+
+	/* Wait till all threads have completed phase 1 */
+	while (atomic_read(&phase1_complete) < cpus) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(10);
+	}
+	complete_all(&completion2);
+
+	while (atomic_read(&tests_running)) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(10);
+	}
+
+	complete_all(&completion3);
+
+	msleep(1000);
+
+	for_each_cpu(cpu, mask) {
+		kthread_stop(test[cpu].task);
+	}
+	for_each_cpu(cpu, mask) {
+		struct test_struct *t = &test[cpu];
+
+		time = t->stop - t->start;
+		sum += time;
+	}
+
+	printk(KERN_INFO "%s\t:", name);
+	/* Cycles per operation, rounded to the nearest integer */
+	cycles = (sum / cpus + TEST_COUNT / 2) / TEST_COUNT;
+
+	printk(KERN_CONT " Average=%lu", cycles);
+	if (!unreliable && validation_failed)
+		printk(KERN_CONT " ** Counter corrupted **");
+
+	/* Limit the per cpu output for systems with large cpu counts */
+	if (cpus < 8) {
+		for_each_cpu(cpu, mask) {
+			struct test_struct *t = &test[cpu];
+
+			time = t->stop - t->start;
+			printk(KERN_CONT " cpu%d=%lu", cpu,
+				(time + TEST_COUNT / 2) / TEST_COUNT);
+		}
+	}
+
+	printk(KERN_CONT "\n");
+	msleep(200);
+	return cycles;
+}
+
+static unsigned do_single_test(
+	void (*init)(struct test_struct *),
+	void (*p)(struct test_struct *),
+	void (*validate)(struct test_struct *),
+	const char *name, const char *subtext)
+{
+	unsigned long time;
+	unsigned long cycles;
+	struct test_struct *t = test;
+
+	cpus = 1;
+	validation_failed = false;
+
+	CYCLES_ENABLE;
+
+	t->count = TEST_COUNT;
+	init(t);
+	p(t);
+	if (validate && !validation_failed)
+		validate(t);
+
+	CYCLES_DISABLE;
+
+	time = t->stop - t->start;
+
+	printk(KERN_INFO "%s\t:", name);
+	cycles = (time + TEST_COUNT / 2) / TEST_COUNT;
+	printk(KERN_CONT " Cycles=%lu", cycles);
+	if (validation_failed)
+		printk(KERN_CONT " ** Counter corrupted **");
+	printk(KERN_CONT "\n");
+	msleep(200);
+	return cycles;
+}
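+/*
+ * Adding a benchmark is a matter of supplying an init/test/validate
+ * triple plus a row in the tests[] table below. A hypothetical example
+ * (not part of this patch) for this_cpu_add():
+ *
+ *	static void test_this_cpu_add(struct test_struct *t)
+ *	{
+ *		int i;
+ *
+ *		t->start = CYCLES;
+ *		for (i = 0; i < TEST_COUNT; i++)
+ *			this_cpu_add(x_percpu, 1);
+ *		t->stop = CYCLES;
+ *	}
+ *
+ * together with the table row
+ *
+ *	{ init_this_cpu, test_this_cpu_add, validate_this_cpu,
+ *	  "this_cpu_add(x)\t\t", false },
+ *
+ * and an incremented NR_TESTS.
+ */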
+#define NR_TESTS 14
+
+static struct test_info {
+	void (*init)(struct test_struct *);
+	void (*test)(struct test_struct *);
+	void (*validate)(struct test_struct *);
+	const char *text;
+	bool multithreaded_unreliable;
+} tests[NR_TESTS] = {
+	{ init_normal_increment, test_normal_increment, validate_normal_increment, "x++\t\t\t", true },
+	{ init_this_cpu, test_this_cpu_inc, validate_this_cpu, "this_cpu_inc(x)\t\t", false },
+	{ init_this_cpu, test_this_cpu_inc_return, validate_this_cpu, "this_cpu_inc_return(x)\t", false },
+	{ init_this_cpu, test_this_cpu_xchg, validate_this_cpu, "this_cpu_xchg(x)\t\t", false },
+	{ init_this_cpu, test_this_cpu_cmpxchg, validate_this_cpu, "this_cpu_cmpxchg(x)\t", false },
+	{ init_this_cpu128, test_this_cpu_cmpxchg128, validate_this_cpu128, "this_cpu_cmpxchg128(x)\t", false },
+	{ init_atomic, test_atomic_inc, validate_atomic, "atomic_long_inc(x)\t", false },
+	{ init_atomic, test_atomic_inc_return, validate_atomic, "atomic_long_inc_return(x)", false },
+	{ init_atomic, test_atomic_fetchadd, validate_atomic, "atomic_fetchadd(x)\t", false },
+	{ init_atomic_1, test_atomic_inc_not_zero, NULL, "atomic_inc_not_zero(x)\t", false },
+	{ init_atomic, test_atomic_sub_test, NULL, "atomic_sub_test(x)\t", false },
+	{ init_atomic, test_xchg, validate_cmpxchg, "atomic_xchg(x)\t\t", true },
+	{ init_atomic, test_cmpxchg, validate_cmpxchg, "atomic_cmpxchg(x)\t", true },
+	{ init_atomic128, test_cmpxchg128, validate_atomic128, "atomic_cmpxchg128(x)\t", true }
+};
+
+/* Matrix for the scaling of the operations */
+#define NR_COLS 20
+
+static const char *coltext[NR_COLS];
+
+static unsigned cycle_matrix[NR_TESTS][NR_COLS];
+
+
+static void run_atomic_tests(struct cpumask *mask, const char *subtext, int index)
+{
+	int i;
+
+	for (i = 0; i < NR_TESTS; i++)
+		cycle_matrix[i][index] = do_concurrent_test(tests[i].init,
+			tests[i].test, tests[i].validate, mask, tests[i].text,
+			subtext, tests[i].multithreaded_unreliable);
+}
+
+static void run_atomic_tests_single(const char *subtext)
+{
+	int i;
+
+	for (i = 0; i < NR_TESTS; i++)
+		cycle_matrix[i][0] = do_single_test(tests[i].init,
+			tests[i].test, tests[i].validate, tests[i].text,
+			subtext);
+}
+
+static void __pick_cpus(struct cpumask *mask, int cpus, int node)
+{
+	int cpu = 0;
+	int start = 4;	/* Skip cpus 0-3, presumably to avoid housekeeping cpus */
+	int found_cpus = 0;
+
+	while (found_cpus < cpus) {
+		int picked = cpu + start;
+
+		if (picked >= nr_cpu_ids) {
+			printk(KERN_ERR "Not enough cpus on node %d\n", node);
+			return;
+		}
+		if (cpu_to_node(picked) == node) {
+			cpumask_set_cpu(picked, mask);
+			found_cpus++;
+		}
+		cpu++;
+	}
+}
+
+static void pick_cpus(struct cpumask *mask, int cpus, int node)
+{
+	int i;
+	int cpus_per_node;
+
+	*mask = CPU_MASK_NONE;
+
+	if (node != NUMA_NO_NODE) {
+		__pick_cpus(mask, cpus, node);
+		return;
+	}
+
+	/* NUMA_NO_NODE: spread the cpus evenly over all nodes */
+	cpus_per_node = cpus / nr_node_ids;
+
+	for (i = 0; i < nr_node_ids; i++)
+		__pick_cpus(mask, cpus_per_node, i);
+}
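+/*
+ * Example with illustrative numbers: on a two node machine with 80 cpus,
+ * cpus 0-39 on node 0 and cpus 40-79 on node 1,
+ * pick_cpus(&mask, 8, NUMA_NO_NODE) selects cpus 4-7 from node 0 and
+ * cpus 40-43 from node 1, while pick_cpus(&mask, 8, 0) selects cpus 4-11
+ * from node 0 only.
+ */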
+static int atomic_test_init(void)
+{
+	int i;
+	struct cpumask mask;
+	static char buf[PAGE_SIZE];	/* Too large for the kernel stack; init runs once */
+	int col = 0;
+
+	printk(KERN_INFO "\nUncontended single thread on single core\n");
+	printk(KERN_INFO "===========================================================\n");
+	run_atomic_tests_single("Single thread");
+	coltext[col++] = "Single";
+
+	for (i = 1; i < 7; i++) {
+		int x = 1 << i;
+
+		pick_cpus(&mask, x, 0);
+		cpumap_print_to_pagebuf(true, buf, &mask);
+		buf[strlen(buf) - 1] = 0;	/* Strip the trailing newline */
+
+		printk(KERN_INFO "\nConcurrent operations on %d Cores (%s)\n", x, buf);
+		printk(KERN_INFO "==========================================\n");
+
+		run_atomic_tests(&mask, "N cpus", i);
+		coltext[col++] = kasprintf(GFP_KERNEL, "%d CPU", x);
+	}
+
+	if (nr_node_ids > 1) {
+
+		for (i = 1; i < 7; i++) {
+			int x = 1 << i;
+
+			pick_cpus(&mask, x, NUMA_NO_NODE);
+			cpumap_print_to_pagebuf(true, buf, &mask);
+			buf[strlen(buf) - 1] = 0;	/* Strip the trailing newline */
+
+			printk(KERN_INFO "\nConcurrent operations on %d Cores (%s) on all NUMA NODES\n", x, buf);
+			printk(KERN_INFO "==============================================================\n");
+
+			run_atomic_tests(&mask, "N cpus", i + 6);
+			coltext[col++] = kasprintf(GFP_KERNEL, "%d 2P", x);
+		}
+
+		printk(KERN_INFO "\nConcurrent operations on a NUMA node\n");
+		printk(KERN_INFO "==========================================\n");
+
+		cpumask_copy(&mask, cpumask_of_node(0));
+
+		run_atomic_tests(&mask, "Node 0 cpus", col);
+		coltext[col++] = "NODE";
+	}
+
+	printk(KERN_INFO "\nConcurrent operations on all cpus\n");
+	printk(KERN_INFO "==========================================\n");
+
+	mask = CPU_MASK_ALL;
+	run_atomic_tests(&mask, "All cpus", col);
+	coltext[col++] = "ALL";
+
+	if (col > NR_COLS)
+		printk(KERN_ERR "atomic_test: Too many columns\n");
+
+	/* Compact version: test matrix */
+	printk(KERN_INFO "\nTestMatrix\n");
+	printk(KERN_INFO "---------------------------\n");
+	printk(KERN_INFO "Test\t\t\t\t");
+
+	for (i = 0; i < col; i++)
+		printk(KERN_CONT "\t%s", coltext[i]);
+
+	printk(KERN_CONT "\n");
+
+	for (i = 0; i < NR_TESTS; i++) {
+		int j;
+
+		printk(KERN_INFO "%s\t:", tests[i].text);
+
+		for (j = 0; j < col; j++)
+			printk(KERN_CONT "\t%u", cycle_matrix[i][j]);
+
+		printk(KERN_CONT "\n");
+	}
+
+	/* Failing the init unloads the module right away */
+	return -EAGAIN;
+}
+
+static void atomic_test_exit(void)
+{
+	printk(KERN_INFO "test exit\n");
+}
+
+module_init(atomic_test_init);
+module_exit(atomic_test_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christoph Lameter");
+MODULE_DESCRIPTION("Atomic operations benchmark");
Index: linux/tests/Kconfig
===================================================================
--- linux.orig/tests/Kconfig
+++ linux/tests/Kconfig
@@ -15,5 +15,12 @@ config BENCHMARK_SLAB
 	help
 	  A benchmark that measures slab allocator performance.
 
+config BENCHMARK_ATOMIC
+	tristate "Atomic Operations Benchmark"
+	depends on m
+	default m
+	help
+	  A benchmark that measures the cycle counts of atomic operations.
+
 endif # BENCHMARKS
Index: linux/tests/Makefile
===================================================================
--- linux.orig/tests/Makefile
+++ linux/tests/Makefile
@@ -2,4 +2,6 @@
 obj-$(CONFIG_BENCHMARK_SLAB) += slab_test.o
 
-#
+obj-$(CONFIG_BENCHMARK_ATOMIC) += atomic_test.o
+