From: Christoph Lameter (Ampere)
Date: Wed, 1 May 2024 16:59:10 +0000 (-0700)
Subject: Merge branch 'tlb' into tlb2
X-Git-Url: https://gentwo.org/gitweb/?a=commitdiff_plain;h=refs%2Fheads%2Ftlb2;p=linux%2F.git

Merge branch 'tlb' into tlb2

Fix issues regarding lpa2 support
---

2f1680509a25a6a80b1e7dcf4dcc188b45fce3b4
diff --cc arch/arm64/include/asm/tlbflush.h
index a75de2665d84,037e74bf8077..ff328dd9f179
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@@ -422,85 -373,25 +398,36 @@@ do {
  } while (0)
  
  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
- 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+ 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
  
- static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
+ void __flush_tlb_range(struct vm_area_struct *vma,
  				     unsigned long start, unsigned long end,
  				     unsigned long stride, bool last_level,
- 				     int tlb_level)
- {
- 	unsigned long asid, pages;
- 
- 	start = round_down(start, stride);
- 	end = round_up(end, stride);
- 	pages = (end - start) >> PAGE_SHIFT;
- 
- 	/*
- 	 * When not uses TLB range ops, we can handle up to
- 	 * (MAX_DVM_OPS - 1) pages;
- 	 * When uses TLB range ops, we can handle up to
- 	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
- 	 */
- 	if ((!system_supports_tlb_range() &&
- 	     (end - start) >= (MAX_DVM_OPS * stride)) ||
- 	    pages >= MAX_TLBI_RANGE_PAGES) {
- 		flush_tlb_mm(vma->vm_mm);
- 		return;
- 	}
- 
- 	dsb(ishst);
- 	asid = ASID(vma->vm_mm);
- 
- 	if (last_level)
- 		__flush_tlb_range_op(vale1is, start, pages, stride, asid,
- 				     tlb_level, true, lpa2_is_enabled());
- 	else
- 		__flush_tlb_range_op(vae1is, start, pages, stride, asid,
- 				     tlb_level, true, lpa2_is_enabled());
- 
- 	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
- }
+ 				     int tlb_level);
 +static inline void __flush_tlb_range(struct vm_area_struct *vma,
 +				     unsigned long start, unsigned long end,
 +				     unsigned long stride, bool last_level,
 +				     int tlb_level)
 +{
 +	__flush_tlb_range_nosync(vma, start, end, stride,
 +				 last_level, tlb_level);
 +	dsb(ish);
 +}
 +
  static inline void flush_tlb_range(struct vm_area_struct *vma,
  				   unsigned long start, unsigned long end)
  {
  	/*
  	 * We cannot use leaf-only invalidation here, since we may be invalidating
  	 * table entries as part of collapsing hugepages or moving page tables.
- 	 * Set the tlb_level to 0 because we can not get enough information here.
+ 	 * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
+ 	 * information here.
  	 */
- 	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ 	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
  }
  
- static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
- {
- 	unsigned long addr;
- 
- 	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
- 		flush_tlb_all();
- 		return;
- 	}
- 
- 	start = __TLBI_VADDR(start, 0);
- 	end = __TLBI_VADDR(end, 0);
- 
- 	dsb(ishst);
- 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
- 		__tlbi(vaale1is, addr);
- 	dsb(ish);
- 	isb();
- }
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end);
  
  /*
   * Used to invalidate the TLB (walk caches) corresponding to intermediate page
diff --cc arch/arm64/mm/context.c
index 188197590fc9,30f4bbcccf1a..831488ff38ee
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@@ -420,3 -422,463 +422,463 @@@ static int asids_init(void
  	return 0;
  }
  early_initcall(asids_init);
+
+ /*
+  * TLB flushing logic to dynamically control the flushes and potentially reduce
+  * the need for TLBIs having to go over the ARM mesh.
+  */
+
+ enum tlb_state {
+ 	TLB_NONE,	/* Address space has no TLBs due to recent flushes or this being a new address space */
+ 	TLB_LOCAL,	/* Only the current cpu has used this address space */
+ 	TLB_IPI,	/* Flush by sending IPIs and doing local flushes */
+ 	TLB_BROADCAST	/* Use the ARM mesh hardware to broadcast invalidations */
+ };
+
+ /*
+  * Control over TLB flushing via tlb_mode
+  *
+  * The lower 10 bits control the use of IPI to do local flushes.
+  *
+  * tlb_mode encodes a limit on the number of processors that are known to have used this address space.
+  * If more than this number of processors have used the address space then a TLBI broadcast
+  * will occur. If fewer cpus than this limit have used it then the TLB logic will send IPIs to these
+  * processors and perform local flushes on each of them. If set to 0 (default) then no IPIs will occur.
+  *
+  * The higher bits control other aspects of TLB operations.
+  *
+  * The default operation is to always use TLBI broadcast (the common method).
+  */
+
+ #define TLB_MODE_IPI_BITS 10
+ #define TLB_MODE_IPI_MASK ((1 << TLB_MODE_IPI_BITS) - 1)
+
+ /* Feature encoding in tlb_mode */
+ #define TLB_MODE_LOCAL	(1 << TLB_MODE_IPI_BITS)	/* Use local invalidation if only the current processor has used an address space */
+ #define TLB_MODE_RANGE	(1 << (TLB_MODE_IPI_BITS + 1))	/* Use TLBI range flushes */
+ #define TLB_MODE_NONE	(1 << (TLB_MODE_IPI_BITS + 2))	/* If no processor has used an address space then skip flushing */
+ #define TLB_MODE_USER	(1 << (TLB_MODE_IPI_BITS + 3))	/* User overrode system defaults */
+
+
+ static unsigned int tlb_mode;
+
+ static enum tlb_state tlbstat(struct cpumask *mask)
+ {
+ 	unsigned int weight = cpumask_weight(mask);
+ 	bool present = cpumask_test_cpu(smp_processor_id(), mask);
+
+ 	if (weight == 0) {
+ 		/*
+ 		 * Unused address space or something strange is going on.
+ 		 * TLB_MODE_NONE tells us either to ignore the
+ 		 * flush request or flush everything to be safe
+ 		 */
+
+ 		if (tlb_mode & TLB_MODE_NONE)
+ 			return TLB_NONE;
+
+ 		return TLB_BROADCAST;
+ 	}
+
+ 	if (weight == 1 && present && (tlb_mode & TLB_MODE_LOCAL))
+ 		return TLB_LOCAL;
+
+ 	if (weight < (tlb_mode & TLB_MODE_IPI_MASK))
+ 		return TLB_IPI;
+
+ 	return TLB_BROADCAST;
+ }
+
+ static inline enum tlb_state tlbstat_mm(struct mm_struct *mm)
+ {
+ 	return tlbstat(mm_cpumask(mm));
+ }
+
+ static inline void flush_tlb_asid(enum tlb_state ts, unsigned long asid)
+ {
+ 	if (ts == TLB_NONE) {
+ 		count_vm_tlb_event(NR_TLB_SKIPPED);
+ 		return;
+ 	}
+
+ 	if (ts == TLB_LOCAL) {
+ 		__tlbi(aside1, asid);
+ 		__tlbi_user(aside1, asid);
+ 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ 		return;
+ 	}
+
+ 	__tlbi(aside1is, asid);
+ 	__tlbi_user(aside1is, asid);
+ 	count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+
+ }
+
+ static inline void flush_tlb_addr(enum tlb_state ts, struct mm_struct *mm, unsigned long uaddr)
+ {
+ 	unsigned long addr;
+
+ 	if (ts == TLB_NONE) {
+ 		count_vm_tlb_event(NR_TLB_SKIPPED);
+ 		return;
+ 	}
+
+ 	addr = __TLBI_VADDR(uaddr, ASID(mm));
+
+ 	if (ts == TLB_LOCAL) {
+ 		__tlbi(vale1, addr);
+ 		__tlbi_user(vale1, addr);
+ 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+ 		return;
+ 	}
+
+ 	__tlbi(vale1is, addr);
+ 	__tlbi_user(vale1is, addr);
+ 	count_vm_tlb_event(NR_TLB_FLUSH_ONE);
+ }
+
+ static inline void flush_tlb_post(enum tlb_state ts)
+ {
+ 	if (ts == TLB_NONE)
+ 		return;
+
+ 	if (ts == TLB_LOCAL) {
+ 		dsb(nsh);
+ 		return;
+ 	}
+
+ 	dsb(ish);
+ }
+
+ static inline void flush_tlb_pre(enum tlb_state ts)
+ {
+ 	if (ts == TLB_NONE)
+ 		return;
+
+ 	if (ts == TLB_LOCAL) {
+ 		dsb(nshst);
+ 		return;
+ 	}
+
+ 	dsb(ishst);
+ }
+
+ static void ipi_flush_tlb_asid(void *p)
+ {
+ 	unsigned long asid = (unsigned long)p;
+
+ 	flush_tlb_pre(TLB_LOCAL);
+ 	flush_tlb_asid(TLB_LOCAL, asid);
+ 	flush_tlb_post(TLB_LOCAL);
+ 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
+ void flush_tlb_mm(struct mm_struct *mm)
+ {
+ 	unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+ 	enum tlb_state ts = tlbstat_mm(mm);
+
+ 	if (ts == TLB_IPI) {
+
+ 		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_asid, (void *)asid, true);
+ 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ 	} else {
+
+ 		flush_tlb_pre(ts);
+ 		flush_tlb_asid(ts, asid);
+ 		flush_tlb_post(ts);
+
+ 	}
+ 	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+ }
+
+ struct ipi_flush_tlb_page_param {
+ 	unsigned long uaddr;
+ 	struct mm_struct *mm;
+ };
+
+ static inline void ipi_flush_tlb_page(void *p)
+ {
+ 	struct ipi_flush_tlb_page_param *i = p;
+
+ 	flush_tlb_pre(TLB_LOCAL);
+ 	flush_tlb_addr(TLB_LOCAL, i->mm, i->uaddr);
+ 	flush_tlb_post(TLB_LOCAL);
+ 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
+ void __flush_tlb_page(struct mm_struct *mm,
+ 		      unsigned long uaddr, bool sync)
+ {
+ 	struct ipi_flush_tlb_page_param i = { uaddr, mm };
+ 	enum tlb_state ts = tlbstat_mm(i.mm);
+
+ 	if (ts == TLB_IPI) {
+
+ 		on_each_cpu_mask(mm_cpumask(i.mm), ipi_flush_tlb_page, &i, true);
+ 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ 	} else {
+
+ 		flush_tlb_pre(ts);
+ 		flush_tlb_addr(ts, i.mm, uaddr);
+
+ 		if (sync)
+ 			flush_tlb_post(ts);
+
+ 	}
+
+ 	mmu_notifier_arch_invalidate_secondary_tlbs(i.mm, uaddr & PAGE_MASK,
+ 						    (uaddr & PAGE_MASK) + PAGE_SIZE);
+ }
+
+ void __tlbbatch_flush(void)
+ {
+ 	flush_tlb_post(TLB_BROADCAST);
+ }
+
+ struct ipi_flush_tlb_range_param {
+ 	unsigned long start;
+ 	unsigned long pages;
+ 	unsigned long stride;
+ 	bool last_level;
+ 	int tlb_level;
+ 	unsigned long asid;
+ };
+
+ static inline void ipi_flush_tlb_range(void *p)
+ {
+ 	struct ipi_flush_tlb_range_param *i = p;
+
+ 	flush_tlb_pre(TLB_LOCAL);
+
+ 	if (i->last_level)
+
+ 		__flush_tlb_range_op(vale1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true);
+
+ 	else
+
+ 		__flush_tlb_range_op(vae1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true);
+
+ 	flush_tlb_post(TLB_LOCAL);
+ 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
+ void __flush_tlb_range(struct vm_area_struct *vma,
+ 		       unsigned long start, unsigned long end,
+ 		       unsigned long stride, bool last_level,
+ 		       int tlb_level)
+ {
+ 	struct ipi_flush_tlb_range_param i = { 0, 0, stride, last_level, tlb_level, ASID(vma->vm_mm) };
+ 	enum tlb_state ts = tlbstat_mm(vma->vm_mm);
+
+ 	if (ts == TLB_NONE) {
+ 		count_vm_tlb_event(NR_TLB_SKIPPED);
+ 		goto out;
+ 	}
+
+ 	i.start = round_down(start, stride);
+ 	end = round_up(end, stride);
+ 	i.pages = (end - start) >> PAGE_SHIFT;
+
+ 	/*
+ 	 * When not using TLB range ops, we can handle up to
+ 	 * (MAX_DVM_OPS - 1) pages;
+ 	 * When using TLB range ops, we can handle up to
+ 	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
+ 	 */
+ 	if (((tlb_mode & TLB_MODE_RANGE) && (end - i.start) >= (MAX_DVM_OPS * stride)) ||
+ 	    i.pages >= MAX_TLBI_RANGE_PAGES) {
+
+ 		flush_tlb_mm(vma->vm_mm);
+ 		return;
+
+ 	}
+
+ 	if (ts == TLB_IPI) {
+
+ 		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &i, true);
+ 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ 	} else {
+
+ 		flush_tlb_pre(ts);
+
+ 		if (last_level) {
+ 			if (ts == TLB_LOCAL) {
 -				__flush_tlb_range_op(vale1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vale1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+ 			} else {
 -				__flush_tlb_range_op(vale1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vale1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+ 			}
+
+ 		} else {
+ 			if (ts == TLB_LOCAL) {
 -				__flush_tlb_range_op(vae1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vae1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+ 			} else {
 -				__flush_tlb_range_op(vae1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vae1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+ 			}
+ 		}
+
+ 		flush_tlb_post(ts);
+ 	}
+ out:
+ 	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
+ }
+
+ static inline void flush_tlb_pre_kernel(void)
+ {
+ 	flush_tlb_pre(TLB_BROADCAST);
+ }
+
+ static inline void flush_tlb_post_kernel(void)
+ {
+ 	flush_tlb_post(TLB_BROADCAST);
+ 	isb();
+ }
+
+ void local_flush_tlb_all(void)
+ {
+ 	flush_tlb_pre(TLB_LOCAL);
+ 	__tlbi(vmalle1);
+ 	flush_tlb_post(TLB_LOCAL);
+ 	isb();
+ 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ }
+
+ void flush_tlb_all(void)
+ {
+ 	flush_tlb_pre_kernel();
+ 	__tlbi(vmalle1is);
+ 	flush_tlb_post_kernel();
+ 	count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+ }
+
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ {
+ 	unsigned long addr;
+
+ 	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
+ 		flush_tlb_all();
+ 		return;
+ 	}
+
+ 	start = __TLBI_VADDR(start, 0);
+ 	end = __TLBI_VADDR(end, 0);
+
+ 	flush_tlb_pre_kernel();
+ 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
+ 		__tlbi(vaale1is, addr);
+ 	}
+ 	flush_tlb_post_kernel();
+ }
+
+ /*
+  * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+  * table levels (pgd/pud/pmd).
+  */
+ void __flush_tlb_kernel_pgtable(unsigned long kaddr)
+ {
+ 	unsigned long addr = __TLBI_VADDR(kaddr, 0);
+
+ 	flush_tlb_pre_kernel();
+ 	__tlbi(vaae1is, addr);
+ 	flush_tlb_post_kernel();
+ }
+
+
+ static ssize_t tlb_mode_read_file(struct file *file, char __user *user_buf,
+ 				  size_t count, loff_t *ppos)
+ {
+ 	char buf[32];
+ 	unsigned int len;
+
+ 	len = sprintf(buf, "%u\n", tlb_mode);
+ 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+ }
+
+ static ssize_t tlb_mode_write_file(struct file *file,
+ 		const char __user *user_buf, size_t count, loff_t *ppos)
+ {
+ 	char buf[32];
+ 	ssize_t len;
+ 	unsigned int mode;
+
+ 	len = min(count, sizeof(buf) - 1);
+ 	if (copy_from_user(buf, user_buf, len))
+ 		return -EFAULT;
+
+ 	buf[len] = '\0';
+ 	if (kstrtouint(buf, 0, &mode))
+ 		return -EINVAL;
+
+ 	if (mode > TLB_MODE_NONE + TLB_MODE_IPI_MASK)
+ 		return -EINVAL;
+
+ 	tlb_mode = mode | TLB_MODE_USER;
+ 	return count;
+ }
+
+ static const struct file_operations fops_tlbflush = {
+ 	.read = tlb_mode_read_file,
+ 	.write = tlb_mode_write_file,
+ 	.llseek = default_llseek,
+ };
+
+ struct dentry *arch_debugfs_dir;
+
+
+ static int __init set_tlb_mode(char *str)
+ {
+ 	u32 mode;
+
+ 	pr_info("tlb_mode: ");
+ 	if (kstrtouint(str, 0, &mode)) {
+ 		pr_cont("using default of %u, unable to parse %s\n",
+ 			tlb_mode, str);
+ 		return 1;
+ 	}
+
+ 	tlb_mode = mode | TLB_MODE_USER;
+ 	pr_cont("%u\n", tlb_mode);
+
+ 	return 1;
+
+ }
+ __setup("tlb_mode", set_tlb_mode);
+
+ static int __init create_tlb_mode(void)
+ {
+ 	unsigned int ipi_cpus;
+
+ 	arch_debugfs_dir = debugfs_create_dir("arm64", NULL);
+
+ 	debugfs_create_file("tlb_mode", S_IRUSR | S_IWUSR,
+ 			    arch_debugfs_dir, NULL, &fops_tlbflush);
+
+ 	if (!(tlb_mode & TLB_MODE_USER)) {
+ 		/*
+ 		 * Autotune IPI cpus depending on the size of the system
+ 		 *
+ 		 * A system with 16 cpus will send IPIs to up to 8 cpus
+ 		 * A system with 256 cpus will send IPIs to up to 16 cpus
+ 		 */
+ 		ipi_cpus = ilog2(nr_cpu_ids) * 2;
+
+ 		if (ipi_cpus > (tlb_mode & TLB_MODE_IPI_MASK)) {
+
+ 			tlb_mode = ipi_cpus | (tlb_mode & (TLB_MODE_NONE|TLB_MODE_LOCAL));
+
+ 		}
+
+ 		if (system_supports_tlb_range())
+ 			tlb_mode |= TLB_MODE_RANGE;
+ 	}
+ 	return 0;
+ }
+ late_initcall(create_tlb_mode);
+
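
For reference, the decision heuristic that tlbstat() implements above can be restated outside the kernel. The sketch below is only an illustration, not part of the patch: it assumes a plain CPU count in place of struct cpumask, copies the TLB_MODE_* encoding from the comment block in context.c (low 10 bits = IPI cpu limit, higher bits = feature flags), and pick_strategy() is a hypothetical stand-in for tlbstat().

/*
 * Illustrative user-space sketch of the tlb_mode decision heuristic.
 * "weight" stands in for cpumask_weight(mm_cpumask(mm)) and "on_this_cpu"
 * for cpumask_test_cpu(smp_processor_id(), ...). Builds with any C compiler.
 */
#include <stdbool.h>
#include <stdio.h>

#define TLB_MODE_IPI_BITS	10
#define TLB_MODE_IPI_MASK	((1u << TLB_MODE_IPI_BITS) - 1)
#define TLB_MODE_LOCAL		(1u << TLB_MODE_IPI_BITS)	/* allow purely local invalidation */
#define TLB_MODE_RANGE		(1u << (TLB_MODE_IPI_BITS + 1))	/* allow TLBI range ops */
#define TLB_MODE_NONE		(1u << (TLB_MODE_IPI_BITS + 2))	/* skip flushes for unused mms */

enum tlb_state { TLB_NONE, TLB_LOCAL, TLB_IPI, TLB_BROADCAST };

/* Hypothetical stand-in for tlbstat(): same order of checks. */
static enum tlb_state pick_strategy(unsigned int mode, unsigned int weight, bool on_this_cpu)
{
	if (weight == 0)			/* nobody has used this address space */
		return (mode & TLB_MODE_NONE) ? TLB_NONE : TLB_BROADCAST;
	if (weight == 1 && on_this_cpu && (mode & TLB_MODE_LOCAL))
		return TLB_LOCAL;		/* only the current cpu: non-shareable TLBI */
	if (weight < (mode & TLB_MODE_IPI_MASK))
		return TLB_IPI;			/* few cpus: IPI them and flush locally */
	return TLB_BROADCAST;			/* default: broadcast TLBI over the mesh */
}

int main(void)
{
	/* Example value an administrator might choose: local + skip-unused + IPI limit 12
	 * (12 is also what the autotune picks for a 64-cpu system: ilog2(64) * 2). */
	unsigned int mode = TLB_MODE_LOCAL | TLB_MODE_NONE | 12;
	static const char * const name[] = { "none", "local", "ipi", "broadcast" };

	printf("unused mm        -> %s\n", name[pick_strategy(mode, 0, false)]);
	printf("current cpu only -> %s\n", name[pick_strategy(mode, 1, true)]);
	printf("4 cpus           -> %s\n", name[pick_strategy(mode, 4, false)]);
	printf("32 cpus          -> %s\n", name[pick_strategy(mode, 32, false)]);
	return 0;
}

At runtime the same knob is exposed through the debugfs file created by create_tlb_mode() (typically /sys/kernel/debug/arm64/tlb_mode) and the tlb_mode= boot parameter; unless the user has set a value, create_tlb_mode() autotunes the IPI limit to ilog2(nr_cpu_ids) * 2 and enables TLB_MODE_RANGE when the CPUs support range invalidation.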