Gentwo Git Trees - linux/.git/commitdiff
Merge branch 'tlb' into tlb2
author    Christoph Lameter (Ampere) <cl@linux.com>
          Wed, 1 May 2024 16:59:10 +0000 (09:59 -0700)
committer Christoph Lameter (Ampere) <cl@linux.com>
          Wed, 1 May 2024 16:59:10 +0000 (09:59 -0700)
Fix issues regarding lpa2 support

arch/arm64/include/asm/tlbflush.h
arch/arm64/kernel/setup.c
arch/arm64/kernel/smp.c
arch/arm64/mm/context.c
include/linux/vm_event_item.h
mm/vmstat.c

index a75de2665d844510a69d4af337ad1b5827b012c8,037e74bf807766f3beb97a5de728442f5f3d7e13..ff328dd9f1798fb8b52b6c6e47e90100455c9fb7
@@@ -422,85 -373,25 +398,36 @@@ do {                                                                    
  } while (0)
  
  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
 -      __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
 +      __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
  
static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
void __flush_tlb_range(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
                                     unsigned long stride, bool last_level,
-                                    int tlb_level)
- {
-       unsigned long asid, pages;
-       start = round_down(start, stride);
-       end = round_up(end, stride);
-       pages = (end - start) >> PAGE_SHIFT;
-       /*
-        * When not uses TLB range ops, we can handle up to
-        * (MAX_DVM_OPS - 1) pages;
-        * When uses TLB range ops, we can handle up to
-        * (MAX_TLBI_RANGE_PAGES - 1) pages.
-        */
-       if ((!system_supports_tlb_range() &&
-            (end - start) >= (MAX_DVM_OPS * stride)) ||
-           pages >= MAX_TLBI_RANGE_PAGES) {
-               flush_tlb_mm(vma->vm_mm);
-               return;
-       }
-       dsb(ishst);
-       asid = ASID(vma->vm_mm);
-       if (last_level)
-               __flush_tlb_range_op(vale1is, start, pages, stride, asid,
-                                    tlb_level, true, lpa2_is_enabled());
-       else
-               __flush_tlb_range_op(vae1is, start, pages, stride, asid,
-                                    tlb_level, true, lpa2_is_enabled());
-       mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
- }
+                                    int tlb_level);
  
 +static inline void __flush_tlb_range(struct vm_area_struct *vma,
 +                                   unsigned long start, unsigned long end,
 +                                   unsigned long stride, bool last_level,
 +                                   int tlb_level)
 +{
 +      __flush_tlb_range_nosync(vma, start, end, stride,
 +                               last_level, tlb_level);
 +      dsb(ish);
 +}
 +
  static inline void flush_tlb_range(struct vm_area_struct *vma,
                                   unsigned long start, unsigned long end)
  {
        /*
         * We cannot use leaf-only invalidation here, since we may be invalidating
         * table entries as part of collapsing hugepages or moving page tables.
 -       * Set the tlb_level to 0 because we can not get enough information here.
 +       * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
 +       * information here.
         */
 -      __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
 +      __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
  }
  
- static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
- {
-       unsigned long addr;
-       if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
-               flush_tlb_all();
-               return;
-       }
-       start = __TLBI_VADDR(start, 0);
-       end = __TLBI_VADDR(end, 0);
-       dsb(ishst);
-       for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
-               __tlbi(vaale1is, addr);
-       dsb(ish);
-       isb();
- }
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end);
  
  /*
  * Used to invalidate the TLB (walk caches) corresponding to intermediate page
  * table levels (pgd/pud/pmd).
  */
Simple merge
Simple merge
index 188197590fc9ce44da04246455571fc9ee47c57c,30f4bbcccf1a656a7ebffc4d6da7046141508e1b..831488ff38ee5613768dbda51108a5360126b2fb
@@@ -420,3 -422,463 +422,463 @@@ static int asids_init(void
        return 0;
  }
  early_initcall(asids_init);
 -                              __flush_tlb_range_op(vale1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
+ /*
+  * TLB flushing logic to allow dynamic control of the flushes and potentially reduce
+  * the need for TLBIs having to go over the ARM mesh.
+  */
+ enum tlb_state {
+       TLB_NONE,       /* Address space has no TLBs due to recent flushes or this being a new address space */
+       TLB_LOCAL,      /* Only the current cpu has used this address space */
+       TLB_IPI,        /* Flush by sending IPIs and doing local flushes  */
+       TLB_BROADCAST   /* Use the ARM mesh hardware to broadcast invalidations */
+ };
+ /*
+  * Control over TLB flushing via TLB mode
+  *
+  * The lower 10 bits control the use of IPI to do local flushes.
+  *
+  * tlb_mode encodes a limit on the number of processors that are known to have used this address space.
+  * If more than this number of processors have used the address space then a TLBI broadcast
+  * will occur. If fewer CPUs than this limit have used it then the TLB logic will send IPIs to those
+  * processors and perform local flushes on each of them. If set to 0 (default) then no IPIs will occur.
+  *
+  * The higher bits control other aspects of TLB operations.
+  *
+  * The default operation is to always use TLBI broadcast (the common method)
+  */
+ #define TLB_MODE_IPI_BITS 10
+ #define TLB_MODE_IPI_MASK ((1 << TLB_MODE_IPI_BITS) - 1)
+ /* Feature encoding in tlb_mode */
+ #define TLB_MODE_LOCAL        (1 << TLB_MODE_IPI_BITS)        /* Use local invalidation if only the current processor has used an address space */
+ #define TLB_MODE_RANGE        (1 << (TLB_MODE_IPI_BITS + 1))  /* Use TLBI range flushes */
+ #define TLB_MODE_NONE (1 << (TLB_MODE_IPI_BITS + 2))  /* If no processor has used an address space then skip flushing */
+ #define TLB_MODE_USER (1 << (TLB_MODE_IPI_BITS + 3))  /* User overrode system defaults */
+ static unsigned int tlb_mode;
+ static enum tlb_state tlbstat(struct cpumask *mask)
+ {
+       unsigned int weight = cpumask_weight(mask);
+       bool present = cpumask_test_cpu(smp_processor_id(), mask);
+       if (weight == 0) {
+               /*
+                * Unused address space or something strange is going on.
+                * With TLB_MODE_NONE set we can ignore the flush
+                * request; otherwise flush everything to be safe.
+                */
+               if (tlb_mode & TLB_MODE_NONE)
+                       return TLB_NONE;
+               return TLB_BROADCAST;
+       }
+       if (weight == 1 && present && (tlb_mode & TLB_MODE_LOCAL))
+               return TLB_LOCAL;
+       if (weight < (tlb_mode & TLB_MODE_IPI_MASK))
+               return TLB_IPI;
+       return TLB_BROADCAST;
+ }
+ static inline enum tlb_state tlbstat_mm(struct mm_struct *mm)
+ {
+       return tlbstat(mm_cpumask(mm));
+ }
+ static inline void flush_tlb_asid(enum tlb_state ts, unsigned long asid)
+ {
+       if (ts == TLB_NONE) {
+               count_vm_tlb_event(NR_TLB_SKIPPED);
+               return;
+       }
+       if (ts == TLB_LOCAL) {
+               __tlbi(aside1, asid);
+               __tlbi_user(aside1, asid);
+               count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+               return;
+       }
+       __tlbi(aside1is, asid);
+       __tlbi_user(aside1is, asid);
+       count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+ }
+ static inline void flush_tlb_addr(enum tlb_state ts, struct mm_struct *mm, unsigned long uaddr)
+ {
+       unsigned long addr;
+       if (ts == TLB_NONE)  {
+               count_vm_tlb_event(NR_TLB_SKIPPED);
+               return;
+       }
+       addr = __TLBI_VADDR(uaddr, ASID(mm));
+       if (ts == TLB_LOCAL) {
+               __tlbi(vale1, addr);
+               __tlbi_user(vale1, addr);
+               count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+               return;
+       }
+       __tlbi(vale1is, addr);
+       __tlbi_user(vale1is, addr);
+       count_vm_tlb_event(NR_TLB_FLUSH_ONE);
+ }
+ static inline void flush_tlb_post(enum tlb_state ts)
+ {
+       if (ts == TLB_NONE)
+               return;
+       if (ts == TLB_LOCAL) {
+               dsb(nsh);
+               return;
+       }
+       dsb(ish);
+ }
+ static inline void flush_tlb_pre(enum tlb_state ts)
+ {
+       if (ts == TLB_NONE)
+               return;
+       if (ts == TLB_LOCAL) {
+               dsb(nshst);
+               return;
+       }
+       dsb(ishst);
+ }
+ static void ipi_flush_tlb_asid(void *p)
+ {
+       unsigned long asid = (unsigned long)p;
+       flush_tlb_pre(TLB_LOCAL);
+       flush_tlb_asid(TLB_LOCAL, asid);
+       flush_tlb_post(TLB_LOCAL);
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+ void flush_tlb_mm(struct mm_struct *mm)
+ {
+       unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+       enum tlb_state ts = tlbstat_mm(mm);
+       if (ts == TLB_IPI) {
+               on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_asid, (void *)asid, true);
+               count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+       } else {
+               flush_tlb_pre(ts);
+               flush_tlb_asid(ts, asid);
+               flush_tlb_post(ts);
+       }
+       mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+ }
+ struct ipi_flush_tlb_page_param {
+       unsigned long uaddr;
+       struct mm_struct *mm;
+ };
+ static inline void ipi_flush_tlb_page(void *p)
+ {
+       struct ipi_flush_tlb_page_param *i = p;
+       flush_tlb_pre(TLB_LOCAL);
+       flush_tlb_addr(TLB_LOCAL, i->mm, i->uaddr);
+       flush_tlb_post(TLB_LOCAL);
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+ void __flush_tlb_page(struct mm_struct *mm,
+                                 unsigned long uaddr, bool sync)
+ {
+       struct ipi_flush_tlb_page_param i = { uaddr, mm };
+       enum tlb_state ts = tlbstat_mm(i.mm);
+       if (ts == TLB_IPI) {
+               on_each_cpu_mask(mm_cpumask(i.mm), ipi_flush_tlb_page, &i, true);
+               count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+       } else {
+               flush_tlb_pre(ts);
+               flush_tlb_addr(ts, i.mm, uaddr);
+               if (sync)
+                       flush_tlb_post(ts);
+       }
+       mmu_notifier_arch_invalidate_secondary_tlbs(i.mm, uaddr & PAGE_MASK,
+                                               (uaddr & PAGE_MASK) + PAGE_SIZE);
+ }
+ void __tlbbatch_flush(void)
+ {
+       flush_tlb_post(TLB_BROADCAST);
+ }
+ struct ipi_flush_tlb_range_param {
+       unsigned long start;
+       unsigned long pages;
+       unsigned long stride;
+       bool last_level;
+       int tlb_level;
+       unsigned long asid;
+ };
+ static inline void ipi_flush_tlb_range(void *p)
+ {
+       struct ipi_flush_tlb_range_param *i = p;
+       flush_tlb_pre(TLB_LOCAL);
+       if (i->last_level)
+               __flush_tlb_range_op(vale1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true, lpa2_is_enabled());
+       else
+               __flush_tlb_range_op(vae1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true, lpa2_is_enabled());
+       flush_tlb_post(TLB_LOCAL);
+       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+ void __flush_tlb_range(struct vm_area_struct *vma,
+               unsigned long start, unsigned long end,
+               unsigned long stride, bool last_level,
+               int tlb_level)
+ {
+       struct ipi_flush_tlb_range_param i = { 0, 0, stride, last_level, tlb_level, ASID(vma->vm_mm) };
+       enum tlb_state ts = tlbstat_mm(vma->vm_mm);
+       if (ts == TLB_NONE) {
+               count_vm_tlb_event(NR_TLB_SKIPPED);
+               goto out;
+       }
+       i.start = round_down(start, stride);
+       end = round_up(end, stride);
+       i.pages = (end - i.start) >> PAGE_SHIFT;
+       /*
+        * When not using TLB range ops, we can handle up to
+        * (MAX_DVM_OPS - 1) pages;
+        * When using TLB range ops, we can handle up to
+        * (MAX_TLBI_RANGE_PAGES - 1) pages.
+        */
+       if ((!(tlb_mode & TLB_MODE_RANGE) && (end - i.start) >= (MAX_DVM_OPS * stride)) ||
+                       i.pages >= MAX_TLBI_RANGE_PAGES) {
+               flush_tlb_mm(vma->vm_mm);
+               return;
+       }
+       if (ts == TLB_IPI) {
+               on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &i, true);
+               count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+       } else {
+               flush_tlb_pre(ts);
+               if (last_level) {
+                       if (ts == TLB_LOCAL) {
 -                              __flush_tlb_range_op(vale1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++                              __flush_tlb_range_op(vale1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+                               count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+                       } else {
 -                              __flush_tlb_range_op(vae1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++                              __flush_tlb_range_op(vale1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+                               count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+                       }
+               } else {
+                       if (ts == TLB_LOCAL) {
 -                              __flush_tlb_range_op(vae1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++                              __flush_tlb_range_op(vae1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+                               count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+                       } else {
++                              __flush_tlb_range_op(vae1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+                               count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+                       }
+               }
+               flush_tlb_post(ts);
+       }
+ out:
+       mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
+ }
+ static inline void flush_tlb_pre_kernel(void)
+ {
+       flush_tlb_pre(TLB_BROADCAST);
+ }
+ static inline void flush_tlb_post_kernel(void)
+ {
+       flush_tlb_post(TLB_BROADCAST);
+       isb();
+ }
+ void local_flush_tlb_all(void)
+ {
+       flush_tlb_pre(TLB_LOCAL);
+       __tlbi(vmalle1);
+       flush_tlb_post(TLB_LOCAL);
+       isb();
+       count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ }
+ void flush_tlb_all(void)
+ {
+       flush_tlb_pre_kernel();
+       __tlbi(vmalle1is);
+       flush_tlb_post_kernel();
+       count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+ }
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ {
+       unsigned long addr;
+       if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
+               flush_tlb_all();
+               return;
+       }
+       start = __TLBI_VADDR(start, 0);
+       end = __TLBI_VADDR(end, 0);
+       flush_tlb_pre_kernel();
+       for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
+               __tlbi(vaale1is, addr);
+       }
+       flush_tlb_post_kernel();
+ }
+ /*
+  * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+  * table levels (pgd/pud/pmd).
+  */
+ void __flush_tlb_kernel_pgtable(unsigned long kaddr)
+ {
+       unsigned long addr = __TLBI_VADDR(kaddr, 0);
+       flush_tlb_pre_kernel();
+       __tlbi(vaae1is, addr);
+       flush_tlb_post_kernel();
+ }
+ static ssize_t tlb_mode_read_file(struct file *file, char __user *user_buf,
+                               size_t count, loff_t *ppos)
+ {
+       char buf[32];
+       unsigned int len;
+       len = sprintf(buf, "%u\n", tlb_mode);
+       return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+ }
+ static ssize_t tlb_mode_write_file(struct file *file,
+                  const char __user *user_buf, size_t count, loff_t *ppos)
+ {
+       char buf[32];
+       ssize_t len;
+       unsigned int mode;
+       len = min(count, sizeof(buf) - 1);
+       if (copy_from_user(buf, user_buf, len))
+               return -EFAULT;
+       buf[len] = '\0';
+       if (kstrtouint(buf, 0, &mode))
+               return -EINVAL;
+       if (mode > TLB_MODE_NONE + TLB_MODE_IPI_MASK)
+               return -EINVAL;
+       tlb_mode = mode | TLB_MODE_USER;
+       return count;
+ }
+ static const struct file_operations fops_tlbflush = {
+       .read = tlb_mode_read_file,
+       .write = tlb_mode_write_file,
+       .llseek = default_llseek,
+ };
+ struct dentry *arch_debugfs_dir;
+ static int __init set_tlb_mode(char *str)
+ {
+       u32 mode;
+       pr_info("tlb_mode: ");
+       if (kstrtouint(str, 0, &mode)) {
+               pr_cont("using default of %u, unable to parse %s\n",
+                       tlb_mode, str);
+               return 1;
+       }
+       tlb_mode = mode | TLB_MODE_USER;
+       pr_cont("%u\n", tlb_mode);
+       return 1;
+ }
+ __setup("tlb_mode", set_tlb_mode);
+ static int __init create_tlb_mode(void)
+ {
+       unsigned int ipi_cpus;
+       arch_debugfs_dir = debugfs_create_dir("arm64", NULL);
+       debugfs_create_file("tlb_mode", S_IRUSR | S_IWUSR,
+                           arch_debugfs_dir, NULL, &fops_tlbflush);
+       if (!(tlb_mode & TLB_MODE_USER)) {
+               /*
+                * Autotune IPI cpus depending on size of system
+                *
+                * A system with 16 cpus will send IPIs to up to 8 cpus
+                * A system with 256 cpus will send IPIs to up to 16 cpus
+                */
+               ipi_cpus = ilog2(nr_cpu_ids) * 2;
+               if (ipi_cpus > (tlb_mode & TLB_MODE_IPI_MASK)) {
+                       tlb_mode = ipi_cpus | (tlb_mode & (TLB_MODE_NONE|TLB_MODE_LOCAL));
+               }
+               if (system_supports_tlb_range())
+                       tlb_mode |= TLB_MODE_RANGE;
+       }
+       return 0;
+ }
+ late_initcall(create_tlb_mode);
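
The tlb_mode word introduced above packs the IPI CPU limit into the low TLB_MODE_IPI_BITS bits and the feature flags above them. A minimal user-space sketch of composing such a value for the debugfs file created by create_tlb_mode() (assuming debugfs is mounted at /sys/kernel/debug; the program itself is illustrative and not part of the patch):

/* Sketch only: mirrors the TLB_MODE_* encoding from the patch above. */
#include <stdio.h>

#define TLB_MODE_IPI_BITS 10
#define TLB_MODE_IPI_MASK ((1u << TLB_MODE_IPI_BITS) - 1)
#define TLB_MODE_LOCAL    (1u << TLB_MODE_IPI_BITS)        /* local-only flush for single-CPU mms */
#define TLB_MODE_RANGE    (1u << (TLB_MODE_IPI_BITS + 1))  /* use TLBI range operations */
#define TLB_MODE_NONE     (1u << (TLB_MODE_IPI_BITS + 2))  /* skip flushes for unused address spaces */

int main(void)
{
	/* IPI up to 8 CPUs, and allow local-only flushes and range TLBIs. */
	unsigned int mode = (8 & TLB_MODE_IPI_MASK) | TLB_MODE_LOCAL | TLB_MODE_RANGE;

	/* 8 | 0x400 | 0x800 = 0xc08 = 3080 */
	printf("echo %u > /sys/kernel/debug/arm64/tlb_mode\n", mode);
	return 0;
}

The same number can be given at boot via the tlb_mode= parameter handled by set_tlb_mode(); in both paths the kernel ORs in TLB_MODE_USER itself, so the supplied value only carries the IPI limit and the feature bits.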
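
The flush-strategy selection in tlbstat() reduces to a decision over the weight of the mm's CPU mask and tlb_mode. A condensed, self-contained restatement of that policy (the standalone function and parameter names are illustrative, not kernel identifiers):

/* Illustrative restatement of the tlbstat() policy above; not kernel code. */
enum tlb_state { TLB_NONE, TLB_LOCAL, TLB_IPI, TLB_BROADCAST };

#define TLB_MODE_IPI_BITS 10
#define TLB_MODE_IPI_MASK ((1u << TLB_MODE_IPI_BITS) - 1)
#define TLB_MODE_LOCAL    (1u << TLB_MODE_IPI_BITS)
#define TLB_MODE_NONE_BIT (1u << (TLB_MODE_IPI_BITS + 2))

static enum tlb_state pick_flush_strategy(unsigned int weight, int this_cpu_in_mask,
					   unsigned int mode)
{
	if (weight == 0)	/* no CPU has used this mm */
		return (mode & TLB_MODE_NONE_BIT) ? TLB_NONE : TLB_BROADCAST;
	if (weight == 1 && this_cpu_in_mask && (mode & TLB_MODE_LOCAL))
		return TLB_LOCAL;	/* only this CPU: non-shareable TLBI, dsb(nsh) */
	if (weight < (mode & TLB_MODE_IPI_MASK))
		return TLB_IPI;		/* few CPUs: IPI each of them to flush locally */
	return TLB_BROADCAST;		/* default: broadcast TLBI over the mesh */
}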
Simple merge
diff --cc mm/vmstat.c
Simple merge
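
The series also adds the vm event counters used above (e.g. NR_TLB_SKIPPED, NR_TLB_FLUSH_ONE, NR_TLB_FLUSH_RANGE, NR_TLB_LOCAL_FLUSH_RANGE) through include/linux/vm_event_item.h and mm/vmstat.c. A small sketch for watching them, assuming they are exported via /proc/vmstat like the existing NR_TLB_* events (which depends on the relevant vmstat debug configuration being enabled):

/* Sketch: dump the TLB-related counters from /proc/vmstat. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "tlb"))
			fputs(line, stdout);
	fclose(f);
	return 0;
}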