From: Christoph Lameter (Ampere)
Date: Wed, 1 May 2024 16:59:10 +0000 (-0700)
Subject: Merge branch 'tlb' into tlb2
X-Git-Url: https://gentwo.org/gitweb/?a=commitdiff_plain;h=refs%2Fheads%2Ftlb2;p=linux%2F.git

Merge branch 'tlb' into tlb2

Fix issues regarding lpa2 support
---

2f1680509a25a6a80b1e7dcf4dcc188b45fce3b4
diff --cc arch/arm64/include/asm/tlbflush.h
index a75de2665d84,037e74bf8077..ff328dd9f179
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@@ -422,85 -373,25 +398,36 @@@ do {
  } while (0)
  
  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
- 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+ 	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
  
- static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
+ void __flush_tlb_range(struct vm_area_struct *vma,
  				     unsigned long start, unsigned long end,
  				     unsigned long stride, bool last_level,
- 				     int tlb_level)
- {
- 	unsigned long asid, pages;
- 
- 	start = round_down(start, stride);
- 	end = round_up(end, stride);
- 	pages = (end - start) >> PAGE_SHIFT;
- 
- 	/*
- 	 * When not uses TLB range ops, we can handle up to
- 	 * (MAX_DVM_OPS - 1) pages;
- 	 * When uses TLB range ops, we can handle up to
- 	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
- 	 */
- 	if ((!system_supports_tlb_range() &&
- 	     (end - start) >= (MAX_DVM_OPS * stride)) ||
- 	    pages >= MAX_TLBI_RANGE_PAGES) {
- 		flush_tlb_mm(vma->vm_mm);
- 		return;
- 	}
- 
- 	dsb(ishst);
- 	asid = ASID(vma->vm_mm);
- 
- 	if (last_level)
- 		__flush_tlb_range_op(vale1is, start, pages, stride, asid,
- 				     tlb_level, true, lpa2_is_enabled());
- 	else
- 		__flush_tlb_range_op(vae1is, start, pages, stride, asid,
- 				     tlb_level, true, lpa2_is_enabled());
- 
- 	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
- }
+ 				     int tlb_level);
 +static inline void __flush_tlb_range(struct vm_area_struct *vma,
 +				     unsigned long start, unsigned long end,
 +				     unsigned long stride, bool last_level,
 +				     int tlb_level)
 +{
 +	__flush_tlb_range_nosync(vma, start, end, stride,
 +				 last_level, tlb_level);
 +	dsb(ish);
 +}
 +
  static inline void flush_tlb_range(struct vm_area_struct *vma,
  				   unsigned long start, unsigned long end)
  {
  	/*
  	 * We cannot use leaf-only invalidation here, since we may be invalidating
  	 * table entries as part of collapsing hugepages or moving page tables.
- 	 * Set the tlb_level to 0 because we can not get enough information here.
+ 	 * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
+ 	 * information here.
  	 */
- 	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ 	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
  }
  
- static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
- {
- 	unsigned long addr;
- 
- 	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
- 		flush_tlb_all();
- 		return;
- 	}
- 
- 	start = __TLBI_VADDR(start, 0);
- 	end = __TLBI_VADDR(end, 0);
- 
- 	dsb(ishst);
- 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
- 		__tlbi(vaale1is, addr);
- 	dsb(ish);
- 	isb();
- }
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end);
  
  /*
   * Used to invalidate the TLB (walk caches) corresponding to intermediate page
diff --cc arch/arm64/mm/context.c
index 188197590fc9,30f4bbcccf1a..831488ff38ee
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@@ -420,3 -422,463 +422,463 @@@ static int asids_init(void
  	return 0;
  }
  early_initcall(asids_init);
+
+ /*
+  * TLB flushing logic to dynamically control the flushes and potentially reduce
+  * the need for TLBIs having to go over the ARM mesh.
+  */
+
+ enum tlb_state {
+ 	TLB_NONE,	/* Address space has no TLBs due to recent flushes or this being a new address space */
+ 	TLB_LOCAL,	/* Only the current cpu has used this address space */
+ 	TLB_IPI,	/* Flush by sending IPIs and doing local flushes */
+ 	TLB_BROADCAST	/* Use the ARM mesh hardware to broadcast invalidations */
+ };
+
+ /*
+  * Control over TLB flushing via tlb_mode
+  *
+  * The lower 10 bits control the use of IPI to do local flushes.
+  *
+  * tlb_mode encodes a limit on the number of processors that are known to have used this address space.
+  * If more than this number of processors have used the address space then a TLBI broadcast
+  * will occur. If fewer cpus than this limit have used it then the TLB logic will send IPIs to these
+  * processors and perform local flushes on each of them. If set to 0 (default) then no IPIs will occur.
+  *
+  * The higher bits control other aspects of TLB operations.
+  *
+  * The default operation is to always use TLBI broadcast (the common method).
+  */
+
+ #define TLB_MODE_IPI_BITS 10
+ #define TLB_MODE_IPI_MASK ((1 << TLB_MODE_IPI_BITS) - 1)
+
+ /* Feature encoding in tlb_mode */
+ #define TLB_MODE_LOCAL	(1 << TLB_MODE_IPI_BITS)	/* Use local invalidation if only the current processor has used an address space */
+ #define TLB_MODE_RANGE	(1 << (TLB_MODE_IPI_BITS + 1))	/* Use TLBI range flushes */
+ #define TLB_MODE_NONE	(1 << (TLB_MODE_IPI_BITS + 2))	/* If no processor has used an address space then skip flushing */
+ #define TLB_MODE_USER	(1 << (TLB_MODE_IPI_BITS + 3))	/* User overrode system defaults */
+
+
+ static unsigned int tlb_mode;
+
+ static enum tlb_state tlbstat(struct cpumask *mask)
+ {
+ 	unsigned int weight = cpumask_weight(mask);
+ 	bool present = cpumask_test_cpu(smp_processor_id(), mask);
+
+ 	if (weight == 0) {
+ 		/*
+ 		 * Unused address space or something strange is going on.
+ 		 * TLB_MODE_NONE tells us either to ignore the
+ 		 * flush request or flush everything to be safe
+ 		 */
+
+ 		if (tlb_mode & TLB_MODE_NONE)
+ 			return TLB_NONE;
+
+ 		return TLB_BROADCAST;
+ 	}
+
+ 	if (weight == 1 && present && (tlb_mode & TLB_MODE_LOCAL))
+ 		return TLB_LOCAL;
+
+ 	if (weight < (tlb_mode & TLB_MODE_IPI_MASK))
+ 		return TLB_IPI;
+
+ 	return TLB_BROADCAST;
+ }
+
+ static inline enum tlb_state tlbstat_mm(struct mm_struct *mm)
+ {
+ 	return tlbstat(mm_cpumask(mm));
+ }
+
+ static inline void flush_tlb_asid(enum tlb_state ts, unsigned long asid)
+ {
+ 	if (ts == TLB_NONE) {
+ 		count_vm_tlb_event(NR_TLB_SKIPPED);
+ 		return;
+ 	}
+
+ 	if (ts == TLB_LOCAL) {
+ 		__tlbi(aside1, asid);
+ 		__tlbi_user(aside1, asid);
+ 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ 		return;
+ 	}
+
+ 	__tlbi(aside1is, asid);
+ 	__tlbi_user(aside1is, asid);
+ 	count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+
+ }
+
+ static inline void flush_tlb_addr(enum tlb_state ts, struct mm_struct *mm, unsigned long uaddr)
+ {
+ 	unsigned long addr;
+
+ 	if (ts == TLB_NONE) {
+ 		count_vm_tlb_event(NR_TLB_SKIPPED);
+ 		return;
+ 	}
+
+ 	addr = __TLBI_VADDR(uaddr, ASID(mm));
+
+ 	if (ts == TLB_LOCAL) {
+ 		__tlbi(vale1, addr);
+ 		__tlbi_user(vale1, addr);
+ 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+ 		return;
+ 	}
+
+ 	__tlbi(vale1is, addr);
+ 	__tlbi_user(vale1is, addr);
+ 	count_vm_tlb_event(NR_TLB_FLUSH_ONE);
+ }
+
+ static inline void flush_tlb_post(enum tlb_state ts)
+ {
+ 	if (ts == TLB_NONE)
+ 		return;
+
+ 	if (ts == TLB_LOCAL) {
+ 		dsb(nsh);
+ 		return;
+ 	}
+
+ 	dsb(ish);
+ }
+
+ static inline void flush_tlb_pre(enum tlb_state ts)
+ {
+ 	if (ts == TLB_NONE)
+ 		return;
+
+ 	if (ts == TLB_LOCAL) {
+ 		dsb(nshst);
+ 		return;
+ 	}
+
+ 	dsb(ishst);
+ }
+
+ static void ipi_flush_tlb_asid(void *p)
+ {
+ 	unsigned long asid = (unsigned long)p;
+
+ 	flush_tlb_pre(TLB_LOCAL);
+ 	flush_tlb_asid(TLB_LOCAL, asid);
+ 	flush_tlb_post(TLB_LOCAL);
+ 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
+ void flush_tlb_mm(struct mm_struct *mm)
+ {
+ 	unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+ 	enum tlb_state ts = tlbstat_mm(mm);
+
+ 	if (ts == TLB_IPI) {
+
+ 		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_asid, (void *)asid, true);
+ 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ 	} else {
+
+ 		flush_tlb_pre(ts);
+ 		flush_tlb_asid(ts, asid);
+ 		flush_tlb_post(ts);
+
+ 	}
+ 	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+ }
+
+ struct ipi_flush_tlb_page_param {
+ 	unsigned long uaddr;
+ 	struct mm_struct *mm;
+ };
+
+ static inline void ipi_flush_tlb_page(void *p)
+ {
+ 	struct ipi_flush_tlb_page_param *i = p;
+
+ 	flush_tlb_pre(TLB_LOCAL);
+ 	flush_tlb_addr(TLB_LOCAL, i->mm, i->uaddr);
+ 	flush_tlb_post(TLB_LOCAL);
+ 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
+ void __flush_tlb_page(struct mm_struct *mm,
+ 		      unsigned long uaddr, bool sync)
+ {
+ 	struct ipi_flush_tlb_page_param i = { uaddr, mm };
+ 	enum tlb_state ts = tlbstat_mm(i.mm);
+
+ 	if (ts == TLB_IPI) {
+
+ 		on_each_cpu_mask(mm_cpumask(i.mm), ipi_flush_tlb_page, &i, true);
+ 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ 	} else {
+
+ 		flush_tlb_pre(ts);
+ 		flush_tlb_addr(ts, i.mm, uaddr);
+
+ 		if (sync)
+ 			flush_tlb_post(ts);
+
+ 	}
+
+ 	mmu_notifier_arch_invalidate_secondary_tlbs(i.mm, uaddr & PAGE_MASK,
+ 						    (uaddr & PAGE_MASK) + PAGE_SIZE);
+ }
+
+ void __tlbbatch_flush(void)
+ {
+ 	flush_tlb_post(TLB_BROADCAST);
+ }
+
+ struct ipi_flush_tlb_range_param {
+ 	unsigned long start;
+ 	unsigned long pages;
+ 	unsigned long stride;
+ 	bool last_level;
+ 	int tlb_level;
+ 	unsigned long asid;
+ };
+
+ static inline void ipi_flush_tlb_range(void *p)
+ {
+ 	struct ipi_flush_tlb_range_param *i = p;
+
+ 	flush_tlb_pre(TLB_LOCAL);
+
+ 	if (i->last_level)
+
+ 		__flush_tlb_range_op(vale1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true);
+
+ 	else
+
+ 		__flush_tlb_range_op(vae1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true);
+
+ 	flush_tlb_post(TLB_LOCAL);
+ 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
+ void __flush_tlb_range(struct vm_area_struct *vma,
+ 		       unsigned long start, unsigned long end,
+ 		       unsigned long stride, bool last_level,
+ 		       int tlb_level)
+ {
+ 	struct ipi_flush_tlb_range_param i = { 0, 0, stride, last_level, tlb_level, ASID(vma->vm_mm) };
+ 	enum tlb_state ts = tlbstat_mm(vma->vm_mm);
+
+ 	if (ts == TLB_NONE) {
+ 		count_vm_tlb_event(NR_TLB_SKIPPED);
+ 		goto out;
+ 	}
+
+ 	i.start = round_down(start, stride);
+ 	end = round_up(end, stride);
+ 	i.pages = (end - start) >> PAGE_SHIFT;
+
+ 	/*
+ 	 * When not using TLB range ops, we can handle up to
+ 	 * (MAX_DVM_OPS - 1) pages;
+ 	 * When using TLB range ops, we can handle up to
+ 	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
+ 	 */
+ 	if (((tlb_mode & TLB_MODE_RANGE) && (end - i.start) >= (MAX_DVM_OPS * stride)) ||
+ 	    i.pages >= MAX_TLBI_RANGE_PAGES) {
+
+ 		flush_tlb_mm(vma->vm_mm);
+ 		return;
+
+ 	}
+
+ 	if (ts == TLB_IPI) {
+
+ 		on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &i, true);
+ 		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ 	} else {
+
+ 		flush_tlb_pre(ts);
+
+ 		if (last_level) {
+ 			if (ts == TLB_LOCAL) {
 -				__flush_tlb_range_op(vale1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vale1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+ 			} else {
 -				__flush_tlb_range_op(vale1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vale1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+ 			}
+
+ 		} else {
+ 			if (ts == TLB_LOCAL) {
 -				__flush_tlb_range_op(vae1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vae1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+ 			} else {
 -				__flush_tlb_range_op(vae1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true);
++				__flush_tlb_range_op(vae1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ 				count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+ 			}
+ 		}
+
+ 		flush_tlb_post(ts);
+ 	}
+ out:
+ 	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
+ }
+
+ static inline void flush_tlb_pre_kernel(void)
+ {
+ 	flush_tlb_pre(TLB_BROADCAST);
+ }
+
+ static inline void flush_tlb_post_kernel(void)
+ {
+ 	flush_tlb_post(TLB_BROADCAST);
+ 	isb();
+ }
+
+ void local_flush_tlb_all(void)
+ {
+ 	flush_tlb_pre(TLB_LOCAL);
+ 	__tlbi(vmalle1);
+ 	flush_tlb_post(TLB_LOCAL);
+ 	isb();
+ 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ }
+
+ void flush_tlb_all(void)
+ {
+ 	flush_tlb_pre_kernel();
+ 	__tlbi(vmalle1is);
+ 	flush_tlb_post_kernel();
+ 	count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+ }
+
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ {
+ 	unsigned long addr;
+
+ 	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
+ 		flush_tlb_all();
+ 		return;
+ 	}
+
+ 	start = __TLBI_VADDR(start, 0);
+ 	end = __TLBI_VADDR(end, 0);
+
+ 	flush_tlb_pre_kernel();
+ 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
+ 		__tlbi(vaale1is, addr);
+ 	}
+ 	flush_tlb_post_kernel();
+ }
+
+ /*
+  * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+  * table levels (pgd/pud/pmd).
+  */
+ void __flush_tlb_kernel_pgtable(unsigned long kaddr)
+ {
+ 	unsigned long addr = __TLBI_VADDR(kaddr, 0);
+
+ 	flush_tlb_pre_kernel();
+ 	__tlbi(vaae1is, addr);
+ 	flush_tlb_post_kernel();
+ }
+
+
+ static ssize_t tlb_mode_read_file(struct file *file, char __user *user_buf,
+ 				  size_t count, loff_t *ppos)
+ {
+ 	char buf[32];
+ 	unsigned int len;
+
+ 	len = sprintf(buf, "%u\n", tlb_mode);
+ 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+ }
+
+ static ssize_t tlb_mode_write_file(struct file *file,
+ 		const char __user *user_buf, size_t count, loff_t *ppos)
+ {
+ 	char buf[32];
+ 	ssize_t len;
+ 	unsigned int mode;
+
+ 	len = min(count, sizeof(buf) - 1);
+ 	if (copy_from_user(buf, user_buf, len))
+ 		return -EFAULT;
+
+ 	buf[len] = '\0';
+ 	if (kstrtouint(buf, 0, &mode))
+ 		return -EINVAL;
+
+ 	if (mode > TLB_MODE_NONE + TLB_MODE_IPI_MASK)
+ 		return -EINVAL;
+
+ 	tlb_mode = mode | TLB_MODE_USER;
+ 	return count;
+ }
+
+ static const struct file_operations fops_tlbflush = {
+ 	.read = tlb_mode_read_file,
+ 	.write = tlb_mode_write_file,
+ 	.llseek = default_llseek,
+ };
+
+ struct dentry *arch_debugfs_dir;
+
+
+ static int __init set_tlb_mode(char *str)
+ {
+ 	u32 mode;
+
+ 	pr_info("tlb_mode: ");
+ 	if (kstrtouint(str, 0, &mode)) {
+ 		pr_cont("using default of %u, unable to parse %s\n",
+ 			tlb_mode, str);
+ 		return 1;
+ 	}
+
+ 	tlb_mode = mode | TLB_MODE_USER;
+ 	pr_cont("%u\n", tlb_mode);
+
+ 	return 1;
+
+ }
+ __setup("tlb_mode", set_tlb_mode);
+
+ static int __init create_tlb_mode(void)
+ {
+ 	unsigned int ipi_cpus;
+
+ 	arch_debugfs_dir = debugfs_create_dir("arm64", NULL);
+
+ 	debugfs_create_file("tlb_mode", S_IRUSR | S_IWUSR,
+ 			    arch_debugfs_dir, NULL, &fops_tlbflush);
+
+ 	if (!(tlb_mode & TLB_MODE_USER)) {
+ 		/*
+ 		 * Autotune IPI cpus depending on the size of the system
+ 		 *
+ 		 * A system with 16 cpus will send IPIs to up to 8 cpus
+ 		 * A system with 256 cpus will send IPIs to up to 16 cpus
+ 		 */
+ 		ipi_cpus = ilog2(nr_cpu_ids) * 2;
+
+ 		if (ipi_cpus > (tlb_mode & TLB_MODE_IPI_MASK)) {
+
+ 			tlb_mode = ipi_cpus | (tlb_mode & (TLB_MODE_NONE|TLB_MODE_LOCAL));
+
+ 		}
+
+ 		if (system_supports_tlb_range())
+ 			tlb_mode |= TLB_MODE_RANGE;
+ 	}
+ 	return 0;
+ }
+ late_initcall(create_tlb_mode);
+
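
For reference, the decision heuristic that tlbstat() implements above can be restated outside the kernel. The sketch below is only an illustration, not part of the patch: it assumes a plain CPU count in place of struct cpumask, copies the TLB_MODE_* encoding from the comment block in context.c (low 10 bits = IPI cpu limit, higher bits = feature flags), and pick_strategy() is a hypothetical stand-in for tlbstat().

/*
 * Illustrative user-space sketch of the tlb_mode decision heuristic.
 * "weight" stands in for cpumask_weight(mm_cpumask(mm)) and "on_this_cpu"
 * for cpumask_test_cpu(smp_processor_id(), ...). Builds with any C compiler.
 */
#include <stdbool.h>
#include <stdio.h>

#define TLB_MODE_IPI_BITS	10
#define TLB_MODE_IPI_MASK	((1u << TLB_MODE_IPI_BITS) - 1)
#define TLB_MODE_LOCAL		(1u << TLB_MODE_IPI_BITS)	/* allow purely local invalidation */
#define TLB_MODE_RANGE		(1u << (TLB_MODE_IPI_BITS + 1))	/* allow TLBI range ops */
#define TLB_MODE_NONE		(1u << (TLB_MODE_IPI_BITS + 2))	/* skip flushes for unused mms */

enum tlb_state { TLB_NONE, TLB_LOCAL, TLB_IPI, TLB_BROADCAST };

/* Hypothetical stand-in for tlbstat(): same order of checks. */
static enum tlb_state pick_strategy(unsigned int mode, unsigned int weight, bool on_this_cpu)
{
	if (weight == 0)			/* nobody has used this address space */
		return (mode & TLB_MODE_NONE) ? TLB_NONE : TLB_BROADCAST;
	if (weight == 1 && on_this_cpu && (mode & TLB_MODE_LOCAL))
		return TLB_LOCAL;		/* only the current cpu: non-shareable TLBI */
	if (weight < (mode & TLB_MODE_IPI_MASK))
		return TLB_IPI;			/* few cpus: IPI them and flush locally */
	return TLB_BROADCAST;			/* default: broadcast TLBI over the mesh */
}

int main(void)
{
	/* Example value an administrator might choose: local + skip-unused + IPI limit 12
	 * (12 is also what the autotune picks for a 64-cpu system: ilog2(64) * 2). */
	unsigned int mode = TLB_MODE_LOCAL | TLB_MODE_NONE | 12;
	static const char * const name[] = { "none", "local", "ipi", "broadcast" };

	printf("unused mm        -> %s\n", name[pick_strategy(mode, 0, false)]);
	printf("current cpu only -> %s\n", name[pick_strategy(mode, 1, true)]);
	printf("4 cpus           -> %s\n", name[pick_strategy(mode, 4, false)]);
	printf("32 cpus          -> %s\n", name[pick_strategy(mode, 32, false)]);
	return 0;
}

At runtime the same knob is exposed through the debugfs file created by create_tlb_mode() (typically /sys/kernel/debug/arm64/tlb_mode) and the tlb_mode= boot parameter; unless the user has set a value, create_tlb_mode() autotunes the IPI limit to ilog2(nr_cpu_ids) * 2 and enables TLB_MODE_RANGE when the CPUs support range invalidation.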