} while (0)
#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
- __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
- static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
+ void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level,
- int tlb_level)
- {
- unsigned long asid, pages;
-
- start = round_down(start, stride);
- end = round_up(end, stride);
- pages = (end - start) >> PAGE_SHIFT;
-
- /*
- * When not uses TLB range ops, we can handle up to
- * (MAX_DVM_OPS - 1) pages;
- * When uses TLB range ops, we can handle up to
- * (MAX_TLBI_RANGE_PAGES - 1) pages.
- */
- if ((!system_supports_tlb_range() &&
- (end - start) >= (MAX_DVM_OPS * stride)) ||
- pages >= MAX_TLBI_RANGE_PAGES) {
- flush_tlb_mm(vma->vm_mm);
- return;
- }
-
- dsb(ishst);
- asid = ASID(vma->vm_mm);
-
- if (last_level)
- __flush_tlb_range_op(vale1is, start, pages, stride, asid,
- tlb_level, true, lpa2_is_enabled());
- else
- __flush_tlb_range_op(vae1is, start, pages, stride, asid,
- tlb_level, true, lpa2_is_enabled());
-
- mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
- }
+ int tlb_level);
- static inline void __flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end,
- unsigned long stride, bool last_level,
- int tlb_level)
- {
- __flush_tlb_range_nosync(vma, start, end, stride,
- last_level, tlb_level);
- dsb(ish);
- }
+
static inline void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
/*
* We cannot use leaf-only invalidation here, since we may be invalidating
* table entries as part of collapsing hugepages or moving page tables.
- * Set the tlb_level to 0 because we can not get enough information here.
+ * Set the tlb_level to TLBI_TTL_UNKNOWN because we can not get enough
+ * information here.
*/
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN);
}
- static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
- {
- unsigned long addr;
-
- if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
- flush_tlb_all();
- return;
- }
-
- start = __TLBI_VADDR(start, 0);
- end = __TLBI_VADDR(end, 0);
-
- dsb(ishst);
- for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
- __tlbi(vaale1is, addr);
- dsb(ish);
- isb();
- }
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end);
/*
* Used to invalidate the TLB (walk caches) corresponding to intermediate page
return 0;
}
early_initcall(asids_init);
+
+ /*
+ * TLB flushing logic to dynamically control how flushes are performed and
+ * potentially reduce the number of TLBIs that have to go over the ARM mesh.
+ */
+
+ enum tlb_state {
+ TLB_NONE, /* Address space has no TLB entries due to recent flushes or because it is a new address space */
+ TLB_LOCAL, /* Only the current cpu has used this address space */
+ TLB_IPI, /* Flush by sending IPIs and doing local flushes */
+ TLB_BROADCAST /* Use the ARM mesh hardware to broadcast invalidations */
+ };
+
+ /*
+ * Control over TLB flushing via tlb_mode
+ *
+ * The lower 10 bits control the use of IPI to do local flushes.
+ *
+ * The IPI field encodes a limit on the number of processors that are known to have used an
+ * address space. If that many or more processors have used the address space then a TLBI
+ * broadcast will occur. If fewer cpus than the limit have used it then the TLB logic will send
+ * IPIs to those processors and perform local flushes on each of them. If set to 0 (the default)
+ * then no IPIs will be sent.
+ *
+ * The higher bits control other aspects of TLB operations.
+ *
+ * The default operation is to always use TLBI broadcast (the common method)
+ */
+
+ #define TLB_MODE_IPI_BITS 10
+ #define TLB_MODE_IPI_MASK ((1 << TLB_MODE_IPI_BITS) - 1)
+
+ /* Feature encoding in tlb_mode */
+ #define TLB_MODE_LOCAL (1 << TLB_MODE_IPI_BITS) /* Use local invalidation if only the current processor has used an address space */
+ #define TLB_MODE_RANGE (1 << (TLB_MODE_IPI_BITS + 1)) /* Use TLBI range flushes */
+ #define TLB_MODE_NONE (1 << (TLB_MODE_IPI_BITS + 2)) /* If no processor has used an address space then skip flushing */
+ #define TLB_MODE_USER (1 << (TLB_MODE_IPI_BITS + 3)) /* User overrode system defaults */
+
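+ /*
+ * For example (hypothetical value), tlb_mode = TLB_MODE_LOCAL | 8 (0x408)
+ * means: purely local invalidation when only the current cpu has used the
+ * address space, IPI-based local flushes while fewer than 8 cpus have used
+ * it, and TLBI broadcast otherwise.
+ */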
+
+ static unsigned int tlb_mode;
+
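+ /*
+ * Decide how to flush for an address space based on which cpus are set in
+ * its mm_cpumask and the current tlb_mode settings.
+ */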
+ static enum tlb_state tlbstat(struct cpumask *mask)
+ {
+ unsigned int weight = cpumask_weight(mask);
+ bool present = cpumask_test_cpu(smp_processor_id(), mask);
+
+ if (weight == 0) {
+ /*
+ * Unused address space or something strange is going on.
+ * If TLB_MODE_NONE is set we can skip the flush request;
+ * otherwise flush everything to be safe.
+ */
+
+ if (tlb_mode & TLB_MODE_NONE)
+ return TLB_NONE;
+
+ return TLB_BROADCAST;
+ }
+
+ if (weight == 1 && present && (tlb_mode & TLB_MODE_LOCAL))
+ return TLB_LOCAL;
+
+ if (weight < (tlb_mode & TLB_MODE_IPI_MASK))
+ return TLB_IPI;
+
+ return TLB_BROADCAST;
+ }
+
+ static inline enum tlb_state tlbstat_mm(struct mm_struct *mm)
+ {
+ return tlbstat(mm_cpumask(mm));
+ }
+
+ static inline void flush_tlb_asid(enum tlb_state ts, unsigned long asid)
+ {
+ if (ts == TLB_NONE) {
+ count_vm_tlb_event(NR_TLB_SKIPPED);
+ return;
+ }
+
+ if (ts == TLB_LOCAL) {
+ __tlbi(aside1, asid);
+ __tlbi_user(aside1, asid);
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ return;
+ }
+
+ __tlbi(aside1is, asid);
+ __tlbi_user(aside1is, asid);
+ count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+ }
+
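+ /*
+ * Invalidate a single user page by VA+ASID. The vale1(is) ops only touch
+ * last-level (leaf) entries, matching the existing flush_tlb_page()
+ * behaviour.
+ */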
+ static inline void flush_tlb_addr(enum tlb_state ts, struct mm_struct *mm, unsigned long uaddr)
+ {
+ unsigned long addr;
+
+ if (ts == TLB_NONE) {
+ count_vm_tlb_event(NR_TLB_SKIPPED);
+ return;
+ }
+
+ addr = __TLBI_VADDR(uaddr, ASID(mm));
+
+ if (ts == TLB_LOCAL) {
+ __tlbi(vale1, addr);
+ __tlbi_user(vale1, addr);
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+ return;
+ }
+
+ __tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
+ count_vm_tlb_event(NR_TLB_FLUSH_ONE);
+ }
+
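+ /*
+ * Barriers around the TLBIs: purely local invalidations only need
+ * non-shareable DSBs, while broadcast invalidations must use inner-shareable
+ * DSBs so completion is observed by all cpus in the inner shareable domain.
+ */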
+ static inline void flush_tlb_post(enum tlb_state ts)
+ {
+ if (ts == TLB_NONE)
+ return;
+
+ if (ts == TLB_LOCAL) {
+ dsb(nsh);
+ return;
+ }
+
+ dsb(ish);
+ }
+
+ static inline void flush_tlb_pre(enum tlb_state ts)
+ {
+ if (ts == TLB_NONE)
+ return;
+
+ if (ts == TLB_LOCAL) {
+ dsb(nshst);
+ return;
+ }
+
+ dsb(ishst);
+ }
+
+ static void ipi_flush_tlb_asid(void *p)
+ {
+ unsigned long asid = (unsigned long)p;
+
+ flush_tlb_pre(TLB_LOCAL);
+ flush_tlb_asid(TLB_LOCAL, asid);
+ flush_tlb_post(TLB_LOCAL);
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
+ void flush_tlb_mm(struct mm_struct *mm)
+ {
+ unsigned long asid = __TLBI_VADDR(0, ASID(mm));
+ enum tlb_state ts = tlbstat_mm(mm);
+
+ if (ts == TLB_IPI) {
+
+ on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_asid, (void *)asid, true);
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ } else {
+
+ flush_tlb_pre(ts);
+ flush_tlb_asid(ts, asid);
+ flush_tlb_post(ts);
+
+ }
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+ }
+
+ struct ipi_flush_tlb_page_param {
+ unsigned long uaddr;
+ struct mm_struct *mm;
+ };
+
+ static inline void ipi_flush_tlb_page(void *p)
+ {
+ struct ipi_flush_tlb_page_param *i = p;
+
+ flush_tlb_pre(TLB_LOCAL);
+ flush_tlb_addr(TLB_LOCAL, i->mm, i->uaddr);
+ flush_tlb_post(TLB_LOCAL);
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
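+ /*
+ * When @sync is false the final DSB is left to the caller, presumably the
+ * batched-unmap path which issues it later via __tlbbatch_flush().
+ */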
+ void __flush_tlb_page(struct mm_struct *mm,
+ unsigned long uaddr, bool sync)
+ {
+ struct ipi_flush_tlb_page_param i = { uaddr, mm };
+ enum tlb_state ts = tlbstat_mm(i.mm);
+
+ if (ts == TLB_IPI) {
+
+ on_each_cpu_mask(mm_cpumask(i.mm), ipi_flush_tlb_page, &i, true);
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ } else {
+
+ flush_tlb_pre(ts);
+ flush_tlb_addr(ts, i.mm, uaddr);
+
+ if (sync)
+ flush_tlb_post(ts);
+
+ }
+
+ mmu_notifier_arch_invalidate_secondary_tlbs(i.mm, uaddr & PAGE_MASK,
+ (uaddr & PAGE_MASK) + PAGE_SIZE);
+ }
+
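+ /* Drain broadcast TLBIs that were issued without a trailing barrier */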
+ void __tlbbatch_flush(void)
+ {
+ flush_tlb_post(TLB_BROADCAST);
+ }
+
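+ /* Arguments marshalled across to the IPI range-flush handler */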
+ struct ipi_flush_tlb_range_param {
+ unsigned long start;
+ unsigned long pages;
+ unsigned long stride;
+ bool last_level;
+ int tlb_level;
+ unsigned long asid;
+ };
+
+ static inline void ipi_flush_tlb_range(void *p)
+ {
+ struct ipi_flush_tlb_range_param *i = p;
+
+ flush_tlb_pre(TLB_LOCAL);
+
+ if (i->last_level)
+ __flush_tlb_range_op(vale1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true, lpa2_is_enabled());
+ else
+ __flush_tlb_range_op(vae1, i->start, i->pages, i->stride, i->asid, i->tlb_level, true, lpa2_is_enabled());
+
+ flush_tlb_post(TLB_LOCAL);
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ }
+
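+ /*
+ * Range flush for user mappings: depending on the tlb state this skips the
+ * flush, flushes locally, IPIs the cpus that have used the mm, or falls back
+ * to an ASID-wide flush when the range is too large.
+ */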
+ void __flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long stride, bool last_level,
+ int tlb_level)
+ {
+ struct ipi_flush_tlb_range_param i = { 0, 0, stride, last_level, tlb_level, ASID(vma->vm_mm) };
+ enum tlb_state ts = tlbstat_mm(vma->vm_mm);
+
+ if (ts == TLB_NONE) {
+ count_vm_tlb_event(NR_TLB_SKIPPED);
+ goto out;
+ }
+
+ i.start = round_down(start, stride);
+ end = round_up(end, stride);
+ i.pages = (end - i.start) >> PAGE_SHIFT;
+
+ /*
+ * When not using TLB range ops, we can handle up to
+ * (MAX_DVM_OPS - 1) pages;
+ * When using TLB range ops, we can handle up to
+ * (MAX_TLBI_RANGE_PAGES - 1) pages.
+ */
+ if ((!(tlb_mode & TLB_MODE_RANGE) && (end - i.start) >= (MAX_DVM_OPS * stride)) ||
+ i.pages >= MAX_TLBI_RANGE_PAGES) {
+
+ flush_tlb_mm(vma->vm_mm);
+ return;
+
+ }
+
+ if (ts == TLB_IPI) {
+
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &i, true);
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
+
+ } else {
+
+ flush_tlb_pre(ts);
+
+ if (last_level) {
+ if (ts == TLB_LOCAL) {
+ __flush_tlb_range_op(vale1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+ } else {
+ __flush_tlb_range_op(vale1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+ }
+ } else {
+ if (ts == TLB_LOCAL) {
+ __flush_tlb_range_op(vae1, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_RANGE);
+ } else {
+ __flush_tlb_range_op(vae1is, i.start, i.pages, i.stride, i.asid, i.tlb_level, true, lpa2_is_enabled());
+ count_vm_tlb_event(NR_TLB_FLUSH_RANGE);
+ }
+ }
+
+ flush_tlb_post(ts);
+ }
+ out:
+ mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
+ }
+
+ static inline void flush_tlb_pre_kernel(void)
+ {
+ flush_tlb_pre(TLB_BROADCAST);
+ }
+
+ static inline void flush_tlb_post_kernel(void)
+ {
+ flush_tlb_post(TLB_BROADCAST);
+ isb();
+ }
+
+ void local_flush_tlb_all(void)
+ {
+ flush_tlb_pre(TLB_LOCAL);
+ __tlbi(vmalle1);
+ flush_tlb_post(TLB_LOCAL);
+ isb();
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ }
+
+ void flush_tlb_all(void)
+ {
+ flush_tlb_pre_kernel();
+ __tlbi(vmalle1is);
+ flush_tlb_post_kernel();
+ count_vm_tlb_event(NR_TLB_FLUSH_ALL);
+ }
+
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ {
+ unsigned long addr;
+
+ if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
+ flush_tlb_all();
+ return;
+ }
+
+ start = __TLBI_VADDR(start, 0);
+ end = __TLBI_VADDR(end, 0);
+
+ flush_tlb_pre_kernel();
+ for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
+ __tlbi(vaale1is, addr);
+ flush_tlb_post_kernel();
+ }
+
+ /*
+ * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+ * table levels (pgd/pud/pmd).
+ */
+ void __flush_tlb_kernel_pgtable(unsigned long kaddr)
+ {
+ unsigned long addr = __TLBI_VADDR(kaddr, 0);
+
+ flush_tlb_pre_kernel();
+ __tlbi(vaae1is, addr);
+ flush_tlb_post_kernel();
+ }
+
+
+ static ssize_t tlb_mode_read_file(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+ {
+ char buf[32];
+ unsigned int len;
+
+ len = sprintf(buf, "%u\n", tlb_mode);
+ return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+ }
+
+ static ssize_t tlb_mode_write_file(struct file *file,
+ const char __user *user_buf, size_t count, loff_t *ppos)
+ {
+ char buf[32];
+ ssize_t len;
+ unsigned int mode;
+
+ len = min(count, sizeof(buf) - 1);
+ if (copy_from_user(buf, user_buf, len))
+ return -EFAULT;
+
+ buf[len] = '\0';
+ if (kstrtouint(buf, 0, &mode))
+ return -EINVAL;
+
+ if (mode > TLB_MODE_NONE + TLB_MODE_IPI_MASK)
+ return -EINVAL;
+
+ tlb_mode = mode | TLB_MODE_USER;
+ return count;
+ }
+
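+ /*
+ * Expose tlb_mode through debugfs, typically at
+ * /sys/kernel/debug/arm64/tlb_mode. For example (hypothetical value):
+ *
+ *   echo 0x408 > /sys/kernel/debug/arm64/tlb_mode
+ *
+ * enables local-only flushes plus IPIs while fewer than 8 cpus have used an
+ * address space. Writes also set TLB_MODE_USER to mark the value as
+ * user-provided.
+ */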
+ static const struct file_operations fops_tlbflush = {
+ .read = tlb_mode_read_file,
+ .write = tlb_mode_write_file,
+ .llseek = default_llseek,
+ };
+
+ struct dentry *arch_debugfs_dir;
+
+
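+ /*
+ * Allow tlb_mode to be set on the kernel command line, e.g. tlb_mode=16
+ * (hypothetical value) to use IPI-based flushes while fewer than 16 cpus
+ * have used an address space. A value given here sets TLB_MODE_USER and so
+ * suppresses the autotuning in create_tlb_mode().
+ */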
+ static int __init set_tlb_mode(char *str)
+ {
+ u32 mode;
+
+ pr_info("tlb_mode: ");
+ if (kstrtouint(str, 0, &mode)) {
+ pr_cont("using default of %u, unable to parse %s\n",
+ tlb_mode, str);
+ return 1;
+ }
+
+ tlb_mode = mode | TLB_MODE_USER;
+ pr_cont("%u\n", tlb_mode);
+
+ return 1;
+ }
+ __setup("tlb_mode", set_tlb_mode);
+
+ static int __init create_tlb_mode(void)
+ {
+ unsigned int ipi_cpus;
+
+ arch_debugfs_dir = debugfs_create_dir("arm64", NULL);
+
+ debugfs_create_file("tlb_mode", S_IRUSR | S_IWUSR,
+ arch_debugfs_dir, NULL, &fops_tlbflush);
+
+ if (!(tlb_mode & TLB_MODE_USER)) {
+ /*
+ * Autotune IPI cpus depending on size of system
+ *
+ * A system with 16 cpus will send IPIs to up to 8 cpus
+ * A system with 256 cpus will send IPIs to up to 16 cpus
+ */
+ ipi_cpus = ilog2(nr_cpu_ids) * 2;
+
+ if (ipi_cpus > (tlb_mode & TLB_MODE_IPI_MASK))
+ tlb_mode = ipi_cpus | (tlb_mode & (TLB_MODE_NONE|TLB_MODE_LOCAL));
+
+ if (system_supports_tlb_range())
+ tlb_mode |= TLB_MODE_RANGE;
+ }
+ return 0;
+ }
+ late_initcall(create_tlb_mode);
+