iommupt: Use the incoherent start/stop functions for PT_FEAT_DMA_INCOHERENT
author Jason Gunthorpe <jgg@nvidia.com>
Thu, 23 Oct 2025 18:22:31 +0000 (15:22 -0300)
committer Joerg Roedel <joerg.roedel@amd.com>
Wed, 5 Nov 2025 08:47:44 +0000 (09:47 +0100)
This is the first step toward supporting an incoherent walker: start and stop
the incoherence around the allocation and freeing of the page table memory.

The iommu_pages API maps this to dma_map_single()/dma_unmap_single() or to
arch cache flushing calls.
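
For orientation, here is a minimal sketch of the pairing described above,
written directly against the DMA API. It is illustrative only: the helper
names example_start_incoherent()/example_stop_incoherent() and struct
example_table are hypothetical and are not the iommu_pages implementation
this patch calls into.

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/types.h>

struct example_table {
	void *mem;
	size_t size;
	dma_addr_t dma;		/* kept so the eventual unmap uses the same handle */
};

/* "start incoherent": flush the new table so a non-coherent walker can read it */
static int example_start_incoherent(struct device *iommu_device,
				    struct example_table *tbl)
{
	tbl->dma = dma_map_single(iommu_device, tbl->mem, tbl->size,
				  DMA_TO_DEVICE);
	if (dma_mapping_error(iommu_device, tbl->dma))
		return -ENOMEM;
	return 0;
}

/* "stop incoherent": tear down the mapping before the table memory is freed */
static void example_stop_incoherent(struct device *iommu_device,
				    struct example_table *tbl)
{
	dma_unmap_single(iommu_device, tbl->dma, tbl->size, DMA_TO_DEVICE);
}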

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
drivers/iommu/generic_pt/iommu_pt.h
drivers/iommu/generic_pt/kunit_iommu.h
drivers/iommu/generic_pt/pt_defs.h
include/linux/generic_pt/common.h
include/linux/generic_pt/iommu.h

index 142001f5aa83fd3cede505223c1a34e422a87ee1..2cad07da995aaabc54c5f2729ffaa6d26fb98dae 100644 (file)
@@ -24,6 +24,10 @@ static void gather_range_pages(struct iommu_iotlb_gather *iotlb_gather,
 {
        struct pt_common *common = common_from_iommu(iommu_table);
 
+       if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+               iommu_pages_stop_incoherent_list(free_list,
+                                                iommu_table->iommu_device);
+
        if (pt_feature(common, PT_FEAT_FLUSH_RANGE_NO_GAPS) &&
            iommu_iotlb_gather_is_disjoint(iotlb_gather, iova, len)) {
                iommu_iotlb_sync(&iommu_table->domain, iotlb_gather);
@@ -329,35 +333,55 @@ static int __collect_tables(struct pt_range *range, void *arg,
        return 0;
 }
 
-static inline struct pt_table_p *table_alloc_top(struct pt_common *common,
-                                                uintptr_t top_of_table,
-                                                gfp_t gfp)
+enum alloc_mode {ALLOC_NORMAL, ALLOC_DEFER_COHERENT_FLUSH};
+
+/* Allocate a table; the empty table will be ready to be installed. */
+static inline struct pt_table_p *_table_alloc(struct pt_common *common,
+                                             size_t lg2sz, gfp_t gfp,
+                                             enum alloc_mode mode)
 {
        struct pt_iommu *iommu_table = iommu_from_common(common);
+       struct pt_table_p *table_mem;
+
+       table_mem = iommu_alloc_pages_node_sz(iommu_table->nid, gfp,
+                                             log2_to_int(lg2sz));
+       if (pt_feature(common, PT_FEAT_DMA_INCOHERENT) &&
+           mode == ALLOC_NORMAL) {
+               int ret = iommu_pages_start_incoherent(
+                       table_mem, iommu_table->iommu_device);
+               if (ret) {
+                       iommu_free_pages(table_mem);
+                       return ERR_PTR(ret);
+               }
+       }
+       return table_mem;
+}
 
+static inline struct pt_table_p *table_alloc_top(struct pt_common *common,
+                                                uintptr_t top_of_table,
+                                                gfp_t gfp,
+                                                enum alloc_mode mode)
+{
        /*
         * Top doesn't need the free list or otherwise, so it technically
         * doesn't need to use iommu pages. Use the API anyhow as the top is
         * usually not smaller than PAGE_SIZE to keep things simple.
         */
-       return iommu_alloc_pages_node_sz(
-               iommu_table->nid, gfp,
-               log2_to_int(pt_top_memsize_lg2(common, top_of_table)));
+       return _table_alloc(common, pt_top_memsize_lg2(common, top_of_table),
+                           gfp, mode);
 }
 
 /* Allocate an interior table */
 static inline struct pt_table_p *table_alloc(const struct pt_state *parent_pts,
-                                            gfp_t gfp)
+                                            gfp_t gfp, enum alloc_mode mode)
 {
-       struct pt_iommu *iommu_table =
-               iommu_from_common(parent_pts->range->common);
        struct pt_state child_pts =
                pt_init(parent_pts->range, parent_pts->level - 1, NULL);
 
-       return iommu_alloc_pages_node_sz(
-               iommu_table->nid, gfp,
-               log2_to_int(pt_num_items_lg2(&child_pts) +
-                           ilog2(PT_ITEM_WORD_SIZE)));
+       return _table_alloc(parent_pts->range->common,
+                           pt_num_items_lg2(&child_pts) +
+                                   ilog2(PT_ITEM_WORD_SIZE),
+                           gfp, mode);
 }
 
 static inline int pt_iommu_new_table(struct pt_state *pts,
@@ -370,13 +394,15 @@ static inline int pt_iommu_new_table(struct pt_state *pts,
        if (PT_WARN_ON(!pt_can_have_table(pts)))
                return -ENXIO;
 
-       table_mem = table_alloc(pts, attrs->gfp);
+       table_mem = table_alloc(pts, attrs->gfp, ALLOC_NORMAL);
        if (IS_ERR(table_mem))
                return PTR_ERR(table_mem);
 
        phys = virt_to_phys(table_mem);
        if (!pt_install_table(pts, phys, attrs)) {
-               iommu_free_pages(table_mem);
+               iommu_pages_free_incoherent(
+                       table_mem,
+                       iommu_from_common(pts->range->common)->iommu_device);
                return -EAGAIN;
        }
 
@@ -389,7 +415,9 @@ static inline int pt_iommu_new_table(struct pt_state *pts,
                pt_load_single_entry(pts);
                if (PT_WARN_ON(pt_table_pa(pts) != phys)) {
                        pt_clear_entries(pts, ilog2(1));
-                       iommu_free_pages(table_mem);
+                       iommu_pages_free_incoherent(
+                               table_mem, iommu_from_common(pts->range->common)
+                                                  ->iommu_device);
                        return -EINVAL;
                }
        }
@@ -615,8 +643,9 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
                }
 
                new_level = pts.level;
-               table_mem = table_alloc_top(
-                       common, _pt_top_set(NULL, pts.level), map->attrs.gfp);
+               table_mem =
+                       table_alloc_top(common, _pt_top_set(NULL, pts.level),
+                                       map->attrs.gfp, ALLOC_DEFER_COHERENT_FLUSH);
                if (IS_ERR(table_mem))
                        return PTR_ERR(table_mem);
                iommu_pages_list_add(&free_list, table_mem);
@@ -633,6 +662,16 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
                new_top_of_table = _pt_top_set(pts.table, pts.level);
        }
 
+       /*
+        * Avoid double flushes: flush once after all pt_install_table() calls
+        */
+       if (pt_feature(common, PT_FEAT_DMA_INCOHERENT)) {
+               ret = iommu_pages_start_incoherent_list(
+                       &free_list, iommu_table->iommu_device);
+               if (ret)
+                       goto err_free;
+       }
+
        /*
         * top_of_table is write locked by the spinlock, but readers can use
         * READ_ONCE() to get the value. Since we encode both the level and the
@@ -665,6 +704,9 @@ static int increase_top(struct pt_iommu *iommu_table, struct pt_range *range,
        return 0;
 
 err_free:
+       if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+               iommu_pages_stop_incoherent_list(&free_list,
+                                                iommu_table->iommu_device);
        iommu_put_pages_list(&free_list);
        return ret;
 }
@@ -988,6 +1030,9 @@ static void NS(deinit)(struct pt_iommu *iommu_table)
         * The driver has to already have fenced the HW access to the page table
         * and invalidated any caching referring to this memory.
         */
+       if (pt_feature(common, PT_FEAT_DMA_INCOHERENT))
+               iommu_pages_stop_incoherent_list(&collect.free_list,
+                                                iommu_table->iommu_device);
        iommu_put_pages_list(&collect.free_list);
 }
 
@@ -1078,6 +1123,7 @@ static void pt_iommu_zero(struct pt_iommu_table *fmt_table)
        memset_after(fmt_table, 0, iommu.domain);
 
        /* The caller can initialize some of these values */
+       iommu_table->iommu_device = cfg.iommu_device;
        iommu_table->driver_ops = cfg.driver_ops;
        iommu_table->nid = cfg.nid;
 }
@@ -1123,11 +1169,16 @@ int pt_iommu_init(struct pt_iommu_table *fmt_table,
             pt_feature(common, PT_FEAT_DYNAMIC_TOP)))
                return -EINVAL;
 
+       if (pt_feature(common, PT_FEAT_DMA_INCOHERENT) &&
+           WARN_ON(!iommu_table->iommu_device))
+               return -EINVAL;
+
        ret = pt_iommu_init_domain(iommu_table, &iommu_table->domain);
        if (ret)
                return ret;
 
-       table_mem = table_alloc_top(common, common->top_of_table, gfp);
+       table_mem = table_alloc_top(common, common->top_of_table, gfp,
+                                   ALLOC_NORMAL);
        if (IS_ERR(table_mem))
                return PTR_ERR(table_mem);
        pt_top_set(common, table_mem, pt_top_get_level(common));
index d541235632aa69b3f2a31ee4de937d06d66b342c..5d4f269627d5743bf7b39cfe78f8afd95b8d56c5 100644 (file)
@@ -139,6 +139,7 @@ static int pt_kunit_priv_init(struct kunit *test, struct kunit_iommu_priv *priv)
 
        priv->fmt_table.iommu.nid = NUMA_NO_NODE;
        priv->fmt_table.iommu.driver_ops = &pt_kunit_driver_ops;
+       priv->fmt_table.iommu.iommu_device = priv->dummy_dev;
        priv->domain.ops = &kunit_pt_ops;
        ret = pt_iommu_init(&priv->fmt_table, &priv->cfg, GFP_KERNEL);
        if (ret) {
index 819057de50d82c56388657a862f0f761513d1498..c25544d72f979a5947d67070fc41581f079063be 100644 (file)
@@ -48,13 +48,16 @@ enum {
 /*
  * When in debug mode we compile all formats with all features. This allows the
  * kunit to test the full matrix. SIGN_EXTEND can't co-exist with DYNAMIC_TOP or
- * FULL_VA.
+ * FULL_VA. DMA_INCOHERENT requires a SW bit that not all formats have.
  */
 #if IS_ENABLED(CONFIG_DEBUG_GENERIC_PT)
 enum {
        PT_ORIG_SUPPORTED_FEATURES = PT_SUPPORTED_FEATURES,
        PT_DEBUG_SUPPORTED_FEATURES =
                UINT_MAX &
+               ~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_DMA_INCOHERENT) ?
+                          0 :
+                          BIT(PT_FEAT_DMA_INCOHERENT))) &
                ~((PT_ORIG_SUPPORTED_FEATURES & BIT(PT_FEAT_SIGN_EXTEND)) ?
                          BIT(PT_FEAT_DYNAMIC_TOP) | BIT(PT_FEAT_FULL_VA) :
                          BIT(PT_FEAT_SIGN_EXTEND)),
index 96f8a6a7d60e1095511d5cb52abfac4d4469530b..883069e329527ff3bf0241a10bf5571fdc92bfcf 100644 (file)
@@ -85,6 +85,12 @@ enum {
  * position.
  */
 enum pt_features {
+       /**
+        * @PT_FEAT_DMA_INCOHERENT: Cache flush the page table memory before
+        * assuming the HW can read it. Otherwise an SMP release is
+        * sufficient for the HW to read it.
+        */
+       PT_FEAT_DMA_INCOHERENT,
        /**
         * @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
         * PT_VADDR_MAX.
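
As a side note on the two publication rules named in the
@PT_FEAT_DMA_INCOHERENT comment above, a hedged contrast in code; the
function and its parameters are hypothetical and not part of generic_pt:

#include <linux/dma-mapping.h>
#include <linux/types.h>

static void example_install_entry(struct device *iommu_device, u64 *tablep,
				  unsigned int index, u64 new_pte,
				  dma_addr_t table_dma, bool incoherent)
{
	if (!incoherent) {
		/* Coherent walker: release ordering alone publishes the entry */
		smp_store_release(&tablep[index], new_pte);
		return;
	}

	/* Incoherent walker: also flush the CPU cache line holding the entry */
	WRITE_ONCE(tablep[index], new_pte);
	dma_sync_single_for_device(iommu_device,
				   table_dma + index * sizeof(new_pte),
				   sizeof(new_pte), DMA_TO_DEVICE);
}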
index fde7ccf007c50cc8ae6b1a2cd398e52abbdbd790..21132e342a791f4cc03e9f75108798569632824e 100644 (file)
@@ -57,6 +57,13 @@ struct pt_iommu {
         * table walkers.
         */
        int nid;
+
+       /**
+        * @iommu_device: Device pointer used for any DMA cache flushing
+        * when PT_FEAT_DMA_INCOHERENT is set. This is the iommu device that
+        * created the page table; it must have DMA ops that flush the cache.
+        */
+       struct device *iommu_device;
 };
 
 /**