}
if (bkey_extent_is_unwritten(k)) {
- bch2_update_unwritten_extent(trans, m);
- goto out;
+ ret = bch2_update_unwritten_extent(trans, m) ?:
+ -BCH_ERR_data_update_done_unwritten;
+ goto out_nocow_unlock;
}
- bio_set_prio(&m->op.wbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ /* write path might have to decompress data: */
+ unsigned buf_bytes = 0;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ buf_bytes = max_t(unsigned, buf_bytes, p.crc.uncompressed_size << 9);
+
+ unsigned nr_vecs = DIV_ROUND_UP(buf_bytes, PAGE_SIZE);
+
+ m->bvecs = kmalloc_array(nr_vecs, sizeof(*m->bvecs), GFP_KERNEL);
+ if (!m->bvecs)
+ goto enomem;
+
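+ /* the read and write bios share the same bvec array: */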
+ bio_init(&m->rbio.bio, NULL, m->bvecs, nr_vecs, REQ_OP_READ);
+ bio_init(&m->op.wbio.bio, NULL, m->bvecs, nr_vecs, 0);
+
+ if (bch2_bio_alloc_pages(&m->op.wbio.bio, buf_bytes, GFP_KERNEL))
+ goto enomem;
+
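+ /* the read covers the whole extent, starting at its start offset: */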
+ rbio_init(&m->rbio.bio, c, io_opts, NULL);
+ m->rbio.bio.bi_iter.bi_size = buf_bytes;
+ m->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
+
++ m->op.wbio.bio.bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
+
return 0;
-out:
- bch2_data_update_exit(m);
- return ret ?: -BCH_ERR_data_update_done;
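+ /* error paths: unwind previously acquired resources in reverse order */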
+enomem:
+ ret = -ENOMEM;
+ kfree(m->bvecs);
+ m->bvecs = NULL;
+out_nocow_unlock:
+ if (c->opts.nocow_enabled)
+ bkey_nocow_unlock(c, k);
+out_put_dev_refs:
+ bkey_put_dev_refs(c, k);
+out_put_disk_res:
+ bch2_disk_reservation_put(c, &m->op.res);
+out_bkey_buf_exit:
+ bch2_bkey_buf_exit(&m->k, c);
+ return ret;
}
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
return ERR_PTR(ret);
}
- void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
- unsigned long uaddr, size_t size)
+ enum {
+ /* memory was vmap'ed for the kernel, freeing the region vunmap's it */
+ IO_REGION_F_VMAP = 1,
+ /* memory is provided by user and pinned by the kernel */
+ IO_REGION_F_USER_PROVIDED = 2,
+ /* only the first page in the array is ref'ed */
+ IO_REGION_F_SINGLE_REF = 4,
+ };
+
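+ /* Drop the region's page references, tear down any kernel vmap, and unaccount its memory. */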
+ void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
{
- struct page **page_array;
- unsigned int nr_pages;
- void *page_addr;
+ if (mr->pages) {
+ long nr_refs = mr->nr_pages;
- *npages = 0;
+ if (mr->flags & IO_REGION_F_SINGLE_REF)
+ nr_refs = 1;
- if (uaddr & (PAGE_SIZE - 1) || !size)
- return ERR_PTR(-EINVAL);
+ if (mr->flags & IO_REGION_F_USER_PROVIDED)
+ unpin_user_pages(mr->pages, nr_refs);
+ else
+ release_pages(mr->pages, nr_refs);
- nr_pages = 0;
- page_array = io_pin_pages(uaddr, size, &nr_pages);
- if (IS_ERR(page_array))
- return page_array;
+ kvfree(mr->pages);
+ }
+ if ((mr->flags & IO_REGION_F_VMAP) && mr->ptr)
+ vunmap(mr->ptr);
+ if (mr->nr_pages && ctx->user)
+ __io_unaccount_mem(ctx->user, mr->nr_pages);
- page_addr = vmap(page_array, nr_pages, VM_MAP, PAGE_KERNEL);
- if (page_addr) {
- *pages = page_array;
- *npages = nr_pages;
- return page_addr;
+ memset(mr, 0, sizeof(*mr));
+ }
+
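+ /* Map the region for kernel access: use the page address directly when the pages sit in a single folio, otherwise vmap them. */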
+ static int io_region_init_ptr(struct io_mapped_region *mr)
+ {
+ struct io_imu_folio_data ifd;
+ void *ptr;
+
+ if (io_check_coalesce_buffer(mr->pages, mr->nr_pages, &ifd)) {
+ if (ifd.nr_folios == 1) {
+ mr->ptr = page_address(mr->pages[0]);
+ return 0;
+ }
}
+ ptr = vmap(mr->pages, mr->nr_pages, VM_MAP, PAGE_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
- io_pages_free(&page_array, nr_pages);
- return ERR_PTR(-ENOMEM);
+ mr->ptr = ptr;
+ mr->flags |= IO_REGION_F_VMAP;
+ return 0;
}
- void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
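+ /* Pin the user-provided pages backing the region and mark it as user memory. */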
+ static int io_region_pin_pages(struct io_ring_ctx *ctx,
+ struct io_mapped_region *mr,
+ struct io_uring_region_desc *reg)
{
- if (mr->pages) {
- unpin_user_pages(mr->pages, mr->nr_pages);
- kvfree(mr->pages);
+ unsigned long size = mr->nr_pages << PAGE_SHIFT;
+ struct page **pages;
+ int nr_pages;
+
+ pages = io_pin_pages(reg->user_addr, size, &nr_pages);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+ if (WARN_ON_ONCE(nr_pages != mr->nr_pages))
+ return -EFAULT;
+
+ mr->pages = pages;
+ mr->flags |= IO_REGION_F_USER_PROVIDED;
+ return 0;
+ }
+
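+ /* Allocate kernel-backed pages: prefer a single compound allocation (one page reference), falling back to bulk allocation of individual pages. */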
+ static int io_region_allocate_pages(struct io_ring_ctx *ctx,
+ struct io_mapped_region *mr,
+ struct io_uring_region_desc *reg,
+ unsigned long mmap_offset)
+ {
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN;
+ unsigned long size = mr->nr_pages << PAGE_SHIFT;
+ unsigned long nr_allocated;
+ struct page **pages;
+ void *p;
+
+ pages = kvmalloc_array(mr->nr_pages, sizeof(*pages), gfp);
+ if (!pages)
+ return -ENOMEM;
+
+ p = io_mem_alloc_compound(pages, mr->nr_pages, size, gfp);
+ if (!IS_ERR(p)) {
+ mr->flags |= IO_REGION_F_SINGLE_REF;
+ goto done;
}
- if (mr->vmap_ptr)
- vunmap(mr->vmap_ptr);
- if (mr->nr_pages && ctx->user)
- __io_unaccount_mem(ctx->user, mr->nr_pages);
- memset(mr, 0, sizeof(*mr));
- nr_allocated = alloc_pages_bulk_array_node(gfp, NUMA_NO_NODE,
- mr->nr_pages, pages);
++ nr_allocated = alloc_pages_bulk_node(gfp, NUMA_NO_NODE,
++ mr->nr_pages, pages);
+ if (nr_allocated != mr->nr_pages) {
+ if (nr_allocated)
+ release_pages(pages, nr_allocated);
+ kvfree(pages);
+ return -ENOMEM;
+ }
+ done:
+ reg->mmap_offset = mmap_offset;
+ mr->pages = pages;
+ return 0;
}
int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,