@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <linux/pci-p2pdma.h>
 
 #define HL_MMU_DEBUG	0
 
@@ -347,6 +348,12 @@ static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
 			return -EINVAL;
 		}
 
+		if (phys_pg_pack->exporting_cnt) {
+			dev_dbg(hdev->dev, "handle %u is exported, cannot free\n", handle);
+			spin_unlock(&vm->idr_lock);
+			return -EINVAL;
+		}
+
 		/*
 		 * must remove from idr before the freeing of the physical
 		 * pages as the refcount of the pool is also the trigger of the
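For context, the exporting_cnt guard above pairs with the increment and decrement added later in this patch (export_dmabuf_from_handle() and hl_release_dmabuf()); all three sites touch the counter under vm->idr_lock. A condensed sketch of the protocol, using the same names as the patch:

	/* Sketch of the exporting_cnt protocol (names from this patch).
	 * All accesses are serialized by vm->idr_lock.
	 */

	/* export_dmabuf_from_handle(): pin the allocation */
	spin_lock(&vm->idr_lock);
	phys_pg_pack->exporting_cnt++;
	spin_unlock(&vm->idr_lock);

	/* free_device_memory(): refuse to free while exported (hunk above) */
	spin_lock(&vm->idr_lock);
	if (phys_pg_pack->exporting_cnt) {
		spin_unlock(&vm->idr_lock);
		return -EINVAL;
	}

	/* hl_release_dmabuf(): unpin when the dma-buf is destroyed */
	spin_lock(&vm->idr_lock);
	hl_dmabuf->phys_pg_pack->exporting_cnt--;
	spin_unlock(&vm->idr_lock);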
@@ -1487,13 +1494,487 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size,
+			struct device *dev, enum dma_data_direction dir)
+{
+	dma_addr_t addr;
+	int rc;
+
+	addr = dma_map_resource(dev, bar_address, chunk_size, dir,
+				DMA_ATTR_SKIP_CPU_SYNC);
+	rc = dma_mapping_error(dev, addr);
+	if (rc)
+		return rc;
+
+	sg_set_page(sg, NULL, chunk_size, 0);
+	sg_dma_address(sg) = addr;
+	sg_dma_len(sg) = chunk_size;
+
+	return 0;
+}
+
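set_dma_sg() maps a PCI BAR (MMIO) range rather than struct pages, which is why it goes through dma_map_resource() and leaves the scatterlist page pointer NULL. A minimal sketch of the same pattern in isolation (map_bar_chunk and its arguments are hypothetical, error handling reduced to the essentials):

	#include <linux/dma-mapping.h>
	#include <linux/scatterlist.h>

	/* Map one MMIO chunk for DMA by a peer device; no struct page exists
	 * for BAR space, so dma_map_page()/dma_map_sg() are not applicable.
	 */
	static int map_bar_chunk(struct device *dev, struct scatterlist *sg,
				 phys_addr_t bar_phys, size_t len)
	{
		dma_addr_t addr;

		addr = dma_map_resource(dev, bar_phys, len, DMA_BIDIRECTIONAL,
					DMA_ATTR_SKIP_CPU_SYNC);
		if (dma_mapping_error(dev, addr))
			return -ENOMEM;

		sg_set_page(sg, NULL, len, 0);	/* DMA-only entry: no CPU page */
		sg_dma_address(sg) = addr;
		sg_dma_len(sg) = len;

		return 0;
	}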
+static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages,
+						u64 page_size, struct device *dev,
+						enum dma_data_direction dir)
+{
+	u64 chunk_size, bar_address, dma_max_seg_size;
+	struct asic_fixed_properties *prop;
+	int rc, i, j, nents, cur_page;
+	struct scatterlist *sg;
+	struct sg_table *sgt;
+
+	prop = &hdev->asic_prop;
+
+	dma_max_seg_size = dma_get_max_seg_size(dev);
+
+	/* We would like to align the max segment size to PAGE_SIZE, so the
+	 * SGL will contain aligned addresses that can be easily mapped to
+	 * an MMU
+	 */
+	dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE);
+	if (dma_max_seg_size < PAGE_SIZE) {
+		dev_err_ratelimited(hdev->dev,
+				"dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n",
+				dma_max_seg_size);
+		return ERR_PTR(-EINVAL);
+	}
+
+	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	/* If the size of each page is larger than the dma max segment size,
+	 * then we can't combine pages and the number of entries in the SGL
+	 * will just be the
+	 * <number of pages> * <chunks of max segment size in each page>
+	 */
+	if (page_size > dma_max_seg_size)
+		nents = npages * DIV_ROUND_UP_ULL(page_size, dma_max_seg_size);
+	else
+		/* Get number of non-contiguous chunks */
+		for (i = 1, nents = 1, chunk_size = page_size ; i < npages ; i++) {
+			if (pages[i - 1] + page_size != pages[i] ||
+					chunk_size + page_size > dma_max_seg_size) {
+				nents++;
+				chunk_size = page_size;
+				continue;
+			}
+
+			chunk_size += page_size;
+		}
+
+	rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO);
+	if (rc)
+		goto error_free;
+
+	cur_page = 0;
+
+	if (page_size > dma_max_seg_size) {
+		u64 size_left, cur_device_address = 0;
+
+		size_left = page_size;
+
+		/* Need to split each page into the number of chunks of
+		 * dma_max_seg_size
+		 */
+		for_each_sgtable_dma_sg(sgt, sg, i) {
+			if (size_left == page_size)
+				cur_device_address =
+					pages[cur_page] - prop->dram_base_address;
+			else
+				cur_device_address += dma_max_seg_size;
+
+			chunk_size = min(size_left, dma_max_seg_size);
+
+			bar_address = hdev->dram_pci_bar_start + cur_device_address;
+
+			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+			if (rc)
+				goto error_unmap;
+
+			if (size_left > dma_max_seg_size) {
+				size_left -= dma_max_seg_size;
+			} else {
+				cur_page++;
+				size_left = page_size;
+			}
+		}
+	} else {
+		/* Merge pages and put them into the scatterlist */
+		for_each_sgtable_dma_sg(sgt, sg, i) {
+			chunk_size = page_size;
+			for (j = cur_page + 1 ; j < npages ; j++) {
+				if (pages[j - 1] + page_size != pages[j] ||
+						chunk_size + page_size > dma_max_seg_size)
+					break;
+
+				chunk_size += page_size;
+			}
+
+			bar_address = hdev->dram_pci_bar_start +
+					(pages[cur_page] - prop->dram_base_address);
+
+			rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir);
+			if (rc)
+				goto error_unmap;
+
+			cur_page = j;
+		}
+	}
+
+	/* Because we are not going to include a CPU list we want to have some
+	 * chance that other users will detect this by setting the orig_nents
+	 * to 0 and using only nents (length of DMA list) when going over the
+	 * sgl
+	 */
+	sgt->orig_nents = 0;
+
+	return sgt;
+
+error_unmap:
+	for_each_sgtable_dma_sg(sgt, sg, i) {
+		if (!sg_dma_len(sg))
+			continue;
+
+		dma_unmap_resource(dev, sg_dma_address(sg),
+					sg_dma_len(sg), dir,
+					DMA_ATTR_SKIP_CPU_SYNC);
+	}
+
+	sg_free_table(sgt);
+
+error_free:
+	kfree(sgt);
+	return ERR_PTR(rc);
+}
+
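Since orig_nents is zeroed, the returned table carries a DMA-only list: there are no CPU pages behind it, and a consumer must walk sg_dma_address()/sg_dma_len() over nents entries only. A sketch of what a well-behaved importer's traversal would look like (program_dma_descriptor() is a hypothetical stand-in for device-specific work):

	#include <linux/dma-mapping.h>
	#include <linux/scatterlist.h>

	/* Hypothetical device-specific helper, declared for the sketch. */
	extern void program_dma_descriptor(dma_addr_t addr, unsigned int len);

	/* Walk a DMA-only sg_table such as the one built above. orig_nents == 0
	 * signals that there is no CPU list, so only the DMA side is touched.
	 */
	static void walk_dma_only_sgt(struct sg_table *sgt)
	{
		struct scatterlist *sg;
		int i;

		for_each_sgtable_dma_sg(sgt, sg, i)
			program_dma_descriptor(sg_dma_address(sg), sg_dma_len(sg));
	}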
+static int hl_dmabuf_attach(struct dma_buf *dmabuf,
+				struct dma_buf_attachment *attachment)
+{
+	struct hl_dmabuf_priv *hl_dmabuf;
+	struct hl_device *hdev;
+	int rc;
+
+	hl_dmabuf = dmabuf->priv;
+	hdev = hl_dmabuf->ctx->hdev;
+
+	rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true);
+
+	if (rc < 0)
+		attachment->peer2peer = false;
+	return 0;
+}
+
+static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment,
+					enum dma_data_direction dir)
+{
+	struct dma_buf *dma_buf = attachment->dmabuf;
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	struct hl_dmabuf_priv *hl_dmabuf;
+	struct hl_device *hdev;
+	struct sg_table *sgt;
+
+	hl_dmabuf = dma_buf->priv;
+	hdev = hl_dmabuf->ctx->hdev;
+	phys_pg_pack = hl_dmabuf->phys_pg_pack;
+
+	if (!attachment->peer2peer) {
+		dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n");
+		return ERR_PTR(-EPERM);
+	}
+
+	if (phys_pg_pack)
+		sgt = alloc_sgt_from_device_pages(hdev,
+						phys_pg_pack->pages,
+						phys_pg_pack->npages,
+						phys_pg_pack->page_size,
+						attachment->dev,
+						dir);
+	else
+		sgt = alloc_sgt_from_device_pages(hdev,
+						&hl_dmabuf->device_address,
+						1,
+						hl_dmabuf->dmabuf->size,
+						attachment->dev,
+						dir);
+
+	if (IS_ERR(sgt))
+		dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt));
+
+	return sgt;
+}
+
+static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment,
+				struct sg_table *sgt,
+				enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	/* The memory behind the dma-buf has *always* resided on the device itself, i.e. it lives
+	 * only in the 'device' domain (after all, it maps a PCI bar address which points to the
+	 * device memory).
+	 *
+	 * Therefore, it was never in the 'CPU' domain and hence, there is no need to perform
+	 * a sync of the memory to the CPU's cache, as it never resided inside that cache.
+	 */
+	for_each_sgtable_dma_sg(sgt, sg, i)
+		dma_unmap_resource(attachment->dev, sg_dma_address(sg),
+					sg_dma_len(sg), dir,
+					DMA_ATTR_SKIP_CPU_SYNC);
+
+	/* Need to restore orig_nents because sg_free_table uses that field */
+	sgt->orig_nents = sgt->nents;
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
+static void hl_release_dmabuf(struct dma_buf *dmabuf)
+{
+	struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv;
+	struct hl_ctx *ctx = hl_dmabuf->ctx;
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm *vm = &hdev->vm;
+
+	if (hl_dmabuf->phys_pg_pack) {
+		spin_lock(&vm->idr_lock);
+		hl_dmabuf->phys_pg_pack->exporting_cnt--;
+		spin_unlock(&vm->idr_lock);
+	}
+
+	hl_ctx_put(hl_dmabuf->ctx);
+
+	kfree(hl_dmabuf);
+}
+
+static const struct dma_buf_ops habanalabs_dmabuf_ops = {
+	.attach = hl_dmabuf_attach,
+	.map_dma_buf = hl_map_dmabuf,
+	.unmap_dma_buf = hl_unmap_dmabuf,
+	.release = hl_release_dmabuf,
+};
+
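Note that the ops table provides only attach/map/unmap/release: the exported memory is device-local, so there is no CPU mmap/vmap path. For reference, an importing driver would consume this exporter through the standard dma-buf calls; a minimal sketch, assuming importer_dev is the peer device's struct device:

	#include <linux/dma-buf.h>
	#include <linux/dma-mapping.h>

	/* Import a habanalabs dma-buf fd in a peer driver (sketch). */
	static int import_hl_dmabuf(struct device *importer_dev, int fd)
	{
		struct dma_buf_attachment *attach;
		struct dma_buf *buf;
		struct sg_table *sgt;
		int rc;

		buf = dma_buf_get(fd);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		attach = dma_buf_attach(buf, importer_dev);	/* calls hl_dmabuf_attach() */
		if (IS_ERR(attach)) {
			rc = PTR_ERR(attach);
			goto put_buf;
		}

		/* calls hl_map_dmabuf(); fails with -EPERM if p2p was disabled */
		sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
		if (IS_ERR(sgt)) {
			rc = PTR_ERR(sgt);
			goto detach;
		}

		/* ... program the peer's DMA engine from sgt ... */

		dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
		rc = 0;
	detach:
		dma_buf_detach(buf, attach);
	put_buf:
		dma_buf_put(buf);
		return rc;
	}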
+static int export_dmabuf_common(struct hl_ctx *ctx,
+				struct hl_dmabuf_priv *hl_dmabuf,
+				u64 total_size, int flags, int *dmabuf_fd)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct hl_device *hdev = ctx->hdev;
+	int rc, fd;
+
+	exp_info.ops = &habanalabs_dmabuf_ops;
+	exp_info.size = total_size;
+	exp_info.flags = flags;
+	exp_info.priv = hl_dmabuf;
+
+	hl_dmabuf->dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(hl_dmabuf->dmabuf)) {
+		dev_err(hdev->dev, "failed to export dma-buf\n");
+		return PTR_ERR(hl_dmabuf->dmabuf);
+	}
+
+	fd = dma_buf_fd(hl_dmabuf->dmabuf, flags);
+	if (fd < 0) {
+		dev_err(hdev->dev, "failed to get a file descriptor for a dma-buf\n");
+		rc = fd;
+		goto err_dma_buf_put;
+	}
+
+	hl_dmabuf->ctx = ctx;
+	hl_ctx_get(hdev, hl_dmabuf->ctx);
+
+	*dmabuf_fd = fd;
+
+	return 0;
+
+err_dma_buf_put:
+	dma_buf_put(hl_dmabuf->dmabuf);
+	return rc;
+}
+
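One subtlety worth spelling out: once dma_buf_fd() succeeds, the installed fd owns the reference taken by dma_buf_export(), so the wrapper must not be freed by the export paths; it is reclaimed only through hl_release_dmabuf() when the last file reference goes away. A trace of the resulting lifetime, with names from this patch:

	/* Lifetime set up by export_dmabuf_common() (names from this patch):
	 *
	 *   dma_buf_export()   takes one reference on the new dma_buf
	 *   dma_buf_fd()       installs an fd that owns that reference
	 *   hl_ctx_get()       pins the context for the dma-buf's lifetime
	 *
	 *   ... userspace uses or passes on the fd ...
	 *
	 *   close(fd)          drops the last file reference
	 *     -> hl_release_dmabuf():
	 *          drops exporting_cnt (handle-based exports only)
	 *          hl_ctx_put() unpins the context
	 *          kfree(hl_dmabuf)
	 */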
+/**
+ * export_dmabuf_from_addr() - export a dma-buf object for the given memory
+ *                             address and size.
+ * @ctx: pointer to the context structure.
+ * @device_addr: device memory physical address.
+ * @size: size of device memory.
+ * @flags: DMA-BUF file/FD flags.
+ * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
+ *
+ * Create and export a dma-buf object for an existing memory allocation inside
+ * the device memory, and return a FD which is associated with the dma-buf
+ * object.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr,
+					u64 size, int flags, int *dmabuf_fd)
+{
+	struct hl_dmabuf_priv *hl_dmabuf;
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop;
+	u64 bar_address;
+	int rc;
+
+	prop = &hdev->asic_prop;
+
+	if (!IS_ALIGNED(device_addr, PAGE_SIZE)) {
+		dev_dbg(hdev->dev,
+			"exported device memory address 0x%llx should be aligned to 0x%lx\n",
+			device_addr, PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	if (size < PAGE_SIZE) {
+		dev_dbg(hdev->dev,
+			"exported device memory size %llu should be equal to or greater than %lu\n",
+			size, PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	if (device_addr < prop->dram_user_base_address ||
+			device_addr + size > prop->dram_end_address ||
+			device_addr + size < device_addr) {
+		dev_dbg(hdev->dev,
+			"DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n",
+			device_addr, size);
+		return -EINVAL;
+	}
+
+	bar_address = hdev->dram_pci_bar_start +
+			(device_addr - prop->dram_base_address);
+
+	if (bar_address + size >
+			hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
+			bar_address + size < bar_address) {
+		dev_dbg(hdev->dev,
+			"DRAM memory range 0x%llx (+0x%llx) is outside of PCI BAR boundaries\n",
+			device_addr, size);
+		return -EINVAL;
+	}
+
+	hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
+	if (!hl_dmabuf)
+		return -ENOMEM;
+
+	hl_dmabuf->device_address = device_addr;
+
+	rc = export_dmabuf_common(ctx, hl_dmabuf, size, flags, dmabuf_fd);
+	if (rc)
+		goto err_free_dmabuf_wrapper;
+
+	return 0;
+
+err_free_dmabuf_wrapper:
+	kfree(hl_dmabuf);
+	return rc;
+}
+
+/**
+ * export_dmabuf_from_handle() - export a dma-buf object for the given memory
+ *                               handle.
+ * @ctx: pointer to the context structure.
+ * @handle: device memory allocation handle.
+ * @flags: DMA-BUF file/FD flags.
+ * @dmabuf_fd: pointer to result FD that represents the dma-buf object.
+ *
+ * Create and export a dma-buf object for an existing memory allocation inside
+ * the device memory, and return a FD which is associated with the dma-buf
+ * object.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int export_dmabuf_from_handle(struct hl_ctx *ctx, u64 handle, int flags,
+					int *dmabuf_fd)
+{
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	struct hl_dmabuf_priv *hl_dmabuf;
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop;
+	struct hl_vm *vm = &hdev->vm;
+	u64 bar_address;
+	int rc, i;
+
+	prop = &hdev->asic_prop;
+
+	if (upper_32_bits(handle)) {
+		dev_dbg(hdev->dev, "no match for handle 0x%llx\n", handle);
+		return -EINVAL;
+	}
+
+	spin_lock(&vm->idr_lock);
+
+	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) handle);
+	if (!phys_pg_pack) {
+		spin_unlock(&vm->idr_lock);
+		dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) handle);
+		return -EINVAL;
+	}
+
+	/* increment now to avoid freeing device memory while exporting */
+	phys_pg_pack->exporting_cnt++;
+
+	spin_unlock(&vm->idr_lock);
+
+	if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) {
+		dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", handle);
+		rc = -EINVAL;
+		goto err_dec_exporting_cnt;
+	}
+
+	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+
+		bar_address = hdev->dram_pci_bar_start +
+				(phys_pg_pack->pages[i] -
+					prop->dram_base_address);
+
+		if (bar_address + phys_pg_pack->page_size >
+				hdev->dram_pci_bar_start + prop->dram_pci_bar_size ||
+				bar_address + phys_pg_pack->page_size < bar_address) {
+
+			dev_dbg(hdev->dev,
+				"DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n",
+				phys_pg_pack->pages[i],
+				phys_pg_pack->page_size);
+
+			rc = -EINVAL;
+			goto err_dec_exporting_cnt;
+		}
+	}
+
+	hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL);
+	if (!hl_dmabuf) {
+		rc = -ENOMEM;
+		goto err_dec_exporting_cnt;
+	}
+
+	hl_dmabuf->phys_pg_pack = phys_pg_pack;
+
+	rc = export_dmabuf_common(ctx, hl_dmabuf, phys_pg_pack->total_size,
+				flags, dmabuf_fd);
+	if (rc)
+		goto err_free_dmabuf_wrapper;
+
+	return 0;
+
+err_free_dmabuf_wrapper:
+	kfree(hl_dmabuf);
+
+err_dec_exporting_cnt:
+	spin_lock(&vm->idr_lock);
+	phys_pg_pack->exporting_cnt--;
+	spin_unlock(&vm->idr_lock);
+
+	return rc;
+}
+
 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
 	struct hl_ctx *ctx = hpriv->ctx;
 	u64 block_handle, device_addr = 0;
 	u32 handle = 0, block_size;
-	int rc;
+	int rc, dmabuf_fd = -EBADF;
 
 	switch (args->in.op) {
 	case HL_MEM_OP_ALLOC:
@@ -1542,6 +2023,16 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 		args->out.block_size = block_size;
 		break;
 
+	case HL_MEM_OP_EXPORT_DMABUF_FD:
+		rc = export_dmabuf_from_addr(ctx,
+				args->in.export_dmabuf_fd.handle,
+				args->in.export_dmabuf_fd.mem_size,
+				args->in.flags,
+				&dmabuf_fd);
+		memset(args, 0, sizeof(*args));
+		args->out.fd = dmabuf_fd;
+		break;
+
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -ENOTTY;
@@ -1560,7 +2051,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 	struct hl_ctx *ctx = hpriv->ctx;
 	u64 block_handle, device_addr = 0;
 	u32 handle = 0, block_size;
-	int rc;
+	int rc, dmabuf_fd = -EBADF;
 
 	if (!hl_device_operational(hdev, &status)) {
 		dev_warn_ratelimited(hdev->dev,
@@ -1651,6 +2142,22 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 		args->out.block_size = block_size;
 		break;
 
+	case HL_MEM_OP_EXPORT_DMABUF_FD:
+		if (hdev->asic_prop.dram_supports_virtual_memory)
+			rc = export_dmabuf_from_handle(ctx,
+					args->in.export_dmabuf_fd.handle,
+					args->in.flags,
+					&dmabuf_fd);
+		else
+			rc = export_dmabuf_from_addr(ctx,
+					args->in.export_dmabuf_fd.handle,
+					args->in.export_dmabuf_fd.mem_size,
+					args->in.flags,
+					&dmabuf_fd);
+		memset(args, 0, sizeof(*args));
+		args->out.fd = dmabuf_fd;
+		break;
+
 	default:
 		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
 		rc = -ENOTTY;
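Taken together, the ioctl plumbing above gives userspace a one-call export path. A hedged userspace sketch, assuming the companion uapi patch that defines HL_MEM_OP_EXPORT_DMABUF_FD and the out.fd field in include/uapi/misc/habanalabs.h (the flags value and how the handle was obtained are illustrative):

	/* Userspace sketch: export device memory as a dma-buf fd via the
	 * memory ioctl. The handle is assumed to come from a prior
	 * HL_MEM_OP_ALLOC call on the same context.
	 */
	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <misc/habanalabs.h>

	int export_dram_handle(int dev_fd, unsigned long long handle)
	{
		union hl_mem_args args;

		memset(&args, 0, sizeof(args));
		args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
		args.in.export_dmabuf_fd.handle = handle;
		args.in.flags = O_RDWR | O_CLOEXEC;	/* dma-buf fd flags */

		if (ioctl(dev_fd, HL_IOCTL_MEMORY, &args))
			return -1;

		return args.out.fd;	/* hand this fd to the importing driver */
	}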