Blame - drivers/misc/habanalabs/memory.c - hafnium/third_party/linux.git

blob: acfccf32be6b954a7e02f1b75d29ebf86f310ec8 [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	/*
				4	* Copyright 2016-2019 HabanaLabs, Ltd.
				5	* All Rights Reserved.
				6	*/
				7
				8	#include <uapi/misc/habanalabs.h>
				9	#include "habanalabs.h"
				10	#include "include/hw_ip/mmu/mmu_general.h"
				11
				12	#include <linux/uaccess.h>
				13	#include <linux/slab.h>
				14	#include <linux/genalloc.h>
				15
				16	#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT)
				17	#define HL_MMU_DEBUG 0
				18
				19	/*
				20	* The va ranges in context object contain a list with the available chunks of
				21	* device virtual memory.
				22	* There is one range for host allocations and one for DRAM allocations.
				23	*
				24	* On initialization each range contains one chunk of all of its available
				25	* virtual range which is a half of the total device virtual range.
				26	*
				27	* On each mapping of physical pages, a suitable virtual range chunk (with a
				28	* minimum size) is selected from the list. If the chunk size equals the
				29	* requested size, the chunk is returned. Otherwise, the chunk is split into
				30	* two chunks - one to return as result and a remainder to stay in the list.
				31	*
				32	* On each Unmapping of a virtual address, the relevant virtual chunk is
				33	* returned to the list. The chunk is added to the list and if its edges match
				34	* the edges of the adjacent chunks (means a contiguous chunk can be created),
				35	* the chunks are merged.
				36	*
				37	* On finish, the list is checked to have only one chunk of all the relevant
				38	* virtual range (which is a half of the device total virtual range).
				39	* If not (means not all mappings were unmapped), a warning is printed.
				40	*/
				41
				42	/*
				43	* alloc_device_memory - allocate device memory
				44	*
				45	* @ctx : current context
				46	* @args : host parameters containing the requested size
				47	* @ret_handle : result handle
				48	*
				49	* This function does the following:
				50	* - Allocate the requested size rounded up to 2MB pages
				51	* - Return unique handle
				52	*/
				53	static int alloc_device_memory(struct hl_ctx ctx, struct hl_mem_in args,
				54	u32 *ret_handle)
				55	{
				56	struct hl_device *hdev = ctx->hdev;
				57	struct hl_vm *vm = &hdev->vm;
				58	struct hl_vm_phys_pg_pack *phys_pg_pack;
				59	u64 paddr = 0, total_size, num_pgs, i;
				60	u32 num_curr_pgs, page_size, page_shift;
				61	int handle, rc;
				62	bool contiguous;
				63
				64	num_curr_pgs = 0;
				65	page_size = hdev->asic_prop.dram_page_size;
				66	page_shift = __ffs(page_size);
				67	num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
				68	total_size = num_pgs << page_shift;
				69
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	70	if (!total_size) {
				71	dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
				72	return -EINVAL;
				73	}
				74
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	75	contiguous = args->flags & HL_MEM_CONTIGUOUS;
				76
				77	if (contiguous) {
				78	paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
				79	if (!paddr) {
				80	dev_err(hdev->dev,
				81	"failed to allocate %llu huge contiguous pages\n",
				82	num_pgs);
				83	return -ENOMEM;
				84	}
				85	}
				86
				87	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
				88	if (!phys_pg_pack) {
				89	rc = -ENOMEM;
				90	goto pages_pack_err;
				91	}
				92
				93	phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
				94	phys_pg_pack->asid = ctx->asid;
				95	phys_pg_pack->npages = num_pgs;
				96	phys_pg_pack->page_size = page_size;
				97	phys_pg_pack->total_size = total_size;
				98	phys_pg_pack->flags = args->flags;
				99	phys_pg_pack->contiguous = contiguous;
				100
				101	phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	102	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	103	rc = -ENOMEM;
				104	goto pages_arr_err;
				105	}
				106
				107	if (phys_pg_pack->contiguous) {
				108	for (i = 0 ; i < num_pgs ; i++)
				109	phys_pg_pack->pages[i] = paddr + i * page_size;
				110	} else {
				111	for (i = 0 ; i < num_pgs ; i++) {
				112	phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
				113	vm->dram_pg_pool,
				114	page_size);
				115	if (!phys_pg_pack->pages[i]) {
				116	dev_err(hdev->dev,
				117	"Failed to allocate device memory (out of memory)\n");
				118	rc = -ENOMEM;
				119	goto page_err;
				120	}
				121
				122	num_curr_pgs++;
				123	}
				124	}
				125
				126	spin_lock(&vm->idr_lock);
				127	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
				128	GFP_ATOMIC);
				129	spin_unlock(&vm->idr_lock);
				130
				131	if (handle < 0) {
				132	dev_err(hdev->dev, "Failed to get handle for page\n");
				133	rc = -EFAULT;
				134	goto idr_err;
				135	}
				136
				137	for (i = 0 ; i < num_pgs ; i++)
				138	kref_get(&vm->dram_pg_pool_refcount);
				139
				140	phys_pg_pack->handle = handle;
				141
				142	atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
				143	atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
				144
				145	*ret_handle = handle;
				146
				147	return 0;
				148
				149	idr_err:
				150	page_err:
				151	if (!phys_pg_pack->contiguous)
				152	for (i = 0 ; i < num_curr_pgs ; i++)
				153	gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
				154	page_size);
				155
				156	kvfree(phys_pg_pack->pages);
				157	pages_arr_err:
				158	kfree(phys_pg_pack);
				159	pages_pack_err:
				160	if (contiguous)
				161	gen_pool_free(vm->dram_pg_pool, paddr, total_size);
				162
				163	return rc;
				164	}
				165
				166	/*
				167	* get_userptr_from_host_va - initialize userptr structure from given host
				168	* virtual address
				169	*
				170	* @hdev : habanalabs device structure
				171	* @args : parameters containing the virtual address and size
				172	* @p_userptr : pointer to result userptr structure
				173	*
				174	* This function does the following:
				175	* - Allocate userptr structure
				176	* - Pin the given host memory using the userptr structure
				177	* - Perform DMA mapping to have the DMA addresses of the pages
				178	*/
				179	static int get_userptr_from_host_va(struct hl_device *hdev,
				180	struct hl_mem_in args, struct hl_userptr *p_userptr)
				181	{
				182	struct hl_userptr *userptr;
				183	int rc;
				184
				185	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
				186	if (!userptr) {
				187	rc = -ENOMEM;
				188	goto userptr_err;
				189	}
				190
				191	rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr,
				192	args->map_host.mem_size, userptr);
				193	if (rc) {
				194	dev_err(hdev->dev, "Failed to pin host memory\n");
				195	goto pin_err;
				196	}
				197
				198	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
				199	userptr->sgt->nents, DMA_BIDIRECTIONAL);
				200	if (rc) {
				201	dev_err(hdev->dev, "failed to map sgt with DMA region\n");
				202	goto dma_map_err;
				203	}
				204
				205	userptr->dma_mapped = true;
				206	userptr->dir = DMA_BIDIRECTIONAL;
				207	userptr->vm_type = VM_TYPE_USERPTR;
				208
				209	*p_userptr = userptr;
				210
				211	return 0;
				212
				213	dma_map_err:
				214	hl_unpin_host_memory(hdev, userptr);
				215	pin_err:
				216	kfree(userptr);
				217	userptr_err:
				218
				219	return rc;
				220	}
				221
				222	/*
				223	* free_userptr - free userptr structure
				224	*
				225	* @hdev : habanalabs device structure
				226	* @userptr : userptr to free
				227	*
				228	* This function does the following:
				229	* - Unpins the physical pages
				230	* - Frees the userptr structure
				231	*/
				232	static void free_userptr(struct hl_device hdev, struct hl_userptr userptr)
				233	{
				234	hl_unpin_host_memory(hdev, userptr);
				235	kfree(userptr);
				236	}
				237
				238	/*
				239	* dram_pg_pool_do_release - free DRAM pages pool
				240	*
				241	* @ref : pointer to reference object
				242	*
				243	* This function does the following:
				244	* - Frees the idr structure of physical pages handles
				245	* - Frees the generic pool of DRAM physical pages
				246	*/
				247	static void dram_pg_pool_do_release(struct kref *ref)
				248	{
				249	struct hl_vm *vm = container_of(ref, struct hl_vm,
				250	dram_pg_pool_refcount);
				251
				252	/*
				253	* free the idr here as only here we know for sure that there are no
				254	* allocated physical pages and hence there are no handles in use
				255	*/
				256	idr_destroy(&vm->phys_pg_pack_handles);
				257	gen_pool_destroy(vm->dram_pg_pool);
				258	}
				259
				260	/*
				261	* free_phys_pg_pack - free physical page pack
				262	*
				263	* @hdev : habanalabs device structure
				264	* @phys_pg_pack : physical page pack to free
				265	*
				266	* This function does the following:
				267	* - For DRAM memory only, iterate over the pack and free each physical block
				268	* structure by returning it to the general pool
				269	* - Free the hl_vm_phys_pg_pack structure
				270	*/
				271	static void free_phys_pg_pack(struct hl_device *hdev,
				272	struct hl_vm_phys_pg_pack *phys_pg_pack)
				273	{
				274	struct hl_vm *vm = &hdev->vm;
				275	u64 i;
				276
				277	if (!phys_pg_pack->created_from_userptr) {
				278	if (phys_pg_pack->contiguous) {
				279	gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
				280	phys_pg_pack->total_size);
				281
				282	for (i = 0; i < phys_pg_pack->npages ; i++)
				283	kref_put(&vm->dram_pg_pool_refcount,
				284	dram_pg_pool_do_release);
				285	} else {
				286	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
				287	gen_pool_free(vm->dram_pg_pool,
				288	phys_pg_pack->pages[i],
				289	phys_pg_pack->page_size);
				290	kref_put(&vm->dram_pg_pool_refcount,
				291	dram_pg_pool_do_release);
				292	}
				293	}
				294	}
				295
				296	kvfree(phys_pg_pack->pages);
				297	kfree(phys_pg_pack);
				298	}
				299
				300	/*
				301	* free_device_memory - free device memory
				302	*
				303	* @ctx : current context
				304	* @handle : handle of the memory chunk to free
				305	*
				306	* This function does the following:
				307	* - Free the device memory related to the given handle
				308	*/
				309	static int free_device_memory(struct hl_ctx *ctx, u32 handle)
				310	{
				311	struct hl_device *hdev = ctx->hdev;
				312	struct hl_vm *vm = &hdev->vm;
				313	struct hl_vm_phys_pg_pack *phys_pg_pack;
				314
				315	spin_lock(&vm->idr_lock);
				316	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
				317	if (phys_pg_pack) {
				318	if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
				319	dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
				320	handle);
				321	spin_unlock(&vm->idr_lock);
				322	return -EINVAL;
				323	}
				324
				325	/*
				326	* must remove from idr before the freeing of the physical
				327	* pages as the refcount of the pool is also the trigger of the
				328	* idr destroy
				329	*/
				330	idr_remove(&vm->phys_pg_pack_handles, handle);
				331	spin_unlock(&vm->idr_lock);
				332
				333	atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
				334	atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
				335
				336	free_phys_pg_pack(hdev, phys_pg_pack);
				337	} else {
				338	spin_unlock(&vm->idr_lock);
				339	dev_err(hdev->dev,
				340	"free device memory failed, no match for handle %u\n",
				341	handle);
				342	return -EINVAL;
				343	}
				344
				345	return 0;
				346	}
				347
				348	/*
				349	* clear_va_list_locked - free virtual addresses list
				350	*
				351	* @hdev : habanalabs device structure
				352	* @va_list : list of virtual addresses to free
				353	*
				354	* This function does the following:
				355	* - Iterate over the list and free each virtual addresses block
				356	*
				357	* This function should be called only when va_list lock is taken
				358	*/
				359	static void clear_va_list_locked(struct hl_device *hdev,
				360	struct list_head *va_list)
				361	{
				362	struct hl_vm_va_block va_block, tmp;
				363
				364	list_for_each_entry_safe(va_block, tmp, va_list, node) {
				365	list_del(&va_block->node);
				366	kfree(va_block);
				367	}
				368	}
				369
				370	/*
				371	* print_va_list_locked - print virtual addresses list
				372	*
				373	* @hdev : habanalabs device structure
				374	* @va_list : list of virtual addresses to print
				375	*
				376	* This function does the following:
				377	* - Iterate over the list and print each virtual addresses block
				378	*
				379	* This function should be called only when va_list lock is taken
				380	*/
				381	static void print_va_list_locked(struct hl_device *hdev,
				382	struct list_head *va_list)
				383	{
				384	#if HL_MMU_DEBUG
				385	struct hl_vm_va_block *va_block;
				386
				387	dev_dbg(hdev->dev, "print va list:\n");
				388
				389	list_for_each_entry(va_block, va_list, node)
				390	dev_dbg(hdev->dev,
				391	"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
				392	va_block->start, va_block->end, va_block->size);
				393	#endif
				394	}
				395
				396	/*
				397	* merge_va_blocks_locked - merge a virtual block if possible
				398	*
				399	* @hdev : pointer to the habanalabs device structure
				400	* @va_list : pointer to the virtual addresses block list
				401	* @va_block : virtual block to merge with adjacent blocks
				402	*
				403	* This function does the following:
				404	* - Merge the given blocks with the adjacent blocks if their virtual ranges
				405	* create a contiguous virtual range
				406	*
				407	* This Function should be called only when va_list lock is taken
				408	*/
				409	static void merge_va_blocks_locked(struct hl_device *hdev,
				410	struct list_head va_list, struct hl_vm_va_block va_block)
				411	{
				412	struct hl_vm_va_block prev, next;
				413
				414	prev = list_prev_entry(va_block, node);
				415	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
				416	prev->end = va_block->end;
				417	prev->size = prev->end - prev->start;
				418	list_del(&va_block->node);
				419	kfree(va_block);
				420	va_block = prev;
				421	}
				422
				423	next = list_next_entry(va_block, node);
				424	if (&next->node != va_list && va_block->end + 1 == next->start) {
				425	next->start = va_block->start;
				426	next->size = next->end - next->start;
				427	list_del(&va_block->node);
				428	kfree(va_block);
				429	}
				430	}
				431
				432	/*
				433	* add_va_block_locked - add a virtual block to the virtual addresses list
				434	*
				435	* @hdev : pointer to the habanalabs device structure
				436	* @va_list : pointer to the virtual addresses block list
				437	* @start : start virtual address
				438	* @end : end virtual address
				439	*
				440	* This function does the following:
				441	* - Add the given block to the virtual blocks list and merge with other
				442	* blocks if a contiguous virtual block can be created
				443	*
				444	* This Function should be called only when va_list lock is taken
				445	*/
				446	static int add_va_block_locked(struct hl_device *hdev,
				447	struct list_head *va_list, u64 start, u64 end)
				448	{
				449	struct hl_vm_va_block va_block, res = NULL;
				450	u64 size = end - start;
				451
				452	print_va_list_locked(hdev, va_list);
				453
				454	list_for_each_entry(va_block, va_list, node) {
				455	/* TODO: remove upon matureness */
				456	if (hl_mem_area_crosses_range(start, size, va_block->start,
				457	va_block->end)) {
				458	dev_err(hdev->dev,
				459	"block crossing ranges at start 0x%llx, end 0x%llx\n",
				460	va_block->start, va_block->end);
				461	return -EINVAL;
				462	}
				463
				464	if (va_block->end < start)
				465	res = va_block;
				466	}
				467
				468	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
				469	if (!va_block)
				470	return -ENOMEM;
				471
				472	va_block->start = start;
				473	va_block->end = end;
				474	va_block->size = size;
				475
				476	if (!res)
				477	list_add(&va_block->node, va_list);
				478	else
				479	list_add(&va_block->node, &res->node);
				480
				481	merge_va_blocks_locked(hdev, va_list, va_block);
				482
				483	print_va_list_locked(hdev, va_list);
				484
				485	return 0;
				486	}
				487
				488	/*
				489	* add_va_block - wrapper for add_va_block_locked
				490	*
				491	* @hdev : pointer to the habanalabs device structure
				492	* @va_list : pointer to the virtual addresses block list
				493	* @start : start virtual address
				494	* @end : end virtual address
				495	*
				496	* This function does the following:
				497	* - Takes the list lock and calls add_va_block_locked
				498	*/
				499	static inline int add_va_block(struct hl_device *hdev,
				500	struct hl_va_range *va_range, u64 start, u64 end)
				501	{
				502	int rc;
				503
				504	mutex_lock(&va_range->lock);
				505	rc = add_va_block_locked(hdev, &va_range->list, start, end);
				506	mutex_unlock(&va_range->lock);
				507
				508	return rc;
				509	}
				510
				511	/*
				512	* get_va_block - get a virtual block with the requested size
				513	*
				514	* @hdev : pointer to the habanalabs device structure
				515	* @va_range : pointer to the virtual addresses range
				516	* @size : requested block size
				517	* @hint_addr : hint for request address by the user
				518	* @is_userptr : is host or DRAM memory
				519	*
				520	* This function does the following:
				521	* - Iterate on the virtual block list to find a suitable virtual block for the
				522	* requested size
				523	* - Reserve the requested block and update the list
				524	* - Return the start address of the virtual block
				525	*/
				526	static u64 get_va_block(struct hl_device *hdev,
				527	struct hl_va_range *va_range, u64 size, u64 hint_addr,
				528	bool is_userptr)
				529	{
				530	struct hl_vm_va_block va_block, new_va_block = NULL;
				531	u64 valid_start, valid_size, prev_start, prev_end, page_mask,
				532	res_valid_start = 0, res_valid_size = 0;
				533	u32 page_size;
				534	bool add_prev = false;
				535
				536	if (is_userptr) {
				537	/*
				538	* We cannot know if the user allocated memory with huge pages
				539	* or not, hence we continue with the biggest possible
				540	* granularity.
				541	*/
				542	page_size = PAGE_SIZE_2MB;
				543	page_mask = PAGE_MASK_2MB;
				544	} else {
				545	page_size = hdev->asic_prop.dram_page_size;
				546	page_mask = ~((u64)page_size - 1);
				547	}
				548
				549	mutex_lock(&va_range->lock);
				550
				551	print_va_list_locked(hdev, &va_range->list);
				552
				553	list_for_each_entry(va_block, &va_range->list, node) {
				554	/* calc the first possible aligned addr */
				555	valid_start = va_block->start;
				556
				557
				558	if (valid_start & (page_size - 1)) {
				559	valid_start &= page_mask;
				560	valid_start += page_size;
				561	if (valid_start > va_block->end)
				562	continue;
				563	}
				564
				565	valid_size = va_block->end - valid_start;
				566
				567	if (valid_size >= size &&
				568	(!new_va_block \|\| valid_size < res_valid_size)) {
				569
				570	new_va_block = va_block;
				571	res_valid_start = valid_start;
				572	res_valid_size = valid_size;
				573	}
				574
				575	if (hint_addr && hint_addr >= valid_start &&
				576	((hint_addr + size) <= va_block->end)) {
				577	new_va_block = va_block;
				578	res_valid_start = hint_addr;
				579	res_valid_size = valid_size;
				580	break;
				581	}
				582	}
				583
				584	if (!new_va_block) {
				585	dev_err(hdev->dev, "no available va block for size %llu\n",
				586	size);
				587	goto out;
				588	}
				589
				590	if (res_valid_start > new_va_block->start) {
				591	prev_start = new_va_block->start;
				592	prev_end = res_valid_start - 1;
				593
				594	new_va_block->start = res_valid_start;
				595	new_va_block->size = res_valid_size;
				596
				597	add_prev = true;
				598	}
				599
				600	if (new_va_block->size > size) {
				601	new_va_block->start += size;
				602	new_va_block->size = new_va_block->end - new_va_block->start;
				603	} else {
				604	list_del(&new_va_block->node);
				605	kfree(new_va_block);
				606	}
				607
				608	if (add_prev)
				609	add_va_block_locked(hdev, &va_range->list, prev_start,
				610	prev_end);
				611
				612	print_va_list_locked(hdev, &va_range->list);
				613	out:
				614	mutex_unlock(&va_range->lock);
				615
				616	return res_valid_start;
				617	}
				618
				619	/*
				620	* get_sg_info - get number of pages and the DMA address from SG list
				621	*
				622	* @sg : the SG list
				623	* @dma_addr : pointer to DMA address to return
				624	*
				625	* Calculate the number of consecutive pages described by the SG list. Take the
				626	* offset of the address in the first page, add to it the length and round it up
				627	* to the number of needed pages.
				628	*/
				629	static u32 get_sg_info(struct scatterlist sg, dma_addr_t dma_addr)
				630	{
				631	*dma_addr = sg_dma_address(sg);
				632
				633	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
				634	(PAGE_SIZE - 1)) >> PAGE_SHIFT;
				635	}
				636
				637	/*
				638	* init_phys_pg_pack_from_userptr - initialize physical page pack from host
				639	* memory
				640	*
				641	* @ctx : current context
				642	* @userptr : userptr to initialize from
				643	* @pphys_pg_pack : res pointer
				644	*
				645	* This function does the following:
				646	* - Pin the physical pages related to the given virtual block
				647	* - Create a physical page pack from the physical pages related to the given
				648	* virtual block
				649	*/
				650	static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
				651	struct hl_userptr *userptr,
				652	struct hl_vm_phys_pg_pack **pphys_pg_pack)
				653	{
				654	struct hl_vm_phys_pg_pack *phys_pg_pack;
				655	struct scatterlist *sg;
				656	dma_addr_t dma_addr;
				657	u64 page_mask, total_npages;
				658	u32 npages, page_size = PAGE_SIZE;
				659	bool first = true, is_huge_page_opt = true;
				660	int rc, i, j;
				661
				662	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
				663	if (!phys_pg_pack)
				664	return -ENOMEM;
				665
				666	phys_pg_pack->vm_type = userptr->vm_type;
				667	phys_pg_pack->created_from_userptr = true;
				668	phys_pg_pack->asid = ctx->asid;
				669	atomic_set(&phys_pg_pack->mapping_cnt, 1);
				670
				671	/* Only if all dma_addrs are aligned to 2MB and their
				672	* sizes is at least 2MB, we can use huge page mapping.
				673	* We limit the 2MB optimization to this condition,
				674	* since later on we acquire the related VA range as one
				675	* consecutive block.
				676	*/
				677	total_npages = 0;
				678	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
				679	npages = get_sg_info(sg, &dma_addr);
				680
				681	total_npages += npages;
				682
				683	if ((npages % PGS_IN_2MB_PAGE) \|\|
				684	(dma_addr & (PAGE_SIZE_2MB - 1)))
				685	is_huge_page_opt = false;
				686	}
				687
				688	if (is_huge_page_opt) {
				689	page_size = PAGE_SIZE_2MB;
				690	total_npages /= PGS_IN_2MB_PAGE;
				691	}
				692
				693	page_mask = ~(((u64) page_size) - 1);
				694
				695	phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
				696	GFP_KERNEL);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	697	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	698	rc = -ENOMEM;
				699	goto page_pack_arr_mem_err;
				700	}
				701
				702	phys_pg_pack->npages = total_npages;
				703	phys_pg_pack->page_size = page_size;
				704	phys_pg_pack->total_size = total_npages * page_size;
				705
				706	j = 0;
				707	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
				708	npages = get_sg_info(sg, &dma_addr);
				709
				710	/* align down to physical page size and save the offset */
				711	if (first) {
				712	first = false;
				713	phys_pg_pack->offset = dma_addr & (page_size - 1);
				714	dma_addr &= page_mask;
				715	}
				716
				717	while (npages) {
				718	phys_pg_pack->pages[j++] = dma_addr;
				719	dma_addr += page_size;
				720
				721	if (is_huge_page_opt)
				722	npages -= PGS_IN_2MB_PAGE;
				723	else
				724	npages--;
				725	}
				726	}
				727
				728	*pphys_pg_pack = phys_pg_pack;
				729
				730	return 0;
				731
				732	page_pack_arr_mem_err:
				733	kfree(phys_pg_pack);
				734
				735	return rc;
				736	}
				737
				738	/*
				739	* map_phys_page_pack - maps the physical page pack
				740	*
				741	* @ctx : current context
				742	* @vaddr : start address of the virtual area to map from
				743	* @phys_pg_pack : the pack of physical pages to map to
				744	*
				745	* This function does the following:
				746	* - Maps each chunk of virtual memory to matching physical chunk
				747	* - Stores number of successful mappings in the given argument
				748	* - Returns 0 on success, error code otherwise.
				749	*/
				750	static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
				751	struct hl_vm_phys_pg_pack *phys_pg_pack)
				752	{
				753	struct hl_device *hdev = ctx->hdev;
				754	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
				755	u32 page_size = phys_pg_pack->page_size;
				756	int rc = 0;
				757
				758	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
				759	paddr = phys_pg_pack->pages[i];
				760
				761	rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
				762	if (rc) {
				763	dev_err(hdev->dev,
				764	"map failed for handle %u, npages: %llu, mapped: %llu",
				765	phys_pg_pack->handle, phys_pg_pack->npages,
				766	mapped_pg_cnt);
				767	goto err;
				768	}
				769
				770	mapped_pg_cnt++;
				771	next_vaddr += page_size;
				772	}
				773
				774	return 0;
				775
				776	err:
				777	next_vaddr = vaddr;
				778	for (i = 0 ; i < mapped_pg_cnt ; i++) {
				779	if (hl_mmu_unmap(ctx, next_vaddr, page_size))
				780	dev_warn_ratelimited(hdev->dev,
				781	"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
				782	phys_pg_pack->handle, next_vaddr,
				783	phys_pg_pack->pages[i], page_size);
				784
				785	next_vaddr += page_size;
				786	}
				787
				788	return rc;
				789	}
				790
				791	static int get_paddr_from_handle(struct hl_ctx ctx, struct hl_mem_in args,
				792	u64 *paddr)
				793	{
				794	struct hl_device *hdev = ctx->hdev;
				795	struct hl_vm *vm = &hdev->vm;
				796	struct hl_vm_phys_pg_pack *phys_pg_pack;
				797	u32 handle;
				798
				799	handle = lower_32_bits(args->map_device.handle);
				800	spin_lock(&vm->idr_lock);
				801	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
				802	if (!phys_pg_pack) {
				803	spin_unlock(&vm->idr_lock);
				804	dev_err(hdev->dev, "no match for handle %u\n", handle);
				805	return -EINVAL;
				806	}
				807
				808	*paddr = phys_pg_pack->pages[0];
				809
				810	spin_unlock(&vm->idr_lock);
				811
				812	return 0;
				813	}
				814
				815	/*
				816	* map_device_va - map the given memory
				817	*
				818	* @ctx : current context
				819	* @args : host parameters with handle/host virtual address
				820	* @device_addr : pointer to result device virtual address
				821	*
				822	* This function does the following:
				823	* - If given a physical device memory handle, map to a device virtual block
				824	* and return the start address of this block
				825	* - If given a host virtual address and size, find the related physical pages,
				826	* map a device virtual block to this pages and return the start address of
				827	* this block
				828	*/
				829	static int map_device_va(struct hl_ctx ctx, struct hl_mem_in args,
				830	u64 *device_addr)
				831	{
				832	struct hl_device *hdev = ctx->hdev;
				833	struct hl_vm *vm = &hdev->vm;
				834	struct hl_vm_phys_pg_pack *phys_pg_pack;
				835	struct hl_userptr *userptr = NULL;
				836	struct hl_vm_hash_node *hnode;
				837	enum vm_type_t *vm_type;
				838	u64 ret_vaddr, hint_addr;
				839	u32 handle = 0;
				840	int rc;
				841	bool is_userptr = args->flags & HL_MEM_USERPTR;
				842
				843	/* Assume failure */
				844	*device_addr = 0;
				845
				846	if (is_userptr) {
				847	rc = get_userptr_from_host_va(hdev, args, &userptr);
				848	if (rc) {
				849	dev_err(hdev->dev, "failed to get userptr from va\n");
				850	return rc;
				851	}
				852
				853	rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				854	&phys_pg_pack);
				855	if (rc) {
				856	dev_err(hdev->dev,
				857	"unable to init page pack for vaddr 0x%llx\n",
				858	args->map_host.host_virt_addr);
				859	goto init_page_pack_err;
				860	}
				861
				862	vm_type = (enum vm_type_t *) userptr;
				863	hint_addr = args->map_host.hint_addr;
				864	} else {
				865	handle = lower_32_bits(args->map_device.handle);
				866
				867	spin_lock(&vm->idr_lock);
				868	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
				869	if (!phys_pg_pack) {
				870	spin_unlock(&vm->idr_lock);
				871	dev_err(hdev->dev,
				872	"no match for handle %u\n", handle);
				873	return -EINVAL;
				874	}
				875
				876	/* increment now to avoid freeing device memory while mapping */
				877	atomic_inc(&phys_pg_pack->mapping_cnt);
				878
				879	spin_unlock(&vm->idr_lock);
				880
				881	vm_type = (enum vm_type_t *) phys_pg_pack;
				882
				883	hint_addr = args->map_device.hint_addr;
				884	}
				885
				886	/*
				887	* relevant for mapping device physical memory only, as host memory is
				888	* implicitly shared
				889	*/
				890	if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
				891	phys_pg_pack->asid != ctx->asid) {
				892	dev_err(hdev->dev,
				893	"Failed to map memory, handle %u is not shared\n",
				894	handle);
				895	rc = -EPERM;
				896	goto shared_err;
				897	}
				898
				899	hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
				900	if (!hnode) {
				901	rc = -ENOMEM;
				902	goto hnode_err;
				903	}
				904
				905	ret_vaddr = get_va_block(hdev,
				906	is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
				907	phys_pg_pack->total_size, hint_addr, is_userptr);
				908	if (!ret_vaddr) {
				909	dev_err(hdev->dev, "no available va block for handle %u\n",
				910	handle);
				911	rc = -ENOMEM;
				912	goto va_block_err;
				913	}
				914
				915	mutex_lock(&ctx->mmu_lock);
				916
				917	rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack);
				918	if (rc) {
				919	mutex_unlock(&ctx->mmu_lock);
				920	dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
				921	handle);
				922	goto map_err;
				923	}
				924
				925	hdev->asic_funcs->mmu_invalidate_cache(hdev, false);
				926
				927	mutex_unlock(&ctx->mmu_lock);
				928
				929	ret_vaddr += phys_pg_pack->offset;
				930
				931	hnode->ptr = vm_type;
				932	hnode->vaddr = ret_vaddr;
				933
				934	mutex_lock(&ctx->mem_hash_lock);
				935	hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
				936	mutex_unlock(&ctx->mem_hash_lock);
				937
				938	*device_addr = ret_vaddr;
				939
				940	if (is_userptr)
				941	free_phys_pg_pack(hdev, phys_pg_pack);
				942
				943	return 0;
				944
				945	map_err:
				946	if (add_va_block(hdev,
				947	is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
				948	ret_vaddr,
				949	ret_vaddr + phys_pg_pack->total_size - 1))
				950	dev_warn(hdev->dev,
				951	"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
				952	handle, ret_vaddr);
				953
				954	va_block_err:
				955	kfree(hnode);
				956	hnode_err:
				957	shared_err:
				958	atomic_dec(&phys_pg_pack->mapping_cnt);
				959	if (is_userptr)
				960	free_phys_pg_pack(hdev, phys_pg_pack);
				961	init_page_pack_err:
				962	if (is_userptr)
				963	free_userptr(hdev, userptr);
				964
				965	return rc;
				966	}
				967
				968	/*
				969	* unmap_device_va - unmap the given device virtual address
				970	*
				971	* @ctx : current context
				972	* @vaddr : device virtual address to unmap
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	973	* @ctx_free : true if in context free flow, false otherwise.
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	974	*
				975	* This function does the following:
				976	* - Unmap the physical pages related to the given virtual address
				977	* - return the device virtual block to the virtual block list
				978	*/
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	979	static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	980	{
				981	struct hl_device *hdev = ctx->hdev;
				982	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
				983	struct hl_vm_hash_node *hnode = NULL;
				984	struct hl_userptr *userptr = NULL;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	985	struct hl_va_range *va_range;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	986	enum vm_type_t *vm_type;
				987	u64 next_vaddr, i;
				988	u32 page_size;
				989	bool is_userptr;
				990	int rc;
				991
				992	/* protect from double entrance */
				993	mutex_lock(&ctx->mem_hash_lock);
				994	hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
				995	if (vaddr == hnode->vaddr)
				996	break;
				997
				998	if (!hnode) {
				999	mutex_unlock(&ctx->mem_hash_lock);
				1000	dev_err(hdev->dev,
				1001	"unmap failed, no mem hnode for vaddr 0x%llx\n",
				1002	vaddr);
				1003	return -EINVAL;
				1004	}
				1005
				1006	hash_del(&hnode->node);
				1007	mutex_unlock(&ctx->mem_hash_lock);
				1008
				1009	vm_type = hnode->ptr;
				1010
				1011	if (*vm_type == VM_TYPE_USERPTR) {
				1012	is_userptr = true;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1013	va_range = &ctx->host_va_range;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1014	userptr = hnode->ptr;
				1015	rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				1016	&phys_pg_pack);
				1017	if (rc) {
				1018	dev_err(hdev->dev,
				1019	"unable to init page pack for vaddr 0x%llx\n",
				1020	vaddr);
				1021	goto vm_type_err;
				1022	}
				1023	} else if (*vm_type == VM_TYPE_PHYS_PACK) {
				1024	is_userptr = false;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1025	va_range = &ctx->dram_va_range;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1026	phys_pg_pack = hnode->ptr;
				1027	} else {
				1028	dev_warn(hdev->dev,
				1029	"unmap failed, unknown vm desc for vaddr 0x%llx\n",
				1030	vaddr);
				1031	rc = -EFAULT;
				1032	goto vm_type_err;
				1033	}
				1034
				1035	if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
				1036	dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
				1037	rc = -EINVAL;
				1038	goto mapping_cnt_err;
				1039	}
				1040
				1041	page_size = phys_pg_pack->page_size;
				1042	vaddr &= ~(((u64) page_size) - 1);
				1043
				1044	next_vaddr = vaddr;
				1045
				1046	mutex_lock(&ctx->mmu_lock);
				1047
				1048	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
				1049	if (hl_mmu_unmap(ctx, next_vaddr, page_size))
				1050	dev_warn_ratelimited(hdev->dev,
				1051	"unmap failed for vaddr: 0x%llx\n", next_vaddr);
				1052
				1053	/* unmapping on Palladium can be really long, so avoid a CPU
				1054	* soft lockup bug by sleeping a little between unmapping pages
				1055	*/
				1056	if (hdev->pldm)
				1057	usleep_range(500, 1000);
				1058	}
				1059
				1060	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
				1061
				1062	mutex_unlock(&ctx->mmu_lock);
				1063
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1064	/*
				1065	* No point in maintaining the free VA block list if the context is
				1066	* closing as the list will be freed anyway
				1067	*/
				1068	if (!ctx_free) {
				1069	rc = add_va_block(hdev, va_range, vaddr,
				1070	vaddr + phys_pg_pack->total_size - 1);
				1071	if (rc)
				1072	dev_warn(hdev->dev,
				1073	"add va block failed for vaddr: 0x%llx\n",
				1074	vaddr);
				1075	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1076
				1077	atomic_dec(&phys_pg_pack->mapping_cnt);
				1078	kfree(hnode);
				1079
				1080	if (is_userptr) {
				1081	free_phys_pg_pack(hdev, phys_pg_pack);
				1082	free_userptr(hdev, userptr);
				1083	}
				1084
				1085	return 0;
				1086
				1087	mapping_cnt_err:
				1088	if (is_userptr)
				1089	free_phys_pg_pack(hdev, phys_pg_pack);
				1090	vm_type_err:
				1091	mutex_lock(&ctx->mem_hash_lock);
				1092	hash_add(ctx->mem_hash, &hnode->node, vaddr);
				1093	mutex_unlock(&ctx->mem_hash_lock);
				1094
				1095	return rc;
				1096	}
				1097
				1098	static int mem_ioctl_no_mmu(struct hl_fpriv hpriv, union hl_mem_args args)
				1099	{
				1100	struct hl_device *hdev = hpriv->hdev;
				1101	struct hl_ctx *ctx = hpriv->ctx;
				1102	u64 device_addr = 0;
				1103	u32 handle = 0;
				1104	int rc;
				1105
				1106	switch (args->in.op) {
				1107	case HL_MEM_OP_ALLOC:
				1108	if (args->in.alloc.mem_size == 0) {
				1109	dev_err(hdev->dev,
				1110	"alloc size must be larger than 0\n");
				1111	rc = -EINVAL;
				1112	goto out;
				1113	}
				1114
				1115	/* Force contiguous as there are no real MMU
				1116	* translations to overcome physical memory gaps
				1117	*/
				1118	args->in.flags \|= HL_MEM_CONTIGUOUS;
				1119	rc = alloc_device_memory(ctx, &args->in, &handle);
				1120
				1121	memset(args, 0, sizeof(*args));
				1122	args->out.handle = (__u64) handle;
				1123	break;
				1124
				1125	case HL_MEM_OP_FREE:
				1126	rc = free_device_memory(ctx, args->in.free.handle);
				1127	break;
				1128
				1129	case HL_MEM_OP_MAP:
				1130	if (args->in.flags & HL_MEM_USERPTR) {
				1131	device_addr = args->in.map_host.host_virt_addr;
				1132	rc = 0;
				1133	} else {
				1134	rc = get_paddr_from_handle(ctx, &args->in,
				1135	&device_addr);
				1136	}
				1137
				1138	memset(args, 0, sizeof(*args));
				1139	args->out.device_virt_addr = device_addr;
				1140	break;
				1141
				1142	case HL_MEM_OP_UNMAP:
				1143	rc = 0;
				1144	break;
				1145
				1146	default:
				1147	dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
				1148	rc = -ENOTTY;
				1149	break;
				1150	}
				1151
				1152	out:
				1153	return rc;
				1154	}
				1155
				1156	int hl_mem_ioctl(struct hl_fpriv hpriv, void data)
				1157	{
				1158	union hl_mem_args *args = data;
				1159	struct hl_device *hdev = hpriv->hdev;
				1160	struct hl_ctx *ctx = hpriv->ctx;
				1161	u64 device_addr = 0;
				1162	u32 handle = 0;
				1163	int rc;
				1164
				1165	if (hl_device_disabled_or_in_reset(hdev)) {
				1166	dev_warn_ratelimited(hdev->dev,
				1167	"Device is %s. Can't execute MEMORY IOCTL\n",
				1168	atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
				1169	return -EBUSY;
				1170	}
				1171
				1172	if (!hdev->mmu_enable)
				1173	return mem_ioctl_no_mmu(hpriv, args);
				1174
				1175	switch (args->in.op) {
				1176	case HL_MEM_OP_ALLOC:
				1177	if (!hdev->dram_supports_virtual_memory) {
				1178	dev_err(hdev->dev, "DRAM alloc is not supported\n");
				1179	rc = -EINVAL;
				1180	goto out;
				1181	}
				1182
				1183	if (args->in.alloc.mem_size == 0) {
				1184	dev_err(hdev->dev,
				1185	"alloc size must be larger than 0\n");
				1186	rc = -EINVAL;
				1187	goto out;
				1188	}
				1189	rc = alloc_device_memory(ctx, &args->in, &handle);
				1190
				1191	memset(args, 0, sizeof(*args));
				1192	args->out.handle = (__u64) handle;
				1193	break;
				1194
				1195	case HL_MEM_OP_FREE:
				1196	rc = free_device_memory(ctx, args->in.free.handle);
				1197	break;
				1198
				1199	case HL_MEM_OP_MAP:
				1200	rc = map_device_va(ctx, &args->in, &device_addr);
				1201
				1202	memset(args, 0, sizeof(*args));
				1203	args->out.device_virt_addr = device_addr;
				1204	break;
				1205
				1206	case HL_MEM_OP_UNMAP:
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1207	rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
				1208	false);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1209	break;
				1210
				1211	default:
				1212	dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
				1213	rc = -ENOTTY;
				1214	break;
				1215	}
				1216
				1217	out:
				1218	return rc;
				1219	}
				1220
				1221	/*
				1222	* hl_pin_host_memory - pins a chunk of host memory
				1223	*
				1224	* @hdev : pointer to the habanalabs device structure
				1225	* @addr : the user-space virtual address of the memory area
				1226	* @size : the size of the memory area
				1227	* @userptr : pointer to hl_userptr structure
				1228	*
				1229	* This function does the following:
				1230	* - Pins the physical pages
				1231	* - Create a SG list from those pages
				1232	*/
				1233	int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
				1234	struct hl_userptr *userptr)
				1235	{
				1236	u64 start, end;
				1237	u32 npages, offset;
				1238	int rc;
				1239
				1240	if (!size) {
				1241	dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
				1242	return -EINVAL;
				1243	}
				1244
				1245	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
				1246	dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
				1247	return -EFAULT;
				1248	}
				1249
				1250	/*
				1251	* If the combination of the address and size requested for this memory
				1252	* region causes an integer overflow, return error.
				1253	*/
				1254	if (((addr + size) < addr) \|\|
				1255	PAGE_ALIGN(addr + size) < (addr + size)) {
				1256	dev_err(hdev->dev,
				1257	"user pointer 0x%llx + %llu causes integer overflow\n",
				1258	addr, size);
				1259	return -EINVAL;
				1260	}
				1261
				1262	start = addr & PAGE_MASK;
				1263	offset = addr & ~PAGE_MASK;
				1264	end = PAGE_ALIGN(addr + size);
				1265	npages = (end - start) >> PAGE_SHIFT;
				1266
				1267	userptr->size = size;
				1268	userptr->addr = addr;
				1269	userptr->dma_mapped = false;
				1270	INIT_LIST_HEAD(&userptr->job_node);
				1271
				1272	userptr->vec = frame_vector_create(npages);
				1273	if (!userptr->vec) {
				1274	dev_err(hdev->dev, "Failed to create frame vector\n");
				1275	return -ENOMEM;
				1276	}
				1277
				1278	rc = get_vaddr_frames(start, npages, FOLL_FORCE \| FOLL_WRITE,
				1279	userptr->vec);
				1280
				1281	if (rc != npages) {
				1282	dev_err(hdev->dev,
				1283	"Failed to map host memory, user ptr probably wrong\n");
				1284	if (rc < 0)
				1285	goto destroy_framevec;
				1286	rc = -EFAULT;
				1287	goto put_framevec;
				1288	}
				1289
				1290	if (frame_vector_to_pages(userptr->vec) < 0) {
				1291	dev_err(hdev->dev,
				1292	"Failed to translate frame vector to pages\n");
				1293	rc = -EFAULT;
				1294	goto put_framevec;
				1295	}
				1296
				1297	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
				1298	if (!userptr->sgt) {
				1299	rc = -ENOMEM;
				1300	goto put_framevec;
				1301	}
				1302
				1303	rc = sg_alloc_table_from_pages(userptr->sgt,
				1304	frame_vector_pages(userptr->vec),
				1305	npages, offset, size, GFP_ATOMIC);
				1306	if (rc < 0) {
				1307	dev_err(hdev->dev, "failed to create SG table from pages\n");
				1308	goto free_sgt;
				1309	}
				1310
				1311	hl_debugfs_add_userptr(hdev, userptr);
				1312
				1313	return 0;
				1314
				1315	free_sgt:
				1316	kfree(userptr->sgt);
				1317	put_framevec:
				1318	put_vaddr_frames(userptr->vec);
				1319	destroy_framevec:
				1320	frame_vector_destroy(userptr->vec);
				1321	return rc;
				1322	}
				1323
				1324	/*
				1325	* hl_unpin_host_memory - unpins a chunk of host memory
				1326	*
				1327	* @hdev : pointer to the habanalabs device structure
				1328	* @userptr : pointer to hl_userptr structure
				1329	*
				1330	* This function does the following:
				1331	* - Unpins the physical pages related to the host memory
				1332	* - Free the SG list
				1333	*/
				1334	int hl_unpin_host_memory(struct hl_device hdev, struct hl_userptr userptr)
				1335	{
				1336	struct page **pages;
				1337
				1338	hl_debugfs_remove_userptr(hdev, userptr);
				1339
				1340	if (userptr->dma_mapped)
				1341	hdev->asic_funcs->hl_dma_unmap_sg(hdev,
				1342	userptr->sgt->sgl,
				1343	userptr->sgt->nents,
				1344	userptr->dir);
				1345
				1346	pages = frame_vector_pages(userptr->vec);
				1347	if (!IS_ERR(pages)) {
				1348	int i;
				1349
				1350	for (i = 0; i < frame_vector_count(userptr->vec); i++)
				1351	set_page_dirty_lock(pages[i]);
				1352	}
				1353	put_vaddr_frames(userptr->vec);
				1354	frame_vector_destroy(userptr->vec);
				1355
				1356	list_del(&userptr->job_node);
				1357
				1358	sg_free_table(userptr->sgt);
				1359	kfree(userptr->sgt);
				1360
				1361	return 0;
				1362	}
				1363
				1364	/*
				1365	* hl_userptr_delete_list - clear userptr list
				1366	*
				1367	* @hdev : pointer to the habanalabs device structure
				1368	* @userptr_list : pointer to the list to clear
				1369	*
				1370	* This function does the following:
				1371	* - Iterates over the list and unpins the host memory and frees the userptr
				1372	* structure.
				1373	*/
				1374	void hl_userptr_delete_list(struct hl_device *hdev,
				1375	struct list_head *userptr_list)
				1376	{
				1377	struct hl_userptr userptr, tmp;
				1378
				1379	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
				1380	hl_unpin_host_memory(hdev, userptr);
				1381	kfree(userptr);
				1382	}
				1383
				1384	INIT_LIST_HEAD(userptr_list);
				1385	}
				1386
				1387	/*
				1388	* hl_userptr_is_pinned - returns whether the given userptr is pinned
				1389	*
				1390	* @hdev : pointer to the habanalabs device structure
				1391	* @userptr_list : pointer to the list to clear
				1392	* @userptr : pointer to userptr to check
				1393	*
				1394	* This function does the following:
				1395	* - Iterates over the list and checks if the given userptr is in it, means is
				1396	* pinned. If so, returns true, otherwise returns false.
				1397	*/
				1398	bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
				1399	u32 size, struct list_head *userptr_list,
				1400	struct hl_userptr **userptr)
				1401	{
				1402	list_for_each_entry((*userptr), userptr_list, job_node) {
				1403	if ((addr == (userptr)->addr) && (size == (userptr)->size))
				1404	return true;
				1405	}
				1406
				1407	return false;
				1408	}
				1409
				1410	/*
				1411	* hl_va_range_init - initialize virtual addresses range
				1412	*
				1413	* @hdev : pointer to the habanalabs device structure
				1414	* @va_range : pointer to the range to initialize
				1415	* @start : range start address
				1416	* @end : range end address
				1417	*
				1418	* This function does the following:
				1419	* - Initializes the virtual addresses list of the given range with the given
				1420	* addresses.
				1421	*/
				1422	static int hl_va_range_init(struct hl_device *hdev,
				1423	struct hl_va_range *va_range, u64 start, u64 end)
				1424	{
				1425	int rc;
				1426
				1427	INIT_LIST_HEAD(&va_range->list);
				1428
				1429	/* PAGE_SIZE alignment */
				1430
				1431	if (start & (PAGE_SIZE - 1)) {
				1432	start &= PAGE_MASK;
				1433	start += PAGE_SIZE;
				1434	}
				1435
				1436	if (end & (PAGE_SIZE - 1))
				1437	end &= PAGE_MASK;
				1438
				1439	if (start >= end) {
				1440	dev_err(hdev->dev, "too small vm range for va list\n");
				1441	return -EFAULT;
				1442	}
				1443
				1444	rc = add_va_block(hdev, va_range, start, end);
				1445
				1446	if (rc) {
				1447	dev_err(hdev->dev, "Failed to init host va list\n");
				1448	return rc;
				1449	}
				1450
				1451	va_range->start_addr = start;
				1452	va_range->end_addr = end;
				1453
				1454	return 0;
				1455	}
				1456
				1457	/*
				1458	* hl_vm_ctx_init_with_ranges - initialize virtual memory for context
				1459	*
				1460	* @ctx : pointer to the habanalabs context structure
				1461	* @host_range_start : host virtual addresses range start
				1462	* @host_range_end : host virtual addresses range end
				1463	* @dram_range_start : dram virtual addresses range start
				1464	* @dram_range_end : dram virtual addresses range end
				1465	*
				1466	* This function initializes the following:
				1467	* - MMU for context
				1468	* - Virtual address to area descriptor hashtable
				1469	* - Virtual block list of available virtual memory
				1470	*/
				1471	static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
				1472	u64 host_range_end, u64 dram_range_start,
				1473	u64 dram_range_end)
				1474	{
				1475	struct hl_device *hdev = ctx->hdev;
				1476	int rc;
				1477
				1478	rc = hl_mmu_ctx_init(ctx);
				1479	if (rc) {
				1480	dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
				1481	return rc;
				1482	}
				1483
				1484	mutex_init(&ctx->mem_hash_lock);
				1485	hash_init(ctx->mem_hash);
				1486
				1487	mutex_init(&ctx->host_va_range.lock);
				1488
				1489	rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
				1490	host_range_end);
				1491	if (rc) {
				1492	dev_err(hdev->dev, "failed to init host vm range\n");
				1493	goto host_vm_err;
				1494	}
				1495
				1496	mutex_init(&ctx->dram_va_range.lock);
				1497
				1498	rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
				1499	dram_range_end);
				1500	if (rc) {
				1501	dev_err(hdev->dev, "failed to init dram vm range\n");
				1502	goto dram_vm_err;
				1503	}
				1504
				1505	hl_debugfs_add_ctx_mem_hash(hdev, ctx);
				1506
				1507	return 0;
				1508
				1509	dram_vm_err:
				1510	mutex_destroy(&ctx->dram_va_range.lock);
				1511
				1512	mutex_lock(&ctx->host_va_range.lock);
				1513	clear_va_list_locked(hdev, &ctx->host_va_range.list);
				1514	mutex_unlock(&ctx->host_va_range.lock);
				1515	host_vm_err:
				1516	mutex_destroy(&ctx->host_va_range.lock);
				1517	mutex_destroy(&ctx->mem_hash_lock);
				1518	hl_mmu_ctx_fini(ctx);
				1519
				1520	return rc;
				1521	}
				1522
				1523	int hl_vm_ctx_init(struct hl_ctx *ctx)
				1524	{
				1525	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
				1526	u64 host_range_start, host_range_end, dram_range_start,
				1527	dram_range_end;
				1528
				1529	atomic64_set(&ctx->dram_phys_mem, 0);
				1530
				1531	/*
				1532	* - If MMU is enabled, init the ranges as usual.
				1533	* - If MMU is disabled, in case of host mapping, the returned address
				1534	* is the given one.
				1535	* In case of DRAM mapping, the returned address is the physical
				1536	* address of the memory related to the given handle.
				1537	*/
				1538	if (ctx->hdev->mmu_enable) {
				1539	dram_range_start = prop->va_space_dram_start_address;
				1540	dram_range_end = prop->va_space_dram_end_address;
				1541	host_range_start = prop->va_space_host_start_address;
				1542	host_range_end = prop->va_space_host_end_address;
				1543	} else {
				1544	dram_range_start = prop->dram_user_base_address;
				1545	dram_range_end = prop->dram_end_address;
				1546	host_range_start = prop->dram_user_base_address;
				1547	host_range_end = prop->dram_end_address;
				1548	}
				1549
				1550	return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
				1551	dram_range_start, dram_range_end);
				1552	}
				1553
				1554	/*
				1555	* hl_va_range_fini - clear a virtual addresses range
				1556	*
				1557	* @hdev : pointer to the habanalabs structure
				1558	* va_range : pointer to virtual addresses range
				1559	*
				1560	* This function initializes the following:
				1561	* - Checks that the given range contains the whole initial range
				1562	* - Frees the virtual addresses block list and its lock
				1563	*/
				1564	static void hl_va_range_fini(struct hl_device *hdev,
				1565	struct hl_va_range *va_range)
				1566	{
				1567	struct hl_vm_va_block *va_block;
				1568
				1569	if (list_empty(&va_range->list)) {
				1570	dev_warn(hdev->dev,
				1571	"va list should not be empty on cleanup!\n");
				1572	goto out;
				1573	}
				1574
				1575	if (!list_is_singular(&va_range->list)) {
				1576	dev_warn(hdev->dev,
				1577	"va list should not contain multiple blocks on cleanup!\n");
				1578	goto free_va_list;
				1579	}
				1580
				1581	va_block = list_first_entry(&va_range->list, typeof(*va_block), node);
				1582
				1583	if (va_block->start != va_range->start_addr \|\|
				1584	va_block->end != va_range->end_addr) {
				1585	dev_warn(hdev->dev,
				1586	"wrong va block on cleanup, from 0x%llx to 0x%llx\n",
				1587	va_block->start, va_block->end);
				1588	goto free_va_list;
				1589	}
				1590
				1591	free_va_list:
				1592	mutex_lock(&va_range->lock);
				1593	clear_va_list_locked(hdev, &va_range->list);
				1594	mutex_unlock(&va_range->lock);
				1595
				1596	out:
				1597	mutex_destroy(&va_range->lock);
				1598	}
				1599
				1600	/*
				1601	* hl_vm_ctx_fini - virtual memory teardown of context
				1602	*
				1603	* @ctx : pointer to the habanalabs context structure
				1604	*
				1605	* This function perform teardown the following:
				1606	* - Virtual block list of available virtual memory
				1607	* - Virtual address to area descriptor hashtable
				1608	* - MMU for context
				1609	*
				1610	* In addition this function does the following:
				1611	* - Unmaps the existing hashtable nodes if the hashtable is not empty. The
				1612	* hashtable should be empty as no valid mappings should exist at this
				1613	* point.
				1614	* - Frees any existing physical page list from the idr which relates to the
				1615	* current context asid.
				1616	* - This function checks the virtual block list for correctness. At this point
				1617	* the list should contain one element which describes the whole virtual
				1618	* memory range of the context. Otherwise, a warning is printed.
				1619	*/
				1620	void hl_vm_ctx_fini(struct hl_ctx *ctx)
				1621	{
				1622	struct hl_device *hdev = ctx->hdev;
				1623	struct hl_vm *vm = &hdev->vm;
				1624	struct hl_vm_phys_pg_pack *phys_pg_list;
				1625	struct hl_vm_hash_node *hnode;
				1626	struct hlist_node *tmp_node;
				1627	int i;
				1628
				1629	hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
				1630
				1631	if (!hash_empty(ctx->mem_hash))
				1632	dev_notice(hdev->dev, "ctx is freed while it has va in use\n");
				1633
				1634	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
				1635	dev_dbg(hdev->dev,
				1636	"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
				1637	hnode->vaddr, ctx->asid);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1638	unmap_device_va(ctx, hnode->vaddr, true);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1639	}
				1640
				1641	spin_lock(&vm->idr_lock);
				1642	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
				1643	if (phys_pg_list->asid == ctx->asid) {
				1644	dev_dbg(hdev->dev,
				1645	"page list 0x%p of asid %d is still alive\n",
				1646	phys_pg_list, ctx->asid);
				1647	atomic64_sub(phys_pg_list->total_size,
				1648	&hdev->dram_used_mem);
				1649	free_phys_pg_pack(hdev, phys_pg_list);
				1650	idr_remove(&vm->phys_pg_pack_handles, i);
				1651	}
				1652	spin_unlock(&vm->idr_lock);
				1653
				1654	hl_va_range_fini(hdev, &ctx->dram_va_range);
				1655	hl_va_range_fini(hdev, &ctx->host_va_range);
				1656
				1657	mutex_destroy(&ctx->mem_hash_lock);
				1658	hl_mmu_ctx_fini(ctx);
				1659	}
				1660
				1661	/*
				1662	* hl_vm_init - initialize virtual memory module
				1663	*
				1664	* @hdev : pointer to the habanalabs device structure
				1665	*
				1666	* This function initializes the following:
				1667	* - MMU module
				1668	* - DRAM physical pages pool of 2MB
				1669	* - Idr for device memory allocation handles
				1670	*/
				1671	int hl_vm_init(struct hl_device *hdev)
				1672	{
				1673	struct asic_fixed_properties *prop = &hdev->asic_prop;
				1674	struct hl_vm *vm = &hdev->vm;
				1675	int rc;
				1676
				1677	vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
				1678	if (!vm->dram_pg_pool) {
				1679	dev_err(hdev->dev, "Failed to create dram page pool\n");
				1680	return -ENOMEM;
				1681	}
				1682
				1683	kref_init(&vm->dram_pg_pool_refcount);
				1684
				1685	rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
				1686	prop->dram_end_address - prop->dram_user_base_address,
				1687	-1);
				1688
				1689	if (rc) {
				1690	dev_err(hdev->dev,
				1691	"Failed to add memory to dram page pool %d\n", rc);
				1692	goto pool_add_err;
				1693	}
				1694
				1695	spin_lock_init(&vm->idr_lock);
				1696	idr_init(&vm->phys_pg_pack_handles);
				1697
				1698	atomic64_set(&hdev->dram_used_mem, 0);
				1699
				1700	vm->init_done = true;
				1701
				1702	return 0;
				1703
				1704	pool_add_err:
				1705	gen_pool_destroy(vm->dram_pg_pool);
				1706
				1707	return rc;
				1708	}
				1709
				1710	/*
				1711	* hl_vm_fini - virtual memory module teardown
				1712	*
				1713	* @hdev : pointer to the habanalabs device structure
				1714	*
				1715	* This function perform teardown to the following:
				1716	* - Idr for device memory allocation handles
				1717	* - DRAM physical pages pool of 2MB
				1718	* - MMU module
				1719	*/
				1720	void hl_vm_fini(struct hl_device *hdev)
				1721	{
				1722	struct hl_vm *vm = &hdev->vm;
				1723
				1724	if (!vm->init_done)
				1725	return;
				1726
				1727	/*
				1728	* At this point all the contexts should be freed and hence no DRAM
				1729	* memory should be in use. Hence the DRAM pool should be freed here.
				1730	*/
				1731	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
				1732	dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
				1733	__func__);
				1734
				1735	vm->init_done = false;
				1736	}