Blame - drivers/misc/habanalabs/memory.c - hafnium/third_party/linux.git

blob: 365fb0cb8dfffa87291dad6eeeb8625ce0de3212 [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	/*
				4	* Copyright 2016-2019 HabanaLabs, Ltd.
				5	* All Rights Reserved.
				6	*/
				7
				8	#include <uapi/misc/habanalabs.h>
				9	#include "habanalabs.h"
				10	#include "include/hw_ip/mmu/mmu_general.h"
				11
				12	#include <linux/uaccess.h>
				13	#include <linux/slab.h>
				14	#include <linux/genalloc.h>
				15
				16	#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT)
				17	#define HL_MMU_DEBUG 0
				18
				19	/*
				20	* The va ranges in context object contain a list with the available chunks of
				21	* device virtual memory.
				22	* There is one range for host allocations and one for DRAM allocations.
				23	*
				24	* On initialization each range contains one chunk of all of its available
				25	* virtual range which is a half of the total device virtual range.
				26	*
				27	* On each mapping of physical pages, a suitable virtual range chunk (with a
				28	* minimum size) is selected from the list. If the chunk size equals the
				29	* requested size, the chunk is returned. Otherwise, the chunk is split into
				30	* two chunks - one to return as result and a remainder to stay in the list.
				31	*
				32	* On each unmapping of a virtual address, the relevant virtual chunk is
				33	* returned to the list. The chunk is added to the list and, if its edges match
				34	* the edges of the adjacent chunks (meaning a contiguous chunk can be created),
				35	* the chunks are merged.
				36	*
				37	* On finish, the list is checked to have only one chunk of all the relevant
				38	* virtual range (which is a half of the device total virtual range).
				39	* If not (means not all mappings were unmapped), a warning is printed.
				40	*/
				41
				42	/*
				43	* alloc_device_memory - allocate device memory
				44	*
				45	* @ctx : current context
				46	* @args : host parameters containing the requested size
				47	* @ret_handle : result handle
				48	*
				49	* This function does the following:
				50	* - Allocate the requested size rounded up to 2MB pages
				51	* - Return unique handle
				52	*/
				53	static int alloc_device_memory(struct hl_ctx ctx, struct hl_mem_in args,
				54	u32 *ret_handle)
				55	{
				56	struct hl_device *hdev = ctx->hdev;
				57	struct hl_vm *vm = &hdev->vm;
				58	struct hl_vm_phys_pg_pack *phys_pg_pack;
				59	u64 paddr = 0, total_size, num_pgs, i;
				60	u32 num_curr_pgs, page_size, page_shift;
				61	int handle, rc;
				62	bool contiguous;
				63
				64	num_curr_pgs = 0;
				65	page_size = hdev->asic_prop.dram_page_size;
				66	page_shift = __ffs(page_size);
				67	num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
				68	total_size = num_pgs << page_shift;
				69
				70	contiguous = args->flags & HL_MEM_CONTIGUOUS;
				71
				72	if (contiguous) {
				73	paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
				74	if (!paddr) {
				75	dev_err(hdev->dev,
				76	"failed to allocate %llu huge contiguous pages\n",
				77	num_pgs);
				78	return -ENOMEM;
				79	}
				80	}
				81
				82	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
				83	if (!phys_pg_pack) {
				84	rc = -ENOMEM;
				85	goto pages_pack_err;
				86	}
				87
				88	phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
				89	phys_pg_pack->asid = ctx->asid;
				90	phys_pg_pack->npages = num_pgs;
				91	phys_pg_pack->page_size = page_size;
				92	phys_pg_pack->total_size = total_size;
				93	phys_pg_pack->flags = args->flags;
				94	phys_pg_pack->contiguous = contiguous;
				95
				96	phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
				97	if (!phys_pg_pack->pages) {
				98	rc = -ENOMEM;
				99	goto pages_arr_err;
				100	}
				101
				102	if (phys_pg_pack->contiguous) {
				103	for (i = 0 ; i < num_pgs ; i++)
				104	phys_pg_pack->pages[i] = paddr + i * page_size;
				105	} else {
				106	for (i = 0 ; i < num_pgs ; i++) {
				107	phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
				108	vm->dram_pg_pool,
				109	page_size);
				110	if (!phys_pg_pack->pages[i]) {
				111	dev_err(hdev->dev,
				112	"Failed to allocate device memory (out of memory)\n");
				113	rc = -ENOMEM;
				114	goto page_err;
				115	}
				116
				117	num_curr_pgs++;
				118	}
				119	}
				120
				121	spin_lock(&vm->idr_lock);
				122	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
				123	GFP_ATOMIC);
				124	spin_unlock(&vm->idr_lock);
				125
				126	if (handle < 0) {
				127	dev_err(hdev->dev, "Failed to get handle for page\n");
				128	rc = -EFAULT;
				129	goto idr_err;
				130	}
				131
				132	for (i = 0 ; i < num_pgs ; i++)
				133	kref_get(&vm->dram_pg_pool_refcount);
				134
				135	phys_pg_pack->handle = handle;
				136
				137	atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
				138	atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
				139
				140	*ret_handle = handle;
				141
				142	return 0;
				143
				144	idr_err:
				145	page_err:
				146	if (!phys_pg_pack->contiguous)
				147	for (i = 0 ; i < num_curr_pgs ; i++)
				148	gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
				149	page_size);
				150
				151	kvfree(phys_pg_pack->pages);
				152	pages_arr_err:
				153	kfree(phys_pg_pack);
				154	pages_pack_err:
				155	if (contiguous)
				156	gen_pool_free(vm->dram_pg_pool, paddr, total_size);
				157
				158	return rc;
				159	}
				160
				161	/*
				162	* get_userptr_from_host_va - initialize userptr structure from given host
				163	* virtual address
				164	*
				165	* @hdev : habanalabs device structure
				166	* @args : parameters containing the virtual address and size
				167	* @p_userptr : pointer to result userptr structure
				168	*
				169	* This function does the following:
				170	* - Allocate userptr structure
				171	* - Pin the given host memory using the userptr structure
				172	* - Perform DMA mapping to have the DMA addresses of the pages
				173	*/
				174	static int get_userptr_from_host_va(struct hl_device *hdev,
				175	struct hl_mem_in args, struct hl_userptr *p_userptr)
				176	{
				177	struct hl_userptr *userptr;
				178	int rc;
				179
				180	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
				181	if (!userptr) {
				182	rc = -ENOMEM;
				183	goto userptr_err;
				184	}
				185
				186	rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr,
				187	args->map_host.mem_size, userptr);
				188	if (rc) {
				189	dev_err(hdev->dev, "Failed to pin host memory\n");
				190	goto pin_err;
				191	}
				192
				193	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
				194	userptr->sgt->nents, DMA_BIDIRECTIONAL);
				195	if (rc) {
				196	dev_err(hdev->dev, "failed to map sgt with DMA region\n");
				197	goto dma_map_err;
				198	}
				199
				200	userptr->dma_mapped = true;
				201	userptr->dir = DMA_BIDIRECTIONAL;
				202	userptr->vm_type = VM_TYPE_USERPTR;
				203
				204	*p_userptr = userptr;
				205
				206	return 0;
				207
				208	dma_map_err:
				209	hl_unpin_host_memory(hdev, userptr);
				210	pin_err:
				211	kfree(userptr);
				212	userptr_err:
				213
				214	return rc;
				215	}
				216
				217	/*
				218	* free_userptr - free userptr structure
				219	*
				220	* @hdev : habanalabs device structure
				221	* @userptr : userptr to free
				222	*
				223	* This function does the following:
				224	* - Unpins the physical pages
				225	* - Frees the userptr structure
				226	*/
				227	static void free_userptr(struct hl_device hdev, struct hl_userptr userptr)
				228	{
				229	hl_unpin_host_memory(hdev, userptr);
				230	kfree(userptr);
				231	}
				232
				233	/*
				234	* dram_pg_pool_do_release - free DRAM pages pool
				235	*
				236	* @ref : pointer to reference object
				237	*
				238	* This function does the following:
				239	* - Frees the idr structure of physical pages handles
				240	* - Frees the generic pool of DRAM physical pages
				241	*/
				242	static void dram_pg_pool_do_release(struct kref *ref)
				243	{
				244	struct hl_vm *vm = container_of(ref, struct hl_vm,
				245	dram_pg_pool_refcount);
				246
				247	/*
				248	* free the idr here as only here we know for sure that there are no
				249	* allocated physical pages and hence there are no handles in use
				250	*/
				251	idr_destroy(&vm->phys_pg_pack_handles);
				252	gen_pool_destroy(vm->dram_pg_pool);
				253	}
				254
				255	/*
				256	* free_phys_pg_pack - free physical page pack
				257	*
				258	* @hdev : habanalabs device structure
				259	* @phys_pg_pack : physical page pack to free
				260	*
				261	* This function does the following:
				262	* - For DRAM memory only, iterate over the pack and free each physical block
				263	* structure by returning it to the general pool
				264	* - Free the hl_vm_phys_pg_pack structure
				265	*/
				266	static void free_phys_pg_pack(struct hl_device *hdev,
				267	struct hl_vm_phys_pg_pack *phys_pg_pack)
				268	{
				269	struct hl_vm *vm = &hdev->vm;
				270	u64 i;
				271
				272	if (!phys_pg_pack->created_from_userptr) {
				273	if (phys_pg_pack->contiguous) {
				274	gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
				275	phys_pg_pack->total_size);
				276
				277	for (i = 0; i < phys_pg_pack->npages ; i++)
				278	kref_put(&vm->dram_pg_pool_refcount,
				279	dram_pg_pool_do_release);
				280	} else {
				281	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
				282	gen_pool_free(vm->dram_pg_pool,
				283	phys_pg_pack->pages[i],
				284	phys_pg_pack->page_size);
				285	kref_put(&vm->dram_pg_pool_refcount,
				286	dram_pg_pool_do_release);
				287	}
				288	}
				289	}
				290
				291	kvfree(phys_pg_pack->pages);
				292	kfree(phys_pg_pack);
				293	}
				294
				295	/*
				296	* free_device_memory - free device memory
				297	*
				298	* @ctx : current context
				299	* @handle : handle of the memory chunk to free
				300	*
				301	* This function does the following:
				302	* - Free the device memory related to the given handle
				303	*/
				304	static int free_device_memory(struct hl_ctx *ctx, u32 handle)
				305	{
				306	struct hl_device *hdev = ctx->hdev;
				307	struct hl_vm *vm = &hdev->vm;
				308	struct hl_vm_phys_pg_pack *phys_pg_pack;
				309
				310	spin_lock(&vm->idr_lock);
				311	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
				312	if (phys_pg_pack) {
				313	if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
				314	dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
				315	handle);
				316	spin_unlock(&vm->idr_lock);
				317	return -EINVAL;
				318	}
				319
				320	/*
				321	* must remove from idr before the freeing of the physical
				322	* pages as the refcount of the pool is also the trigger of the
				323	* idr destroy
				324	*/
				325	idr_remove(&vm->phys_pg_pack_handles, handle);
				326	spin_unlock(&vm->idr_lock);
				327
				328	atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
				329	atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
				330
				331	free_phys_pg_pack(hdev, phys_pg_pack);
				332	} else {
				333	spin_unlock(&vm->idr_lock);
				334	dev_err(hdev->dev,
				335	"free device memory failed, no match for handle %u\n",
				336	handle);
				337	return -EINVAL;
				338	}
				339
				340	return 0;
				341	}
				342
				343	/*
				344	* clear_va_list_locked - free virtual addresses list
				345	*
				346	* @hdev : habanalabs device structure
				347	* @va_list : list of virtual addresses to free
				348	*
				349	* This function does the following:
				350	* - Iterate over the list and free each virtual addresses block
				351	*
				352	* This function should be called only when va_list lock is taken
				353	*/
				354	static void clear_va_list_locked(struct hl_device *hdev,
				355	struct list_head *va_list)
				356	{
				357	struct hl_vm_va_block va_block, tmp;
				358
				359	list_for_each_entry_safe(va_block, tmp, va_list, node) {
				360	list_del(&va_block->node);
				361	kfree(va_block);
				362	}
				363	}
				364
				365	/*
				366	* print_va_list_locked - print virtual addresses list
				367	*
				368	* @hdev : habanalabs device structure
				369	* @va_list : list of virtual addresses to print
				370	*
				371	* This function does the following:
				372	* - Iterate over the list and print each virtual addresses block
				373	*
				374	* This function should be called only when va_list lock is taken
				375	*/
				376	static void print_va_list_locked(struct hl_device *hdev,
				377	struct list_head *va_list)
				378	{
				379	#if HL_MMU_DEBUG
				380	struct hl_vm_va_block *va_block;
				381
				382	dev_dbg(hdev->dev, "print va list:\n");
				383
				384	list_for_each_entry(va_block, va_list, node)
				385	dev_dbg(hdev->dev,
				386	"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
				387	va_block->start, va_block->end, va_block->size);
				388	#endif
				389	}
				390
				391	/*
				392	* merge_va_blocks_locked - merge a virtual block if possible
				393	*
				394	* @hdev : pointer to the habanalabs device structure
				395	* @va_list : pointer to the virtual addresses block list
				396	* @va_block : virtual block to merge with adjacent blocks
				397	*
				398	* This function does the following:
				399	* - Merge the given blocks with the adjacent blocks if their virtual ranges
				400	* create a contiguous virtual range
				401	*
				402	* This Function should be called only when va_list lock is taken
				403	*/
				404	static void merge_va_blocks_locked(struct hl_device *hdev,
				405	struct list_head va_list, struct hl_vm_va_block va_block)
				406	{
				407	struct hl_vm_va_block prev, next;
				408
				409	prev = list_prev_entry(va_block, node);
				410	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
				411	prev->end = va_block->end;
				412	prev->size = prev->end - prev->start;
				413	list_del(&va_block->node);
				414	kfree(va_block);
				415	va_block = prev;
				416	}
				417
				418	next = list_next_entry(va_block, node);
				419	if (&next->node != va_list && va_block->end + 1 == next->start) {
				420	next->start = va_block->start;
				421	next->size = next->end - next->start;
				422	list_del(&va_block->node);
				423	kfree(va_block);
				424	}
				425	}
				426
				427	/*
				428	* add_va_block_locked - add a virtual block to the virtual addresses list
				429	*
				430	* @hdev : pointer to the habanalabs device structure
				431	* @va_list : pointer to the virtual addresses block list
				432	* @start : start virtual address
				433	* @end : end virtual address
				434	*
				435	* This function does the following:
				436	* - Add the given block to the virtual blocks list and merge with other
				437	* blocks if a contiguous virtual block can be created
				438	*
				439	* This Function should be called only when va_list lock is taken
				440	*/
				441	static int add_va_block_locked(struct hl_device *hdev,
				442	struct list_head *va_list, u64 start, u64 end)
				443	{
				444	struct hl_vm_va_block va_block, res = NULL;
				445	u64 size = end - start;
				446
				447	print_va_list_locked(hdev, va_list);
				448
				449	list_for_each_entry(va_block, va_list, node) {
				450	/* TODO: remove upon matureness */
				451	if (hl_mem_area_crosses_range(start, size, va_block->start,
				452	va_block->end)) {
				453	dev_err(hdev->dev,
				454	"block crossing ranges at start 0x%llx, end 0x%llx\n",
				455	va_block->start, va_block->end);
				456	return -EINVAL;
				457	}
				458
				459	if (va_block->end < start)
				460	res = va_block;
				461	}
				462
				463	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
				464	if (!va_block)
				465	return -ENOMEM;
				466
				467	va_block->start = start;
				468	va_block->end = end;
				469	va_block->size = size;
				470
				471	if (!res)
				472	list_add(&va_block->node, va_list);
				473	else
				474	list_add(&va_block->node, &res->node);
				475
				476	merge_va_blocks_locked(hdev, va_list, va_block);
				477
				478	print_va_list_locked(hdev, va_list);
				479
				480	return 0;
				481	}
				482
				483	/*
				484	* add_va_block - wrapper for add_va_block_locked
				485	*
				486	* @hdev : pointer to the habanalabs device structure
				487	* @va_list : pointer to the virtual addresses block list
				488	* @start : start virtual address
				489	* @end : end virtual address
				490	*
				491	* This function does the following:
				492	* - Takes the list lock and calls add_va_block_locked
				493	*/
				494	static inline int add_va_block(struct hl_device *hdev,
				495	struct hl_va_range *va_range, u64 start, u64 end)
				496	{
				497	int rc;
				498
				499	mutex_lock(&va_range->lock);
				500	rc = add_va_block_locked(hdev, &va_range->list, start, end);
				501	mutex_unlock(&va_range->lock);
				502
				503	return rc;
				504	}
				505
				506	/*
				507	* get_va_block - get a virtual block with the requested size
				508	*
				509	* @hdev : pointer to the habanalabs device structure
				510	* @va_range : pointer to the virtual addresses range
				511	* @size : requested block size
				512	* @hint_addr : hint for request address by the user
				513	* @is_userptr : is host or DRAM memory
				514	*
				515	* This function does the following:
				516	* - Iterate on the virtual block list to find a suitable virtual block for the
				517	* requested size
				518	* - Reserve the requested block and update the list
				519	* - Return the start address of the virtual block
				520	*/
				521	static u64 get_va_block(struct hl_device *hdev,
				522	struct hl_va_range *va_range, u64 size, u64 hint_addr,
				523	bool is_userptr)
				524	{
				525	struct hl_vm_va_block va_block, new_va_block = NULL;
				526	u64 valid_start, valid_size, prev_start, prev_end, page_mask,
				527	res_valid_start = 0, res_valid_size = 0;
				528	u32 page_size;
				529	bool add_prev = false;
				530
				531	if (is_userptr) {
				532	/*
				533	* We cannot know if the user allocated memory with huge pages
				534	* or not, hence we continue with the biggest possible
				535	* granularity.
				536	*/
				537	page_size = PAGE_SIZE_2MB;
				538	page_mask = PAGE_MASK_2MB;
				539	} else {
				540	page_size = hdev->asic_prop.dram_page_size;
				541	page_mask = ~((u64)page_size - 1);
				542	}
				543
				544	mutex_lock(&va_range->lock);
				545
				546	print_va_list_locked(hdev, &va_range->list);
				547
				548	list_for_each_entry(va_block, &va_range->list, node) {
				549	/* calc the first possible aligned addr */
				550	valid_start = va_block->start;
				551
				552
				553	if (valid_start & (page_size - 1)) {
				554	valid_start &= page_mask;
				555	valid_start += page_size;
				556	if (valid_start > va_block->end)
				557	continue;
				558	}
				559
				560	valid_size = va_block->end - valid_start;
				561
				562	if (valid_size >= size &&
				563	(!new_va_block \|\| valid_size < res_valid_size)) {
				564
				565	new_va_block = va_block;
				566	res_valid_start = valid_start;
				567	res_valid_size = valid_size;
				568	}
				569
				570	if (hint_addr && hint_addr >= valid_start &&
				571	((hint_addr + size) <= va_block->end)) {
				572	new_va_block = va_block;
				573	res_valid_start = hint_addr;
				574	res_valid_size = valid_size;
				575	break;
				576	}
				577	}
				578
				579	if (!new_va_block) {
				580	dev_err(hdev->dev, "no available va block for size %llu\n",
				581	size);
				582	goto out;
				583	}
				584
				585	if (res_valid_start > new_va_block->start) {
				586	prev_start = new_va_block->start;
				587	prev_end = res_valid_start - 1;
				588
				589	new_va_block->start = res_valid_start;
				590	new_va_block->size = res_valid_size;
				591
				592	add_prev = true;
				593	}
				594
				595	if (new_va_block->size > size) {
				596	new_va_block->start += size;
				597	new_va_block->size = new_va_block->end - new_va_block->start;
				598	} else {
				599	list_del(&new_va_block->node);
				600	kfree(new_va_block);
				601	}
				602
				603	if (add_prev)
				604	add_va_block_locked(hdev, &va_range->list, prev_start,
				605	prev_end);
				606
				607	print_va_list_locked(hdev, &va_range->list);
				608	out:
				609	mutex_unlock(&va_range->lock);
				610
				611	return res_valid_start;
				612	}
				613
				614	/*
				615	* get_sg_info - get number of pages and the DMA address from SG list
				616	*
				617	* @sg : the SG list
				618	* @dma_addr : pointer to DMA address to return
				619	*
				620	* Calculate the number of consecutive pages described by the SG list. Take the
				621	* offset of the address in the first page, add to it the length and round it up
				622	* to the number of needed pages.
				623	*/
				624	static u32 get_sg_info(struct scatterlist sg, dma_addr_t dma_addr)
				625	{
				626	*dma_addr = sg_dma_address(sg);
				627
				628	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
				629	(PAGE_SIZE - 1)) >> PAGE_SHIFT;
				630	}
				631
				632	/*
				633	* init_phys_pg_pack_from_userptr - initialize physical page pack from host
				634	* memory
				635	*
				636	* @ctx : current context
				637	* @userptr : userptr to initialize from
				638	* @pphys_pg_pack : res pointer
				639	*
				640	* This function does the following:
				641	* - Pin the physical pages related to the given virtual block
				642	* - Create a physical page pack from the physical pages related to the given
				643	* virtual block
				644	*/
				645	static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
				646	struct hl_userptr *userptr,
				647	struct hl_vm_phys_pg_pack **pphys_pg_pack)
				648	{
				649	struct hl_vm_phys_pg_pack *phys_pg_pack;
				650	struct scatterlist *sg;
				651	dma_addr_t dma_addr;
				652	u64 page_mask, total_npages;
				653	u32 npages, page_size = PAGE_SIZE;
				654	bool first = true, is_huge_page_opt = true;
				655	int rc, i, j;
				656
				657	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
				658	if (!phys_pg_pack)
				659	return -ENOMEM;
				660
				661	phys_pg_pack->vm_type = userptr->vm_type;
				662	phys_pg_pack->created_from_userptr = true;
				663	phys_pg_pack->asid = ctx->asid;
				664	atomic_set(&phys_pg_pack->mapping_cnt, 1);
				665
				666	/* Only if all dma_addrs are aligned to 2MB and their
				667	* sizes is at least 2MB, we can use huge page mapping.
				668	* We limit the 2MB optimization to this condition,
				669	* since later on we acquire the related VA range as one
				670	* consecutive block.
				671	*/
				672	total_npages = 0;
				673	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
				674	npages = get_sg_info(sg, &dma_addr);
				675
				676	total_npages += npages;
				677
				678	if ((npages % PGS_IN_2MB_PAGE) \|\|
				679	(dma_addr & (PAGE_SIZE_2MB - 1)))
				680	is_huge_page_opt = false;
				681	}
				682
				683	if (is_huge_page_opt) {
				684	page_size = PAGE_SIZE_2MB;
				685	total_npages /= PGS_IN_2MB_PAGE;
				686	}
				687
				688	page_mask = ~(((u64) page_size) - 1);
				689
				690	phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
				691	GFP_KERNEL);
				692	if (!phys_pg_pack->pages) {
				693	rc = -ENOMEM;
				694	goto page_pack_arr_mem_err;
				695	}
				696
				697	phys_pg_pack->npages = total_npages;
				698	phys_pg_pack->page_size = page_size;
				699	phys_pg_pack->total_size = total_npages * page_size;
				700
				701	j = 0;
				702	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
				703	npages = get_sg_info(sg, &dma_addr);
				704
				705	/* align down to physical page size and save the offset */
				706	if (first) {
				707	first = false;
				708	phys_pg_pack->offset = dma_addr & (page_size - 1);
				709	dma_addr &= page_mask;
				710	}
				711
				712	while (npages) {
				713	phys_pg_pack->pages[j++] = dma_addr;
				714	dma_addr += page_size;
				715
				716	if (is_huge_page_opt)
				717	npages -= PGS_IN_2MB_PAGE;
				718	else
				719	npages--;
				720	}
				721	}
				722
				723	*pphys_pg_pack = phys_pg_pack;
				724
				725	return 0;
				726
				727	page_pack_arr_mem_err:
				728	kfree(phys_pg_pack);
				729
				730	return rc;
				731	}
				732
				733	/*
				734	* map_phys_page_pack - maps the physical page pack
				735	*
				736	* @ctx : current context
				737	* @vaddr : start address of the virtual area to map from
				738	* @phys_pg_pack : the pack of physical pages to map to
				739	*
				740	* This function does the following:
				741	* - Maps each chunk of virtual memory to matching physical chunk
				742	* - Stores number of successful mappings in the given argument
				743	* - Returns 0 on success, error code otherwise.
				744	*/
				745	static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
				746	struct hl_vm_phys_pg_pack *phys_pg_pack)
				747	{
				748	struct hl_device *hdev = ctx->hdev;
				749	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
				750	u32 page_size = phys_pg_pack->page_size;
				751	int rc = 0;
				752
				753	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
				754	paddr = phys_pg_pack->pages[i];
				755
				756	rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
				757	if (rc) {
				758	dev_err(hdev->dev,
				759	"map failed for handle %u, npages: %llu, mapped: %llu",
				760	phys_pg_pack->handle, phys_pg_pack->npages,
				761	mapped_pg_cnt);
				762	goto err;
				763	}
				764
				765	mapped_pg_cnt++;
				766	next_vaddr += page_size;
				767	}
				768
				769	return 0;
				770
				771	err:
				772	next_vaddr = vaddr;
				773	for (i = 0 ; i < mapped_pg_cnt ; i++) {
				774	if (hl_mmu_unmap(ctx, next_vaddr, page_size))
				775	dev_warn_ratelimited(hdev->dev,
				776	"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
				777	phys_pg_pack->handle, next_vaddr,
				778	phys_pg_pack->pages[i], page_size);
				779
				780	next_vaddr += page_size;
				781	}
				782
				783	return rc;
				784	}
				785
				786	static int get_paddr_from_handle(struct hl_ctx ctx, struct hl_mem_in args,
				787	u64 *paddr)
				788	{
				789	struct hl_device *hdev = ctx->hdev;
				790	struct hl_vm *vm = &hdev->vm;
				791	struct hl_vm_phys_pg_pack *phys_pg_pack;
				792	u32 handle;
				793
				794	handle = lower_32_bits(args->map_device.handle);
				795	spin_lock(&vm->idr_lock);
				796	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
				797	if (!phys_pg_pack) {
				798	spin_unlock(&vm->idr_lock);
				799	dev_err(hdev->dev, "no match for handle %u\n", handle);
				800	return -EINVAL;
				801	}
				802
				803	*paddr = phys_pg_pack->pages[0];
				804
				805	spin_unlock(&vm->idr_lock);
				806
				807	return 0;
				808	}
				809
				810	/*
				811	* map_device_va - map the given memory
				812	*
				813	* @ctx : current context
				814	* @args : host parameters with handle/host virtual address
				815	* @device_addr : pointer to result device virtual address
				816	*
				817	* This function does the following:
				818	* - If given a physical device memory handle, map to a device virtual block
				819	* and return the start address of this block
				820	* - If given a host virtual address and size, find the related physical pages,
				821	* map a device virtual block to this pages and return the start address of
				822	* this block
				823	*/
				824	static int map_device_va(struct hl_ctx ctx, struct hl_mem_in args,
				825	u64 *device_addr)
				826	{
				827	struct hl_device *hdev = ctx->hdev;
				828	struct hl_vm *vm = &hdev->vm;
				829	struct hl_vm_phys_pg_pack *phys_pg_pack;
				830	struct hl_userptr *userptr = NULL;
				831	struct hl_vm_hash_node *hnode;
				832	enum vm_type_t *vm_type;
				833	u64 ret_vaddr, hint_addr;
				834	u32 handle = 0;
				835	int rc;
				836	bool is_userptr = args->flags & HL_MEM_USERPTR;
				837
				838	/* Assume failure */
				839	*device_addr = 0;
				840
				841	if (is_userptr) {
				842	rc = get_userptr_from_host_va(hdev, args, &userptr);
				843	if (rc) {
				844	dev_err(hdev->dev, "failed to get userptr from va\n");
				845	return rc;
				846	}
				847
				848	rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				849	&phys_pg_pack);
				850	if (rc) {
				851	dev_err(hdev->dev,
				852	"unable to init page pack for vaddr 0x%llx\n",
				853	args->map_host.host_virt_addr);
				854	goto init_page_pack_err;
				855	}
				856
				857	vm_type = (enum vm_type_t *) userptr;
				858	hint_addr = args->map_host.hint_addr;
				859	} else {
				860	handle = lower_32_bits(args->map_device.handle);
				861
				862	spin_lock(&vm->idr_lock);
				863	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
				864	if (!phys_pg_pack) {
				865	spin_unlock(&vm->idr_lock);
				866	dev_err(hdev->dev,
				867	"no match for handle %u\n", handle);
				868	return -EINVAL;
				869	}
				870
				871	/* increment now to avoid freeing device memory while mapping */
				872	atomic_inc(&phys_pg_pack->mapping_cnt);
				873
				874	spin_unlock(&vm->idr_lock);
				875
				876	vm_type = (enum vm_type_t *) phys_pg_pack;
				877
				878	hint_addr = args->map_device.hint_addr;
				879	}
				880
				881	/*
				882	* relevant for mapping device physical memory only, as host memory is
				883	* implicitly shared
				884	*/
				885	if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
				886	phys_pg_pack->asid != ctx->asid) {
				887	dev_err(hdev->dev,
				888	"Failed to map memory, handle %u is not shared\n",
				889	handle);
				890	rc = -EPERM;
				891	goto shared_err;
				892	}
				893
				894	hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
				895	if (!hnode) {
				896	rc = -ENOMEM;
				897	goto hnode_err;
				898	}
				899
				900	ret_vaddr = get_va_block(hdev,
				901	is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
				902	phys_pg_pack->total_size, hint_addr, is_userptr);
				903	if (!ret_vaddr) {
				904	dev_err(hdev->dev, "no available va block for handle %u\n",
				905	handle);
				906	rc = -ENOMEM;
				907	goto va_block_err;
				908	}
				909
				910	mutex_lock(&ctx->mmu_lock);
				911
				912	rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack);
				913	if (rc) {
				914	mutex_unlock(&ctx->mmu_lock);
				915	dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
				916	handle);
				917	goto map_err;
				918	}
				919
				920	hdev->asic_funcs->mmu_invalidate_cache(hdev, false);
				921
				922	mutex_unlock(&ctx->mmu_lock);
				923
				924	ret_vaddr += phys_pg_pack->offset;
				925
				926	hnode->ptr = vm_type;
				927	hnode->vaddr = ret_vaddr;
				928
				929	mutex_lock(&ctx->mem_hash_lock);
				930	hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
				931	mutex_unlock(&ctx->mem_hash_lock);
				932
				933	*device_addr = ret_vaddr;
				934
				935	if (is_userptr)
				936	free_phys_pg_pack(hdev, phys_pg_pack);
				937
				938	return 0;
				939
				940	map_err:
				941	if (add_va_block(hdev,
				942	is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
				943	ret_vaddr,
				944	ret_vaddr + phys_pg_pack->total_size - 1))
				945	dev_warn(hdev->dev,
				946	"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
				947	handle, ret_vaddr);
				948
				949	va_block_err:
				950	kfree(hnode);
				951	hnode_err:
				952	shared_err:
				953	atomic_dec(&phys_pg_pack->mapping_cnt);
				954	if (is_userptr)
				955	free_phys_pg_pack(hdev, phys_pg_pack);
				956	init_page_pack_err:
				957	if (is_userptr)
				958	free_userptr(hdev, userptr);
				959
				960	return rc;
				961	}
				962
				963	/*
				964	* unmap_device_va - unmap the given device virtual address
				965	*
				966	* @ctx : current context
				967	* @vaddr : device virtual address to unmap
				968	*
				969	* This function does the following:
				970	* - Unmap the physical pages related to the given virtual address
				971	* - return the device virtual block to the virtual block list
				972	*/
				973	static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
				974	{
				975	struct hl_device *hdev = ctx->hdev;
				976	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
				977	struct hl_vm_hash_node *hnode = NULL;
				978	struct hl_userptr *userptr = NULL;
				979	enum vm_type_t *vm_type;
				980	u64 next_vaddr, i;
				981	u32 page_size;
				982	bool is_userptr;
				983	int rc;
				984
				985	/* protect from double entrance */
				986	mutex_lock(&ctx->mem_hash_lock);
				987	hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
				988	if (vaddr == hnode->vaddr)
				989	break;
				990
				991	if (!hnode) {
				992	mutex_unlock(&ctx->mem_hash_lock);
				993	dev_err(hdev->dev,
				994	"unmap failed, no mem hnode for vaddr 0x%llx\n",
				995	vaddr);
				996	return -EINVAL;
				997	}
				998
				999	hash_del(&hnode->node);
				1000	mutex_unlock(&ctx->mem_hash_lock);
				1001
				1002	vm_type = hnode->ptr;
				1003
				1004	if (*vm_type == VM_TYPE_USERPTR) {
				1005	is_userptr = true;
				1006	userptr = hnode->ptr;
				1007	rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				1008	&phys_pg_pack);
				1009	if (rc) {
				1010	dev_err(hdev->dev,
				1011	"unable to init page pack for vaddr 0x%llx\n",
				1012	vaddr);
				1013	goto vm_type_err;
				1014	}
				1015	} else if (*vm_type == VM_TYPE_PHYS_PACK) {
				1016	is_userptr = false;
				1017	phys_pg_pack = hnode->ptr;
				1018	} else {
				1019	dev_warn(hdev->dev,
				1020	"unmap failed, unknown vm desc for vaddr 0x%llx\n",
				1021	vaddr);
				1022	rc = -EFAULT;
				1023	goto vm_type_err;
				1024	}
				1025
				1026	if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
				1027	dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
				1028	rc = -EINVAL;
				1029	goto mapping_cnt_err;
				1030	}
				1031
				1032	page_size = phys_pg_pack->page_size;
				1033	vaddr &= ~(((u64) page_size) - 1);
				1034
				1035	next_vaddr = vaddr;
				1036
				1037	mutex_lock(&ctx->mmu_lock);
				1038
				1039	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
				1040	if (hl_mmu_unmap(ctx, next_vaddr, page_size))
				1041	dev_warn_ratelimited(hdev->dev,
				1042	"unmap failed for vaddr: 0x%llx\n", next_vaddr);
				1043
				1044	/* unmapping on Palladium can be really long, so avoid a CPU
				1045	* soft lockup bug by sleeping a little between unmapping pages
				1046	*/
				1047	if (hdev->pldm)
				1048	usleep_range(500, 1000);
				1049	}
				1050
				1051	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
				1052
				1053	mutex_unlock(&ctx->mmu_lock);
				1054
				1055	if (add_va_block(hdev,
				1056	is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
				1057	vaddr,
				1058	vaddr + phys_pg_pack->total_size - 1))
				1059	dev_warn(hdev->dev, "add va block failed for vaddr: 0x%llx\n",
				1060	vaddr);
				1061
				1062	atomic_dec(&phys_pg_pack->mapping_cnt);
				1063	kfree(hnode);
				1064
				1065	if (is_userptr) {
				1066	free_phys_pg_pack(hdev, phys_pg_pack);
				1067	free_userptr(hdev, userptr);
				1068	}
				1069
				1070	return 0;
				1071
				1072	mapping_cnt_err:
				1073	if (is_userptr)
				1074	free_phys_pg_pack(hdev, phys_pg_pack);
				1075	vm_type_err:
				1076	mutex_lock(&ctx->mem_hash_lock);
				1077	hash_add(ctx->mem_hash, &hnode->node, vaddr);
				1078	mutex_unlock(&ctx->mem_hash_lock);
				1079
				1080	return rc;
				1081	}
				1082
				1083	static int mem_ioctl_no_mmu(struct hl_fpriv hpriv, union hl_mem_args args)
				1084	{
				1085	struct hl_device *hdev = hpriv->hdev;
				1086	struct hl_ctx *ctx = hpriv->ctx;
				1087	u64 device_addr = 0;
				1088	u32 handle = 0;
				1089	int rc;
				1090
				1091	switch (args->in.op) {
				1092	case HL_MEM_OP_ALLOC:
				1093	if (args->in.alloc.mem_size == 0) {
				1094	dev_err(hdev->dev,
				1095	"alloc size must be larger than 0\n");
				1096	rc = -EINVAL;
				1097	goto out;
				1098	}
				1099
				1100	/* Force contiguous as there are no real MMU
				1101	* translations to overcome physical memory gaps
				1102	*/
				1103	args->in.flags \|= HL_MEM_CONTIGUOUS;
				1104	rc = alloc_device_memory(ctx, &args->in, &handle);
				1105
				1106	memset(args, 0, sizeof(*args));
				1107	args->out.handle = (__u64) handle;
				1108	break;
				1109
				1110	case HL_MEM_OP_FREE:
				1111	rc = free_device_memory(ctx, args->in.free.handle);
				1112	break;
				1113
				1114	case HL_MEM_OP_MAP:
				1115	if (args->in.flags & HL_MEM_USERPTR) {
				1116	device_addr = args->in.map_host.host_virt_addr;
				1117	rc = 0;
				1118	} else {
				1119	rc = get_paddr_from_handle(ctx, &args->in,
				1120	&device_addr);
				1121	}
				1122
				1123	memset(args, 0, sizeof(*args));
				1124	args->out.device_virt_addr = device_addr;
				1125	break;
				1126
				1127	case HL_MEM_OP_UNMAP:
				1128	rc = 0;
				1129	break;
				1130
				1131	default:
				1132	dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
				1133	rc = -ENOTTY;
				1134	break;
				1135	}
				1136
				1137	out:
				1138	return rc;
				1139	}
				1140
				1141	int hl_mem_ioctl(struct hl_fpriv hpriv, void data)
				1142	{
				1143	union hl_mem_args *args = data;
				1144	struct hl_device *hdev = hpriv->hdev;
				1145	struct hl_ctx *ctx = hpriv->ctx;
				1146	u64 device_addr = 0;
				1147	u32 handle = 0;
				1148	int rc;
				1149
				1150	if (hl_device_disabled_or_in_reset(hdev)) {
				1151	dev_warn_ratelimited(hdev->dev,
				1152	"Device is %s. Can't execute MEMORY IOCTL\n",
				1153	atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
				1154	return -EBUSY;
				1155	}
				1156
				1157	if (!hdev->mmu_enable)
				1158	return mem_ioctl_no_mmu(hpriv, args);
				1159
				1160	switch (args->in.op) {
				1161	case HL_MEM_OP_ALLOC:
				1162	if (!hdev->dram_supports_virtual_memory) {
				1163	dev_err(hdev->dev, "DRAM alloc is not supported\n");
				1164	rc = -EINVAL;
				1165	goto out;
				1166	}
				1167
				1168	if (args->in.alloc.mem_size == 0) {
				1169	dev_err(hdev->dev,
				1170	"alloc size must be larger than 0\n");
				1171	rc = -EINVAL;
				1172	goto out;
				1173	}
				1174	rc = alloc_device_memory(ctx, &args->in, &handle);
				1175
				1176	memset(args, 0, sizeof(*args));
				1177	args->out.handle = (__u64) handle;
				1178	break;
				1179
				1180	case HL_MEM_OP_FREE:
				1181	rc = free_device_memory(ctx, args->in.free.handle);
				1182	break;
				1183
				1184	case HL_MEM_OP_MAP:
				1185	rc = map_device_va(ctx, &args->in, &device_addr);
				1186
				1187	memset(args, 0, sizeof(*args));
				1188	args->out.device_virt_addr = device_addr;
				1189	break;
				1190
				1191	case HL_MEM_OP_UNMAP:
				1192	rc = unmap_device_va(ctx,
				1193	args->in.unmap.device_virt_addr);
				1194	break;
				1195
				1196	default:
				1197	dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
				1198	rc = -ENOTTY;
				1199	break;
				1200	}
				1201
				1202	out:
				1203	return rc;
				1204	}
				1205
				1206	/*
				1207	* hl_pin_host_memory - pins a chunk of host memory
				1208	*
				1209	* @hdev : pointer to the habanalabs device structure
				1210	* @addr : the user-space virtual address of the memory area
				1211	* @size : the size of the memory area
				1212	* @userptr : pointer to hl_userptr structure
				1213	*
				1214	* This function does the following:
				1215	* - Pins the physical pages
				1216	* - Create a SG list from those pages
				1217	*/
				1218	int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
				1219	struct hl_userptr *userptr)
				1220	{
				1221	u64 start, end;
				1222	u32 npages, offset;
				1223	int rc;
				1224
				1225	if (!size) {
				1226	dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
				1227	return -EINVAL;
				1228	}
				1229
				1230	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
				1231	dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
				1232	return -EFAULT;
				1233	}
				1234
				1235	/*
				1236	* If the combination of the address and size requested for this memory
				1237	* region causes an integer overflow, return error.
				1238	*/
				1239	if (((addr + size) < addr) \|\|
				1240	PAGE_ALIGN(addr + size) < (addr + size)) {
				1241	dev_err(hdev->dev,
				1242	"user pointer 0x%llx + %llu causes integer overflow\n",
				1243	addr, size);
				1244	return -EINVAL;
				1245	}
				1246
				1247	start = addr & PAGE_MASK;
				1248	offset = addr & ~PAGE_MASK;
				1249	end = PAGE_ALIGN(addr + size);
				1250	npages = (end - start) >> PAGE_SHIFT;
				1251
				1252	userptr->size = size;
				1253	userptr->addr = addr;
				1254	userptr->dma_mapped = false;
				1255	INIT_LIST_HEAD(&userptr->job_node);
				1256
				1257	userptr->vec = frame_vector_create(npages);
				1258	if (!userptr->vec) {
				1259	dev_err(hdev->dev, "Failed to create frame vector\n");
				1260	return -ENOMEM;
				1261	}
				1262
				1263	rc = get_vaddr_frames(start, npages, FOLL_FORCE \| FOLL_WRITE,
				1264	userptr->vec);
				1265
				1266	if (rc != npages) {
				1267	dev_err(hdev->dev,
				1268	"Failed to map host memory, user ptr probably wrong\n");
				1269	if (rc < 0)
				1270	goto destroy_framevec;
				1271	rc = -EFAULT;
				1272	goto put_framevec;
				1273	}
				1274
				1275	if (frame_vector_to_pages(userptr->vec) < 0) {
				1276	dev_err(hdev->dev,
				1277	"Failed to translate frame vector to pages\n");
				1278	rc = -EFAULT;
				1279	goto put_framevec;
				1280	}
				1281
				1282	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
				1283	if (!userptr->sgt) {
				1284	rc = -ENOMEM;
				1285	goto put_framevec;
				1286	}
				1287
				1288	rc = sg_alloc_table_from_pages(userptr->sgt,
				1289	frame_vector_pages(userptr->vec),
				1290	npages, offset, size, GFP_ATOMIC);
				1291	if (rc < 0) {
				1292	dev_err(hdev->dev, "failed to create SG table from pages\n");
				1293	goto free_sgt;
				1294	}
				1295
				1296	hl_debugfs_add_userptr(hdev, userptr);
				1297
				1298	return 0;
				1299
				1300	free_sgt:
				1301	kfree(userptr->sgt);
				1302	put_framevec:
				1303	put_vaddr_frames(userptr->vec);
				1304	destroy_framevec:
				1305	frame_vector_destroy(userptr->vec);
				1306	return rc;
				1307	}
				1308
				1309	/*
				1310	* hl_unpin_host_memory - unpins a chunk of host memory
				1311	*
				1312	* @hdev : pointer to the habanalabs device structure
				1313	* @userptr : pointer to hl_userptr structure
				1314	*
				1315	* This function does the following:
				1316	* - Unpins the physical pages related to the host memory
				1317	* - Free the SG list
				1318	*/
				1319	int hl_unpin_host_memory(struct hl_device hdev, struct hl_userptr userptr)
				1320	{
				1321	struct page **pages;
				1322
				1323	hl_debugfs_remove_userptr(hdev, userptr);
				1324
				1325	if (userptr->dma_mapped)
				1326	hdev->asic_funcs->hl_dma_unmap_sg(hdev,
				1327	userptr->sgt->sgl,
				1328	userptr->sgt->nents,
				1329	userptr->dir);
				1330
				1331	pages = frame_vector_pages(userptr->vec);
				1332	if (!IS_ERR(pages)) {
				1333	int i;
				1334
				1335	for (i = 0; i < frame_vector_count(userptr->vec); i++)
				1336	set_page_dirty_lock(pages[i]);
				1337	}
				1338	put_vaddr_frames(userptr->vec);
				1339	frame_vector_destroy(userptr->vec);
				1340
				1341	list_del(&userptr->job_node);
				1342
				1343	sg_free_table(userptr->sgt);
				1344	kfree(userptr->sgt);
				1345
				1346	return 0;
				1347	}
				1348
				1349	/*
				1350	* hl_userptr_delete_list - clear userptr list
				1351	*
				1352	* @hdev : pointer to the habanalabs device structure
				1353	* @userptr_list : pointer to the list to clear
				1354	*
				1355	* This function does the following:
				1356	* - Iterates over the list and unpins the host memory and frees the userptr
				1357	* structure.
				1358	*/
				1359	void hl_userptr_delete_list(struct hl_device *hdev,
				1360	struct list_head *userptr_list)
				1361	{
				1362	struct hl_userptr userptr, tmp;
				1363
				1364	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
				1365	hl_unpin_host_memory(hdev, userptr);
				1366	kfree(userptr);
				1367	}
				1368
				1369	INIT_LIST_HEAD(userptr_list);
				1370	}
				1371
				1372	/*
				1373	* hl_userptr_is_pinned - returns whether the given userptr is pinned
				1374	*
				1375	* @hdev : pointer to the habanalabs device structure
				1376	* @userptr_list : pointer to the list to clear
				1377	* @userptr : pointer to userptr to check
				1378	*
				1379	* This function does the following:
				1380	* - Iterates over the list and checks if the given userptr is in it, means is
				1381	* pinned. If so, returns true, otherwise returns false.
				1382	*/
				1383	bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
				1384	u32 size, struct list_head *userptr_list,
				1385	struct hl_userptr **userptr)
				1386	{
				1387	list_for_each_entry((*userptr), userptr_list, job_node) {
				1388	if ((addr == (userptr)->addr) && (size == (userptr)->size))
				1389	return true;
				1390	}
				1391
				1392	return false;
				1393	}
				1394
				1395	/*
				1396	* hl_va_range_init - initialize virtual addresses range
				1397	*
				1398	* @hdev : pointer to the habanalabs device structure
				1399	* @va_range : pointer to the range to initialize
				1400	* @start : range start address
				1401	* @end : range end address
				1402	*
				1403	* This function does the following:
				1404	* - Initializes the virtual addresses list of the given range with the given
				1405	* addresses.
				1406	*/
				1407	static int hl_va_range_init(struct hl_device *hdev,
				1408	struct hl_va_range *va_range, u64 start, u64 end)
				1409	{
				1410	int rc;
				1411
				1412	INIT_LIST_HEAD(&va_range->list);
				1413
				1414	/* PAGE_SIZE alignment */
				1415
				1416	if (start & (PAGE_SIZE - 1)) {
				1417	start &= PAGE_MASK;
				1418	start += PAGE_SIZE;
				1419	}
				1420
				1421	if (end & (PAGE_SIZE - 1))
				1422	end &= PAGE_MASK;
				1423
				1424	if (start >= end) {
				1425	dev_err(hdev->dev, "too small vm range for va list\n");
				1426	return -EFAULT;
				1427	}
				1428
				1429	rc = add_va_block(hdev, va_range, start, end);
				1430
				1431	if (rc) {
				1432	dev_err(hdev->dev, "Failed to init host va list\n");
				1433	return rc;
				1434	}
				1435
				1436	va_range->start_addr = start;
				1437	va_range->end_addr = end;
				1438
				1439	return 0;
				1440	}
				1441
				1442	/*
				1443	* hl_vm_ctx_init_with_ranges - initialize virtual memory for context
				1444	*
				1445	* @ctx : pointer to the habanalabs context structure
				1446	* @host_range_start : host virtual addresses range start
				1447	* @host_range_end : host virtual addresses range end
				1448	* @dram_range_start : dram virtual addresses range start
				1449	* @dram_range_end : dram virtual addresses range end
				1450	*
				1451	* This function initializes the following:
				1452	* - MMU for context
				1453	* - Virtual address to area descriptor hashtable
				1454	* - Virtual block list of available virtual memory
				1455	*/
				1456	static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
				1457	u64 host_range_end, u64 dram_range_start,
				1458	u64 dram_range_end)
				1459	{
				1460	struct hl_device *hdev = ctx->hdev;
				1461	int rc;
				1462
				1463	rc = hl_mmu_ctx_init(ctx);
				1464	if (rc) {
				1465	dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
				1466	return rc;
				1467	}
				1468
				1469	mutex_init(&ctx->mem_hash_lock);
				1470	hash_init(ctx->mem_hash);
				1471
				1472	mutex_init(&ctx->host_va_range.lock);
				1473
				1474	rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
				1475	host_range_end);
				1476	if (rc) {
				1477	dev_err(hdev->dev, "failed to init host vm range\n");
				1478	goto host_vm_err;
				1479	}
				1480
				1481	mutex_init(&ctx->dram_va_range.lock);
				1482
				1483	rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
				1484	dram_range_end);
				1485	if (rc) {
				1486	dev_err(hdev->dev, "failed to init dram vm range\n");
				1487	goto dram_vm_err;
				1488	}
				1489
				1490	hl_debugfs_add_ctx_mem_hash(hdev, ctx);
				1491
				1492	return 0;
				1493
				1494	dram_vm_err:
				1495	mutex_destroy(&ctx->dram_va_range.lock);
				1496
				1497	mutex_lock(&ctx->host_va_range.lock);
				1498	clear_va_list_locked(hdev, &ctx->host_va_range.list);
				1499	mutex_unlock(&ctx->host_va_range.lock);
				1500	host_vm_err:
				1501	mutex_destroy(&ctx->host_va_range.lock);
				1502	mutex_destroy(&ctx->mem_hash_lock);
				1503	hl_mmu_ctx_fini(ctx);
				1504
				1505	return rc;
				1506	}
				1507
				1508	int hl_vm_ctx_init(struct hl_ctx *ctx)
				1509	{
				1510	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
				1511	u64 host_range_start, host_range_end, dram_range_start,
				1512	dram_range_end;
				1513
				1514	atomic64_set(&ctx->dram_phys_mem, 0);
				1515
				1516	/*
				1517	* - If MMU is enabled, init the ranges as usual.
				1518	* - If MMU is disabled, in case of host mapping, the returned address
				1519	* is the given one.
				1520	* In case of DRAM mapping, the returned address is the physical
				1521	* address of the memory related to the given handle.
				1522	*/
				1523	if (ctx->hdev->mmu_enable) {
				1524	dram_range_start = prop->va_space_dram_start_address;
				1525	dram_range_end = prop->va_space_dram_end_address;
				1526	host_range_start = prop->va_space_host_start_address;
				1527	host_range_end = prop->va_space_host_end_address;
				1528	} else {
				1529	dram_range_start = prop->dram_user_base_address;
				1530	dram_range_end = prop->dram_end_address;
				1531	host_range_start = prop->dram_user_base_address;
				1532	host_range_end = prop->dram_end_address;
				1533	}
				1534
				1535	return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
				1536	dram_range_start, dram_range_end);
				1537	}
				1538
				1539	/*
				1540	* hl_va_range_fini - clear a virtual addresses range
				1541	*
				1542	* @hdev : pointer to the habanalabs structure
				1543	* va_range : pointer to virtual addresses range
				1544	*
				1545	* This function initializes the following:
				1546	* - Checks that the given range contains the whole initial range
				1547	* - Frees the virtual addresses block list and its lock
				1548	*/
				1549	static void hl_va_range_fini(struct hl_device *hdev,
				1550	struct hl_va_range *va_range)
				1551	{
				1552	struct hl_vm_va_block *va_block;
				1553
				1554	if (list_empty(&va_range->list)) {
				1555	dev_warn(hdev->dev,
				1556	"va list should not be empty on cleanup!\n");
				1557	goto out;
				1558	}
				1559
				1560	if (!list_is_singular(&va_range->list)) {
				1561	dev_warn(hdev->dev,
				1562	"va list should not contain multiple blocks on cleanup!\n");
				1563	goto free_va_list;
				1564	}
				1565
				1566	va_block = list_first_entry(&va_range->list, typeof(*va_block), node);
				1567
				1568	if (va_block->start != va_range->start_addr \|\|
				1569	va_block->end != va_range->end_addr) {
				1570	dev_warn(hdev->dev,
				1571	"wrong va block on cleanup, from 0x%llx to 0x%llx\n",
				1572	va_block->start, va_block->end);
				1573	goto free_va_list;
				1574	}
				1575
				1576	free_va_list:
				1577	mutex_lock(&va_range->lock);
				1578	clear_va_list_locked(hdev, &va_range->list);
				1579	mutex_unlock(&va_range->lock);
				1580
				1581	out:
				1582	mutex_destroy(&va_range->lock);
				1583	}
				1584
				1585	/*
				1586	* hl_vm_ctx_fini - virtual memory teardown of context
				1587	*
				1588	* @ctx : pointer to the habanalabs context structure
				1589	*
				1590	* This function perform teardown the following:
				1591	* - Virtual block list of available virtual memory
				1592	* - Virtual address to area descriptor hashtable
				1593	* - MMU for context
				1594	*
				1595	* In addition this function does the following:
				1596	* - Unmaps the existing hashtable nodes if the hashtable is not empty. The
				1597	* hashtable should be empty as no valid mappings should exist at this
				1598	* point.
				1599	* - Frees any existing physical page list from the idr which relates to the
				1600	* current context asid.
				1601	* - This function checks the virtual block list for correctness. At this point
				1602	* the list should contain one element which describes the whole virtual
				1603	* memory range of the context. Otherwise, a warning is printed.
				1604	*/
				1605	void hl_vm_ctx_fini(struct hl_ctx *ctx)
				1606	{
				1607	struct hl_device *hdev = ctx->hdev;
				1608	struct hl_vm *vm = &hdev->vm;
				1609	struct hl_vm_phys_pg_pack *phys_pg_list;
				1610	struct hl_vm_hash_node *hnode;
				1611	struct hlist_node *tmp_node;
				1612	int i;
				1613
				1614	hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
				1615
				1616	if (!hash_empty(ctx->mem_hash))
				1617	dev_notice(hdev->dev, "ctx is freed while it has va in use\n");
				1618
				1619	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
				1620	dev_dbg(hdev->dev,
				1621	"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
				1622	hnode->vaddr, ctx->asid);
				1623	unmap_device_va(ctx, hnode->vaddr);
				1624	}
				1625
				1626	spin_lock(&vm->idr_lock);
				1627	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
				1628	if (phys_pg_list->asid == ctx->asid) {
				1629	dev_dbg(hdev->dev,
				1630	"page list 0x%p of asid %d is still alive\n",
				1631	phys_pg_list, ctx->asid);
				1632	atomic64_sub(phys_pg_list->total_size,
				1633	&hdev->dram_used_mem);
				1634	free_phys_pg_pack(hdev, phys_pg_list);
				1635	idr_remove(&vm->phys_pg_pack_handles, i);
				1636	}
				1637	spin_unlock(&vm->idr_lock);
				1638
				1639	hl_va_range_fini(hdev, &ctx->dram_va_range);
				1640	hl_va_range_fini(hdev, &ctx->host_va_range);
				1641
				1642	mutex_destroy(&ctx->mem_hash_lock);
				1643	hl_mmu_ctx_fini(ctx);
				1644	}
				1645
				1646	/*
				1647	* hl_vm_init - initialize virtual memory module
				1648	*
				1649	* @hdev : pointer to the habanalabs device structure
				1650	*
				1651	* This function initializes the following:
				1652	* - MMU module
				1653	* - DRAM physical pages pool of 2MB
				1654	* - Idr for device memory allocation handles
				1655	*/
				1656	int hl_vm_init(struct hl_device *hdev)
				1657	{
				1658	struct asic_fixed_properties *prop = &hdev->asic_prop;
				1659	struct hl_vm *vm = &hdev->vm;
				1660	int rc;
				1661
				1662	vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
				1663	if (!vm->dram_pg_pool) {
				1664	dev_err(hdev->dev, "Failed to create dram page pool\n");
				1665	return -ENOMEM;
				1666	}
				1667
				1668	kref_init(&vm->dram_pg_pool_refcount);
				1669
				1670	rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
				1671	prop->dram_end_address - prop->dram_user_base_address,
				1672	-1);
				1673
				1674	if (rc) {
				1675	dev_err(hdev->dev,
				1676	"Failed to add memory to dram page pool %d\n", rc);
				1677	goto pool_add_err;
				1678	}
				1679
				1680	spin_lock_init(&vm->idr_lock);
				1681	idr_init(&vm->phys_pg_pack_handles);
				1682
				1683	atomic64_set(&hdev->dram_used_mem, 0);
				1684
				1685	vm->init_done = true;
				1686
				1687	return 0;
				1688
				1689	pool_add_err:
				1690	gen_pool_destroy(vm->dram_pg_pool);
				1691
				1692	return rc;
				1693	}
				1694
				1695	/*
				1696	* hl_vm_fini - virtual memory module teardown
				1697	*
				1698	* @hdev : pointer to the habanalabs device structure
				1699	*
				1700	* This function perform teardown to the following:
				1701	* - Idr for device memory allocation handles
				1702	* - DRAM physical pages pool of 2MB
				1703	* - MMU module
				1704	*/
				1705	void hl_vm_fini(struct hl_device *hdev)
				1706	{
				1707	struct hl_vm *vm = &hdev->vm;
				1708
				1709	if (!vm->init_done)
				1710	return;
				1711
				1712	/*
				1713	* At this point all the contexts should be freed and hence no DRAM
				1714	* memory should be in use. Hence the DRAM pool should be freed here.
				1715	*/
				1716	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
				1717	dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
				1718	__func__);
				1719
				1720	vm->init_done = false;
				1721	}