Blame - drivers/misc/habanalabs/mmu.c - hafnium/third_party/linux.git

blob: 176c315836f128d00acab1a6deb42e97e00aeb93 [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	/*
				4	* Copyright 2016-2019 HabanaLabs, Ltd.
				5	* All Rights Reserved.
				6	*/
				7
				8	#include "habanalabs.h"
				9	#include "include/hw_ip/mmu/mmu_general.h"
				10
				11	#include <linux/genalloc.h>
				12	#include <linux/slab.h>
				13
				14	static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
				15
				16	static struct pgt_info get_pgt_info(struct hl_ctx ctx, u64 hop_addr)
				17	{
				18	struct pgt_info *pgt_info = NULL;
				19
				20	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				21	(unsigned long) hop_addr)
				22	if (hop_addr == pgt_info->shadow_addr)
				23	break;
				24
				25	return pgt_info;
				26	}
				27
				28	static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
				29	{
				30	struct hl_device *hdev = ctx->hdev;
				31	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
				32
				33	gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
				34	hdev->asic_prop.mmu_hop_table_size);
				35	hash_del(&pgt_info->node);
				36	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
				37	kfree(pgt_info);
				38	}
				39
				40	static u64 alloc_hop(struct hl_ctx *ctx)
				41	{
				42	struct hl_device *hdev = ctx->hdev;
				43	struct asic_fixed_properties *prop = &hdev->asic_prop;
				44	struct pgt_info *pgt_info;
				45	u64 phys_addr, shadow_addr;
				46
				47	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
				48	if (!pgt_info)
				49	return ULLONG_MAX;
				50
				51	phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
				52	prop->mmu_hop_table_size);
				53	if (!phys_addr) {
				54	dev_err(hdev->dev, "failed to allocate page\n");
				55	goto pool_add_err;
				56	}
				57
				58	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
				59	GFP_KERNEL);
				60	if (!shadow_addr)
				61	goto shadow_err;
				62
				63	pgt_info->phys_addr = phys_addr;
				64	pgt_info->shadow_addr = shadow_addr;
				65	pgt_info->ctx = ctx;
				66	pgt_info->num_of_ptes = 0;
				67	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
				68
				69	return shadow_addr;
				70
				71	shadow_err:
				72	gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
				73	pool_add_err:
				74	kfree(pgt_info);
				75
				76	return ULLONG_MAX;
				77	}
				78
				79	static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
				80	{
				81	return ctx->hdev->asic_prop.mmu_pgt_addr +
				82	(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
				83	}
				84
				85	static inline u64 get_hop0_addr(struct hl_ctx *ctx)
				86	{
				87	return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
				88	(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
				89	}
				90
				91	static inline void flush(struct hl_ctx *ctx)
				92	{
				93	/* flush all writes from all cores to reach PCI */
				94	mb();
				95	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
				96	}
				97
				98	/* transform the value to physical address when writing to H/W */
				99	static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
				100	{
				101	/*
				102	* The value to write is actually the address of the next shadow hop +
				103	* flags at the 12 LSBs.
				104	* Hence in order to get the value to write to the physical PTE, we
				105	* clear the 12 LSBs and translate the shadow hop to its associated
				106	* physical hop, and add back the original 12 LSBs.
				107	*/
				108	u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) \|
				109	(val & OFFSET_MASK);
				110
				111	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
				112	get_phys_addr(ctx, shadow_pte_addr),
				113	phys_val);
				114
				115	(u64 ) (uintptr_t) shadow_pte_addr = val;
				116	}
				117
				118	/* do not transform the value to physical address when writing to H/W */
				119	static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
				120	u64 val)
				121	{
				122	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
				123	get_phys_addr(ctx, shadow_pte_addr),
				124	val);
				125	(u64 ) (uintptr_t) shadow_pte_addr = val;
				126	}
				127
				128	/* clear the last and present bits */
				129	static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
				130	{
				131	/* no need to transform the value to physical address */
				132	write_final_pte(ctx, pte_addr, 0);
				133	}
				134
				135	static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
				136	{
				137	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
				138	}
				139
				140	/*
				141	* put_pte - decrement the num of ptes and free the hop if possible
				142	*
				143	* @ctx: pointer to the context structure
				144	* @hop_addr: addr of the hop
				145	*
				146	* This function returns the number of ptes left on this hop. If the number is
				147	* 0, it means the pte was freed.
				148	*/
				149	static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
				150	{
				151	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
				152	int num_of_ptes_left;
				153
				154	pgt_info->num_of_ptes--;
				155
				156	/*
				157	* Need to save the number of ptes left because free_hop might free
				158	* the pgt_info
				159	*/
				160	num_of_ptes_left = pgt_info->num_of_ptes;
				161	if (!num_of_ptes_left)
				162	free_hop(ctx, hop_addr);
				163
				164	return num_of_ptes_left;
				165	}
				166
				167	static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
				168	u64 virt_addr, u64 mask, u64 shift)
				169	{
				170	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
				171	((virt_addr & mask) >> shift);
				172	}
				173
				174	static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
				175	{
				176	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
				177	}
				178
				179	static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
				180	{
				181	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
				182	}
				183
				184	static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
				185	{
				186	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
				187	}
				188
				189	static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
				190	{
				191	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
				192	}
				193
				194	static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
				195	{
				196	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
				197	}
				198
				199	static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
				200	{
				201	if (curr_pte & PAGE_PRESENT_MASK)
				202	return curr_pte & PHYS_ADDR_MASK;
				203	else
				204	return ULLONG_MAX;
				205	}
				206
				207	static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
				208	bool *is_new_hop)
				209	{
				210	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
				211
				212	if (hop_addr == ULLONG_MAX) {
				213	hop_addr = alloc_hop(ctx);
				214	*is_new_hop = (hop_addr != ULLONG_MAX);
				215	}
				216
				217	return hop_addr;
				218	}
				219
				220	/* translates shadow address inside hop to a physical address */
				221	static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
				222	{
				223	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
				224	u64 shadow_hop_addr = shadow_addr & ~page_mask;
				225	u64 pte_offset = shadow_addr & page_mask;
				226	u64 phys_hop_addr;
				227
				228	if (shadow_hop_addr != get_hop0_addr(ctx))
				229	phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
				230	else
				231	phys_hop_addr = get_phys_hop0_addr(ctx);
				232
				233	return phys_hop_addr + pte_offset;
				234	}
				235
				236	static int dram_default_mapping_init(struct hl_ctx *ctx)
				237	{
				238	struct hl_device *hdev = ctx->hdev;
				239	struct asic_fixed_properties *prop = &hdev->asic_prop;
				240	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
				241	hop2_pte_addr, hop3_pte_addr, pte_val;
				242	int rc, i, j, hop3_allocated = 0;
				243
				244	if ((!hdev->dram_supports_virtual_memory) \|\|
				245	(!hdev->dram_default_page_mapping) \|\|
				246	(ctx->asid == HL_KERNEL_ASID_ID))
				247	return 0;
				248
				249	num_of_hop3 = prop->dram_size_for_default_page_mapping;
				250	do_div(num_of_hop3, prop->dram_page_size);
				251	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
				252
				253	/* add hop1 and hop2 */
				254	total_hops = num_of_hop3 + 2;
				255
				256	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
				257	if (!ctx->dram_default_hops)
				258	return -ENOMEM;
				259
				260	hop0_addr = get_hop0_addr(ctx);
				261
				262	hop1_addr = alloc_hop(ctx);
				263	if (hop1_addr == ULLONG_MAX) {
				264	dev_err(hdev->dev, "failed to alloc hop 1\n");
				265	rc = -ENOMEM;
				266	goto hop1_err;
				267	}
				268
				269	ctx->dram_default_hops[total_hops - 1] = hop1_addr;
				270
				271	hop2_addr = alloc_hop(ctx);
				272	if (hop2_addr == ULLONG_MAX) {
				273	dev_err(hdev->dev, "failed to alloc hop 2\n");
				274	rc = -ENOMEM;
				275	goto hop2_err;
				276	}
				277
				278	ctx->dram_default_hops[total_hops - 2] = hop2_addr;
				279
				280	for (i = 0 ; i < num_of_hop3 ; i++) {
				281	ctx->dram_default_hops[i] = alloc_hop(ctx);
				282	if (ctx->dram_default_hops[i] == ULLONG_MAX) {
				283	dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
				284	rc = -ENOMEM;
				285	goto hop3_err;
				286	}
				287	hop3_allocated++;
				288	}
				289
				290	/* need only pte 0 in hops 0 and 1 */
				291	pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) \| PAGE_PRESENT_MASK;
				292	write_pte(ctx, hop0_addr, pte_val);
				293
				294	pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) \| PAGE_PRESENT_MASK;
				295	write_pte(ctx, hop1_addr, pte_val);
				296	get_pte(ctx, hop1_addr);
				297
				298	hop2_pte_addr = hop2_addr;
				299	for (i = 0 ; i < num_of_hop3 ; i++) {
				300	pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) \|
				301	PAGE_PRESENT_MASK;
				302	write_pte(ctx, hop2_pte_addr, pte_val);
				303	get_pte(ctx, hop2_addr);
				304	hop2_pte_addr += HL_PTE_SIZE;
				305	}
				306
				307	pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) \|
				308	LAST_MASK \| PAGE_PRESENT_MASK;
				309
				310	for (i = 0 ; i < num_of_hop3 ; i++) {
				311	hop3_pte_addr = ctx->dram_default_hops[i];
				312	for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
				313	write_final_pte(ctx, hop3_pte_addr, pte_val);
				314	get_pte(ctx, ctx->dram_default_hops[i]);
				315	hop3_pte_addr += HL_PTE_SIZE;
				316	}
				317	}
				318
				319	flush(ctx);
				320
				321	return 0;
				322
				323	hop3_err:
				324	for (i = 0 ; i < hop3_allocated ; i++)
				325	free_hop(ctx, ctx->dram_default_hops[i]);
				326
				327	free_hop(ctx, hop2_addr);
				328	hop2_err:
				329	free_hop(ctx, hop1_addr);
				330	hop1_err:
				331	kfree(ctx->dram_default_hops);
				332
				333	return rc;
				334	}
				335
				336	static void dram_default_mapping_fini(struct hl_ctx *ctx)
				337	{
				338	struct hl_device *hdev = ctx->hdev;
				339	struct asic_fixed_properties *prop = &hdev->asic_prop;
				340	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
				341	hop2_pte_addr, hop3_pte_addr;
				342	int i, j;
				343
				344	if ((!hdev->dram_supports_virtual_memory) \|\|
				345	(!hdev->dram_default_page_mapping) \|\|
				346	(ctx->asid == HL_KERNEL_ASID_ID))
				347	return;
				348
				349	num_of_hop3 = prop->dram_size_for_default_page_mapping;
				350	do_div(num_of_hop3, prop->dram_page_size);
				351	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
				352
				353	hop0_addr = get_hop0_addr(ctx);
				354	/* add hop1 and hop2 */
				355	total_hops = num_of_hop3 + 2;
				356	hop1_addr = ctx->dram_default_hops[total_hops - 1];
				357	hop2_addr = ctx->dram_default_hops[total_hops - 2];
				358
				359	for (i = 0 ; i < num_of_hop3 ; i++) {
				360	hop3_pte_addr = ctx->dram_default_hops[i];
				361	for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
				362	clear_pte(ctx, hop3_pte_addr);
				363	put_pte(ctx, ctx->dram_default_hops[i]);
				364	hop3_pte_addr += HL_PTE_SIZE;
				365	}
				366	}
				367
				368	hop2_pte_addr = hop2_addr;
				369	hop2_pte_addr = hop2_addr;
				370	for (i = 0 ; i < num_of_hop3 ; i++) {
				371	clear_pte(ctx, hop2_pte_addr);
				372	put_pte(ctx, hop2_addr);
				373	hop2_pte_addr += HL_PTE_SIZE;
				374	}
				375
				376	clear_pte(ctx, hop1_addr);
				377	put_pte(ctx, hop1_addr);
				378	clear_pte(ctx, hop0_addr);
				379
				380	kfree(ctx->dram_default_hops);
				381
				382	flush(ctx);
				383	}
				384
				385	/**
				386	* hl_mmu_init() - initialize the MMU module.
				387	* @hdev: habanalabs device structure.
				388	*
				389	* This function does the following:
				390	* - Create a pool of pages for pgt_infos.
				391	* - Create a shadow table for pgt
				392	*
				393	* Return: 0 for success, non-zero for failure.
				394	*/
				395	int hl_mmu_init(struct hl_device *hdev)
				396	{
				397	struct asic_fixed_properties *prop = &hdev->asic_prop;
				398	int rc;
				399
				400	if (!hdev->mmu_enable)
				401	return 0;
				402
				403	/* MMU H/W init was already done in device hw_init() */
				404
				405	hdev->mmu_pgt_pool =
				406	gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
				407
				408	if (!hdev->mmu_pgt_pool) {
				409	dev_err(hdev->dev, "Failed to create page gen pool\n");
				410	return -ENOMEM;
				411	}
				412
				413	rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
				414	prop->mmu_hop0_tables_total_size,
				415	prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
				416	-1);
				417	if (rc) {
				418	dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
				419	goto err_pool_add;
				420	}
				421
				422	hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
				423	prop->mmu_hop_table_size,
				424	GFP_KERNEL \| __GFP_ZERO);
				425	if (!hdev->mmu_shadow_hop0) {
				426	rc = -ENOMEM;
				427	goto err_pool_add;
				428	}
				429
				430	return 0;
				431
				432	err_pool_add:
				433	gen_pool_destroy(hdev->mmu_pgt_pool);
				434
				435	return rc;
				436	}
				437
				438	/**
				439	* hl_mmu_fini() - release the MMU module.
				440	* @hdev: habanalabs device structure.
				441	*
				442	* This function does the following:
				443	* - Disable MMU in H/W.
				444	* - Free the pgt_infos pool.
				445	*
				446	* All contexts should be freed before calling this function.
				447	*/
				448	void hl_mmu_fini(struct hl_device *hdev)
				449	{
				450	if (!hdev->mmu_enable)
				451	return;
				452
				453	kvfree(hdev->mmu_shadow_hop0);
				454	gen_pool_destroy(hdev->mmu_pgt_pool);
				455
				456	/* MMU H/W fini will be done in device hw_fini() */
				457	}
				458
				459	/**
				460	* hl_mmu_ctx_init() - initialize a context for using the MMU module.
				461	* @ctx: pointer to the context structure to initialize.
				462	*
				463	* Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
				464	* page tables hops related to this context.
				465	* Return: 0 on success, non-zero otherwise.
				466	*/
				467	int hl_mmu_ctx_init(struct hl_ctx *ctx)
				468	{
				469	struct hl_device *hdev = ctx->hdev;
				470
				471	if (!hdev->mmu_enable)
				472	return 0;
				473
				474	mutex_init(&ctx->mmu_lock);
				475	hash_init(ctx->mmu_phys_hash);
				476	hash_init(ctx->mmu_shadow_hash);
				477
				478	return dram_default_mapping_init(ctx);
				479	}
				480
				481	/*
				482	* hl_mmu_ctx_fini - disable a ctx from using the mmu module
				483	*
				484	* @ctx: pointer to the context structure
				485	*
				486	* This function does the following:
				487	* - Free any pgts which were not freed yet
				488	* - Free the mutex
				489	* - Free DRAM default page mapping hops
				490	*/
				491	void hl_mmu_ctx_fini(struct hl_ctx *ctx)
				492	{
				493	struct hl_device *hdev = ctx->hdev;
				494	struct pgt_info *pgt_info;
				495	struct hlist_node *tmp;
				496	int i;
				497
				498	if (!hdev->mmu_enable)
				499	return;
				500
				501	dram_default_mapping_fini(ctx);
				502
				503	if (!hash_empty(ctx->mmu_shadow_hash))
				504	dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");
				505
				506	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
				507	dev_err(hdev->dev,
				508	"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
				509	pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
				510	free_hop(ctx, pgt_info->shadow_addr);
				511	}
				512
				513	mutex_destroy(&ctx->mmu_lock);
				514	}
				515
				516	static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
				517	{
				518	struct hl_device *hdev = ctx->hdev;
				519	struct asic_fixed_properties *prop = &hdev->asic_prop;
				520	u64 hop0_addr = 0, hop0_pte_addr = 0,
				521	hop1_addr = 0, hop1_pte_addr = 0,
				522	hop2_addr = 0, hop2_pte_addr = 0,
				523	hop3_addr = 0, hop3_pte_addr = 0,
				524	hop4_addr = 0, hop4_pte_addr = 0,
				525	curr_pte;
				526	bool is_dram_addr, is_huge, clear_hop3 = true;
				527
				528	is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
				529	prop->va_space_dram_start_address,
				530	prop->va_space_dram_end_address);
				531
				532	hop0_addr = get_hop0_addr(ctx);
				533	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
				534
				535	curr_pte = (u64 ) (uintptr_t) hop0_pte_addr;
				536
				537	hop1_addr = get_next_hop_addr(ctx, curr_pte);
				538
				539	if (hop1_addr == ULLONG_MAX)
				540	goto not_mapped;
				541
				542	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
				543
				544	curr_pte = (u64 ) (uintptr_t) hop1_pte_addr;
				545
				546	hop2_addr = get_next_hop_addr(ctx, curr_pte);
				547
				548	if (hop2_addr == ULLONG_MAX)
				549	goto not_mapped;
				550
				551	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
				552
				553	curr_pte = (u64 ) (uintptr_t) hop2_pte_addr;
				554
				555	hop3_addr = get_next_hop_addr(ctx, curr_pte);
				556
				557	if (hop3_addr == ULLONG_MAX)
				558	goto not_mapped;
				559
				560	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
				561
				562	curr_pte = (u64 ) (uintptr_t) hop3_pte_addr;
				563
				564	is_huge = curr_pte & LAST_MASK;
				565
				566	if (is_dram_addr && !is_huge) {
				567	dev_err(hdev->dev,
				568	"DRAM unmapping should use huge pages only\n");
				569	return -EFAULT;
				570	}
				571
				572	if (!is_huge) {
				573	hop4_addr = get_next_hop_addr(ctx, curr_pte);
				574
				575	if (hop4_addr == ULLONG_MAX)
				576	goto not_mapped;
				577
				578	hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
				579
				580	curr_pte = (u64 ) (uintptr_t) hop4_pte_addr;
				581
				582	clear_hop3 = false;
				583	}
				584
				585	if (hdev->dram_default_page_mapping && is_dram_addr) {
				586	u64 default_pte = (prop->mmu_dram_default_page_addr &
				587	PTE_PHYS_ADDR_MASK) \| LAST_MASK \|
				588	PAGE_PRESENT_MASK;
				589	if (curr_pte == default_pte) {
				590	dev_err(hdev->dev,
				591	"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
				592	virt_addr);
				593	goto not_mapped;
				594	}
				595
				596	if (!(curr_pte & PAGE_PRESENT_MASK)) {
				597	dev_err(hdev->dev,
				598	"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
				599	virt_addr);
				600	goto not_mapped;
				601	}
				602
				603	write_final_pte(ctx, hop3_pte_addr, default_pte);
				604	put_pte(ctx, hop3_addr);
				605	} else {
				606	if (!(curr_pte & PAGE_PRESENT_MASK))
				607	goto not_mapped;
				608
				609	if (hop4_addr)
				610	clear_pte(ctx, hop4_pte_addr);
				611	else
				612	clear_pte(ctx, hop3_pte_addr);
				613
				614	if (hop4_addr && !put_pte(ctx, hop4_addr))
				615	clear_hop3 = true;
				616
				617	if (!clear_hop3)
				618	goto flush;
				619
				620	clear_pte(ctx, hop3_pte_addr);
				621
				622	if (put_pte(ctx, hop3_addr))
				623	goto flush;
				624
				625	clear_pte(ctx, hop2_pte_addr);
				626
				627	if (put_pte(ctx, hop2_addr))
				628	goto flush;
				629
				630	clear_pte(ctx, hop1_pte_addr);
				631
				632	if (put_pte(ctx, hop1_addr))
				633	goto flush;
				634
				635	clear_pte(ctx, hop0_pte_addr);
				636	}
				637
				638	flush:
				639	flush(ctx);
				640
				641	return 0;
				642
				643	not_mapped:
				644	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
				645	virt_addr);
				646
				647	return -EINVAL;
				648	}
				649
				650	/*
				651	* hl_mmu_unmap - unmaps a virtual addr
				652	*
				653	* @ctx: pointer to the context structure
				654	* @virt_addr: virt addr to map from
				655	* @page_size: size of the page to unmap
				656	*
				657	* This function does the following:
				658	* - Check that the virt addr is mapped
				659	* - Unmap the virt addr and frees pgts if possible
				660	* - Returns 0 on success, -EINVAL if the given addr is not mapped
				661	*
				662	* Because this function changes the page tables in the device and because it
				663	* changes the MMU hash, it must be protected by a lock.
				664	* However, because it maps only a single page, the lock should be implemented
				665	* in a higher level in order to protect the entire mapping of the memory area
				666	*/
				667	int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
				668	{
				669	struct hl_device *hdev = ctx->hdev;
				670	u64 real_virt_addr;
				671	u32 real_page_size, npages;
				672	int i, rc;
				673
				674	if (!hdev->mmu_enable)
				675	return 0;
				676
				677	/*
				678	* The H/W handles mapping of 4KB/2MB page. Hence if the host page size
				679	* is bigger, we break it to sub-pages and unmap them separately.
				680	*/
				681	if ((page_size % PAGE_SIZE_2MB) == 0) {
				682	real_page_size = PAGE_SIZE_2MB;
				683	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
				684	real_page_size = PAGE_SIZE_4KB;
				685	} else {
				686	dev_err(hdev->dev,
				687	"page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
				688	page_size);
				689
				690	return -EFAULT;
				691	}
				692
				693	npages = page_size / real_page_size;
				694	real_virt_addr = virt_addr;
				695
				696	for (i = 0 ; i < npages ; i++) {
				697	rc = _hl_mmu_unmap(ctx, real_virt_addr);
				698	if (rc)
				699	return rc;
				700
				701	real_virt_addr += real_page_size;
				702	}
				703
				704	return 0;
				705	}
				706
				707	static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
				708	u32 page_size)
				709	{
				710	struct hl_device *hdev = ctx->hdev;
				711	struct asic_fixed_properties *prop = &hdev->asic_prop;
				712	u64 hop0_addr = 0, hop0_pte_addr = 0,
				713	hop1_addr = 0, hop1_pte_addr = 0,
				714	hop2_addr = 0, hop2_pte_addr = 0,
				715	hop3_addr = 0, hop3_pte_addr = 0,
				716	hop4_addr = 0, hop4_pte_addr = 0,
				717	curr_pte = 0;
				718	bool hop1_new = false, hop2_new = false, hop3_new = false,
				719	hop4_new = false, is_huge, is_dram_addr;
				720	int rc = -ENOMEM;
				721
				722	/*
				723	* This mapping function can map a 4KB/2MB page. For 2MB page there are
				724	* only 3 hops rather than 4. Currently the DRAM allocation uses 2MB
				725	* pages only but user memory could have been allocated with one of the
				726	* two page sizes. Since this is a common code for all the three cases,
				727	* we need this hugs page check.
				728	*/
				729	is_huge = page_size == PAGE_SIZE_2MB;
				730
				731	is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
				732	prop->va_space_dram_start_address,
				733	prop->va_space_dram_end_address);
				734
				735	if (is_dram_addr && !is_huge) {
				736	dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
				737	return -EFAULT;
				738	}
				739
				740	hop0_addr = get_hop0_addr(ctx);
				741	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
				742	curr_pte = (u64 ) (uintptr_t) hop0_pte_addr;
				743
				744	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
				745	if (hop1_addr == ULLONG_MAX)
				746	goto err;
				747
				748	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
				749	curr_pte = (u64 ) (uintptr_t) hop1_pte_addr;
				750
				751	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
				752	if (hop2_addr == ULLONG_MAX)
				753	goto err;
				754
				755	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
				756	curr_pte = (u64 ) (uintptr_t) hop2_pte_addr;
				757
				758	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
				759	if (hop3_addr == ULLONG_MAX)
				760	goto err;
				761
				762	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
				763	curr_pte = (u64 ) (uintptr_t) hop3_pte_addr;
				764
				765	if (!is_huge) {
				766	hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
				767	if (hop4_addr == ULLONG_MAX)
				768	goto err;
				769
				770	hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
				771	curr_pte = (u64 ) (uintptr_t) hop4_pte_addr;
				772	}
				773
				774	if (hdev->dram_default_page_mapping && is_dram_addr) {
				775	u64 default_pte = (prop->mmu_dram_default_page_addr &
				776	PTE_PHYS_ADDR_MASK) \| LAST_MASK \|
				777	PAGE_PRESENT_MASK;
				778
				779	if (curr_pte != default_pte) {
				780	dev_err(hdev->dev,
				781	"DRAM: mapping already exists for virt_addr 0x%llx\n",
				782	virt_addr);
				783	rc = -EINVAL;
				784	goto err;
				785	}
				786
				787	if (hop1_new \|\| hop2_new \|\| hop3_new \|\| hop4_new) {
				788	dev_err(hdev->dev,
				789	"DRAM mapping should not allocate more hops\n");
				790	rc = -EFAULT;
				791	goto err;
				792	}
				793	} else if (curr_pte & PAGE_PRESENT_MASK) {
				794	dev_err(hdev->dev,
				795	"mapping already exists for virt_addr 0x%llx\n",
				796	virt_addr);
				797
				798	dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
				799	(u64 ) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
				800	dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
				801	(u64 ) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
				802	dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
				803	(u64 ) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
				804	dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
				805	(u64 ) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
				806
				807	if (!is_huge)
				808	dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				809	(u64 ) (uintptr_t) hop4_pte_addr,
				810	hop4_pte_addr);
				811
				812	rc = -EINVAL;
				813	goto err;
				814	}
				815
				816	curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) \| LAST_MASK
				817	\| PAGE_PRESENT_MASK;
				818
				819	if (is_huge)
				820	write_final_pte(ctx, hop3_pte_addr, curr_pte);
				821	else
				822	write_final_pte(ctx, hop4_pte_addr, curr_pte);
				823
				824	if (hop1_new) {
				825	curr_pte =
				826	(hop1_addr & PTE_PHYS_ADDR_MASK) \| PAGE_PRESENT_MASK;
				827	write_pte(ctx, hop0_pte_addr, curr_pte);
				828	}
				829	if (hop2_new) {
				830	curr_pte =
				831	(hop2_addr & PTE_PHYS_ADDR_MASK) \| PAGE_PRESENT_MASK;
				832	write_pte(ctx, hop1_pte_addr, curr_pte);
				833	get_pte(ctx, hop1_addr);
				834	}
				835	if (hop3_new) {
				836	curr_pte =
				837	(hop3_addr & PTE_PHYS_ADDR_MASK) \| PAGE_PRESENT_MASK;
				838	write_pte(ctx, hop2_pte_addr, curr_pte);
				839	get_pte(ctx, hop2_addr);
				840	}
				841
				842	if (!is_huge) {
				843	if (hop4_new) {
				844	curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) \|
				845	PAGE_PRESENT_MASK;
				846	write_pte(ctx, hop3_pte_addr, curr_pte);
				847	get_pte(ctx, hop3_addr);
				848	}
				849
				850	get_pte(ctx, hop4_addr);
				851	} else {
				852	get_pte(ctx, hop3_addr);
				853	}
				854
				855	flush(ctx);
				856
				857	return 0;
				858
				859	err:
				860	if (hop4_new)
				861	free_hop(ctx, hop4_addr);
				862	if (hop3_new)
				863	free_hop(ctx, hop3_addr);
				864	if (hop2_new)
				865	free_hop(ctx, hop2_addr);
				866	if (hop1_new)
				867	free_hop(ctx, hop1_addr);
				868
				869	return rc;
				870	}
				871
				872	/*
				873	* hl_mmu_map - maps a virtual addr to physical addr
				874	*
				875	* @ctx: pointer to the context structure
				876	* @virt_addr: virt addr to map from
				877	* @phys_addr: phys addr to map to
				878	* @page_size: physical page size
				879	*
				880	* This function does the following:
				881	* - Check that the virt addr is not mapped
				882	* - Allocate pgts as necessary in order to map the virt addr to the phys
				883	* - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
				884	*
				885	* Because this function changes the page tables in the device and because it
				886	* changes the MMU hash, it must be protected by a lock.
				887	* However, because it maps only a single page, the lock should be implemented
				888	* in a higher level in order to protect the entire mapping of the memory area
				889	*/
				890	int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
				891	{
				892	struct hl_device *hdev = ctx->hdev;
				893	u64 real_virt_addr, real_phys_addr;
				894	u32 real_page_size, npages;
				895	int i, rc, mapped_cnt = 0;
				896
				897	if (!hdev->mmu_enable)
				898	return 0;
				899
				900	/*
				901	* The H/W handles mapping of 4KB/2MB page. Hence if the host page size
				902	* is bigger, we break it to sub-pages and map them separately.
				903	*/
				904	if ((page_size % PAGE_SIZE_2MB) == 0) {
				905	real_page_size = PAGE_SIZE_2MB;
				906	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
				907	real_page_size = PAGE_SIZE_4KB;
				908	} else {
				909	dev_err(hdev->dev,
				910	"page size of %u is not 4KB nor 2MB aligned, can't map\n",
				911	page_size);
				912
				913	return -EFAULT;
				914	}
				915
				916	WARN_ONCE((phys_addr & (real_page_size - 1)),
				917	"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
				918	phys_addr, real_page_size);
				919
				920	npages = page_size / real_page_size;
				921	real_virt_addr = virt_addr;
				922	real_phys_addr = phys_addr;
				923
				924	for (i = 0 ; i < npages ; i++) {
				925	rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
				926	real_page_size);
				927	if (rc)
				928	goto err;
				929
				930	real_virt_addr += real_page_size;
				931	real_phys_addr += real_page_size;
				932	mapped_cnt++;
				933	}
				934
				935	return 0;
				936
				937	err:
				938	real_virt_addr = virt_addr;
				939	for (i = 0 ; i < mapped_cnt ; i++) {
				940	if (_hl_mmu_unmap(ctx, real_virt_addr))
				941	dev_warn_ratelimited(hdev->dev,
				942	"failed to unmap va: 0x%llx\n", real_virt_addr);
				943
				944	real_virt_addr += real_page_size;
				945	}
				946
				947	return rc;
				948	}
				949
				950	/*
				951	* hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
				952	*
				953	* @ctx: pointer to the context structure
				954	*
				955	*/
				956	void hl_mmu_swap_out(struct hl_ctx *ctx)
				957	{
				958
				959	}
				960
				961	/*
				962	* hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
				963	*
				964	* @ctx: pointer to the context structure
				965	*
				966	*/
				967	void hl_mmu_swap_in(struct hl_ctx *ctx)
				968	{
				969
				970	}