Update Linux to v5.10.109

Sourced from [1]

[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz

Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index fb792e8..0a89f95 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -10,6 +10,9 @@
 #define FASTRPC_IOCTL_INVOKE		_IOWR('R', 3, struct fastrpc_invoke)
 #define FASTRPC_IOCTL_INIT_ATTACH	_IO('R', 4)
 #define FASTRPC_IOCTL_INIT_CREATE	_IOWR('R', 5, struct fastrpc_init_create)
+#define FASTRPC_IOCTL_MMAP		_IOWR('R', 6, struct fastrpc_req_mmap)
+#define FASTRPC_IOCTL_MUNMAP		_IOWR('R', 7, struct fastrpc_req_munmap)
+#define FASTRPC_IOCTL_INIT_ATTACH_SNS	_IO('R', 8)
 
 struct fastrpc_invoke_args {
 	__u64 ptr;
@@ -38,4 +41,17 @@
 	__u64 size;	/* size */
 };
 
+struct fastrpc_req_mmap {
+	__s32 fd;
+	__u32 flags;	/* flags for dsp to map with */
+	__u64 vaddrin;	/* optional virtual address */
+	__u64 size;	/* size */
+	__u64 vaddrout;	/* dsp virtual address */
+};
+
+struct fastrpc_req_munmap {
+	__u64 vaddrout;	/* address to unmap */
+	__u64 size;	/* size */
+};
+
 #endif /* __QCOM_FASTRPC_H__ */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 39c4ea5..9705b8a 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
  *
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2020 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -15,10 +15,13 @@
  * Defines that are asic-specific but constitutes as ABI between kernel driver
  * and userspace
  */
-#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */
+#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START		0x8000	/* 32KB */
+#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START	0x80	/* 128 bytes */
 
+#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT		48
+#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR		24
 /*
- * Queue Numbering
+ * Goya queue Numbering
  *
  * The external queues (PCI DMA channels) MUST be before the internal queues
  * and each group (PCI DMA channels and internal) must be contiguous inside
@@ -46,6 +49,129 @@
 };
 
 /*
+ * Gaudi queue Numbering
+ * External queues (PCI DMA channels) are DMA_0_*, DMA_1_* and DMA_5_*.
+ * Except one CPU queue, all the rest are internal queues.
+ */
+
+enum gaudi_queue_id {
+	GAUDI_QUEUE_ID_DMA_0_0 = 0,	/* external */
+	GAUDI_QUEUE_ID_DMA_0_1 = 1,	/* external */
+	GAUDI_QUEUE_ID_DMA_0_2 = 2,	/* external */
+	GAUDI_QUEUE_ID_DMA_0_3 = 3,	/* external */
+	GAUDI_QUEUE_ID_DMA_1_0 = 4,	/* external */
+	GAUDI_QUEUE_ID_DMA_1_1 = 5,	/* external */
+	GAUDI_QUEUE_ID_DMA_1_2 = 6,	/* external */
+	GAUDI_QUEUE_ID_DMA_1_3 = 7,	/* external */
+	GAUDI_QUEUE_ID_CPU_PQ = 8,	/* CPU */
+	GAUDI_QUEUE_ID_DMA_2_0 = 9,	/* internal */
+	GAUDI_QUEUE_ID_DMA_2_1 = 10,	/* internal */
+	GAUDI_QUEUE_ID_DMA_2_2 = 11,	/* internal */
+	GAUDI_QUEUE_ID_DMA_2_3 = 12,	/* internal */
+	GAUDI_QUEUE_ID_DMA_3_0 = 13,	/* internal */
+	GAUDI_QUEUE_ID_DMA_3_1 = 14,	/* internal */
+	GAUDI_QUEUE_ID_DMA_3_2 = 15,	/* internal */
+	GAUDI_QUEUE_ID_DMA_3_3 = 16,	/* internal */
+	GAUDI_QUEUE_ID_DMA_4_0 = 17,	/* internal */
+	GAUDI_QUEUE_ID_DMA_4_1 = 18,	/* internal */
+	GAUDI_QUEUE_ID_DMA_4_2 = 19,	/* internal */
+	GAUDI_QUEUE_ID_DMA_4_3 = 20,	/* internal */
+	GAUDI_QUEUE_ID_DMA_5_0 = 21,	/* external */
+	GAUDI_QUEUE_ID_DMA_5_1 = 22,	/* external */
+	GAUDI_QUEUE_ID_DMA_5_2 = 23,	/* external */
+	GAUDI_QUEUE_ID_DMA_5_3 = 24,	/* external */
+	GAUDI_QUEUE_ID_DMA_6_0 = 25,	/* internal */
+	GAUDI_QUEUE_ID_DMA_6_1 = 26,	/* internal */
+	GAUDI_QUEUE_ID_DMA_6_2 = 27,	/* internal */
+	GAUDI_QUEUE_ID_DMA_6_3 = 28,	/* internal */
+	GAUDI_QUEUE_ID_DMA_7_0 = 29,	/* internal */
+	GAUDI_QUEUE_ID_DMA_7_1 = 30,	/* internal */
+	GAUDI_QUEUE_ID_DMA_7_2 = 31,	/* internal */
+	GAUDI_QUEUE_ID_DMA_7_3 = 32,	/* internal */
+	GAUDI_QUEUE_ID_MME_0_0 = 33,	/* internal */
+	GAUDI_QUEUE_ID_MME_0_1 = 34,	/* internal */
+	GAUDI_QUEUE_ID_MME_0_2 = 35,	/* internal */
+	GAUDI_QUEUE_ID_MME_0_3 = 36,	/* internal */
+	GAUDI_QUEUE_ID_MME_1_0 = 37,	/* internal */
+	GAUDI_QUEUE_ID_MME_1_1 = 38,	/* internal */
+	GAUDI_QUEUE_ID_MME_1_2 = 39,	/* internal */
+	GAUDI_QUEUE_ID_MME_1_3 = 40,	/* internal */
+	GAUDI_QUEUE_ID_TPC_0_0 = 41,	/* internal */
+	GAUDI_QUEUE_ID_TPC_0_1 = 42,	/* internal */
+	GAUDI_QUEUE_ID_TPC_0_2 = 43,	/* internal */
+	GAUDI_QUEUE_ID_TPC_0_3 = 44,	/* internal */
+	GAUDI_QUEUE_ID_TPC_1_0 = 45,	/* internal */
+	GAUDI_QUEUE_ID_TPC_1_1 = 46,	/* internal */
+	GAUDI_QUEUE_ID_TPC_1_2 = 47,	/* internal */
+	GAUDI_QUEUE_ID_TPC_1_3 = 48,	/* internal */
+	GAUDI_QUEUE_ID_TPC_2_0 = 49,	/* internal */
+	GAUDI_QUEUE_ID_TPC_2_1 = 50,	/* internal */
+	GAUDI_QUEUE_ID_TPC_2_2 = 51,	/* internal */
+	GAUDI_QUEUE_ID_TPC_2_3 = 52,	/* internal */
+	GAUDI_QUEUE_ID_TPC_3_0 = 53,	/* internal */
+	GAUDI_QUEUE_ID_TPC_3_1 = 54,	/* internal */
+	GAUDI_QUEUE_ID_TPC_3_2 = 55,	/* internal */
+	GAUDI_QUEUE_ID_TPC_3_3 = 56,	/* internal */
+	GAUDI_QUEUE_ID_TPC_4_0 = 57,	/* internal */
+	GAUDI_QUEUE_ID_TPC_4_1 = 58,	/* internal */
+	GAUDI_QUEUE_ID_TPC_4_2 = 59,	/* internal */
+	GAUDI_QUEUE_ID_TPC_4_3 = 60,	/* internal */
+	GAUDI_QUEUE_ID_TPC_5_0 = 61,	/* internal */
+	GAUDI_QUEUE_ID_TPC_5_1 = 62,	/* internal */
+	GAUDI_QUEUE_ID_TPC_5_2 = 63,	/* internal */
+	GAUDI_QUEUE_ID_TPC_5_3 = 64,	/* internal */
+	GAUDI_QUEUE_ID_TPC_6_0 = 65,	/* internal */
+	GAUDI_QUEUE_ID_TPC_6_1 = 66,	/* internal */
+	GAUDI_QUEUE_ID_TPC_6_2 = 67,	/* internal */
+	GAUDI_QUEUE_ID_TPC_6_3 = 68,	/* internal */
+	GAUDI_QUEUE_ID_TPC_7_0 = 69,	/* internal */
+	GAUDI_QUEUE_ID_TPC_7_1 = 70,	/* internal */
+	GAUDI_QUEUE_ID_TPC_7_2 = 71,	/* internal */
+	GAUDI_QUEUE_ID_TPC_7_3 = 72,	/* internal */
+	GAUDI_QUEUE_ID_NIC_0_0 = 73,	/* internal */
+	GAUDI_QUEUE_ID_NIC_0_1 = 74,	/* internal */
+	GAUDI_QUEUE_ID_NIC_0_2 = 75,	/* internal */
+	GAUDI_QUEUE_ID_NIC_0_3 = 76,	/* internal */
+	GAUDI_QUEUE_ID_NIC_1_0 = 77,	/* internal */
+	GAUDI_QUEUE_ID_NIC_1_1 = 78,	/* internal */
+	GAUDI_QUEUE_ID_NIC_1_2 = 79,	/* internal */
+	GAUDI_QUEUE_ID_NIC_1_3 = 80,	/* internal */
+	GAUDI_QUEUE_ID_NIC_2_0 = 81,	/* internal */
+	GAUDI_QUEUE_ID_NIC_2_1 = 82,	/* internal */
+	GAUDI_QUEUE_ID_NIC_2_2 = 83,	/* internal */
+	GAUDI_QUEUE_ID_NIC_2_3 = 84,	/* internal */
+	GAUDI_QUEUE_ID_NIC_3_0 = 85,	/* internal */
+	GAUDI_QUEUE_ID_NIC_3_1 = 86,	/* internal */
+	GAUDI_QUEUE_ID_NIC_3_2 = 87,	/* internal */
+	GAUDI_QUEUE_ID_NIC_3_3 = 88,	/* internal */
+	GAUDI_QUEUE_ID_NIC_4_0 = 89,	/* internal */
+	GAUDI_QUEUE_ID_NIC_4_1 = 90,	/* internal */
+	GAUDI_QUEUE_ID_NIC_4_2 = 91,	/* internal */
+	GAUDI_QUEUE_ID_NIC_4_3 = 92,	/* internal */
+	GAUDI_QUEUE_ID_NIC_5_0 = 93,	/* internal */
+	GAUDI_QUEUE_ID_NIC_5_1 = 94,	/* internal */
+	GAUDI_QUEUE_ID_NIC_5_2 = 95,	/* internal */
+	GAUDI_QUEUE_ID_NIC_5_3 = 96,	/* internal */
+	GAUDI_QUEUE_ID_NIC_6_0 = 97,	/* internal */
+	GAUDI_QUEUE_ID_NIC_6_1 = 98,	/* internal */
+	GAUDI_QUEUE_ID_NIC_6_2 = 99,	/* internal */
+	GAUDI_QUEUE_ID_NIC_6_3 = 100,	/* internal */
+	GAUDI_QUEUE_ID_NIC_7_0 = 101,	/* internal */
+	GAUDI_QUEUE_ID_NIC_7_1 = 102,	/* internal */
+	GAUDI_QUEUE_ID_NIC_7_2 = 103,	/* internal */
+	GAUDI_QUEUE_ID_NIC_7_3 = 104,	/* internal */
+	GAUDI_QUEUE_ID_NIC_8_0 = 105,	/* internal */
+	GAUDI_QUEUE_ID_NIC_8_1 = 106,	/* internal */
+	GAUDI_QUEUE_ID_NIC_8_2 = 107,	/* internal */
+	GAUDI_QUEUE_ID_NIC_8_3 = 108,	/* internal */
+	GAUDI_QUEUE_ID_NIC_9_0 = 109,	/* internal */
+	GAUDI_QUEUE_ID_NIC_9_1 = 110,	/* internal */
+	GAUDI_QUEUE_ID_NIC_9_2 = 111,	/* internal */
+	GAUDI_QUEUE_ID_NIC_9_3 = 112,	/* internal */
+	GAUDI_QUEUE_ID_SIZE
+};
+
+/*
  * Engine Numbering
  *
  * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
@@ -69,6 +195,40 @@
 	GOYA_ENGINE_ID_SIZE
 };
 
+enum gaudi_engine_id {
+	GAUDI_ENGINE_ID_DMA_0 = 0,
+	GAUDI_ENGINE_ID_DMA_1,
+	GAUDI_ENGINE_ID_DMA_2,
+	GAUDI_ENGINE_ID_DMA_3,
+	GAUDI_ENGINE_ID_DMA_4,
+	GAUDI_ENGINE_ID_DMA_5,
+	GAUDI_ENGINE_ID_DMA_6,
+	GAUDI_ENGINE_ID_DMA_7,
+	GAUDI_ENGINE_ID_MME_0,
+	GAUDI_ENGINE_ID_MME_1,
+	GAUDI_ENGINE_ID_MME_2,
+	GAUDI_ENGINE_ID_MME_3,
+	GAUDI_ENGINE_ID_TPC_0,
+	GAUDI_ENGINE_ID_TPC_1,
+	GAUDI_ENGINE_ID_TPC_2,
+	GAUDI_ENGINE_ID_TPC_3,
+	GAUDI_ENGINE_ID_TPC_4,
+	GAUDI_ENGINE_ID_TPC_5,
+	GAUDI_ENGINE_ID_TPC_6,
+	GAUDI_ENGINE_ID_TPC_7,
+	GAUDI_ENGINE_ID_NIC_0,
+	GAUDI_ENGINE_ID_NIC_1,
+	GAUDI_ENGINE_ID_NIC_2,
+	GAUDI_ENGINE_ID_NIC_3,
+	GAUDI_ENGINE_ID_NIC_4,
+	GAUDI_ENGINE_ID_NIC_5,
+	GAUDI_ENGINE_ID_NIC_6,
+	GAUDI_ENGINE_ID_NIC_7,
+	GAUDI_ENGINE_ID_NIC_8,
+	GAUDI_ENGINE_ID_NIC_9,
+	GAUDI_ENGINE_ID_SIZE
+};
+
 enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,
@@ -88,13 +248,26 @@
  *                         internal engine.
  * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't
  *                         require an open context.
- * HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device
- *                              over the last period specified by the user.
- *                              The period can be between 100ms to 1s, in
- *                              resolution of 100ms. The return value is a
- *                              percentage of the utilization rate.
+ * HL_INFO_DEVICE_UTILIZATION  - Retrieve the total utilization of the device
+ *                               over the last period specified by the user.
+ *                               The period can be between 100ms to 1s, in
+ *                               resolution of 100ms. The return value is a
+ *                               percentage of the utilization rate.
  * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
  *                               event occurred since the driver was loaded.
+ * HL_INFO_CLK_RATE            - Retrieve the current and maximum clock rate
+ *                               of the device in MHz. The maximum clock rate is
+ *                               configurable via sysfs parameter
+ * HL_INFO_RESET_COUNT   - Retrieve the counts of the soft and hard reset
+ *                         operations performed on the device since the last
+ *                         time the driver was loaded.
+ * HL_INFO_TIME_SYNC     - Retrieve the device's time alongside the host's time
+ *                         for synchronization.
+ * HL_INFO_CS_COUNTERS   - Retrieve command submission counters
+ * HL_INFO_PCI_COUNTERS  - Retrieve PCI counters
+ * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
+ * HL_INFO_SYNC_MANAGER  - Retrieve sync manager info per dcore
+ * HL_INFO_TOTAL_ENERGY  - Retrieve total energy consumption
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -103,8 +276,17 @@
 #define HL_INFO_DEVICE_STATUS		4
 #define HL_INFO_DEVICE_UTILIZATION	6
 #define HL_INFO_HW_EVENTS_AGGREGATE	7
+#define HL_INFO_CLK_RATE		8
+#define HL_INFO_RESET_COUNT		9
+#define HL_INFO_TIME_SYNC		10
+#define HL_INFO_CS_COUNTERS		11
+#define HL_INFO_PCI_COUNTERS		12
+#define HL_INFO_CLK_THROTTLE_REASON	13
+#define HL_INFO_SYNC_MANAGER		14
+#define HL_INFO_TOTAL_ENERGY		15
 
 #define HL_INFO_VERSION_MAX_LEN	128
+#define HL_INFO_CARD_NAME_MAX_LEN	16
 
 struct hl_info_hw_ip_info {
 	__u64 sram_base_address;
@@ -113,8 +295,9 @@
 	__u32 sram_size;
 	__u32 num_of_events;
 	__u32 device_id; /* PCI Device ID */
-	__u32 reserved[3];
-	__u32 armcp_cpld_version;
+	__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
+	__u32 reserved[2];
+	__u32 cpld_version;
 	__u32 psoc_pci_pll_nr;
 	__u32 psoc_pci_pll_nf;
 	__u32 psoc_pci_pll_od;
@@ -122,7 +305,8 @@
 	__u8 tpc_enabled_mask;
 	__u8 dram_enabled;
 	__u8 pad[2];
-	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
+	__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
+	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
 };
 
 struct hl_info_dram_usage {
@@ -137,6 +321,12 @@
 	 * Bits definition is according to `enum <chip>_enging_id'.
 	 */
 	__u32 busy_engines_mask;
+
+	/*
+	 * Extended Bitmask of busy engines.
+	 * Bits definition is according to `enum <chip>_enging_id'.
+	 */
+	__u64 busy_engines_mask_ext;
 };
 
 struct hl_info_device_status {
@@ -149,6 +339,90 @@
 	__u32 pad;
 };
 
+struct hl_info_clk_rate {
+	__u32 cur_clk_rate_mhz;
+	__u32 max_clk_rate_mhz;
+};
+
+struct hl_info_reset_count {
+	__u32 hard_reset_cnt;
+	__u32 soft_reset_cnt;
+};
+
+struct hl_info_time_sync {
+	__u64 device_time;
+	__u64 host_time;
+};
+
+/**
+ * struct hl_info_pci_counters - pci counters
+ * @rx_throughput: PCI rx throughput KBps
+ * @tx_throughput: PCI tx throughput KBps
+ * @replay_cnt: PCI replay counter
+ */
+struct hl_info_pci_counters {
+	__u64 rx_throughput;
+	__u64 tx_throughput;
+	__u64 replay_cnt;
+};
+
+#define HL_CLK_THROTTLE_POWER	0x1
+#define HL_CLK_THROTTLE_THERMAL	0x2
+
+/**
+ * struct hl_info_clk_throttle - clock throttling reason
+ * @clk_throttling_reason: each bit represents a clk throttling reason
+ */
+struct hl_info_clk_throttle {
+	__u32 clk_throttling_reason;
+};
+
+/**
+ * struct hl_info_energy - device energy information
+ * @total_energy_consumption: total device energy consumption
+ */
+struct hl_info_energy {
+	__u64 total_energy_consumption;
+};
+
+/**
+ * struct hl_info_sync_manager - sync manager information
+ * @first_available_sync_object: first available sob
+ * @first_available_monitor: first available monitor
+ */
+struct hl_info_sync_manager {
+	__u32 first_available_sync_object;
+	__u32 first_available_monitor;
+};
+
+/**
+ * struct hl_info_cs_counters - command submission counters
+ * @out_of_mem_drop_cnt: dropped due to memory allocation issue
+ * @parsing_drop_cnt: dropped due to error in packet parsing
+ * @queue_full_drop_cnt: dropped due to queue full
+ * @device_in_reset_drop_cnt: dropped due to device in reset
+ * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
+ */
+struct hl_cs_counters {
+	__u64 out_of_mem_drop_cnt;
+	__u64 parsing_drop_cnt;
+	__u64 queue_full_drop_cnt;
+	__u64 device_in_reset_drop_cnt;
+	__u64 max_cs_in_flight_drop_cnt;
+};
+
+struct hl_info_cs_counters {
+	struct hl_cs_counters cs_counters;
+	struct hl_cs_counters ctx_cs_counters;
+};
+
+enum gaudi_dcores {
+	HL_GAUDI_WS_DCORE,
+	HL_GAUDI_WN_DCORE,
+	HL_GAUDI_EN_DCORE,
+	HL_GAUDI_ES_DCORE
+};
+
 struct hl_info_args {
 	/* Location of relevant struct in userspace */
 	__u64 return_pointer;
@@ -165,6 +439,10 @@
 	__u32 op;
 
 	union {
+		/* Dcore id for which the information is relevant.
+		 * For Gaudi refer to 'enum gaudi_dcores'
+		 */
+		__u32 dcore_id;
 		/* Context ID - Currently not in use */
 		__u32 ctx_id;
 		/* Period value for utilization rate (100ms - 1000ms, in 100ms
@@ -181,18 +459,25 @@
 /* Opcode to destroy previously created command buffer */
 #define HL_CB_OP_DESTROY	1
 
+/* 2MB minus 32 bytes for 2xMSG_PROT */
+#define HL_MAX_CB_SIZE		(0x200000 - 32)
+
+/* Indicates whether the command buffer should be mapped to the device's MMU */
+#define HL_CB_FLAGS_MAP		0x1
+
 struct hl_cb_in {
 	/* Handle of CB or 0 if we want to create one */
 	__u64 cb_handle;
 	/* HL_CB_OP_* */
 	__u32 op;
-	/* Size of CB. Maximum size is 2MB. The minimum size that will be
-	 * allocated, regardless of this parameter's value, is PAGE_SIZE
+	/* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
+	 * will be allocated, regardless of this parameter's value, is PAGE_SIZE
 	 */
 	__u32 cb_size;
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
-	__u32 pad;
+	/* HL_CB_FLAGS_* */
+	__u32 flags;
 };
 
 struct hl_cb_out {
@@ -210,46 +495,87 @@
  * compatibility
  */
 struct hl_cs_chunk {
-	/*
-	 * For external queue, this represents a Handle of CB on the Host
-	 * For internal queue, this represents an SRAM or DRAM address of the
-	 * internal CB
-	 */
-	__u64 cb_handle;
+	union {
+		/* For external queue, this represents a Handle of CB on the
+		 * Host.
+		 * For internal queue in Goya, this represents an SRAM or
+		 * a DRAM address of the internal CB. In Gaudi, this might also
+		 * represent a mapped host address of the CB.
+		 *
+		 * A mapped host address is in the device address space, after
+		 * a host address was mapped by the device MMU.
+		 */
+		__u64 cb_handle;
+
+		/* Relevant only when HL_CS_FLAGS_WAIT is set.
+		 * This holds address of array of u64 values that contain
+		 * signal CS sequence numbers. The wait described by this job
+		 * will listen on all those signals (wait event per signal)
+		 */
+		__u64 signal_seq_arr;
+	};
+
 	/* Index of queue to put the CB on */
 	__u32 queue_index;
-	/*
-	 * Size of command buffer with valid packets
-	 * Can be smaller then actual CB size
-	 */
-	__u32 cb_size;
+
+	union {
+		/*
+		 * Size of command buffer with valid packets
+		 * Can be smaller then actual CB size
+		 */
+		__u32 cb_size;
+
+		/* Relevant only when HL_CS_FLAGS_WAIT is set.
+		 * Number of entries in signal_seq_arr
+		 */
+		__u32 num_signal_seq_arr;
+	};
+
 	/* HL_CS_CHUNK_FLAGS_* */
 	__u32 cs_chunk_flags;
+
 	/* Align structure to 64 bytes */
 	__u32 pad[11];
 };
 
+/* SIGNAL and WAIT flags are mutually exclusive */
 #define HL_CS_FLAGS_FORCE_RESTORE	0x1
+#define HL_CS_FLAGS_SIGNAL		0x2
+#define HL_CS_FLAGS_WAIT		0x4
 
 #define HL_CS_STATUS_SUCCESS		0
 
+#define HL_MAX_JOBS_PER_CS		512
+
 struct hl_cs_in {
+
 	/* this holds address of array of hl_cs_chunk for restore phase */
 	__u64 chunks_restore;
-	/* this holds address of array of hl_cs_chunk for execution phase */
+
+	/* holds address of array of hl_cs_chunk for execution phase */
 	__u64 chunks_execute;
+
 	/* this holds address of array of hl_cs_chunk for store phase -
 	 * Currently not in use
 	 */
 	__u64 chunks_store;
-	/* Number of chunks in restore phase array */
+
+	/* Number of chunks in restore phase array. Maximum number is
+	 * HL_MAX_JOBS_PER_CS
+	 */
 	__u32 num_chunks_restore;
-	/* Number of chunks in execution array */
+
+	/* Number of chunks in execution array. Maximum number is
+	 * HL_MAX_JOBS_PER_CS
+	 */
 	__u32 num_chunks_execute;
+
 	/* Number of chunks in restore phase array - Currently not in use */
 	__u32 num_chunks_store;
+
 	/* HL_CS_FLAGS_* */
 	__u32 cs_flags;
+
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
 };
@@ -297,13 +623,13 @@
 	struct hl_wait_cs_out out;
 };
 
-/* Opcode to alloc device memory */
+/* Opcode to allocate device memory */
 #define HL_MEM_OP_ALLOC			0
 /* Opcode to free previously allocated device memory */
 #define HL_MEM_OP_FREE			1
-/* Opcode to map host memory */
+/* Opcode to map host and device memory */
 #define HL_MEM_OP_MAP			2
-/* Opcode to unmap previously mapped host memory */
+/* Opcode to unmap previously mapped host and device memory */
 #define HL_MEM_OP_UNMAP			3
 
 /* Memory flags */
@@ -534,6 +860,12 @@
  * When creating a new CB, the IOCTL returns a handle of it, and the user-space
  * process needs to use that handle to mmap the buffer so it can access them.
  *
+ * In some instances, the device must access the command buffer through the
+ * device's MMU, and thus its memory should be mapped. In these cases, user can
+ * indicate the driver that such a mapping is required.
+ * The resulting device virtual address will be used internally by the driver,
+ * and won't be returned to user.
+ *
  */
 #define HL_IOCTL_CB		\
 		_IOWR('H', 0x02, union hl_cb_args)
@@ -560,8 +892,8 @@
  * For jobs on external queues, the user needs to create command buffers
  * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
  * internal queues, the user needs to prepare a "command buffer" with packets
- * on either the SRAM or DRAM, and give the device address of that buffer to
- * the CS ioctl.
+ * on either the device SRAM/DRAM or the host, and give the device address of
+ * that buffer to the CS ioctl.
  *
  * This IOCTL is asynchronous in regard to the actual execution of the CS. This
  * means it returns immediately after ALL the JOBS were enqueued on their
@@ -573,7 +905,7 @@
  * external JOBS have been completed. Note that if the CS has internal JOBS
  * which can execute AFTER the external JOBS have finished, the driver might
  * report that the CS has finished executing BEFORE the internal JOBS have
- * actually finish executing.
+ * actually finished executing.
  *
  * Even though the sequence number increments per CS, the user can NOT
  * automatically assume that if CS with sequence number N finished, then CS
@@ -589,9 +921,12 @@
  *
  * The user can call this IOCTL with a handle it received from the CS IOCTL
  * to wait until the handle's CS has finished executing. The user will wait
- * inside the kernel until the CS has finished or until the user-requeusted
+ * inside the kernel until the CS has finished or until the user-requested
  * timeout has expired.
  *
+ * If the timeout value is 0, the driver won't sleep at all. It will check
+ * the status of the CS and return immediately
+ *
  * The return value of the IOCTL is a standard Linux error code. The possible
  * values are:
  *
diff --git a/include/uapi/misc/pvpanic.h b/include/uapi/misc/pvpanic.h
new file mode 100644
index 0000000..54b7485
--- /dev/null
+++ b/include/uapi/misc/pvpanic.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef __PVPANIC_H__
+#define __PVPANIC_H__
+
+#define PVPANIC_PANICKED	(1 << 0)
+#define PVPANIC_CRASH_LOADED	(1 << 1)
+
+#endif /* __PVPANIC_H__ */
diff --git a/include/uapi/misc/uacce/hisi_qm.h b/include/uapi/misc/uacce/hisi_qm.h
new file mode 100644
index 0000000..6435f0b
--- /dev/null
+++ b/include/uapi/misc/uacce/hisi_qm.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _UAPI_HISI_QM_H
+#define _UAPI_HISI_QM_H
+
+#include <linux/types.h>
+
+/**
+ * struct hisi_qp_ctx - User data for hisi qp.
+ * @id: qp_index return to user space
+ * @qc_type: Accelerator algorithm type
+ */
+struct hisi_qp_ctx {
+	__u16 id;
+	__u16 qc_type;
+};
+
+#define HISI_QM_API_VER_BASE "hisi_qm_v1"
+#define HISI_QM_API_VER2_BASE "hisi_qm_v2"
+
+/* UACCE_CMD_QM_SET_QP_CTX: Set qp algorithm type */
+#define UACCE_CMD_QM_SET_QP_CTX	_IOWR('H', 10, struct hisi_qp_ctx)
+
+#endif
diff --git a/include/uapi/misc/uacce/uacce.h b/include/uapi/misc/uacce/uacce.h
new file mode 100644
index 0000000..cc71856
--- /dev/null
+++ b/include/uapi/misc/uacce/uacce.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _UAPIUUACCE_H
+#define _UAPIUUACCE_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * UACCE_CMD_START_Q: Start queue
+ */
+#define UACCE_CMD_START_Q	_IO('W', 0)
+
+/*
+ * UACCE_CMD_PUT_Q:
+ * User actively stop queue and free queue resource immediately
+ * Optimization method since close fd may delay
+ */
+#define UACCE_CMD_PUT_Q		_IO('W', 1)
+
+/*
+ * UACCE Device flags:
+ * UACCE_DEV_SVA: Shared Virtual Addresses
+ *		  Support PASID
+ *		  Support device page faults (PCI PRI or SMMU Stall)
+ */
+#define UACCE_DEV_SVA		BIT(0)
+
+/**
+ * enum uacce_qfrt: queue file region type
+ * @UACCE_QFRT_MMIO: device mmio region
+ * @UACCE_QFRT_DUS: device user share region
+ */
+enum uacce_qfrt {
+	UACCE_QFRT_MMIO = 0,
+	UACCE_QFRT_DUS = 1,
+};
+
+#endif