Blame - arch/powerpc/kernel/eeh_driver.c - hafnium/third_party/linux

blob: 67619b4b3f96c721993ce8b94b0f56cca6b4f418 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	/*
				2	* PCI Error Recovery Driver for RPA-compliant PPC64 platform.
				3	* Copyright IBM Corp. 2004 2005
				4	* Copyright Linas Vepstas <linas@linas.org> 2004, 2005
				5	*
				6	* All rights reserved.
				7	*
				8	* This program is free software; you can redistribute it and/or modify
				9	* it under the terms of the GNU General Public License as published by
				10	* the Free Software Foundation; either version 2 of the License, or (at
				11	* your option) any later version.
				12	*
				13	* This program is distributed in the hope that it will be useful, but
				14	* WITHOUT ANY WARRANTY; without even the implied warranty of
				15	* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
				16	* NON INFRINGEMENT. See the GNU General Public License for more
				17	* details.
				18	*
				19	* You should have received a copy of the GNU General Public License
				20	* along with this program; if not, write to the Free Software
				21	* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
				22	*
				23	* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
				24	*/
				25	#include <linux/delay.h>
				26	#include <linux/interrupt.h>
				27	#include <linux/irq.h>
				28	#include <linux/module.h>
				29	#include <linux/pci.h>
				30	#include <asm/eeh.h>
				31	#include <asm/eeh_event.h>
				32	#include <asm/ppc-pci.h>
				33	#include <asm/pci-bridge.h>
				34	#include <asm/prom.h>
				35	#include <asm/rtas.h>
				36
				37	struct eeh_rmv_data {
				38	struct list_head edev_list;
				39	int removed;
				40	};
				41
				42	static int eeh_result_priority(enum pci_ers_result result)
				43	{
				44	switch (result) {
				45	case PCI_ERS_RESULT_NONE:
				46	return 1;
				47	case PCI_ERS_RESULT_NO_AER_DRIVER:
				48	return 2;
				49	case PCI_ERS_RESULT_RECOVERED:
				50	return 3;
				51	case PCI_ERS_RESULT_CAN_RECOVER:
				52	return 4;
				53	case PCI_ERS_RESULT_DISCONNECT:
				54	return 5;
				55	case PCI_ERS_RESULT_NEED_RESET:
				56	return 6;
				57	default:
				58	WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
				59	return 0;
				60	}
				61	};
				62
				63	const char *pci_ers_result_name(enum pci_ers_result result)
				64	{
				65	switch (result) {
				66	case PCI_ERS_RESULT_NONE:
				67	return "none";
				68	case PCI_ERS_RESULT_CAN_RECOVER:
				69	return "can recover";
				70	case PCI_ERS_RESULT_NEED_RESET:
				71	return "need reset";
				72	case PCI_ERS_RESULT_DISCONNECT:
				73	return "disconnect";
				74	case PCI_ERS_RESULT_RECOVERED:
				75	return "recovered";
				76	case PCI_ERS_RESULT_NO_AER_DRIVER:
				77	return "no AER driver";
				78	default:
				79	WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
				80	return "unknown";
				81	}
				82	};
				83
				84	static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
				85	const char *fmt, ...)
				86	{
				87	struct va_format vaf;
				88	va_list args;
				89
				90	va_start(args, fmt);
				91
				92	vaf.fmt = fmt;
				93	vaf.va = &args;
				94
				95	printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
				96	edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
				97
				98	va_end(args);
				99	}
				100
				101	static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
				102	enum pci_ers_result new)
				103	{
				104	if (eeh_result_priority(new) > eeh_result_priority(old))
				105	return new;
				106	return old;
				107	}
				108
				109	static bool eeh_dev_removed(struct eeh_dev *edev)
				110	{
				111	return !edev \|\| (edev->mode & EEH_DEV_REMOVED);
				112	}
				113
				114	static bool eeh_edev_actionable(struct eeh_dev *edev)
				115	{
				116	return (edev->pdev && !eeh_dev_removed(edev) &&
				117	!eeh_pe_passed(edev->pe));
				118	}
				119
				120	/**
				121	* eeh_pcid_get - Get the PCI device driver
				122	* @pdev: PCI device
				123	*
				124	* The function is used to retrieve the PCI device driver for
				125	* the indicated PCI device. Besides, we will increase the reference
				126	* of the PCI device driver to prevent that being unloaded on
				127	* the fly. Otherwise, kernel crash would be seen.
				128	*/
				129	static inline struct pci_driver eeh_pcid_get(struct pci_dev pdev)
				130	{
				131	if (!pdev \|\| !pdev->driver)
				132	return NULL;
				133
				134	if (!try_module_get(pdev->driver->driver.owner))
				135	return NULL;
				136
				137	return pdev->driver;
				138	}
				139
				140	/**
				141	* eeh_pcid_put - Dereference on the PCI device driver
				142	* @pdev: PCI device
				143	*
				144	* The function is called to do dereference on the PCI device
				145	* driver of the indicated PCI device.
				146	*/
				147	static inline void eeh_pcid_put(struct pci_dev *pdev)
				148	{
				149	if (!pdev \|\| !pdev->driver)
				150	return;
				151
				152	module_put(pdev->driver->driver.owner);
				153	}
				154
				155	/**
				156	* eeh_disable_irq - Disable interrupt for the recovering device
				157	* @dev: PCI device
				158	*
				159	* This routine must be called when reporting temporary or permanent
				160	* error to the particular PCI device to disable interrupt of that
				161	* device. If the device has enabled MSI or MSI-X interrupt, we needn't
				162	* do real work because EEH should freeze DMA transfers for those PCI
				163	* devices encountering EEH errors, which includes MSI or MSI-X.
				164	*/
				165	static void eeh_disable_irq(struct eeh_dev *edev)
				166	{
				167	/* Don't disable MSI and MSI-X interrupts. They are
				168	* effectively disabled by the DMA Stopped state
				169	* when an EEH error occurs.
				170	*/
				171	if (edev->pdev->msi_enabled \|\| edev->pdev->msix_enabled)
				172	return;
				173
				174	if (!irq_has_action(edev->pdev->irq))
				175	return;
				176
				177	edev->mode \|= EEH_DEV_IRQ_DISABLED;
				178	disable_irq_nosync(edev->pdev->irq);
				179	}
				180
				181	/**
				182	* eeh_enable_irq - Enable interrupt for the recovering device
				183	* @dev: PCI device
				184	*
				185	* This routine must be called to enable interrupt while failed
				186	* device could be resumed.
				187	*/
				188	static void eeh_enable_irq(struct eeh_dev *edev)
				189	{
				190	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
				191	edev->mode &= ~EEH_DEV_IRQ_DISABLED;
				192	/*
				193	* FIXME !!!!!
				194	*
				195	* This is just ass backwards. This maze has
				196	* unbalanced irq_enable/disable calls. So instead of
				197	* finding the root cause it works around the warning
				198	* in the irq_enable code by conditionally calling
				199	* into it.
				200	*
				201	* That's just wrong.The warning in the core code is
				202	* there to tell people to fix their asymmetries in
				203	* their own code, not by abusing the core information
				204	* to avoid it.
				205	*
				206	* I so wish that the assymetry would be the other way
				207	* round and a few more irq_disable calls render that
				208	* shit unusable forever.
				209	*
				210	* tglx
				211	*/
				212	if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
				213	enable_irq(edev->pdev->irq);
				214	}
				215	}
				216
				217	static void eeh_dev_save_state(struct eeh_dev edev, void *userdata)
				218	{
				219	struct pci_dev *pdev;
				220
				221	if (!edev)
				222	return NULL;
				223
				224	/*
				225	* We cannot access the config space on some adapters.
				226	* Otherwise, it will cause fenced PHB. We don't save
				227	* the content in their config space and will restore
				228	* from the initial config space saved when the EEH
				229	* device is created.
				230	*/
				231	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
				232	return NULL;
				233
				234	pdev = eeh_dev_to_pci_dev(edev);
				235	if (!pdev)
				236	return NULL;
				237
				238	pci_save_state(pdev);
				239	return NULL;
				240	}
				241
				242	static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
				243	{
				244	struct eeh_pe *pe;
				245	struct eeh_dev edev, tmp;
				246
				247	eeh_for_each_pe(root, pe)
				248	eeh_pe_for_each_dev(pe, edev, tmp)
				249	if (eeh_edev_actionable(edev))
				250	edev->pdev->error_state = s;
				251	}
				252
				253	static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
				254	{
				255	struct eeh_pe *pe;
				256	struct eeh_dev edev, tmp;
				257
				258	eeh_for_each_pe(root, pe) {
				259	eeh_pe_for_each_dev(pe, edev, tmp) {
				260	if (!eeh_edev_actionable(edev))
				261	continue;
				262
				263	if (!eeh_pcid_get(edev->pdev))
				264	continue;
				265
				266	if (enable)
				267	eeh_enable_irq(edev);
				268	else
				269	eeh_disable_irq(edev);
				270
				271	eeh_pcid_put(edev->pdev);
				272	}
				273	}
				274	}
				275
				276	typedef enum pci_ers_result (eeh_report_fn)(struct eeh_dev ,
				277	struct pci_driver *);
				278	static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
				279	enum pci_ers_result *result)
				280	{
				281	struct pci_driver *driver;
				282	enum pci_ers_result new_result;
				283
				284	device_lock(&edev->pdev->dev);
				285	if (eeh_edev_actionable(edev)) {
				286	driver = eeh_pcid_get(edev->pdev);
				287
				288	if (!driver)
				289	eeh_edev_info(edev, "no driver");
				290	else if (!driver->err_handler)
				291	eeh_edev_info(edev, "driver not EEH aware");
				292	else if (edev->mode & EEH_DEV_NO_HANDLER)
				293	eeh_edev_info(edev, "driver bound too late");
				294	else {
				295	new_result = fn(edev, driver);
				296	eeh_edev_info(edev, "%s driver reports: '%s'",
				297	driver->name,
				298	pci_ers_result_name(new_result));
				299	if (result)
				300	result = pci_ers_merge_result(result,
				301	new_result);
				302	}
				303	if (driver)
				304	eeh_pcid_put(edev->pdev);
				305	} else {
				306	eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
				307	!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
				308	}
				309	device_unlock(&edev->pdev->dev);
				310	}
				311
				312	static void eeh_pe_report(const char name, struct eeh_pe root,
				313	eeh_report_fn fn, enum pci_ers_result *result)
				314	{
				315	struct eeh_pe *pe;
				316	struct eeh_dev edev, tmp;
				317
				318	pr_info("EEH: Beginning: '%s'\n", name);
				319	eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
				320	eeh_pe_report_edev(edev, fn, result);
				321	if (result)
				322	pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
				323	name, pci_ers_result_name(*result));
				324	else
				325	pr_info("EEH: Finished:'%s'", name);
				326	}
				327
				328	/**
				329	* eeh_report_error - Report pci error to each device driver
				330	* @edev: eeh device
				331	* @driver: device's PCI driver
				332	*
				333	* Report an EEH error to each device driver.
				334	*/
				335	static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
				336	struct pci_driver *driver)
				337	{
				338	enum pci_ers_result rc;
				339	struct pci_dev *dev = edev->pdev;
				340
				341	if (!driver->err_handler->error_detected)
				342	return PCI_ERS_RESULT_NONE;
				343
				344	eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
				345	driver->name);
				346	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
				347
				348	edev->in_error = true;
				349	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
				350	return rc;
				351	}
				352
				353	/**
				354	* eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
				355	* @edev: eeh device
				356	* @driver: device's PCI driver
				357	*
				358	* Tells each device driver that IO ports, MMIO and config space I/O
				359	* are now enabled.
				360	*/
				361	static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
				362	struct pci_driver *driver)
				363	{
				364	if (!driver->err_handler->mmio_enabled)
				365	return PCI_ERS_RESULT_NONE;
				366	eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
				367	return driver->err_handler->mmio_enabled(edev->pdev);
				368	}
				369
				370	/**
				371	* eeh_report_reset - Tell device that slot has been reset
				372	* @edev: eeh device
				373	* @driver: device's PCI driver
				374	*
				375	* This routine must be called while EEH tries to reset particular
				376	* PCI device so that the associated PCI device driver could take
				377	* some actions, usually to save data the driver needs so that the
				378	* driver can work again while the device is recovered.
				379	*/
				380	static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
				381	struct pci_driver *driver)
				382	{
				383	if (!driver->err_handler->slot_reset \|\| !edev->in_error)
				384	return PCI_ERS_RESULT_NONE;
				385	eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
				386	return driver->err_handler->slot_reset(edev->pdev);
				387	}
				388
				389	static void eeh_dev_restore_state(struct eeh_dev edev, void *userdata)
				390	{
				391	struct pci_dev *pdev;
				392
				393	if (!edev)
				394	return NULL;
				395
				396	/*
				397	* The content in the config space isn't saved because
				398	* the blocked config space on some adapters. We have
				399	* to restore the initial saved config space when the
				400	* EEH device is created.
				401	*/
				402	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
				403	if (list_is_last(&edev->list, &edev->pe->edevs))
				404	eeh_pe_restore_bars(edev->pe);
				405
				406	return NULL;
				407	}
				408
				409	pdev = eeh_dev_to_pci_dev(edev);
				410	if (!pdev)
				411	return NULL;
				412
				413	pci_restore_state(pdev);
				414	return NULL;
				415	}
				416
				417	/**
				418	* eeh_report_resume - Tell device to resume normal operations
				419	* @edev: eeh device
				420	* @driver: device's PCI driver
				421	*
				422	* This routine must be called to notify the device driver that it
				423	* could resume so that the device driver can do some initialization
				424	* to make the recovered device work again.
				425	*/
				426	static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
				427	struct pci_driver *driver)
				428	{
				429	if (!driver->err_handler->resume \|\| !edev->in_error)
				430	return PCI_ERS_RESULT_NONE;
				431
				432	eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
				433	driver->err_handler->resume(edev->pdev);
				434
				435	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
				436	#ifdef CONFIG_PCI_IOV
				437	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
				438	eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
				439	#endif
				440	return PCI_ERS_RESULT_NONE;
				441	}
				442
				443	/**
				444	* eeh_report_failure - Tell device driver that device is dead.
				445	* @edev: eeh device
				446	* @driver: device's PCI driver
				447	*
				448	* This informs the device driver that the device is permanently
				449	* dead, and that no further recovery attempts will be made on it.
				450	*/
				451	static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
				452	struct pci_driver *driver)
				453	{
				454	enum pci_ers_result rc;
				455
				456	if (!driver->err_handler->error_detected)
				457	return PCI_ERS_RESULT_NONE;
				458
				459	eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
				460	driver->name);
				461	rc = driver->err_handler->error_detected(edev->pdev,
				462	pci_channel_io_perm_failure);
				463
				464	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
				465	return rc;
				466	}
				467
				468	static void eeh_add_virt_device(void data, void *userdata)
				469	{
				470	struct pci_driver *driver;
				471	struct eeh_dev edev = (struct eeh_dev )data;
				472	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
				473	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
				474
				475	if (!(edev->physfn)) {
				476	pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
				477	__func__, pdn->phb->global_number, pdn->busno,
				478	PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
				479	return NULL;
				480	}
				481
				482	driver = eeh_pcid_get(dev);
				483	if (driver) {
				484	if (driver->err_handler) {
				485	eeh_pcid_put(dev);
				486	return NULL;
				487	}
				488	eeh_pcid_put(dev);
				489	}
				490
				491	#ifdef CONFIG_PCI_IOV
				492	pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
				493	#endif
				494	return NULL;
				495	}
				496
				497	static void eeh_rmv_device(struct eeh_dev edev, void *userdata)
				498	{
				499	struct pci_driver *driver;
				500	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
				501	struct eeh_rmv_data rmv_data = (struct eeh_rmv_data )userdata;
				502	int *removed = rmv_data ? &rmv_data->removed : NULL;
				503
				504	/*
				505	* Actually, we should remove the PCI bridges as well.
				506	* However, that's lots of complexity to do that,
				507	* particularly some of devices under the bridge might
				508	* support EEH. So we just care about PCI devices for
				509	* simplicity here.
				510	*/
				511	if (!dev \|\| (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
				512	return NULL;
				513
				514	/*
				515	* We rely on count-based pcibios_release_device() to
				516	* detach permanently offlined PEs. Unfortunately, that's
				517	* not reliable enough. We might have the permanently
				518	* offlined PEs attached, but we needn't take care of
				519	* them and their child devices.
				520	*/
				521	if (eeh_dev_removed(edev))
				522	return NULL;
				523
				524	if (removed) {
				525	if (eeh_pe_passed(edev->pe))
				526	return NULL;
				527	driver = eeh_pcid_get(dev);
				528	if (driver) {
				529	if (driver->err_handler &&
				530	driver->err_handler->error_detected &&
				531	driver->err_handler->slot_reset) {
				532	eeh_pcid_put(dev);
				533	return NULL;
				534	}
				535	eeh_pcid_put(dev);
				536	}
				537	}
				538
				539	/* Remove it from PCI subsystem */
				540	pr_debug("EEH: Removing %s without EEH sensitive driver\n",
				541	pci_name(dev));
				542	edev->bus = dev->bus;
				543	edev->mode \|= EEH_DEV_DISCONNECTED;
				544	if (removed)
				545	(*removed)++;
				546
				547	if (edev->physfn) {
				548	#ifdef CONFIG_PCI_IOV
				549	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
				550
				551	pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
				552	edev->pdev = NULL;
				553
				554	/*
				555	* We have to set the VF PE number to invalid one, which is
				556	* required to plug the VF successfully.
				557	*/
				558	pdn->pe_number = IODA_INVALID_PE;
				559	#endif
				560	if (rmv_data)
				561	list_add(&edev->rmv_list, &rmv_data->edev_list);
				562	} else {
				563	pci_lock_rescan_remove();
				564	pci_stop_and_remove_bus_device(dev);
				565	pci_unlock_rescan_remove();
				566	}
				567
				568	return NULL;
				569	}
				570
				571	static void eeh_pe_detach_dev(struct eeh_pe pe, void *userdata)
				572	{
				573	struct eeh_dev edev, tmp;
				574
				575	eeh_pe_for_each_dev(pe, edev, tmp) {
				576	if (!(edev->mode & EEH_DEV_DISCONNECTED))
				577	continue;
				578
				579	edev->mode &= ~(EEH_DEV_DISCONNECTED \| EEH_DEV_IRQ_DISABLED);
				580	eeh_rmv_from_parent_pe(edev);
				581	}
				582
				583	return NULL;
				584	}
				585
				586	/*
				587	* Explicitly clear PE's frozen state for PowerNV where
				588	* we have frozen PE until BAR restore is completed. It's
				589	* harmless to clear it for pSeries. To be consistent with
				590	* PE reset (for 3 times), we try to clear the frozen state
				591	* for 3 times as well.
				592	*/
				593	static void __eeh_clear_pe_frozen_state(struct eeh_pe pe, void *flag)
				594	{
				595	bool clear_sw_state = (bool )flag;
				596	int i, rc = 1;
				597
				598	for (i = 0; rc && i < 3; i++)
				599	rc = eeh_unfreeze_pe(pe, clear_sw_state);
				600
				601	/* Stop immediately on any errors */
				602	if (rc) {
				603	pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
				604	__func__, rc, pe->phb->global_number, pe->addr);
				605	return (void *)pe;
				606	}
				607
				608	return NULL;
				609	}
				610
				611	static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
				612	bool clear_sw_state)
				613	{
				614	void *rc;
				615
				616	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
				617	if (!rc)
				618	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
				619
				620	return rc ? -EIO : 0;
				621	}
				622
				623	int eeh_pe_reset_and_recover(struct eeh_pe *pe)
				624	{
				625	int ret;
				626
				627	/* Bail if the PE is being recovered */
				628	if (pe->state & EEH_PE_RECOVERING)
				629	return 0;
				630
				631	/* Put the PE into recovery mode */
				632	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
				633
				634	/* Save states */
				635	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
				636
				637	/* Issue reset */
				638	ret = eeh_pe_reset_full(pe);
				639	if (ret) {
				640	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
				641	return ret;
				642	}
				643
				644	/* Unfreeze the PE */
				645	ret = eeh_clear_pe_frozen_state(pe, true);
				646	if (ret) {
				647	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
				648	return ret;
				649	}
				650
				651	/* Restore device state */
				652	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
				653
				654	/* Clear recovery mode */
				655	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
				656
				657	return 0;
				658	}
				659
				660	/**
				661	* eeh_reset_device - Perform actual reset of a pci slot
				662	* @driver_eeh_aware: Does the device's driver provide EEH support?
				663	* @pe: EEH PE
				664	* @bus: PCI bus corresponding to the isolcated slot
				665	* @rmv_data: Optional, list to record removed devices
				666	*
				667	* This routine must be called to do reset on the indicated PE.
				668	* During the reset, udev might be invoked because those affected
				669	* PCI devices will be removed and then added.
				670	*/
				671	static int eeh_reset_device(struct eeh_pe pe, struct pci_bus bus,
				672	struct eeh_rmv_data *rmv_data,
				673	bool driver_eeh_aware)
				674	{
				675	time64_t tstamp;
				676	int cnt, rc;
				677	struct eeh_dev *edev;
				678
				679	/* pcibios will clear the counter; save the value */
				680	cnt = pe->freeze_count;
				681	tstamp = pe->tstamp;
				682
				683	/*
				684	* We don't remove the corresponding PE instances because
				685	* we need the information afterwords. The attached EEH
				686	* devices are expected to be attached soon when calling
				687	* into pci_hp_add_devices().
				688	*/
				689	eeh_pe_state_mark(pe, EEH_PE_KEEP);
				690	if (driver_eeh_aware \|\| (pe->type & EEH_PE_VF)) {
				691	eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
				692	} else {
				693	pci_lock_rescan_remove();
				694	pci_hp_remove_devices(bus);
				695	pci_unlock_rescan_remove();
				696	}
				697
				698	/*
				699	* Reset the pci controller. (Asserts RST#; resets config space).
				700	* Reconfigure bridges and devices. Don't try to bring the system
				701	* up if the reset failed for some reason.
				702	*
				703	* During the reset, it's very dangerous to have uncontrolled PCI
				704	* config accesses. So we prefer to block them. However, controlled
				705	* PCI config accesses initiated from EEH itself are allowed.
				706	*/
				707	rc = eeh_pe_reset_full(pe);
				708	if (rc)
				709	return rc;
				710
				711	pci_lock_rescan_remove();
				712
				713	/* Restore PE */
				714	eeh_ops->configure_bridge(pe);
				715	eeh_pe_restore_bars(pe);
				716
				717	/* Clear frozen state */
				718	rc = eeh_clear_pe_frozen_state(pe, false);
				719	if (rc) {
				720	pci_unlock_rescan_remove();
				721	return rc;
				722	}
				723
				724	/* Give the system 5 seconds to finish running the user-space
				725	* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
				726	* this is a hack, but if we don't do this, and try to bring
				727	* the device up before the scripts have taken it down,
				728	* potentially weird things happen.
				729	*/
				730	if (!driver_eeh_aware \|\| rmv_data->removed) {
				731	pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
				732	(driver_eeh_aware ? "partial" : "complete"));
				733	ssleep(5);
				734
				735	/*
				736	* The EEH device is still connected with its parent
				737	* PE. We should disconnect it so the binding can be
				738	* rebuilt when adding PCI devices.
				739	*/
				740	edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
				741	eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
				742	if (pe->type & EEH_PE_VF) {
				743	eeh_add_virt_device(edev, NULL);
				744	} else {
				745	if (!driver_eeh_aware)
				746	eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
				747	pci_hp_add_devices(bus);
				748	}
				749	}
				750	eeh_pe_state_clear(pe, EEH_PE_KEEP);
				751
				752	pe->tstamp = tstamp;
				753	pe->freeze_count = cnt;
				754
				755	pci_unlock_rescan_remove();
				756	return 0;
				757	}
				758
				759	/* The longest amount of time to wait for a pci device
				760	* to come back on line, in seconds.
				761	*/
				762	#define MAX_WAIT_FOR_RECOVERY 300
				763
				764	/**
				765	* eeh_handle_normal_event - Handle EEH events on a specific PE
				766	* @pe: EEH PE - which should not be used after we return, as it may
				767	* have been invalidated.
				768	*
				769	* Attempts to recover the given PE. If recovery fails or the PE has failed
				770	* too many times, remove the PE.
				771	*
				772	* While PHB detects address or data parity errors on particular PCI
				773	* slot, the associated PE will be frozen. Besides, DMA's occurring
				774	* to wild addresses (which usually happen due to bugs in device
				775	* drivers or in PCI adapter firmware) can cause EEH error. #SERR,
				776	* #PERR or other misc PCI-related errors also can trigger EEH errors.
				777	*
				778	* Recovery process consists of unplugging the device driver (which
				779	* generated hotplug events to userspace), then issuing a PCI #RST to
				780	* the device, then reconfiguring the PCI config space for all bridges
				781	* & devices under this slot, and then finally restarting the device
				782	* drivers (which cause a second set of hotplug events to go out to
				783	* userspace).
				784	*/
				785	void eeh_handle_normal_event(struct eeh_pe *pe)
				786	{
				787	struct pci_bus *bus;
				788	struct eeh_dev edev, tmp;
				789	struct eeh_pe *tmp_pe;
				790	int rc = 0;
				791	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
				792	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
				793
				794	bus = eeh_pe_bus_get(pe);
				795	if (!bus) {
				796	pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
				797	__func__, pe->phb->global_number, pe->addr);
				798	return;
				799	}
				800
				801	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
				802
				803	eeh_pe_update_time_stamp(pe);
				804	pe->freeze_count++;
				805	if (pe->freeze_count > eeh_max_freezes) {
				806	pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
				807	pe->phb->global_number, pe->addr,
				808	pe->freeze_count);
				809	goto hard_fail;
				810	}
				811	pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
				812	pe->freeze_count, eeh_max_freezes);
				813
				814	/* Walk the various device drivers attached to this slot through
				815	* a reset sequence, giving each an opportunity to do what it needs
				816	* to accomplish the reset. Each child gets a report of the
				817	* status ... if any child can't handle the reset, then the entire
				818	* slot is dlpar removed and added.
				819	*
				820	* When the PHB is fenced, we have to issue a reset to recover from
				821	* the error. Override the result if necessary to have partially
				822	* hotplug for this case.
				823	*/
				824	pr_info("EEH: Notify device drivers to shutdown\n");
				825	eeh_set_channel_state(pe, pci_channel_io_frozen);
				826	eeh_set_irq_state(pe, false);
				827	eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
				828	&result);
				829	if ((pe->type & EEH_PE_PHB) &&
				830	result != PCI_ERS_RESULT_NONE &&
				831	result != PCI_ERS_RESULT_NEED_RESET)
				832	result = PCI_ERS_RESULT_NEED_RESET;
				833
				834	/* Get the current PCI slot state. This can take a long time,
				835	* sometimes over 300 seconds for certain systems.
				836	*/
				837	rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
				838	if (rc < 0 \|\| rc == EEH_STATE_NOT_SUPPORT) {
				839	pr_warn("EEH: Permanent failure\n");
				840	goto hard_fail;
				841	}
				842
				843	/* Since rtas may enable MMIO when posting the error log,
				844	* don't post the error log until after all dev drivers
				845	* have been informed.
				846	*/
				847	pr_info("EEH: Collect temporary log\n");
				848	eeh_slot_error_detail(pe, EEH_LOG_TEMP);
				849
				850	/* If all device drivers were EEH-unaware, then shut
				851	* down all of the device drivers, and hope they
				852	* go down willingly, without panicing the system.
				853	*/
				854	if (result == PCI_ERS_RESULT_NONE) {
				855	pr_info("EEH: Reset with hotplug activity\n");
				856	rc = eeh_reset_device(pe, bus, NULL, false);
				857	if (rc) {
				858	pr_warn("%s: Unable to reset, err=%d\n",
				859	__func__, rc);
				860	goto hard_fail;
				861	}
				862	}
				863
				864	/* If all devices reported they can proceed, then re-enable MMIO */
				865	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
				866	pr_info("EEH: Enable I/O for affected devices\n");
				867	rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
				868
				869	if (rc < 0)
				870	goto hard_fail;
				871	if (rc) {
				872	result = PCI_ERS_RESULT_NEED_RESET;
				873	} else {
				874	pr_info("EEH: Notify device drivers to resume I/O\n");
				875	eeh_pe_report("mmio_enabled", pe,
				876	eeh_report_mmio_enabled, &result);
				877	}
				878	}
				879
				880	/* If all devices reported they can proceed, then re-enable DMA */
				881	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
				882	pr_info("EEH: Enabled DMA for affected devices\n");
				883	rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
				884
				885	if (rc < 0)
				886	goto hard_fail;
				887	if (rc) {
				888	result = PCI_ERS_RESULT_NEED_RESET;
				889	} else {
				890	/*
				891	* We didn't do PE reset for the case. The PE
				892	* is still in frozen state. Clear it before
				893	* resuming the PE.
				894	*/
				895	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
				896	result = PCI_ERS_RESULT_RECOVERED;
				897	}
				898	}
				899
				900	/* If any device has a hard failure, then shut off everything. */
				901	if (result == PCI_ERS_RESULT_DISCONNECT) {
				902	pr_warn("EEH: Device driver gave up\n");
				903	goto hard_fail;
				904	}
				905
				906	/* If any device called out for a reset, then reset the slot */
				907	if (result == PCI_ERS_RESULT_NEED_RESET) {
				908	pr_info("EEH: Reset without hotplug activity\n");
				909	rc = eeh_reset_device(pe, bus, &rmv_data, true);
				910	if (rc) {
				911	pr_warn("%s: Cannot reset, err=%d\n",
				912	__func__, rc);
				913	goto hard_fail;
				914	}
				915
				916	pr_info("EEH: Notify device drivers "
				917	"the completion of reset\n");
				918	result = PCI_ERS_RESULT_NONE;
				919	eeh_set_channel_state(pe, pci_channel_io_normal);
				920	eeh_set_irq_state(pe, true);
				921	eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
				922	}
				923
				924	/* All devices should claim they have recovered by now. */
				925	if ((result != PCI_ERS_RESULT_RECOVERED) &&
				926	(result != PCI_ERS_RESULT_NONE)) {
				927	pr_warn("EEH: Not recovered\n");
				928	goto hard_fail;
				929	}
				930
				931	/*
				932	* For those hot removed VFs, we should add back them after PF get
				933	* recovered properly.
				934	*/
				935	list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
				936	eeh_add_virt_device(edev, NULL);
				937	list_del(&edev->rmv_list);
				938	}
				939
				940	/* Tell all device drivers that they can resume operations */
				941	pr_info("EEH: Notify device driver to resume\n");
				942	eeh_set_channel_state(pe, pci_channel_io_normal);
				943	eeh_set_irq_state(pe, true);
				944	eeh_pe_report("resume", pe, eeh_report_resume, NULL);
				945	eeh_for_each_pe(pe, tmp_pe) {
				946	eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
				947	edev->mode &= ~EEH_DEV_NO_HANDLER;
				948	edev->in_error = false;
				949	}
				950	}
				951
				952	pr_info("EEH: Recovery successful.\n");
				953	goto final;
				954
				955	hard_fail:
				956	/*
				957	* About 90% of all real-life EEH failures in the field
				958	* are due to poorly seated PCI cards. Only 10% or so are
				959	* due to actual, failed cards.
				960	*/
				961	pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
				962	"Please try reseating or replacing it\n",
				963	pe->phb->global_number, pe->addr);
				964
				965	eeh_slot_error_detail(pe, EEH_LOG_PERM);
				966
				967	/* Notify all devices that they're about to go down. */
				968	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
				969	eeh_set_irq_state(pe, false);
				970	eeh_pe_report("error_detected(permanent failure)", pe,
				971	eeh_report_failure, NULL);
				972
				973	/* Mark the PE to be removed permanently */
				974	eeh_pe_state_mark(pe, EEH_PE_REMOVED);
				975
				976	/*
				977	* Shut down the device drivers for good. We mark
				978	* all removed devices so that their PCI config
				979	* is no longer accessed.
				980	*/
				981	if (pe->type & EEH_PE_VF) {
				982	eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
				983	eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
				984	} else {
				985	eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
				986	eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
				987
				988	pci_lock_rescan_remove();
				989	pci_hp_remove_devices(bus);
				990	pci_unlock_rescan_remove();
				991	/* The passed PE should no longer be used */
				992	return;
				993	}
				994	final:
				995	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
				996	}
				997
				998	/**
				999	* eeh_handle_special_event - Handle EEH events without a specific failing PE
				1000	*
				1001	* Called when an EEH event is detected but can't be narrowed down to a
				1002	* specific PE. Iterates through possible failures and handles them as
				1003	* necessary.
				1004	*/
				1005	void eeh_handle_special_event(void)
				1006	{
				1007	struct eeh_pe pe, phb_pe;
				1008	struct pci_bus *bus;
				1009	struct pci_controller *hose;
				1010	unsigned long flags;
				1011	int rc;
				1012
				1013
				1014	do {
				1015	rc = eeh_ops->next_error(&pe);
				1016
				1017	switch (rc) {
				1018	case EEH_NEXT_ERR_DEAD_IOC:
				1019	/* Mark all PHBs in dead state */
				1020	eeh_serialize_lock(&flags);
				1021
				1022	/* Purge all events */
				1023	eeh_remove_event(NULL, true);
				1024
				1025	list_for_each_entry(hose, &hose_list, list_node) {
				1026	phb_pe = eeh_phb_pe_get(hose);
				1027	if (!phb_pe) continue;
				1028
				1029	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
				1030	}
				1031
				1032	eeh_serialize_unlock(flags);
				1033
				1034	break;
				1035	case EEH_NEXT_ERR_FROZEN_PE:
				1036	case EEH_NEXT_ERR_FENCED_PHB:
				1037	case EEH_NEXT_ERR_DEAD_PHB:
				1038	/* Mark the PE in fenced state */
				1039	eeh_serialize_lock(&flags);
				1040
				1041	/* Purge all events of the PHB */
				1042	eeh_remove_event(pe, true);
				1043
				1044	if (rc == EEH_NEXT_ERR_DEAD_PHB)
				1045	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
				1046	else
				1047	eeh_pe_state_mark(pe,
				1048	EEH_PE_ISOLATED \| EEH_PE_RECOVERING);
				1049
				1050	eeh_serialize_unlock(flags);
				1051
				1052	break;
				1053	case EEH_NEXT_ERR_NONE:
				1054	return;
				1055	default:
				1056	pr_warn("%s: Invalid value %d from next_error()\n",
				1057	__func__, rc);
				1058	return;
				1059	}
				1060
				1061	/*
				1062	* For fenced PHB and frozen PE, it's handled as normal
				1063	* event. We have to remove the affected PHBs for dead
				1064	* PHB and IOC
				1065	*/
				1066	if (rc == EEH_NEXT_ERR_FROZEN_PE \|\|
				1067	rc == EEH_NEXT_ERR_FENCED_PHB) {
				1068	eeh_handle_normal_event(pe);
				1069	} else {
				1070	pci_lock_rescan_remove();
				1071	list_for_each_entry(hose, &hose_list, list_node) {
				1072	phb_pe = eeh_phb_pe_get(hose);
				1073	if (!phb_pe \|\|
				1074	!(phb_pe->state & EEH_PE_ISOLATED) \|\|
				1075	(phb_pe->state & EEH_PE_RECOVERING))
				1076	continue;
				1077
				1078	/* Notify all devices to be down */
				1079	eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
				1080	eeh_set_channel_state(pe, pci_channel_io_perm_failure);
				1081	eeh_pe_report(
				1082	"error_detected(permanent failure)", pe,
				1083	eeh_report_failure, NULL);
				1084	bus = eeh_pe_bus_get(phb_pe);
				1085	if (!bus) {
				1086	pr_err("%s: Cannot find PCI bus for "
				1087	"PHB#%x-PE#%x\n",
				1088	__func__,
				1089	pe->phb->global_number,
				1090	pe->addr);
				1091	break;
				1092	}
				1093	pci_hp_remove_devices(bus);
				1094	}
				1095	pci_unlock_rescan_remove();
				1096	}
				1097
				1098	/*
				1099	* If we have detected dead IOC, we needn't proceed
				1100	* any more since all PHBs would have been removed
				1101	*/
				1102	if (rc == EEH_NEXT_ERR_DEAD_IOC)
				1103	break;
				1104	} while (rc != EEH_NEXT_ERR_NONE);
				1105	}