Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * This file implements the error recovery as a core part of PCIe error |
| 4 | * reporting. When a PCIe error is delivered, an error message will be |
| 5 | * collected and printed to console, then, an error recovery procedure |
| 6 | * will be executed by following the PCI error recovery rules. |
| 7 | * |
| 8 | * Copyright (C) 2006 Intel Corp. |
| 9 | * Tom Long Nguyen (tom.l.nguyen@intel.com) |
| 10 | * Zhang Yanmin (yanmin.zhang@intel.com) |
| 11 | */ |
| 12 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 13 | #define dev_fmt(fmt) "AER: " fmt |
| 14 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 15 | #include <linux/pci.h> |
| 16 | #include <linux/module.h> |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 17 | #include <linux/kernel.h> |
| 18 | #include <linux/errno.h> |
| 19 | #include <linux/aer.h> |
| 20 | #include "portdrv.h" |
| 21 | #include "../pci.h" |
| 22 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 23 | static pci_ers_result_t merge_result(enum pci_ers_result orig, |
| 24 | enum pci_ers_result new) |
| 25 | { |
| 26 | if (new == PCI_ERS_RESULT_NO_AER_DRIVER) |
| 27 | return PCI_ERS_RESULT_NO_AER_DRIVER; |
| 28 | |
| 29 | if (new == PCI_ERS_RESULT_NONE) |
| 30 | return orig; |
| 31 | |
| 32 | switch (orig) { |
| 33 | case PCI_ERS_RESULT_CAN_RECOVER: |
| 34 | case PCI_ERS_RESULT_RECOVERED: |
| 35 | orig = new; |
| 36 | break; |
| 37 | case PCI_ERS_RESULT_DISCONNECT: |
| 38 | if (new == PCI_ERS_RESULT_NEED_RESET) |
| 39 | orig = PCI_ERS_RESULT_NEED_RESET; |
| 40 | break; |
| 41 | default: |
| 42 | break; |
| 43 | } |
| 44 | |
| 45 | return orig; |
| 46 | } |
| 47 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 48 | static int report_error_detected(struct pci_dev *dev, |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 49 | pci_channel_state_t state, |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 50 | enum pci_ers_result *result) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 51 | { |
| 52 | pci_ers_result_t vote; |
| 53 | const struct pci_error_handlers *err_handler; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 54 | |
| 55 | device_lock(&dev->dev); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 56 | if (!pci_dev_set_io_state(dev, state) || |
| 57 | !dev->driver || |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 58 | !dev->driver->err_handler || |
| 59 | !dev->driver->err_handler->error_detected) { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 60 | /* |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 61 | * If any device in the subtree does not have an error_detected |
| 62 | * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent |
| 63 | * error callbacks of "any" device in the subtree, and will |
| 64 | * exit in the disconnected error state. |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 65 | */ |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 66 | if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 67 | vote = PCI_ERS_RESULT_NO_AER_DRIVER; |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 68 | pci_info(dev, "can't recover (no error_detected callback)\n"); |
| 69 | } else { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 70 | vote = PCI_ERS_RESULT_NONE; |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 71 | } |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 72 | } else { |
| 73 | err_handler = dev->driver->err_handler; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 74 | vote = err_handler->error_detected(dev, state); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 75 | } |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 76 | pci_uevent_ers(dev, vote); |
| 77 | *result = merge_result(*result, vote); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 78 | device_unlock(&dev->dev); |
| 79 | return 0; |
| 80 | } |
| 81 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 82 | static int report_frozen_detected(struct pci_dev *dev, void *data) |
| 83 | { |
| 84 | return report_error_detected(dev, pci_channel_io_frozen, data); |
| 85 | } |
| 86 | |
| 87 | static int report_normal_detected(struct pci_dev *dev, void *data) |
| 88 | { |
| 89 | return report_error_detected(dev, pci_channel_io_normal, data); |
| 90 | } |
| 91 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 92 | static int report_mmio_enabled(struct pci_dev *dev, void *data) |
| 93 | { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 94 | pci_ers_result_t vote, *result = data; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 95 | const struct pci_error_handlers *err_handler; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 96 | |
| 97 | device_lock(&dev->dev); |
| 98 | if (!dev->driver || |
| 99 | !dev->driver->err_handler || |
| 100 | !dev->driver->err_handler->mmio_enabled) |
| 101 | goto out; |
| 102 | |
| 103 | err_handler = dev->driver->err_handler; |
| 104 | vote = err_handler->mmio_enabled(dev); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 105 | *result = merge_result(*result, vote); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 106 | out: |
| 107 | device_unlock(&dev->dev); |
| 108 | return 0; |
| 109 | } |
| 110 | |
| 111 | static int report_slot_reset(struct pci_dev *dev, void *data) |
| 112 | { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 113 | pci_ers_result_t vote, *result = data; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 114 | const struct pci_error_handlers *err_handler; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 115 | |
| 116 | device_lock(&dev->dev); |
| 117 | if (!dev->driver || |
| 118 | !dev->driver->err_handler || |
| 119 | !dev->driver->err_handler->slot_reset) |
| 120 | goto out; |
| 121 | |
| 122 | err_handler = dev->driver->err_handler; |
| 123 | vote = err_handler->slot_reset(dev); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 124 | *result = merge_result(*result, vote); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 125 | out: |
| 126 | device_unlock(&dev->dev); |
| 127 | return 0; |
| 128 | } |
| 129 | |
| 130 | static int report_resume(struct pci_dev *dev, void *data) |
| 131 | { |
| 132 | const struct pci_error_handlers *err_handler; |
| 133 | |
| 134 | device_lock(&dev->dev); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 135 | if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || |
| 136 | !dev->driver || |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 137 | !dev->driver->err_handler || |
| 138 | !dev->driver->err_handler->resume) |
| 139 | goto out; |
| 140 | |
| 141 | err_handler = dev->driver->err_handler; |
| 142 | err_handler->resume(dev); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 143 | out: |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 144 | pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 145 | device_unlock(&dev->dev); |
| 146 | return 0; |
| 147 | } |
| 148 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 149 | pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, |
| 150 | pci_channel_state_t state, |
| 151 | pci_ers_result_t (*reset_link)(struct pci_dev *pdev)) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 152 | { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 153 | pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; |
| 154 | struct pci_bus *bus; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 155 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 156 | /* |
| 157 | * Error recovery runs on all subordinates of the first downstream port. |
| 158 | * If the downstream port detected the error, it is cleared at the end. |
| 159 | */ |
| 160 | if (!(pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || |
| 161 | pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)) |
| 162 | dev = dev->bus->self; |
| 163 | bus = dev->subordinate; |
| 164 | |
| 165 | pci_dbg(dev, "broadcast error_detected message\n"); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 166 | if (state == pci_channel_io_frozen) { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 167 | pci_walk_bus(bus, report_frozen_detected, &status); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 168 | status = reset_link(dev); |
| 169 | if (status != PCI_ERS_RESULT_RECOVERED) { |
| 170 | pci_warn(dev, "link reset failed\n"); |
| 171 | goto failed; |
| 172 | } |
| 173 | } else { |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 174 | pci_walk_bus(bus, report_normal_detected, &status); |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 175 | } |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 176 | |
| 177 | if (status == PCI_ERS_RESULT_CAN_RECOVER) { |
| 178 | status = PCI_ERS_RESULT_RECOVERED; |
| 179 | pci_dbg(dev, "broadcast mmio_enabled message\n"); |
| 180 | pci_walk_bus(bus, report_mmio_enabled, &status); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 181 | } |
| 182 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 183 | if (status == PCI_ERS_RESULT_NEED_RESET) { |
| 184 | /* |
| 185 | * TODO: Should call platform-specific |
| 186 | * functions to reset slot before calling |
| 187 | * drivers' slot_reset callbacks? |
| 188 | */ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 189 | status = PCI_ERS_RESULT_RECOVERED; |
| 190 | pci_dbg(dev, "broadcast slot_reset message\n"); |
| 191 | pci_walk_bus(bus, report_slot_reset, &status); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 192 | } |
| 193 | |
| 194 | if (status != PCI_ERS_RESULT_RECOVERED) |
| 195 | goto failed; |
| 196 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame] | 197 | pci_dbg(dev, "broadcast resume message\n"); |
| 198 | pci_walk_bus(bus, report_resume, &status); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 199 | |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 200 | if (pcie_aer_is_native(dev)) |
| 201 | pcie_clear_device_status(dev); |
| 202 | pci_aer_clear_nonfatal_status(dev); |
| 203 | pci_info(dev, "device recovery successful\n"); |
| 204 | return status; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 205 | |
| 206 | failed: |
| 207 | pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT); |
| 208 | |
| 209 | /* TODO: Should kernel panic here? */ |
Olivier Deprez | 157378f | 2022-04-04 15:47:50 +0200 | [diff] [blame^] | 210 | pci_info(dev, "device recovery failed\n"); |
| 211 | |
| 212 | return status; |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 213 | } |