Update Linux to v5.10.109

Sourced from [1]

[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz

Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 27b67f1..efffa65 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -7,6 +7,7 @@
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
+#include <linux/mdio/mdio-i2c.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
@@ -16,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 
-#include "mdio-i2c.h"
 #include "sfp.h"
 #include "swphy.h"
 
@@ -36,6 +36,8 @@
 
 	SFP_E_INSERT = 0,
 	SFP_E_REMOVE,
+	SFP_E_DEV_ATTACH,
+	SFP_E_DEV_DETACH,
 	SFP_E_DEV_DOWN,
 	SFP_E_DEV_UP,
 	SFP_E_TX_FAULT,
@@ -45,16 +47,23 @@
 	SFP_E_TIMEOUT,
 
 	SFP_MOD_EMPTY = 0,
-	SFP_MOD_PROBE,
-	SFP_MOD_HPOWER,
-	SFP_MOD_PRESENT,
 	SFP_MOD_ERROR,
+	SFP_MOD_PROBE,
+	SFP_MOD_WAITDEV,
+	SFP_MOD_HPOWER,
+	SFP_MOD_WAITPWR,
+	SFP_MOD_PRESENT,
 
-	SFP_DEV_DOWN = 0,
+	SFP_DEV_DETACHED = 0,
+	SFP_DEV_DOWN,
 	SFP_DEV_UP,
 
 	SFP_S_DOWN = 0,
+	SFP_S_FAIL,
+	SFP_S_WAIT,
 	SFP_S_INIT,
+	SFP_S_INIT_PHY,
+	SFP_S_INIT_TX_FAULT,
 	SFP_S_WAIT_LOS,
 	SFP_S_LINK_UP,
 	SFP_S_TX_FAULT,
@@ -64,10 +73,12 @@
 
 static const char  * const mod_state_strings[] = {
 	[SFP_MOD_EMPTY] = "empty",
-	[SFP_MOD_PROBE] = "probe",
-	[SFP_MOD_HPOWER] = "hpower",
-	[SFP_MOD_PRESENT] = "present",
 	[SFP_MOD_ERROR] = "error",
+	[SFP_MOD_PROBE] = "probe",
+	[SFP_MOD_WAITDEV] = "waitdev",
+	[SFP_MOD_HPOWER] = "hpower",
+	[SFP_MOD_WAITPWR] = "waitpwr",
+	[SFP_MOD_PRESENT] = "present",
 };
 
 static const char *mod_state_to_str(unsigned short mod_state)
@@ -78,6 +89,7 @@
 }
 
 static const char * const dev_state_strings[] = {
+	[SFP_DEV_DETACHED] = "detached",
 	[SFP_DEV_DOWN] = "down",
 	[SFP_DEV_UP] = "up",
 };
@@ -92,6 +104,8 @@
 static const char * const event_strings[] = {
 	[SFP_E_INSERT] = "insert",
 	[SFP_E_REMOVE] = "remove",
+	[SFP_E_DEV_ATTACH] = "dev_attach",
+	[SFP_E_DEV_DETACH] = "dev_detach",
 	[SFP_E_DEV_DOWN] = "dev_down",
 	[SFP_E_DEV_UP] = "dev_up",
 	[SFP_E_TX_FAULT] = "tx_fault",
@@ -110,12 +124,16 @@
 
 static const char * const sm_state_strings[] = {
 	[SFP_S_DOWN] = "down",
+	[SFP_S_FAIL] = "fail",
+	[SFP_S_WAIT] = "wait",
 	[SFP_S_INIT] = "init",
+	[SFP_S_INIT_PHY] = "init_phy",
+	[SFP_S_INIT_TX_FAULT] = "init_tx_fault",
 	[SFP_S_WAIT_LOS] = "wait_los",
 	[SFP_S_LINK_UP] = "link_up",
 	[SFP_S_TX_FAULT] = "tx_fault",
 	[SFP_S_REINIT] = "reinit",
-	[SFP_S_TX_DISABLE] = "rx_disable",
+	[SFP_S_TX_DISABLE] = "tx_disable",
 };
 
 static const char *sm_state_to_str(unsigned short sm_state)
@@ -141,30 +159,54 @@
 	GPIOD_ASIS,
 };
 
-#define T_INIT_JIFFIES	msecs_to_jiffies(300)
-#define T_RESET_US	10
-#define T_FAULT_RECOVER	msecs_to_jiffies(1000)
+/* t_start_up (SFF-8431) or t_init (SFF-8472) is the time required for a
+ * non-cooled module to initialise its laser safety circuitry. We wait
+ * an initial T_WAIT period before we check the tx fault to give any PHY
+ * on board (for a copper SFP) time to initialise.
+ */
+#define T_WAIT			msecs_to_jiffies(50)
+#define T_START_UP		msecs_to_jiffies(300)
+#define T_START_UP_BAD_GPON	msecs_to_jiffies(60000)
+
+/* t_reset is the time required to assert the TX_DISABLE signal to reset
+ * an indicated TX_FAULT.
+ */
+#define T_RESET_US		10
+#define T_FAULT_RECOVER		msecs_to_jiffies(1000)
+
+/* N_FAULT_INIT is the number of recovery attempts at module initialisation
+ * time. If the TX_FAULT signal is not deasserted after this number of
+ * attempts at clearing it, we decide that the module is faulty.
+ * N_FAULT is the same but after the module has initialised.
+ */
+#define N_FAULT_INIT		5
+#define N_FAULT			5
+
+/* T_PHY_RETRY is the time interval between attempts to probe the PHY.
+ * R_PHY_RETRY is the number of attempts.
+ */
+#define T_PHY_RETRY		msecs_to_jiffies(50)
+#define R_PHY_RETRY		12
 
 /* SFP module presence detection is poor: the three MOD DEF signals are
  * the same length on the PCB, which means it's possible for MOD DEF 0 to
  * connect before the I2C bus on MOD DEF 1/2.
  *
- * The SFP MSA specifies 300ms as t_init (the time taken for TX_FAULT to
- * be deasserted) but makes no mention of the earliest time before we can
- * access the I2C EEPROM.  However, Avago modules require 300ms.
+ * The SFF-8472 specifies t_serial ("Time from power on until module is
+ * ready for data transmission over the two wire serial bus.") as 300ms.
  */
-#define T_PROBE_INIT	msecs_to_jiffies(300)
-#define T_HPOWER_LEVEL	msecs_to_jiffies(300)
-#define T_PROBE_RETRY	msecs_to_jiffies(100)
+#define T_SERIAL		msecs_to_jiffies(300)
+#define T_HPOWER_LEVEL		msecs_to_jiffies(300)
+#define T_PROBE_RETRY_INIT	msecs_to_jiffies(100)
+#define R_PROBE_RETRY_INIT	10
+#define T_PROBE_RETRY_SLOW	msecs_to_jiffies(5000)
+#define R_PROBE_RETRY_SLOW	12
 
 /* SFP modules appear to always have their PHY configured for bus address
  * 0x56 (which with mdio-i2c, translates to a PHY address of 22).
  */
 #define SFP_PHY_ADDR	22
 
-/* Give this long for the PHY to reset. */
-#define T_PHY_RESET_MS	50
-
 struct sff_data {
 	unsigned int gpios;
 	bool (*module_supported)(const struct sfp_eeprom_id *id);
@@ -177,6 +219,7 @@
 	struct sfp_bus *sfp_bus;
 	struct phy_device *mod_phy;
 	const struct sff_data *type;
+	size_t i2c_block_size;
 	u32 max_power_mW;
 
 	unsigned int (*get_state)(struct sfp *);
@@ -187,20 +230,30 @@
 	struct gpio_desc *gpio[GPIO_MAX];
 	int gpio_irq[GPIO_MAX];
 
-	bool attached;
+	bool need_poll;
+
 	struct mutex st_mutex;			/* Protects state */
+	unsigned int state_soft_mask;
 	unsigned int state;
 	struct delayed_work poll;
 	struct delayed_work timeout;
 	struct mutex sm_mutex;			/* Protects state machine */
 	unsigned char sm_mod_state;
+	unsigned char sm_mod_tries_init;
+	unsigned char sm_mod_tries;
 	unsigned char sm_dev_state;
 	unsigned short sm_state;
-	unsigned int sm_retries;
+	unsigned char sm_fault_retries;
+	unsigned char sm_phy_retries;
 
 	struct sfp_eeprom_id id;
+	unsigned int module_power_mW;
+	unsigned int module_t_start_up;
+
 #if IS_ENABLED(CONFIG_HWMON)
 	struct sfp_diag diag;
+	struct delayed_work hwmon_probe;
+	unsigned int hwmon_tries;
 	struct device *hwmon_dev;
 	char *hwmon_name;
 #endif
@@ -209,7 +262,7 @@
 
 static bool sff_module_supported(const struct sfp_eeprom_id *id)
 {
-	return id->base.phys_id == SFP_PHYS_ID_SFF &&
+	return id->base.phys_id == SFF8024_ID_SFF_8472 &&
 	       id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP;
 }
 
@@ -220,8 +273,21 @@
 
 static bool sfp_module_supported(const struct sfp_eeprom_id *id)
 {
-	return id->base.phys_id == SFP_PHYS_ID_SFP &&
-	       id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP;
+	if (id->base.phys_id == SFF8024_ID_SFP &&
+	    id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP)
+		return true;
+
+	/* SFP GPON module Ubiquiti U-Fiber Instant has in its EEPROM stored
+	 * phys id SFF instead of SFP. Therefore mark this module explicitly
+	 * as supported based on vendor name and pn match.
+	 */
+	if (id->base.phys_id == SFF8024_ID_SFF_8472 &&
+	    id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP &&
+	    !memcmp(id->base.vendor_name, "UBNT            ", 16) &&
+	    !memcmp(id->base.vendor_pn, "UF-INSTANT      ", 16))
+		return true;
+
+	return false;
 }
 
 static const struct sff_data sfp_data = {
@@ -284,6 +350,7 @@
 {
 	struct i2c_msg msgs[2];
 	u8 bus_addr = a2 ? 0x51 : 0x50;
+	size_t block_size = sfp->i2c_block_size;
 	size_t this_len;
 	int ret;
 
@@ -298,8 +365,8 @@
 
 	while (len) {
 		this_len = len;
-		if (this_len > 16)
-			this_len = 16;
+		if (this_len > block_size)
+			this_len = block_size;
 
 		msgs[1].len = this_len;
 
@@ -376,16 +443,6 @@
 }
 
 /* Interface */
-static unsigned int sfp_get_state(struct sfp *sfp)
-{
-	return sfp->get_state(sfp);
-}
-
-static void sfp_set_state(struct sfp *sfp, unsigned int state)
-{
-	sfp->set_state(sfp, state);
-}
-
 static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len)
 {
 	return sfp->read(sfp, a2, addr, buf, len);
@@ -396,6 +453,89 @@
 	return sfp->write(sfp, a2, addr, buf, len);
 }
 
+static unsigned int sfp_soft_get_state(struct sfp *sfp)
+{
+	unsigned int state = 0;
+	u8 status;
+	int ret;
+
+	ret = sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status));
+	if (ret == sizeof(status)) {
+		if (status & SFP_STATUS_RX_LOS)
+			state |= SFP_F_LOS;
+		if (status & SFP_STATUS_TX_FAULT)
+			state |= SFP_F_TX_FAULT;
+	} else {
+		dev_err_ratelimited(sfp->dev,
+				    "failed to read SFP soft status: %d\n",
+				    ret);
+		/* Preserve the current state */
+		state = sfp->state;
+	}
+
+	return state & sfp->state_soft_mask;
+}
+
+static void sfp_soft_set_state(struct sfp *sfp, unsigned int state)
+{
+	u8 status;
+
+	if (sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)) ==
+		     sizeof(status)) {
+		if (state & SFP_F_TX_DISABLE)
+			status |= SFP_STATUS_TX_DISABLE_FORCE;
+		else
+			status &= ~SFP_STATUS_TX_DISABLE_FORCE;
+
+		sfp_write(sfp, true, SFP_STATUS, &status, sizeof(status));
+	}
+}
+
+static void sfp_soft_start_poll(struct sfp *sfp)
+{
+	const struct sfp_eeprom_id *id = &sfp->id;
+
+	sfp->state_soft_mask = 0;
+	if (id->ext.enhopts & SFP_ENHOPTS_SOFT_TX_DISABLE &&
+	    !sfp->gpio[GPIO_TX_DISABLE])
+		sfp->state_soft_mask |= SFP_F_TX_DISABLE;
+	if (id->ext.enhopts & SFP_ENHOPTS_SOFT_TX_FAULT &&
+	    !sfp->gpio[GPIO_TX_FAULT])
+		sfp->state_soft_mask |= SFP_F_TX_FAULT;
+	if (id->ext.enhopts & SFP_ENHOPTS_SOFT_RX_LOS &&
+	    !sfp->gpio[GPIO_LOS])
+		sfp->state_soft_mask |= SFP_F_LOS;
+
+	if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) &&
+	    !sfp->need_poll)
+		mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+}
+
+static void sfp_soft_stop_poll(struct sfp *sfp)
+{
+	sfp->state_soft_mask = 0;
+}
+
+static unsigned int sfp_get_state(struct sfp *sfp)
+{
+	unsigned int state = sfp->get_state(sfp);
+
+	if (state & SFP_F_PRESENT &&
+	    sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT))
+		state |= sfp_soft_get_state(sfp);
+
+	return state;
+}
+
+static void sfp_set_state(struct sfp *sfp, unsigned int state)
+{
+	sfp->set_state(sfp, state);
+
+	if (state & SFP_F_PRESENT &&
+	    sfp->state_soft_mask & SFP_F_TX_DISABLE)
+		sfp_soft_set_state(sfp, state);
+}
+
 static unsigned int sfp_check(void *buf, size_t len)
 {
 	u8 *p, check;
@@ -427,7 +567,7 @@
 		case hwmon_temp_crit:
 			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
 				return 0;
-			/* fall through */
+			fallthrough;
 		case hwmon_temp_input:
 		case hwmon_temp_label:
 			return 0444;
@@ -446,7 +586,7 @@
 		case hwmon_in_crit:
 			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
 				return 0;
-			/* fall through */
+			fallthrough;
 		case hwmon_in_input:
 		case hwmon_in_label:
 			return 0444;
@@ -465,7 +605,7 @@
 		case hwmon_curr_crit:
 			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
 				return 0;
-			/* fall through */
+			fallthrough;
 		case hwmon_curr_input:
 		case hwmon_curr_label:
 			return 0444;
@@ -493,7 +633,7 @@
 		case hwmon_power_crit:
 			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
 				return 0;
-			/* fall through */
+			fallthrough;
 		case hwmon_power_input:
 		case hwmon_power_label:
 			return 0444;
@@ -1142,10 +1282,57 @@
 	.info = sfp_hwmon_info,
 };
 
-static int sfp_hwmon_insert(struct sfp *sfp)
+static void sfp_hwmon_probe(struct work_struct *work)
 {
+	struct sfp *sfp = container_of(work, struct sfp, hwmon_probe.work);
 	int err, i;
 
+	/* hwmon interface needs to access 16bit registers in atomic way to
+	 * guarantee coherency of the diagnostic monitoring data. If it is not
+	 * possible to guarantee coherency because EEPROM is broken in such way
+	 * that does not support atomic 16bit read operation then we have to
+	 * skip registration of hwmon device.
+	 */
+	if (sfp->i2c_block_size < 2) {
+		dev_info(sfp->dev,
+			 "skipping hwmon device registration due to broken EEPROM\n");
+		dev_info(sfp->dev,
+			 "diagnostic EEPROM area cannot be read atomically to guarantee data coherency\n");
+		return;
+	}
+
+	err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag));
+	if (err < 0) {
+		if (sfp->hwmon_tries--) {
+			mod_delayed_work(system_wq, &sfp->hwmon_probe,
+					 T_PROBE_RETRY_SLOW);
+		} else {
+			dev_warn(sfp->dev, "hwmon probe failed: %d\n", err);
+		}
+		return;
+	}
+
+	sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL);
+	if (!sfp->hwmon_name) {
+		dev_err(sfp->dev, "out of memory for hwmon name\n");
+		return;
+	}
+
+	for (i = 0; sfp->hwmon_name[i]; i++)
+		if (hwmon_is_bad_char(sfp->hwmon_name[i]))
+			sfp->hwmon_name[i] = '_';
+
+	sfp->hwmon_dev = hwmon_device_register_with_info(sfp->dev,
+							 sfp->hwmon_name, sfp,
+							 &sfp_hwmon_chip_info,
+							 NULL);
+	if (IS_ERR(sfp->hwmon_dev))
+		dev_err(sfp->dev, "failed to register hwmon device: %ld\n",
+			PTR_ERR(sfp->hwmon_dev));
+}
+
+static int sfp_hwmon_insert(struct sfp *sfp)
+{
 	if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE)
 		return 0;
 
@@ -1158,34 +1345,33 @@
 		 */
 		return 0;
 
-	err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag));
-	if (err < 0)
-		return err;
+	mod_delayed_work(system_wq, &sfp->hwmon_probe, 1);
+	sfp->hwmon_tries = R_PROBE_RETRY_SLOW;
 
-	sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL);
-	if (!sfp->hwmon_name)
-		return -ENODEV;
-
-	for (i = 0; sfp->hwmon_name[i]; i++)
-		if (hwmon_is_bad_char(sfp->hwmon_name[i]))
-			sfp->hwmon_name[i] = '_';
-
-	sfp->hwmon_dev = hwmon_device_register_with_info(sfp->dev,
-							 sfp->hwmon_name, sfp,
-							 &sfp_hwmon_chip_info,
-							 NULL);
-
-	return PTR_ERR_OR_ZERO(sfp->hwmon_dev);
+	return 0;
 }
 
 static void sfp_hwmon_remove(struct sfp *sfp)
 {
+	cancel_delayed_work_sync(&sfp->hwmon_probe);
 	if (!IS_ERR_OR_NULL(sfp->hwmon_dev)) {
 		hwmon_device_unregister(sfp->hwmon_dev);
 		sfp->hwmon_dev = NULL;
 		kfree(sfp->hwmon_name);
 	}
 }
+
+static int sfp_hwmon_init(struct sfp *sfp)
+{
+	INIT_DELAYED_WORK(&sfp->hwmon_probe, sfp_hwmon_probe);
+
+	return 0;
+}
+
+static void sfp_hwmon_exit(struct sfp *sfp)
+{
+	cancel_delayed_work_sync(&sfp->hwmon_probe);
+}
 #else
 static int sfp_hwmon_insert(struct sfp *sfp)
 {
@@ -1195,6 +1381,15 @@
 static void sfp_hwmon_remove(struct sfp *sfp)
 {
 }
+
+static int sfp_hwmon_init(struct sfp *sfp)
+{
+	return 0;
+}
+
+static void sfp_hwmon_exit(struct sfp *sfp)
+{
+}
 #endif
 
 /* Helpers */
@@ -1245,7 +1440,7 @@
 	sfp_sm_set_timer(sfp, timeout);
 }
 
-static void sfp_sm_ins_next(struct sfp *sfp, unsigned int state,
+static void sfp_sm_mod_next(struct sfp *sfp, unsigned int state,
 			    unsigned int timeout)
 {
 	sfp->sm_mod_state = state;
@@ -1254,28 +1449,30 @@
 
 static void sfp_sm_phy_detach(struct sfp *sfp)
 {
-	phy_stop(sfp->mod_phy);
 	sfp_remove_phy(sfp->sfp_bus);
 	phy_device_remove(sfp->mod_phy);
 	phy_device_free(sfp->mod_phy);
 	sfp->mod_phy = NULL;
 }
 
-static void sfp_sm_probe_phy(struct sfp *sfp)
+static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45)
 {
 	struct phy_device *phy;
 	int err;
 
-	msleep(T_PHY_RESET_MS);
-
-	phy = mdiobus_scan(sfp->i2c_mii, SFP_PHY_ADDR);
-	if (phy == ERR_PTR(-ENODEV)) {
-		dev_info(sfp->dev, "no PHY detected\n");
-		return;
-	}
+	phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45);
+	if (phy == ERR_PTR(-ENODEV))
+		return PTR_ERR(phy);
 	if (IS_ERR(phy)) {
 		dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
-		return;
+		return PTR_ERR(phy);
+	}
+
+	err = phy_device_register(phy);
+	if (err) {
+		phy_device_free(phy);
+		dev_err(sfp->dev, "phy_device_register failed: %d\n", err);
+		return err;
 	}
 
 	err = sfp_add_phy(sfp->sfp_bus, phy);
@@ -1283,11 +1480,12 @@
 		phy_device_remove(phy);
 		phy_device_free(phy);
 		dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err);
-		return;
+		return err;
 	}
 
 	sfp->mod_phy = phy;
-	phy_start(phy);
+
+	return 0;
 }
 
 static void sfp_sm_link_up(struct sfp *sfp)
@@ -1303,15 +1501,19 @@
 
 static void sfp_sm_link_check_los(struct sfp *sfp)
 {
-	unsigned int los = sfp->state & SFP_F_LOS;
+	const __be16 los_inverted = cpu_to_be16(SFP_OPTIONS_LOS_INVERTED);
+	const __be16 los_normal = cpu_to_be16(SFP_OPTIONS_LOS_NORMAL);
+	__be16 los_options = sfp->id.ext.options & (los_inverted | los_normal);
+	bool los = false;
 
 	/* If neither SFP_OPTIONS_LOS_INVERTED nor SFP_OPTIONS_LOS_NORMAL
-	 * are set, we assume that no LOS signal is available.
+	 * are set, we assume that no LOS signal is available. If both are
+	 * set, we assume LOS is not implemented (and is meaningless.)
 	 */
-	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED))
-		los ^= SFP_F_LOS;
-	else if (!(sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL)))
-		los = 0;
+	if (los_options == los_inverted)
+		los = !(sfp->state & SFP_F_LOS);
+	else if (los_options == los_normal)
+		los = !!(sfp->state & SFP_F_LOS);
 
 	if (los)
 		sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
@@ -1321,23 +1523,27 @@
 
 static bool sfp_los_event_active(struct sfp *sfp, unsigned int event)
 {
-	return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) &&
-		event == SFP_E_LOS_LOW) ||
-	       (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) &&
-		event == SFP_E_LOS_HIGH);
+	const __be16 los_inverted = cpu_to_be16(SFP_OPTIONS_LOS_INVERTED);
+	const __be16 los_normal = cpu_to_be16(SFP_OPTIONS_LOS_NORMAL);
+	__be16 los_options = sfp->id.ext.options & (los_inverted | los_normal);
+
+	return (los_options == los_inverted && event == SFP_E_LOS_LOW) ||
+	       (los_options == los_normal && event == SFP_E_LOS_HIGH);
 }
 
 static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event)
 {
-	return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) &&
-		event == SFP_E_LOS_HIGH) ||
-	       (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) &&
-		event == SFP_E_LOS_LOW);
+	const __be16 los_inverted = cpu_to_be16(SFP_OPTIONS_LOS_INVERTED);
+	const __be16 los_normal = cpu_to_be16(SFP_OPTIONS_LOS_NORMAL);
+	__be16 los_options = sfp->id.ext.options & (los_inverted | los_normal);
+
+	return (los_options == los_inverted && event == SFP_E_LOS_HIGH) ||
+	       (los_options == los_normal && event == SFP_E_LOS_LOW);
 }
 
-static void sfp_sm_fault(struct sfp *sfp, bool warn)
+static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn)
 {
-	if (sfp->sm_retries && !--sfp->sm_retries) {
+	if (sfp->sm_fault_retries && !--sfp->sm_fault_retries) {
 		dev_err(sfp->dev,
 			"module persistently indicates fault, disabling\n");
 		sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0);
@@ -1345,122 +1551,269 @@
 		if (warn)
 			dev_err(sfp->dev, "module transmit fault indicated\n");
 
-		sfp_sm_next(sfp, SFP_S_TX_FAULT, T_FAULT_RECOVER);
+		sfp_sm_next(sfp, next_state, T_FAULT_RECOVER);
 	}
 }
 
-static void sfp_sm_mod_init(struct sfp *sfp)
+/* Probe a SFP for a PHY device if the module supports copper - the PHY
+ * normally sits at I2C bus address 0x56, and may either be a clause 22
+ * or clause 45 PHY.
+ *
+ * Clause 22 copper SFP modules normally operate in Cisco SGMII mode with
+ * negotiation enabled, but some may be in 1000base-X - which is for the
+ * PHY driver to determine.
+ *
+ * Clause 45 copper SFP+ modules (10G) appear to switch their interface
+ * mode according to the negotiated line speed.
+ */
+static int sfp_sm_probe_for_phy(struct sfp *sfp)
 {
-	sfp_module_tx_enable(sfp);
+	int err = 0;
 
-	/* Wait t_init before indicating that the link is up, provided the
-	 * current state indicates no TX_FAULT.  If TX_FAULT clears before
-	 * this time, that's fine too.
-	 */
-	sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES);
-	sfp->sm_retries = 5;
+	switch (sfp->id.base.extended_cc) {
+	case SFF8024_ECC_10GBASE_T_SFI:
+	case SFF8024_ECC_10GBASE_T_SR:
+	case SFF8024_ECC_5GBASE_T:
+	case SFF8024_ECC_2_5GBASE_T:
+		err = sfp_sm_probe_phy(sfp, true);
+		break;
 
-	/* Setting the serdes link mode is guesswork: there's no
-	 * field in the EEPROM which indicates what mode should
-	 * be used.
-	 *
-	 * If it's a gigabit-only fiber module, it probably does
-	 * not have a PHY, so switch to 802.3z negotiation mode.
-	 * Otherwise, switch to SGMII mode (which is required to
-	 * support non-gigabit speeds) and probe for a PHY.
-	 */
-	if (sfp->id.base.e1000_base_t ||
-	    sfp->id.base.e100_base_lx ||
-	    sfp->id.base.e100_base_fx)
-		sfp_sm_probe_phy(sfp);
+	default:
+		if (sfp->id.base.e1000_base_t)
+			err = sfp_sm_probe_phy(sfp, false);
+		break;
+	}
+	return err;
 }
 
-static int sfp_sm_mod_hpower(struct sfp *sfp)
+static int sfp_module_parse_power(struct sfp *sfp)
 {
-	u32 power;
-	u8 val;
-	int err;
+	u32 power_mW = 1000;
+	bool supports_a2;
 
-	power = 1000;
 	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL))
-		power = 1500;
+		power_mW = 1500;
 	if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL))
-		power = 2000;
+		power_mW = 2000;
 
-	if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE &&
-	    (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) !=
-	    SFP_DIAGMON_DDM) {
-		/* The module appears not to implement bus address 0xa2,
-		 * or requires an address change sequence, so assume that
-		 * the module powers up in the indicated power mode.
-		 */
-		if (power > sfp->max_power_mW) {
+	supports_a2 = sfp->id.ext.sff8472_compliance !=
+				SFP_SFF8472_COMPLIANCE_NONE ||
+		      sfp->id.ext.diagmon & SFP_DIAGMON_DDM;
+
+	if (power_mW > sfp->max_power_mW) {
+		/* Module power specification exceeds the allowed maximum. */
+		if (!supports_a2) {
+			/* The module appears not to implement bus address
+			 * 0xa2, so assume that the module powers up in the
+			 * indicated mode.
+			 */
 			dev_err(sfp->dev,
 				"Host does not support %u.%uW modules\n",
-				power / 1000, (power / 100) % 10);
+				power_mW / 1000, (power_mW / 100) % 10);
 			return -EINVAL;
+		} else {
+			dev_warn(sfp->dev,
+				 "Host does not support %u.%uW modules, module left in power mode 1\n",
+				 power_mW / 1000, (power_mW / 100) % 10);
+			return 0;
 		}
+	}
+
+	if (power_mW <= 1000) {
+		/* Modules below 1W do not require a power change sequence */
+		sfp->module_power_mW = power_mW;
 		return 0;
 	}
 
-	if (power > sfp->max_power_mW) {
+	if (!supports_a2) {
+		/* The module power level is below the host maximum and the
+		 * module appears not to implement bus address 0xa2, so assume
+		 * that the module powers up in the indicated mode.
+		 */
+		return 0;
+	}
+
+	/* If the module requires a higher power mode, but also requires
+	 * an address change sequence, warn the user that the module may
+	 * not be functional.
+	 */
+	if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) {
 		dev_warn(sfp->dev,
-			 "Host does not support %u.%uW modules, module left in power mode 1\n",
-			 power / 1000, (power / 100) % 10);
+			 "Address Change Sequence not supported but module requires %u.%uW, module may not be functional\n",
+			 power_mW / 1000, (power_mW / 100) % 10);
 		return 0;
 	}
 
-	if (power <= 1000)
-		return 0;
+	sfp->module_power_mW = power_mW;
+
+	return 0;
+}
+
+static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable)
+{
+	u8 val;
+	int err;
 
 	err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
 	if (err != sizeof(val)) {
 		dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err);
-		err = -EAGAIN;
-		goto err;
+		return -EAGAIN;
 	}
 
-	val |= BIT(0);
+	/* DM7052 reports as a high power module, responds to reads (with
+	 * all bytes 0xff) at 0x51 but does not accept writes.  In any case,
+	 * if the bit is already set, we're already in high power mode.
+	 */
+	if (!!(val & BIT(0)) == enable)
+		return 0;
+
+	if (enable)
+		val |= BIT(0);
+	else
+		val &= ~BIT(0);
 
 	err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val));
 	if (err != sizeof(val)) {
 		dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err);
-		err = -EAGAIN;
-		goto err;
+		return -EAGAIN;
 	}
 
-	dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
-		 power / 1000, (power / 100) % 10);
-	return T_HPOWER_LEVEL;
+	if (enable)
+		dev_info(sfp->dev, "Module switched to %u.%uW power level\n",
+			 sfp->module_power_mW / 1000,
+			 (sfp->module_power_mW / 100) % 10);
 
-err:
-	return err;
+	return 0;
 }
 
-static int sfp_sm_mod_probe(struct sfp *sfp)
+/* GPON modules based on Realtek RTL8672 and RTL9601C chips (e.g. V-SOL
+ * V2801F, CarlitoxxPro CPGOS03-0490, Ubiquiti U-Fiber Instant, ...) do
+ * not support multibyte reads from the EEPROM. Each multi-byte read
+ * operation returns just one byte of EEPROM followed by zeros. There is
+ * no way to identify which modules are using Realtek RTL8672 and RTL9601C
+ * chips. Moreover every OEM of V-SOL V2801F module puts its own vendor
+ * name and vendor id into EEPROM, so there is even no way to detect if
+ * module is V-SOL V2801F. Therefore check for those zeros in the read
+ * data and then based on check switch to reading EEPROM to one byte
+ * at a time.
+ */
+static bool sfp_id_needs_byte_io(struct sfp *sfp, void *buf, size_t len)
+{
+	size_t i, block_size = sfp->i2c_block_size;
+
+	/* Already using byte IO */
+	if (block_size == 1)
+		return false;
+
+	for (i = 1; i < len; i += block_size) {
+		if (memchr_inv(buf + i, '\0', min(block_size - 1, len - i)))
+			return false;
+	}
+	return true;
+}
+
+static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct sfp_eeprom_id *id)
+{
+	u8 check;
+	int err;
+
+	if (id->base.phys_id != SFF8024_ID_SFF_8472 ||
+	    id->base.phys_ext_id != SFP_PHYS_EXT_ID_SFP ||
+	    id->base.connector != SFF8024_CONNECTOR_LC) {
+		dev_warn(sfp->dev, "Rewriting fiber module EEPROM with corrected values\n");
+		id->base.phys_id = SFF8024_ID_SFF_8472;
+		id->base.phys_ext_id = SFP_PHYS_EXT_ID_SFP;
+		id->base.connector = SFF8024_CONNECTOR_LC;
+		err = sfp_write(sfp, false, SFP_PHYS_ID, &id->base, 3);
+		if (err != 3) {
+			dev_err(sfp->dev, "Failed to rewrite module EEPROM: %d\n", err);
+			return err;
+		}
+
+		/* Cotsworks modules have been found to require a delay between write operations. */
+		mdelay(50);
+
+		/* Update base structure checksum */
+		check = sfp_check(&id->base, sizeof(id->base) - 1);
+		err = sfp_write(sfp, false, SFP_CC_BASE, &check, 1);
+		if (err != 1) {
+			dev_err(sfp->dev, "Failed to update base structure checksum in fiber module EEPROM: %d\n", err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
 {
 	/* SFP module inserted - read I2C data */
 	struct sfp_eeprom_id id;
+	bool cotsworks_sfbg;
 	bool cotsworks;
 	u8 check;
 	int ret;
 
-	ret = sfp_read(sfp, false, 0, &id, sizeof(id));
+	/* Some SFP modules and also some Linux I2C drivers do not like reads
+	 * longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at
+	 * a time.
+	 */
+	sfp->i2c_block_size = 16;
+
+	ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base));
 	if (ret < 0) {
-		dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
+		if (report)
+			dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
 		return -EAGAIN;
 	}
 
-	if (ret != sizeof(id)) {
+	if (ret != sizeof(id.base)) {
 		dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
 		return -EAGAIN;
 	}
 
+	/* Some SFP modules (e.g. Nokia 3FE46541AA) lock up if read from
+	 * address 0x51 is just one byte at a time. Also SFF-8472 requires
+	 * that EEPROM supports atomic 16bit read operation for diagnostic
+	 * fields, so do not switch to one byte reading at a time unless it
+	 * is really required and we have no other option.
+	 */
+	if (sfp_id_needs_byte_io(sfp, &id.base, sizeof(id.base))) {
+		dev_info(sfp->dev,
+			 "Detected broken RTL8672/RTL9601C emulated EEPROM\n");
+		dev_info(sfp->dev,
+			 "Switching to reading EEPROM to one byte at a time\n");
+		sfp->i2c_block_size = 1;
+
+		ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base));
+		if (ret < 0) {
+			if (report)
+				dev_err(sfp->dev, "failed to read EEPROM: %d\n",
+					ret);
+			return -EAGAIN;
+		}
+
+		if (ret != sizeof(id.base)) {
+			dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
+			return -EAGAIN;
+		}
+	}
+
 	/* Cotsworks do not seem to update the checksums when they
 	 * do the final programming with the final module part number,
 	 * serial number and date code.
 	 */
 	cotsworks = !memcmp(id.base.vendor_name, "COTSWORKS       ", 16);
+	cotsworks_sfbg = !memcmp(id.base.vendor_pn, "SFBG", 4);
+
+	/* Cotsworks SFF module EEPROM do not always have valid phys_id,
+	 * phys_ext_id, and connector bytes.  Rewrite SFF EEPROM bytes if
+	 * Cotsworks PN matches and bytes are not correct.
+	 */
+	if (cotsworks && cotsworks_sfbg) {
+		ret = sfp_cotsworks_fixup_check(sfp, &id);
+		if (ret < 0)
+			return ret;
+	}
 
 	/* Validate the checksum over the base structure */
 	check = sfp_check(&id.base, sizeof(id.base) - 1);
@@ -1479,6 +1832,18 @@
 		}
 	}
 
+	ret = sfp_read(sfp, false, SFP_CC_BASE + 1, &id.ext, sizeof(id.ext));
+	if (ret < 0) {
+		if (report)
+			dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret);
+		return -EAGAIN;
+	}
+
+	if (ret != sizeof(id.ext)) {
+		dev_err(sfp->dev, "EEPROM short read: %d\n", ret);
+		return -EAGAIN;
+	}
+
 	check = sfp_check(&id.ext, sizeof(id.ext) - 1);
 	if (check != id.ext.cc_ext) {
 		if (cotsworks) {
@@ -1505,7 +1870,7 @@
 		 (int)sizeof(id.ext.datecode), id.ext.datecode);
 
 	/* Check whether we support this module */
-	if (!sfp->type->module_supported(&sfp->id)) {
+	if (!sfp->type->module_supported(&id)) {
 		dev_err(sfp->dev,
 			"module is not supported - phys id 0x%02x 0x%02x\n",
 			sfp->id.base.phys_id, sfp->id.base.phys_ext_id);
@@ -1517,33 +1882,328 @@
 		dev_warn(sfp->dev,
 			 "module address swap to access page 0xA2 is not supported.\n");
 
-	ret = sfp_hwmon_insert(sfp);
+	/* Parse the module power requirement */
+	ret = sfp_module_parse_power(sfp);
 	if (ret < 0)
 		return ret;
 
-	ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
-	if (ret < 0)
-		return ret;
+	if (!memcmp(id.base.vendor_name, "ALCATELLUCENT   ", 16) &&
+	    !memcmp(id.base.vendor_pn, "3FE46541AA      ", 16))
+		sfp->module_t_start_up = T_START_UP_BAD_GPON;
+	else
+		sfp->module_t_start_up = T_START_UP;
 
-	return sfp_sm_mod_hpower(sfp);
+	return 0;
 }
 
 static void sfp_sm_mod_remove(struct sfp *sfp)
 {
-	sfp_module_remove(sfp->sfp_bus);
+	if (sfp->sm_mod_state > SFP_MOD_WAITDEV)
+		sfp_module_remove(sfp->sfp_bus);
 
 	sfp_hwmon_remove(sfp);
 
-	if (sfp->mod_phy)
-		sfp_sm_phy_detach(sfp);
-
-	sfp_module_tx_disable(sfp);
-
 	memset(&sfp->id, 0, sizeof(sfp->id));
+	sfp->module_power_mW = 0;
 
 	dev_info(sfp->dev, "module removed\n");
 }
 
+/* This state machine tracks the upstream's state */
+static void sfp_sm_device(struct sfp *sfp, unsigned int event)
+{
+	switch (sfp->sm_dev_state) {
+	default:
+		if (event == SFP_E_DEV_ATTACH)
+			sfp->sm_dev_state = SFP_DEV_DOWN;
+		break;
+
+	case SFP_DEV_DOWN:
+		if (event == SFP_E_DEV_DETACH)
+			sfp->sm_dev_state = SFP_DEV_DETACHED;
+		else if (event == SFP_E_DEV_UP)
+			sfp->sm_dev_state = SFP_DEV_UP;
+		break;
+
+	case SFP_DEV_UP:
+		if (event == SFP_E_DEV_DETACH)
+			sfp->sm_dev_state = SFP_DEV_DETACHED;
+		else if (event == SFP_E_DEV_DOWN)
+			sfp->sm_dev_state = SFP_DEV_DOWN;
+		break;
+	}
+}
+
+/* This state machine tracks the insert/remove state of the module, probes
+ * the on-board EEPROM, and sets up the power level.
+ */
+static void sfp_sm_module(struct sfp *sfp, unsigned int event)
+{
+	int err;
+
+	/* Handle remove event globally, it resets this state machine */
+	if (event == SFP_E_REMOVE) {
+		if (sfp->sm_mod_state > SFP_MOD_PROBE)
+			sfp_sm_mod_remove(sfp);
+		sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0);
+		return;
+	}
+
+	/* Handle device detach globally */
+	if (sfp->sm_dev_state < SFP_DEV_DOWN &&
+	    sfp->sm_mod_state > SFP_MOD_WAITDEV) {
+		if (sfp->module_power_mW > 1000 &&
+		    sfp->sm_mod_state > SFP_MOD_HPOWER)
+			sfp_sm_mod_hpower(sfp, false);
+		sfp_sm_mod_next(sfp, SFP_MOD_WAITDEV, 0);
+		return;
+	}
+
+	switch (sfp->sm_mod_state) {
+	default:
+		if (event == SFP_E_INSERT) {
+			sfp_sm_mod_next(sfp, SFP_MOD_PROBE, T_SERIAL);
+			sfp->sm_mod_tries_init = R_PROBE_RETRY_INIT;
+			sfp->sm_mod_tries = R_PROBE_RETRY_SLOW;
+		}
+		break;
+
+	case SFP_MOD_PROBE:
+		/* Wait for T_PROBE_INIT to time out */
+		if (event != SFP_E_TIMEOUT)
+			break;
+
+		err = sfp_sm_mod_probe(sfp, sfp->sm_mod_tries == 1);
+		if (err == -EAGAIN) {
+			if (sfp->sm_mod_tries_init &&
+			   --sfp->sm_mod_tries_init) {
+				sfp_sm_set_timer(sfp, T_PROBE_RETRY_INIT);
+				break;
+			} else if (sfp->sm_mod_tries && --sfp->sm_mod_tries) {
+				if (sfp->sm_mod_tries == R_PROBE_RETRY_SLOW - 1)
+					dev_warn(sfp->dev,
+						 "please wait, module slow to respond\n");
+				sfp_sm_set_timer(sfp, T_PROBE_RETRY_SLOW);
+				break;
+			}
+		}
+		if (err < 0) {
+			sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0);
+			break;
+		}
+
+		err = sfp_hwmon_insert(sfp);
+		if (err)
+			dev_warn(sfp->dev, "hwmon probe failed: %d\n", err);
+
+		sfp_sm_mod_next(sfp, SFP_MOD_WAITDEV, 0);
+		fallthrough;
+	case SFP_MOD_WAITDEV:
+		/* Ensure that the device is attached before proceeding */
+		if (sfp->sm_dev_state < SFP_DEV_DOWN)
+			break;
+
+		/* Report the module insertion to the upstream device */
+		err = sfp_module_insert(sfp->sfp_bus, &sfp->id);
+		if (err < 0) {
+			sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0);
+			break;
+		}
+
+		/* If this is a power level 1 module, we are done */
+		if (sfp->module_power_mW <= 1000)
+			goto insert;
+
+		sfp_sm_mod_next(sfp, SFP_MOD_HPOWER, 0);
+		fallthrough;
+	case SFP_MOD_HPOWER:
+		/* Enable high power mode */
+		err = sfp_sm_mod_hpower(sfp, true);
+		if (err < 0) {
+			if (err != -EAGAIN) {
+				sfp_module_remove(sfp->sfp_bus);
+				sfp_sm_mod_next(sfp, SFP_MOD_ERROR, 0);
+			} else {
+				sfp_sm_set_timer(sfp, T_PROBE_RETRY_INIT);
+			}
+			break;
+		}
+
+		sfp_sm_mod_next(sfp, SFP_MOD_WAITPWR, T_HPOWER_LEVEL);
+		break;
+
+	case SFP_MOD_WAITPWR:
+		/* Wait for T_HPOWER_LEVEL to time out */
+		if (event != SFP_E_TIMEOUT)
+			break;
+
+	insert:
+		sfp_sm_mod_next(sfp, SFP_MOD_PRESENT, 0);
+		break;
+
+	case SFP_MOD_PRESENT:
+	case SFP_MOD_ERROR:
+		break;
+	}
+}
+
+static void sfp_sm_main(struct sfp *sfp, unsigned int event)
+{
+	unsigned long timeout;
+	int ret;
+
+	/* Some events are global */
+	if (sfp->sm_state != SFP_S_DOWN &&
+	    (sfp->sm_mod_state != SFP_MOD_PRESENT ||
+	     sfp->sm_dev_state != SFP_DEV_UP)) {
+		if (sfp->sm_state == SFP_S_LINK_UP &&
+		    sfp->sm_dev_state == SFP_DEV_UP)
+			sfp_sm_link_down(sfp);
+		if (sfp->sm_state > SFP_S_INIT)
+			sfp_module_stop(sfp->sfp_bus);
+		if (sfp->mod_phy)
+			sfp_sm_phy_detach(sfp);
+		sfp_module_tx_disable(sfp);
+		sfp_soft_stop_poll(sfp);
+		sfp_sm_next(sfp, SFP_S_DOWN, 0);
+		return;
+	}
+
+	/* The main state machine */
+	switch (sfp->sm_state) {
+	case SFP_S_DOWN:
+		if (sfp->sm_mod_state != SFP_MOD_PRESENT ||
+		    sfp->sm_dev_state != SFP_DEV_UP)
+			break;
+
+		if (!(sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE))
+			sfp_soft_start_poll(sfp);
+
+		sfp_module_tx_enable(sfp);
+
+		/* Initialise the fault clearance retries */
+		sfp->sm_fault_retries = N_FAULT_INIT;
+
+		/* We need to check the TX_FAULT state, which is not defined
+		 * while TX_DISABLE is asserted. The earliest we want to do
+		 * anything (such as probe for a PHY) is 50ms.
+		 */
+		sfp_sm_next(sfp, SFP_S_WAIT, T_WAIT);
+		break;
+
+	case SFP_S_WAIT:
+		if (event != SFP_E_TIMEOUT)
+			break;
+
+		if (sfp->state & SFP_F_TX_FAULT) {
+			/* Wait up to t_init (SFF-8472) or t_start_up (SFF-8431)
+			 * from the TX_DISABLE deassertion for the module to
+			 * initialise, which is indicated by TX_FAULT
+			 * deasserting.
+			 */
+			timeout = sfp->module_t_start_up;
+			if (timeout > T_WAIT)
+				timeout -= T_WAIT;
+			else
+				timeout = 1;
+
+			sfp_sm_next(sfp, SFP_S_INIT, timeout);
+		} else {
+			/* TX_FAULT is not asserted, assume the module has
+			 * finished initialising.
+			 */
+			goto init_done;
+		}
+		break;
+
+	case SFP_S_INIT:
+		if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) {
+			/* TX_FAULT is still asserted after t_init or
+			 * or t_start_up, so assume there is a fault.
+			 */
+			sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
+				     sfp->sm_fault_retries == N_FAULT_INIT);
+		} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
+	init_done:
+			sfp->sm_phy_retries = R_PHY_RETRY;
+			goto phy_probe;
+		}
+		break;
+
+	case SFP_S_INIT_PHY:
+		if (event != SFP_E_TIMEOUT)
+			break;
+	phy_probe:
+		/* TX_FAULT deasserted or we timed out with TX_FAULT
+		 * clear.  Probe for the PHY and check the LOS state.
+		 */
+		ret = sfp_sm_probe_for_phy(sfp);
+		if (ret == -ENODEV) {
+			if (--sfp->sm_phy_retries) {
+				sfp_sm_next(sfp, SFP_S_INIT_PHY, T_PHY_RETRY);
+				break;
+			} else {
+				dev_info(sfp->dev, "no PHY detected\n");
+			}
+		} else if (ret) {
+			sfp_sm_next(sfp, SFP_S_FAIL, 0);
+			break;
+		}
+		if (sfp_module_start(sfp->sfp_bus)) {
+			sfp_sm_next(sfp, SFP_S_FAIL, 0);
+			break;
+		}
+		sfp_sm_link_check_los(sfp);
+
+		/* Reset the fault retry count */
+		sfp->sm_fault_retries = N_FAULT;
+		break;
+
+	case SFP_S_INIT_TX_FAULT:
+		if (event == SFP_E_TIMEOUT) {
+			sfp_module_tx_fault_reset(sfp);
+			sfp_sm_next(sfp, SFP_S_INIT, sfp->module_t_start_up);
+		}
+		break;
+
+	case SFP_S_WAIT_LOS:
+		if (event == SFP_E_TX_FAULT)
+			sfp_sm_fault(sfp, SFP_S_TX_FAULT, true);
+		else if (sfp_los_event_inactive(sfp, event))
+			sfp_sm_link_up(sfp);
+		break;
+
+	case SFP_S_LINK_UP:
+		if (event == SFP_E_TX_FAULT) {
+			sfp_sm_link_down(sfp);
+			sfp_sm_fault(sfp, SFP_S_TX_FAULT, true);
+		} else if (sfp_los_event_active(sfp, event)) {
+			sfp_sm_link_down(sfp);
+			sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
+		}
+		break;
+
+	case SFP_S_TX_FAULT:
+		if (event == SFP_E_TIMEOUT) {
+			sfp_module_tx_fault_reset(sfp);
+			sfp_sm_next(sfp, SFP_S_REINIT, sfp->module_t_start_up);
+		}
+		break;
+
+	case SFP_S_REINIT:
+		if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) {
+			sfp_sm_fault(sfp, SFP_S_TX_FAULT, false);
+		} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
+			dev_info(sfp->dev, "module transmit fault recovered\n");
+			sfp_sm_link_check_los(sfp);
+		}
+		break;
+
+	case SFP_S_TX_DISABLE:
+		break;
+	}
+}
+
 static void sfp_sm_event(struct sfp *sfp, unsigned int event)
 {
 	mutex_lock(&sfp->sm_mutex);
@@ -1554,134 +2214,9 @@
 		sm_state_to_str(sfp->sm_state),
 		event_to_str(event));
 
-	/* This state machine tracks the insert/remove state of
-	 * the module, and handles probing the on-board EEPROM.
-	 */
-	switch (sfp->sm_mod_state) {
-	default:
-		if (event == SFP_E_INSERT && sfp->attached) {
-			sfp_module_tx_disable(sfp);
-			sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT);
-		}
-		break;
-
-	case SFP_MOD_PROBE:
-		if (event == SFP_E_REMOVE) {
-			sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
-		} else if (event == SFP_E_TIMEOUT) {
-			int val = sfp_sm_mod_probe(sfp);
-
-			if (val == 0)
-				sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
-			else if (val > 0)
-				sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val);
-			else if (val != -EAGAIN)
-				sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0);
-			else
-				sfp_sm_set_timer(sfp, T_PROBE_RETRY);
-		}
-		break;
-
-	case SFP_MOD_HPOWER:
-		if (event == SFP_E_TIMEOUT) {
-			sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0);
-			break;
-		}
-		/* fallthrough */
-	case SFP_MOD_PRESENT:
-	case SFP_MOD_ERROR:
-		if (event == SFP_E_REMOVE) {
-			sfp_sm_mod_remove(sfp);
-			sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0);
-		}
-		break;
-	}
-
-	/* This state machine tracks the netdev up/down state */
-	switch (sfp->sm_dev_state) {
-	default:
-		if (event == SFP_E_DEV_UP)
-			sfp->sm_dev_state = SFP_DEV_UP;
-		break;
-
-	case SFP_DEV_UP:
-		if (event == SFP_E_DEV_DOWN) {
-			/* If the module has a PHY, avoid raising TX disable
-			 * as this resets the PHY. Otherwise, raise it to
-			 * turn the laser off.
-			 */
-			if (!sfp->mod_phy)
-				sfp_module_tx_disable(sfp);
-			sfp->sm_dev_state = SFP_DEV_DOWN;
-		}
-		break;
-	}
-
-	/* Some events are global */
-	if (sfp->sm_state != SFP_S_DOWN &&
-	    (sfp->sm_mod_state != SFP_MOD_PRESENT ||
-	     sfp->sm_dev_state != SFP_DEV_UP)) {
-		if (sfp->sm_state == SFP_S_LINK_UP &&
-		    sfp->sm_dev_state == SFP_DEV_UP)
-			sfp_sm_link_down(sfp);
-		if (sfp->mod_phy)
-			sfp_sm_phy_detach(sfp);
-		sfp_sm_next(sfp, SFP_S_DOWN, 0);
-		mutex_unlock(&sfp->sm_mutex);
-		return;
-	}
-
-	/* The main state machine */
-	switch (sfp->sm_state) {
-	case SFP_S_DOWN:
-		if (sfp->sm_mod_state == SFP_MOD_PRESENT &&
-		    sfp->sm_dev_state == SFP_DEV_UP)
-			sfp_sm_mod_init(sfp);
-		break;
-
-	case SFP_S_INIT:
-		if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT)
-			sfp_sm_fault(sfp, true);
-		else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR)
-			sfp_sm_link_check_los(sfp);
-		break;
-
-	case SFP_S_WAIT_LOS:
-		if (event == SFP_E_TX_FAULT)
-			sfp_sm_fault(sfp, true);
-		else if (sfp_los_event_inactive(sfp, event))
-			sfp_sm_link_up(sfp);
-		break;
-
-	case SFP_S_LINK_UP:
-		if (event == SFP_E_TX_FAULT) {
-			sfp_sm_link_down(sfp);
-			sfp_sm_fault(sfp, true);
-		} else if (sfp_los_event_active(sfp, event)) {
-			sfp_sm_link_down(sfp);
-			sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0);
-		}
-		break;
-
-	case SFP_S_TX_FAULT:
-		if (event == SFP_E_TIMEOUT) {
-			sfp_module_tx_fault_reset(sfp);
-			sfp_sm_next(sfp, SFP_S_REINIT, T_INIT_JIFFIES);
-		}
-		break;
-
-	case SFP_S_REINIT:
-		if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) {
-			sfp_sm_fault(sfp, false);
-		} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
-			dev_info(sfp->dev, "module transmit fault recovered\n");
-			sfp_sm_link_check_los(sfp);
-		}
-		break;
-
-	case SFP_S_TX_DISABLE:
-		break;
-	}
+	sfp_sm_device(sfp, event);
+	sfp_sm_module(sfp, event);
+	sfp_sm_main(sfp, event);
 
 	dev_dbg(sfp->dev, "SM: exit %s:%s:%s\n",
 		mod_state_to_str(sfp->sm_mod_state),
@@ -1693,15 +2228,12 @@
 
 static void sfp_attach(struct sfp *sfp)
 {
-	sfp->attached = true;
-	if (sfp->state & SFP_F_PRESENT)
-		sfp_sm_event(sfp, SFP_E_INSERT);
+	sfp_sm_event(sfp, SFP_E_DEV_ATTACH);
 }
 
 static void sfp_detach(struct sfp *sfp)
 {
-	sfp->attached = false;
-	sfp_sm_event(sfp, SFP_E_REMOVE);
+	sfp_sm_event(sfp, SFP_E_DEV_DETACH);
 }
 
 static void sfp_start(struct sfp *sfp)
@@ -1828,7 +2360,10 @@
 	struct sfp *sfp = container_of(work, struct sfp, poll.work);
 
 	sfp_check_state(sfp);
-	mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
+
+	if (sfp->state_soft_mask & (SFP_F_LOS | SFP_F_TX_FAULT) ||
+	    sfp->need_poll)
+		mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
 }
 
 static struct sfp *sfp_alloc(struct device *dev)
@@ -1846,6 +2381,8 @@
 	INIT_DELAYED_WORK(&sfp->poll, sfp_poll);
 	INIT_DELAYED_WORK(&sfp->timeout, sfp_timeout);
 
+	sfp_hwmon_init(sfp);
+
 	return sfp;
 }
 
@@ -1853,6 +2390,8 @@
 {
 	struct sfp *sfp = data;
 
+	sfp_hwmon_exit(sfp);
+
 	cancel_delayed_work_sync(&sfp->poll);
 	cancel_delayed_work_sync(&sfp->timeout);
 	if (sfp->i2c_mii) {
@@ -1868,8 +2407,8 @@
 {
 	const struct sff_data *sff;
 	struct i2c_adapter *i2c;
+	char *sfp_irq_name;
 	struct sfp *sfp;
-	bool poll = false;
 	int err, i;
 
 	sfp = sfp_alloc(&pdev->dev);
@@ -1964,6 +2503,11 @@
 		sfp->state |= SFP_F_RATE_SELECT;
 	sfp_set_state(sfp, sfp->state);
 	sfp_module_tx_disable(sfp);
+	if (sfp->state & SFP_F_PRESENT) {
+		rtnl_lock();
+		sfp_sm_event(sfp, SFP_E_INSERT);
+		rtnl_unlock();
+	}
 
 	for (i = 0; i < GPIO_MAX; i++) {
 		if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i])
@@ -1972,23 +2516,30 @@
 		sfp->gpio_irq[i] = gpiod_to_irq(sfp->gpio[i]);
 		if (sfp->gpio_irq[i] < 0) {
 			sfp->gpio_irq[i] = 0;
-			poll = true;
+			sfp->need_poll = true;
 			continue;
 		}
 
+		sfp_irq_name = devm_kasprintf(sfp->dev, GFP_KERNEL,
+					      "%s-%s", dev_name(sfp->dev),
+					      gpio_of_names[i]);
+
+		if (!sfp_irq_name)
+			return -ENOMEM;
+
 		err = devm_request_threaded_irq(sfp->dev, sfp->gpio_irq[i],
 						NULL, sfp_irq,
 						IRQF_ONESHOT |
 						IRQF_TRIGGER_RISING |
 						IRQF_TRIGGER_FALLING,
-						dev_name(sfp->dev), sfp);
+						sfp_irq_name, sfp);
 		if (err) {
 			sfp->gpio_irq[i] = 0;
-			poll = true;
+			sfp->need_poll = true;
 		}
 	}
 
-	if (poll)
+	if (sfp->need_poll)
 		mod_delayed_work(system_wq, &sfp->poll, poll_jiffies);
 
 	/* We could have an issue in cases no Tx disable pin is available or
@@ -2013,6 +2564,10 @@
 
 	sfp_unregister_socket(sfp->sfp_bus);
 
+	rtnl_lock();
+	sfp_sm_event(sfp, SFP_E_REMOVE);
+	rtnl_unlock();
+
 	return 0;
 }