diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c index 1674dba43f83..d83fe84407bf 100644 --- a/drivers/net/wimax/i2400m/driver.c +++ b/drivers/net/wimax/i2400m/driver.c @@ -395,6 +395,16 @@ int __i2400m_dev_start(struct i2400m *i2400m, enum i2400m_bri flags) result = i2400m_dev_initialize(i2400m); if (result < 0) goto error_dev_initialize; + + /* We don't want any additional unwanted error recovery triggered + * from any other context so if anything went wrong before we come + * here, let's keep i2400m->error_recovery untouched and leave it to + * dev_reset_handle(). See dev_reset_handle(). */ + + atomic_dec(&i2400m->error_recovery); + /* Every thing works so far, ok, now we are ready to + * take error recovery if it's required. */ + /* At this point, reports will come for the device and set it * to the right state if it is different than UNINITIALIZED */ d_fnend(3, dev, "(net_dev %p [i2400m %p]) = %d\n", @@ -770,6 +780,66 @@ int i2400m_dev_reset_handle(struct i2400m *i2400m, const char *reason) EXPORT_SYMBOL_GPL(i2400m_dev_reset_handle); + /* + * The actual work of error recovery. + * + * The current implementation of error recovery is to trigger a bus reset. + */ +static +void __i2400m_error_recovery(struct work_struct *ws) +{ + struct i2400m_work *iw = container_of(ws, struct i2400m_work, ws); + struct i2400m *i2400m = iw->i2400m; + + i2400m_reset(i2400m, I2400M_RT_BUS); + + i2400m_put(i2400m); + kfree(iw); + return; +} + +/* + * Schedule a work struct for error recovery. + * + * The intention of error recovery is to bring back the device to some + * known state whenever TX sees -110 (-ETIMEOUT) on copying the data to + * the device. The TX failure could mean a device bus stuck, so the current + * error recovery implementation is to trigger a bus reset to the device + * and hopefully it can bring back the device. + * + * The actual work of error recovery has to be in a thread context because + * it is kicked off in the TX thread (i2400ms->tx_workqueue) which is to be + * destroyed by the error recovery mechanism (currently a bus reset). + * + * Also, there may be already a queue of TX works that all hit + * the -ETIMEOUT error condition because the device is stuck already. + * Since bus reset is used as the error recovery mechanism and we don't + * want consecutive bus resets simply because the multiple TX works + * in the queue all hit the same device erratum, the flag "error_recovery" + * is introduced for preventing unwanted consecutive bus resets. + * + * Error recovery shall only be invoked again if previous one was completed. + * The flag error_recovery is set when error recovery mechanism is scheduled, + * and is checked when we need to schedule another error recovery. If it is + * in place already, then we shouldn't schedule another one. + */ +void i2400m_error_recovery(struct i2400m *i2400m) +{ + struct device *dev = i2400m_dev(i2400m); + + if (atomic_add_return(1, &i2400m->error_recovery) == 1) { + if (i2400m_schedule_work(i2400m, __i2400m_error_recovery, + GFP_ATOMIC, NULL, 0) < 0) { + dev_err(dev, "run out of memory for " + "scheduling an error recovery ?\n"); + atomic_dec(&i2400m->error_recovery); + } + } else + atomic_dec(&i2400m->error_recovery); + return; +} +EXPORT_SYMBOL_GPL(i2400m_error_recovery); + /* * Alloc the command and ack buffers for boot mode * @@ -839,6 +909,10 @@ void i2400m_init(struct i2400m *i2400m) atomic_set(&i2400m->bus_reset_retries, 0); i2400m->alive = 0; + + /* initialize error_recovery to 1 for denoting we + * are not yet ready to take any error recovery */ + atomic_set(&i2400m->error_recovery, 1); } EXPORT_SYMBOL_GPL(i2400m_init); diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h index ad8e6a3be1e3..7a9c2c5b25cb 100644 --- a/drivers/net/wimax/i2400m/i2400m.h +++ b/drivers/net/wimax/i2400m/i2400m.h @@ -545,6 +545,15 @@ struct i2400m_barker_db; * all the device reboot events detected can be still handled properly * by either dev_reset_handle() or .pre_reset/.post_reset as long as * the driver presents. It is set 0 along with @updown in dev_stop(). + * + * @error_recovery: flag to denote if we are ready to take an error recovery. + * 0 for ready to take an error recovery; 1 for not ready. It is + * initialized to 1 while probe() since we don't tend to take any error + * recovery during probe(). It is decremented by 1 whenever dev_start() + * succeeds to indicate we are ready to take error recovery from now on. + * It is checked every time we wanna schedule an error recovery. If an + * error recovery is already in place (error_recovery was set 1), we + * should not schedule another one until the last one is done. */ struct i2400m { struct wimax_dev wimax_dev; /* FIRST! See doc */ @@ -625,6 +634,10 @@ struct i2400m { /* if the device is expected to be alive */ unsigned alive; + + /* 0 if we are ready for error recovery; 1 if not ready */ + atomic_t error_recovery; + }; @@ -847,6 +860,7 @@ void i2400m_put(struct i2400m *i2400m) extern int i2400m_dev_reset_handle(struct i2400m *, const char *); extern int i2400m_pre_reset(struct i2400m *); extern int i2400m_post_reset(struct i2400m *); +extern void i2400m_error_recovery(struct i2400m *); /* * _setup()/_release() are called by the probe/disconnect functions of diff --git a/drivers/net/wimax/i2400m/sdio-tx.c b/drivers/net/wimax/i2400m/sdio-tx.c index 412b6a8eaef2..b53cd1c80e3e 100644 --- a/drivers/net/wimax/i2400m/sdio-tx.c +++ b/drivers/net/wimax/i2400m/sdio-tx.c @@ -98,6 +98,10 @@ void i2400ms_tx_submit(struct work_struct *ws) tx_msg_size, result); } + if (result == -ETIMEDOUT) { + i2400m_error_recovery(i2400m); + break; + } d_printf(2, dev, "TX: %zub submitted\n", tx_msg_size); }