linux/drivers/pcmcia
Russell King 025e4ab3db pcmcia: fix socket refcount decrementing on each resume
This fixes a memory-corrupting bug: not only does it cause the warning,
but as a result of dropping the refcount to zero, it causes the
pcmcia_socket0 device structure to be freed while it still has
references, causing slab caches corruption.  A fatal oops quickly
follows this warning - often even just a 'dmesg' following the warning
causes the kernel to oops.

While testing suspend/resume on an ARM device with PCMCIA support, and a
CF card inserted, I found that after five suspend and resumes, the
kernel would complain, and shortly die after with slab corruption.

  WARNING: at include/linux/kref.h:41 kobject_get+0x28/0x50()

As the message doesn't give a clue about which kobject, and the built-in
debugging in drivers/base/power/main.c happens too late, this was added
right before each get_device():

  printk("%s: %p [%s] %u\n", __func__, dev, kobject_name(&dev->kobj), atomic_read(&dev->kobj.kref.refcount));

and on the 3rd s2ram cycle, the following behaviour observed:

On the 3rd suspend/resume cycle:

  dpm_prepare: c1a0d998 [pcmcia_socket0] 3
  dpm_suspend: c1a0d998 [pcmcia_socket0] 3
  dpm_suspend_noirq: c1a0d998 [pcmcia_socket0] 3
  dpm_resume_noirq: c1a0d998 [pcmcia_socket0] 3
  dpm_resume: c1a0d998 [pcmcia_socket0] 3
  dpm_complete: c1a0d998 [pcmcia_socket0] 2

4th:

  dpm_prepare: c1a0d998 [pcmcia_socket0] 2
  dpm_suspend: c1a0d998 [pcmcia_socket0] 2
  dpm_suspend_noirq: c1a0d998 [pcmcia_socket0] 2
  dpm_resume_noirq: c1a0d998 [pcmcia_socket0] 2
  dpm_resume: c1a0d998 [pcmcia_socket0] 2
  dpm_complete: c1a0d998 [pcmcia_socket0] 1

5th:

  dpm_prepare: c1a0d998 [pcmcia_socket0] 1
  dpm_suspend: c1a0d998 [pcmcia_socket0] 1
  dpm_suspend_noirq: c1a0d998 [pcmcia_socket0] 1
  dpm_resume_noirq: c1a0d998 [pcmcia_socket0] 1
  dpm_resume: c1a0d998 [pcmcia_socket0] 1
  dpm_complete: c1a0d998 [pcmcia_socket0] 0
  ------------[ cut here ]------------
  WARNING: at include/linux/kref.h:41 kobject_get+0x28/0x50()
  Modules linked in: ucb1x00_core
  Backtrace:
  [<c0212090>] (dump_backtrace+0x0/0x110) from [<c04799dc>] (dump_stack+0x18/0x1c)
  [<c04799c4>] (dump_stack+0x0/0x1c) from [<c021cba0>] (warn_slowpath_common+0x50/0x68)
  [<c021cb50>] (warn_slowpath_common+0x0/0x68) from [<c021cbdc>] (warn_slowpath_null+0x24/0x28)
  [<c021cbb8>] (warn_slowpath_null+0x0/0x28) from [<c0335374>] (kobject_get+0x28/0x50)
  [<c033534c>] (kobject_get+0x0/0x50) from [<c03804f4>] (get_device+0x1c/0x24)
  [<c0388c90>] (dpm_complete+0x0/0x1a0) from [<c0389cc0>] (dpm_resume_end+0x1c/0x20)
  ...

Looking at commit 7b24e79882 ("pcmcia: split up central event handler"),
the following change was made to cs.c:

                return 0;
        }
 #endif
-
-       send_event(skt, CS_EVENT_PM_RESUME, CS_EVENT_PRI_LOW);
+       if (!(skt->state & SOCKET_CARDBUS) && (skt->callback))
+               skt->callback->early_resume(skt);
        return 0;
 }

And the corresponding change in ds.c is from:

-static int ds_event(struct pcmcia_socket *skt, event_t event, int priority)
-{
-       struct pcmcia_socket *s = pcmcia_get_socket(skt);
...
-       switch (event) {
...
-       case CS_EVENT_PM_RESUME:
-               if (verify_cis_cache(skt) != 0) {
-                       dev_dbg(&skt->dev, "cis mismatch - different card\n");
-                       /* first, remove the card */
-                       ds_event(skt, CS_EVENT_CARD_REMOVAL, CS_EVENT_PRI_HIGH);
-                       mutex_lock(&s->ops_mutex);
-                       destroy_cis_cache(skt);
-                       kfree(skt->fake_cis);
-                       skt->fake_cis = NULL;
-                       s->functions = 0;
-                       mutex_unlock(&s->ops_mutex);
-                       /* now, add the new card */
-                       ds_event(skt, CS_EVENT_CARD_INSERTION,
-                                CS_EVENT_PRI_LOW);
-               }
-               break;
...
-    }

-    pcmcia_put_socket(s);

-    return 0;
-} /* ds_event */

to:

+static int pcmcia_bus_early_resume(struct pcmcia_socket *skt)
+{
+       if (!verify_cis_cache(skt)) {
+               pcmcia_put_socket(skt);
+               return 0;
+       }

+       dev_dbg(&skt->dev, "cis mismatch - different card\n");

+       /* first, remove the card */
+       pcmcia_bus_remove(skt);
+       mutex_lock(&skt->ops_mutex);
+       destroy_cis_cache(skt);
+       kfree(skt->fake_cis);
+       skt->fake_cis = NULL;
+       skt->functions = 0;
+       mutex_unlock(&skt->ops_mutex);

+       /* now, add the new card */
+       pcmcia_bus_add(skt);
+       return 0;
+}

As can be seen, the original function called pcmcia_get_socket() and
pcmcia_put_socket() around the guts, whereas the replacement code
calls pcmcia_put_socket() only in one path.  This creates an imbalance
in the refcounting.

Testing with pcmcia_put_socket() put removed shows that the bug is gone:

  dpm_suspend: c1a10998 [pcmcia_socket0] 5
  dpm_suspend_noirq: c1a10998 [pcmcia_socket0] 5
  dpm_resume_noirq: c1a10998 [pcmcia_socket0] 5
  dpm_resume: c1a10998 [pcmcia_socket0] 5
  dpm_complete: c1a10998 [pcmcia_socket0] 5

Tested-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-02-08 19:03:51 -08:00
..
at91_cf.c
bcm63xx_pcmcia.c
bcm63xx_pcmcia.h
bfin_cf_pcmcia.c
cardbus.c
cirrus.h
cistpl.c
cs.c
cs_internal.h
db1xxx_ss.c MIPS: Alchemy: Basic support for the DB1300 board. 2011-12-07 22:02:06 +00:00
ds.c pcmcia: fix socket refcount decrementing on each resume 2012-02-08 19:03:51 -08:00
electra_cf.c
i82092.c
i82092aa.h
i82365.c
i82365.h
Kconfig MIPS: Alchemy: Basic support for the DB1300 board. 2011-12-07 22:02:06 +00:00
m8xx_pcmcia.c
m32r_cfc.c
m32r_cfc.h
m32r_pcc.c
m32r_pcc.h
Makefile MIPS: Alchemy: remove PB1000 support 2011-12-07 22:02:05 +00:00
o2micro.h
omap_cf.c
pcmcia_cis.c
pcmcia_resource.c
pd6729.c
pd6729.h
pxa2xx_balloon3.c ARM: pxa: use correct __iomem annotations 2011-10-08 21:03:07 +08:00
pxa2xx_base.c ARM: PXA: fix lubbock PCMCIA driver build error 2011-11-05 19:09:40 +00:00
pxa2xx_base.h
pxa2xx_cm_x2xx.c ARM: PXA: fix includes in pxa2xx_cm_x2xx PCMCIA driver 2011-11-05 22:26:46 +00:00
pxa2xx_cm_x255.c driver: pcmcia: replace IRQ_GPIO() with gpio_to_irq() 2011-11-15 19:09:56 +08:00
pxa2xx_cm_x270.c driver: pcmcia: replace IRQ_GPIO() with gpio_to_irq() 2011-11-15 19:09:56 +08:00
pxa2xx_colibri.c
pxa2xx_e740.c pcmcia: pxa: replace IRQ_GPIO() with gpio_to_irq() 2011-12-28 10:14:04 +00:00
pxa2xx_lubbock.c
pxa2xx_mainstone.c
pxa2xx_palmld.c pcmcia: pxa: replace IRQ_GPIO() with gpio_to_irq() 2011-12-28 10:14:04 +00:00
pxa2xx_palmtc.c pcmcia: pxa: replace IRQ_GPIO() with gpio_to_irq() 2011-12-28 10:14:04 +00:00
pxa2xx_palmtx.c
pxa2xx_sharpsl.c
pxa2xx_stargate2.c pcmcia: pxa: replace IRQ_GPIO() with gpio_to_irq() 2011-12-28 10:14:04 +00:00
pxa2xx_trizeps4.c pcmcia: pxa: replace IRQ_GPIO() with gpio_to_irq() 2011-12-28 10:14:04 +00:00
pxa2xx_viper.c
pxa2xx_vpac270.c pcmcia: pxa: replace IRQ_GPIO() with gpio_to_irq() 2011-12-28 10:14:04 +00:00
ricoh.h
rsrc_iodyn.c
rsrc_mgr.c
rsrc_nonstatic.c
sa11xx_base.c
sa11xx_base.h
sa1100_assabet.c
sa1100_badge4.c
sa1100_cerf.c
sa1100_generic.c
sa1100_generic.h
sa1100_h3600.c
sa1100_jornada720.c
sa1100_nanoengine.c
sa1100_neponset.c
sa1100_shannon.c
sa1100_simpad.c ARM: 7024/1: simpad: Cleanup CS3 accessors. 2011-10-17 09:12:42 +01:00
sa1111_generic.c PCMCIA: fix sa1111 oops on remove 2012-01-24 21:33:26 +00:00
sa1111_generic.h
soc_common.c
soc_common.h
socket_sysfs.c
tcic.c
tcic.h
ti113x.h
topic.h
vg468.h
vrc4171_card.c
vrc4173_cardu.c
vrc4173_cardu.h
xxs1500_ss.c
yenta_socket.c module_param: make bool parameters really bool (drivers & misc) 2012-01-13 09:32:20 +10:30
yenta_socket.h