linux/net/dsa/switch.c
Vladimir Oltean f66a6a69f9 net: dsa: permit cross-chip bridging between all trees in the system
One way of utilizing DSA is by cascading switches which do not all have
compatible taggers. Consider the following real-life topology:

      +---------------------------------------------------------------+
      | LS1028A                                                       |
      |               +------------------------------+                |
      |               |      DSA master for Felix    |                |
      |               |(internal ENETC port 2: eno2) |                |
      |  +------------+------------------------------+-------------+  |
      |  | Felix embedded L2 switch                                |  |
      |  |                                                         |  |
      |  | +--------------+   +--------------+   +--------------+  |  |
      |  | |DSA master for|   |DSA master for|   |DSA master for|  |  |
      |  | |  SJA1105 1   |   |  SJA1105 2   |   |  SJA1105 3   |  |  |
      |  | |(Felix port 1)|   |(Felix port 2)|   |(Felix port 3)|  |  |
      +--+-+--------------+---+--------------+---+--------------+--+--+

+-----------------------+ +-----------------------+ +-----------------------+
|   SJA1105 switch 1    | |   SJA1105 switch 2    | |   SJA1105 switch 3    |
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
|sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3| |sw3p0|sw3p1|sw3p2|sw3p3|
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+

The above can be described in the device tree as follows (obviously not
complete):

mscc_felix {
	dsa,member = <0 0>;
	ports {
		port@4 {
			ethernet = <&enetc_port2>;
		};
	};
};

sja1105_switch1 {
	dsa,member = <1 1>;
	ports {
		port@4 {
			ethernet = <&mscc_felix_port1>;
		};
	};
};

sja1105_switch2 {
	dsa,member = <2 2>;
	ports {
		port@4 {
			ethernet = <&mscc_felix_port2>;
		};
	};
};

sja1105_switch3 {
	dsa,member = <3 3>;
	ports {
		port@4 {
			ethernet = <&mscc_felix_port3>;
		};
	};
};

Basically we instantiate one DSA switch tree for every hardware switch
in the system, but we still give them globally unique switch IDs (will
come back to that later). Having 3 disjoint switch trees makes the
tagger drivers "just work", because net devices are registered for the
3 Felix DSA master ports, and they are also DSA slave ports to the ENETC
port. So packets received on the ENETC port are stripped of their
stacked DSA tags one by one.

Currently, hardware bridging between ports on the same sja1105 chip is
possible, but switching between sja1105 ports on different chips is
handled by the software bridge. This is fine, but we can do better.

In fact, the dsa_8021q tag used by sja1105 is compatible with cascading.
In other words, a sja1105 switch can correctly parse and route a packet
containing a dsa_8021q tag. So if we could enable hardware bridging on
the Felix DSA master ports, cross-chip bridging could be completely
offloaded.

Such a system would be used as follows:

ip link add dev br0 type bridge && ip link set dev br0 up
for port in sw0p0 sw0p1 sw0p2 sw0p3 \
	    sw1p0 sw1p1 sw1p2 sw1p3 \
	    sw2p0 sw2p1 sw2p2 sw2p3; do
	ip link set dev $port master br0
done

The above makes switching between ports on the same row be performed in
hardware, and between ports on different rows in software. Now assume
the Felix switch ports are called swp0, swp1, swp2. By running the
following extra commands:

ip link add dev br1 type bridge && ip link set dev br1 up
for port in swp0 swp1 swp2; do
	ip link set dev $port master br1
done

the CPU no longer sees packets which traverse sja1105 switch boundaries
and can be forwarded directly by Felix. The br1 bridge would not be used
for any sort of traffic termination.

For this to work, we need to give drivers an opportunity to listen for
bridging events on DSA trees other than their own, and pass that other
tree index as argument. I have made the assumption, for the moment, that
the other existing DSA notifiers don't need to be broadcast to other
trees. That assumption might turn out to be incorrect. But in the
meantime, introduce a dsa_broadcast function, similar in purpose to
dsa_port_notify, which is used only by the bridging notifiers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2020-05-10 19:52:33 -07:00

407 lines
10 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Handling of a single switch chip, part of a switch fabric
*
* Copyright (c) 2017 Savoir-faire Linux Inc.
* Vivien Didelot <vivien.didelot@savoirfairelinux.com>
*/
#include <linux/if_bridge.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/if_vlan.h>
#include <net/switchdev.h>
#include "dsa_priv.h"
static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds,
unsigned int ageing_time)
{
int i;
for (i = 0; i < ds->num_ports; ++i) {
struct dsa_port *dp = dsa_to_port(ds, i);
if (dp->ageing_time && dp->ageing_time < ageing_time)
ageing_time = dp->ageing_time;
}
return ageing_time;
}
/* Handle an ageing time notification for @ds.
 *
 * In the switchdev prepare phase, only validate the requested value against
 * the switch's ageing_time_min/ageing_time_max limits (a limit of zero is not
 * enforced) and return -ERANGE when it falls outside of them.
 *
 * In the commit phase, hand the fastest ageing time found on the switch to
 * the driver's set_ageing_time() callback, if it provides one.
 */
static int dsa_switch_ageing_time(struct dsa_switch *ds,
				  struct dsa_notifier_ageing_time_info *info)
{
	unsigned int requested = info->ageing_time;
	unsigned int fastest;

	if (switchdev_trans_ph_prepare(info->trans)) {
		bool too_small = ds->ageing_time_min &&
				 requested < ds->ageing_time_min;
		bool too_large = ds->ageing_time_max &&
				 requested > ds->ageing_time_max;

		return (too_small || too_large) ? -ERANGE : 0;
	}

	/* Program the fastest ageing time in case of multiple bridges */
	fastest = dsa_switch_fastest_ageing_time(ds, requested);

	if (!ds->ops->set_ageing_time)
		return 0;

	return ds->ops->set_ageing_time(ds, fastest);
}
/* Tell whether @port of @ds must have its MTU changed for the notification
 * described by @info.
 *
 * On the switch named by info->sw_index, the targeted port and every DSA port
 * match. On any other switch, DSA and CPU ports match, but only when
 * info->propagate_upstream is set.
 */
static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port,
				 struct dsa_notifier_mtu_info *info)
{
	bool is_target_switch = ds->index == info->sw_index;

	if (is_target_switch) {
		if (port == info->port)
			return true;

		return dsa_is_dsa_port(ds, port);
	}

	return info->propagate_upstream &&
	       (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port));
}
/* Apply info->mtu to every port of @ds selected by dsa_switch_mtu_match().
 *
 * Returns -EOPNOTSUPP when the driver has no port_change_mtu() callback, the
 * first non-zero value returned by that callback (remaining ports are then
 * left untouched), or 0 on success.
 */
static int dsa_switch_mtu(struct dsa_switch *ds,
			  struct dsa_notifier_mtu_info *info)
{
	int port;

	if (!ds->ops->port_change_mtu)
		return -EOPNOTSUPP;

	for (port = 0; port < ds->num_ports; port++) {
		int err;

		if (!dsa_switch_mtu_match(ds, port, info))
			continue;

		err = ds->ops->port_change_mtu(ds, port, info->mtu);
		if (err)
			return err;
	}

	return 0;
}
/* Handle a bridge join notification on @ds.
 *
 * When the joining port belongs to this very switch (same tree index and same
 * switch index), call the driver's port_bridge_join(). Otherwise the port is
 * on another switch or another tree, and crosschip_bridge_join() is called
 * with the tree and switch index of the joining port. A missing callback is
 * not an error: 0 is returned.
 */
static int dsa_switch_bridge_join(struct dsa_switch *ds,
				  struct dsa_notifier_bridge_info *info)
{
	bool is_local = ds->dst->index == info->tree_index &&
			ds->index == info->sw_index;

	if (is_local) {
		if (!ds->ops->port_bridge_join)
			return 0;

		return ds->ops->port_bridge_join(ds, info->port, info->br);
	}

	if (!ds->ops->crosschip_bridge_join)
		return 0;

	return ds->ops->crosschip_bridge_join(ds, info->tree_index,
					      info->sw_index, info->port,
					      info->br);
}
/* Handle a bridge leave notification on @ds.
 *
 * When the leaving port belongs to this very switch (same tree index and same
 * switch index), call the driver's port_bridge_leave(). Otherwise call
 * crosschip_bridge_leave() with the tree and switch index of the leaving
 * port. Each callback is only invoked when the driver actually provides it.
 *
 * Afterwards, if the bridge had VLAN filtering enabled, turn VLAN filtering
 * off on the port (see the comment below for the global case).
 *
 * Returns 0 on success or when VLAN filtering is simply not supported,
 * otherwise the error reported by dsa_port_vlan_filtering().
 */
static int dsa_switch_bridge_leave(struct dsa_switch *ds,
				   struct dsa_notifier_bridge_info *info)
{
	bool unset_vlan_filtering = br_vlan_enabled(info->br);
	struct dsa_switch_tree *dst = ds->dst;
	int err, i;

	/* Test the very callback that is about to be called: a driver may
	 * implement the join operation without the matching leave operation.
	 */
	if (dst->index == info->tree_index && ds->index == info->sw_index &&
	    ds->ops->port_bridge_leave)
		ds->ops->port_bridge_leave(ds, info->port, info->br);

	if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
	    ds->ops->crosschip_bridge_leave)
		ds->ops->crosschip_bridge_leave(ds, info->tree_index,
						info->sw_index, info->port,
						info->br);

	/* If the bridge was vlan_filtering, the bridge core doesn't trigger an
	 * event for changing vlan_filtering setting upon slave ports leaving
	 * it. That is a good thing, because that lets us handle it and also
	 * handle the case where the switch's vlan_filtering setting is global
	 * (not per port). When that happens, the correct moment to trigger the
	 * vlan_filtering callback is only when the last port left this bridge.
	 */
	if (unset_vlan_filtering && ds->vlan_filtering_is_global) {
		for (i = 0; i < ds->num_ports; i++) {
			if (i == info->port)
				continue;
			if (dsa_to_port(ds, i)->bridge_dev == info->br) {
				unset_vlan_filtering = false;
				break;
			}
		}
	}

	/* NOTE(review): this section also runs on switches other than the one
	 * owning info->port, where info->port then indexes a port of @ds -
	 * confirm that this is intended.
	 */
	if (unset_vlan_filtering) {
		struct switchdev_trans trans = {0};

		err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
					      false, &trans);
		/* Error codes are negative; lack of support is not a failure */
		if (err && err != -EOPNOTSUPP)
			return err;
	}

	return 0;
}
/* Install the FDB entry (info->addr, info->vid) on @ds, on the port returned
 * by dsa_towards_port() for the switch and port named in @info.
 *
 * Returns -EOPNOTSUPP when the driver has no port_fdb_add() callback,
 * otherwise the callback's return value.
 */
static int dsa_switch_fdb_add(struct dsa_switch *ds,
			      struct dsa_notifier_fdb_info *info)
{
	const int egress = dsa_towards_port(ds, info->sw_index, info->port);

	if (ds->ops->port_fdb_add)
		return ds->ops->port_fdb_add(ds, egress, info->addr,
					     info->vid);

	return -EOPNOTSUPP;
}
/* Remove the FDB entry (info->addr, info->vid) from @ds, on the port returned
 * by dsa_towards_port() for the switch and port named in @info.
 *
 * Returns -EOPNOTSUPP when the driver has no port_fdb_del() callback,
 * otherwise the callback's return value.
 */
static int dsa_switch_fdb_del(struct dsa_switch *ds,
			      struct dsa_notifier_fdb_info *info)
{
	const int egress = dsa_towards_port(ds, info->sw_index, info->port);

	if (ds->ops->port_fdb_del)
		return ds->ops->port_fdb_del(ds, egress, info->addr,
					     info->vid);

	return -EOPNOTSUPP;
}
/* Tell whether @port of @ds is concerned by the MDB notification @info: this
 * is the case for the port targeted by the notification and for every DSA
 * port of the switch.
 */
static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
				 struct dsa_notifier_mdb_info *info)
{
	bool is_target = ds->index == info->sw_index && port == info->port;

	return is_target || dsa_is_dsa_port(ds, port);
}
/* Prepare phase of an MDB addition: run the driver's port_mdb_prepare() on
 * every port selected by dsa_switch_mdb_match().
 *
 * Returns -EOPNOTSUPP unless the driver implements both port_mdb_prepare()
 * and port_mdb_add(), the first non-zero value returned by the callback, or
 * 0 on success.
 */
static int dsa_switch_mdb_prepare(struct dsa_switch *ds,
				  struct dsa_notifier_mdb_info *info)
{
	int port;

	if (!(ds->ops->port_mdb_prepare && ds->ops->port_mdb_add))
		return -EOPNOTSUPP;

	for (port = 0; port < ds->num_ports; port++) {
		int err;

		if (!dsa_switch_mdb_match(ds, port, info))
			continue;

		err = ds->ops->port_mdb_prepare(ds, port, info->mdb);
		if (err)
			return err;
	}

	return 0;
}
/* Handle an MDB add notification on @ds.
 *
 * In the switchdev prepare phase, defer to dsa_switch_mdb_prepare(). In the
 * commit phase, call the driver's port_mdb_add() on every port selected by
 * dsa_switch_mdb_match(); the commit phase always returns 0.
 */
static int dsa_switch_mdb_add(struct dsa_switch *ds,
			      struct dsa_notifier_mdb_info *info)
{
	int port;

	if (switchdev_trans_ph_prepare(info->trans))
		return dsa_switch_mdb_prepare(ds, info);

	if (ds->ops->port_mdb_add) {
		for (port = 0; port < ds->num_ports; port++) {
			if (!dsa_switch_mdb_match(ds, port, info))
				continue;

			ds->ops->port_mdb_add(ds, port, info->mdb);
		}
	}

	return 0;
}
/* Handle an MDB delete notification on @ds.
 *
 * Returns -EOPNOTSUPP when the driver has no port_mdb_del() callback. The
 * callback is only invoked on the switch named by info->sw_index; every other
 * switch returns 0 without touching its ports.
 */
static int dsa_switch_mdb_del(struct dsa_switch *ds,
			      struct dsa_notifier_mdb_info *info)
{
	if (!ds->ops->port_mdb_del)
		return -EOPNOTSUPP;

	if (ds->index != info->sw_index)
		return 0;

	return ds->ops->port_mdb_del(ds, info->port, info->mdb);
}
static int dsa_port_vlan_device_check(struct net_device *vlan_dev,
int vlan_dev_vid,
void *arg)
{
struct switchdev_obj_port_vlan *vlan = arg;
u16 vid;
for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
if (vid == vlan_dev_vid)
return -EBUSY;
}
return 0;
}
/* Check that adding the bridge VLAN object @vlan on @port of @ds does not
 * collide with a VLAN device stacked on the port's slave interface.
 *
 * Returns 0 when the port is not bridged or when no VLAN device conflicts,
 * otherwise the value returned by vlan_for_each().
 */
static int dsa_port_vlan_check(struct dsa_switch *ds, int port,
			       const struct switchdev_obj_port_vlan *vlan)
{
	const struct dsa_port *dp = dsa_to_port(ds, port);

	/* dsa_slave_vlan_rx_{add,kill}_vid() cannot use the prepare phase and
	 * already checks for an overlapping bridge VLAN entry with the same
	 * VID, so the only thing left to verify here is that the bridge VLAN
	 * entry being added does not overlap a VLAN device claiming that VID.
	 */
	if (dp->bridge_dev)
		return vlan_for_each(dp->slave, dsa_port_vlan_device_check,
				     (void *)vlan);

	/* Device is not bridged, let it proceed with the VLAN device
	 * creation.
	 */
	return 0;
}
/* Tell whether @port of @ds is concerned by the VLAN notification @info: this
 * is the case for the port targeted by the notification and for every DSA
 * port of the switch.
 */
static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
				  struct dsa_notifier_vlan_info *info)
{
	bool is_target = ds->index == info->sw_index && port == info->port;

	return is_target || dsa_is_dsa_port(ds, port);
}
/* Prepare phase of a VLAN addition: for every port selected by
 * dsa_switch_vlan_match(), first run dsa_port_vlan_check(), then the driver's
 * port_vlan_prepare().
 *
 * Returns -EOPNOTSUPP unless the driver implements both port_vlan_prepare()
 * and port_vlan_add(), the first non-zero value returned by either step, or
 * 0 on success.
 */
static int dsa_switch_vlan_prepare(struct dsa_switch *ds,
				   struct dsa_notifier_vlan_info *info)
{
	int port;

	if (!(ds->ops->port_vlan_prepare && ds->ops->port_vlan_add))
		return -EOPNOTSUPP;

	for (port = 0; port < ds->num_ports; port++) {
		int err;

		if (!dsa_switch_vlan_match(ds, port, info))
			continue;

		/* The driver is only consulted once the generic check passed */
		err = dsa_port_vlan_check(ds, port, info->vlan);
		if (!err)
			err = ds->ops->port_vlan_prepare(ds, port, info->vlan);
		if (err)
			return err;
	}

	return 0;
}
/* Handle a VLAN add notification on @ds.
 *
 * In the switchdev prepare phase, defer to dsa_switch_vlan_prepare(). In the
 * commit phase, call the driver's port_vlan_add() on every port selected by
 * dsa_switch_vlan_match(); the commit phase always returns 0.
 */
static int dsa_switch_vlan_add(struct dsa_switch *ds,
			       struct dsa_notifier_vlan_info *info)
{
	int port;

	if (switchdev_trans_ph_prepare(info->trans))
		return dsa_switch_vlan_prepare(ds, info);

	if (ds->ops->port_vlan_add) {
		for (port = 0; port < ds->num_ports; port++) {
			if (!dsa_switch_vlan_match(ds, port, info))
				continue;

			ds->ops->port_vlan_add(ds, port, info->vlan);
		}
	}

	return 0;
}
/* Handle a VLAN delete notification on @ds.
 *
 * Returns -EOPNOTSUPP when the driver has no port_vlan_del() callback. The
 * callback is only invoked on the switch named by info->sw_index.
 */
static int dsa_switch_vlan_del(struct dsa_switch *ds,
			       struct dsa_notifier_vlan_info *info)
{
	if (!ds->ops->port_vlan_del)
		return -EOPNOTSUPP;

	/* Do not deprogram the DSA links as they may be used as conduit
	 * for other VLAN members in the fabric.
	 */
	if (ds->index != info->sw_index)
		return 0;

	return ds->ops->port_vlan_del(ds, info->port, info->vlan);
}
/* Route a DSA notifier @event, with its event-specific @info payload, to the
 * matching handler for @ds. Unknown events yield -EOPNOTSUPP.
 */
static int dsa_switch_dispatch_event(struct dsa_switch *ds,
				     unsigned long event, void *info)
{
	switch (event) {
	case DSA_NOTIFIER_AGEING_TIME:
		return dsa_switch_ageing_time(ds, info);
	case DSA_NOTIFIER_BRIDGE_JOIN:
		return dsa_switch_bridge_join(ds, info);
	case DSA_NOTIFIER_BRIDGE_LEAVE:
		return dsa_switch_bridge_leave(ds, info);
	case DSA_NOTIFIER_FDB_ADD:
		return dsa_switch_fdb_add(ds, info);
	case DSA_NOTIFIER_FDB_DEL:
		return dsa_switch_fdb_del(ds, info);
	case DSA_NOTIFIER_MDB_ADD:
		return dsa_switch_mdb_add(ds, info);
	case DSA_NOTIFIER_MDB_DEL:
		return dsa_switch_mdb_del(ds, info);
	case DSA_NOTIFIER_VLAN_ADD:
		return dsa_switch_vlan_add(ds, info);
	case DSA_NOTIFIER_VLAN_DEL:
		return dsa_switch_vlan_del(ds, info);
	case DSA_NOTIFIER_MTU:
		return dsa_switch_mtu(ds, info);
	default:
		return -EOPNOTSUPP;
	}
}

/* Notifier callback of a switch: recover the switch embedding @nb, run the
 * handler for @event and convert its errno into a notifier return value.
 */
static int dsa_switch_event(struct notifier_block *nb,
			    unsigned long event, void *info)
{
	struct dsa_switch *ds = container_of(nb, struct dsa_switch, nb);
	int err = dsa_switch_dispatch_event(ds, event, info);

	/* Non-switchdev operations cannot be rolled back. If a DSA driver
	 * returns an error during the chained call, switch chips may be in an
	 * inconsistent state.
	 */
	if (err)
		dev_dbg(ds->dev, "breaking chain for DSA event %lu (%d)\n",
			event, err);

	return notifier_from_errno(err);
}
int dsa_switch_register_notifier(struct dsa_switch *ds)
{
ds->nb.notifier_call = dsa_switch_event;
return raw_notifier_chain_register(&ds->dst->nh, &ds->nb);
}
/* Remove the notifier block of @ds from the notifier chain of its tree. A
 * failure cannot be reported to the caller, so it is only logged.
 */
void dsa_switch_unregister_notifier(struct dsa_switch *ds)
{
	int err = raw_notifier_chain_unregister(&ds->dst->nh, &ds->nb);

	if (!err)
		return;

	dev_err(ds->dev, "failed to unregister notifier (%d)\n", err);
}