diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index 55b9c73cd8ef40669062341df66138ec377ce067..16293b58e0a89aecd0c7bd9c409f8574f38b9300 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -10,7 +10,8 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \ efx_devlink.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ - mae.o tc.o tc_bindings.o tc_counters.o + mae.o tc.o tc_bindings.o tc_counters.o \ + tc_encap_actions.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 274f3a2562ad1a948467d85af4e213c9c1020f14..7f7d560cb2b4c956ad46ce794a593f3f4fda7829 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -24,6 +24,7 @@ #include "rx_common.h" #include "ef100_sriov.h" #include "tc_bindings.h" +#include "tc_encap_actions.h" #include "efx_devlink.h" static void ef100_update_name(struct efx_nic *efx) @@ -300,14 +301,38 @@ int ef100_netdev_event(struct notifier_block *this, { struct efx_nic *efx = container_of(this, struct efx_nic, netdev_notifier); struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); + struct ef100_nic_data *nic_data = efx->nic_data; + int err; if (efx->net_dev == net_dev && (event == NETDEV_CHANGENAME || event == NETDEV_REGISTER)) ef100_update_name(efx); + if (!nic_data->grp_mae) + return NOTIFY_DONE; + err = efx_tc_netdev_event(efx, event, net_dev); + if (err & NOTIFY_STOP_MASK) + return err; + return NOTIFY_DONE; } +static int ef100_netevent_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct efx_nic *efx = container_of(this, struct efx_nic, netevent_notifier); + struct ef100_nic_data *nic_data = efx->nic_data; + int err; + + if (!nic_data->grp_mae) + return NOTIFY_DONE; + err = efx_tc_netevent_event(efx, event, ptr); + if (err & NOTIFY_STOP_MASK) + return err; + + return NOTIFY_DONE; +}; + static int ef100_register_netdev(struct efx_nic *efx) { struct net_device *net_dev = efx->net_dev; @@ -367,6 +392,7 @@ void ef100_remove_netdev(struct efx_probe_data *probe_data) rtnl_unlock(); unregister_netdevice_notifier(&efx->netdev_notifier); + unregister_netevent_notifier(&efx->netevent_notifier); #if defined(CONFIG_SFC_SRIOV) if (!efx->type->is_vf) efx_ef100_pci_sriov_disable(efx, true); @@ -487,6 +513,14 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data) goto fail; } + efx->netevent_notifier.notifier_call = ef100_netevent_event; + rc = register_netevent_notifier(&efx->netevent_notifier); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "Failed to register netevent notifier, rc=%d\n", rc); + goto fail; + } + efx_probe_devlink_unlock(efx); return rc; fail: diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 37a4c6925ad44f9bca0bda04deef250bdb4ab761..0cab508f2f9d1896414ff33384dd14e2f12d8445 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -15,6 +15,7 @@ #include "mcdi.h" #include "mcdi_pcol.h" #include "mcdi_pcol_mae.h" +#include "tc_encap_actions.h" int efx_mae_allocate_mport(struct efx_nic *efx, u32 *id, u32 *label) { @@ -610,6 +611,87 @@ static int efx_mae_encap_type_to_mae_type(enum efx_encap_type type) } } +int efx_mae_allocate_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(EFX_TC_MAX_ENCAP_HDR)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_LEN); + size_t inlen, outlen; + int rc; + + rc = efx_mae_encap_type_to_mae_type(encap->type); + if (rc < 0) + return rc; + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_ENCAP_TYPE, rc); + inlen = MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(encap->encap_hdr_len); + if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */ + return -EINVAL; + memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_HDR_DATA), + encap->encap_hdr, + encap->encap_hdr_len); + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_ALLOC, inbuf, + inlen, outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + encap->fw_id = MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID); + return 0; +} + +int efx_mae_update_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(EFX_TC_MAX_ENCAP_HDR)); + size_t inlen; + int rc; + + rc = efx_mae_encap_type_to_mae_type(encap->type); + if (rc < 0) + return rc; + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_ENCAP_TYPE, rc); + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_EH_ID, + encap->fw_id); + inlen = MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(encap->encap_hdr_len); + if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */ + return -EINVAL; + memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_HDR_DATA), + encap->encap_hdr, + encap->encap_hdr_len); + + BUILD_BUG_ON(MC_CMD_MAE_ENCAP_HEADER_UPDATE_OUT_LEN != 0); + return efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_UPDATE, inbuf, + inlen, NULL, 0, NULL); +} + +int efx_mae_free_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_OUT_LEN(1)); + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_IN_LEN(1)); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_FREE_IN_EH_ID, encap->fw_id); + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_FREE, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + /* FW freed a different ID than we asked for, should also never happen. + * Warn because it means we've now got a different idea to the FW of + * what encap_mds exist, which could cause mayhem later. + */ + if (WARN_ON(MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_FREE_OUT_FREED_EH_ID) != encap->fw_id)) + return -EIO; + /* We're probably about to free @encap, but let's just make sure its + * fw_id is blatted so that it won't look valid if it leaks out. + */ + encap->fw_id = MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL; + return 0; +} + int efx_mae_lookup_mport(struct efx_nic *efx, u32 vf_idx, u32 *id) { struct ef100_nic_data *nic_data = efx->nic_data; @@ -833,8 +915,12 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_PROTO_BE, act->vlan_proto[1]); } - MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, - MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL); + if (act->encap_md) + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, + act->encap_md->fw_id); + else + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, + MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL); if (act->deliver) MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DELIVER, act->dest_mport); @@ -1229,6 +1315,29 @@ int efx_mae_insert_rule(struct efx_nic *efx, const struct efx_tc_match *match, return 0; } +int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ACTION_RULE_UPDATE_IN_LEN); + MCDI_DECLARE_STRUCT_PTR(response); + + BUILD_BUG_ON(MC_CMD_MAE_ACTION_RULE_UPDATE_OUT_LEN); + response = _MCDI_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_RESPONSE); + + MCDI_SET_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_AR_ID, id); + if (efx_mae_asl_id(acts_id)) { + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID, acts_id); + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID, + MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL); + } else { + /* We only had one AS, so we didn't wrap it in an ASL */ + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID, + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID, acts_id); + } + return efx_mcdi_rpc(efx, MC_CMD_MAE_ACTION_RULE_UPDATE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + int efx_mae_delete_rule(struct efx_nic *efx, u32 id) { MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ACTION_RULE_DELETE_OUT_LEN(1)); diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h index 1cf8dfeb0c286a51d1c4f4db0b4418426ef0f9c1..24abfe50969091c052901c84d8a30dfb4d68fad3 100644 --- a/drivers/net/ethernet/sfc/mae.h +++ b/drivers/net/ethernet/sfc/mae.h @@ -90,6 +90,13 @@ int efx_mae_check_encap_type_supported(struct efx_nic *efx, int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt); int efx_mae_free_counter(struct efx_nic *efx, struct efx_tc_counter *cnt); +int efx_mae_allocate_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); +int efx_mae_update_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); +int efx_mae_free_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); + int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act); int efx_mae_free_action_set(struct efx_nic *efx, u32 fw_id); @@ -105,6 +112,7 @@ int efx_mae_unregister_encap_match(struct efx_nic *efx, int efx_mae_insert_rule(struct efx_nic *efx, const struct efx_tc_match *match, u32 prio, u32 acts_id, u32 *id); +int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id); int efx_mae_delete_rule(struct efx_nic *efx, u32 id); int efx_init_mae(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index fcd51d3992fa68b55cba2cb8595794bb6d2c34f1..a7a22b01979479bd711975dfd240d09e6d687888 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -27,6 +27,7 @@ #include <linux/mtd/mtd.h> #include <net/busy_poll.h> #include <net/xdp.h> +#include <net/netevent.h> #include "enum.h" #include "bitfield.h" @@ -996,6 +997,7 @@ struct efx_mae; * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their * xdp_rxq_info structures? * @netdev_notifier: Netdevice notifier. + * @netevent_notifier: Netevent notifier (for neighbour updates). * @tc: state for TC offload (EF100). * @devlink: reference to devlink structure owned by this device * @dl_port: devlink port associated with the PF @@ -1183,6 +1185,7 @@ struct efx_nic { bool xdp_rxq_info_failed; struct notifier_block netdev_notifier; + struct notifier_block netevent_notifier; struct efx_tc_state *tc; struct devlink *devlink; diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index bb9ec1e761d3196d0af44dde4b549e2b8233bde2..77acdb60381e3e805747095a48927013e992469c 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -14,11 +14,12 @@ #include <net/geneve.h> #include "tc.h" #include "tc_bindings.h" +#include "tc_encap_actions.h" #include "mae.h" #include "ef100_rep.h" #include "efx.h" -static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) { if (netif_is_vxlan(net_dev)) return EFX_ENCAP_TYPE_VXLAN; @@ -33,8 +34,8 @@ static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) * May return NULL for the PF (us), or an error pointer for a device that * isn't supported as a TC offload endpoint */ -static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, - struct net_device *dev) +struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, + struct net_device *dev) { struct efx_rep *efv; @@ -70,7 +71,7 @@ static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv } /* Convert a driver-internal vport ID into an external device (wire or VF) */ -static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) +s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) { u32 mport; @@ -111,6 +112,10 @@ static void efx_tc_free_action_set(struct efx_nic *efx, } if (act->count) efx_tc_flower_put_counter_index(efx, act->count); + if (act->encap_md) { + list_del(&act->encap_user); + efx_tc_flower_release_encap_md(efx, act->encap_md); + } kfree(act); } @@ -594,6 +599,7 @@ enum efx_tc_action_order { EFX_TC_AO_VLAN_POP, EFX_TC_AO_VLAN_PUSH, EFX_TC_AO_COUNT, + EFX_TC_AO_ENCAP, EFX_TC_AO_DELIVER }; /* Determine whether we can add @new action without violating order */ @@ -623,6 +629,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act, if (act->count) return false; fallthrough; + case EFX_TC_AO_ENCAP: + if (act->encap_md) + return false; + fallthrough; case EFX_TC_AO_DELIVER: return !act->deliver; default: @@ -918,11 +928,13 @@ static int efx_tc_flower_replace(struct efx_nic *efx, { struct flow_rule *fr = flow_cls_offload_flow_rule(tc); struct netlink_ext_ack *extack = tc->common.extack; + const struct ip_tunnel_info *encap_info = NULL; struct efx_tc_flow_rule *rule = NULL, *old; struct efx_tc_action_set *act = NULL; const struct flow_action_entry *fa; struct efx_rep *from_efv, *to_efv; struct efx_tc_match match; + u32 acts_id; s64 rc; int i; @@ -1087,6 +1099,48 @@ static int efx_tc_flower_replace(struct efx_nic *efx, case FLOW_ACTION_MIRRED: save = *act; + if (encap_info) { + struct efx_tc_encap_action *encap; + + if (!efx_tc_flower_action_order_ok(act, + EFX_TC_AO_ENCAP)) { + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order"); + goto release; + } + encap = efx_tc_flower_create_encap_md( + efx, encap_info, fa->dev, extack); + if (IS_ERR_OR_NULL(encap)) { + rc = PTR_ERR(encap); + if (!rc) + rc = -EIO; /* arbitrary */ + goto release; + } + act->encap_md = encap; + list_add_tail(&act->encap_user, &encap->users); + act->dest_mport = encap->dest_mport; + act->deliver = 1; + rc = efx_mae_alloc_action_set(efx, act); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)"); + goto release; + } + list_add_tail(&act->list, &rule->acts.list); + act->user = &rule->acts; + act = NULL; + if (fa->id == FLOW_ACTION_REDIRECT) + break; /* end of the line */ + /* Mirror, so continue on with saved act */ + save.count = NULL; + act = kzalloc(sizeof(*act), GFP_USER); + if (!act) { + rc = -ENOMEM; + goto release; + } + *act = save; + break; + } + if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) { /* can't happen */ rc = -EOPNOTSUPP; @@ -1150,6 +1204,37 @@ static int efx_tc_flower_replace(struct efx_nic *efx, act->vlan_proto[act->vlan_push] = fa->vlan.proto; act->vlan_push++; break; + case FLOW_ACTION_TUNNEL_ENCAP: + if (encap_info) { + /* Can't specify encap multiple times. + * If you want to overwrite an existing + * encap_info, use an intervening + * FLOW_ACTION_TUNNEL_DECAP to clear it. + */ + NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set"); + rc = -EINVAL; + goto release; + } + if (!fa->tunnel) { + NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key"); + rc = -EOPNOTSUPP; + goto release; + } + encap_info = fa->tunnel; + break; + case FLOW_ACTION_TUNNEL_DECAP: + if (encap_info) { + encap_info = NULL; + break; + } + /* Since we don't support enc_key matches on ingress + * (and if we did there'd be no tunnel-device to give + * us a type), we can't offload a decap that's not + * just undoing a previous encap action. + */ + NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device"); + rc = -EOPNOTSUPP; + goto release; default: NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", fa->id); @@ -1193,8 +1278,21 @@ static int efx_tc_flower_replace(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw"); goto release; } + if (from_efv == EFX_EFV_PF) + /* PF netdev, so rule applies to traffic from wire */ + rule->fallback = &efx->tc->facts.pf; + else + /* repdev, so rule applies to traffic from representee */ + rule->fallback = &efx->tc->facts.reps; + if (!efx_tc_check_ready(efx, rule)) { + netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n"); + acts_id = rule->fallback->fw_id; + } else { + netif_dbg(efx, drv, efx->net_dev, "ready for hw\n"); + acts_id = rule->acts.fw_id; + } rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC, - rule->acts.fw_id, &rule->fw_id); + acts_id, &rule->fw_id); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw"); goto release_acts; @@ -1391,6 +1489,58 @@ void efx_tc_deconfigure_default_rule(struct efx_nic *efx, rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; } +static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port, + struct efx_tc_action_set_list *acts) +{ + struct efx_tc_action_set *act; + int rc; + + act = kzalloc(sizeof(*act), GFP_KERNEL); + if (!act) + return -ENOMEM; + act->deliver = 1; + act->dest_mport = eg_port; + rc = efx_mae_alloc_action_set(efx, act); + if (rc) + goto fail1; + EFX_WARN_ON_PARANOID(!list_empty(&acts->list)); + list_add_tail(&act->list, &acts->list); + rc = efx_mae_alloc_action_set_list(efx, acts); + if (rc) + goto fail2; + return 0; +fail2: + list_del(&act->list); + efx_mae_free_action_set(efx, act->fw_id); +fail1: + kfree(act); + return rc; +} + +static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx) +{ + struct efx_tc_action_set_list *acts = &efx->tc->facts.pf; + u32 eg_port; + + efx_mae_mport_uplink(efx, &eg_port); + return efx_tc_configure_fallback_acts(efx, eg_port, acts); +} + +static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx) +{ + struct efx_tc_action_set_list *acts = &efx->tc->facts.reps; + u32 eg_port; + + efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port); + return efx_tc_configure_fallback_acts(efx, eg_port, acts); +} + +static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx, + struct efx_tc_action_set_list *acts) +{ + efx_tc_free_action_set_list(efx, acts, true); +} + static int efx_tc_configure_rep_mport(struct efx_nic *efx) { u32 rep_mport_label; @@ -1481,6 +1631,12 @@ int efx_init_tc(struct efx_nic *efx) if (rc) return rc; rc = efx_tc_configure_rep_mport(efx); + if (rc) + return rc; + rc = efx_tc_configure_fallback_acts_pf(efx); + if (rc) + return rc; + rc = efx_tc_configure_fallback_acts_reps(efx); if (rc) return rc; efx->tc->up = true; @@ -1500,6 +1656,8 @@ void efx_fini_tc(struct efx_nic *efx) efx_tc_deconfigure_rep_mport(efx); efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf); efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire); + efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf); + efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps); efx->tc->up = false; } @@ -1549,6 +1707,9 @@ int efx_init_struct_tc(struct efx_nic *efx) mutex_init(&efx->tc->mutex); init_waitqueue_head(&efx->tc->flush_wq); + rc = efx_tc_init_encap_actions(efx); + if (rc < 0) + goto fail_encap_actions; rc = efx_tc_init_counters(efx); if (rc < 0) goto fail_counters; @@ -1564,6 +1725,10 @@ int efx_init_struct_tc(struct efx_nic *efx) efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list); efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; + INIT_LIST_HEAD(&efx->tc->facts.pf.list); + efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; + INIT_LIST_HEAD(&efx->tc->facts.reps.list); + efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type; return 0; fail_match_action_ht: @@ -1571,6 +1736,8 @@ int efx_init_struct_tc(struct efx_nic *efx) fail_encap_match_ht: efx_tc_destroy_counters(efx); fail_counters: + efx_tc_destroy_encap_actions(efx); +fail_encap_actions: mutex_destroy(&efx->tc->mutex); kfree(efx->tc->caps); fail_alloc_caps: @@ -1589,11 +1756,16 @@ void efx_fini_struct_tc(struct efx_nic *efx) MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); + EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id != + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); + EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id != + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free, efx); rhashtable_free_and_destroy(&efx->tc->encap_match_ht, efx_tc_encap_match_free, NULL); efx_tc_fini_counters(efx); + efx_tc_fini_encap_actions(efx); mutex_unlock(&efx->tc->mutex); mutex_destroy(&efx->tc->mutex); kfree(efx->tc->caps); diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 24e9640c74e9063a34dc6ae24b6a76f20529c0c6..607429f8bb289ab486276be283b0c94b481267be 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -25,6 +25,8 @@ static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr) } #endif +struct efx_tc_encap_action; /* see tc_encap_actions.h */ + struct efx_tc_action_set { u16 vlan_push:2; u16 vlan_pop:2; @@ -33,6 +35,9 @@ struct efx_tc_action_set { __be16 vlan_tci[2]; /* TCIs for vlan_push */ __be16 vlan_proto[2]; /* Ethertypes for vlan_push */ struct efx_tc_counter_index *count; + struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */ + struct list_head encap_user; /* entry on encap_md->users list */ + struct efx_tc_action_set_list *user; /* Only populated if encap_md */ u32 dest_mport; u32 fw_id; /* index of this entry in firmware actions table */ struct list_head list; @@ -127,6 +132,7 @@ struct efx_tc_flow_rule { struct rhash_head linkage; struct efx_tc_match match; struct efx_tc_action_set_list acts; + struct efx_tc_action_set_list *fallback; /* what to use when unready? */ u32 fw_id; }; @@ -144,8 +150,10 @@ enum efx_tc_rule_prios { * @mutex: Used to serialise operations on TC hashtables * @counter_ht: Hashtable of TC counters (FW IDs and counter values) * @counter_id_ht: Hashtable mapping TC counter cookies to counters + * @encap_ht: Hashtable of TC encap actions * @encap_match_ht: Hashtable of TC encap matches * @match_action_ht: Hashtable of TC match-action rules + * @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder) * @reps_mport_id: MAE port allocated for representor RX * @reps_filter_uc: VNIC filter for representor unicast RX (promisc) * @reps_filter_mc: VNIC filter for representor multicast RX (allmulti) @@ -160,6 +168,11 @@ enum efx_tc_rule_prios { * %EFX_TC_PRIO_DFLT. Named by *ingress* port * @dflt.pf: rule for traffic ingressing from PF (egresses to wire) * @dflt.wire: rule for traffic ingressing from wire (egresses to PF) + * @facts: Fallback action-set-lists for unready rules. Named by *egress* port + * @facts.pf: action-set-list for unready rules on PF netdev, hence applying to + * traffic from wire, and egressing to PF + * @facts.reps: action-set-list for unready rules on representors, hence + * applying to traffic from representees, and egressing to the reps mport * @up: have TC datastructures been set up? */ struct efx_tc_state { @@ -168,8 +181,10 @@ struct efx_tc_state { struct mutex mutex; struct rhashtable counter_ht; struct rhashtable counter_id_ht; + struct rhashtable encap_ht; struct rhashtable encap_match_ht; struct rhashtable match_action_ht; + struct rhashtable neigh_ht; u32 reps_mport_id, reps_mport_vport_id; s32 reps_filter_uc, reps_filter_mc; bool flush_counters; @@ -180,11 +195,19 @@ struct efx_tc_state { struct efx_tc_flow_rule pf; struct efx_tc_flow_rule wire; } dflt; + struct { + struct efx_tc_action_set_list pf; + struct efx_tc_action_set_list reps; + } facts; bool up; }; struct efx_rep; +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev); +struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, + struct net_device *dev); +s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv); int efx_tc_configure_default_rule_rep(struct efx_rep *efv); void efx_tc_deconfigure_default_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule); diff --git a/drivers/net/ethernet/sfc/tc_bindings.c b/drivers/net/ethernet/sfc/tc_bindings.c index c18d64519c2db92faf371e321a18ea0a46752488..1b79c535c54ed5a326f387fae178cc2113dcda8f 100644 --- a/drivers/net/ethernet/sfc/tc_bindings.c +++ b/drivers/net/ethernet/sfc/tc_bindings.c @@ -10,6 +10,7 @@ #include "tc_bindings.h" #include "tc.h" +#include "tc_encap_actions.h" struct efx_tc_block_binding { struct list_head list; @@ -226,3 +227,15 @@ int efx_tc_setup(struct net_device *net_dev, enum tc_setup_type type, return -EOPNOTSUPP; } + +int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev) +{ + if (efx->type->is_vf) + return NOTIFY_DONE; + + if (event == NETDEV_UNREGISTER) + efx_tc_unregister_egdev(efx, net_dev); + + return NOTIFY_OK; +} diff --git a/drivers/net/ethernet/sfc/tc_bindings.h b/drivers/net/ethernet/sfc/tc_bindings.h index c210bb09150ee632734a89456512ec1348c362f6..095ddeb59eb3c8a721a9c3fdaca571c94bfd25df 100644 --- a/drivers/net/ethernet/sfc/tc_bindings.h +++ b/drivers/net/ethernet/sfc/tc_bindings.h @@ -26,4 +26,6 @@ int efx_tc_indr_setup_cb(struct net_device *net_dev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)); +int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev); #endif /* EFX_TC_BINDINGS_H */ diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c new file mode 100644 index 0000000000000000000000000000000000000000..aac259528e73e286175bc2fff8f5cb97fd332336 --- /dev/null +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -0,0 +1,746 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2023, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "tc_encap_actions.h" +#include "tc.h" +#include "mae.h" +#include <net/vxlan.h> +#include <net/geneve.h> +#include <net/netevent.h> +#include <net/arp.h> + +static const struct rhashtable_params efx_neigh_ht_params = { + .key_len = offsetof(struct efx_neigh_binder, ha), + .key_offset = 0, + .head_offset = offsetof(struct efx_neigh_binder, linkage), +}; + +static const struct rhashtable_params efx_tc_encap_ht_params = { + .key_len = offsetofend(struct efx_tc_encap_action, key), + .key_offset = 0, + .head_offset = offsetof(struct efx_tc_encap_action, linkage), +}; + +static void efx_tc_encap_free(void *ptr, void *__unused) +{ + struct efx_tc_encap_action *enc = ptr; + + WARN_ON(refcount_read(&enc->ref)); + kfree(enc); +} + +static void efx_neigh_free(void *ptr, void *__unused) +{ + struct efx_neigh_binder *neigh = ptr; + + WARN_ON(refcount_read(&neigh->ref)); + WARN_ON(!list_empty(&neigh->users)); + put_net_track(neigh->net, &neigh->ns_tracker); + netdev_put(neigh->egdev, &neigh->dev_tracker); + kfree(neigh); +} + +int efx_tc_init_encap_actions(struct efx_nic *efx) +{ + int rc; + + rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params); + if (rc < 0) + goto fail_neigh_ht; + rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params); + if (rc < 0) + goto fail_encap_ht; + return 0; +fail_encap_ht: + rhashtable_destroy(&efx->tc->neigh_ht); +fail_neigh_ht: + return rc; +} + +/* Only call this in init failure teardown. + * Normal exit should fini instead as there may be entries in the table. + */ +void efx_tc_destroy_encap_actions(struct efx_nic *efx) +{ + rhashtable_destroy(&efx->tc->encap_ht); + rhashtable_destroy(&efx->tc->neigh_ht); +} + +void efx_tc_fini_encap_actions(struct efx_nic *efx) +{ + rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL); + rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL); +} + +static void efx_neigh_update(struct work_struct *work); + +static int efx_bind_neigh(struct efx_nic *efx, + struct efx_tc_encap_action *encap, struct net *net, + struct netlink_ext_ack *extack) +{ + struct efx_neigh_binder *neigh, *old; + struct flowi6 flow6 = {}; + struct flowi4 flow4 = {}; + int rc; + + /* GCC stupidly thinks that only values explicitly listed in the enum + * definition can _possibly_ be sensible case values, so without this + * cast it complains about the IPv6 versions. + */ + switch ((int)encap->type) { + case EFX_ENCAP_TYPE_VXLAN: + case EFX_ENCAP_TYPE_GENEVE: + flow4.flowi4_proto = IPPROTO_UDP; + flow4.fl4_dport = encap->key.tp_dst; + flow4.flowi4_tos = encap->key.tos; + flow4.daddr = encap->key.u.ipv4.dst; + flow4.saddr = encap->key.u.ipv4.src; + break; + case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6: + case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6: + flow6.flowi6_proto = IPPROTO_UDP; + flow6.fl6_dport = encap->key.tp_dst; + flow6.flowlabel = ip6_make_flowinfo(encap->key.tos, + encap->key.label); + flow6.daddr = encap->key.u.ipv6.dst; + flow6.saddr = encap->key.u.ipv6.src; + break; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d", + (int)encap->type); + return -EOPNOTSUPP; + } + + neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT); + if (!neigh) + return -ENOMEM; + neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT); + neigh->dst_ip = flow4.daddr; + neigh->dst_ip6 = flow6.daddr; + + old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht, + &neigh->linkage, + efx_neigh_ht_params); + if (old) { + /* don't need our new entry */ + put_net_track(neigh->net, &neigh->ns_tracker); + kfree(neigh); + if (!refcount_inc_not_zero(&old->ref)) + return -EAGAIN; + /* existing entry found, ref taken */ + neigh = old; + } else { + /* New entry. We need to initiate a lookup */ + struct neighbour *n; + struct rtable *rt; + + if (encap->type & EFX_ENCAP_FLAG_IPV6) { +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *dst; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6, + NULL); + rc = PTR_ERR_OR_ZERO(dst); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap"); + goto out_free; + } + neigh->egdev = dst->dev; + netdev_hold(neigh->egdev, &neigh->dev_tracker, + GFP_KERNEL_ACCOUNT); + neigh->ttl = ip6_dst_hoplimit(dst); + n = dst_neigh_lookup(dst, &flow6.daddr); + dst_release(dst); +#else + /* We shouldn't ever get here, because if IPv6 isn't + * enabled how did someone create an IPv6 tunnel_key? + */ + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)"); +#endif + } else { + rt = ip_route_output_key(net, &flow4); + if (IS_ERR_OR_NULL(rt)) { + rc = PTR_ERR_OR_ZERO(rt); + if (!rc) + rc = -EIO; + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap"); + goto out_free; + } + neigh->egdev = rt->dst.dev; + netdev_hold(neigh->egdev, &neigh->dev_tracker, + GFP_KERNEL_ACCOUNT); + neigh->ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &flow4.daddr); + ip_rt_put(rt); + } + if (!n) { + rc = -ENETUNREACH; + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap"); + netdev_put(neigh->egdev, &neigh->dev_tracker); + goto out_free; + } + refcount_set(&neigh->ref, 1); + INIT_LIST_HEAD(&neigh->users); + read_lock_bh(&n->lock); + ether_addr_copy(neigh->ha, n->ha); + neigh->n_valid = n->nud_state & NUD_VALID; + read_unlock_bh(&n->lock); + rwlock_init(&neigh->lock); + INIT_WORK(&neigh->work, efx_neigh_update); + neigh->efx = efx; + neigh->used = jiffies; + if (!neigh->n_valid) + /* Prod ARP to find us a neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + /* Add us to this neigh */ + encap->neigh = neigh; + list_add_tail(&encap->list, &neigh->users); + return 0; + +out_free: + /* cleanup common to several error paths */ + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, + efx_neigh_ht_params); + synchronize_rcu(); + put_net_track(net, &neigh->ns_tracker); + kfree(neigh); + return rc; +} + +static void efx_free_neigh(struct efx_neigh_binder *neigh) +{ + struct efx_nic *efx = neigh->efx; + + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, + efx_neigh_ht_params); + synchronize_rcu(); + netdev_put(neigh->egdev, &neigh->dev_tracker); + put_net_track(neigh->net, &neigh->ns_tracker); + kfree(neigh); +} + +static void efx_release_neigh(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + struct efx_neigh_binder *neigh = encap->neigh; + + if (!neigh) + return; + list_del(&encap->list); + encap->neigh = NULL; + if (!refcount_dec_and_test(&neigh->ref)) + return; /* still in use */ + efx_free_neigh(neigh); +} + +static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct ethhdr *eth; + + encap->encap_hdr_len = sizeof(*eth); + eth = (struct ethhdr *)encap->encap_hdr; + + if (encap->neigh->n_valid) + ether_addr_copy(eth->h_dest, neigh->ha); + else + eth_zero_addr(eth->h_dest); + ether_addr_copy(eth->h_source, neigh->egdev->dev_addr); + eth->h_proto = htons(proto); +} + +static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct ip_tunnel_key *key = &encap->key; + struct iphdr *ip; + + ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*ip); + + ip->daddr = key->u.ipv4.dst; + ip->saddr = key->u.ipv4.src; + ip->ttl = neigh->ttl; + ip->protocol = ipproto; + ip->version = 0x4; + ip->ihl = 0x5; + ip->tot_len = cpu_to_be16(ip->ihl * 4 + len); + ip_send_check(ip); +} + +#ifdef CONFIG_IPV6 +static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct ip_tunnel_key *key = &encap->key; + struct ipv6hdr *ip; + + ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*ip); + + ip6_flow_hdr(ip, key->tos, key->label); + ip->daddr = key->u.ipv6.dst; + ip->saddr = key->u.ipv6.src; + ip->hop_limit = neigh->ttl; + ip->nexthdr = ipproto; + ip->version = 0x6; + ip->payload_len = cpu_to_be16(len); +} +#endif + +static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len) +{ + struct ip_tunnel_key *key = &encap->key; + struct udphdr *udp; + + udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*udp); + + udp->dest = key->tp_dst; + udp->len = cpu_to_be16(sizeof(*udp) + len); +} + +static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap) +{ + struct ip_tunnel_key *key = &encap->key; + struct vxlanhdr *vxlan; + + vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*vxlan); + + vxlan->vx_flags = VXLAN_HF_VNI; + vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id)); +} + +static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap) +{ + struct ip_tunnel_key *key = &encap->key; + struct genevehdr *geneve; + u32 vni; + + geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*geneve); + + geneve->proto_type = htons(ETH_P_TEB); + /* convert tun_id to host-endian so we can use host arithmetic to + * extract individual bytes. + */ + vni = ntohl(tunnel_id_to_key32(key->tun_id)); + geneve->vni[0] = vni >> 16; + geneve->vni[1] = vni >> 8; + geneve->vni[2] = vni; +} + +#define vxlan_header_l4_len (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +#define vxlan4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len) +static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IP); + efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr)); + efx_gen_tun_header_vxlan(encap); +} + +#define geneve_header_l4_len (sizeof(struct udphdr) + sizeof(struct genevehdr)) +#define geneve4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len) +static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IP); + efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct genevehdr)); + efx_gen_tun_header_geneve(encap); +} + +#ifdef CONFIG_IPV6 +#define vxlan6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len) +static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IPV6); + efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr)); + efx_gen_tun_header_vxlan(encap); +} + +#define geneve6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len) +static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IPV6); + efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, geneve_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct genevehdr)); + efx_gen_tun_header_geneve(encap); +} +#endif + +static void efx_gen_encap_header(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + encap->n_valid = encap->neigh->n_valid; + + /* GCC stupidly thinks that only values explicitly listed in the enum + * definition can _possibly_ be sensible case values, so without this + * cast it complains about the IPv6 versions. + */ + switch ((int)encap->type) { + case EFX_ENCAP_TYPE_VXLAN: + efx_gen_vxlan_header_ipv4(encap); + break; + case EFX_ENCAP_TYPE_GENEVE: + efx_gen_geneve_header_ipv4(encap); + break; +#ifdef CONFIG_IPV6 + case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6: + efx_gen_vxlan_header_ipv6(encap); + break; + case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6: + efx_gen_geneve_header_ipv6(encap); + break; +#endif + default: + /* unhandled encap type, can't happen */ + if (net_ratelimit()) + netif_err(efx, drv, efx->net_dev, + "Bogus encap type %d, can't generate\n", + encap->type); + + /* Use fallback action. */ + encap->n_valid = false; + break; + } +} + +static void efx_tc_update_encap(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + struct efx_tc_action_set_list *acts, *fallback; + struct efx_tc_flow_rule *rule; + struct efx_tc_action_set *act; + int rc; + + if (encap->n_valid) { + /* Make sure no rules are using this encap while we change it */ + list_for_each_entry(act, &encap->users, encap_user) { + acts = act->user; + if (WARN_ON(!acts)) /* can't happen */ + continue; + rule = container_of(acts, struct efx_tc_flow_rule, acts); + if (rule->fallback) + fallback = rule->fallback; + else /* fallback fallback: deliver to PF */ + fallback = &efx->tc->facts.pf; + rc = efx_mae_update_rule(efx, fallback->fw_id, + rule->fw_id); + if (rc) + netif_err(efx, drv, efx->net_dev, + "Failed to update (f) rule %08x rc %d\n", + rule->fw_id, rc); + else + netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n", + rule->fw_id); + } + } + + /* Make sure we don't leak arbitrary bytes on the wire; + * set an all-0s ethernet header. A successful call to + * efx_gen_encap_header() will overwrite this. + */ + memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); + encap->encap_hdr_len = ETH_HLEN; + + if (encap->neigh) { + read_lock_bh(&encap->neigh->lock); + efx_gen_encap_header(efx, encap); + read_unlock_bh(&encap->neigh->lock); + } else { + encap->n_valid = false; + } + + rc = efx_mae_update_encap_md(efx, encap); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "Failed to update encap hdr %08x rc %d\n", + encap->fw_id, rc); + return; + } + netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n", + encap->fw_id); + if (!encap->n_valid) + return; + /* Update rule users: use the action if they are now ready */ + list_for_each_entry(act, &encap->users, encap_user) { + acts = act->user; + if (WARN_ON(!acts)) /* can't happen */ + continue; + rule = container_of(acts, struct efx_tc_flow_rule, acts); + if (!efx_tc_check_ready(efx, rule)) + continue; + rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id); + if (rc) + netif_err(efx, drv, efx->net_dev, + "Failed to update rule %08x rc %d\n", + rule->fw_id, rc); + else + netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n", + rule->fw_id); + } +} + +static void efx_neigh_update(struct work_struct *work) +{ + struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work); + struct efx_tc_encap_action *encap; + struct efx_nic *efx = neigh->efx; + + mutex_lock(&efx->tc->mutex); + list_for_each_entry(encap, &neigh->users, list) + efx_tc_update_encap(neigh->efx, encap); + /* release ref taken in efx_neigh_event() */ + if (refcount_dec_and_test(&neigh->ref)) + efx_free_neigh(neigh); + mutex_unlock(&efx->tc->mutex); +} + +static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n) +{ + struct efx_neigh_binder keys = {NULL}, *neigh; + bool n_valid, ipv6 = false; + char ha[ETH_ALEN]; + size_t keysize; + + if (WARN_ON(!efx->tc)) + return NOTIFY_DONE; + + if (n->tbl == &arp_tbl) { + keysize = sizeof(keys.dst_ip); +#if IS_ENABLED(CONFIG_IPV6) + } else if (n->tbl == ipv6_stub->nd_tbl) { + ipv6 = true; + keysize = sizeof(keys.dst_ip6); +#endif + } else { + return NOTIFY_DONE; + } + if (!n->parms) { + netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n"); + return NOTIFY_DONE; + } + keys.net = read_pnet(&n->parms->net); + if (n->tbl->key_len != keysize) { + netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n", + n->tbl->key_len); + return NOTIFY_DONE; + } + read_lock_bh(&n->lock); /* Get a consistent view */ + memcpy(ha, n->ha, ETH_ALEN); + n_valid = (n->nud_state & NUD_VALID) && !n->dead; + read_unlock_bh(&n->lock); + if (ipv6) + memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len); + else + memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len); + rcu_read_lock(); + neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys, + efx_neigh_ht_params); + if (!neigh || neigh->dying) + /* We're not interested in this neighbour */ + goto done; + write_lock_bh(&neigh->lock); + if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) { + write_unlock_bh(&neigh->lock); + /* Nothing has changed; no work to do */ + goto done; + } + neigh->n_valid = n_valid; + memcpy(neigh->ha, ha, ETH_ALEN); + write_unlock_bh(&neigh->lock); + if (refcount_inc_not_zero(&neigh->ref)) { + rcu_read_unlock(); + if (!schedule_work(&neigh->work)) + /* failed to schedule, release the ref we just took */ + if (refcount_dec_and_test(&neigh->ref)) + efx_free_neigh(neigh); + } else { +done: + rcu_read_unlock(); + } + return NOTIFY_DONE; +} + +bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule) +{ + struct efx_tc_action_set *act; + + /* Encap actions can only be offloaded if they have valid + * neighbour info for the outer Ethernet header. + */ + list_for_each_entry(act, &rule->acts.list, list) + if (act->encap_md && !act->encap_md->n_valid) + return false; + return true; +} + +struct efx_tc_encap_action *efx_tc_flower_create_encap_md( + struct efx_nic *efx, const struct ip_tunnel_info *info, + struct net_device *egdev, struct netlink_ext_ack *extack) +{ + enum efx_encap_type type = efx_tc_indr_netdev_type(egdev); + struct efx_tc_encap_action *encap, *old; + struct efx_rep *to_efv; + s64 rc; + + if (type == EFX_ENCAP_TYPE_NONE) { + /* dest is not an encap device */ + NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set"); + return ERR_PTR(-EOPNOTSUPP); + } + rc = efx_mae_check_encap_type_supported(efx, type); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type"); + return ERR_PTR(rc); + } + /* No support yet for Geneve options */ + if (info->options_len) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options"); + return ERR_PTR(-EOPNOTSUPP); + } + switch (info->mode) { + case IP_TUNNEL_INFO_TX: + break; + case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6: + type |= EFX_ENCAP_FLAG_IPV6; + break; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u", + info->mode); + return ERR_PTR(-EOPNOTSUPP); + } + encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT); + if (!encap) + return ERR_PTR(-ENOMEM); + encap->type = type; + encap->key = info->key; + INIT_LIST_HEAD(&encap->users); + old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht, + &encap->linkage, + efx_tc_encap_ht_params); + if (old) { + /* don't need our new entry */ + kfree(encap); + if (!refcount_inc_not_zero(&old->ref)) + return ERR_PTR(-EAGAIN); + /* existing entry found, ref taken */ + return old; + } + + rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack); + if (rc < 0) + goto out_remove; + to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev); + if (IS_ERR(to_efv)) { + /* neigh->egdev isn't ours */ + NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch"); + rc = PTR_ERR(to_efv); + goto out_release; + } + rc = efx_tc_flower_external_mport(efx, to_efv); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port"); + goto out_release; + } + encap->dest_mport = rc; + read_lock_bh(&encap->neigh->lock); + efx_gen_encap_header(efx, encap); + read_unlock_bh(&encap->neigh->lock); + + rc = efx_mae_allocate_encap_md(efx, encap); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw"); + goto out_release; + } + + /* ref and return */ + refcount_set(&encap->ref, 1); + return encap; +out_release: + efx_release_neigh(efx, encap); +out_remove: + rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, + efx_tc_encap_ht_params); + kfree(encap); + return ERR_PTR(rc); +} + +void efx_tc_flower_release_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + if (!refcount_dec_and_test(&encap->ref)) + return; /* still in use */ + efx_release_neigh(efx, encap); + rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, + efx_tc_encap_ht_params); + efx_mae_free_encap_md(efx, encap); + kfree(encap); +} + +static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh) +{ + struct efx_tc_encap_action *encap, *next; + + list_for_each_entry_safe(encap, next, &neigh->users, list) { + /* Should cause neigh usage count to fall to zero, freeing it */ + efx_release_neigh(efx, encap); + /* The encap has lost its neigh, so it's now unready */ + efx_tc_update_encap(efx, encap); + } +} + +void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev) +{ + struct efx_neigh_binder *neigh; + struct rhashtable_iter walk; + + mutex_lock(&efx->tc->mutex); + rhashtable_walk_enter(&efx->tc->neigh_ht, &walk); + rhashtable_walk_start(&walk); + while ((neigh = rhashtable_walk_next(&walk)) != NULL) { + if (IS_ERR(neigh)) + continue; + if (neigh->egdev != net_dev) + continue; + neigh->dying = true; + rhashtable_walk_stop(&walk); + synchronize_rcu(); /* Make sure any updates see dying flag */ + efx_tc_remove_neigh_users(efx, neigh); /* might sleep */ + rhashtable_walk_start(&walk); + } + rhashtable_walk_stop(&walk); + rhashtable_walk_exit(&walk); + mutex_unlock(&efx->tc->mutex); +} + +int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, + void *ptr) +{ + if (efx->type->is_vf) + return NOTIFY_DONE; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + return efx_neigh_event(efx, ptr); + default: + return NOTIFY_DONE; + } +} diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.h b/drivers/net/ethernet/sfc/tc_encap_actions.h new file mode 100644 index 0000000000000000000000000000000000000000..4d755fb92daf1fa581ec2f1d7acdd7854032e27b --- /dev/null +++ b/drivers/net/ethernet/sfc/tc_encap_actions.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2023, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_TC_ENCAP_ACTIONS_H +#define EFX_TC_ENCAP_ACTIONS_H +#include "net_driver.h" + +#include <linux/refcount.h> +#include <net/tc_act/tc_tunnel_key.h> + +/** + * struct efx_neigh_binder - driver state for a neighbour entry + * @net: the network namespace in which this neigh resides + * @dst_ip: the IPv4 destination address resolved by this neigh + * @dst_ip6: the IPv6 destination address resolved by this neigh + * @ha: the hardware (Ethernet) address of the neighbour + * @n_valid: true if the neighbour is in NUD_VALID state + * @lock: protects @ha and @n_valid + * @ttl: Time To Live associated with the route used + * @dying: set when egdev is going away, to skip further updates + * @egdev: egress device from the route lookup. Holds a reference + * @dev_tracker: reference tracker entry for @egdev + * @ns_tracker: reference tracker entry for @ns + * @ref: counts encap actions referencing this entry + * @used: jiffies of last time traffic hit any encap action using this. + * When counter reads update this, a new neighbour event is sent to + * indicate that the neighbour entry is still in use. + * @users: list of &struct efx_tc_encap_action + * @linkage: entry in efx->neigh_ht (keys are @net, @dst_ip, @dst_ip6). + * @work: processes neighbour state changes, updates the encap actions + * @efx: owning NIC instance. + * + * Associates a neighbour entry with the encap actions that are + * interested in it, allowing the latter to be updated when the + * neighbour details change. + * Whichever of @dst_ip and @dst_ip6 is not in use will be all-zeroes, + * this distinguishes IPv4 from IPv6 entries. + */ +struct efx_neigh_binder { + struct net *net; + __be32 dst_ip; + struct in6_addr dst_ip6; + char ha[ETH_ALEN]; + bool n_valid; + rwlock_t lock; + u8 ttl; + bool dying; + struct net_device *egdev; + netdevice_tracker dev_tracker; + netns_tracker ns_tracker; + refcount_t ref; + unsigned long used; + struct list_head users; + struct rhash_head linkage; + struct work_struct work; + struct efx_nic *efx; +}; + +/* This limit is arbitrary; current hardware (SN1022) handles encap headers + * of up to 126 bytes, but that limit is not enshrined in the MCDI protocol. + */ +#define EFX_TC_MAX_ENCAP_HDR 126 +struct efx_tc_encap_action { + enum efx_encap_type type; + struct ip_tunnel_key key; /* 52 bytes */ + u32 dest_mport; /* is copied into struct efx_tc_action_set */ + u8 encap_hdr_len; + bool n_valid; + u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR]; + struct efx_neigh_binder *neigh; + struct list_head list; /* entry on neigh->users list */ + struct list_head users; /* action sets using this encap_md */ + struct rhash_head linkage; /* efx->tc_encap_ht */ + refcount_t ref; + u32 fw_id; /* index of this entry in firmware encap table */ +}; + +/* create/uncreate/teardown hashtables */ +int efx_tc_init_encap_actions(struct efx_nic *efx); +void efx_tc_destroy_encap_actions(struct efx_nic *efx); +void efx_tc_fini_encap_actions(struct efx_nic *efx); + +struct efx_tc_flow_rule; +bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule); + +struct efx_tc_encap_action *efx_tc_flower_create_encap_md( + struct efx_nic *efx, const struct ip_tunnel_info *info, + struct net_device *egdev, struct netlink_ext_ack *extack); +void efx_tc_flower_release_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); + +void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev); +int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, + void *ptr); + +#endif /* EFX_TC_ENCAP_ACTIONS_H */