From dc17f1982f94dd356c0813754edfd04a3daad5f1 Mon Sep 17 00:00:00 2001
From: Ondrej Zajicek
Date: Fri, 10 Nov 2023 02:32:15 +0100
Subject: [PATCH] BGP: PMSI tunnel attribute support

PMSI tunnel attribute is required for EVPN IMET routes.
---
 proto/bgp/attrs.c  | 110 +++++++++++++++++++++++++++++++++++++++++++++
 proto/bgp/bgp.h    |  34 ++++++++++++++
 proto/bgp/config.Y |   4 +-
 proto/evpn/evpn.c  |  20 ++++++++-
 4 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 4346cd5d..96ebf877 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -203,6 +203,39 @@ bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
 }
 
 
+/*
+ *	PMSI tunnel handling
+ */
+
+/**
+ * bgp_pmsi_new_ingress_replication - build PMSI tunnel attribute data
+ * @pool: linpool the attribute data is allocated from
+ * @addr: tunnel endpoint address, IPv4 or IPv6
+ * @label: value written to the 3-byte label field
+ *
+ * The data start with a 5-byte header (a zero byte, the ingress replication
+ * tunnel type and the label), followed by the endpoint address.
+ */
+adata *
+bgp_pmsi_new_ingress_replication(linpool *pool, ip_addr addr, u32 label)
+{
+  int v4 = ipa_is_ip4(addr);
+  uint dlen = 5 + (v4 ? sizeof(ip4_addr) : sizeof(ip6_addr));
+  adata *ad = lp_alloc_adata(pool, dlen);
+
+  ad->data[0] = 0;
+  ad->data[1] = BGP_PMSI_TYPE_INGRESS_REPLICATION;
+  put_u24(ad->data + 2, label);
+
+  if (v4)
+    put_ip4(ad->data + 5, ipa_to_ip4(addr));
+  else
+    put_ip6(ad->data + 5, ipa_to_ip6(addr));
+
+  return ad;
+}
+
+
 /*
  *	AIGP handling
  */
@@ -849,6 +882,75 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt
 }
 
 
+/*
+ * Check a received PMSI_TUNNEL attribute and store it as received. The
+ * attribute must hold at least the 5-byte header. For the tunnel types
+ * known here, the length of the tunnel identifier is checked as well;
+ * attributes with any other tunnel type are stored with BAF_PARTIAL set.
+ */
+static void
+bgp_decode_pmsi_tunnel(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
+{
+  if (len < 5)
+    WITHDRAW(BAD_LENGTH, "PMSI_TUNNEL", len);
+
+  uint dlen = len - 5;
+
+  switch (data[1])
+  {
+  case BGP_PMSI_TYPE_NO_INFO:
+    if (dlen != 0)
+      WITHDRAW(BAD_LENGTH, "PMSI_TUNNEL", len);
+    break;
+
+  case BGP_PMSI_TYPE_INGRESS_REPLICATION:
+    if ((dlen != sizeof(ip4_addr)) && (dlen != sizeof(ip6_addr)))
+      WITHDRAW(BAD_LENGTH, "PMSI_TUNNEL", len);
+    break;
+
+  default:
+    flags |= BAF_PARTIAL;
+  }
+
+  bgp_set_attr_data(to, s->pool, BA_PMSI_TUNNEL, flags, data, len);
+}
+
+/*
+ * Format a PMSI_TUNNEL attribute as text. Known tunnel types are printed by
+ * name, other types as "type <number>" followed by the tunnel identifier in
+ * hex. A non-zero label is printed as " mpls <label>".
+ */
+static void
+bgp_format_pmsi_tunnel(const eattr *a, byte *buf, uint size)
+{
+  const adata *ad = a->u.ptr;
+  uint type = bgp_pmsi_get_type(ad);
+  uint label = bgp_pmsi_get_label(ad);
+
+  char mpls[16] = {};
+  if (label)
+    bsnprintf(mpls, sizeof(mpls), " mpls %u", label);
+
+  switch (type)
+  {
+  case BGP_PMSI_TYPE_NO_INFO:
+    bsnprintf(buf, size, "no-info%s", mpls);
+    break;
+
+  case BGP_PMSI_TYPE_INGRESS_REPLICATION:;
+    ip_addr ep = bgp_pmsi_ir_get_endpoint(ad);
+    bsnprintf(buf, size, "ingress-replication %I%s", ep, mpls);
+    break;
+
+  default:;
+    int n = bsnprintf(buf, size, "type %u%s ", type, mpls);
+    ADVANCE(buf, size, n);
+    bstrbintohex(ad->data + 5, ad->length - 5, buf, size, ':');
+    break;
+  }
+}
+
+
 static void
 bgp_export_aigp(struct bgp_export_state *s, eattr *a)
 {
@@ -1112,6 +1214,14 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
     .decode = bgp_decode_as4_aggregator,
     .format = bgp_format_aggregator,
   },
+  [BA_PMSI_TUNNEL] = {
+    .name = "pmsi_tunnel",
+    .type = EAF_TYPE_OPAQUE,
+    .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
+    .encode = bgp_encode_raw,
+    .decode = bgp_decode_pmsi_tunnel,
+    .format = bgp_format_pmsi_tunnel,
+  },
   [BA_AIGP] = {
     .name = "aigp",
     .type = EAF_TYPE_OPAQUE,
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 3128d309..4227cc83 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -657,10 +657,43 @@ void bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *ol
 int bgp_preexport(struct channel *, struct rte *);
 int bgp_get_attr(const struct eattr *e, byte *buf, int buflen);
 void bgp_get_route_info(struct rte *, byte *buf);
+adata * bgp_pmsi_new_ingress_replication(linpool *pool, ip_addr addr, u32 label);
 int bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad);
 
 byte * bgp_bmp_encode_rte(struct bgp_channel *c, byte *buf, const net_addr *n, const struct rte *new, const struct rte_src *src);
 
+/*
+ * PMSI tunnel attribute data as read by the accessors below: byte 1 is the
+ * tunnel type, bytes 2-4 the label and the tunnel identifier starts at
+ * byte 5. The accessors do not check that these bytes are present.
+ */
+#define BGP_PMSI_TYPE_NO_INFO		0
+#define BGP_PMSI_TYPE_INGRESS_REPLICATION 6
+
+static inline uint
+bgp_pmsi_get_type(const adata *ad)
+{ return ad->data[1]; }
+
+static inline u32
+bgp_pmsi_get_label(const adata *ad)
+{ return get_u24(ad->data + 2); }
+
+/* Endpoint of an ingress replication tunnel, IPA_NONE for unexpected identifier length */
+static inline ip_addr
+bgp_pmsi_ir_get_endpoint(const adata *ad)
+{
+  uint dlen = ad->length - 5;
+  const byte *data = ad->data + 5;
+
+  if (dlen == sizeof(ip4_addr))
+    return ipa_from_ip4(get_ip4(data));
+  else if (dlen == sizeof(ip6_addr))
+    return ipa_from_ip6(get_ip6(data));
+  else
+    return IPA_NONE;
+}
+
+
 #define BGP_AIGP_METRIC		1
 #define BGP_AIGP_MAX		U64(0xffffffffffffffff)
 
@@ -727,6 +760,7 @@ byte *bgp_create_end_mark_(struct bgp_channel *c, byte *buf);
 #define BA_EXT_COMMUNITY	0x10	/* RFC 4360 */
 #define BA_AS4_PATH		0x11	/* RFC 6793 */
 #define BA_AS4_AGGREGATOR	0x12	/* RFC 6793 */
+#define BA_PMSI_TUNNEL		0x16	/* RFC 6514 */
 #define BA_AIGP			0x1a	/* RFC 7311 */
 #define BA_LARGE_COMMUNITY	0x20	/* RFC 8092 */
 #define BA_ONLY_TO_CUSTOMER	0x23	/* RFC 9234 */
diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y
index 2a831bc2..83aefb2a 100644
--- a/proto/bgp/config.Y
+++ b/proto/bgp/config.Y
@@ -32,7 +32,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, KEEPALIVE,
 	LIVED, STALE, IMPORT, IBGP, EBGP, MANDATORY, INTERNAL, EXTERNAL, SETS,
 	DYNAMIC, RANGE, NAME, DIGITS, BGP_AIGP, AIGP, ORIGINATE, COST, ENFORCE,
 	FIRST, FREE, VALIDATE, BASE, ROLE, ROLES, PEER, PROVIDER, CUSTOMER,
-	RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC, GLOBAL, SEND)
+	RS_SERVER, RS_CLIENT, REQUIRE, BGP_OTC, GLOBAL, SEND, BGP_PMSI_TUNNEL)
 
 %type bgp_nh
 %type bgp_afi
@@ -371,6 +371,8 @@ dynamic_attr: BGP_CLUSTER_LIST
 	{ $$ = f_new_dynamic_attr(EAF_TYPE_INT_SET, T_CLIST, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST)); } ;
 dynamic_attr: BGP_EXT_COMMUNITY
 	{ $$ = f_new_dynamic_attr(EAF_TYPE_EC_SET, T_ECLIST, EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)); } ;
+dynamic_attr: BGP_PMSI_TUNNEL
+	{ $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_PMSI_TUNNEL)); } ;
 dynamic_attr: BGP_AIGP
 	{ $$ = f_new_dynamic_attr(EAF_TYPE_OPAQUE, T_ENUM_EMPTY, EA_CODE(PROTOCOL_BGP, BA_AIGP)); } ;
 dynamic_attr: BGP_LARGE_COMMUNITY
diff --git a/proto/evpn/evpn.c b/proto/evpn/evpn.c
index e686cfa3..1c8cad92 100644
--- a/proto/evpn/evpn.c
+++ b/proto/evpn/evpn.c
@@ -56,6 +56,7 @@
 
 #define EA_BGP_NEXT_HOP		EA_CODE(PROTOCOL_BGP, BA_NEXT_HOP)
 #define EA_BGP_EXT_COMMUNITY	EA_CODE(PROTOCOL_BGP, BA_EXT_COMMUNITY)
+#define EA_BGP_PMSI_TUNNEL	EA_CODE(PROTOCOL_BGP, BA_PMSI_TUNNEL)
 #define EA_BGP_MPLS_LABEL_STACK	EA_CODE(PROTOCOL_BGP, BA_MPLS_LABEL_STACK)
 
 static inline const struct adata * ea_get_adata(ea_list *e, uint id)
@@ -194,6 +195,10 @@ evpn_announce_imet(struct evpn_proto *p, int new)
     struct adata *ad = evpn_export_targets(p, &null_adata);
     ea_set_attr_ptr(&a->eattrs, tmp_linpool, EA_BGP_EXT_COMMUNITY, 0, EAF_TYPE_EC_SET, ad);
 
+    /* Announce ingress replication to our router address, with VNI in the label field */
+    ad = bgp_pmsi_new_ingress_replication(tmp_linpool, p->router_addr, p->vni);
+    ea_set_attr_ptr(&a->eattrs, tmp_linpool, EA_BGP_PMSI_TUNNEL, 0, EAF_TYPE_OPAQUE, ad);
+
     rte *e = rte_get_temp(a, p->p.main_source);
     rte_update2(c, n, e, p->p.main_source);
   }
@@ -259,7 +264,14 @@ evpn_receive_imet(struct evpn_proto *p, const net_addr_evpn_imet *n0, rte *new)
 
   if (new && rte_resolvable(new))
   {
-    eattr *nh = ea_find(new->attrs->eattrs, EA_BGP_NEXT_HOP);
+    /* Next hop and label of the resulting route come from the PMSI_TUNNEL attribute */
+    eattr *pt = ea_find(new->attrs->eattrs, EA_BGP_PMSI_TUNNEL);
+    if (!pt)
+      BAD("Missing PMSI_TUNNEL attribute in %N", n0);
+
+    uint pmsi_type = bgp_pmsi_get_type(pt->u.ptr);
+    if (pmsi_type != BGP_PMSI_TYPE_INGRESS_REPLICATION)
+      BAD("Unsupported PMSI_TUNNEL type %u in %N", pmsi_type, n0);
 
     rta *a = alloca(RTA_MAX_SIZE);
     *a = (rta) {
@@ -267,15 +279,19 @@ evpn_receive_imet(struct evpn_proto *p, const net_addr_evpn_imet *n0, rte *new)
       .scope = SCOPE_UNIVERSE,
       .dest = RTD_UNICAST,
       .pref = c->preference,
-      .nh.gw = nh ? *((ip_addr *) nh->u.ptr->data) : IPA_NONE,
+      .nh.gw = bgp_pmsi_ir_get_endpoint(pt->u.ptr),
       .nh.iface = p->tunnel_dev,
     };
 
+    a->nh.labels = 1;
+    a->nh.label[0] = bgp_pmsi_get_label(pt->u.ptr);
+
     rte *e = rte_get_temp(a, s);
     rte_update2(c, n, e, s);
   }
   else
   {
+  withdraw:
     rte_update2(c, n, NULL, s);
   }
 }