<?xml version="1.0"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!ENTITY RFC2119 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC3107 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3107.xml">
<!ENTITY RFC8402 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8402.xml">
<!ENTITY RFC8570 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8570.xml">
<!ENTITY RFC7311 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7311.xml">
<!ENTITY RFC8679 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8679.xml">
<!ENTITY RFC8029 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8029.xml">
<!ENTITY RFC8287 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8287.xml">
<!ENTITY RFC7471 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7471.xml">
<!ENTITY RFC8669 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.8669.xml">
<!ENTITY RFC4364 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4364.xml">
<!ENTITY RFC1997 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1997.xml">
<!ENTITY RFC6388 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6388.xml">
<!ENTITY RFC5357 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5357.xml">
<!ENTITY RFC7510 PUBLIC "" "http://xml.resource.org/public/rfc/bibxml/reference.RFC.7510.xml">
]>
<?rfc toc="yes"?>
<?rfc tocompact="yes"?>
<?rfc tocdepth="3"?>
<?rfc tocindent="yes"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes"?>
<?rfc comments="yes"?>
<?rfc inline="yes"?>
<?rfc compact="yes"?>
<?rfc subcompact="no"?>
<rfc category="std" docName="draft-hegde-spring-mpls-seamless-sr-03" ipr="trust200902">
<front>
  <title abbrev="Seamless Segment Routing">Seamless Segment Routing</title>

 
 

 <author initials="S." surname="Hegde" fullname="Shraddha Hegde">
    <organization>Juniper Networks Inc.</organization>
    <address>
      <postal>
        <street>Exora Business Park</street>
        <city>Bangalore</city>
        <region>KA</region>
        <code>560103</code>
        <country>India</country>
      </postal>
      <email>shraddha@juniper.net</email>
    </address>
  </author>
  <author initials="C." surname="Bowers" fullname="Chris Bowers">
    <organization>Juniper Networks Inc.</organization>
    <address>
      <postal>
        <street></street>
        <city></city>
        <region></region>
        <code></code>
        <country></country>
      </postal>
      <email>cbowers@juniper.net</email>
    </address>
  </author>
     
     <author initials="X." surname="Xu" fullname="Xiaohu Xu">
    <organization>Alibaba Inc.</organization>
    <address>
      <postal>
        <street></street>
        <city>Beijing</city>
        <region></region>
        <code></code>
        <country>China</country>
      </postal>
      <email>xiaohu.xxh@alibaba-inc.com</email>
    </address>
  </author>
      <author initials="A." surname="Gulko" fullname="Arkadiy Gulko">
    <organization>Refinitiv</organization>
    <address>
      <postal>
        <street></street>
        <city></city>
        <region></region>
        <code></code>
        <country></country>
      </postal>
      <email>arkadiy.gulko@refinitiv.com</email>
    </address>
  </author>
    <author initials="A." surname="Bogdanov" fullname="Alex Bogdanov">
    <organization>Google Inc.</organization>
    <address>
      <postal>
        <street></street>
        <city></city>
        <region></region>
        <code></code>
        <country></country>
      </postal>
      <email>bogdanov@google.com</email>
    </address>
  </author>
  
    <author initials="J." surname="Uttaro" fullname="Jim Uttaro">
    <organization>ATT</organization>
    <address>
      <postal>
        <street></street>
        <city></city>
        <region></region>
        <code></code>
        <country></country>
      </postal>
      <email>ju1738@att.com</email>
    </address>
  </author>
  <author initials="L." surname="Jalil" fullname="Luay Jalil">
    <organization>Verizon</organization>
    <address>
      <postal>
        <street></street>
        <city></city>
        <region></region>
        <code></code>
        <country></country>
      </postal>
      <email>luay.jalil@verizon.com</email>
    </address>
  </author>
    <author initials="M." surname="Khaddam" fullname="Mazen Khaddam">
    <organization>Cox communications</organization>
    <address>
      <postal>
        <street></street>
        <city></city>
        <region></region>
        <code></code>
        <country></country>
      </postal>
      <email>mazen.khaddam@cox.com</email>
    </address>
  </author>
      <author initials="A." surname="Alston" fullname="Andrew Alston">
    <organization>Liquid Telecom</organization>
    <address>
      <postal>
        <street></street>
        <city></city>
        <region></region>
        <code></code>
        <country></country>
      </postal>
      <email>andrew.alston@liquidtelecom.com</email>
    </address>
  </author>
  
  <date year="2020"/>
  <area>Routing</area>
  <workgroup>SPRING</workgroup>
  <keyword>MPLS</keyword>
  <keyword>BGP-LU</keyword>
  <keyword>AS</keyword>
  <keyword>IGP</keyword>
  <keyword>SPRING</keyword>
  <keyword>LDP</keyword>
  <keyword>RSVP</keyword>
  <keyword>SLO</keyword>
  <abstract>
 <t> In order to operate networks with large numbers of devices, network operators
 organize networks into multiple smaller network domains.
 Each network domain typically runs an IGP which has complete visibility 
 within its own domain, but limited visibility outside of its domain.
 Seamless Segment Routing (Seamless SR) provides flexible, scalable and reliable
 end-to-end connectivity for services across independent network domains.
 Seamless SR accommodates domains using SR, LDP, and RSVP for MPLS label distribution
 as well as domains running IP without MPLS (IP-Fabric).
</t> 

  </abstract>

  <note title="Requirements Language">
    <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
    "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
    document are to be interpreted as described in <xref
    target="RFC2119">RFC 2119</xref>.</t>
  </note>

</front>

<middle>
<section title="Introduction" anchor='intro'>
<t> Evolving wireless access technology
 and cloud applications are expected to place new 
 requirements on the packet transport networks. 
 These services are contributing to significantly
 higher bandwidth throughput which in turn leads to a
 growing number of transport network devices.

As an example, 5G networks are expected to require up 
to 250Gbps in the fronthaul and up to 400Gbps in the backhaul.

There is a desire to allow many network functions to be 
virtualized and cloud native. In order to support latency-sensitive
 cloud-native network functions, packet transport networks should 
 be capable of providing low-latency paths end-to-end. Some services 
 will require low-latency paths while others may require different
 QoS properties. The network should be able to differentiate 
 between the services and provide corresponding SLA transport paths.

In addition, as these applications become more sensitive and
less loss tolerant, more and more emphasis is placed on overall 
service availability and reliability.
</t>

<t>
  The Seamless SR architecture builds upon the Seamless 
MPLS architecture and caters to new requirements imposed by the
5G transport networks and the cloud applications.

<xref target = 'I-D.ietf-mpls-seamless-mpls'/>
contains a good description
of the Seamless MPLS architecture. 
Although <xref target = 'I-D.ietf-mpls-seamless-mpls'/>
has not been published as an RFC, it serves as a useful description 
of the Seamless MPLS architecture. <xref target = 'I-D.ietf-mpls-seamless-mpls'/> 
describes the Seamless MPLS architecture, which 
uses LDP and/or RSVP for intra-domain label distribution, and 
BGP-LU <xref target = 'RFC3107'/> for end-to-end label 
distribution.  Seamless SR focuses on using segment routing for intra-domain
label distribution. The mechanisms described in this document are equally
applicable to intra-domain tunneling mechanisms deployed using RSVP and/or LDP. 
</t>


<t>
By using segment routing for intra-domain label distribution, Seamless SR 
is able to easily support SR-MPLS on both IPv4 and IPv6 networks.  This 
overcomes a limitation of the classic Seamless MPLS architecture, which 
was limited to running MPLS on IPv4 networks in practice. Seamless SR (like Seamless MPLS) 
can use BGP-LU (RFC 3107) to stitch different domains. However, Seamless SR
can also take advantage of BGP Prefix-SID <xref target = 'RFC8669'/> to 
provide predictable and deterministic labels for the inter-domain connectivity.
</t>


<t> 
The basic functionality of the Seamless SR architecture does not 
require any enhancements to existing protocols.  However, in order to 
support end-to-end service requirements across multiple domains, 
protocol extensions may be needed. This draft discusses use cases, 
requirements, and potential protocol enhancements.  
 </t>

</section>
<section title="Terminology" anchor='terminology'>
<t> 
<figure anchor="usecase_terminology" title="Terminology">
      <artwork>
 This document uses the following terminology

   o  Access Node (AN): An access node is a node which processes
      customer frames or packets at Layer 2 or above.  This includes
      but is not limited to DSLAMs and Cell Site Routers in 5G networks.
      Access nodes have only limited MPLS functionalities
      in order to reduce complexity in the access network.

   o  Pre-Aggregation Node (P-AGG): A pre-aggregation node (P-AGG) is a node
      which aggregates several access nodes (ANs).
      
   o  Aggregation Node (AGG): An aggregation node (AGG) is a node which
      aggregates several pre-aggregation nodes (P-AGG).

   o  Area Border Router (ABR): Router between aggregation and core
      domain.
   
   o  Label Switch Router (LSR): Label Switch Routers are pure transit
      nodes.  They ideally have no customer or service state and are
      therefore decoupled from service creation.

  
   o  Use Case: Describes a typical network including service creation
      points and distribution of remote node loopback prefixes. 
</artwork>
</figure>
</t>
</section>
<section title="Use Cases" anchor='usecase'>
<section title="Service provider network" anchor ='service_provider'>

    <t>Service provider transport networks use multiple domains to 
    support scalability.  For this analysis, we consider a representative
    network design with four levels of hierarchy: access domains, pre-aggregation domains, 
    aggregation domains and a core. (See <xref target = 'reference_diagram1'/>). 
    The 5G transport networks in particular are expected to scale to 
    a very large number of access nodes due to the shorter range of the 
    5G radio technology. The networks are expected to scale up to one million nodes.
    </t>
    
     <t>
<figure anchor="reference_diagram1" title="5G network">
      <artwork>
        
              +-------+   +-------+   +------+   +------+
              |       |   |       |   |      |   |      |
           +--+ P-AGG1+---+ AGG1  +---+ ABR1 +---+ LSR1 +--> to ABR
          /   |       |  /|       |   |      |   |      |
   +----+/    +-------+\/ +-------+   +------+  /+------+
   | AN |              /\                     \/
   +----+\    +-------+  \+-------+   +------+/\ +------+
          \   |       |   |       |   |      |  \|      |
           +--+ P-AGG2+---+ AGG2  +---+ ABR2 +---+ LSR2 +--> to ABR
              |       |   |       |   |      |   |      |
              +-------+   +-------+   +------+   +------+

   ISIS L1       ISIS L2                   ISIS L2 

   |-Access-|--Aggregation Domain--|---------Core-----------------|
   
    </artwork>
    </figure>
    </t>
    
     <t>
    Many network functions in a 5G network will be virtualized/containerized and 
    distributed across multiple data centers.  Virtualized
    network functions are instantiated dynamically across different 
    compute resources.  This requires that the underlying transport 
    network supports the stringent SLA on end-to-end paths.
    </t>
    
     <t>
    5G networks support a variety of service use cases that require
    end-to-end slicing. 
    
    In certain cases the end-to-end
    connectivity requires differentiated forwarding capabilities.
    Seamless SR architecture should provide the ability to establish
    end-to-end paths that satisfy
    the required SLAs. For example, end user requirement could be 
    to establish a low latency path end-to-end. 
    
    The System Architecture for the 5G System <xref target="TS.23.501-3GPP"/>
    currently defines four standardized Slice/Service Types: 
    Enhanced Mobile Broadband (eMBB), Ultra-Reliable Low Latency 
    Communication (URLLC), massive Internet of Things (mIoT),
    Vehicle to everything (V2X). 
    
    The Seamless SR should support end-to-end Service Level Objectives (SLO)
    to allow the creation of network slices with these four 
    Slice/Service Types.       
    </t>
    <t> Many deployments consist of ring topologies in the access and 
        aggregation networks. In the ring topologies, there are at most two forwarding
        paths for the traffic, whereas the core networks consist of nodes with
        denser connectivity compared to ring topologies. Thus core networks
        may have a larger number of TE paths while access networks 
        will have a smaller number of TE paths.
        The Seamless SR architecture should support the ability to have more 
        TE paths in one domain and fewer TE paths in another 
        domain and provide the ability to effectively connect the domains
        end-to-end while satisfying end-to-end constraints.
        </t>
   
</section>

<section title="Large scale WAN networks" anchor ='wan_network'>

    <t> As WAN networks grow beyond several thousand nodes, it is often
   useful to divide the network into multiple IGP domains, as illustrated 
   in <xref target="reference_diagram2"/>. Separate 
   IGP domains increase service availability by establishing a 
   constrained failure domain.  Smaller IGP
   domains may also improve network performance and 
   health by reducing the device scale profile 
   (including protocol and FIB scale).
   </t>
    

     <t>
<figure anchor="reference_diagram2" title="WAN Network">
      <artwork>
        
              +-------+     +-------+     +-------+
              |       |     |       |     |       |           
              |      ABR1  ABR2    ABR3   ABR4    |  
              |       |     |       |     |       |           
           PE1+DOMAIN1+-----+DOMAIN2+-----+DOMAIN3+PE2
              |       |     |       |     |       |
              |      ABR11  ABR22  ABR33  ABR44   |
              |       |     |       |     |       |
              +-------+     +-------+     +-------+
     

             |-ISIS1-|      |-ISIS2-|     |-ISIS3-|
   
    </artwork>
    </figure>
    </t>
    
    <t> 
     These large WAN networks often cross national boundaries.
     In order to meet data sovereignty requirements,
     operators need to maintain strict control over end-to-end
     traffic-engineered (TE) paths. Segment Routing 
     provides two main solutions to implement highly constrained TE paths.  
     
     Flex-algo (defined in <xref target ='I-D.ietf-lsr-flex-algo'/>) uses 
     prefix-SIDs computed by all nodes in the IGP domain using the same 
     pruned topology. 
     
     Highly constrained TE paths for the data sovereignty use case can
     also be implemented using SR-TE policies 
     (<xref target ='I-D.ietf-spring-segment-routing-policy'/>) built 
     using unprotected adjacency SIDs.
     </t>
     
     <t> 
     Both of these approaches work well for intra-domain TE paths.
     However, they both have limitations when one tries to 
     extend them to the creation of highly constrained inter-domain TE paths.
     A goal of seamless SR is to be able to create highly 
     constrained inter-domain TE paths in a scalable manner.
    </t>
    <t>
    Some deployments may use a centralized controller to acquire the 
    topologies of multiple domains and build end-to-end constrained paths.
    This can be scaled with hierarchical controllers. 
    However, there is still significant risk of a loss of network 
    connectivity to one or more controllers, which can result in a failure
    to satisfy the strict requirements of data sovereignty.
    The network should have pre-established TE paths end-to-end that don't
    rely on controllers in order to address these failure scenarios.
    </t>
</section>

<section title="Data Center Interconnect (DCI) Networks" anchor ='dci_network'>

    <t> Data centers are playing an increasingly important role in providing
    access to information and applications.
    Geographically diverse data centers usually connect via a high speed,
    reliable and secure core network.

<figure anchor="reference_diagram3" title="DCI Network">
      <artwork>
        
              +-------+     +-------+     +-------+   
              |       ASBR1 ASBR2 ASBR3   ASBR4   |  
              |       |     |       |     |       |           
           PE1+  DC1  +-----+  CORE +-----+  DC2  +PE2
              |    ASBR11  ASBR22 ASBR33 ASBR44   |
              |       |     |       |     |       |
              +-------+     +-------+     +-------+
     

              |-ISIS1-|      |-ISIS2-|    |-ISIS3-|
   
    </artwork>
    </figure>
    
    In many Data Center deployments, applications require end-to-end path
    diversity and/or end-to-end low latency paths. It is desirable to have a 
    uniform technology deployed in the core as well as in the Data Centers to create
    these SLA paths. Such uniformity simplifies the network to a great extent.
    It is desirable for a solution to only require service-related 
    configurations on the access end-points where services are attached,
    avoiding service-related configurations on the ABR/ASBR nodes.
    </t>
   
</section>

<section title="Multicast Use cases" anchor ='multicast'>

    <t> Multicast services such as IPTV and multicast VPN also need 
    to be supported across a multi-domain service provider network.
    </t>
    
    <t>
    <figure anchor="reference_diagram4" title="Multicast usecases">
      <artwork>
        
              +---------+---------+---------+   
              |         |         |         |
              S1       ABR1      ABR2       R1            
              | Metro1  |  Core   |  Metro2 |
              |         |         |         |     
              S2       ABR11     ABR22      R2
              |         |         |         |
              +---------+---------+---------+
     

              |-ISIS1-|  |-ISIS2-|  |-ISIS3-|
   
    </artwork>
    </figure>
    </t>
    
    
    <t>
    <xref target="reference_diagram4"/> shows a simplified multi-domain
    network supporting multicast.  Multicast sources S1 and S2 lie in
    a different domain from the receivers R1 and R2. Using multiple
    IGP domains presents a problem for the establishment of multicast 
    replication trees.  Typically, a multicast receiver does a 
    reverse path forwarding (RPF) lookup for a multicast source.  One 
    solution is to leak the routes for multicast sources across the 
    IGP domains.  However, this can compromise the scaling properties of 
    the multi-domain architecture.  SR-P2MP 
    <xref target ='I-D.voyer-pim-sr-p2mp-policy'/> offers a solution 
    for both intra-domain and inter-domain multicast. However, it does not
    accommodate deployments using existing intra-domain multicast technology,
    such as mLDP <xref target="RFC6388"/> in some of the domains.    
    A solution should accommodate a mixture of existing and newer technologies
    to better facilitate coexistence and migration.

    </t>
   
</section>

</section>

<section title="Requirements" anchor ='requirements'>
<t>This section provides a summary of requirements derived from
 the use cases described in previous sections. </t>
<section title="MPLS Transport" anchor ='transport'>
<t>
<list>
<t>The architecture SHOULD provide
 MPLS transport between two service endpoints regardless of whether
 the two end-points are in the same IGP domain, different IGP domains,
 or in different autonomous systems.</t>

<t> The MPLS transport SHOULD be supported on IPv4, IPv6, and 
dual-stack networks.</t>
</list>
</t>
</section>
<section title="SLA Guarantee" anchor ='sla'>
<t>
<list>
<t>The architecture SHOULD allow the creation of paths that 
support end-to-end SLAs. The paths should for example 
obey constraints related to latency, diversity, bandwidth and availability.</t>
<t> The architecture SHOULD support end-to-end
network slicing as described by 5G transport requirements
<xref target="TS.23.501-3GPP"/>.</t>
</list>
</t>
</section>
<section title="Scalability" anchor ='scale'>
<t>
<list>
<t> The architecture SHOULD be able to support
 up to 1 million nodes.</t>
<t> The architecture SHOULD facilitate the use 
of access nodes with low RIB/FIB and low CPU capabilities.</t>
<t> The architecture SHOULD facilitate the use 
of access nodes with low label stacking capability.</t>
<t> The architecture SHOULD allow for a scalable response to network
events.  An individual node SHOULD only need to respond to a limited 
subset of network events.  </t>
<t> Service routes on the border nodes SHOULD be minimized.</t>
 
</list>
</t>
</section>
<section title="Availability" anchor ='availability'>
<t>
<list>
<t> Traffic SHOULD be Fast Reroute (FRR) protected against 
link, node, and SRLG failures within a domain.  </t>
<t> Traffic SHOULD be Fast Reroute (FRR) protected against 
border node failures.  </t>
<t> Traffic SHOULD be Fast Reroute (FRR) protected against 
egress node and egress link failures.  </t>
</list>
</t>
</section>
<section title="Operations" anchor ='ops'>
<t>
<list>
<t> Each domain SHOULD be independent and SHOULD NOT depend
 on the transport technology in another domain. This allows for 
 more flexible evolution of the network. </t>
<t> Basic MPLS OAM mechanisms described in 
<xref target="RFC8029"/> SHOULD be supported.</t>
<t> End-to-end MPLS ping and traceroute procedures SHOULD be supported.</t>
<t> Ability to validate the path inside each domain SHOULD be supported.</t>
<t> Statistics for inter-domain paths on the ingress and egress PE nodes as well 
as border nodes SHOULD be supported.</t>
</list>
</t>
</section>
<section title="Service Mapping" anchor ='service_mapping'>
<t>
<list>
<t>The architecture SHOULD support the automated steering of traffic 
on to transport paths based on communities carried in the service prefix 
advertisements.</t>
<t> The architecture SHOULD support the steering of traffic 
on to transport paths based on the DSCP value carried in IPv4/IPv6 packets. </t>

<t>Traffic steering based on EXP bits in the MPLS header SHOULD be supported. </t>
 
<t>Traffic steering based on 5-tuple packet filter SHOULD be supported. 
Source address, destination address, source port, destination port
and protocol fields should be allowed.</t>

<t> All traffic steering mechanisms SHOULD be supported for all kinds
of service traffic including VPN traffic as well as global internet traffic.</t>

<t>The core domain is expected to have more traffic engineering constraints 
as compared to metros. The ability to map the
services to appropriate transport tunnels at service attachment 
points SHOULD be supported. </t>
</list>
</t>
</section>
</section>

<section title="Seamless Segment Routing architecture" anchor ='architecture'>

<section title="Solution Concepts" anchor='seamless_sr_terminology'>
<t> 
<figure anchor="solution_concepts" title="Solution Concepts">
      <artwork>
The solution described below makes use of the following concepts.
The definitions from the draft-kaliraj-idr-bgp-classful-transport-planes have
been reproduced here for readability. In case of any conflicts, text from
kaliraj-idr-bgp-classful-transport-planes should be used.

   o  Transport Class (TC): A Transport Class is defined as a collection of
      end-to-end MPLS paths that satisfy a set of constraints or  
      Service Level Agreements. 
   
   o  BGP-Classful Transport (BGP-CT): A new BGP family used to 
      establish Transport Class paths across different domains.

   o  Route Distinguisher (RD):  The Route Distinguisher is
      defined in RFC4364.  In BGP-CT, the RD is used in BGP advertisements 
      to differentiate multiple paths to the same loopback address.
      It may be useful to automatically generate RDs in order to
      simplify configuration.   
      
   o  Route Target (RT): The Route Target extended community is 
      carried in BGP-CT advertisements. The RT represents the Transport Class 
      of an advertised path.  Note that the RT is only carried in 
      the BGP-CT advertisements. No BGP-VPN related configuration or
      VPN family advertisements are needed when BGP-CT transport paths are used
      to carry non-VPN traffic.  
   
   o  Mapping Community (MC): The Mapping Community is a BGP extended community
      as defined in RFC4360. In the Seamless SR architecture, 
      an MC is carried by a BGP-CT route and/or a service route.  
      The MC is used to identify the specific local policy used 
      to map traffic for a service route to different Transport Class paths. 
      When a mapping community is advertised in a BGP-CT route it 
      identifies the specific local policy used to map the BGP-CT
      route to the intra-domain tunnels. The local policy can include 
      additional traffic steering properties for placing traffic on different 
      Transport Class paths.  The values of the MCs and the 
      corresponding local policies for service mapping are defined 
      by the network operator.
      </artwork>
      </figure>

</t>
</section>

<section title="BGP Classful Transport" anchor ='bgp_ct'>
<figure anchor="reference_diagram5" title="WAN Network">
      <artwork>
        
              ----IBGP------EBGP----IBGP------EBGP-----IBGP---
             |            |     |           |     |           |
        
              +-----------+     +-----------+     +-----------+   
              |           |     |           |     |           |
              |        ASBR1+--+ASBR2    ASBR3+--+ASBR4       | 
           PE1+     D1    |  X  |     D2    |  X  |     D3    +PE2
              |        ASBR5+--+ASBR6    ASBR7+--+ASBR8       |
              |           |     |           |     |           |
              +-----+-----+     +-----------+     +-----------+
                   PE3                                 

              |---ISIS1---|      |---ISIS2---|      |---ISIS3---|
              
              
   
    </artwork>
    </figure>
<t> The above diagram shows a WAN network divided into 
3 different domains. Within each domain, BGP sessions 
are established between the PE nodes and the border nodes
as well as between border nodes.  BGP sessions are also 
established between border nodes across domains.  
The goal is for PE1 to have MPLS connectivity to PE2, satisfying
specific characteristics.  Multiple MPLS paths from PE1 to PE2 
are required in order to satisfy different SLAs. 
<xref target ='I-D.kaliraj-idr-bgp-classful-transport-planes'/>
defines a new BGP family called BGP-Classful Transport.  
The NLRI for this new family consists of a prefix and a Route Distinguisher.
The prefix corresponds to the loopback of the destination PE, and 
RD is used to distinguish different paths to the same PE loopback.
The BGP-CT advertisement also carries a Route Target.  The RT specifies
the Transport Class to which the BGP-CT advertisement belongs.
BGP-CT mechanisms are applicable to single ownership networks that are organized
into multiple domains. They are also applicable to multiple ASes
with different ownership but closely co-operating administration. 
BGP-CT mechanisms are not expected to be applied on the internet peering 
or between domains that have completely independent administrations.</t> 

 
 <t>
 <figure anchor="reference_diagram7" title="BGP-CT Advertisements and Label Stacks">
      <artwork>
        
        
        
        
        
                 BGP-CT advertisements for red Transport Class
        
            Prefix:PE2    Prefix:PE2  Prefix:PE2   Prefix:PE2   Prefix:PE2
            RD:RD1        RD:RD1      RD:RD1       RD:RD1       RD:RD1
            RT:Red        RT:Red      RT:Red       RT:Red       RT:Red(100)
            nh:ASBR1      nh:ASBR2    nh:ASBR3     nh:ASBR4     nh:PE2
            Label:L1      Label:L2    Label:L3     Label:L4     Label:L5
            
            
        PE1-------ASBR1------ASBR2---------ASBR3-------ASBR4--------PE2
        
                                                              VPNa Prefix: 
                                                              10.1.1.1/32
                                                              RD: RD50    
                                                              RT: RT-VPNa
                                                              ext-community:
                                                              Red(100)
                                                              nh: PE2
                                                              Label: S1
        
            +------+              +------+                   +------+
            | IL71 |              | IL72 |                   | IL73 | 
            +------+   +------+   +------+      +------+     +------+
            | L1   |   | L2   |   |  L3  |      | L4   |     |  L5  |  
            +------+   +------+   +------+      +------+     +------+
            | S1   |   | S1   |   |  S1  |      | S1   |     |  S1  | 
            +------+   +------+   +------+      +------+     +------+

                      Label stacks along end-to-end path
                      S1 is the end-to-end service label.
            IL71, IL72, and IL73 are intra-domain labels corresponding to
                            red intra-domain paths.
 </artwork>
</figure>                           
 <figure anchor="reference_diagram7a" title="BGP-CT Advertisements and Label Stacks">
      <artwork>                         

              
        
                  BGP-CT advertisements for blue Transport Class        
        
            Prefix:PE2    Prefix:PE2  Prefix:PE2   Prefix:PE2   Prefix:PE2
            RD:RD2        RD:RD2      RD:RD2       RD:RD2       RD:RD2
            RT:Blue       RT:Blue     RT:Blue      RT:Blue      RT:Blue(200)
            nh:ASBR1      nh:ASBR2    nh:ASBR3     nh:ASBR4     nh:PE2
            Label:L11     Label:L12   Label:L13    Label:L14    Label:L15
            
            
        PE1-------ASBR1----ASBR2----------ASBR3-------ASBR4--------PE2
        
                                                              VPNb Prefix: 
                                                              10.1.1.1/32
                                                              RD: RD51    
                                                              RT: RT-VPNb
                                                              ext-community:
                                                              Blue(200)
                                                              nh: PE2
                                                              Label: S2
        
            +------+              +------+                   +------+
            | IL81 |              | IL82 |                   | IL83 | 
            +------+   +------+   +------+      +------+     +------+
            | L11  |   | L12  |   |  L13 |      | L14  |     |  L15 | 
            +------+   +------+   +------+      +------+     +------+
            | S2   |   | S2   |   |  S2  |      | S2   |     |  S2  | 
            +------+   +------+   +------+      +------+     +------+

                      Label stacks along end-to-end path
                      S2 is the end-to-end service label.
            IL81, IL82, and IL83 are intra-domain labels corresponding to
                            blue intra-domain paths.    
   
    </artwork>
    </figure>
    
 </t>
 <t>For example, consider the diagrams in <xref target ='reference_diagram7'/> and
<xref target ='reference_diagram7a'/>.
 The diagrams show the BGP-CT advertisements corresponding to two different
 end-to-end paths between PE1 and PE2.  The two different paths belong
 to two different Transport Classes, red and blue.  
 </t>
 <t>
 The inter-domain paths created by BGP-CT Transport Classes can be used
 by any traffic that can be steered using BGP next-hop resolution, including
 vanilla IPv4 and IPv6, L2VPN, L3VPN, and eVPN.  In the example above, we 
 show how traffic from two different L3VPNs (VPNa and VPNb) is mapped onto 
 two different BGP-CT Transport Classes (Red and Blue).  The L3VPN advertisements
 for VPNa and VPNb are originated by PE2 as usual.  PE1 receives these L3VPN advertisements
 and uses the next-hop in the L3VPN advertisements to determine the path to 
 use.  In the absence of any BGP-CT Transport Classes in the network, PE1 would likely
 resolve the L3VPN next-hop over BGP-LU routes corresponding to the BGP best path.  
 However, when BGP-CT Transport Classes are used, PE1 will resolve the L3VPN next-hop over
 a BGP-CT route.  
 </t>
 <t>
 In the example above, PE2 originates BGP-CT advertisements for the Red and Blue 
 Transport Classes.  These BGP-CT advertisements propagate across the multiple 
 domains, causing forwarding state for the two Transport Classes to be installed
 at ABRs along the way. In order to create unique NLRIs for the two advertisements, 
 PE2 uses two different RDs. In the example above, the red BGP-CT advertisement 
 has an RD of RD1 and the blue BGP-CT advertisement has an RD of RD2.
 Note that the RD values used in the BGP-CT advertisement are completely independent
 of the RD values used in the L3VPN advertisements.  In both cases, the RD values
 are simply a mechanism to guarantee uniqueness of a prefix/RD pair.
</t>
 <t>
 The RT values used in the BGP-CT advertisements are unrelated to the RT values used on the L3VPN
 advertisements.  The L3VPN RT values identify VPN membership, as usual.  
 The BGP-CT RT values identify Transport Class membership. 
 In order to be able to easily map VPN traffic into BGP-CT Transport classes,
 it can be useful however to make an association between BGP-CT RT values and
 color extended community values in the L3VPN advertisements. 
 In the example above, the RT value carried in the BGP-CT advertisement originated from 
 PE2 for the red Transport Class is configured to correspond to the color extended community 
 advertised in the VPN advertisement for VPNa.  Similarly, 
 the RT value for the blue Transport Class corresponds to the color extended community for VPNb. 
 In this way, traffic on PE1 for each VPN can be mapped to a Transport Class path by 
 associating the value of the color extended community carried in the VPN advertisement
 with an RT value carried in a BGP-CT advertisement.  
 </t>
 <t>
 The example above also shows the label stacks
 at different points along the end-to-end paths for the forwarding entries 
 which are established by the two advertisements.
 Labels L1-L4 are red BGP-CT labels advertised by border nodes ASBR1, 2, 3, and 4, while
 label L5 is advertised by PE2 for the red Transport Class.  Labels L11-L14 are
 blue BGP-CT labels advertised by border nodes ASBR1, 2, 3, and 4, while label L15
 is advertised by PE2 for the blue Transport Class.  
 </t>
 <t> IL71, IL72, and IL73 represent tunnels internal to the domains 1, 2, and 3 which 
 correspond to the red Transport Class.  IL81, IL82, and IL83 represent tunnels
 internal to the domains 1, 2, and 3 which correspond to the blue Transport Class. 
 In this example, we assume that the intra-domain tunnels correspond to SRTE policies 
 having red SRTE-policy-color and blue SRTE-policy-color.  
 Service labels are represented by S1 and S2. 
 </t>

 <t> 
 Note that this example focuses on how signalling originated by PE2 results in forwarding
 state used by PE1 to reach PE2 on a specific Transport Class path.  The solution supports the 
 establishment of forwarding state for an arbitrary number of PEs to reach PE2.  For example, 
 PE3 in <xref target ='reference_diagram7'/> can reach PE2 on a red Transport Class path 
 established using the same BGP-CT signalling.   The signalling and forwarding state 
 from ASBR1 all the way to PE2 is common to the paths used by both PE1 and PE3. 
 This merging of signalling and forwarding state is essential to the good 
 scaling properties of the Seamless SR architecture.  
 Millions of end-to-end Transport Class paths can be established in a scalable manner.
 </t>
 
</section>


<section title="Automatically Creating Transport Classes" anchor ='auto-creating-tc'>
<t>  In order to simplify the creation of inter-domain paths, it may be desirable to 
automatically advertise a BGP-CT Transport Class based on the existence of an 
intra-domain tunnel.  The RT value used on the BGP-CT advertisement is automatically derived from 
a property of the intra-domain tunnel that triggered its creation. How the Transport Class RT value
is derived for different types of intra-domain tunnels is discussed below.  
</t>

<section title="Automatically Creating Transport Classes for BGP-SR-TE Intra-domain Tunnels" anchor ='auto-deriving-bgp-sr-te'>
<t>
 When the intra-domain tunnel is a BGP-SR-TE policy 
 <xref target ='I-D.ietf-idr-segment-routing-te-policy'/>, 
 the value of the Transport Class RT in the corresponding BGP-CT advertisement
 is derived from the Policy Color contained in SR Policy NLRI.  
 The 32-bit Policy Color is directly converted to a 32-bit Transport Class RT.
</t>
</section>

<section title="Automatically Creating Transport Classes for Flex-Algo Tunnels" anchor ='auto-deriving-flex-algo'>
<t>
When the intra-domain tunnel is created using Flex-Algo 
<xref target ='I-D.ietf-lsr-flex-algo'/>, 
the value of the Transport Class RT in the corresponding BGP-CT advertisement
is derived from the 8-bit Algorithm value carried in
SR-Algorithm sub-TLV (RFC8667).  The conversion from 8-bit Algorithm value to 
32-bit Transport Class RT is done by treating both as unsigned integers.
Note that this definition allows for intra-domain tunnels created via standardized 
algorithm (0-127) as well as flex-algo (128-255).
</t>
</section>

<section title="Auto-deriving Transport Classes for PCEP" anchor ='auto-deriving-pcep'>
<t>
When the intra-domain tunnel is created using PCEP, 
the value of the Transport Class RT in the corresponding BGP-CT advertisement
is derived from the Color of the SR Policy Identifiers TLV defined 
in <xref target ='I-D.ietf-pce-segment-routing-policy-cp'/>.
The 32-bit Color is directly converted to a 32-bit Transport Class RT.
</t>
</section>

</section>



<section title="Inter-domain flex-algo with BGP-CT" anchor ='inter-domain-flex-algo'>
<t>
Flex-algo (defined in <xref target ='I-D.ietf-lsr-flex-algo'/>) provides a mechanism 
to separate routing planes. Multiple algorithms are defined and prefix-SIDs are advertised for
each algorithm. BGP-CT can be used to advertise these flex-algo SIDs.
BGP Prefix-SID (RFC 8669) is an attribute and can be carried in the BGP-CT NLRI.
Multiple Transport Classes, one corresponding to each flex-algo in the IGP domain,
are defined. These Transport Classes advertise the IGP flex-algo SIDs in the Prefix-SID
attribute in the BGP-CT NLRI.
</t>
</section>

<section title="Applicability to color-only policies" anchor ='color-only'>
<t>
 Color-only policies consist of (nullEndpoint, color) as specified in 
 <xref target ='I-D.ietf-spring-segment-routing-policy'/>. Special steering mechanisms
 are defined with "CO" flags defined in the color extended community 
 <xref target ='I-D.ietf-idr-segment-routing-te-policy'/>. Color-only policies 
 can be advertised in BGP-CT with the prefix being NULL (0.0.0.0/32 or 0::0/128).
 A separate RD will be advertised for each NULL advertisement with a different color.
 The Route target carries the Policy Color contained in SR Policy NLRI.
 
 The steering mechanisms defined in  <xref target ='I-D.ietf-spring-segment-routing-policy'/>
 MUST be honoured while resolving services prefixes on the BGP-CT advertisements.
</t>
</section>



<section title="Data sovereignty" anchor ='data-sovereignty'>
<figure anchor="reference_diagram7b" title="Multi domain Network">
      <artwork>
       
              +-----------+     +-----------+     +-----------+   
              |           |     |  +-+  AS2 |     |           |
              |           A1+--+A2 | |      A3+--+A4          | 
           PE1+    AS1    |     |  |Z|      |     |     AS3   +PE3
              |           A5+--+A6 | |      A7+--+A8          |
              |           |     |  +-+      |     |           |
              +--A13--A15-+     +-A17--A19--+     +-----------+
                 |     |           |    |                  
                 |     |           |    |
                 |     |           |    | 
              +--A14--A16-+     +-A18--A20--+       
              |           |     |           |     
              |          A9+--+A10          |
           PE4+   AS4     |     |   AS5     |  
              |          A11+-+A12          |
              |           |     |           | 
              +-----------+     +-----------+ 
                           
    </artwork>
    </figure>
    <t> Consider a WAN network with multiple ASes as shown in the diagram
    <xref target ='reference_diagram7b'/>. The ASes roughly correspond to the 
    geographical location of the nodes.  In this example, we assume that 
	each AS corresponds to a continent.  The data 
    sovereignty requirement in this example is that certain traffic from PE1 (in AS1) to
	PE3 (in AS3) must not cross through country Z in AS2. As indicated by the location
	of country Z in the diagram, all paths that go directly from AS1 to AS3 through AS2 
	necessarily pass through country Z.  Using BGP-LU to provide connectivity 
	from PE1 to PE3 would generally result in a path that goes from AS1 to AS2 to AS3, 
	which does not satisfy the data sovereignty requirement in this example.  
	Instead, the solution using BGP-CT will go from 
    AS1 to AS4 to AS5 to AS2 to AS3.  BGP-CT will ensure that when the traffic passes
	through AS2, only intra-domain paths satisfying the 
	data sovereignty requirement will be used.
	</t>
	<t>
	
    Within AS2, there are several different intra-domain TE mechanisms that can be used to 
    exclude links that pass through country Z. For example, RSVP-TE or flex-algo
	can be used to create intra-domain paths that satisfy the data sovereignty requirement.
	BGP-CT allows the constrained intra-domain paths to satisfy requirements for end-to-end
	inter-domain paths.  LSPs created by RSVP-TE or Flex-algo that satisfy the
	"exclude country Z" constraint are associated with a color Green. 	
	A Green Transport Class is defined on border nodes in all ASes.
	This Green Transport Class is associated with a mapping community called Not-Z.
	</t>
    <t>
	In AS2, the ASBRs are configured such that the presence of the mapping 
	community Not-Z in BGP-CT routes results in a strict route resolution
	mechanism for those routes.  A BGP-CT route
	carrying the color extended community Not-Z will only resolve on the Green Transport Class.  So
	it will only use Green intra-domain tunnels.  
    </t>
    <t>
    In AS1, AS3, AS4, and AS5, no links pass through country Z, so all intra-domain paths
	automatically satisfy the data sovereignty requirement. 
	So there is no need for the creation of Green
    intra-domain tunnels. In these ASes, the presence of the mapping community Not-Z in 
	BGP-CT routes results in resolution on best-effort paths.  Even though the ASBRs in 
	these ASes do not need to create Green
	intra-domain tunnels, they still need to allocate labels to identify traffic
	using the Green Transport 
	Class.  These labels will be used by the ASBRs in AS2 to put traffic on
	the Green intra-domain
	tunnels in AS2.  
    </t>
    <t>
    The requirement is that only a subset of traffic honor the data sovereignty requirement.  
    The service prefixes from PE1 to PE3 that need to honor the data sovereignty requirement
	will be associated with the Green color extended community in the service advertisements.
	This will result in PE1 using the BGP-CT labels corresponding to {PE3, Green} to
	forward the traffic.
	BGP-CT labels corresponding to {PE3, Green} will exist at every ASBR along the path.
	 
	
	The traffic originating on PE1
    will be associated with the Green color community. The bottom-most label in the packet consists of a VPN label.
    Above the VPN label, the BGP-CT label is imposed. Above the BGP-CT label, the intra-domain transport label is imposed.
    Let us assume the traffic from PE1 needs to go to PE3 through AS1, AS4, AS5, AS2, and AS3.
	The BGP-CT label
    for {PE3, Green} will be swapped at the border nodes.
    </t>

    <t> 
	Note that end-to-end inter-domain data sovereignty can in principle be accomplished using 
	BGP-LU  with multiple loopbacks and associating those loopbacks to appropriate transport
	tunnels at every border node in every domain.  This is very configuration intensive and requires
	multiple loopbacks. BGP-CT builds on the basic mechanisms of BGP-LU while greatly simplifying
	such use cases.  
    </t>
	
</section>

<section title="Interconnecting IP Fabric Data Centers" anchor ='ip_fabric'>
<figure anchor="reference_diagram7c" title="Operation in IP fabric">
      <artwork>
      
      
            Prefix:TOR2   Prefix:TOR2 Prefix:TOR2  Prefix:TOR2  Prefix:TOR2
            RD:RD2        RD:RD2      RD:RD2       RD:RD2       RD:RD2
            RT:Blue       RT:Blue     RT:Blue      RT:Blue      RT:Blue
            nh:ASBR1      nh:ASBR2    nh:ASBR3     nh:ASBR4     nh:TOR2
            Label:L11     Label:L12   Label:L13    Label:L14    Label:L15
        
          +-----------+       +-----------+        +-----------+   
          |           ASBR1  ASBR2     ASBR3      ASBR4        |  
          |           |       |           |        |           |           
      TOR1+  DC1      +-------+  CORE     +--------+  DC2      +TOR2
          |           ASBR11 ASBR22     ASBR33    ASBR44       |
          |           |       |           |        |           |
          +-----------+       +-----------+        +-----------+
     
     
            +------+              +------+                   +------+
            | UDP  |              | IL82 |                   |  UDP | 
            +------+   +------+   +------+      +------+     +------+
            | L11  |   | L12  |   |  L13 |      | L14  |     |  L15 | 
            +------+   +------+   +------+      +------+     +------+
            | S2   |   | S2   |   |  S2  |      | S2   |     |  S2  | 
            +------+   +------+   +------+      +------+     +------+


              Label stacks along end-to-end path
                      S2 is the end-to-end service label.
            IL82 is the intra-domain label corresponding to
                            the blue intra-domain path.
   
    </artwork>
    </figure>
    <t> Many data center networks consist of IP fabrics which do not have MPLS packet processing
capability. A common requirement is that traffic originated from an IP Fabric data center
needs to satisfy certain constraints in the MPLS-enabled core, for example, 
only using a subset of links (blue links). 
It is useful for the traffic originating in an IP Fabric DC to carry 
information that allows the MPLS-enabled core to treat it accordingly.
MPLSoUDP, as defined in 
<xref target ='RFC7510'/>, is a mechanism where a UDP header is imposed on MPLS packets on the border nodes.
In  <xref target ='reference_diagram7c'/> above, the traffic needs to take blue paths in the core. The
Blue Transport Class is defined on the ASBRs.   In the core, Blue intra-domain tunnels are created. 
The BGP-CT advertisements for the Blue Transport Class are as shown in the diagram.  The BGP-CT 
advertisements originate at TOR2 and propagate through all the ASBRs, until finally reaching TOR1. 
Within DC1, traffic is encapsulated with a UDP header.  Traffic with the UDP header
gets decapsulated at ASBR1. The traffic follows Blue paths in the core.  
At ASBR4, the MPLS packet gets encapsulated with a UDP header.  
The UDP header is removed at TOR2, and the lookup will be done for the service label.
</t>
</section>
<section title="Translating Transport Classes across Domains" anchor ='stitching'>
<figure anchor="reference_diagram7d" title="Translating Transport Classes across Domains">
      <artwork>
      
      
                 Prefix:PE2        Prefix:PE2  Prefix:PE2
                 RD:RD2            RD:RD2      RD:RD2       
                 RT:Red            RT:Blue     RT:Blue      
                 nh:ASBR1          nh:ASBR2    nh:PE2   
                 Label:L11         Label:L12   Label:L13    
        
          +-----------+                +-----------+   
          |           ASBR1           ASBR2        |  
          |           |                |           |              
       PE1+  AS1      +----------------+    AS2    +PE2
          |           ASBR11          ASBR22       |        
          |           |                |           |       
          +-----------+                +-----------+         
     
            +------+              +------+                   
            | IL1  |              | IL2  |                  
            +------+   +------+   +------+      +------+    
            | L11  |   | L12  |   |  L13 |      | L14  |    
            +------+   +------+   +------+      +------+    
            | S2   |   | S2   |   |  S2  |      | S2   |     
            +------+   +------+   +------+      +------+    

              Label stacks along end-to-end path
                      S2 is the end-to-end service label.
            IL1 and IL2 are intra-domain labels corresponding to
                            red  intra-domain path in AS1 and Blue intra-domain
                            path in AS2.
   
    </artwork>
    </figure>
    <t> In certain scenarios, the TE intent represented by Transport Classes may 
	differ from one domain to another. This could be the result of two independent 
	organizations merging into one.  It could also occur when two ASes are under
	different administration, but use BGP-CT to provide an end-to-end service.  In
	both scenarios, the same color may represent different intent in each domain. 
When the traffic needs to satisfy 
certain TE characteristics, the colors need to be mapped correctly at the border. In
the example in <xref target ='reference_diagram7d'/>, there are two ASes. 
The low latency TE intent is represented with the Red Transport Class in AS1 and
with the Blue Transport Class in AS2.
PE2 advertises a BGP-CT prefix with RT of Blue.
ASBR2 sets the nexthop to self and advertises a new label L12. On ASBR1, the Blue BGP-CT 
advertisement is imported into the Red Transport RIB and the advertisement from ASBR1 will carry a Red
RT. This ensures that the BGP-CT prefix for PE2 resolves on a Red intra-domain path in AS1.
</t>
    

</section>
<section title="SLA Guarantee" anchor ='seamless_sr_sla'>

<section title="Low latency" anchor ='low_latency'>
<t>Many network functions are virtualized and distributed.
Certain functions are time and latency sensitive. 
  In inter-domain networks, End-to-End 
latency measurement is required. Inside a domain, latency measurement
mechanisms such as TWAMP <xref target ='RFC5357'/> 
are used and link latency is advertised in IGP
using extensions described in <xref target ='RFC8570'/> and 
<xref target ='RFC7471'/>.
</t>

<t>
<xref target ='I-D.ietf-idr-performance-routing'/> 
extends the BGP 
AIGP attribute <xref target ='RFC7311'/> by adding a sub TLV to
carry an accumulated latency metric.
The BGP best path selection algorithm used for a Transport Class requiring low
latency will consider the accumulated latency metric to choose the lowest latency path.  </t>
</section>

<section title="Traffic Engineering (TE) constraints" anchor ='TE_constraints'>
<t>TE constraints generally include the ability to send traffic via certain nodes or links or
avoid using certain nodes or links.  
In the Seamless SR architecture, the intra-domain transport technology is responsible for 
ensuring the TE constraints inside the domain, while BGP-CT ensures that the end-to-end path is 
constructed from intra-domain paths and inter-AS links that individually satisfy the TE
constraints. </t>

<t>  For example, in order to construct a pair of diverse paths, we can define a red and a blue
Transport Class.  Within each domain, the red and blue Transport Class paths are realized using 
intra-domain path diversity mechanisms.  For example, in a domain using flex-algo, red and blue
Transport Classes are realized using red and blue flex-algo definitions (FAD) which don't share any links.  
To maintain path diversity on inter-AS links, BGP policies are used to associate 
two inter-AS peers with the red Transport Class and another two inter-AS peers with the
blue Transport Class.
</t>

</section>

<section title="Bandwidth constraints" anchor ='bandwidth'>
<t>  The Seamless SR architecture does not natively support 
end-to-end bandwidth reservations.  In this architecture, 
the bandwidth utilization characteristics of each domain are managed 
independently.  The intra-domain bandwidth management can 
make use of a variety of tools.  
</t>

<t>
Link bandwidth extended community as defined in <xref target ='I-D.ietf-idr-link-bandwidth'/>
allows for efficient weighted load-balancing of traffic on multiple BGP-CT
paths that belong to the same Transport Class.  For optimized path placement, 
a centralized TE system may be deployed with BGP policies/communities used for path placement. </t>

</section>
</section>
<section title="Scalability" anchor ='seamless_sr_scale'>

<section title="Access node scalability" anchor ='access_node'>
<t> The Seamless SR architecture needs to be able to accommodate 
very large numbers of access devices.  These access devices are 
expected to be low-end devices with limited FIB capacity.
The Seamless MPLS architecture, as described in 
<xref target = 'I-D.ietf-mpls-seamless-mpls'/>, 
recommends the use of LDP DOD mode to
limit the size of both the RIB and the FIB needed on the access devices. 
In the Seamless SR architecture, networks use IGP-based label distribution and
do not have this selective label request mechanism. 
However, RIB scalability of access nodes has not been
a problem for real seamless MPLS deployments. In cases where access devices
are low on CPU and memory and unable to support a large RIB, BGP filtering
policies can be applied at the ABR/ASBR routers to restrict the number 
of BGP-CT advertisements towards the access devices. The access devices
will receive only the PE loopbacks that they need to connect to. </t> 
</section>

<section title="Label stack depth" anchor ='label_stack_depth'>
<t>The ability for a device to push multiple MPLS labels on a packet depends
on hardware capabilities.  Access devices are expected to have limited label stack push 
capabilities.  Assuming shortest path SR-MPLS in the access domain, the access
domain transport will use a single label. Lightweight traffic-engineering and 
slicing could also be achieved with a single label as described in 
<xref target ='I-D.ietf-lsr-flex-algo'/>. The Seamless SR architecture can
 provide cross-domain MPLS connectivity with a single label.
 Assuming the use of a service label, end-to-end
connectivity is provided by pushing one service label, one BGP-CT label, and one intra-domain 
transport label (which could also be a Binding-SID).  
Therefore, access nodes will only need to be able to push 3 labels for most
applications.
</t>
</section>
<section title="Label Resources" anchor ='label_resources'>
<figure anchor="reference_diagram6" title="Recursive Route Resolution">
      <artwork>
        
               -----IBGP----- -----IBGP----- -----IBGP------
              |              |              |              |  
                                   
                                                         BGP-CT Advt:
                                                         Prefix: 2.2.2.2 (PE2 loopback)
                                                         RD:20000
                                                         RT: 128
                          Label:100       Label:100      Label:101
                          Next hop:ABR3   Next hop:ABR3  Next hop: PE2
        ----------------------------------------------------------------                  
                          
                                          BGP-CT Advt: 
                                          Prefix: 30.30.30.30 (ABR3 loopback)
                                          RD:30000
                                          RT:128
                        Label:2000        Label:2001
                        Nexthop:ABR1      Nexthop:ABR3
                   
               +-----------+   +------------+  +-----------+
              /             \ /              \/             \             
              |             ABR1            ABR3            |
              |              |               |              |             
           PE1+    Metro1    +     Core      +    Metro2    +PE2
              |              |               |              |
              |             ABR2            ABR4            |
              \              /\             /\              /
               +------------+  +-----------+  +------------+
     

                 |-ISIS1-|      |-ISIS2-|       |-ISIS3-|
                   
                 +------+        +------+        +------+
                 | 11111|        | 22222|        | 33333|    IGP-labels:
                 +------+        +------+        +------+    11111,22222,33333
                 | 2000 |        | 2001 |        | 101  |    BGP-CT label:
                 +------+        +------+        +------+    For ABR3:
                 | 100  |        | 100  |        | VPN  |    2000,2001
                 +------+        +------+        +------+    For PE2:
                 | VPN  |        | VPN  |                    100, 101
                 +------+        +------+
                 
                 

      
    </artwork>
    </figure>
    <t>The label resources are an important consideration in MPLS networks.
    On access devices, labels are consumed by services 
    as well as for transport loopbacks inside IGP domain where the access
    device resides. For example, in the above diagram PE1 would have to allocate
    label resources equal to the number of customers connecting 
    (i.e. the number of L2/L3 VPNs).
    Based on the size of the IGP domain that PE1 resides in, it will also 
    have to allocate labels for IGP loopbacks.  This number is at most a few 
    thousands. So overall a typical access device should have adequate label 
    resources in Seamless SR architecture.   
    The P routers need to allocate labels for IGP loopbacks. This number again 
    is small.  At most it will be a few thousand based on number of nodes in the 
    largest IGP domains.   
    The metro networks connect to the core network through ABRs. It is possible 
    that a given ABR may end up having to maintain forwarding entries for a large
    subset of the transport loopback routes.  There may be a large number of metro
    networks connecting to a given ABR, and in this case, the ABR will
    need forwarding entries for every access node in the directly 
    connected metros. So, this ABR may have to maintain on the order of 100k routes. 
    With BGP-CT each Transport Class will have to be separately
    allocated a label. So, in the above example, the ABR1 would have to use
    300k labels if there were 3 Transport Classes.  This large number of 
    label forwarding entries could be problematic. 
</t>
<t> 
In highly scaled scenarios, it is therefore desirable to 
reduce the forwarding state on the ABRs. 
This reduction can be achieved with label stacking as a result of 
recursive route resolution. <xref target ='reference_diagram6'/> illustrates
how the forwarding state on ABRs can be greatly reduced by removing 
forwarding state for PEs in remote domains from the ABRs.  In this example, we 
assume that we are setting up end-to-end paths for a single Transport Class, 
for example red.  PE2 advertises a BGP-CT prefix of 2.2.2.2 
with nexthop of 2.2.2.2 and label 101. 2.2.2.2 is PE2's loopback.
ABR3 advertises label 100 for BGP-CT prefix 2.2.2.2 and changes the nexthop to self.
When ABR1 receives the BGP-CT advertisement for 2.2.2.2, it does not change the nexthop 
and advertises the same label advertised by ABR3.  When PE1 receives the BGP-CT
advertisement for 2.2.2.2 with a nexthop of ABR3, it resolves the route 
using reachability to ABR3. 
</t>
<t>  
The reachability of ABR3 has been learned by PE1 as the result of a BGP-CT advertisement
originated by ABR3. As shown in  <xref target ='reference_diagram6'/>, 
ABR3 advertises BGP-CT prefix 30.30.30.30 with label 2001.  ABR1 advertises label 2000 
for BGP-CT prefix 30.30.30.30 and sets nexthop to self. 
PE1 constructs the service data packet with a VPN label
at the bottom followed by 2 BGP-CT labels 100 and 2000. The topmost label 2000 is the
transport label for the metro1 domain. Removing the forwarding state for PEs in remote 
domains on the ABRs comes at the expense of one additional BGP-CT label on the data packet.
</t>
<t>
Recursive route resolution provides significant forwarding state reduction on the ABRs.
ABRs have to allocate label resources only for the PEs in their local domain. 
The number of PEs in the same domain as a given ABR is much lower than the total number
of PEs in the network.</t>
<t>
The examples in this draft generally show VPN routes resolving on BGP-CT prefixes.
However, the mechanisms are equally applicable to non-VPN routes.    
</t>

</section>
</section>
<section title="Availability" anchor ='seamless_sr_reliability'>
<t>Transport layer availability is very important in latency and
loss sensitive networks. Any link
or node failure must be repaired within a 50 ms convergence time.
A 50 ms convergence time can be achieved with Fast ReRoute (FRR) mechanisms.
The seamless SR architecture provides protection against intra-domain link and node failures.
Protection against border node failures and egress link and node failures is 
also provided. Details of the FRR techniques are described in the sections below.</t>

<section title="Intra domain link and node protection" anchor ='intra'>
<t>In the seamless SR architecture, protection against node and link failure 
is achieved with the relevant FRR techniques for the corresponding
 transport mechanism used inside the domain. In the case of an IP fabric,
 ECMP FRR or LFA can be used. In SR networks, TI-LFA
 <xref target ='I-D.ietf-rtgwg-segment-routing-ti-lfa'/> provides link and node 
 protection.  For SR-TE transport (<xref target ='I-D.ietf-spring-segment-routing-policy'/>),
 link and node protection can be achieved using TI-LFA, combined with mechanisms described in 
<xref target ='I-D.hegde-spring-node-protection-for-sr-te-paths'/>.</t>
</section>
<section title="Egress link and node protection" anchor ='egress'>
<t><xref target="RFC8679"/> describes the mechanisms for
 providing protection for border nodes and PE devices where services are hosted. 
 The mechanism can be further simplified operationally with anycast SIDs 
 and anycast service labels, as described in 
 <xref target ='I-D.hegde-rtgwg-egress-protection-sr-networks'/>. </t>
</section>
<section title="Border Node protection" anchor ='border'>
<t>Border node protection is very important in a network consisting of multiple domains. 
Seamless SR architecture can achieve 50ms FRR protection 
in the event of node failure using anycast addresses for the ABR/ASBRs.  
This requires that a set of ABRs advertise the same label for a given BGP-CT Prefix.
The detailed mechanism is described in <xref target ='I-D.hegde-rtgwg-egress-protection-sr-networks'/>.</t>
</section>
</section>
<section title="Operations" anchor ='seamless_sr_ops'>
<section title="MPLS ping and Traceroute" anchor ='ping_traceroute'>
<t>The Seamless SR Architecture consists of 3 layers: the service layer,
intra-domain transport, and BGP-CT transport.  Within each layer,
connectivity can be verified independently. Within the BGP-CT 
transport layer, end-to-end connectivity can be verified using a new OAM FEC 
for BGP-CT defined in draft <xref target ='I-D.kaliraj-idr-bgp-classful-transport-planes'/>. 
The draft describes end-to-end connectivity verification as well as fault isolation. BGP-CT
verification happens only on the BGP nodes. The intra-domain 
connectivity verification and fault isolation will be based on
the technology deployed in that domain as defined in <xref target ='RFC8029'/>
and <xref target ='RFC8287'/>.</t>
</section>
<section title="Counters and Statistics" anchor ='stats'>
<t>Traffic accounting and the ability to build a demand matrix
for PE to PE traffic are very important. With BGP-CT, 
per-label transit counters should be supported on 
every transit router. Per-label transit counters provide details of total
traffic towards a remote PE measured at every BGP transit router.
Per-label egress counters should be supported on the ingress PE router.
Per-label egress counters provide total traffic from ingress PE to the
specific remote PE.</t>
</section>
</section>
<section title="Service Mapping" anchor ='seamless_sr_service_mapping'>
<t>Service mapping is an important aspect of any architecture. It provides
a means to translate end users' SLA requirements into the operator's network
configurations. Seamless SR architecture supports automatic steering
with extended color community. The Transport Class and the route target
carried by the BGP-CT advertisement directly map to the extended color
community. Services that require a specific SLA carry the extended color community
which maps to the Transport Class to which the BGP-CT advertisement belongs.</t>
<t> Other types of traffic steering, such as DSCP based forwarding, are expressed with
a mapping community. A mapping community is a standard BGP community and is completely
generic and user defined. The mapping community will have a specific service mapping
feature associated with it along with the required fallback behaviour when the primary
transport goes down. The list below provides a general guideline on the different
service mapping features and fallback options an implementation should provide.
<list>
<t>DSCP based mapping with each DSCP mapping to a Transport Class.</t>
<t>DSCP based mapping with default mapping to a best-effort transport.</t>
<t>DSCP based mapping with fallback to best-effort when primary transport 
tunnel goes down.</t>
<t>Extended color community based mapping with fallback to best-effort.</t>
<t>Fallback options with specific protocol during migrations.</t>
<t>Fallback options to a different Transport Class.</t>
<t>No Fallback permitted.</t>
</list>
</t>

</section>

<section title="Migrations" anchor ='migration'>
<t>Networks that migrate from Seamless MPLS architecture to Seamless SR architecture require
that all the border nodes and PE devices be upgraded and enabled with the new family on the BGP
session. In cases where legacy nodes cannot be upgraded, exporting from BGP-LU into
BGP-CT and vice versa SHOULD be supported. Once the entire network is 
migrated to support BGP-CT, there is no need to run BGP-LU family on the BGP sessions.
BGP-CT itself can advertise a best effort Transport Class and BGP-LU family can be removed.</t>
</section>

<section title="Interworking with v6 transport technologies" anchor ='interworking'>
<t>A later version of this document will address interworking with other v6 technologies, 
including SRv6, SRm6, and MPLS over GRE6.</t>
</section>


<section title="BGP based Multicast" anchor ='bgp_multicast'>
<t>BGP based multicast as described in draft 
<xref target ='I-D.zzhang-bess-bgp-multicast'/> serves two 
main purposes. It can replace PIM/mLDP inside a domain to
natively do a BGP based multicast. It can also serve as an
overlay stitching protocol to stitch multiple P2MP LSPs across the
domain. This gives the ability to easily transition each domain independently
from one technology to the other. BGP based multicast defines a new SAFI,
the MULTICAST TREE SAFI. Different route types are defined
to support the various use cases. </t>
</section>


</section>

  <section anchor='backward_compatibility' title='Backward Compatibility'>
         <t></t>
         
</section>
  <section title='Security Considerations' anchor='sec-con'>
    <t>TBD</t>
  </section>
  <section anchor="IANA" title="IANA Considerations">
    <t> </t>
  </section>
   <section title='Acknowledgements' anchor='ack'>
    <t>Many thanks to Kireeti Kompella, Ron Bonica, Krzysztof Szarcowitz, 
        Srihari Sangli, Julian Lucek, and Ram Santhanakrishnan 
       for discussions and inputs. Thanks to Joel Halpern for review and comments.</t>
  </section>
     <section title='Contributors'>
    <t>1. Kaliraj Vairavakkalai</t>
    <t>Juniper Networks</t>
    <t>kaliraj@juniper.net</t>
    
    <t>   </t>
    <t>2. Jeffrey Zhang</t>
    <t>Juniper Networks</t>
    <t>zzhang@juniper.net</t>
    
    
  </section>
  
</middle>

<back>
  <references title='Normative References'>
    
    
    &RFC2119;
    &RFC3107;
    &RFC8669;
 
   <?rfc include="reference.I-D.kaliraj-idr-bgp-classful-transport-planes"?>
   <?rfc include="reference.I-D.hegde-rtgwg-egress-protection-sr-networks"?>
   <?rfc include="reference.I-D.ietf-idr-performance-routing"?>
   <?rfc include="reference.I-D.zzhang-bess-bgp-multicast"?>
  </references>
  
   <references title='Informative References'>  
    &RFC8029;
    &RFC8287;
    &RFC4364;
    &RFC7311;
    &RFC1997;
    &RFC8679;
    &RFC8570;
    &RFC7471;
    &RFC6388;
    &RFC7510;
   <?rfc include="reference.I-D.ietf-lsr-flex-algo"?>
   <?rfc include="reference.I-D.ietf-mpls-seamless-mpls"?>  
   <?rfc include="reference.I-D.hegde-spring-node-protection-for-sr-te-paths"?>
   <?rfc include="reference.I-D.ietf-spring-segment-routing-policy"?>
   <?rfc include="reference.I-D.ietf-rtgwg-segment-routing-ti-lfa"?>
   <?rfc include="reference.I-D.ietf-idr-tunnel-encaps"?>
   <?rfc include="reference.I-D.ietf-idr-segment-routing-te-policy"?>
   <?rfc include="reference.I-D.voyer-pim-sr-p2mp-policy"?>
   <?rfc include="reference.I-D.ietf-idr-link-bandwidth"?>
   <?rfc include="reference.I-D.ietf-pce-segment-routing-policy-cp"?>
   
    &RFC8402;
    &RFC5357;
    
    <reference anchor='TS.23.501-3GPP'>
        <front>
        <title>System Architecture for 5G System; Stage 2, 3GPP TS 23.501 v16.4.0</title>
        <author>
        <organization>
        3rd Generation Partnership Project (3GPP)
        </organization>
        </author>
        <date month="March" year="2020"/>
        </front>
      </reference>
    
  </references>
 </back>
</rfc>
