<?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd">
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
     please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
     (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
     (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-ietf-lsr-isis-spine-leaf-ext-01"
     ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
     ipr values: full3667, noModification3667, noDerivatives3667
     you can add the attributes updates="NNNN" and obsoletes="NNNN" 
     they will automatically be output with "(if approved)" -->

  <!-- ***** FRONT MATTER ***** -->

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the 
         full title is longer than 39 characters -->

    <title abbrev="IS-IS SL Extension">IS-IS Routing for Spine-Leaf
    Topology</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Naiming Shen" initials="N" surname="Shen">
      <organization>Cisco Systems</organization>

      <address>
        <postal>
          <street>560 McCarthy Blvd.</street>

          <!-- Reorder these if your country does things differently -->

          <city>Milpitas</city>

          <region>CA</region>

          <code>95035</code>

          <country>US</country>
        </postal>

        <phone/>

        <email>naiming@cisco.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Les Ginsberg" initials="L" surname="Ginsberg">
      <organization>Cisco Systems</organization>

      <address>
        <postal>
          <street>821 Alder Drive</street>

          <!-- Reorder these if your country does things differently -->

          <city>Milpitas</city>

          <region>CA</region>

          <code>95035</code>

          <country>US</country>
        </postal>

        <phone/>

        <email>ginsberg@cisco.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Sanjay Thyamagundalu" initials="S"
            surname="Thyamagundalu">
      <address>
        <email>tsanjay@gmail.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <date day="8" month="March" year="2019"/>

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill 
         in the current day for you. If only the current year is specified, xml2rfc will fill 
	 in the current day and month for you. If the year is not the current one, it is 
	 necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the 
	 purpose of calculating the expiry date).  With drafts it is normally sufficient to 
	 specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>Routing</area>

    <workgroup>Networking Working Group</workgroup>

    <!-- WG name at the upperleft corner of the doc,
         IETF is fine for individual submissions.  
	 If this element is not present, the default is "Network Working Group",
         which is used by the RFC Editor as a nod to the history of the IETF. -->

    <!-- Keywords will be incorporated into HTML output
         files in a meta tag but they have no effect on text or nroff
         output. If you submit your draft to the RFC Editor, the
         keywords will be used for the search engine. -->

    <abstract>
      <t>This document describes a mechanism for routers and switches in a
      Spine-Leaf type topology to have non-reciprocal Intermediate System to
      Intermediate System (IS-IS) routing relationships between the leafs and
      spines. The leaf nodes do not need to have the topology information of
      other nodes and exact prefixes in the network. This extension also has
      application in the Internet of Things (IoT).</t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>The IS-IS routing protocol defined by <xref target="ISO10589"/> has
      been widely deployed in provider networks, data centers and enterprise
      campus environments. In the data center and enterprise switching
      networks, a Spine-Leaf topology is commonly used. This document
      describes a mechanism where IS-IS routing can be optimized for a
      Spine-Leaf topology.</t>

      <t>In a Spine-Leaf topology, normally a leaf node connects to a number
      of spine nodes. Data traffic going from one leaf node to another leaf
      node needs to pass through one of the spine nodes. Also, the decision to
      choose one of the spine nodes is usually part of equal cost multi-path
      (ECMP) load sharing. The spine nodes can be considered as gateway
      devices to reach destinations on other leaf nodes. In this type of
      topology, the spine nodes have to know the topology and routing
      information of the entire network, but the leaf nodes only need to know
      how to reach the gateway devices, which are the spine nodes to which
      they are uplinked.</t>

      <t>This document describes the IS-IS Spine-Leaf extension that allows
      the spine nodes to have all the topology and routing information, while
      keeping the leaf nodes free of topology information other than the
      default gateway routing information. The leaf nodes do not even need to
      run a Shortest Path First (SPF) calculation since they have no topology
      information.</t>

      <section title="Requirements Language">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
        document are to be interpreted as described in <xref
        target="RFC2119">RFC 2119</xref>.</t>
      </section>
    </section>

    <section title="Motivations">
      <t><list hangIndent="3" style="hanging">
          <t hangText="o">The leaf nodes in a Spine-Leaf topology do not
          require complete topology and routing information of the entire
          domain since their forwarding decision is to use ECMP with spine
          nodes as default gateways.</t>

          <t hangText="o">The spine nodes in a Spine-Leaf topology are richly
          connected to leaf nodes, which introduces significant flooding
          duplication if they flood all Link State PDUs (LSPs) to all the leaf
          nodes. It saves both spine and leaf nodes' CPU and link bandwidth
          resources if flooding is blocked to leaf nodes. For small Top of the
          Rack (ToR) leaf switches in data centers, it is meaningful to
          prevent full topology routing information and massive database
          flooding through those devices.</t>

          <t hangText="o">When a spine node advertises a topology change,
          every leaf node connected to it will flood the update to all the
          other spine nodes, and those spine nodes will further flood them to
          all the leaf nodes, causing an O(n^2) flooding storm which is largely
          redundant.</t>

          <t hangText="o">Similar to some of the overlay technologies which
          are popular in data centers, the edge devices (leaf nodes) may not
          need to contain all the routing and forwarding information on the
          device's control and forwarding planes. "Conversational Learning"
          can be utilized to get the specific routing and forwarding
          information in the case of pure CLOS topology and in the events of
          link and node down.</t>

          <t hangText="o">Small devices and appliances of Internet of Things
          (IoT) can be considered as leafs in the routing topology sense. They
          have CPU and memory constraints in design, and those IoT devices do
          not have to know the exact network topology and prefixes as long as
          there are ways to reach the cloud servers or other devices.</t>
        </list></t>
    </section>

    <section title="Spine-Leaf (SL) Extension">
      <section anchor="topo-example" title="Topology Examples">
        <figure align="center" anchor="pic-topo-exp"
                title="A Spine-Leaf Topology">
          <artwork align="left"><![CDATA[
          +--------+    +--------+             +--------+
          |        |    |        |             |        |
          | Spine1 +----+ Spine2 +- ......... -+ SpineN |
          |        |    |        |             |        |
          +-+-+-+-++    ++-+-+-+-+             +-+-+-+-++
     +------+ | | |      | | | |                 | | | |
     |  +-----|-|-|------+ | | |                 | | | |
     |  |  +--|-|-|--------+-|-|-----------------+ | | |
     |  |  |  | | |    +---+ | |                   | | |
     |  |  |  | | |    |  +--|-|-------------------+ | |
     |  |  |  | | |    |  |  | |              +------+ +----+
     |  |  |  | | |    |  |  | +--------------|----------+  |
     |  |  |  | | |    |  |  +-------------+  |          |  |
     |  |  |  | | +----|--|----------------|--|--------+ |  |
     |  |  |  | +------|--|--------------+ |  |        | |  |
     |  |  |  +------+ |  |              | |  |        | |  |
    ++--+--++      +-+-+--++            ++-+--+-+     ++-+--+-+
    | Leaf1 |~~~~~~| Leaf2 |  ........  | LeafX |     | LeafY |
    +-------+      +-------+            +-------+     +-------+ ]]></artwork>
        </figure>

        <figure align="center" anchor="pic-topo-exp2" title="A CLOS Topology">
          <artwork align="left"><![CDATA[
              +---------+             +--------+
              | Spine1  |             | Spine2 |
              +-+-+-+-+-+             +-+-+-+-++
                | | | |                 | | | |
                | | | +-----------------|-|-|-|-+
                | | +------------+      | | | | |
       +--------+ +-+            |      | | | | |
       |   +----------------------------+ | | | |
       |   |        |  +------------------+ | +----+
       |   |        |  |         |  +-------+   |  |
       |   |        |  |         |  |           |  |
     +-+---+-+   +--+--+-+     +-+--+--+     +--+--+-+
     | Leaf1 |   | Leaf2 |     | Leaf3 |     | Leaf4 |
     +-------+   +-------+     +-------+     +-------+]]></artwork>
        </figure>
      </section>

      <!--EOS topo-example -->

      <section title="Applicability Statement">
        <t>This extension assumes the network is a Spine-Leaf topology, and it
        should not be applied in an arbitrary network setup. The spine nodes
        can be viewed as the aggregation layer of the network, and the leaf
        nodes as the access layer of the network. The leaf nodes use a load
        sharing algorithm with spine nodes as nexthops in routing and
        forwarding.</t>

        <t>This extension works when the spine nodes are inter-connected, and
        it works with a pure CLOS or Fat Tree topology based network where the
        spines are NOT horizontally interconnected.</t>

        <t>Although the example diagram in <xref target="pic-topo-exp"/> shows
        a fully meshed Spine-Leaf topology, this extension also works in the
        case where they are partially meshed. For instance, leaf1 through
        leaf10 may be fully meshed with spine1 through spine5 while leaf11
        through leaf20 are fully meshed with spine4 through spine8, and all the
        spines are inter-connected in a redundant fashion.</t>

        <t>This extension can also work in multi-level spine-leaf topology.
        The lower level spine node can be a 'leaf' node to the upper level
        spine node. A spine-leaf 'Tier' can be exchanged with IS-IS hello
        packets to allow tier X to be connected with tier X+1 using this
        extension. Normally tier-0 will be the TOR routers and switches if
        provisioned.</t>

        <t>This extension also works with normal IS-IS routing in a topology
        with more than two layers of spine and leaf. For instance, in example
        diagrams <xref target="pic-topo-exp"/> and <xref
        target="pic-topo-exp2"/>, there can be another Core layer of
        routers/switches on top of the aggregation layer. From an IS-IS
        routing point of view, the Core nodes are not affected by this
        extension and will have the complete topology and routing information
        just like the spine nodes. To make the network even more scalable, the
        Core layer can operate as a level-2 IS-IS sub-domain while the Spine
        and Leaf layers stay in the level-1 IS-IS domain.</t>

        <t>This extension assumes the links between the spine and leaf nodes
        are point-to-point, or point-to-point <xref target="RFC5309"> over
        LAN</xref>. The links connecting among the spine nodes or the links
        between the leaf nodes can be any type.</t>
      </section>

      <!--EOS Applicability Statement-->

      <section title="Spine-Leaf TLVs">
        <t>This extension introduces two new TLVs, the Spine-Leaf TLV and the
        Leaf-Set TLV. The Spine-Leaf TLV may be advertised in IS-IS Hello
        (IIH) PDUs; the Leaf-Set TLV may be advertised in IS-IS Circuit Scoped
        Link State PDUs (CS-LSP) <xref target="RFC7356"/>. They are used by
        both spine and leaf nodes in this Spine-Leaf mechanism.</t>

        <section title="Spine-Leaf TLV">
          <figure align="center">
            <artwork align="left"><![CDATA[
    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |      Type     |     Length    |            SL Flag            |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+]]></artwork>

            <postamble/>
          </figure>

          <t>The fields of this TLV are defined as follows:</t>

          <t><list>
              <t><list hangIndent="9" style="hanging">
                  <t hangText="Type:">1 octet Suggested value 151 (to be
                  assigned by IANA)</t>

                  <t hangText="Length:">1 octet (2 + length of sub-TLVs).</t>

                  <t hangText="SL Flags:">16 bits</t>
                </list> <figure align="center">
                  <preamble/>

                  <artwork align="left"><![CDATA[
   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |  Tier |     Reserved    |T|R|L|
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
              ]]></artwork>

                  <postamble/>
                </figure> <list>
                  <t><list hangIndent="9" style="hanging">
                      <t hangText="Tier:">A value from 0 to 15. It represents
                      the spine-leaf tier level. The value 15 is reserved to
                      indicate the tier level is unknown. This value is only
                      valid when the 'T' bit (see below) is set. If the 'T'
                      bit is clear, this value MUST be set to zero on
                      transmission, and it MUST be ignored on receipt.</t>

                      <t hangText="L bit (0x01):">Only a leaf node sets this
                      bit. If the L bit is set in the SL flag, the node
                      indicates it is in 'Leaf-Mode'.</t>

                      <t hangText="R bit (0x02):">Only a spine node sets this
                      bit. If the R bit is set, the node indicates to the leaf
                      neighbor that it can be used as the default route
                      gateway.</t>

                      <t hangText="T bit (0x04):">If set, the value in the
                      "Tier" field (see above) is valid.</t>
                    </list></t>
                </list></t>
            </list></t>
        </section>

        <!-- Spine-Leaf TLV -->

        <section title="Leaf-Set TLV">
          <figure align="center">
            <artwork align="left"><![CDATA[
    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |      Type     |     Length    |      .. Optional Sub-TLVs
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+.... ]]></artwork>

            <postamble/>
          </figure>

          <t>The suggested Type value is 152 (to be assigned by IANA). This
          TLV and associated Sub-TLVs MAY appear in CS-LSP PDUs. Multiple TLVs
          MAY be sent.</t>

          <section title="Leaf-Set Sub-TLVs">
            <t>If the data center topology is a pure CLOS or Fat Tree, there
            are no link connections among the spine nodes. If we also assume
            there is not another Core layer on top of the aggregation layer,
            then the traffic from one leaf node to another may have a problem
            if there is a link outage between a spine node and a leaf node.
            For instance, in the diagram of <xref target="pic-topo-exp2"/>, if
            Leaf1 sends data traffic to Leaf3 through Spine1 node, and the
            Spine1-Leaf3 link is down, the data traffic will be dropped on the
            Spine1 node.</t>

            <t>To address this issue spine and leaf nodes may use the sub-TLVs
            defined below to obtain more specific reachability
            information.</t>

            <t>Two Leaf-Set sub-TLVs are defined: the Leaf-Neighbors sub-TLV
            and the Reachability-Req sub-TLV.</t>

            <section anchor="leaf_set" title="Leaf-Neighbors Sub-TLV">
              <t>This sub-TLV is used by spine nodes to advertise the current
              set of Leaf neighbors to Leaf nodes. The fields of this sub-TLV
              are defined as follows:</t>

              <t><list>
                  <t><list hangIndent="9" style="hanging">
                      <t hangText="Type:">1 octet Suggested value 1 (to be
                      assigned by IANA)</t>

                      <t hangText="Length:">1 octet. It MUST be a multiple of
                      6 octets.</t>

                      <t hangText="Leaf-Neighbors">A list of IS-IS System-IDs
                      of the leaf node neighbors of this spine node.</t>
                    </list></t>
                </list></t>
            </section>

            <!-- Info-Req Sub-TLV-->

            <section anchor="info_req" title="Reachability-Req Sub-TLV">
              <t>This sub-TLV is used by leaf nodes to request the
              advertisement of more specific prefix information from one or
              more selected spine node(s). The list of leaf nodes in this
              sub-TLV reflects the current set of leaf-nodes for which not all
              spine node neighbors have indicated the presence of connectivity
              in the Leaf-Neighbors sub-TLV (See <xref target="leaf_set"/>).
              The fields of this sub-TLV are defined as follows:</t>

              <t><list>
                  <t><list hangIndent="9" style="hanging">
                      <t hangText="Type:">1 octet Suggested value 2 (to be
                      assigned by IANA)</t>

                      <t hangText="Length:">1 octet. It MUST be a multiple of
                      6 octets.</t>
                    </list> <list hangIndent="9" style="hanging">
                      <t hangText="Leaf Nodes">List of IS-IS System-IDs of
                      leaf nodes for which reachability information is being
                      requested.</t>
                    </list></t>
                </list></t>
            </section>

            <!-- Info-Req Sub-TLV-->

            <!-- IPv6 Info-Adv Sub-TLV-->
          </section>

          <!-- Leaf-Set TLV -->

          <!-- IPv4 Info-Advertise Sub-TLV-->
        </section>

        <section title="Advertising IPv4/IPv6 Reachability">
          <t>In cases where connectivity between a leaf node and a spine node
          is down, the leaf node MAY request reachability information from a
          spine node as described in <xref target="info_req"/>. The spine node
          utilizes TLVs 135 <xref target="RFC5305"/> and TLVs 236 <xref
          target="RFC5308"/> to advertise this information. These TLVs MAY be
          included in CS-LSPs <xref target="RFC7356"/> sent from the spine to
          the requesting leaf node.</t>
        </section>

        <section title="Advertising Connection to RF-Leaf Node">
          <t>For links between Spine and Leaf Nodes on which the Spine Node
          has set the R-bit and the Leaf node has set the L-bit in their
          respective Spine-Leaf TLVs, spine nodes MAY advertise the link with
          a bit in the "link-attribute" sub-TLV <xref target="RFC5029"/> to
          indicate that this link is not used for LSP flooding. This bit is
          named the Connect-to-RF-Leaf Node bit. This information can be used
          by nodes computing a flooding topology e.g., <xref
          target="DYNAMIC-FLOODING"/>, to exclude the RF-Leaf nodes from the
          computed flooding topology.</t>

          <t>For links between Spine and Leaf Nodes on which the Spine Node
          has set the R-bit and the Leaf node has set the L-bit in their
          respective Spine-Leaf TLVs, leaf nodes MAY advertise the link with a
          bit in the "link-attribute" sub-TLV <xref target="RFC5029"/> to
          indicate that this link is to a Spine Node neighbor. This bit is
          named the Connect-to-RF-Spine Node bit. This information can be used
          by leaf nodes when deciding whether a leaf to leaf link can be used
          as an alternate default path when a leaf node has no connectivity to
          any spines. See Section 3.5.2.</t>
        </section>
      </section>

      <section title="Mechanism">
        <t>Leaf nodes in a spine-leaf application using this extension are
        provisioned with two attributes:</t>

        <t>1) Tier level of 0. This indicates the node is a Leaf Node. The
        value 0 is advertised in the Tier field of Spine-Leaf TLV defined
        above.</t>

        <t>2) Flooding reduction enabled/disabled. If flooding reduction is
        enabled the L-bit is set to one in the Spine-Leaf TLV defined
        above.</t>

        <t>A spine node does not need explicit configuration. Spine nodes can
        dynamically discover their tier level by computing the number of hops
        to a leaf node. Until a spine node determines its tier level it MUST
        advertise level 15 (unknown tier level) in the Spine-Leaf TLV defined
        above. Each tier level can also be statically provisioned on the
        node.</t>

        <t>When a spine node receives an IIH which includes the Spine-Leaf TLV
        with Tier level 0 and 'L' bit set, it labels the point-to-point
        interface and adjacency to be a 'Reduced Flooding Leaf-Peer
        (RF-Leaf)'. IIHs sent by a spine node on a link to an RF-Leaf include
        the Spine-Leaf TLV with the 'R' bit set in the flags field. The 'R'
        bit indicates to the RF-Leaf neighbor that the spine node can be used
        as a default routing nexthop.</t>

        <t>There is no change to the IS-IS adjacency bring-up mechanism for
        Spine-Leaf peers.</t>

        <t>A spine node blocks LSP flooding to RF-Leaf adjacencies, except for
        the LSP PDUs in which the IS-IS System-ID matches the System-ID of the
        RF-Leaf neighbor. This exception is needed since when the leaf node
        reboots, the spine node needs to forward to the leaf node non-purged
        LSPs from the RF-Leaf's previous incarnation.</t>

        <t>Leaf nodes will perform IS-IS LSP flooding as normal to send the
        LSPs over all of its IS-IS adjacencies. In the case of RF-Leafs only
        self-originated LSPs will exist in its LSP database, and in the case
        of leaf-leaf connections, there will be neighbor leaf nodes LSPs in
        the LSP database in addition to the self-originated LSPs.</t>

        <t>Spine nodes will receive all the LSP PDUs in the network, including
        all the spine nodes and leaf nodes. They will perform Shortest Path
        First (SPF) as a normal IS-IS node does. There is no change to the
        route calculation and forwarding on the spine nodes.</t>

        <t>The LSPs of a node are only flooded northbound towards the upper layer
        spine nodes. The default route is generated with loadsharing also
        towards the upper layer spine nodes.</t>

        <t>An RF-Leaf node does not have any LSP in the network except for its
        own. Therefore there is no need to perform SPF calculation on the
        RF-Leaf node. It only needs to download the default route with the
        nexthops of those Spine Neighbors which have the 'R' bit set in the
        Spine-Leaf TLV in IIH PDUs. IS-IS can perform equal cost or unequal
        cost load sharing while using the spine nodes as nexthops. The
        aggregated metric of the outbound interface and the 'Reverse Metric'
        <xref target="RFC8500"/> can be used for this purpose.</t>

        <section title="Pure CLOS Topology">
          <t>In a data center where the topology is pure CLOS or Fat Tree,
          there is no interconnection among the spine nodes, and there is not
          another Core layer above the aggregation layer with reachability to
          the leaf nodes. When flooding reduction to RF-Leafs is in use, if
          the link between a spine and a leaf goes down, there is then a
          possibility of black holing the data traffic in the network.</t>

          <t>As in the diagram <xref target="pic-topo-exp2"/>, if the link
          Spine1-Leaf3 goes down, there needs to be a way for Leaf1, Leaf2 and
          Leaf4 to avoid the Spine1 if the destination of data traffic is to
          Leaf3 node.</t>

          <t>In the above example, the Spine1 and Spine2 are provisioned to
          advertise the Leaf-Set sub-TLV of the Spine-Leaf TLV. Originally
          both Spines will advertise Leaf1 through Leaf4 as their Leaf-Set.
          When the Spine1-Leaf3 link is down, Spine1 will only have Leaf1,
          Leaf2 and Leaf4 in its Leaf-Set. This allows the other leaf nodes to
          know that Spine1 has lost connectivity to the leaf node of
          Leaf3.</t>

          <t>Each RF-Leaf node can select another spine node to request for
          some prefix information associated with the lost leaf node. In this
          diagram of <xref target="pic-topo-exp2"/>, there are only two spine
          nodes (Spine-Leaf topology can have more than two spine nodes in
          general). Each RF-Leaf node can independently select a spine node
          for the leaf information. The RF-Leaf nodes will include the
          Info-Req sub-TLV (the Reachability-Req sub-TLV defined in <xref
          target="info_req"/>) in the Spine-Leaf TLV in hellos sent to the
          selected spine node, Spine2 in this case.</t>

          <t>The spine node, upon receiving the request from one or more leaf
          nodes, will find the IPv6/IPv4 prefixes advertised by the leaf nodes
          listed in the Info-Req sub-TLV. The spine node will use the
          mechanism defined in Section 3.3.3 to advertise these prefixes to
          the RF-Leaf node. For instance, it will include the IPv4 loopback
          prefix of leaf3 based on the policy configured or administrative tag
          attached to the prefixes. When the leaf nodes receive the more
          specific prefixes, they will install the advertised prefixes towards
          the other spine nodes (Spine2 in this example).</t>

          <t>For instance in the data center overlay scenario, when any IP
          destination or MAC destination uses the leaf3's loopback as the
          tunnel nexthop, the overlay tunnel from leaf nodes will only select
          Spine2 as the gateway to reach leaf3 as long as the Spine1-Leaf3
          link is still down.</t>

          <t>In cases where multiple links or nodes fail at the same time, the
          RF-leaf node may need to send the Info-Req to multiple upper layer
          spine nodes in order to obtain reachability information for all the
          partially connected nodes.</t>

          <t>This negative routing is more useful between tier 0 and tier 1
          spine-leaf levels in a multi-level spine-leaf topology when the
          reduced flooding extension is in use. Nodes in tiers 1 or greater
          may have much richer topology information and alternative paths.</t>
        </section>

        <!--EOS Pure CLOS-->
      </section>

      <!--EOS Mechanism-->

      <section title="Implementation and Operation">
        <section title="CSNP PDU">
          <t>In the Spine-Leaf extension, Complete Sequence Number PDUs (CSNPs) do
          not need to be transmitted over the Spine-Leaf link to an RF-Leaf.
          Some IS-IS implementations send periodic CSNPs after the initial
          adjacency bring-up over a point-to-point interface. There is no need
          for this optimization here since the RF-Leaf does not need to
          receive any other LSPs from the network, and the only LSPs
          transmitted across the Spine-Leaf link are the leaf node LSPs.</t>

          <t>Also in the graceful restart case <xref target="RFC5306"/>, for
          the same reason, there is no need to send the CSNPs over the
          Spine-Leaf interface to an RF-Leaf. Spine nodes only need to set the
          SRMflag on the LSPs belonging to the RF-Leaf that has restarted.</t>
        </section>

        <!-- csnp -->

        <!---->

        <section title="Leaf to Leaf connection">
          <t>Leaf to leaf node links are useful in host redundancy cases in
          switching networks. There are no flooding extensions required in
          this case. Leaf node LSPs will be exchanged over this link using the
          normal operation of the IS-IS Update process. In the example diagram
          <xref target="pic-topo-exp"/>, Leaf1 will receive Leaf2's LSPs and
          Leaf2 will receive Leaf1's LSPs. Each of the Leaf nodes will in turn
          flood the LSPs they receive from their leaf node neighbor to their
          spine neighbors. Prefix reachability advertisements received from
          the leaf neighbor will result in the installation of more specific
          routes using this local Leaf-Leaf link. SPF will be performed in
          this case just like when the entire network only involves those
          two IS-IS nodes. This does not affect the normal Spine-Leaf
          mechanism they perform toward the spine nodes.</t>

          <t>Leaf to leaf connections SHOULD be limited to a single leaf
          neighbor.</t>

          <t>Two modes of operation for the Leaf-Leaf link are possible and
          are described in the following sub-sections.</t>

          <section title="Local traffic only">
            <t>The leaf node sets the 'overload' bit in its LSP PDU so that
            spine nodes will not send traffic destined for the neighboring
            leaf node via its leaf node neighbor. The Leaf-Leaf link will then
            be used solely for local traffic between the two Leaf Nodes.</t>
          </section>

          <section title="Transit traffic allowed">
            <t>If a leaf node becomes disconnected from all spine nodes, it is
            possible for spine nodes to route traffic destined for the
            disconnected leaf node via its leaf node neighbor. However, the
            leaf to leaf link SHOULD be the link of last resort. To support
            this mode the leaf nodes do NOT set the overload bit in their LSPs
            and they advertise a high metric for the leaf to leaf link
            ((2^24 - 2) is recommended). This signals to the Spine Nodes that
            the leaf to leaf link may be used for transit traffic, but also ensures
            that it will not be used unless the spine node has no other path
            to a given leaf node.</t>

            <t>When the leaf node is disconnected from all spine nodes it MAY
            install a default route towards its leaf-node neighbor in support
            of return traffic to the spine nodes. When doing so the leaf
            should validate that its leaf neighbor has at least one spine
            neighbor. This can be done by looking for the Connect-to-RF-Spine
            Node bit in the Link Attributes sub-TLVs <xref target="RFC5029"/>
            advertised in the LSPs of its leaf node neighbor.</t>
          </section>
        </section>

        <!-- leaf-to-leaf -->

        <section title="Spine Node Hostname">
          <t>This extension creates a non-reciprocal relationship between the
          spine node and leaf node. The spine node will receive leaf's LSP and
          will know the leaf's hostname, but the leaf does not have spine's
          LSP. This extension allows the Dynamic Hostname TLV <xref
          target="RFC5301"/> to be optionally included in spine's IIH PDU when
          sending to a 'Leaf-Peer'. This is useful in troubleshooting
          cases.</t>
        </section>

        <!-- hostname -->

        <section title="IS-IS Reverse Metric">
          <t>This metric is part of the aggregated metric for leaf's default
          route installation with load sharing among the spine nodes. When a
          spine node is in 'overload' condition, it should use the IS-IS
          Reverse Metric TLV in IIH <xref target="RFC8500"/> to set this
          metric to maximum to discourage the leaf from using it as part of
          the load sharing.</t>

          <t>In some cases, certain spine nodes may have less bandwidth in
          link provisioning or in real-time condition, and they can use this
          metric to signal this to the leaf nodes dynamically.</t>

          <t>In other cases, such as when the spine node loses a link to a
          particular leaf node, although it can redirect the traffic to other
          spine nodes to reach that destination leaf node, it MAY want to
          increase this metric value if the inter-spine connection becomes
          overutilized, or the latency becomes an issue.</t>
        </section>

        <!-- spine-def-metric -->

        <section title="Spine-Leaf Traffic Engineering">
          <t>Besides using the IS-IS Reverse Metric by the spine nodes to
          affect the traffic pattern for leaf default gateway towards multiple
          spine nodes, the IPv6/IPv4 Info-Advertise sub-TLVs can be
          selectively used by traffic engineering controllers to move data
          traffic around the data center fabric to alleviate congestion and to
          reduce the latency of a certain class of traffic pairs. By injecting
          more specific leaf node prefixes, it will allow the spine nodes to
          attract more traffic on some underutilized links.</t>
        </section>

        <!-- spine-leaf TE -->

        <section title="Other End-to-End Services">
          <t>Losing the topology information will have an impact on some of
          the end-to-end network services, for instance, MPLS TE or end-to-end
          segment routing. Other mechanisms, such as a
          <xref target="RFC4655">PCE</xref> based solution, may be used. In
          this Spine-Leaf extension, the role of the leaf node is not much
          different from that in multi-level IS-IS routing, where the level-1 IS-IS
          nodes only have the default route information towards the node which
          has the Attach Bit (ATT) set, and the level-2 backbone does not have
          any topology information of the level-1 areas. The exact mechanism
          to enable certain end-to-end network services in Spine-Leaf network
          is outside the scope of this document.</t>
        </section>

        <!-- end-to-end -->

        <section title="Address Family and Topology">
          <t>IPv6 Address families <xref target="RFC5308"/>, Multi-Topology
          (MT) <xref target="RFC5120"/> and Multi-Instance (MI) <xref
          target="RFC8202"/> information is carried over the IIH PDU. Since
          the goal is to simplify the operation of IS-IS network, for the
          simplicity of this extension, the Spine-Leaf mechanism is applied
          the same way to all the address families, MTs and MIs.</t>
        </section>

        <!-- address-family -->

        <section title="Migration">
          <t>For this extension to be deployed in existing networks, a simple
          migration scheme is needed. To support any leaf node in the network,
          all the involved spine nodes have to be upgraded first. So the first
          step is to migrate all the involved spine nodes to support this
          extension, then the leaf nodes can be enabled with 'Leaf-Mode' one
          by one. No flag day is needed for the extension migration.</t>
        </section>

        <!-- migration -->
      </section>

      <!--EOS impl-and-operation-->
    </section>

    <!--EOS SL Extension-->

    <!-- This PI places the pagebreak correctly (before the section title) in the text output. -->

    <?rfc needLines="8" ?>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>Two new TLV codepoints are defined in this document and need to be
      assigned by IANA from the "IS-IS TLV Codepoints" registry. They are
      the Spine-Leaf TLV, with a suggested value of 151, and the
      Leaf-Set TLV, with a suggested value of 152. The Spine-Leaf TLV is only to
      be optionally inserted in the IIH PDU, and the Leaf-Set TLV is only to
      be optionally inserted in Circuit Flooding Scoped LSP PDU. IANA is also
      requested to maintain the SL-flag bit values in the Spine-Leaf TLV, and
      0x01, 0x02 and 0x04 bits are defined in this document. <figure
          align="center">
          <preamble/>

          <artwork align="left"><![CDATA[
   Value  Name                   IIH  LSP  SNP  Purge  CS-LSP
   -----  ---------------------  ---  ---  ---  -----  -------
   151    Spine-Leaf              y    n    n    n        n
   152    Leaf-Set                n    n    n    n        y
            ]]></artwork>

          <postamble/>
        </figure></t>

      <t>This document also proposes that the Dynamic Hostname TLV, already
      assigned code 137, be allowed in the IIH PDU. <figure align="center">
          <preamble/>

          <artwork><![CDATA[
   Value  Name                   IIH  LSP  SNP  Purge
   -----  ---------------------  ---  ---  ---  -----
   137    Dynamic Name            y    y    n    y
            ]]></artwork>

          <postamble/>
        </figure></t>

      <t>This document requests IANA to create a new registry under the IS-IS
      TLV Codepoints registry. The suggested name of the registry is "Sub-TLVs
      for TLV 152 (Leaf-Set TLV)". The initial contents of the new registry are
      defined below:</t>

      <t><figure align="center">
          <preamble/>

          <artwork><![CDATA[
   Value  Name                   
   -----  ---------------------  
   0      Reserved
   1      Leaf Neighbors 
   2      Reachability Req       
   3-255  Unassigned
            ]]></artwork>

          <postamble/>
        </figure></t>

      <t>This document also requests that IANA allocate from the registry of
      link-attribute two new bit values for sub-TLV 19 of TLV 22 (Extended IS
      reachability TLV).</t>

      <t><figure align="center">
          <preamble/>

          <artwork align="left"><![CDATA[
   Value  Name                             Reference                 
   -----  -----                            ----------
   0x4    Connect to RF-Leaf Node          This document
   0x8    Connect to RF-Spine Node         This document
            ]]></artwork>

          <postamble/>
        </figure></t>
    </section>

    <!--EOS IANA-->

    <section anchor="Security" title="Security Considerations">
      <t>Security concerns for IS-IS are addressed in <xref
      target="ISO10589"/>, <xref target="RFC5304"/>, <xref target="RFC5310"/>,
      and <xref target="RFC7602"/>. This extension does not raise additional
      security issues.</t>
    </section>

    <!--EOS security-->

    <section anchor="Acknowledgments" title="Acknowledgments">
      <t>The authors would like to thank Tony Przygienda and Lukas Krattiger
      for their discussion and contributions. The authors also would like to
      thank Acee Lindem, Russ White, Christian Hopps and Aijun Wang for their
      review of and comments on this document.</t>
    </section>

    <!-- ack -->
  </middle>

  <!--  *****BACK MATTER ***** -->

  <back>
    <!-- References split into informative and normative -->

    <!-- There are 2 ways to insert reference entries from the citation libraries:
     1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
     2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
        (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

     Both are cited textually in the same manner: by using xref elements.
     If you use the PI option, xml2rfc will, by default, try to find included files in the same
     directory as the including file. You can also define the XML_LIBRARY environment variable
     with a value containing a set of directories to search.  These can be either in the local
     filing system or remote ones accessed by http (http://domain/dir/... ).-->

    <references title="Normative References">
      <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml"?-->

      <reference anchor="ISO10589">
        <front>
          <title>Intermediate system to Intermediate system intra-domain
          routeing information exchange protocol for use in conjunction with
          the protocol for providing the connectionless-mode Network Service
          (ISO 8473), ISO/IEC 10589:2002, Second Edition.</title>

          <author>
            <organization>ISO "International Organization for
            Standardization"</organization>
          </author>

          <date month="Nov" year="2002"/>
        </front>
      </reference>

      <?rfc include="reference.RFC.7602"?>

      <?rfc include="reference.RFC.7356"?>

      <?rfc include="reference.RFC.5029"?>

      <?rfc include="reference.RFC.5305"?>

      <?rfc include="reference.RFC.5306"?>

      <?rfc include="reference.RFC.5308"?>

      <?rfc include="reference.RFC.5120"?>

      <?rfc include="reference.RFC.8202"?>

      <?rfc include="reference.RFC.5301"?>

      <?rfc include="reference.RFC.5304"?>

      <?rfc include="reference.RFC.5310"?>

      <?rfc include="reference.RFC.8500"?>

      <?rfc include="reference.RFC.2119"?>
    </references>

    <references title="Informative References">
      <!-- Here we use rfc include PIs to pull entries from the citation libraries. -->

      <?rfc include="reference.RFC.5309"?>

      <?rfc include="reference.RFC.4655"?>

      <reference anchor="DYNAMIC-FLOODING">
        <front>
          <title>Dynamic Flooding on Dense Graphs</title>

          <author initials="T." surname="Li">
            <organization>Arista Networks</organization>
          </author>

          <date year="2018"/>
        </front>

        <seriesInfo name="Internet-Draft" value="draft-li-dynamic-flooding"/>
      </reference>
    </references>
  </back>
</rfc>
