<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
    which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
    There has to be one entity for each item to be referenced.
    An alternate method (rfc include) is described in the references. -->
<!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2629 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3552 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3552.xml">
<!ENTITY I-D.narten-iana-considerations-rfc2434bis SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.narten-iana-considerations-rfc2434bis.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
    please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
    (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
    (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-xu-idr-performance-routing-01"
     ipr="trust200902">
  <front>
    <title abbrev="">Performance-based BGP Routing Mechanism</title>

    <author fullname="Xiaohu Xu" initials="X.X." surname="Xu">
      <organization>Huawei</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>xuxiaohu@huawei.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Mohamed Boucadair" initials="M.B." surname="Boucadair">
      <organization>France Telecom</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>mohamed.boucadair@orange.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Christian Jacquenet" initials="C.J." surname="Jacquenet">
      <organization>France Telecom</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>christian.jacquenet@orange.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Ning So" initials="N.S." surname="So">
      <organization>Vinci Systems</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>ning.so@vinci-systems.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Yimin Shen" initials="Y.S." surname="Shen">
      <organization>Juniper</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>yshen@juniper.net</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Uma Chunduri" initials="U.C." surname="Chunduri">
      <organization>Ericsson</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>uma.chunduri@ericsson.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Hui Ni" initials="H.N." surname="Ni">
      <organization>Huawei</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>nihui@huawei.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Yongbing Fan" initials="Y.F." surname="Fan">
      <organization>China Telecom</organization>

      <address>
        <!--
       <postal>
         <street></street>
-->

        <!-- Reorder these if your country does things differently -->

        <!--
         <city>Soham</city>

         <region></region>

         <code></code>

         <country>UK</country>
       </postal>

       <phone>+44 7889 488 335</phone>
-->

        <email>fanyb@gsta.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <!--

-->

    <date day="" month="" year="2014"/>

    <abstract>
      <t>The current BGP specification doesn't use network performance metrics
      (e.g., network latency) in the route selection decision process. This
      document describes a performance-based BGP routing mechanism in which
      the network latency metric is taken as one of the route selection
      criteria. This routing mechanism is useful for those service providers
      with global reach to deliver low-latency network connectivity services
      to their customers.</t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>Network latency is widely recognized as one of the major obstacles in
      migrating business applications to the cloud since cloud-based
      applications usually have very clearly defined and stringent network
      latency requirements. Service providers with global reach aim at
      delivering low-latency network connectivity services to their cloud
      service customers as a competitive advantage. Sometimes, the network
      connectivity may travel across more than one Autonomous System (AS)
      under their administration. However, BGP <xref target="RFC4271"/>,
      which is used for path selection across ASes, doesn't use network
      latency in the route selection process. As such, the best route selected
      based upon the existing BGP route selection criteria may not be the best
      from the customer experience perspective. </t>

      <t>This document describes a performance-based BGP routing paradigm in
      which the network latency metric is disseminated via a new TLV of the
      AIGP attribute <xref target="RFC7311"/> and that metric is used as an
      input to the route selection process. This mechanism is useful for those
      service providers with global reach, which usually own more than one AS,
      to deliver low-latency network connectivity services to their customers.
      </t>

      <t>Furthermore, in order to be backward compatible with existing BGP
      implementations and have no impact on the stability of the overall
      routing system, it's expected that the performance routing paradigm
      could coexist with the vanilla routing paradigm. As such, service
      providers could thus provide low-latency routing services while still
      offering the vanilla routing services depending on customers'
      requirements. </t>

      <t>For the sake of simplicity, this document considers only one network
      performance metric, namely the network latency metric. The support of
      multiple network performance metrics is outside the scope of this
      document. In addition, this document focuses exclusively on BGP matters;
      therefore, all matters unrelated to BGP, such as the mechanisms for
      measuring network latency, are outside the scope of this document. </t>

      <t>A variant of this performance-based BGP routing has been implemented
      (see http://www.ist-mescal.org/roadmap/qbgp-demo.avi). </t>

      <section title="Requirements Language">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
        document are to be interpreted as described in <xref
        target="RFC2119">RFC 2119</xref>.</t>
      </section>
    </section>

    <section anchor="Abbreviations_Terminology" title="Terminology">
      <t>This memo makes use of the terms defined in <xref
      target="RFC4271"/>.</t>

      <t>Network latency indicates the amount of time it takes for a packet to
      traverse a given network path <xref target="RFC2679"/>. Provided a
      packet was forwarded along a path which contains multiple links and
      routers, the network latency would be the sum of the transmission
      latency of each link (i.e., link latency), plus the sum of the internal
      delay incurred within each router (i.e., router latency) which includes
      queuing latency and processing latency. The sum of the link latencies is
      also known as the cumulative link latency. In today's service
      provider networks which usually span across a wide geographical area,
      the cumulative link latency becomes the major part of the network
      latency since the total of the internal latency incurred within each
      high-capacity router seems trivial compared to the cumulative link
      latency. In other words, the cumulative link latency could approximately
      represent the network latency in the above networks. </t>

      <t>Furthermore, since the link latency is more stable than the router
      latency, such approximate network latency represented by the cumulative
      link latency is more stable. Therefore, if there is a way to calculate
      the cumulative link latency of a given network path, it is strongly
      recommended to use such cumulative link latency to approximately
      represent the network latency. Otherwise, the network latency would have
      to be measured frequently by some means (e.g., PING or other measurement
      tools). </t>
    </section>

    <section title="Performance Route Advertisement">
      <t>Performance (i.e., low latency) routes SHOULD be exchanged between
      BGP peers by means of a specific Subsequent Address Family Identifier
      (SAFI) of TBD (see <xref target="IANA"/>) and also be carried as labeled
      routes as per <xref target="RFC3107"/>. In other words, performance
      routes can thus be regarded as specific labeled routes which are
      associated with a network latency metric. </t>

      <t>A BGP speaker SHOULD NOT advertise performance routes to a particular
      BGP peer unless that peer indicates, through BGP capability
      advertisement (see <xref target="cap"/>), that it can process update
      messages with that specific SAFI field. </t>

      <t>The network latency metric is attached to the performance routes via
      a new TLV of the AIGP attribute, referred to as NETWORK_LATENCY TLV. The
      value of this TLV indicates the network latency in microseconds from the
      BGP speaker depicted by the NEXT_HOP path attribute to the address
      depicted by the NLRI prefix. The type code of this TLV is TBD (see <xref
      target="IANA"/>), and the value field is 4 octets in length. In some
      abnormal cases, if the cumulative link latency exceeds the maximum value
      of 0xFFFFFFFF, the value field SHOULD be set to 0xFFFFFFFF. </t>

      <t>A BGP speaker SHOULD be configurable to enable or disable the
      origination of performance routes. If enabled, a local latency value for
      a given to-be-originated performance route MUST be configured on the BGP
      speaker so that it can be filled into the NETWORK_LATENCY TLV of that
      performance route. </t>

      <t>When distributing a performance route learnt from a BGP peer, if this
      BGP speaker has set itself as the NEXT_HOP of such route, the value of
      the NETWORK_LATENCY TLV SHOULD be increased by adding the network
      latency from itself to the previous NEXT_HOP of such route. Otherwise,
      the NETWORK_LATENCY TLV of such route MUST NOT be modified. </t>

      <t>How to obtain the network latency to a given BGP NEXT_HOP is
      outside the scope of this document. However, note that the path latency
      to the NEXT_HOP SHOULD approximately represent the network latency of
      the exact forwarding path towards the NEXT_HOP. For example, if a BGP
      speaker uses a Traffic Engineering (TE) Label Switching Path (LSP) from
      itself to the NEXT_HOP, rather than the shortest path calculated by
      Interior Gateway Protocol (IGP), the latency to the NEXT_HOP SHOULD
      reflect the network latency of that TE LSP path, rather than the IGP
      shortest path.</t>

      <t>To keep performance routes stable enough, a BGP speaker SHOULD use a
      configurable threshold for network latency fluctuation to avoid sending
      any update which would otherwise be triggered by a minor network latency
      fluctuation below that threshold. </t>
    </section>

    <section anchor="cap" title="Capability Advertisement">
      <t>A BGP speaker that uses multiprotocol extensions to advertise
      performance routes SHOULD use the Capabilities Optional Parameter, as
      defined in <xref target="RFC5492"/>, to inform its peers about this
      capability. </t>

      <t>The MP_EXT Capability Code, as defined in <xref target="RFC4760"/>,
      is used to advertise the (AFI, SAFI) pairs available on a particular
      connection.</t>

      <t>A BGP speaker that implements the Performance Routing Capability MUST
      support the BGP Labeled Route Capability, as defined in <xref
      target="RFC3107"/>. A BGP speaker that advertises the Performance
      Routing Capability to a peer using BGP Capabilities advertisement <xref
      target="RFC5492"/> does not have to advertise the BGP Labeled Route
      Capability to that peer. </t>
    </section>

    <section title="Performance Route Selection">
      <t>Performance route selection only requires the following modification
      to the tie-breaking procedures of the BGP route selection decision
      (phase 2) described in <xref target="RFC4271"/>: network latency metric
      comparison SHOULD be executed just ahead of the AS-Path Length
      comparison step. </t>

      <t>Prior to executing the network latency metric comparison, the value
      of the NETWORK_LATENCY TLV SHOULD be increased by adding the network
      latency from the BGP speaker to the NEXT_HOP of that route. In the case
      where a route reflector is deployed without next-hop-self enabled when
      reflecting received routes from one IBGP peer to other IBGP peers, it is
      RECOMMENDED to enable such route reflector to reflect all received
      performance routes by using some mechanisms such as <xref
      target="I-D.ietf-idr-add-paths"/>, rather than reflecting only the
      performance route which is the best from its own perspective. Otherwise,
      it may result in a non-optimal choice by its clients and/or its IBGP
      peers. </t>

      <t>The Loc-RIB of performance routing paradigm is independent from that
      of vanilla routing paradigm. Accordingly, the routing table of
      performance routing paradigm is independent from that of the vanilla
      routing paradigm. Whether performance routing paradigm or vanilla
      routing paradigm would be used for a given packet is a local policy
      issue which is outside the scope of this document. </t>
    </section>

    <section title="Deployment Considerations">
      <t>It is strongly RECOMMENDED to deploy this performance-based BGP
      routing mechanism across multiple ASes which belong to a single
      administrative domain. Within each AS, it is RECOMMENDED to deliver a
      packet from a BGP speaker to the BGP NEXT_HOP via tunnels, typically TE
      LSP tunnels. Furthermore, if a TE LSP is used between IBGP peers, it is
      RECOMMENDED to use the latency metric carried in Unidirectional Link
      Delay Sub-TLV <xref target="I-D.ietf-ospf-te-metric-extensions"/> <xref
      target="I-D.ietf-isis-te-metric-extensions"/> if possible, rather than
      the TE metric <xref target="RFC3630"/><xref target="RFC5305"/> to
      calculate the cumulative link latency associated with the TE LSP and use
      that cumulative link latency to approximately represent the network
      latency. Thus, there is no need for frequent measurement of network
      latency between IBGP peers.</t>
    </section>

    <section anchor="Acknowledgements" title="Acknowledgements">
      <t>Thanks to Joel Halpern, Alvaro Retana, Jim Uttaro, Robert Raszuk,
      Eric Rosen, Qing Zeng, Jie Dong, Mach Chen, Saikat Ray, Wes George, Jeff
      Haas, John Scudder and Sriganesh Kini for their valuable comments on the
      initial idea of this document. Special thanks should be given to Jim
      Uttaro and Eric Rosen for their proposal of using a new TLV of the AIGP
      attribute to convey the network latency metric.</t>

      <!---->
    </section>

    <section anchor="IANA" title="IANA Considerations">
      <t>A new BGP Capability Code for the Performance Routing Capability, a
      new SAFI specific for performance routing and a new type code for
      NETWORK_LATENCY TLV of the AIGP attribute are required to be allocated
      by IANA.</t>

      <!---->
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>In addition to the considerations discussed in <xref
      target="RFC4271"/>, the following items should be considered as well:
      </t>

      <t><list style="letters">
          <t>Tweaking the value of the NETWORK_LATENCY TLV by an illegitimate
          party may influence the route selection results. Therefore,
          Performance Routing Capability negotiation MUST be disabled between
          BGP peers which belong to different administrative domains.
          Furthermore, a BGP speaker MUST discard all performance routes
          received from the BGP peer for which the Performance Routing
          Capability negotiation has been disabled. </t>

          <t> Frequent updates of the NETWORK_LATENCY TLV may have a severe
          impact on the stability of the routing system. Such practice SHOULD
          be avoided by setting a reasonable threshold for network latency
          fluctuation.</t>
        </list></t>

      <!---->
    </section>
  </middle>

  <back>
    <references title="Normative References">
      &RFC2119;

      <?rfc include="reference.RFC.4271"?>

      <?rfc include="reference.RFC.7311"?>

      <!---->
    </references>

    <references title="Informative References">
      <?rfc include="reference.RFC.2679"?>

      <?rfc include="reference.RFC.3107"?>

      <?rfc include="reference.RFC.5492"?>

      <?rfc include="reference.RFC.4760"?>

      <?rfc include="reference.RFC.3630"?>

      <?rfc include="reference.RFC.5305"?>

      <?rfc include="reference.I-D.ietf-ospf-te-metric-extensions"?>

      <?rfc include="reference.I-D.ietf-isis-te-metric-extensions"?>

      <?rfc include="reference.I-D.ietf-idr-add-paths"?>

      <!---->
    </references>
  </back>
</rfc>
