<?xml version="1.0" encoding="US-ASCII"?>
<!-- xml2rfc is available at http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [

  <!ENTITY RFC0791 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.0791.xml">
  <!ENTITY RFC2119 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
  <!ENTITY RFC2474 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2474.xml">
  <!ENTITY RFC2475 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2475.xml">
  <!ENTITY RFC2597 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2597.xml">
  <!ENTITY RFC3031 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3031.xml">
  <!ENTITY RFC3032 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3032.xml">
  <!ENTITY RFC3209 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3209.xml">
  <!ENTITY RFC3270 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3270.xml">
  <!ENTITY RFC3429 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3429.xml">
  <!ENTITY RFC3443 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3443.xml">
  <!ENTITY RFC3985 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3985.xml">
  <!ENTITY RFC4090 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4090.xml">
  <!ENTITY RFC4110 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4110.xml">
  <!ENTITY RFC4124 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4124.xml">
  <!ENTITY RFC4182 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4182.xml">
  <!ENTITY RFC4201 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4201.xml">
  <!ENTITY RFC4206 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4206.xml">
  <!ENTITY RFC4221 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4221.xml">
  <!ENTITY RFC4377 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4377.xml">
  <!ENTITY RFC4379 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4379.xml">
  <!ENTITY RFC4385 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4385.xml">
  <!ENTITY RFC4664 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4664.xml">
  <!ENTITY RFC4875 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4875.xml">
  <!ENTITY RFC4928 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4928.xml">
  <!ENTITY RFC4950 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4950.xml">
  <!ENTITY RFC5082 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5082.xml">
  <!ENTITY RFC5085 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5085.xml">
  <!ENTITY RFC5129 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5129.xml">
  <!ENTITY RFC5332 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5332.xml">
  <!ENTITY RFC5462 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5462.xml">
  <!ENTITY RFC5586 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5586.xml">
  <!ENTITY RFC5695 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5695.xml">
  <!ENTITY RFC5860 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5860.xml">
  <!ENTITY RFC5880 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5880.xml">
  <!ENTITY RFC5884 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5884.xml">
  <!ENTITY RFC5885 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5885.xml">
  <!ENTITY RFC5905 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.5905.xml">
  <!ENTITY RFC6310 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6310.xml">
  <!ENTITY RFC6371 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6371.xml">
  <!ENTITY RFC6374 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6374.xml">
  <!ENTITY RFC6375 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6375.xml">
  <!ENTITY RFC6388 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6388.xml">
  <!ENTITY RFC6391 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6391.xml">
  <!ENTITY RFC6424 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6424.xml">
  <!ENTITY RFC6425 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6425.xml">
  <!ENTITY RFC6426 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6426.xml">
  <!ENTITY RFC6427 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6427.xml">
  <!ENTITY RFC6428 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6428.xml">
  <!ENTITY RFC6435 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6435.xml">
  <!ENTITY RFC6438 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6438.xml">
  <!ENTITY RFC6478 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6478.xml">
  <!ENTITY RFC6639 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6639.xml">
  <!ENTITY RFC6669 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6669.xml">
  <!ENTITY RFC6670 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6670.xml">
  <!ENTITY RFC6720 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6720.xml">
  <!ENTITY RFC6790 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6790.xml">
  <!ENTITY RFC6829 SYSTEM "http://xml.resource.org/public/rfc/bibxml/reference.RFC.6829.xml">

  <!ENTITY I-D.ietf-tictoc-1588overmpls SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-tictoc-1588overmpls-03">
  <!ENTITY I-D.ietf-pwe3-mpls-eth-oam-iwk SYSTEM "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.draft-ietf-pwe3-mpls-eth-oam-iwk-07">

  ]>

<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<?rfc tocdepth="4"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<?rfc comments="yes"?>
<?rfc inline="yes" ?>

<rfc category="info" ipr="trust200902"
     docName="draft-villamizar-mpls-forwarding-01">

  <front>
    <title abbrev="MPLS Forwarding">
      MPLS Forwarding Compliance and Performance Requirements</title>

    <author role="editor"
	    fullname="Curtis Villamizar" initials="C." surname="Villamizar">
      <organization abbrev="OCCNC">
	Outer Cape Cod Network Consulting, LLC
      </organization>
      <address>
	<email>curtis@occnc.com</email>
      </address>
    </author>

    <author
	    fullname="Kireeti Kompella" initials="K." surname="Kompella">
      <organization>Contrail Systems</organization>
      <address>
	<email>kireeti.kompella@gmail.com</email>
      </address>
    </author>

    <author
	fullname="Shane Amante" initials="S." surname="Amante">
      <organization>Level 3 Communications, Inc.</organization>
      <address>
	<postal>
	  <street>1025 Eldorado Blvd</street>
	  <city>Broomfield, CO</city>
	  <code>80021</code>
	</postal>
	<email>shane@level3.net</email>
      </address>
    </author>

    <author
	    fullname="Andrew Malis" initials="A.G." surname="Malis">
      <organization>Verizon</organization>
      <address>
        <postal>
          <street>60 Sylvan Road</street>
          <city>Waltham, MA</city>
	  <code>02451</code>
        </postal>
        <phone>+1 781-466-2362</phone>
        <email>andrew.g.malis@verizon.com</email>
      </address>
    </author>

    <author
	    fullname="Carlos Pignataro" initials="C.M." surname="Pignataro">
      <organization abbrev="Cisco">Cisco Systems</organization>
      <address>
	<postal>
	  <street>7200-12 Kit Creek Road</street>
	  <city>Research Triangle Park</city>
	  <code>27709</code>
	  <region>NC</region>
	  <country>US</country>
	</postal>
	<!--
	    phone: +1-919-392-7428
	    facsimile: +1-919-869-1438
	-->
	<email>cpignata@cisco.com</email>
      </address>
    </author>

    <date year="2013" />

    <area>Routing</area>
    <workgroup>MPLS</workgroup>

    <keyword>MPLS</keyword>
    <keyword>ECMP</keyword>
    <keyword>link bundling</keyword>
    <keyword>multipath</keyword>
    <keyword>MPLS-TP</keyword>
    <keyword>forwarding</keyword>

    <abstract>
      <t>
	This document provides guidelines for implementors regarding
	MPLS forwarding and a basis for evaluations of forwarding
	implementations.  Guidelines cover many aspects of MPLS
	forwarding.  Topics are highlighted where implementors might
	otherwise overlook practical requirements which are unstated
	or underemphasized or are optional for conformance to RFCs.
	<!-- examples don't seem to belong in an abstract -
	Examples are forwarding when a deep MPLS label stack is
	encountered, MPLS UHP operations which require one or more
	label POP plus a PUSH, guidelines for hashing an MPLS stack
	and payload for multipath, and conformance and performance
	requirements for recent pseudowire and MPLS standards.
	-->
      </t>
    </abstract>

  </front>

  <middle>

    <section title="Introduction">

      <t>
	The initial purpose of this document was to address concerns
	raised on the MPLS WG mailing list about shortcomings in
	implementations of MPLS forwarding.  Documenting existing
	misconceptions and potential pitfalls may help avoid
	repeating past mistakes.  The document has grown to address a
	broad set of forwarding requirements.
      </t>

      <section title="Use of Requirements Language">

	<t>
	  This document is informational.  The key words "MUST", "MUST
	  NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD
	  NOT", "RECOMMENDED", "MAY", and "OPTIONAL" are used only
	  where the requirement is specified in an existing RFC.
	  These keywords SHOULD be interpreted as described in
	  <xref target="RFC2119">RFC 2119</xref>.
	</t>
	<t>
	  Advice given in this document does not use the upper case
	  RFC 2119 keywords, except where it is explicitly noted that
	  the keywords reflect a requirement indicated by operator
	  experience for which there is no existing RFC requirement.
	  Such advice may be ignored by implementations.  Similarly,
	  implementations not claiming conformance to specific RFCs
	  may ignore the requirements of those RFCs.  In both cases,
	  implementors may be doing so at their own peril.
	</t>

      </section>

      <section title="Apparent Misconceptions">
	<t>
	  In early generations of forwarding silicon (which might now be
	  behind us), there apparently were some misconceptions about
	  MPLS.  The following statements provide clarifications.
	  <list style="numbers">
	    <t>
	      There are practical reasons to have more than one or two
	      labels in an MPLS label stack.  Under some circumstances
	      the label stack can become quite deep.
	      See <xref target="sect.basics" />.
	    </t>
	    <t>
	      The label stack MUST be considered to be arbitrarily
	      deep.  Section 3.27.4. "Hierarchy: LSP Tunnels within
	      LSPs" of <xref target="RFC3031">RFC 3031</xref> states
	      "The label stack mechanism allows LSP tunneling to nest
	      to any depth."  If the bottom of the label stack
	      cannot be found, but a sufficient number of labels exist
	      to forward, an LSR MUST forward the packet.  An LSR MUST
	      NOT assume the packet is malformed unless the end of
	      packet is found before the bottom of the stack.
	      See <xref target="sect.basics" />.
	    </t>
	    <t>
	      In networks where deep label stacks are encountered,
	      they are not rare.  Full packet rate performance is
	      required regardless of label stack depth, except where
	      multiple POP operations are required.
	      See <xref target="sect.basics" />.
	    </t>
	    <t>
	      Research has shown that long bursts of short packets
	      with 40 byte or 44 byte IP payload sizes are quite
	      common.
	      This is due to TCP ACK compression
	      <xref target="ACK-compression" />.
	      <list style="letters">
		<t>
		  A forwarding engine SHOULD, if practical, be able to
		  sustain an arbitrarily long sequence of small packets
		  arriving at full interface rate.
		</t>
		<t>
		  <!--
		      clarification requested by Paul Doolan (off-list
		      email with Message-ID:
		      9E246201-2B66-4CD5-A1EA-64DD7BB88A50@att.net
		  -->
		  If indefinite full packet rate for small packets is
		  not practical, a forwarding engine MUST be able to
		  buffer a long sequence of small packets inbound to
		  the on-chip decision engine and sustain full
		  interface rate for some reasonable average packet
		  rate.  Absent this small on-chip buffering, QoS
		  agnostic packet drops can occur.
		</t>
	      </list>
	      See <xref target="sect.pkt-rate" />.
	    </t>
	    <!--
		Both Andy Malis and Shane Amante recommended that the
		phrase "SHOULD be considered mandatory" needs to be
		changed to "MUST".  The "considered mandatory" and
		"considered optional" wording was made more direct.
	    -->
	    <t>
	      The implementor and system designer MUST support
	      pseudowire control word if MPLS-TP is supported or if
	      ACH is being used on a pseudowire
	      <xref target="RFC5586" />.
	      Deployments SHOULD enable pseudowire control word.
	      See <xref target="sect.pw-cw" />.
	      <!--
	      draft-ietf-pwe3-vccv-impl-survey-results
	      recommends that the control word always be used.  Some
	      service providers require control word support for all
	      encapsulations, and always use it.
	      -->
	    </t>
	    <t>
	      The implementor and system designer SHOULD support
	      adding a pseudowire Flow Label
	      <xref target="RFC6391" />.  Deployments MAY enable this
	      feature for appropriate pseudowire types.
	      See <xref target="sect.fat-pw" />.
	    </t>
	    <t>
	      The implementor and system designer SHOULD support
	      adding an MPLS Entropy Label <xref target="RFC6790" />.
	      Deployments MAY enable this feature.
	      See <xref target="sect.entropy" />.
	    </t>
	  </list>
	</t>
      </section>

      <section title="Target Audience">
	<t>
	  This document is intended for multiple audiences:
	  implementor (implementing MPLS forwarding in silicon or in
	  software); systems designer (putting together an MPLS
	  forwarding system); deployer (running an MPLS network).
	  These guidelines are intended to serve the following
	  purposes:
	</t>
	<t>
	  <list style="numbers">
	    <t>
	      Explain what to do and what not to do when a deep label
	      stack is encountered. (audience: implementor)
	    </t>
	    <t>
	      Highlight pitfalls to look for when implementing an MPLS
	      forwarding chip. (audience: implementor)
	    </t>
	    <t>
	      Provide a checklist of features and performance
	      specifications to request.  (audience: systems
	      designer, deployer)
	    </t>
	    <t>
	      Provide a set of tests to perform.  (audience: systems
	      designer, deployer)
	    </t>
	  </list>
	</t>
	<t>
	  The implementor, systems designer, and deployer have a
	  transitive supplier customer relationship.  It is in the best
	  interest of the supplier to review their product against their
	  customer's checklist and customer's customer's checklist if
	  applicable.
	</t>
      </section>

    </section>

    <section anchor="sect.issues" title="Forwarding Issues">

      <t>
	A brief review of forwarding issues is provided in the
	subsections that follow.  This section provides some
	background on why some of these requirements exist.  The
	questions to ask of suppliers and testing are covered in the
	following sections, <xref target="sect.ask" /> and <xref
	target="sect.test" />.
      </t>

      <section anchor="sect.basics" title="Forwarding Basics">
	<t>
	  Basic MPLS architecture and MPLS encapsulation, and
	  therefore packet forwarding, are defined in <xref
	  target="RFC3031" /> and <xref target="RFC3032" />.  RFC3031
	  and RFC3032 are somewhat LDP centric.  RSVP-TE supports
	  traffic engineering (TE) and fast reroute, features that LDP
	  lacks.  The base document for RSVP-TE based MPLS is <xref
	  target="RFC3209" />.
	</t>
	<t>
	  A few RFCs update RFC3032.  Those with impact on forwarding
	  include the following.
	  <list style="numbers">
	    <t>
	      TTL processing is clarified in <xref target="RFC3443" />.
	    </t>
	    <t>
	      The use of MPLS Explicit NULL is modified in <xref
	      target="RFC4182" />.
	    </t>
	    <t>
	      Differentiated Services is supported by
	      <xref target="RFC3270" /> and <xref target="RFC4124" />.
	      The "EXP" field is renamed to "Traffic Class" in
	      <xref target="RFC5462" />, removing any misconception
	      that it was available for experimentation or could be
	      ignored.
	    </t>
	    <t>
	      ECN is supported by <xref target="RFC5129" />.
	    </t>
	    <t>
	      The MPLS G-ACh and GAL are defined in <xref
	      target="RFC5586" />.
	    </t>
	  </list>
	</t>
	<t>
	  Other RFCs have implications for MPLS Forwarding and do not
	  update RFC3032 or RFC3209, including:
	  <list style="numbers">
	    <t>
	      The pseudowire (PW) Associated Channel Header (ACH),
	      defined by <xref target="RFC5085" />, later generalized
	      by the MPLS G-ACh <xref target="RFC5586" />.
	    </t>
	    <t>
	      The Entropy Label Indicator and Entropy Label are
	      defined by <xref target="RFC6790" />.
	    </t>
	  </list>
	</t>
	<t>
	  A few RFCs update RFC3209.  Those that are listed as
	  updating RFC3209 generally impact only RSVP-TE signaling.
	  Forwarding is modified by major extensions built upon
	  RFC3209.
	</t>
	<t>
	  RFCs which impact forwarding are discussed in the following
	  subsections.
	</t>

	<section anchor="sect.resv-labels" title="MPLS Reserved Labels">
	  <t>
	    <xref target="RFC3032" /> specifies that label values 0-15
	    are reserved labels with special meanings.  Three values
	    of NULL label are defined (two of which are later updated
	    by <xref target="RFC4182" />) and a router-alert label is
	    defined.  The original intent was that reserved labels,
	    except the NULL labels, could be sent to the routing
	    engine CPU rather than be processed in forwarding
	    hardware.  Hardware support is required by new RFCs such
	    as those defining Entropy Label and OAM processed as a
	    result of receiving a GAL.  For new reserved labels, some
	    accommodation is needed for LSR that will send the labels
	    to a general purpose CPU.  For example, ELI will only be
	    sent to LSR which have signaled support for
	    <xref target="RFC6790" /> and high OAM packet rate must be
	    negotiated among endpoints.
	  </t>
	  <t>
	    <xref target="RFC3429" /> reserves a label for ITU-T
	    Y.1711, however Y.1711 does not work with multipath and
	    its use is strongly discouraged.
	  </t>
	  <t>
	    The current list of reserved labels can be found on the
	    "Multiprotocol Label Switching Architecture (MPLS) Label
	    Values" registry reachable at IANA's pages at
	    <eref target="http://www.iana.org" />.
	  </t>
	  <t>
	    <!-- not required by any RFC, but ... -->
	    When an unknown reserved label is encountered or a
	    reserved label not directly handled in forwarding hardware
	    is encountered, the packet should be sent to a general
	    purpose CPU by default.  If this capability is supported,
	    there must be an option to either drop or rate limit such
	    packets on a per reserved label value basis.
	  </t>
	</section>

	<section anchor="sect.qos" title="MPLS Differentiated Services">
	  <!--
	      QoS coverage requested by Nabil Bitar.
	  -->
	  <t>
	    <xref target="RFC2474" />
	    deprecates the IP Type of Service (TOS) and IP Precedence
	    (Prec) fields and replaces them with the Differentiated
	    Services Field more commonly known as the Differentiated
	    Services Code Point (DSCP) field.
	    <xref target="RFC2475" />
	    defines the Differentiated Services architecture, which in
	    other forums is often called a Quality of Service (QoS)
	    architecture.
	  </t>
	  <t>
	    MPLS uses the Traffic Class (TC) field to support
	    Differentiated Services <xref target="RFC5462" />.  There
	    are two primary documents describing how DSCP is mapped
	    into TC.
	    <list style="numbers">
	      <t>
		<xref target="RFC3270" />
		defines E-LSP and L-LSP.  E-LSP use a static mapping
		of DSCP into TC.  L-LSP use a per LSP mapping of DSCP
		into TC, with one PHB Scheduling Class (PSC) per
		L-LSP.  Each PSC can use multiple Per-Hop Behavior
		(PHB) values.  For example, the Assured Forwarding
		service defines three PSC, each with three PHB
		<xref target="RFC2597" />.
	      </t>
	      <t>
		<xref target="RFC4124" />
		defines assignment of a class-type (CT) to an LSP,
		where a per CT static mapping of TC to PHB is used.
		<xref target="RFC4124" />
		provides a means to support up to eight E-LSP-like
		mappings of DSCP to TC.
	      </t>
	    </list>
	  </t>
	  <t>
	    To meet Differentiated Services requirements specified in
	    <xref target="RFC3270" />, the following forwarding
	    requirements must be met.
	    An ingress LER MUST be able to select an LSP and then
	    apply a per LSP map of DSCP into TC.  A midpoint LSR MUST
	    be able to apply a per LSP map of TC to PHB.  The number
	    of mappings supported will be far less than the number of
	    LSP supported.
	  </t>

	</section>

	<section anchor="sect.time-sync" title="Time Synchronization">
	  <t>
	    PTP or NTP may be carried over MPLS <xref
	    target="I-D.ietf-tictoc-1588overmpls" />.  Generally NTP
	    will be carried within IP with IP carried in MPLS <xref
	    target="RFC5905" />.  Both PTP and NTP benefit from
	    accurate time stamping of incoming packets and the ability
	    to insert accurate time stamps in outgoing packets.
	  </t>
	  <t>
	    Since the label stack depth may vary, hardware should allow
	    a timestamp to be placed in an outgoing packet at any
	    specified byte position.  It may be necessary to modify
	    layer-2 checksums or frame check sequences after
	    insertion.  PTP and NTP timestamp formats differ slightly.
	  </t>
	  <t>
	    Accurate time synchronization in addition to being
	    generally useful is required for MPLS-TP delay measurement
	    (DM) OAM.  See <xref target="sect.tp-oam" />.
	  </t>

	</section>

	<section anchor="sect.early-deep"
		 title="Uses of Multiple Label Stack Entries">
	  <!--
	      clarified text as a result of comments from Shane
	      Amante.
	  -->
	  <t>
	    MPLS deployments in the early part of the prior decade
	    (circa 2000) tended to support either LDP or RSVP-TE.  LDP
	    was favored by some for its ability to scale to
	    a very large number of PE devices at the edge of the
	    network, without adding deployment complexity.  RSVP-TE
	    was favored, generally in the network core, where traffic
	    engineering and/or fast reroute were considered important.
	  </t>
	  <t>
	    Both LDP and RSVP-TE are used simultaneously within major
	    Service Provider networks using a technique known as "LDP
	    over RSVP-TE Tunneling".  This technique allows service
	    providers to carry LDP tunnels, originating and
	    terminating at PEs, inside of RSVP-TE tunnels, generally
	    between Inter-City P routers, to take advantage of Traffic
	    Engineering and Fast Re-Route on more expensive Inter-City
	    and Inter-Continental Transport paths.  LDP over RSVP-TE
	    tunneling requires a minimum of two MPLS labels: one each
	    for LDP and RSVP-TE.
	  </t>
	  <t>
	    The use of MPLS FRR <xref target="RFC4090" /> added one
	    more label to MPLS traffic, but only when FRR protection
	    was in use.  If LDP over RSVP-TE is in use, and FRR
	    protection is in use, then at least three MPLS labels are
	    present on the label stack on the links through which the
	    Bypass LSP traverses.  FRR is covered in
	    <xref target="sect.frr" />.
	  </t>
	  <t>
	    LDP L2VPN, LDP IPVPN, BGP L2VPN, and BGP IPVPN added
	    support for VPN services that are deployed in the vast
	    majority of service providers.  These VPN services added
	    yet another label, bringing the label stack depth (when
	    FRR is active) to four.
	  </t>
	  <t>
	    Pseudowires and VPN are discussed in further detail in
	    <xref target="sect.pw" /> and
	    <xref target="sect.vpn" />.
	  </t>
	</section>

	<section anchor="sect.link-bundle" title="MPLS Link Bundling">
	  <t>
	    MPLS Link Bundling was the first RFC to address the need for
	    multiple parallel links between nodes <xref target="RFC4201"
	    />.  MPLS Link Bundling is notable in that it tried not to
	    change MPLS forwarding, except in specifying the "All-Ones"
	    component link.  MPLS Link Bundling is seldom if ever
	    deployed.  Instead multipath techniques described in <xref
	    target="sect.multipath" /> are used.
	  </t>
	</section>

	<section anchor="sect.hierarchy" title="MPLS Hierarchy">
	  <t>
	    MPLS hierarchy is defined in <xref target="RFC4206" />.
	    Although RFC4206 is considered part of GMPLS, the Packet
	    Switching Capable (PSC) portion of the MPLS hierarchy is
	    applicable to MPLS and may be supported in an otherwise
	    GMPLS free implementation.  The MPLS PSC hierarchy remains
	    the most likely means of providing further scaling in an
	    RSVP-TE MPLS network, particularly where the network is
	    designed to provide RSVP-TE connectivity to the edges.  This
	    is the case for envisioned MPLS-TP networks.  The use of the
	    MPLS PSC hierarchy can add as many as four labels to a label
	    stack, though it is likely that only one layer of PSC will
	    be used in the near future.
	  </t>
	</section>

	<section anchor="sect.frr" title="MPLS Fast Reroute (FRR)">
	  <!-- section suggested by Carlos -->

	  <t>
	    Fast reroute is defined by <xref target="RFC4090" />.
	    Two significantly different methods are the "One-to-One
	    Backup" method which uses the "Detour LSP" and the
	    "Facility Backup" which uses a "bypass tunnel".  These are
	    commonly referred to as the detour and bypass methods
	    respectively.
	  </t>
	  <t>
	    The detour method makes use of a presignaled LSP.
	    Hardware assistance is needed for detour FRR only if
	    necessary to accomplish local repair of a large number of
	    LSP within the 10s of milliseconds target.  For each
	    affected LSP a SWAP operation must be reprogrammed or
	    otherwise switched over.  The use of detour FRR doubles
	    the number of LSP terminating at any given hop and will
	    increase the number of LSP within a network by a factor
	    dependent on the average detour path length.
	  </t>
	  <t>
	    The bypass method makes use of a tunnel that is unused
	    when no fault exists but may carry many LSP when a local
	    repair is required.  There is no presignaling indicating
	    which working LSP will be diverted into any specific
	    bypass LSP.  The egress LSR of the bypass LSP MUST use
	    platform label space (as defined in
	    <xref target="RFC3031" />) so that an LSP working path on
	    any given interface can be backed up using a bypass LSP
	    terminating on any other interface.  Hardware assistance
	    is needed if necessary to accomplish local repair of a
	    large number of LSP within the 10s of milliseconds target.
	    For each affected LSP a SWAP operation must be
	    reprogrammed or otherwise switched over with an additional
	    PUSH of the bypass LSP label.  In addition the use of
	    platform label space impacts the size of the LSR ILM for
	    LSR with a very large number of interfaces.
	  </t>

	</section>

	<section anchor="sect.pw" title="Pseudowire Encapsulation">

	  <!--
	    section added at request of Andy Malis with Andy's PWE3
	    chair hat on.  Message-ID:
	    CAA=duU3JnmHH2zsnmWQs_Fr5M+c-v48sipTx-RV7D7OiXBEjAA@mail.gmail.com
	    76E11C3E-30EA-453F-A6F1-FF33263D77AA@gmail.com
	    201210131948.q9DJmpno047601@gateway1.orleans.occnc.com
	  -->

	  <t>
	    The pseudowire (PW) architecture is defined in
	    <xref target="RFC3985" />.

	    A pseudowire, when carried over MPLS, adds one or more
	    additional label entries to the MPLS label stack.

	    A PW Control Word is defined in
	    <xref target="RFC4385" />
	    with motivation for defining the control word in
	    <xref target="RFC4928" />.

	    The PW Associated Channel defined in <xref
	    target="RFC4385" /> is used for OAM in <xref
	    target="RFC5085" />.

	    The PW Flow Label is defined in
	    <xref target="RFC6391" />
	    and is discussed further in this document in
	    <xref target="sect.fat-pw" />.
	  </t>

	  <t>
	    There are numerous pseudowire encapsulations, supporting
	    emulation of services such as Frame Relay, ATM, Ethernet,
	    TDM, and SONET/SDH over packet switched networks (PSNs)
	    using IP or MPLS.
	  </t>
	  <t>
	    The pseudowire encapsulation is out of scope for this
	    document.  Pseudowire impact on MPLS forwarding at
	    midpoint LSR is within scope.  The impact on ingress MPLS
	    PUSH and egress MPLS UHP POP are within scope.  While
	    pseudowire encapsulation is out of scope, some advice is
	    given on sequence number support.
	  </t>

	  <section anchor="sect.pw-seq" title="Pseudowire Sequence Number">
	    <!-- inspired by Andy Malis -->
	    <t>
	      Pseudowire (PW) sequence number support is most
	      important for PW payload types with a high expectation
	      of in-order delivery.  Resequencing support, rather than
	      dropping at egress on out of order arrival, is most
	      important for PW payload types with a high expectation
	      of lossless delivery.  For example, TDM payloads require
	      sequence number support and require resequencing
	      support.  The same is true of ATM CBR service.  ATM VBR
	      or ABR may have somewhat relaxed requirements, but
	      generally require ATM Early Packet Discard (EPD) or ATM
	      Partial Packet Discard (PPD)
	      <xref target="ATM-EPD-and-PPD" />.  Though sequence
	      number support and resequencing support are beneficial
	      to PW packet oriented payloads such as FR and Ethernet,
	      they are highly desirable but not as strongly required.
	    </t>
	    <t>
	      Packet reorder should be rare except in a small number
	      of circumstances, most of which are due to network
	      design or equipment design errors:
	      <list style="numbers">
		<t>
		  In the most common case, reordering is rare,
		  occurring only when a network or equipment
		  fault forces traffic on a new path with different
		  delay. The packet loss that accompanies a network or
		  equipment fault is generally more disruptive than
		  any reordering which may occur.
		</t>
		<t>
		  A path change can be caused by reasons other than a
		  network or equipment fault, such as administrative
		  routing change.  This may result in packet
		  reordering but generally without any packet loss.
		</t>
		<t>
		  If the edge is not using pseudowire control word
		  (CW) and the core is using multipath, reordering
		  will be far more common.  If this is occurring, the
		  best solution is to use CW on the edge, rather than
		  try to fix the reordering using resequencing.
		</t>
		<t>
		  <!-- the MUST below upholds TP requirements -->
		  Another avoidable case is where some core equipment
		  has multipath and for some reason insists on
		  periodically installing a new random number as the
		  multipath hash seed.  If supporting MPLS-TP,
		  equipment MUST provide a means to disable periodic
		  hash reseeding and deployments MUST disable periodic
		  hash reseeding.  Even if not supporting MPLS-TP,
		  equipment should provide a means to disable periodic
		  hash reseeding and deployments should disable
		  periodic hash reseeding.
		</t>
	      </list>
	    </t>

	  </section>

	</section>

	<section anchor="sect.vpn" title="Layer-2 and Layer-3 VPN">

	  <!--
	    If PW is mentioned, then VPN and friends deserve mention.
	  -->

	  <t>
	    Layer-2 VPN
	    <xref target="RFC4664" />
	    and Layer-3 VPN
	    <xref target="RFC4110" />
	    add one or more label entries to the MPLS label stack.  VPN
	    encapsulations are out of scope for this document.  Their
	    impact on forwarding at midpoint LSR is within scope.
	  </t>

	  <t>
	    Any of these services may be used on an MPLS Entropy Label
	    enabled ingress and egress (see
	    <xref target="sect.entropy" />
	    for discussion of Entropy Label) which would add an
	    additional label to the MPLS label stack.  The need to
	    provide a useful Entropy Label value impacts the
	    requirements of the VPN ingress LER but is out of
	    scope for this document.
	  </t>

	</section>

      </section>

      <section anchor="sect.mcast" title="MPLS Multicast">
	<!--
	    Multicast coverage requested by Nabil Bitar.
	-->
	<t>
	  MPLS Multicast encapsulation is clarified in
	  <xref target="RFC5332" />.
	  MPLS Multicast may be signaled using RSVP-TE
	  <xref target="RFC4875" />
	  or LDP
	  <xref target="RFC6388" />.
	</t>
	<t>
	  <xref target="RFC4875" />
	  defines a root initiated RSVP-TE LSP setup rather than leaf
	  initiated join used in IP multicast.
	  <xref target="RFC6388" />
	  defines a leaf initiated LDP setup.
	  Both
	  <xref target="RFC4875" />
	  and
	  <xref target="RFC6388" />
	  define point to multipoint (P2MP) LSP setup.
	  <xref target="RFC6388" />
	  also defines multipoint to multipoint (MP2MP) LSP setup.
	</t>
	<t>
	  A P2MP LSP has a single source.  An LSR may be a leaf
	  node, an intermediate node, or a "bud" node.  A bud serves
	  as both a leaf and intermediate.  At a leaf an MPLS POP is
	  performed.  The payload may be an IP Multicast packet that
	  requires further replication.  At an intermediate node an
	  MPLS SWAP is performed.  The bud requires that both a POP
	  and SWAP be performed for the same incoming packet.
	</t>
	<t>
	  One strategy to support P2MP functionality is to POP at the
	  LSR ingress and then optionally PUSH labels at each LSR
	  egress.  A given LSR egress chip may support multiple egress
	  interfaces, each of which requires a copy, but each with a
	  different set of added labels and layer-2 encapsulation.
	  Some physical interfaces may have multiple sub-interfaces
	  (such as Ethernet VLAN or channelized interfaces) each
	  requiring a copy.
	</t>
	<t>
	  <!-- length discussion with Shane about multicast replication -->
	  If packet replication is performed at LSR ingress, then the
	  ingress interface performance may suffer.  If the packet
	  replication is performed within an LSR switching fabric and
	  at LSR egress, congestion of egress interfaces cannot make
	  use of backpressure to ingress interfaces using techniques
	  such as virtual output queuing (VOQ).  If buffering is
	  primarily supported at egress, then the need for
	  backpressure is minimized.  There may be no good solution
	  for high volumes of multicast traffic if VOQ is used.
	</t>
	<t>
	  MP2MP LSP differ in that any branch may provide an input,
	  including a leaf.  Packets must be replicated onto all other
	  branches.  This forwarding is often implemented as multiple
	  P2MP forwarding trees, one for each potential input.
	</t>

      </section>

      <section anchor="sect.pkt-rate" title="Packet Rates">
	<t>
	  <!--
	      clarification regarding 100 Gb/s assumption requested by
	      Paul Doolan (off-list email with Message-ID:
	      9E246201-2B66-4CD5-A1EA-64DD7BB88A50@att.net
	  -->
	  While average packet size of Internet traffic may be large,
	  long sequences of small packets have both been predicted in
	  theory and observed in practice.  Traffic compression and
	  TCP ACK compression can conspire to create long sequences of
	  packets of 40-44 bytes in payload length.  If carried over
	  Ethernet, the 64 byte minimum payload applies, yielding a
	  packet rate of approximately 150 Mpps (million packets per
	  second) for the duration of the burst on a nominal 100 Gb/s
	  link.  The peak rate for other encapsulations can
	  be as high as 250 Mpps (for example IP or MPLS encapsulated
	  using GFP over OTN ODU4).
	</t>
	<t>
	  <!--
	      Thanks to Pablo Frank for pointing out the sawtooth
	      effect.  The next two paragraphs are the result.
	  -->
	  It is also possible that the packet rate for a minimum
	  payload size, such as 64 byte (64B) payload for Ethernet, is
	  acceptable, but the rate declines for other packet sizes,
	  such as 65B payload.  There are other packet rates of
	  interest besides TCP ACK.  For example, a TCP ACK carried
	  over an Ethernet PW over MPLS over Ethernet may occupy 82B
	  or 82B plus an increment of 4B if additional MPLS labels are
	  present.
	</t>
	<t>
	  A graph of packet rate vs. packet size often displays a
	  sawtooth.  The sawtooth is commonly due to a memory
	  bottleneck and memory widths, sometimes internal cache, but
	  often a very wide external buffer memory interface.  In some
	  cases it may be due to a fabric transfer width.  A fine
	  packing, rounding up to the nearest 8B or 16B will result in
	  a fine sawtooth with small degradation for 65B, and even
	  less for 82B packets.  A coarse packing, rounding up to 64B
	  can yield a sharper drop in performance for 65B packets, or
	  perhaps more important, a larger drop for 82B packets.
	</t>
	<t>
	  The loss of some TCP ACK packets is not the primary concern
	  when such a burst occurs.  When a burst occurs, any other
	  packets, regardless of packet length and packet QoS are
	  dropped once on-chip input buffers prior to the decision
	  engine are exceeded.  Buffers in front of the packet
	  decision engine are often very small or non-existent (less
	  than one packet of buffer) causing significant QoS agnostic
	  packet drop.
	</t>
	<t>
	  Internet service providers and content providers generally
	  specify full rate forwarding with 40 byte payload packets as a
	  requirement.  This requirement often can be waived if the
	  provider can be convinced that when long sequences of short
	  packets occur no packets will be dropped.
	</t>
	<t>
	  <!--
	      Following paragraph based on suggestion made by Shane.
	  -->
	  Many equipment suppliers have pointed out that the extra
	  cost in designing hardware capable of processing the minimum
	  size packets at full line rate is significant for very high
	  speed interfaces.  If hardware is not capable of processing
	  the minimum size packets at full line rate, then that
	  hardware MUST be capable of handling large bursts of small
	  packets, a condition which is often observed.  This level of
	  performance is necessary to meet Differentiated Services
	  <xref target="RFC2475" />
	  requirements for without it, packets are lost prior to
	  inspection of the IP DSCP field
	  <xref target="RFC2474" />
	  or MPLS TC field <xref target="RFC5462" />.
	</t>
	<t>
	  With adequate on-chip buffers before the packet decision
	  engine, an LSR can absorb a long sequence of short packets.
	  Even if the output is slowed to the point where light
	  congestion occurs, the packets, having cleared the decision
	  process, can make use of larger VOQ or output side buffers
	  and be dealt with according to configured QoS treatment,
	  rather than dropped completely at random.
	</t>
	<t>
	  <!--
	      clarification requested by Paul Doolan (off-list email
	      with Message-ID:
	      9E246201-2B66-4CD5-A1EA-64DD7BB88A50@att.net
	  -->
	  These on-chip buffers need not contribute significant delay
	  since they are only used when the packet decision engine is
	  unable to keep up, not in response to congestion, plus these
	  buffers are quite small.  For example, an on-chip buffer
	  capable of handling 4K packets of 64 bytes in length, or
	  256KB, corresponds to 2 msec on a 10 Mb/s link and 0.2 usec
	  on a 100 Gb/s link.  If the packet decision engine is
	  capable of handling packets at 90% of the full rate for
	  small packets, then the maximum added delay is 0.2 msec and
	  20 nsec respectively, and this delay only applies if a 4K
	  burst of short packets occurs.  When no burst of short
	  packets is being processed, no delay is added.
	</t>
	<t>
	  <!--
	      Requirement changed to list format to improve clarity in
	      response to comment from Shane.
	  -->
	  Packet rate requirements apply regardless of which network
	  tier equipment is deployed in.  Whether deployed in the
	  network core or near the network edges, one of the two
	  conditions MUST be met:
	  <list style="numbers">
	    <t>
	      Packets must be processed at full line rate with minimum
	      sized packets.  -OR-
	    </t>
	    <t>
	      Packets must be processed at a rate well under generally
	      accepted average packet sizes, with sufficient buffering
	      prior to the packet decision engine to accommodate long
	      bursts of small packets.
	    </t>
	  </list>
	</t>
      </section>

      <section anchor="sect.multipath" title="MPLS Multipath Techniques">
	<!--
	    Some comments by Shane Amonte are reflected in the
	    description of the extent of multipath use in providers.
	-->
	<t>
	  In any large provider network, whether service provider or
	  content provider, hash based multipath techniques are used in
	  the core and in the edge.  In many of these providers hash based
	  multipath is also used in the larger metro networks.
	</t>
	<t>
	  The most common multipath techniques are ECMP applied at
	  the IP forwarding level, Ethernet LAG with inspection of the
	  IP payload, and multipath on links carrying both IP and
	  MPLS, where the IP header is inspected below the MPLS label
	  stack.  In most core networks, the vast majority of traffic
	  is MPLS encapsulated.
	</t>
	<t>
	  In order to support an adequately balanced load distribution
	  across multiple links, IP header information must be used.
	  Common practice today is to reinspect the IP headers at each
	  LSR and use the label stack and IP header information in a
	  hash performed at each LSR.  Further details are provided in
	  <xref target="sect.mp-hash" />.
	</t>
	<t>
	  The use of this technique is so ubiquitous in provider
	  networks that lack of support for multipath makes any
	  product unsuitable for use in large core networks.  This
	  will continue to be the case in the near future, even as
	  deployment of MPLS Entropy Label begins to relax the core
	  LSR multipath performance requirements given the existing
	  deployed base of edge equipment without the ability to add
	  an Entropy Label.
	</t>
	<t>
	  A generation of edge equipment supporting the ability to add
	  an MPLS Entropy Label is needed before the performance
	  requirements for core LSR can be relaxed.  However, it is
	  likely that two generations of deployment in the future will
	  allow core LSR to support full packet rate only when a
	  relatively small number of MPLS labels need to be inspected
	  before hashing.  For now, don't count on it.
	</t>
	<t>
	  <!--
	      Paragraph inspired by comment from Shane.
	  -->
	  Common practice today is to reinspect the packet at each LSR
	  and use the label stack and the IP header fields as input
	  to a hash algorithm performed on each packet at each LSR in
	  the network combined with a hash seed that is selected
	  by each LSR.  Where flow labels or entropy
	  labels are used, a hash seed must be used.
	</t>

	<section anchor="sect.pw-cw" title="Pseudowire Control Word">
	  <t>
	    Within the core of a network some form of multipath is
	    almost certain to be used.  Multipath techniques deployed
	    today are likely to be looking beneath the label stack for
	    an opportunity to hash on IP addresses.
	  </t>
	  <t>
	    A pseudowire encapsulated at a network edge must have a
	    means to prevent reordering within the core if the
	    pseudowire will be crossing a network core, or any part of
	    a network topology where multipath is used
	    (see <xref target="RFC4385" />
	    and <xref target="RFC4928" />).
	  </t>
	  <t>
	    Not supporting the ability to encapsulate a pseudowire
	    with a control word may lock a product out from
	    consideration.  A pseudowire capability without control
	    word support might be sufficient for applications that
	    are strictly both intra-metro and low bandwidth.  However
	    a provider with other applications will very likely not
	    tolerate having equipment which can only support a subset
	    of their pseudowire needs.
	  </t>
	</section>

	<section anchor="sect.large-uflow" title="Large Microflows">

	  <t>
	    <!--
		reflects comment from Paul Doolan (off-list email)
		Message-Id: <39A5E8FD-EB2B-496B-890D-C085EFDC0F2A@att.net>
	    -->
	    Where multipath makes use of a simple hash and simple load
	    balance such as modulo or other fixed allocation (see
	    <xref target="sect.multipath" />), large microflows can be
	    present that each consume 10% of the capacity of a
	    component link of a potentially congested composite link.
	    One such microflow can upset the traffic balance and more
	    than one can in effect reduce the effective capacity of
	    the entire composite link by more than 10%.
	  </t>
	  <t>
	    When even a very small number of large microflows are
	    present, there is a significant probability that more
	    than one of these large microflows could fall on the same
	    component link.  If the traffic contribution from large
	    microflows is small, the probability for three or more
	    large microflows on the same component link drops
	    significantly.  Therefore in a network where a significant
	    number of parallel 10 Gb/s links exists, even a 1 Gb/s
	    pseudowire or other large microflow that could not
	    otherwise be subdivided into smaller flows should carry a
	    flow label or entropy label if possible.
	  </t>
	  <t>
	    Active management of the hash space to better accommodate
	    large microflows has been implemented and deployed in the
	    past, however such techniques are out of scope for this
	    document.
	  </t>

	</section>

	<section anchor="sect.fat-pw" title="Pseudowire Flow Label">
	  <t>
	    Unlike a pseudowire control word, a pseudowire flow label
	    <xref target="RFC6391" />, is required only for relatively
	    large capacity pseudowires.  There are many cases where a
	    pseudowire flow label makes sense.  Any service such as a
	    VPN which carries IP traffic within a pseudowire can make
	    use of a pseudowire flow label.
	  </t>
	  <t>
	    <!-- fix pointed out by Carlos -->
	    Any pseudowire carried over MPLS which makes use of the
	    pseudowire control word and does not carry a
	    flow label is in effect a single microflow (in <xref
	    target="RFC2475" /> terms).
	  </t>
	</section>

	<section anchor="sect.entropy" title="MPLS Entropy Label">
	  <t>
	    The MPLS Entropy Label simplifies flow group
	    identification <xref target="RFC6790" /> in the network
	    core.  Prior to the MPLS Entropy Label, core LSR needed to
	    inspect the entire label stack and often the IP headers to
	    provide an adequate distribution of traffic when using
	    multipath techniques (see <xref target="sect.mp-hash" />).
	    With the use of MPLS Entropy Label, a hash can be
	    performed closer to network edges, placed in the label
	    stack, and used within the network core.
	  </t>
	  <t>
	    The MPLS Entropy Label is capable of avoiding full label
	    stack and payload inspection within the core where
	    performance levels are most difficult to achieve (see
	    <xref target="sect.pkt-rate" />).
	    The label stack inspection can be terminated as soon as the
	    first Entropy Label is encountered, which is generally after a
	    small number of labels are inspected.
	  </t>
	  <t>
	    In order to provide these benefits in the core, LSR closer
	    to the edge must be capable of adding an entropy label.
	    This support may not be required in the access tier, the
	    tier closest to the customer, but is likely to be required
	    in the edge or the border to the network core.  LSR peering
	    with external networks will also need to be able to add an
	    Entropy Label.
	  </t>
	</section>

	<section anchor="sect.mp-hash" title="Fields Used for Multipath">

	  <!--
	      This section with its two subsections was created as a
	      result of email conversation between me (curtis) and
	      kireeti.  Thread ending with:
	      <201210182135.q9ILZKGh028695@gateway1.orleans.occnc.com>
	      Subject: Re: [mpls] mpls-forwarding (was: Question
		regarding MPLS reserved label with ECMP)
	  -->

	  <t>
	    The most common multipath techniques are based on a hash
	    over a set of fields.  Regardless of whether a hash is
	    used or some other method is used, there is a limited
	    set of fields which can safely be used for multipath.
	  </t>

	  <section anchor="sect.label-hash" title="MPLS Fields in Multipath">

	    <t>
	      If the "outer" or "first" layer of encapsulation is
	      MPLS, then label stack entries are used in the hash.
	      Within a finite amount of time (and for small packets
	      arriving at high speed that time can be quite limited) only
	      a finite number of label entries can be inspected.
	      Pipelined or parallel architectures improve this, but
	      the limit is still finite.
	    </t>
	    <t>
	      The following guidelines are provided for use of MPLS
	      fields in multipath load balancing.
	      <list style="numbers">
		<t>
		  Only the 20 bit label field SHOULD be used.  The TTL
		  field SHOULD NOT be used.  The S bit MUST NOT be
		  used.  The TC field (formerly EXP) MUST NOT be used.
		  See below this list for reasons.
		</t>
		<t>
		  If an ELI label is found, then if the LSR supports
		  Entropy Label, the EL label field in the next label
		  entry (the EL) SHOULD be used and label entries
		  below that label SHOULD NOT be used and the MPLS
		  payload SHOULD NOT be used.
		  See below this list for reasons.
		</t>
		<t>
		  Reserved labels (label values 0-15) MUST NOT be
		  used.  In particular, GAL and RA MUST NOT be used so
		  that OAM traffic follows the same path as payload
		  packets with the same label stack.
		</t>
		<t>
		  The most entropy is generally found in the label
		  stack entries near the bottom of the label stack
		  (innermost label, closest to S=1 bit).  If the
		  entire label stack cannot be used (or entire stack
		  up to an EL), then it is better to use as many
		  labels as possible closest to the bottom of stack.
		</t>
		<t>
		  If no ELI is encountered, and the first nibble of
		  payload contains a 4 (IPv4) or 6 (IPv6), an
		  implementation SHOULD support the ability to
		  interpret the payload as IPv4 or IPv6 and extract
		  and use appropriate fields from the IP headers.
		  This feature is considered a hard requirement by
		  many service providers.  If supported, there MUST be
		  a way to disable it (if, for example, PW without CW
		  are used).  This ability to disable this feature is
		  considered a hard requirement by many service
		  providers.  Therefore an implementation has a very
		  strong incentive to support both options.
		</t>
		<t>
		  A label which is popped at egress (UHP POP) SHOULD
		  NOT be used.  A label which is popped at the
		  penultimate hop (PHP POP) SHOULD be used.
		</t>
	      </list>
	    </t>
	    <t>
	      Apparently some chips have made use of the TC (formerly
	      EXP) bits as a source of entropy.  This is very harmful
	      since it will reorder Assured Forwarding (AF) traffic
	      <xref target="RFC2597" />
	      when a subset does not conform to the configured rates
	      and is remarked but not dropped at a prior LSR.  Traffic
	      which uses MPLS ECN
	      <xref target="RFC5129" />
	      can also be reordered if TC is used for entropy.
	      Therefore, as stated in the guidelines above, the TC
	      field (formerly EXP) MUST NOT be used in multipath load
	      balancing as it violates Differentiated Services Ordered
	      Aggregate (OA) requirements in these two instances.
	    </t>
	    <t>
	      Use of the MPLS label entry S bit would result in
	      putting OAM traffic on a different path if the addition
	      of a GAL at the bottom of stack removed the S bit from
	      the prior label.
	    </t>
	    <t>
	      If an ELI label is found, then if the LSR supports
	      Entropy Label, the EL label field in the next label
	      entry (the EL) SHOULD be used and the search for
	      additional entropy within the packet SHOULD be
	      terminated.  Failure to terminate the search will impact
	      client MPLS-TP LSP carried within server MPLS LSP.  A
	      network operator has the option to use administrative
	      attributes as a means to identify LSR which do not
	      terminate the entropy search at the first EL.
	      Administrative attributes are defined in
	      <xref target="RFC3209" />.  Some configuration is
	      required to support this.
	    </t>
	    <t>
	      If the PHP POP label is not used, then for any PW for
	      which CW is used, there is no basis for multipath load
	      split.  In some networks it is infeasible to put all PW
	      traffic on one component link.  Any PW which does not
	      use CW will be improperly split regardless of whether
	      the PHP POP label is used.
	    </t>

	  </section>

	  <section anchor="sect.ip-hash" title="IP Fields in Multipath">

	    <t>
	      Inspecting the IP payload provides the most entropy in
	      provider networks.  The practice of looking past the
	      bottom of stack label for an IP payload is well accepted
	      and documented in
	      <xref target="RFC4928" />
	      and in other RFCs.
	    </t>
	    <t>
	      <!-- inspired by Shane -->
	      Where IP is mentioned in the document, both IPv4 and
	      IPv6 apply.  All LSRs MUST fully support IPv6.
	      <!-- or face the wrath of Shane -->
	    </t>
	    <t>
	      When information in the IP header is used, the following
	      guidelines apply:
	      <list style="numbers">
		<t>
		  Both the IP source address and IP destination
		  address SHOULD be used.  There MAY be an option to
		  reverse the order of these addresses, improving the
		  ability to provide symmetric paths in some cases.
		  Many service providers require that both addresses
		  be used.
		</t>
		<t>
		  Implementations SHOULD allow inspection of the IP
		  protocol field and use of the UDP or TCP port
		  numbers.  For many service providers this feature is
		  considered mandatory, particularly for enterprise,
		  data center, or edge equipment.  If this feature is
		  provided, it SHOULD be possible to disable use of
		  TCP and UDP ports.  Many service providers consider
		  it a hard requirement that use of UDP and TCP ports
		  can be disabled.  Therefore there is a strong
		  incentive for implementations to provide both
		  options.
		</t>
		<t>
		  <!-- inspired by Shane -->
		  Equipment suppliers MUST NOT make assumptions that
		  because the IP version field is equal to 4 (an IPv4
		  packet) that the IP protocol will either be TCP (IP
		  protocol 6) or UDP (IP protocol 17) and blindly
		  fetch the data at the offset where the TCP or UDP
		  ports would be found.  With IPv6, TCP and UDP port
		  numbers are not at fixed offsets.  With IPv4 packets
		  carrying IP options, TCP and UDP port numbers are
		  not at fixed offsets.
		</t>
		<t>
		  The IPv6 header flow field SHOULD be used.  This is
		  the explicit purpose of the IPv6 flow field, however
		  observed flow fields rarely contain a non-zero
		  value.  Some uses of the flow field have been
		  defined such as <xref target="RFC6438" />.  In the
		  absence of MPLS encapsulation, the IPv6 flow field
		  can serve a role equivalent to Entropy Label.
		</t>
		<t>
		  <!-- inspired by Shane -->
		  Support for other protocols that share a common Layer-4
		  header such as RTP, UDP-lite, SCTP and DCCP SHOULD
		  be provided, particularly for edge or access
		  equipment where additional entropy may be needed.
		  Equipment SHOULD also use RTP, UDP-lite, SCTP and
		  DCCP headers when creating an Entropy Label.
		  <!-- appendix on TCP dominance if we need one. -->
		</t>
		<t>
		  Similar to avoiding TC in MPLS, the IP DSCP, and ECN
		  bits MUST NOT be used.  The IPv4 TTL or IPv6 Hop
		  Count SHOULD NOT be used.  Note that the IP TOS
		  field was deprecated (<xref target="RFC0791" /> was
		  updated by <xref target="RFC2474" />).  No part of
		  the IP DSCP (formerly IP PREC and IP TOS bits) field
		  can be used.
		</t>
		<t>
		  Some IP encapsulations support tunneling, such as
		  IP-in-IP, GRE, L2TPv3, and IPSEC.  These provide a
		  greater source of entropy which some provider
		  networks carrying large amounts of tunneled traffic
		  may need.  The use of tunneling header information
		  is out of scope for this document.
		</t>
	      </list>
	    </t>
	    <t>
	      <!--
		  More clarifications inspired by Shane.
	      -->
	      This document makes the following recommendations.
	      These recommendations are not required to claim
	      compliance to any existing RFC; therefore implementors
	      are free to ignore them, but due to service provider
	      requirements may be doing so at their own peril.
	      The use of IP addresses MUST be supported and TCP and
	      UDP ports (conditional on the protocol field and
	      properly located) MUST be supported.  The ability to
	      disable use of UDP and TCP ports MUST be available.
	      Though potentially very useful in some networks, it is
	      uncommon to support using payloads of tunneling
	      protocols carried over IP.  Though the use of tunneling
	      protocol header information is out of scope for this
	      document, it is not discouraged.
	    </t>

	  </section>

	  <section anchor="sect.fl-gen" title="Fields Used in Flow Label">

	    <t>
	      The ingress to a pseudowire (PW) can extract information
	      from the payload being encapsulated to create a flow
	      label.  <xref target="RFC6391" /> references IP carried
	      in Ethernet as an example.  The Native Service
	      Processing (NSP) function defined in
	      <xref target="RFC3985" /> differs with pseudowire type.
	      It is in the NSP function where information for a
	      specific type of PW can be extracted for use in a flow
	      label.  Which fields to use for any given PW NSP is out
	      of scope for this document.
	    </t>

	  </section>

	  <section anchor="sect.el-gen" title="Fields Used in Entropy Label">

	    <t>
	      An entropy label is added at the ingress to an LSP.  The
	      payload being encapsulated is most often MPLS, a PW, or
	      IP.  The payload type is identified by the layer-2
	      encapsulation (Ethernet, GFP, POS, etc).
	    </t>
	    <t>
	      If the payload is MPLS, then the information used to
	      create an entropy label is the same information used for
	      local load balancing (see
	      <xref target="sect.label-hash" />).  This information
	      MUST be extracted for use in generating an entropy label
	      even if the LSR local egress interface is not a
	      multipath.
	    </t>
	    <t>
	      Of the non-MPLS payload types, only payloads that are
	      forwarded are of interest.  For example, ARP is not
	      forwarded and CLNP (used only for ISIS) is not
	      forwarded.
	    </t>
	    <t>
	      The non-MPLS payload types of greatest interest are IPv4
	      and IPv6.  The guidelines in
	      <xref target="sect.ip-hash" />
	      apply to fields used to create an entropy label.
	    </t>
	    <t>
	      The IP tunneling protocols mentioned in
	      <xref target="sect.ip-hash" />
	      may be more applicable to generation of an entropy label
	      at edge or access where deep packet inspection is
	      practical due to lower interface speeds than in the core
	      where deep packet inspection may be impractical.
	    </t>

	  </section>

	</section>

      </section>

      <section anchor="sect.tp-uhp" title="MPLS-TP and UHP">
	<t>
	  MPLS-TP introduces forwarding demands that will be extremely
	  difficult to meet in a core network.  Most troublesome is
	  the requirement for Ultimate Hop Popping (UHP, the opposite
	  of Penultimate Hop Popping or PHP).  Using UHP opens the
	  possibility of one or more MPLS POP operation plus an MPLS
	  SWAP operation for each packet.  The potential for multiple
	  lookups and multiple counter instances per packet exists.
	</t>
	<t>
	  As networks grow and tunneling of LDP LSPs into RSVP-TE LSPs
	  is used, and/or RSVP-TE hierarchy is used, the requirement to
	  perform one or two or more MPLS POP operations plus an MPLS
	  SWAP operation (and possibly a PUSH or two) increases.  If
	  MPLS-TP LM (link monitoring) OAM is enabled at each layer,
	  then a packet and byte count MUST be maintained for each POP
	  and SWAP operation so as to offer OAM for each layer.
	</t>
      </section>

      <section anchor="sect.oam+gtsm" title="OAM and DoS Protection">
	<t>
	  Denial of service (DoS) protection is an area requiring hardware
	  support that is often overlooked or inadequately considered.
	  Hardware assist is also needed for OAM, particularly the
	  more demanding MPLS-TP OAM.
	</t>

	<section anchor="sect.gtsm" title="DoS Protection">
	  <t>
	    Modern equipment supports a number of control plane and
	    management plane protocols.  Generally no single means of
	    protecting network equipment from denial of service (DoS)
	    attacks is sufficient, particularly for high speed
	    interfaces.  This problem is not specific to MPLS, but is
	    a topic that cannot be ignored when implementing or
	    evaluating MPLS implementations.
	  </t>
	  <t>
	    Two types of protections are often cited as primary means
	    of protecting against attacks of all kinds.
	    <list style="hanging" hangIndent="4">
	      <t hangText="Isolated Control/Management Traffic">
		<vspace blankLines="0" />
		Control and Management traffic can be carried
		out-of-band (OOB), meaning not intermixed with
		payload.  For MPLS use of G-ACh and GAL to carry
		control and management traffic provides a means of
		isolation from potentially malicious payload.  Used
		alone, the compromise of a single node, including a
		small computer at a network operations center, could
		compromise an entire network.  Implementations which
		send all G-ACh/GAL traffic directly to a routing
		engine CPU are subject to DoS attack as a result of
		such a compromise.
	      </t>
	      <t hangText="Cryptographic Authentication">
		<vspace blankLines="0" />
		Cryptographic authentication can very effectively
		prevent malicious injection of control or management
		traffic.  Cryptographic authentication can in some
		circumstances be subject to DoS attack by overwhelming
		the capacity of the decryption with a high volume of
		malicious traffic.  For very low speed interfaces
		cryptographic authentication can be performed by the
		general purpose CPU used as a routing engine.  For all
		other cases, cryptographic hardware may be needed.
		For very high speed interfaces, even cryptographic
		hardware can be overwhelmed.
	      </t>
	    </list>
	  </t>
	  <t>
	    Some control and management protocols are often carried
	    with payload traffic.  This is commonly the case with BGP,
	    T-LDP, and SNMP.  It is often the case with RSVP-TE.
	    Even when carried over G-ACh/GAL additional measures can
	    reduce the potential for a minor breach to be leveraged to
	    a full network attack.
	  </t>
	  <t>
	    Some of the additional protections are supported by
	    hardware packet filtering.
	    <list style="hanging" hangIndent="4">
	      <t hangText="GTSM">
		<vspace blankLines="0" />
		<xref target="RFC5082" />
		defines a mechanism that uses the IPv4 TTL or IPv6 Hop
		Limit fields to ensure control traffic that can only
		originate from an immediate neighbor is not forged and
		originating from a distant source.  GTSM can be
		applied to many control protocols which are routable,
		for example LDP <xref target="RFC6720" />.
	      </t>
	      <t hangText="IP Filtering">
		<vspace blankLines="0" />
		At the very minimum, packet filtering plus
		classification and use of multiple queues supporting
		rate limiting is needed for traffic that could
		potentially be sent to a general purpose CPU used as a
		routing engine.  The first level of filtering only
		allows connections to be initiated from specific IP
		prefixes to specific destination ports and then
		preferably passes traffic directly to a cryptographic
		engine and/or rate limits.  The second level of
		filtering passes connected traffic, such as TCP
		connections having received at least one authenticated
		SYN or having been locally initiated.  The second
		level of filtering only passes traffic to specific
		address and port pairs to be checked for cryptographic
		authentication.
	      </t>
	    </list>
	  </t>
	  <t>
	    The cryptographic authentication is generally the last
	    resort in DoS attack mitigation.  If a packet must be
	    first sent to a general purpose CPU, then sent to a
	    cryptographic engine, a DoS attack is possible on high
	    speed interfaces.  Only where hardware can identify a
	    signature and the portion of packet covered by the
	    signature is cryptographic authentication highly
	    beneficial in protecting against DoS attacks.
	  </t>
	  <t>
	    For chips supporting multiple 100 Gb/s interfaces, only a
	    very large number of parallel cryptographic engines can
	    provide the processing capacity to handle a large scale
	    DoS or distributed DoS (DDoS) attack.  For many forwarding
	    chips this much processing power requires significant chip
	    real estate and power, and therefore reduces system space
	    and power density.  For this reason, cryptographic
	    authentication is not considered a viable first line of
	    defense.
	  </t>
	  <t>
	    For some networks the first line of defense is some means
	    of supporting OOB control and management traffic.  In the
	    past this OOB channel might make use of overhead bits in
	    SONET or OTN or a dedicated DWDM wavelength.  G-ACh and
	    GAL provide an alternative OOB mechanism which is
	    independent of underlying layers.  In other networks,
	    including most IP/MPLS networks, perimeter filtering
	    serves a similar purpose, though less effective without
	    extreme vigilance.
	  </t>
	  <t>
	    A second line of defense is filtering, including GTSM.
	    For protocols such as EBGP, GTSM and other filtering is
	    often the first line of defense.  Cryptographic
	    authentication is usually the last line of defense and
	    insufficient by itself to mitigate DoS or DDoS attacks.
	  </t>

	</section>

	<section anchor="sect.oam" title="MPLS OAM">
	  <!--
	      MPLS-TP OAM coverage requested by Nabil Bitar so it
	      makes sense to also cover MPLS OAM.  This was suggested
	      by Carlos.
	  -->
	  <t>
	    <xref target="RFC4377" />
	    defines requirements for MPLS OAM that predate MPLS-TP.
            <xref target="RFC4379" />
	    defines what is commonly referred to as LSP Ping and LSP
	    Traceroute.
            <xref target="RFC4379" />
	    is updated by
            <xref target="RFC6424" />
	    supporting MPLS tunnels and stitched LSP and P2MP LSP.
            <xref target="RFC4379" />
	    is updated by
            <xref target="RFC6425" />
	    supporting P2MP LSP.
	    <xref target="RFC4379" />
	    is updated by
            <xref target="RFC6426" />
	    to support MPLS-TP connectivity verification (CV) and route
	    tracing.
	  </t>
	  <t>
	    <xref target="RFC4950" />
	    extends the ICMP format to support TTL expiration that may
	    occur when using IP traceroute within an MPLS tunnel.  The
	    ICMP message generation can be implemented in forwarding
	    hardware, but if sent to a general purpose CPU must be
	    rate limited to avoid a potential denial of service (DoS)
	    attack.
	  </t>
	  <t>
            <xref target="RFC5880" />
	    defines Bidirectional Forwarding Detection (BFD), a
	    protocol intended to detect faults in the bidirectional
	    path between two forwarding engines.
            <xref target="RFC5884" />
	    and 
            <xref target="RFC5885" />
	    define BFD for MPLS.
	    BFD can provide failure detection on any kind of path
	    between systems, including direct physical links, virtual
	    circuits, tunnels, MPLS Label Switched Paths (LSPs),
	    multihop routed paths, and unidirectional links as long as
	    there is some return path.
	  </t>
	  <t>
	    The processing requirements for BFD are less than for LSP
	    Ping, making BFD somewhat better suited for relatively
	    high rate proactive monitoring.  BFD does not verify
	    the data plane against the control plane, where LSP Ping
	    does.  LSP Ping is somewhat better suited for on-demand
	    monitoring including relatively low rate periodic
	    verification of data plane and as a diagnostic tool.
	  </t>
	  <t>
	    Both BFD and LSP Ping MUST be recognized by hardware and
	    at the very minimum forwarded to the main CPU.  Hardware
	    assistance for BFD is often provided and is considered
	    necessary for relatively high rate proactive monitoring.
	    Both BFD and LSP Ping MUST be recognized in any filtering
	    prior to passing traffic to a general purpose CPU and
	    appropriate DoS protection applied
	    (see <xref target="sect.gtsm" />).
	    Failure to recognize BFD and LSP Ping and at least rate
	    limit creates the potential for misconfiguration to cause
	    outages rather than cause errors in the misconfigured OAM.
	  </t>

	</section>

	<section anchor="sect.pw-oam" title="Pseudowire OAM">

	  <t>
	    Pseudowire OAM makes use of the control channel provided
	    by Virtual Circuit Connectivity Verification (VCCV)
	    <xref target="RFC5085" />.
	    VCCV makes use of the Pseudowire Control Word.
	    BFD support over VCCV is defined by
	    <xref target="RFC5885" />.
	    <xref target="RFC5885" />
	    is updated by
	    <xref target="RFC6478" />
	    in support of static pseudowires.
	    <xref target="RFC4379" />
	    is updated by
	    <xref target="RFC6829" />
	    supporting LSP Ping for Pseudowire FEC advertised over IPv6.
	  </t>

	  <t>
	    G-ACh/GAL (defined in <xref target="RFC5586" />) is the
	    preferred MPLS-TP OAM control channel and applies to any
	    MPLS-TP end points, including Pseudowire.
	    See <xref target="sect.tp-oam" /> for an overview of
	    MPLS-TP OAM.
	  </t>

	</section>

	<section anchor="sect.tp-oam" title="MPLS-TP OAM">
	  <!--
	      TP OAM coverage requested by Nabil Bitar.
	  -->
	  <t>
	    <xref target="RFC6669" />
	    summarizes the MPLS-TP OAM toolset, the set of protocols
	    supporting the MPLS-TP OAM requirements specified in
	    <xref target="RFC5860" />
	    and supported by the MPLS-TP OAM framework defined in
	    <xref target="RFC6371" />.
	  </t>
	  <t>
	    The MPLS-TP OAM toolset includes:
	    <list style="hanging" hangIndent="4">
	      <t hangText="CC-CV">
		<vspace blankLines="0" />
		<xref target="RFC6428" />
		defines BFD extensions to support proactive CC-CV
		applications.
		<xref target="RFC6426" />
		provides LSP ping extensions that are used to
		implement on-demand connectivity verification.
	      </t>
	      <t hangText="RDI">
		<vspace blankLines="0" />
		Remote Defect Indication (RDI) is triggered by
		failure of proactive CC-CV, which is BFD based.  For
		fast RDI initiation, RDI SHOULD be initiated and
		handled by hardware if BFD is handled in forwarding
		hardware.
		<xref target="RFC6428" />
		provides an extension for BFD that includes the RDI
		indication in the BFD format and a specification of
		how this indication is to be used.
	      </t>
	      <t hangText="Route Tracing">
		<vspace blankLines="0" />
		<xref target="RFC6426" />
		specifies that the LSP ping enhancements for MPLS-TP
		on-demand connectivity verification include
		information on the use of LSP ping for route tracing
		of an MPLS-TP path.
	      </t>
	      <t hangText="Alarm Reporting">
		<vspace blankLines="0" />
		<xref target="RFC6427" />
		describes the details of a new protocol supporting
		Alarm Indication Signal, Link Down Indication, and
		fault management.  This functionality SHOULD be
		supported in forwarding hardware on high speed
		interfaces.
	      </t>
	      <t hangText="Lock Instruct">
		<vspace blankLines="0" />
		Lock instruct is initiated on-demand and therefore
		need not be implemented in forwarding hardware.
		<xref target="RFC6435" />
		defines a lock instruct protocol.
	      </t>
	      <t hangText="Lock Reporting">
		<vspace blankLines="0" />
		<xref target="RFC6427" />
		covers lock reporting.  Lock reporting need not be
		implemented in forwarding hardware.
	      </t>
	      <t hangText="Diagnostic">
		<vspace blankLines="0" />
		<xref target="RFC6435" />
		defines protocol support for loopback.  Loopback
		initiation is on-demand and therefore need not be
		implemented in forwarding hardware.  Loopback of
		packet traffic SHOULD be implemented in forwarding
		hardware on high speed interfaces.
	      </t>
	      <t hangText="Packet Loss and Delay Measurement">
		<vspace blankLines="0" />
		<xref target="RFC6374" />
		and
		<xref target="RFC6375" />
		define a protocol and profile for packet loss
		measurement (LM) and delay measurement (DM).  LM
		requires a very accurate capture and insertion of
		packet and byte counters when a packet is transmitted
		and capture of packet and byte counters when a packet
		is received.  This capture and insertion MUST be
		implemented in forwarding hardware for LM OAM to be
		sufficiently
		accurate.  DM requires very accurate capture and
		insertion of a timestamp on transmission and capture
		of timestamp when a packet is received.  This
		timestamp capture and insertion MUST be implemented in
		forwarding hardware for DM OAM to be sufficiently accurate.
	      </t>
	    </list>
	  </t>
	  <t>
	    See <xref target="sect.oam" /> for discussion of hardware
	    support necessary for BFD and LSP Ping.
	  </t>
	  <t>
	    CC-CV and alarm reporting are tied to protection and
	    therefore SHOULD be supported in forwarding hardware in
	    order to provide protection for a large number of affected
	    LSP within target response intervals.  Since CC-CV is
	    supported by BFD, for MPLS-TP, BFD SHOULD be supported in
	    forwarding hardware.
	  </t>

	</section>

	<section anchor="sect.oam-iwk"
		 title="MPLS OAM and Layer-2 OAM Interworking">
	  <t>
	    <xref target="RFC6670" />
	    provides the reasons for selecting a single MPLS-TP OAM
	    solution and examines the consequences were ITU-T to
	    develop a second OAM solution that is based on Ethernet
	    encodings and mechanisms.
	  </t>
	  <t>
	    <xref target="RFC6310" /> and
	    <xref target="I-D.ietf-pwe3-mpls-eth-oam-iwk" />
	    specify the mapping of defect states between many types
	    of hardware Attachment Circuits (ACs) and associated
	    Pseudowires (PWs).  This functionality SHOULD be supported
	    in forwarding hardware.
	  </t>
	  <t>
	    An MPLS OAM implementation SHOULD interwork with the
	    underlying server layer and provide a means to interwork
	    with a client layer.  Where MPLS hierarchy is used both
	    the client and server layer may be MPLS or MPLS-TP.  Where
	    the server layer is a Layer-2, such as Ethernet,
	    PPP over SONET/SDH, or GFP over OTN, interwork among
	    layers is also required.  For high speed interfaces, this
	    interworking SHOULD be supported in forwarding hardware.
	  </t>

	</section>

	<section anchor="sect.oam-hdwr"
		 title="Extent of OAM Support by Hardware">
	  <t>
	    Some OAM functionality must be supported in forwarding
	    hardware while other OAM functionality must be entirely
	    implemented in forwarding hardware.
	  </t>
	  <t>
	    Where possible, implementation in forwarding hardware
	    should be in programmable hardware such that if standards
	    are later changed or extended these changes are likely to
	    be accommodated with hardware reprogramming rather than
	    replacement.
	  </t>
	  <t>
	    Some functions must be implemented in dedicated forwarding
	    hardware.  Examples include packet and byte counters
	    needed for LM OAM as well as needed for management
	    protocols.  Similarly the capture and insertion of packet
	    and byte counts or timestamps needed for transmitted LM or
	    DM or time synchronization packets MUST be implemented in
	    forwarding hardware to support accurate OAM.
	  </t>
	  <t>
	    Some functions must be supported in forwarding hardware
	    but may make use of an external general purpose processor
	    if performance criteria can be met.  For example
	    origination of AIS to client layers may be triggered by
	    CC-CV server layer hardware but expansion to a large
	    number of client LSP may occur in a general purpose
	    processor.  Some forwarding hardware supports one or more
	    on-chip general purpose processors which may be well
	    suited for such a role.
	  </t>
	  <t>
	    The customer (system supplier or provider) should not
	    dictate design, but should independently validate target
	    functionality and performance.  However, it is not
	    uncommon for service providers and system implementors to
	    insist on reviewing design details (under NDA) due to past
	    experiences with suppliers and to reject suppliers who are
	    unwilling to provide details.
	  </t>

	</section>

      </section>

      <section anchor="sect.no-of-flows" title="Number and Size of Flows">

	<!--
	    A suggestion from Lou Berger at IETF-85 prompted the
	    inclusion of this new section.
	-->
	<t>
	  Service provider networks may carry up to hundreds of
	  millions of flows on 10 Gb/s links.  Most flows are very
	  short lived, many under a second.  A subset of the flows are
	  low capacity and somewhat long lived.  When Internet traffic
	  dominates capacity a very small subset of flows are high
	  capacity and/or very long lived.
	</t>
	<t>
	  Two types of limitations with regard to number and size of
	  flows have been observed.
	  <list style="numbers">
	    <t>
	      Some hardware cannot handle some very large flows
	      because of internal paths which are limited, such as per
	      packet backplane paths or paths internal or external to
	      chips such as buffer memory paths.  Such designs can
	      handle aggregates of smaller flows.  Some hardware with
	      acknowledged limitations has been successfully deployed
	      but may be increasingly problematic if the capacity of
	      large microflows in deployed networks continues to grow.
	    </t>
	    <t>
	      Some hardware approaches cannot handle a large number of
	      flows, or a large number of large flows due to
	      attempting to count per flow, rather than deal with
	      aggregates of flows.  Hash techniques scale with regard
	      to number of flows due to a fixed hash size with many
	      flows falling into the same hash bucket.  Techniques
	      that identify individual flows have been implemented but
	      have never been successfully deployed for Internet traffic.
	    </t>
	  </list>
	</t>

      </section>

    </section>

    <section anchor="sect.ask"
	     title="Questions for Suppliers">
      <t>
	<!-- clarification prompted by Nabil -->
	The following questions should be asked of a supplier.  These
	questions are grouped into broad categories.  The questions
	themselves are intended to be open ended questions to the
	supplier.  The tests in <xref target="sect.test" /> are
	intended to verify whether the supplier disclosed any
	compliance or performance limitations completely and
	accurately.
      </t>
      <t>
	<list style="hanging" hangIndent="4">
	  <t hangText="Basic Compliance">
	    <vspace blankLines="0" />
            <list counter="q" hangIndent="4" style="format Q#%d">
	      <t>
		Can the implementation forward packets with an
		arbitrarily large stack depth?
		<!-- clarification prompted by Nabil -->
		What limitations exist, and under what circumstances
		do further limitations come into play (such as high
		packet rate or specific features enabled or specific
		types of packet processing)?
		See <xref target="sect.basics" />.
	      </t>
	      <t>
		Is the entire set of basic MPLS functionality
		described in <xref target="sect.basics" /> supported?
	      </t>
	      <t>
		Is the set of MPLS reserved labels handled correctly
		and with adequate performance?
		See <xref target="sect.resv-labels" />.
	      </t>
	      <t>
		Are mappings of label value and TC to PHB handled
		correctly, including RFC3270 L-LSP mappings and
		RFC4124 CT mappings to PHB?
		See <xref target="sect.qos" />.
	      </t>
	      <t>
		Is time synchronization adequately supported in
		forwarding hardware?
		<list style="letters">
		  <t>
		    Are both PTP and NTP formats supported?
		  </t>
		  <t>
		    Is the accuracy of timestamp insertion and
		    incoming stamping sufficient?
		  </t>
		</list>
		See <xref target="sect.time-sync" />.
	      </t>
	      <t>
		Is link bundling supported?
		<list style="letters">
		  <t>
		    Can LSP be pinned to specific components?
		  </t>
		  <t>
		    Is the "all-ones" component link supported?
		  </t>
		</list>
		See <xref target="sect.link-bundle" />.
	      </t>
	      <t>
		Is MPLS hierarchy supported?
		<list style="letters">
		  <t>
		    Are both PHP and UHP supported?  What limitations
		    exist on the number of POP operations with UHP?
		  </t>
		  <t>
		    Are the pipe, short-pipe, and uniform models
		    supported?  Are TTL and TC values updated
		    correctly at egress where applicable?
		  </t>
		</list>
		See <xref target="sect.hierarchy" />.
	      </t>
	      <t>
		Are pseudowire sequence numbers handled correctly?
		See <xref target="sect.pw-seq" />.
	      </t>
	      <t>
		Is VPN LER functionality handled correctly and without
		performance issues?
		See <xref target="sect.vpn" />.
	      </t>
	      <t>
		Is MPLS multicast (P2MP and MP2MP) handled correctly?
		<list style="letters">
		  <t>
		    Are packets dropped on uncongested outputs if some
		    outputs are congested?
		  </t>
		  <t>
		    Is performance limited in high fanout situations?
		  </t>
		</list>
		See <xref target="sect.mcast" />.
	      </t>
	    </list>
	  </t>
	  <t hangText="Basic Performance">
	    <vspace blankLines="0" />
            <list counter="q" hangIndent="4" style="format Q#%d">
	      <t>
		Can very small packets be forwarded at full line rate
		on all interfaces indefinitely?  
		<!-- clarification prompted by Nabil -->
		What limitations exist, and under what circumstances
		do further limitations come into play (such as
		specific features enabled or specific types of packet
		processing)?
	      </t>
	      <t>
		Customers must decide whether to relax the prior
		requirement and to what extent.  If the answer to the
		prior question indicates that limitations exist, then:
		<list style="letters">
		  <t>
		    What is the smallest packet size where full line
		    rate forwarding can be supported?
		  </t>
		  <t>
		    What is the longest burst of full rate small
		    packets that can be supported?
		  </t>
		</list>
		<!-- clarification prompted by Nabil -->
		Specify circumstances (such as specific features
		enabled or specific types of packet processing) that
		often impact these rates and burst sizes.
	      </t>
	      <t>
		How many POP operations can be supported along with a
		SWAP operation at full line rate while maintaining
		per LSP packet and byte counts for each POP and SWAP?
		This requirement is particularly relevant for MPLS-TP.
	      </t>
	      <t>
		How many PUSH labels can be supported?  While this
		limitation is rarely an issue, it applies to both PHP
		and UHP, unlike the POP limit which applies to UHP.
	      </t>
	      <t>
		For a worst case where all packets arrive on one LSP,
		what is the counter overflow time?  Are any means
		provided to avoid polling all counters at short
		intervals?  This applies to both MPLS and MPLS-TP.
	      </t>
	    </list>
	  </t>
	  <t hangText="Multipath Capabilities and Performance">
	    <vspace blankLines="0" />
	    Multipath capabilities and performance do not apply to
	    MPLS-TP but apply to MPLS and apply if MPLS-TP is carried
	    in MPLS.
	    <list counter="q" hangIndent="4" style="format Q#%d">
	      <t>
		How are large microflows accommodated?  Is there
		active management of the hash space mapping to output
		ports?  See <xref target="sect.large-uflow" />.
	      </t>
	      <t>
		How many MPLS labels can be included in a hash based
		on the MPLS label stack?
	      </t>
	      <t>
		Is packet rate performance decreased beyond some
		number of labels?
	      </t>
	      <t>
		Can the IP header and payload information below the
		MPLS stack be used in the hash?  If so, which IP
		fields, payload types and payload fields are
		supported?
	      </t>
	      <t>
		At what maximum MPLS label stack depth can Bottom of
		Stack and an IP header appear without impacting packet
		rate performance?
	      </t>
	      <t>
		Are reserved labels excluded from the label stack hash?
		They MUST be excluded.
	      </t>
	      <t>
		How is multipath performance affected by very large
		flows or an extremely large number of flows, or by
		very short lived flows?
		See <xref target="sect.no-of-flows" />.
	      </t>
	    </list>
	  </t>
	  <t hangText="Pseudowire Capabilities and Performance">
	    <vspace blankLines="0" />
            <list counter="q" hangIndent="4" style="format Q#%d">
	      <t>
		Is the pseudowire control word supported?
	      </t>
	      <t>
		What is the maximum rate of pseudowire encapsulation
		and decapsulation?  Apply the same questions as in
		Basic Performance for any packet based pseudowire such
		as IP VPN or Ethernet.
	      </t>
	      <t>
		Does inclusion of a pseudowire control word impact
		performance?
	      </t>
	      <t>
		Are flow labels supported?
	      </t>
	      <t>
		If so, what fields are hashed on for the flow label
		for different types of pseudowires?
	      </t>
	      <t>
		Does inclusion of a flow label impact performance?
	      </t>
	    </list>
	  </t>
	  <t hangText="Entropy Label Support and Performance">
	    <vspace blankLines="0" />
            <list counter="q" hangIndent="4" style="format Q#%d">
	      <t>
		Can an entropy label be added when acting as an
		ingress LER and can it be removed when acting as an
		egress LER?
	      </t>
	      <t>
		If so, what fields are hashed on for the entropy label?
	      </t>
	      <t>
		Does adding or removing an entropy label impact packet
		rate performance?
	      </t>
	      <t>
		Can an entropy label be detected in the label stack,
		used in the hash, and properly terminate the search
		for further information to hash on?
	      </t>
	      <t>
		Does using an entropy label have any negative impact
		on performance?  It should have no impact or a
		positive impact.
	      </t>
	    </list>
	  </t>
	  <t hangText="OAM and DoS Protection">
	    <vspace blankLines="0" />
	    <list counter="q" hangIndent="4" style="format Q#%d">
	      <t>
		For each control and management plane protocol in use,
		what measures are taken to provide DoS attack
		hardening?  Have DoS attack tests been performed?
		Can compromise of an internal computer on a management
		subnet be leveraged for any form of attack including
		DoS attack?
	      </t>
	      <t>
		What OAM proactive and on-demand mechanisms are
		supported?  What performance limits exist under high
		proactive monitoring rates?  Can excessively high
		proactive monitoring rates impact control plane
		performance or cause control plane instability?  Ask
		these questions for each of the following.
		<list style="letters">
		  <t>MPLS OAM</t>
		  <t>Pseudowire OAM</t>
		  <t>MPLS-TP OAM</t>
		  <t>Layer-2 OAM Interworking</t>
		</list>
		See <xref target="sect.oam+gtsm" />.
	      </t>
	    </list>
	  </t>
	</list>
      </t>
    </section>

    <section anchor="sect.test"
	     title="Forwarding Compliance and Performance Testing">
      <t>
	Packet rate performance testing of equipment supporting a large
	number of 10 Gb/s or 100 Gb/s links is not possible using desktop
	computers or workstations.  The use of high end workstations
	as a source of test traffic was barely viable 20 years ago,
	but is no longer at all viable.  Though custom microcode has
	been used on specialized router forwarding cards to serve the
	purpose of generating test traffic and measuring it, for the
	most part performance testing will require specialized test
	equipment.  There are multiple sources of suitable equipment.
	<!-- test equipment guys will love this paragraph. -->
      </t>
      <t>
	The set of tests listed here do not correspond one-to-one to
	the set of questions in <xref target="sect.ask" />.  The same
	categorization is used and these tests largely serve to
	validate answers provided to the prior questions, and can
	also provide answers where a supplier is unwilling to disclose
	compliance or performance.
	<!-- all too common -->
      </t>
      <t>
	Performance testing is the domain of the IETF Benchmark
	Methodology Working Group (BMWG).  Below are brief
	descriptions of conformance and performance tests.  Some very
	basic tests are specified in <xref target="RFC5695" /> which
	partially cover only the basic performance test T#3.
      </t>
      <t>
	The following tests should be performed by the systems
	designer, or deployer, or performed by the supplier on their
	behalf if it is not practical for the potential customer to
	perform the tests directly.  These tests are grouped into
	broad categories.
      </t>
      <t>
	<list style="hanging" hangIndent="4">
	  <t hangText="Basic Compliance">
	    <vspace blankLines="0" />
            <list counter="t" hangIndent="4" style="format T#%d">
	      <t>
		Test forwarding at a high rate for packets with
		varying number of label entries.  While packets with
		more than a dozen label entries are unlikely to be
		used in any practical scenario today, it is useful to
		know if limitations exist.
	      </t>
	      <t>
		For each of the questions listed under "Basic
		Compliance" in <xref target="sect.ask" />, verify the
		claimed compliance.  For any functionality considered
		critical to a deployment, where applicable performance
		using each capability under load should be verified in
		addition to basic compliance.
	      </t>
	    </list>
	  </t>
	  <t hangText="Basic Performance">
	    <vspace blankLines="0" />
            <list counter="t" hangIndent="4" style="format T#%d">
	      <t>
		Test packet forwarding at full line rate with small
		packets.  See <xref target="RFC5695" />.  The most
		likely case to fail is the smallest packet size.  Also
		test with packet sizes in four byte increments ranging
		from payload sizes of 40 to 128 bytes.
	      </t>
	      <t>
		If the prior tests did not succeed for all packet
		sizes, then perform the following tests.
		<list style="letters">
		  <t>
		    Increase the packet size by 4 bytes until a size
		    is found that can be forwarded at full rate.
		  </t>
		  <t>
		    Inject bursts of consecutive small packets into a
		    stream of larger packets.  Allow some time for
		    recovery between bursts.  Increase the number of
		    packets in the burst until packets are dropped.
		    <!--
			Jay Karthik pointed out that this is benchmark
			methodology advice which is too sketchy to be
			useful and maybe doesn't belong here.
		    -->
		  </t>
		</list>
	      </t>
	      <t>
		Send test traffic where a SWAP operation is required.
		Also set up multiple LSP carried over other LSP where
		the device under test (DUT) is the egress of these
		LSP.  Create test packets such that the SWAP operation
		is performed after POP operations, increasing the
		number of POP operations until forwarding of small
		packets at full line rate can no longer be supported.
		Also check to see how many POP operations can be
		supported before the full set of counters can no
		longer be maintained.  This requirement is
		particularly relevant for MPLS-TP.
	      </t>
	      <t>
		Send all traffic on one LSP and see if the counters
		become inaccurate.  Often counters on silicon are much
		smaller than the 64 bit packet and byte counters in
		IETF MIB.  System developers should consider what
		counter polling rate is necessary to maintain accurate
		counters and whether those polling rates are
		practical.

		Relevant MIBs for MPLS are discussed in 
		<xref target="RFC4221" /> and
		<xref target="RFC6639" />.
	      </t>
	    </list>
	  </t>
	  <t hangText="Multipath Capabilities and Performance">
	    <vspace blankLines="1" />
	    Multipath capabilities do not apply to MPLS-TP but apply
	    to MPLS and apply if MPLS-TP is carried in MPLS.
            <list counter="t" hangIndent="4" style="format T#%d">
	      <t>
		Send traffic at a rate well exceeding the capacity of
		a single multipath component link, and where entropy
		exists only below the top of stack.  If only the top
		label is used this test will fail immediately.
	      </t>
	      <t>
		Move the labels with entropy down in the stack until
		either the full forwarding rate can no longer be
		supported or most or all packets try to use the same
		component link.
	      </t>
	      <t>
		Repeat the two tests above with the entropy contained
		in IP headers or IP payload fields below the label
		stack rather than in the label stack.  Test with the
		set of IP headers or IP payload fields considered
		relevant to the deployment or to the target market.
	      </t>
	      <t>
		Determine whether traffic that contains a pseudowire
		control word is interpreted as IP traffic.
		Information in the payload MUST NOT be used in the
		load balancing if the first nibble of the packet is
		not 4 or 6 (IPv4 or IPv6).
	      </t>
	      <t>
		Determine whether reserved labels are excluded from
		the label stack hash.  They MUST be excluded.
	      </t>
	      <t>
		Perform testing in the presence of combinations of:
		<list style="letters">
		  <t>
		    Very large microflows.
		  </t>
		  <t>
		    Relatively short lived high capacity flows.
		  </t>
		  <t>
		    Extremely large numbers of flows.
		  </t>
		  <t>
		    Very short lived small flows.
		  </t>
		</list>
	      </t>
	    </list>
	  </t>
	  <t hangText="Pseudowire Capabilities and Performance">
	    <vspace blankLines="0" />
            <list counter="t" hangIndent="4" style="format T#%d">
	      <t>
		Ensure that pseudowire can be set up with a pseudowire
		label and pseudowire control word added at ingress and
		the pseudowire label and pseudowire control word
		removed at egress.
	      </t>
	      <t>
		For pseudowire that contains variable length payload
		packets, repeat performance tests listed under "Basic
		Performance" for pseudowire ingress and egress
		functions.
	      </t>
	      <t>
		Repeat pseudowire performance tests with and without 
		a pseudowire control word.
	      </t>
	      <t>
		Determine whether pseudowire can be set up with a
		pseudowire label, flow label, and pseudowire control
		word added at ingress and the pseudowire label, flow
		label, and pseudowire control word removed at egress.
	      </t>
	      <t>
		Determine which payload fields are used to create the
		flow label and whether the set of fields and algorithm
		provide sufficient entropy for load balancing.
	      </t>
	      <t>
		Repeat pseudowire performance tests with flow labels
		included.
	      </t>
	    </list>
	  </t>
	  <t hangText="Entropy Label Support and Performance">
	    <vspace blankLines="0" />
            <list counter="t" hangIndent="4" style="format T#%d">
	      <t>
		Determine whether entropy labels can be added at
		ingress and removed at egress.
	      </t>
	      <t>
		Determine which fields are used to create an entropy
		label.  Labels further down in the stack, including
		entropy labels further down, and IP headers or IP
		payload fields where applicable, should be used.
		Determine whether the set of fields and algorithm
		provide sufficient entropy for load balancing.
	      </t>
	      <t>
		Repeat performance tests under "Basic Performance"
		when entropy labels are used, where ingress or egress
		is the device under test (DUT).
	      </t>
	      <t>
		Determine whether an ELI is detected when acting as a
		midpoint LSR and whether the search for further
		information on which to base the load balancing is
		used.  Information below the entropy label SHOULD NOT
		be used.
	      </t>
	      <t>
		Ensure that the Entropy Label Indicator and Entropy
		Label (ELI and EL) are removed from the label stack
		during UHP and PHP operations.
	      </t>
	      <t>
		Ensure that operations on the TC field when adding and
		removing Entropy Label are correctly carried out.  If
		TC is changed during a SWAP operation, the ability to
		transfer that change MUST be provided.  The ability to
		suppress the transfer of TC MUST also be provided.  See
		"pipe", "short pipe", and "uniform" models in
		<xref target="RFC3443" />.
	      </t>
	      <t>
		Repeat performance tests for midpoint LSR with entropy
		labels found at various label stack depths.
	      </t>
	    </list>
	  </t>
	  <t hangText="DoS Protection">
	    <vspace blankLines="0" />
            <list counter="t" hangIndent="4" style="format T#%d">
	      <t>
		Actively attack LSR under high protocol churn load and
		determine control plane performance impact or
		successful DoS under test conditions.  Specifically
		test for the following.
		<list style="letters">
		  <t>
		    TCP SYN attack against control plane and
		    management plane protocols using TCP, including
		    CLI access (typically SSH protected login),
		    NETCONF, etc.
		  </t>
		  <t>
		    High traffic volume attack against control plane
		    and management plane protocols not using TCP.
		  </t>
		  <t>
		    Attacks which can be performed from a compromised
		    management subnet computer, but not one with
		    authentication keys.
		  </t>
		  <t>
		    Attacks which can be performed from a compromised
		    peer within the control plane (internal domain and
		    external domain).  
		    <!-- where does KARP work on key dist stand? -->
		    Assume that per peering keys and per router ID
		    keys rather than network wide keys are in use.
		  </t>
		</list>
		See <xref target="sect.gtsm" />.
	      </t>
	    </list>
	  </t>
	  <t hangText="OAM Capabilities and Performance">
	    <vspace blankLines="0" />
            <list counter="t" hangIndent="4" style="format T#%d">
	      <t>
		Determine maximum sustainable rates of BFD traffic.
		If BFD requires CPU intervention, determine both
		maximum rates and CPU loading when multiple interfaces
		are active.
	      </t>
	      <t>
		Verify LSP Ping and LSP Traceroute capability.
	      </t>
	      <t>
		Determine maximum rates of MPLS-TP CC-CV traffic.  If
		CC-CV requires CPU intervention, determine both
		maximum rates and CPU loading when multiple interfaces
		are active.
	      </t>
	      <t>
		Determine MPLS-TP DM precision.
	      </t>
	      <t>
		Determine MPLS-TP LM accuracy.
	      </t>
	      <t>
		Verify MPLS-TP AIS/RDI and PSC functionality,
		protection speed, and AIS/RDI notification speed when
		a large number of Maintenance Entities (ME) must be
		notified with AIS/RDI.
	      </t>
	    </list>
	  </t>
	</list>
      </t>
      <t>
	The tests in the "Basic Performance" section of the above list
	should be repeated under various conditions to retest basic
	performance when critical capabilities are enabled.  Complete
	repetition of the performance tests enabling each capability
	and combinations of capabilities would be very time intensive;
	therefore, a reduced set of performance tests can be used to
	gauge the impact of enabling specific capabilities.
      </t>

    </section>

    <!-- Possibly an Acknowledgements or a 'Contributors' section ... -->

    <section anchor="sect.ack" title="Acknowledgements">

      <t>
	Numerous very useful comments have been received in private
	email.  Some of these contributions are acknowledged here,
	approximately in chronological order.
      </t>
      <t>
	Paul Doolan provided a brief review resulting in a number of
	clarifications, most notably regarding on-chip vs. system
	buffering, 100 Gb/s link speed assumptions in the 150 Mpps
	figure, and handling of large microflows.  Pablo Frank
	reminded us of the sawtooth effect in PPS vs. packet size
	graphs, prompting the addition of a few paragraphs on this.
	Comments from Lou Berger at IETF-85 prompted the addition of
	<xref target="sect.no-of-flows" />.
      </t>
      <t>
	Valuable comments were received on the BMWG mailing list.  Jay
	Karthik pointed out extraneous methodology hints that belong
	in an appendix or should be removed.
      </t>
      <t>
	Nabil Bitar pointed out the need to cover QoS (Differentiated
	Services), MPLS multicast (P2MP and MP2MP), and MPLS-TP OAM.
	Nabil also provided a number of clarifications to the
	questions and tests in <xref target="sect.ask" /> and
	<xref target="sect.test" />.
      </t>

    </section>

    <section anchor="sect.iana" title="IANA Considerations">
      <t>This memo includes no request to IANA.</t>
    </section>

    <section anchor="sect.security" title="Security Considerations">
      <t>
	This document reviews forwarding behavior specified elsewhere
	and points out compliance and performance requirements.  As
	such it introduces no new security requirements or concerns.
      </t>
      <t>
	Knowledge of potential performance shortcomings may serve to
	help new implementations avoid pitfalls.  It is unlikely that
	such knowledge could be the basis of new denial of service as
	these pitfalls are already widely known in the service
	provider community and among leading equipment suppliers.  In
	practice, extreme data and packet rates are needed to affect
	existing equipment and networks that may still be vulnerable
	due to failure to implement adequate protection.  The need
	for such extreme rates makes this type of denial of service
	unlikely and makes undetectable denial of service of this
	type impossible.
      </t>
    </section>

  </middle>

  <back>

    <!--
	Possible appendix fodder:

	    One way to accomplish this is to use a router with higher
	    priority set on the interfaces on which small packets are
	    sent to it.  The router should buffer the lower priority
	    large packets.  It is best to inject the small packets to
	    this router on a faster interface (if such a thing
	    exists), or more than one interface.

    -->

    <references title="Normative References">

      &RFC2119;
      &RFC3032;
      &RFC3209;
      &RFC3270;
      &RFC3443;
      &RFC4090;
      &RFC4182;
      &RFC4201;
      &RFC4385;
      &RFC5129;
      &RFC5586;
      &RFC6391;
      &RFC6790;

    </references>

    <references title="Informative References">

      &RFC0791;
      &RFC2474;
      &RFC2475;
      &RFC2597;
      &RFC3031;
      &RFC3429;
      &RFC3985;
      &RFC4110;
      &RFC4124;
      &RFC4206;
      &RFC4221;
      &RFC4377;
      &RFC4379;
      &RFC4664;
      &RFC4875;
      &RFC4928;
      &RFC4950;
      &RFC5082;
      &RFC5085;
      &RFC5332;
      &RFC5462;
      &RFC5695;
      &RFC5860;
      &RFC5880;
      &RFC5884;
      &RFC5885;
      &RFC5905;
      &RFC6310;
      &RFC6371;
      &RFC6374;
      &RFC6375;
      &RFC6388;
      &RFC6424;
      &RFC6425;
      &RFC6426;
      &RFC6427;
      &RFC6428;
      &RFC6435;
      &RFC6438;
      &RFC6478;
      &RFC6639;
      &RFC6669;
      &RFC6670;
      &RFC6720;
      &RFC6829;

      &I-D.ietf-tictoc-1588overmpls;
      &I-D.ietf-pwe3-mpls-eth-oam-iwk;

      <reference anchor="ATM-EPD-and-PPD">
	<front>
	  <title>Dynamics of TCP Traffic over ATM Networks</title>
	  <author fullname="Romanow, A." />
	  <author fullname="Floyd, S." />
	  <date year="1995" month="May" />
	</front>
	<seriesInfo name="IEEE Journal on Selected Areas in Communications"
		    value="Vol 13 No 4, May 1995, pp. 633-641." />
      </reference>

      <reference anchor="ACK-compression">
        <front>
          <title>Observations and Dynamics of a Congestion Control
          Algorithm: The Effects of Two-Way Traffic</title>
          <author fullname="Zhang, L." />
          <author fullname="Shenker, S." />
          <author fullname="Clark, D. D." />
          <date year="1991" />
        </front>
	<seriesInfo name="Proc. ACM SIGCOMM, ACM Computer Communications Review (CCR)"
		    value="Vol 21, No 4, 1991, pp. 133-147." />
      </reference>

    </references>

    <section title="Organization of References Section">

      <t>
	The References section is split into Normative and Informative
	subsections.  References that directly specify forwarding
	encapsulations or behaviors are listed as normative.
	References which describe signaling only, though normative
	with respect to signaling, are listed as informative.  They
	are informative with respect to MPLS forwarding.
      </t>

    </section>

  </back>
</rfc>
