<?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "http://xml2rfc.tools.ietf.org/authoring/rfc2629.dtd" [
<!ENTITY I-D.geib-tsvwg-diffserv-intercon-05 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-geib-tsvwg-diffserv-intercon-05.xml">
<!ENTITY I-D.ietf-avtext-rtp-grouping-taxonomy-02 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-avtext-rtp-grouping-taxonomy-02.xml">
<!ENTITY I-D.ietf-rtcweb-overview-10 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtcweb-overview-10.xml">
<!ENTITY I-D.ietf-rtcweb-rtp-usage-15 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtcweb-rtp-usage-15.xml">
<!ENTITY I-D.ietf-rtcweb-transports-05 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-rtcweb-transports-05.xml">
<!ENTITY I-D.petithuguenin-avtcore-rfc5764-mux-fixes-00 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-petithuguenin-avtcore-rfc5764-mux-fixes-00.xml">
<!ENTITY I-D.westerlund-avtcore-transport-multiplexing-07 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml3/reference.I-D.draft-westerlund-avtcore-transport-multiplexing-07.xml">
<!ENTITY RFC2119 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2474 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2474.xml">
<!ENTITY RFC2475 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2475.xml">
<!ENTITY RFC2597 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2597.xml">
<!ENTITY RFC2697 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2697.xml">
<!ENTITY RFC2698 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2698.xml">
<!ENTITY RFC2914 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2914.xml">
<!ENTITY RFC3168 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3168.xml">
<!ENTITY RFC3246 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3246.xml">
<!ENTITY RFC3270 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3270.xml">
<!ENTITY RFC3550 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3550.xml">
<!ENTITY RFC3662 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.3662.xml">
<!ENTITY RFC4103 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4103.xml">
<!ENTITY RFC4303 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4303.xml">
<!ENTITY RFC4594 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.4594.xml">
<!ENTITY RFC5109 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5109.xml">
<!ENTITY RFC5127 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5127.xml">
<!ENTITY RFC5129 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5129.xml">
<!ENTITY RFC5462 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5462.xml">
<!ENTITY RFC5764 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5764.xml">
<!ENTITY RFC5865 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.5865.xml">
<!ENTITY RFC6437 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.6437.xml">
<!ENTITY W3C.WD-mediacapture-streams-20130903 SYSTEM "http://xml2rfc.tools.ietf.org/public/rfc/bibxml4/reference.W3C.WD-mediacapture-streams-20130903.xml">
]>
<?xml-stylesheet type='text/xsl'
             href='http://xml2rfc.tools.ietf.org/authoring/rfc2629.xslt' ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<?rfc tocdepth="4"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes" ?>
<?rfc compact="yes" ?>
<?rfc subcompact="no" ?>
<!-- Change the title here -->
<rfc category="std" docName="draft-york-dart-dscp-rtp-02" ipr="trust200902">
  <front>
    <title abbrev="DiffServ and RT Communication">Differentiated Services
    (DiffServ) and Real-time Communication</title>

    <author fullname="Dan York" initials="D." surname="York">
      <organization>Internet Society</organization>

      <address>
        <postal>
          <street/>

          <city>Keene</city>

          <region>N.H.</region>

          <country>USA</country>
        </postal>

        <phone>+1-802-735-1624</phone>

        <email>dyork@lodestar2.com</email>
      </address>
    </author>

    <author fullname="David Black" initials="D." role="editor" surname="Black">
      <organization>EMC</organization>

      <address>
        <postal>
          <street>176 South Street</street>

          <city>Hopkinton</city>

          <region>MA</region>

          <code>01748</code>

          <country>USA</country>
        </postal>

        <phone>+1 508 293-7953</phone>

        <email>david.black@emc.com</email>
      </address>
    </author>

    <author fullname="Cullen Jennings" initials="C." surname="Jennings">
      <organization>Cisco</organization>

      <address>
        <postal>
          <street>170 West Tasman Drive</street>

          <street>MS: SJC-21/2</street>

          <city>San Jose</city>

          <region>CA</region>

          <code>95134</code>

          <country>USA</country>
        </postal>

        <phone>+1 408 421-9990</phone>

        <email>fluffy@cisco.com</email>
      </address>
    </author>

    <author fullname="Paul Jones" initials="P." surname="Jones">
      <organization>Cisco</organization>

      <address>
        <postal>
          <street>7025 Kit Creek Road</street>

          <city>Research Triangle Park</city>

          <region>NC</region>

          <code>27502</code>

          <country>USA</country>
        </postal>

        <phone>+1 919 476 2048</phone>

        <facsimile/>

        <email>paulej@packetizer.com</email>

        <uri/>
      </address>
    </author>

    <date month="" year="2014"/>

    <area>RAI</area>

    <workgroup>DiffServ Applied to Real-time Transports</workgroup>

    <keyword>DiffServ</keyword>

    <keyword>DSCP</keyword>

    <keyword>RAI</keyword>

    <keyword>RTP</keyword>

    <abstract>
      <t>This document describes the interaction between Differentiated
      Services (DiffServ) network quality of service (QoS) functionality and
      real-time network communication, including communication based on the
      Real-time Transport Protocol (RTP). DiffServ is based on network nodes
      applying different forwarding treatments to packets whose IP headers are
      marked with different DiffServ Code Points (DSCPs). As a result, use of
      different DSCPs within a single traffic stream may cause transport
      protocol interactions (e.g., reordering). In addition, DSCP markings may
      be changed or removed between the traffic source and destination. This
      document covers the implications of these DiffServ aspects for real-time
      network communication, including RTCWEB.</t>
    </abstract>
  </front>

  <middle>
    <section anchor="Intro" title="Introduction">
      <t>This document describes the interactions between Differentiated
      Services (DiffServ) network quality of service (QoS) functionality <xref
      target="RFC2475"/> and real-time network communication, including
      communication based on the Real-time Transport Protocol <xref
      target="RFC3550">(RTP)</xref>. DiffServ is based on network nodes
      applying different forwarding treatments to packets whose IP headers are
      marked with different DiffServ Code Points (DSCPs) <xref
      target="RFC2474"/>. As a result, use of different DSCPs within a single
      traffic stream may cause transport protocol interactions (e.g.,
      reordering). In addition, DSCP markings may be changed or removed
      between the traffic's source and destination. This document covers the
      implications of these DiffServ aspects for real-time network
      communication, including RTCWEB traffic <xref
      target="I-D.ietf-rtcweb-overview"/>.</t>

      <section title="Requirements Language">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
        document are to be interpreted as described in <xref
        target="RFC2119">RFC 2119</xref>.</t>
      </section>
    </section>

    <section anchor="Background" title="Background">
      <t>Real-time communications enables communication in real time over an
      IP network using voice, video, text, content sharing, etc. It is
      possible to use one or more of these modalities in parallel in order to
      provide a richer communication experience.</t>

      <t>A simple example of real-time communications is a voice call placed
      over the Internet wherein an audio stream is transmitted in each
      direction between two users. A more complex example is an immersive
      videoconferencing system that has multiple video screens, multiple
      cameras, multiple microphones, and some means of sharing content. For
      such complex systems, there may be multiple media streams that may be
      transmitted via a single IP address and port or via multiple IP
      addresses and ports.</t>

      <section anchor="RTP" title="RTP Background">
        <t>The most common protocol used for real-time media is the Real-time
        Transport Protocol <xref target="RFC3550">(RTP)</xref>. RTP defines a
        common encapsulation format and handling rules for real-time data
        transmitted over the Internet. Unfortunately, RTP terminology usage
        has been inconsistent. For example, this document on RTP grouping
        terminology <xref target="I-D.ietf-avtext-rtp-grouping-taxonomy"/>
        observes that:</t>

        <t><list style="empty">
            <t><xref target="RFC3550">RFC 3550</xref> uses the terms media
            stream, audio stream, video stream and streams of (RTP) packets
            interchangeably.</t>
          </list></t>

        <t>Terminology in this document is based on that RTP grouping
        terminology document with the following terms being of particular
        importance (see that terminology document for full definitions):<list
            style="hanging">
            <t hangText="Source Stream:">A reference clock synchronized, time
            progressing, digital media stream.</t>

            <t hangText="RTP Stream:">A stream of RTP packets containing media
            data, which may be source data or redundant data. The RTP Stream
            is identified by an RTP synchronization source (SSRC)
            belonging to a particular RTP session.</t>
          </list></t>

        <t>Media encoding and packetization of a source stream results in a
        source RTP stream plus zero or more redundancy RTP streams that
        provide resilience against loss of packets from the source RTP stream
        <xref target="I-D.ietf-avtext-rtp-grouping-taxonomy"/>. Redundancy
        information may also be carried in the same RTP stream as the encoded
        source stream, e.g., see Section 7.2 of <xref target="RFC5109"/>. With
        most applications, a single media type (e.g., audio) is transmitted
        within a single RTP session. However, it is possible to transmit
        multiple, distinct source streams over the same RTP session as one or
        more individual RTP streams. This is referred to as RTP
        multiplexing.</t>

        <t>The number of source streams and RTP streams in an overall
        real-time interaction can be surprisingly large. In addition to a
        voice source stream and a video source stream, there could be separate
        source streams for each of the cameras or microphones on a
        videoconferencing system. As noted above, there might also be separate
        redundancy RTP streams that provide protection to a source RTP stream,
        using techniques such as Forward Error Correction. Another example is
        simulcast transmission, where a video source stream can be transmitted
        as high-resolution and low-resolution RTP streams at the same time. In
        this case, a media processing function might choose to send one or
        both RTP streams onward to a receiver based on bandwidth availability
        or who the active speaker is in a multipoint conference. Lastly, a
        transmitter might send the same media content concurrently as two
        RTP streams using different encodings (e.g., VP8 in parallel with
        H.264) to allow a media processing function to select a media encoding
        that best matches the capabilities of the receiver.</t>

        <t>Other transport protocols may also be used to transmit real-time
        data or near-real-time data. For example, SCTP can be utilized to
        carry application sharing or whiteboarding information as part of an
        overall interaction that includes real-time media. These additional
        transport protocols can be multiplexed with an RTP session via UDP
        encapsulation, thereby using a single pair of UDP ports.</t>

        <t>The RTCWEB protocol suite <xref
        target="I-D.ietf-rtcweb-transports"/> employs two layers of
        multiplexing:<list style="numbers">
            <t>Individual source streams are carried in one or more individual
            RTP streams that can be multiplexed into a single RTP session as
            described in <xref target="RFC3550"/>; and</t>

            <t>An RTP session could be multiplexed with other protocols via
            UDP encapsulation over a common pair of UDP ports as described in
            <xref target="RFC5764"/> and <xref
            target="I-D.petithuguenin-avtcore-rfc5764-mux-fixes"/>. The
            resulting unidirectional UDP packet flow is identified by a
            5-tuple, i.e., a combination of two IP addresses (source and
            destination), two UDP ports (source and destination), and the use
            of the UDP protocol.</t>
          </list>For RTCWEB, an individual source stream is a MediaStreamTrack,
        and a MediaStream contains one or more MediaStreamTracks <xref
        target="W3C.WD-mediacapture-streams-20130903"/>. A MediaStreamTrack is
        transmitted as a source RTP stream plus zero or more redundancy RTP
        streams, so a MediaStream that consists of one MediaStreamTrack is
        transmitted as a single source RTP stream plus zero or more redundancy
        RTP streams.</t>

        <t>For more information on use of RTP in RTCWEB, see <xref
        target="I-D.ietf-rtcweb-rtp-usage"/>.</t>

        <t><xref target="I-D.westerlund-avtcore-transport-multiplexing"/>
        proposes to allow multiple RTP sessions to be multiplexed over a
        single UDP 5-tuple; the future of that expired proposal is
        uncertain.</t>

        <t>For IPv6, addition of the flow label <xref target="RFC6437"/> to
        5-tuples results in 6-tuples, but in practice, use of a flow label is
        unlikely to result in a finer-grain traffic subset than the
        corresponding 5-tuple (e.g., the flow label is likely to represent the
        combination of two ports with use of the UDP protocol). For that
        reason, discussion in this draft focuses on UDP 5-tuples.</t>

        <t>[Editor's Note: Multiple RTP sessions cannot be multiplexed on the
        same UDP 5-tuple, but what about multiple DTLS sessions for RTP? RFC
        5764 appears to allow multiple DTLS sessions.]</t>

        <t>[Editor's Note: Should RTCP multiplexing w/RTP be mentioned here,
        as described in RFC 5761?]</t>
      </section>

      <section anchor="DiffServ"
               title="Differentiated Services (DiffServ) Background">
        <t>The DiffServ architecture is intended to enable scalable service
        discrimination in the Internet without requiring each network node to
        store per-flow state and participate in per-flow signaling. The
        services may be end-to-end or within a network; they include both
        those that can satisfy quantitative performance requirements (e.g.,
        peak bandwidth) and those based on relative performance (e.g., "class"
        differentiation). Services can be constructed by a combination of
        well-defined building blocks deployed in network nodes that: <list
            style="symbols">
            <t>classify traffic and set bits in an IP header field at network
            boundaries or hosts,</t>

            <t>use those bits to determine how packets are forwarded by the
            nodes inside the network, and</t>

            <t>condition the marked packets (e.g., meter, mark, shape, police)
            at network boundaries in accordance with the requirements or rules
            of each service.</t>
          </list>A network node that supports DiffServ includes a classifier
        that selects packets based on the value of the DS field in IP headers,
        along with buffer management and packet scheduling mechanisms capable
        of delivering the specific packet forwarding treatment indicated by
        the DS field value. Setting of the DS field and fine-grain
        conditioning of marked packets need only be performed at network
        boundaries; internal network nodes operate on traffic aggregates that
        share a DS field value, or in some cases, a small set of related
        values.</t>

        <t>The DiffServ architecture <xref target="RFC2475"/> maintains
        distinctions among:<list style="symbols">
            <t>the QoS service provided to a traffic aggregate,</t>

            <t>the conditioning functions and per-hop behaviors (PHBs) used to
            realize services,</t>

            <t>the DS field value (DS codepoint, or DSCP) in the IP header
            used to mark packets to select a per-hop behavior, and</t>

            <t>the particular implementation mechanisms that realize a per-hop
            behavior.</t>
          </list></t>

        <t>This document focuses on PHBs and the usage of DSCPs to obtain
        those behaviors. In a network node's forwarding path, the DSCP is used
        to map a packet to a particular forwarding treatment, or per-hop
        behavior (PHB) that specifies the forwarding treatment.</t>

        <t>A per-hop behavior (PHB) is a description of the externally
        observable forwarding behavior of a network node for network traffic
        marked with a DSCP that selects that PHB. In this context, "forwarding
        behavior" is a general concept - for example, if only one DSCP is used
        for all traffic on a link, the observable forwarding behavior (e.g.,
        loss, delay, jitter) will often depend only on the relative loading of
        the link. To obtain useful behavioral differentiation, multiple traffic
        subsets are marked with different DSCPs for different PHBs for which
        node resources such as buffer space and bandwidth are allocated. PHBs
        provide the framework for a DiffServ network node to allocate
        resources to traffic subsets, with network-scope differentiated
        services constructed on top of this basic hop-by-hop (per-node)
        resource allocation mechanism.</t>

        <t>The codepoints (DSCPs) may be chosen from a small set of fixed
        values (the class selector codepoints), or from a set of recommended
        values defined in PHB specifications, or from values that have purely
        local meanings to a specific network that supports DiffServ; in
        general, packets may be forwarded across multiple such networks
        between source and destination.</t>

        <t>The mandatory DSCPs are the class selector codepoints as specified
        in <xref target="RFC2474"/>. The class selector codepoints (CS0-CS7)
        extend the deprecated concept of IP Precedence in the IPv4 header;
        three bits are added, so that the class selector DSCPs are of the form
        'xxx000'. The all-zero DSCP ('000000' or CS0) designates a Default PHB
        that provides best-effort forwarding behavior, and the remaining class
        selector codepoints were originally specified to provide relatively
        better per-hop-forwarding behavior in increasing numerical order,
        but:<list style="symbols">
            <t>There is no requirement that any two adjacent class selector
            codepoints select different PHBs; adjacent class selector
            codepoints may use the same pool of resources on each network node
            in some networks.</t>

            <t>CS1 ('001000') was subsequently recommended for a Lower Effort
            (LE) PHB and service when such a service is offered by a network
            <xref target="RFC3662"/>. An LE service forwards traffic with
            "lower" priority than best effort and can be "starved" by best
            effort and other "higher" priority traffic. Not all networks offer
            an LE service. See <xref target="RFC3662"/> for further discussion
            of the LE PHB and service.</t>
          </list></t>

        <t>Applications and traffic sources cannot rely upon different class
        selector codepoints providing differentiated services or upon the
        presence of an LE service that is selected by the CS1 DSCP. There is
        no effective way for a network endpoint to determine whether the CS1
        DSCP selects an LE service on a specific network, let alone
        end-to-end. Packets marked with the CS1 DSCP may be forwarded with
        best effort service or another "higher" priority service, see <xref
        target="RFC2474"/>.</t>
      </section>

      <section anchor="DiffServPHBs" title="DiffServ PHBs (Per-Hop Behaviors)">
        <t>Although Differentiated Services is a general architecture that may
        be used to implement a variety of services, three fundamental
        forwarding behaviors (PHBs) have been defined and characterized for
        general use. These are:<list style="numbers">
            <t>Default Forwarding (DF) for elastic traffic <xref
            target="RFC2474"/>. The Default PHB is always selected by the
            all-zero DSCP.</t>

            <t>Assured Forwarding (AF) <xref target="RFC2597"/> to provide
            differentiated service to elastic traffic. Each instance of the AF
            behavior consists of three PHBs that differ only in drop
            precedence, e.g., AF11, AF12 and AF13; such a set of three AF PHBs
            is referred to as an AF class, e.g., AF1x. There are four defined
            AF classes, AF1x through AF4x, with higher numbered classes
            expected to receive better forwarding treatment than lower
            numbered classes.</t>

            <t>Expedited Forwarding (EF) <xref target="RFC3246"/> intended for
            inelastic traffic. Beyond the basic EF PHB, the VOICE-ADMIT PHB
            <xref target="RFC5865"/> is an admission controlled variant of the
            EF PHB.</t>
          </list></t>
      </section>

      <section anchor="DiffServAndTransport"
               title="DiffServ, Reordering and Transport Protocols">
        <t>[Editor's note: This section and the recommendations in Section 4
        are centered on TCP, UDP, and SCTP. They could use generalization to
        include other transport protocols - DCCP is a likely one to include,
        although it is not necessary to include every known transport
        protocol.]</t>

        <t>Transport protocols provide data communication behaviors beyond
        those possible at the IP layer. An important example is that TCP
        provides reliable in-order delivery of data with congestion control.
        SCTP provides additional properties such as preservation of message
        boundaries, and the ability to avoid head-of-line blocking that may
        occur with TCP. In contrast, UDP is a basic unreliable datagram
        protocol that provides port-based multiplexing and demultiplexing on
        top of IP.</t>

        <t>Transport protocols that provide reliable delivery (e.g., TCP,
        SCTP) are sensitive to network reordering of traffic. When a protocol
        that provides reliable delivery receives a packet other than the next
        expected packet for an ordered connection or stream, it usually
        assumes that the expected packet has been lost and responds with a
        retransmission request for that packet. In addition, congestion
        control functionality in transport protocols usually infers congestion
        when packets are lost, creating an additional sensitivity to
        significant reordering - such reordering may be (mis-)interpreted as
        indicating congestion-caused packet loss, causing a reduction in
        transmission rate. This remains true even when <xref
        target="RFC3168">ECN</xref> is in use, as ECN receivers are required
        to treat missing packets as potential indications of congestion. This
        requirement is based on two factors:</t>

        <t><list style="symbols">
            <t>Severe congestion may cause ECN-capable network nodes to drop
            packets, and</t>

            <t>ECN traffic may be forwarded by network nodes that do not
            support ECN and hence use packet drops to indicate congestion.</t>
          </list>Congestion control is an important aspect of the Internet
        architecture, see <xref target="RFC2914"/> for further discussion.</t>

        <t>In general, marking packets with different DSCPs results in
        different PHBs being applied at network nodes, making reordering
        possible due to use of different pools of forwarding resources for
        each PHB. The primary exception is that reordering is prohibited
        within each AF class (e.g., AF1x), as the three PHBs in an AF class
        differ solely in drop precedence. Reordering within a PHB or AF class
        may occur for other transient reasons (e.g., route flap or ECMP
        rebalancing).</t>

        <t>UDP is the primary transport protocol that is not sensitive to
        reordering in the network, because it does not provide reliable
        delivery or congestion control. On the other hand, when UDP is used to
        encapsulate other protocols (e.g., as is the case for RTCWEB, see
        <xref target="RTP"/>), the reordering considerations for the
        encapsulated protocols apply. For RTCWEB example in particular, every
        encapsulated protocol (i.e., RTP, SCTP and TCP) is sensitive to
        reordering as further discussed in this document.</t>
      </section>

      <section anchor="DiffServandRTC"
               title="DiffServ, Reordering and Real-Time Communication">
        <t>Real-time communications are also sensitive to network reordering
        of packets. Such reordering may lead to spurious NACK generation and
        unneeded retransmission, as is the case for reliable delivery
        protocols (see Section <xref format="counter"
        target="DiffServAndTransport"/>). The
        degree of sensitivity depends on protocol or stream timers, in
        contrast to reliable delivery protocols that usually react to all
        reordering.</t>

        <t>Receiver jitter buffers have important roles in the effect of
        reordering on real-time communications:<list style="symbols">
            <t>Minor packet reordering that is contained within a jitter
            buffer usually has no effect on rendering of the received RTP
            stream.</t>

            <t>Packet reordering that exceeds the capacity of a jitter buffer
            can cause user-perceptible quality problems (e.g., glitches,
            noise) for delay sensitive communication, such as interactive
            conversations. Interactive real-time communication implementations
            often choose to discard data that is sufficiently late to prevent
            it from being rendered in source stream order, making
            retransmission counterproductive. For this reason, implementations
            of interactive real-time communication often do not use
            retransmission.</t>

            <t>In contrast, replay of recorded media typically uses
            significantly larger jitter buffers than can be tolerated for
            interactive conversations, with the result that replay is more
            tolerant to reordering than interactive conversations. The size of
            the jitter buffer imposes an upper bound on replay tolerance to
            reordering, but does enable retransmission to be used when the
            jitter buffer is significantly larger than the amount of data that
            will arrive during the round-trip latency for retransmission.</t>
          </list>Network packet reordering caused by use of different DSCPs
        has no effective upper bound, and can exceed the size of any
        reasonable jitter buffer - in practice, the size of jitter buffers for
        replay is limited by external factors such as the amount of time that
        a human is willing to wait for replay to start.</t>
      </section>

      <section anchor="TCs-Remarking"
               title="Traffic Classifiers and DSCP Remarking">
        <t>DSCP markings are not end-to-end in general. Each network can make
        its own decisions about what PHBs to use and which DSCP maps to each
        PHB. While every PHB specification includes a recommended DSCP, and
        RFC 4594 <xref target="RFC4594"/> recommends their end-to-end usage,
        there is no requirement that every network support any PHBs or use any
        DSCPs, with the exception of the class selector codepoint requirements
        in RFC 2474 <xref target="RFC2474"/>. When DiffServ is used, the edge
        or boundary nodes of a network are responsible for ensuring that all
        traffic entering that network conforms to that network's policies for
        DSCP and PHB usage, and such nodes remark traffic (change the DSCP
        marking as part of traffic conditioning) accordingly. As a result,
        DSCP remarking is possible at any network boundary, including the
        first network node that traffic sent by a host encounters. Remarking
        is also possible within a network, e.g., for traffic shaping.</t>

        <t>DSCP remarking is part of traffic conditioning; the traffic
        conditioning functionality applied to packets at a network node is
        determined by a traffic classifier <xref target="RFC2475"/>. Edge
        nodes of a DiffServ network classify traffic based on selected packet
        header fields; typical implementations do not look beyond the
        traffic's 5-tuple in the IP and transport protocol headers. As a
        result, when multiple DSCPs are used for traffic that shares a
        5-tuple, remarking at a network boundary may result in all of the
        traffic being forwarded with a single DSCP, thereby removing any
        differentiation within the 5-tuple downstream of the remarking
        location. Network nodes within a DiffServ network generally classify
        traffic based solely on DSCPs, but may perform finer grain traffic
        conditioning similar to that performed by edge nodes.</t>

        <t>So, for two arbitrary network endpoints, there can be no assurance
        that the DSCP set at the source endpoint will be preserved and
        presented at the destination endpoint. On the contrary, it is quite
        likely that the DSCP will be set to zero (e.g., at the boundary of a
        network operator that distrusts or does not use the DSCP field) or to
        a value deemed suitable by an ingress multi-field (MF) classifier for
        whatever 5-tuple it carries. DiffServ classifiers generally ignore
        embedded protocol headers (e.g., for SCTP or RTP embedded in UDP,
        classification will be only on the outer UDP header).</t>

        <t>In addition, remarking may remove application-level distinctions in
        forwarding behavior - e.g., if multiple PHBs within an AF class are
        used to distinguish different types of frames within a video RTP
        stream, token-bucket-based remarkers operating in Color-Blind mode
        (see <xref target="RFC2697"/> and <xref target="RFC2698"/> for
        examples) may remark solely based on flow rate and burst behavior,
        removing the drop precedence distinctions specified by the source.</t>

        <t>Backbone and other carrier networks may employ a small number of
        DSCPs (e.g., fewer than half a dozen) in order to manage a small number
        of traffic aggregates; hosts that use a larger number of DSCPs can
        expect to find that much of their intended differentiation is removed
        by such networks. Better results may be achieved when DSCPs are used
        to spread traffic among a smaller number of DiffServ-based traffic
        subsets or aggregates, see <xref
        target="I-D.geib-tsvwg-diffserv-intercon"/> for one proposal. This is
        of particular importance for MPLS-based networks due to the limited
        size of the Traffic Class (TC) field in an MPLS label <xref
        target="RFC5462"/> that is used to carry DiffServ information and the
        use of that TC field for other purposes, e.g., ECN <xref
        target="RFC5129"/>. For further discussion on use of DiffServ with
        MPLS, see <xref target="RFC3270"/> and <xref target="RFC5127"/>.</t>
      </section>
    </section>

    <section anchor="RTP-Mux" title="RTP Multiplexing Background">
      <t>Section <xref format="counter" target="Background"/> explains how
      source streams can be multiplexed over RTP sessions which can in turn be
      multiplexed over UDP with packets generated by other transport
      protocols. This section provides background on why this level of
      multiplexing is desirable. The rationale in this section applies both to
      multiplexing of source streams in RTP sessions and multiplexing of an
      RTP session with traffic from other transport protocols via UDP
      encapsulation.</t>

      <t>Multiplexing reduces the number of ports utilized for real-time and
      related communication in an overall interaction. While a single endpoint
      might have plenty of ports available for communication, this traffic
      often traverses points in the network that are constrained on the number
      of available ports. A good example is a NAT/FW device sitting at the
      network edge. As the number of simultaneous protocol sessions increases,
      so does the burden placed on these devices in order to provide port
      mapping.</t>

      <t>Another reason for multiplexing is to help reduce the time required
      to establish bi-directional communication. Since any two communicating
      users might be situated behind different NAT/FW devices, it is necessary
      to employ techniques like STUN/ICE/TURN in order to get traffic to flow
      between the two devices <xref target="I-D.ietf-rtcweb-transports"/>.
      Performing the tasks required of STUN/ICE/TURN takes time, and requiring
      an endpoint to perform these tasks for multiple protocol sessions can
      increase the time required. While tasks for different sessions can be
      performed in parallel, it is nonetheless necessary for applications to
      wait for all sessions to be opened before communication between two users
      can begin. Reducing the number of STUN/ICE/TURN steps reduces the
      probability of losing a packet and introducing delay in setting up a
      communication session. Further, reducing the number of STUN/ICE/TURN
      tasks means that there is a lower burden placed on the STUN and TURN
      servers.</t>

      <t>Multiplexing may reduce the complexity and resulting load on an
      endpoint. A single instance of STUN/ICE/TURN is simpler to execute and
      manage than multiple instances of STUN/ICE/TURN operations happening in
      parallel, as the latter require synchronization and create more complex
      failure situations that have to be cleaned up by additional code.</t>

      <!--Explain:

RTP carries timing information, so two different streams, one for audio and one for video,
can later be synchronized and played at the same time even if they were significantly
reordered.

-->
    </section>

    <section anchor="Recommendations" title="Recommendations">
      <t>The only standardized use of multiple PHBs and DSCPs that avoids
      network reordering among packets marked with different DSCPs is use of
      PHBs within a single AF class. All other uses of multiple PHBs and/or
      the class selector DSCPs allow network reordering of packets that are
      marked with different DSCPs. Based on this and the foregoing discussion,
      the following requirements apply to use of DiffServ with real-time
      communications - applications and other traffic sources:<list
          style="symbols">
          <t>SHOULD NOT use different PHBs and DSCPs that may cause reordering
          within a single RTP stream. If this recommendation is not followed, significant network
          reordering may overwhelm implementation assumptions about limits on
          reordering, e.g., jitter buffer size, causing poor user experiences,
          see Section <xref format="counter" target="DiffServandRTC"/> above.</t>

          <t>SHOULD NOT use different PHBs and DSCPs that may cause reordering
          within an ordered session for a reliable transport protocol (e.g.,
          TCP, SCTP). Receivers for such protocols interpret reordering as
          indicating loss of out-of-order packets, causing undesired
          retransmission requests, and will infer congestion from significant
          reordering, causing throughput reduction. This requirement applies
          to both unencapsulated and encapsulated (e.g., via UDP) uses of
          reliable transport protocols.</t>

          <t>MAY use different PHBs and DSCPs that cause reordering within a
          single UDP 5-tuple, subject to the above constraints. The service
          differentiation provided by such usage is unreliable, as it may be
          removed at network boundaries for the reasons described in Section
          <xref format="counter" target="TCs-Remarking"/> above.</t>

          <t>MUST NOT rely on end-to-end preservation of DSCPs as network node
          remarking can change DSCPs and remove drop precedence distinctions,
          see Section <xref format="counter" target="TCs-Remarking"/> above.
          For example, if a source uses drop precedence distinctions within an
          AF class to identify different types of video frames, using those
          DSCP values at the receiver to identify frame type is inherently
          unreliable.</t>

          <t>SHOULD use the CS1 codepoint only for traffic that is acceptable
          to forward as best effort traffic, as network support for use of CS1
          to select a "less than best effort" PHB is inconsistent. Further,
          some networks may treat CS1 as providing "better than best effort"
          forwarding behavior.</t>
        </list></t>

      <t>There is no requirement in this document for network operators to
      differentiate traffic in any fashion. Networks may support all of the
      PHBs discussed herein, classify EF and AFxx traffic identically, or even
      remark all traffic to best effort at some ingress points. Nonetheless,
      it is useful for network endpoints to provide finer granularity DSCP
      marking on packets for the benefit of networks that offer QoS service
      differentiation. A specific example is that traffic originating from a
      browser may benefit from QoS service differentiation in within-building
      and residential access networks, even if the DSCP marking is
      subsequently removed or simplified. This is because such networks and
      the boundaries between them are likely traffic bottleneck locations
      (e.g., due to customer aggregation onto common links and/or speed
      differences among links used by the same traffic).</t>
    </section>

    <section anchor="Examples" title="Examples">
      <t>For real-time communications, one might want to mark the audio
      packets using EF and the video packets as AF41. However, in a video
      conference, receiving the audio packets ahead of the video is not useful
      because lip sync is necessary between audio and video. It may still be
      desirable to send audio with a PHB that provides better service, because
      early arrival of audio helps assure smooth audio rendering, which is
      often more important than fully faithful video rendering. There are also
      limits, as some devices have difficulties in synchronizing voice and
      video when packets that need to be rendered together arrive at
      significantly different times. It makes more sense to use different PHBs
      when the audio and video source streams do not share a strict timing
      relationship. For example, video content may be shared within a video
      conference via playback, perhaps of an unedited video clip that is
      intended to become part of a television advertisement. Such content
      sharing video does not need precise synchronization with video
      conference audio, and could use a different PHB, as content sharing
      video is more tolerant to jitter, loss, and delay.</t>

      <t>Within a layered video RTP stream, ordering of frame communication is
      preferred, but importance of frame types varies, making use of PHBs with
      different drop precedences appropriate. For example, I-frames that
      contain an entire image are usually more important than P-frames that
      contain only changes from the previous image because loss of a P-frame
      (or part thereof) can be recovered (at the latest) via the next I-frame,
      whereas loss of an I-frame (or part thereof) may cause rendering
      problems for all of the P-frames that depend on the I-frame. For this
      reason, it is appropriate to mark I-frame packets with a PHB that has
      lower drop precedence than the PHB used for P-frames, as long as the
      PHBs preserve ordering among frames (e.g., are in an AF class) - AF41
      for I-frames and AF43 for P-frames is one possibility. Additional
      spatial and temporal layers beyond the base video layer could also be
      marked with higher drop precedence than the base video layer, as their
      loss reduces video quality, but does not disrupt video rendering.</t>

      <t>Additional RTP streams in a real-time communication interaction could
      be marked with CS0 and carried as best effort traffic. One example is
      real-time text transmitted as specified in RFC 4103 <xref
      target="RFC4103"/>; best effort forwarding suffices when redundancy
      encoding is used (as required by RFC 4103). Best effort forwarding
      suffices because such real-time text has loose timing requirements; RFC
      4103 recommends sending text in chunks every 300 ms. Such text is
      technically real-time, but does not need a PHB promising better service
      than best effort, in contrast to audio or video.</t>

      <!--Explain why we can't do local network configuration of values used - where?

-->
    </section>

    <section anchor="IANA" title="IANA Considerations">
      <t>This document includes no request to IANA.</t>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>The security considerations for all of the technologies discussed in
      this document apply; in particular see the security considerations for
      RTP in <xref target="RFC3550"/> and DiffServ in <xref target="RFC2474"/>
      and <xref target="RFC2475"/>.</t>

      <t>Multiplexing of multiple protocols onto a single UDP 5-tuple via
      encapsulation has implications for network functionality that is based
      on monitoring or inspection of individual protocol flows, e.g.,
      firewalls and traffic monitoring systems. When implementations of such
      functionality lack visibility into encapsulated traffic (likely for many
      current implementations), it may be difficult or impossible to apply
      network security policy and controls at a finer grain than the overall
      UDP 5-tuple.</t>

      <t>Use of multiple DSCPs to provide differentiated QoS service may
      reveal information about the encrypted traffic to which different
      service levels are provided. For example, DSCP-based identification of
      RTP streams combined with packet frequency and packet size could reveal
      the type or nature of the encrypted source streams. The IP header used
      for forwarding has to be unencrypted for obvious reasons, and the DSCP
      likewise has to be unencrypted in order to enable different IP
      forwarding behaviors to be applied to different packets. The nature of
      encrypted traffic components can be disguised via encrypted dummy data
      padding and encrypted dummy packets, e.g., see the discussion of traffic
      flow confidentiality in <xref target="RFC4303"/>. Encrypted dummy
      packets could even be added in a fashion that an observer of the overall
      encrypted traffic might mistake for another encrypted RTP stream.</t>
    </section>

    <section anchor="Acknowledgements" title="Acknowledgements">
      <t>This document is the result of many conversations that have occurred
      within multiple working groups in the RAI and Transport areas. Many
      thanks to Harald Alvestrand, Erin Bournival, Brian Carpenter, Ruediger
      Geib and James Polk for their reviews and input.</t>
    </section>
  </middle>

  <back>
    <references title="Normative References">
      &RFC2119;

      &RFC2474;

      &RFC2597;

      &RFC3246;

      &RFC3550;

      &RFC5865;
    </references>

    <references title="Informative References">
      &I-D.geib-tsvwg-diffserv-intercon-05;

      &I-D.ietf-avtext-rtp-grouping-taxonomy-02;

      &I-D.ietf-rtcweb-overview-10;

      &I-D.ietf-rtcweb-rtp-usage-15;

      &I-D.ietf-rtcweb-transports-05;

      &I-D.petithuguenin-avtcore-rfc5764-mux-fixes-00;

      &I-D.westerlund-avtcore-transport-multiplexing-07;

      &RFC2475;

      &RFC2697;

      &RFC2698;

      &RFC2914;

      &RFC3168;

      &RFC3270;

      &RFC3662;

      &RFC4103;

      &RFC4303;

      &RFC4594;

      &RFC5109;

      &RFC5127;

      &RFC5129;

      &RFC5462;

      &RFC5764;

      &RFC6437;

      &W3C.WD-mediacapture-streams-20130903;
    </references>

    <section title="Change History" toc="exclude">
      <t>[To be removed before RFC publication.]</t>

      <t>Changes from draft-york-dart-dscp-rtp-00 to -01<list style="symbols">
          <t>Added examples (Section 5)</t>

          <t>Reworked text on RTP session multiplexing, at most one RTP
          session can be used per UDP 5-tuple.</t>

          <t>Initial terminology alignment with RTP grouping taxonomy
          draft.</t>

          <t>Added Section 2.5 on real-time communication interaction
          w/reordering based on text from Harald Alvestrand.</t>

          <t>Strengthened warnings on loss of differentiation, but indicate
          that differentiation may still be useful from source to point of
          loss.</t>

          <t>Added a few sentences on DiffServ and MPLS.</t>

          <t>Added discussion of UDP-encapsulated protocols that are
          reordering sensitive.</t>

          <t>Added initial security considerations.</t>

          <t>Many editorial changes</t>
        </list></t>

      <t>Changes from draft-york-dart-dscp-rtp-01 to -02<list style="symbols">
          <t>More terminology alignment with RTP grouping taxonomy draft: "RTP
          packet stream" -&gt; "RTP stream"</t>

          <t>Aligned terminology for less-than-best-effort with RFC 3662 - LE
          (Lower Effort) PHB and service</t>

          <t>Minor reference updates</t>
        </list></t>
    </section>
  </back>
</rfc>
