<?xml version="1.0" encoding="UTF-8"?>
<!-- was: <?xml version="1.0" encoding="US-ASCII"?> -->
<!-- This template is for creating an Internet Draft using xml2rfc,
which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
<!-- One method to get references from the online citation libraries.
There has to be one entity for each item to be referenced.
An alternate method (rfc include) is described in the references. --><!ENTITY RFC5966 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5966.xml">
<!ENTITY I-D.draft-ietf-dnsop-edns-tcp-keepalive SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml3/reference.I-D.draft-ietf-dnsop-edns-tcp-keepalive-00.xml">
<!ENTITY RFC2119 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2629 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3552 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.3552.xml">
<!ENTITY RFC5625 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5625.xml">
<!ENTITY RFC6824 SYSTEM "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6824.xml">
<!ENTITY RFC0768 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0768.xml">
<!ENTITY RFC0793 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.0793.xml">
<!ENTITY RFC1034 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1034.xml">
<!ENTITY RFC1035 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1035.xml">
<!ENTITY RFC1123 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.1123.xml">
<!ENTITY RFC2616 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2616.xml">
<!ENTITY RFC6891 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.6891.xml">
<!ENTITY RFC4033 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4033.xml">
<!ENTITY RFC5155 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5155.xml">
<!ENTITY RFC5358 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5358.xml">
<!ENTITY RFC5405 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.5405.xml">
<!ENTITY RFC7323 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7323.xml">
<!ENTITY RFC7230 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7230.xml">
<!ENTITY RFC2920 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.2920.xml">
<!ENTITY RFC4786 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.4786.xml">
<!ENTITY RFC7413 PUBLIC "" "http://xml2rfc.ietf.org/public/rfc/bibxml/reference.RFC.7413.xml">
]>
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs),
please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
(Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
(using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-ietf-dnsop-5966bis-04" ipr="trust200902" obsoletes="5966">
    <!-- category values: std, bcp, info, exp, and historic
  ipr values: full3667, noModification3667, noDerivatives3667
  you can add the attributes updates="NNNN" and obsoletes="NNNN"
  they will automatically be output with "(if approved)" -->
    <!-- ***** FRONT MATTER ***** -->
    <front>
        <!-- The abbreviated title is used in the page header - it is only necessary if the
    full title is longer than 39 characters -->
        <title abbrev="DNS over TCP">DNS Transport over TCP - Implementation Requirements</title>
        <!-- add 'role="editor"' below for the editors if appropriate -->
        <!-- Another author who claims to be an editor -->
        <author fullname="John Dickinson" initials="J." surname="Dickinson">
            <organization abbrev="Sinodun">Sinodun Internet Technologies</organization>
            <address>
                <postal>
                    <street>Magdalen Centre</street>
                    <street>Oxford Science Park</street>
                    <city>Oxford</city>
                    <region/>
                    <code>OX4 4GA</code>
                    <country>UK</country>
                </postal>
                <email>jad@sinodun.com</email>
                <uri>http://sinodun.com</uri>
            </address>
        </author>
        <author fullname="Sara Dickinson" initials="S." surname="Dickinson">
            <organization abbrev="Sinodun">Sinodun Internet Technologies</organization>
            <address>
                <postal>
                    <street>Magdalen Centre</street>
                    <street>Oxford Science Park</street>
                    <city>Oxford</city>
                    <region/>
                    <code>OX4 4GA</code>
                    <country>UK</country>
                </postal>
                <email>sara@sinodun.com</email>
                <uri>http://sinodun.com</uri>
            </address>
        </author>
        <author fullname="Ray Bellis" initials="R." surname="Bellis">
            <organization abbrev="ISC">Internet Systems Consortium, Inc</organization>
            <address>
            <postal>
                <street>950 Charter Street</street>
                <city>Redwood City</city>
                <region>CA</region>
                <code>94063</code>
                <country>USA</country>
            </postal>
            <phone>+1 650 423 1200</phone>
            <email>ray@isc.org</email>
            <uri>http://www.isc.org</uri>
        </address>
        </author>
        <author fullname="Allison Mankin" initials="A." surname="Mankin">
            <organization>Verisign Labs</organization>
            <address>
                <postal>
                    <street>12061 Bluemont Way</street>
                    <city>Reston</city>
                    <region>VA</region>
                    <code>20190</code>
                    <country>US</country>
                </postal>
                <phone>+1 703 948-3200</phone>
                <email>amankin@verisign.com</email>
            </address>
        </author>
        <author fullname="Duane Wessels" initials="D." surname="Wessels">
            <organization>Verisign Labs</organization>
            <address>
                <postal>
                    <street>12061 Bluemont Way</street>
                    <city>Reston</city>
                    <region>VA</region>
                    <code>20190</code>
                    <country>US</country>
                </postal>
                <phone>+1 703 948-3200</phone>
                <email>dwessels@verisign.com</email>
            </address>
        </author>
        <date month="Nov" year="2015"/>
        <!-- If the month and year are both specified and are the current ones, xml2rfc will fill
          in the current day for you. If only the current year is specified, xml2rfc will fill
        in the current day and month for you. If the year is not the current one, it is
        necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the
        purpose of calculating the expiry date).  With drafts it is normally sufficient to
        specify just the year. -->
        <!-- Meta-data Declarations -->
        <area>ops</area>
        <workgroup>dnsop</workgroup>
        <!-- WG name at the upperleft corner of the doc,
        IETF is fine for individual submissions.
        If this element is not present, the default is "Network Working Group",
        which is used by the RFC Editor as a nod to the history of the IETF. -->
        <keyword>DNS</keyword>
        <keyword>TCP/IP</keyword>
        <keyword>transport</keyword>
        <!-- Keywords will be incorporated into HTML output
        files in a meta tag but they have no effect on text or nroff
        output. If you submit your draft to the RFC Editor, the
        keywords will be used for the search engine. -->
        <abstract>
            <t> This document specifies the requirement for support of TCP as a
                transport protocol for DNS implementations and provides
                guidelines towards DNS-over-TCP performance on par with that
                of DNS-over-UDP. This document obsoletes RFC 5966.</t>
        </abstract>
    </front>
    <middle>
        <section title="Introduction">
            <t> Most <xref target="RFC1034">DNS</xref> transactions take place over
                <xref target="RFC0768">UDP</xref>. <xref target="RFC0793">TCP</xref> is always used
                for full zone transfers (AXFR) and is often used for messages whose sizes exceed the
                DNS protocol's original 512-byte limit. The growing deployment of DNSSEC and IPv6
                has increased response sizes and therefore the use of TCP.
                The need for increased TCP use has also been driven by the 
                protection it provides against address spoofing and therefore 
                exploitation of DNS in reflection/amplification attacks.
                It is now widely used in Response Rate Limiting <xref target="RRL"/>.</t>

            <t> Section 6.1.3.2 of <xref target="RFC1123"/> states:
                <list><t><vspace/>DNS resolvers and recursive servers MUST support UDP, and SHOULD
                support TCP, for sending (non-zone-transfer) queries.</t>
                </list></t>

            <t> However, some implementors have taken the text quoted above to mean that TCP support
                is an optional feature of the DNS protocol.</t>

            <t> The majority of DNS server operators already support TCP and the default
                configuration for most software implementations is to support TCP. The primary
                audience for this document is those implementors whose limited support for TCP
                restricts interoperability and hinders deployment of new DNS features.</t>

            <t> This document therefore updates the core DNS protocol specifications such that
                support for TCP is henceforth a REQUIRED part of a full DNS protocol
                implementation.</t>

            <t> There are several advantages and disadvantages to the increased use of
                TCP (see <xref target="Appendix-A"/>) as well as implementation details
                that need to be considered. This document addresses these
                issues and presents TCP as a valid transport alternative for DNS. It 
                extends the content of <xref target="RFC5966"/>, with
                additional considerations and lessons learned from research, developments
                and implementation of TCP in DNS and in other Internet protocols. </t>

            <t> Whilst this document makes no specific requirements for operators of DNS
                servers to meet, it does offer some suggestions to operators to help
                ensure that support for TCP on their servers and network is optimal.
                It should be noted that failure to support TCP (or the blocking of DNS over TCP at
                the network layer) may result in resolution failure and/or application-level
                timeouts. </t>
        </section>

        <section title="Requirements Terminology">
            <t> The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
                "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
                document are to be interpreted as described in <xref target="RFC2119"/>.</t>
        </section>

        <section title="Terminology">
            <t><list style="symbols">
                <t> Persistent connection: a TCP connection that is not closed either by the
                    server after sending the first response or by the client after receiving
                    the first response. </t>
                <t> Connection Reuse: the sending of multiple queries and responses
                    over a single TCP connection.</t>
                <t> Idle DNS-over-TCP session: Clients and servers view application level
                    idleness differently.  A DNS client considers an established DNS-over-TCP session
                    to be idle when it has no pending queries to send and there are no outstanding
                    responses. A DNS server considers an established DNS-over-TCP session to be idle
                    when it has
                    sent responses to all the queries it has received on that connection.</t>
                <t> Pipelining: the sending of multiple queries and responses over a single TCP
                    connection but not waiting for any outstanding replies before sending another
                    query.</t>
                <t> Out-Of-Order Processing: The processing of queries concurrently
                    and the returning of individual responses as soon as they are
                    available, possibly out-of-order. This will most likely occur in
                    recursive servers; however, it is possible in authoritative servers
                    that, for example, have different backend data stores. </t>
                </list>
            </t>
        </section>

        <section title="Discussion">
            <t> In the absence of EDNS0 (<xref target="RFC6891">Extension Mechanisms for DNS 0</xref>) (see below), the normal
                behaviour of any DNS server needing to send a UDP response that would exceed the
                512-byte limit is for the server to truncate the response so that it fits within
                that limit and then set the TC flag in the response header. When the client receives
                such a response, it takes the TC flag as an indication that it should retry over TCP
                instead.</t>

            <t> RFC 1123 also says:
                <list><t><vspace/>... it is also clear that some new DNS record types defined in the
                    future will contain information exceeding the 512 byte limit that applies to
                    UDP, and hence will require TCP. Thus, resolvers and name servers should
                    implement TCP services as a backup to UDP today, with the knowledge that
                    they will require the TCP service in the future.</t></list></t>

            <t> Existing deployments of <xref target="RFC4033">DNS Security (DNSSEC)</xref> have
                shown that truncation at the 512-byte boundary is now commonplace. For example, a
                Non-Existent Domain (NXDOMAIN) (RCODE == 3) response from a DNSSEC-signed zone using
                <xref target="RFC5155">NextSECure 3 (NSEC3)</xref> is almost invariably larger
                than 512 bytes.</t>

            <t> Since the original core specifications for DNS were written, the Extension
                Mechanisms for DNS have been introduced. These
                extensions can be used to indicate that the client is prepared to receive UDP
                responses larger than 512 bytes. An EDNS0-compatible server receiving a request from
                an EDNS0-compatible client may send UDP packets up to that client's announced buffer
                size without truncation.</t>

            <t> However, transport of UDP packets that exceed the size of the path MTU causes IP
                packet fragmentation, which has been found to be unreliable in many circumstances.
                Many firewalls routinely block fragmented IP packets, and some do not implement the
                algorithms necessary to reassemble fragmented packets. Worse still, some network
                devices deliberately refuse to handle DNS packets containing EDNS0 options. Other
                issues relating to UDP transport and packet size are discussed in
                <xref target="RFC5625"/>.</t>

            <t> The MTU most commonly found in the core of the Internet is around 1500 bytes, and
                even that limit is routinely exceeded by DNSSEC-signed responses. </t>

            <t> The future that was anticipated in RFC 1123 has arrived, and the only standardised
                UDP-based mechanism that may have resolved the packet size issue has been found
                inadequate.</t>
        </section>

        <section title="Transport Protocol Selection" anchor="selection">
            <t> All general-purpose DNS implementations MUST support both UDP and TCP transport.</t>
            <t><list style="symbols">
                    <t> Authoritative server implementations MUST support TCP so that they do not
                        limit the size of responses to what fits in a single UDP packet.</t>
                    <t> Recursive server (or forwarder) implementations MUST support TCP so that
                        they do not prevent large responses from a TCP-capable server from reaching
                        its TCP-capable clients.</t>
                    <t> Stub resolver implementations (e.g., an operating system's DNS resolution
                        library) MUST support TCP since to do otherwise would limit the
                        interoperability between their own clients and upstream servers. </t>
                </list></t>

            <t> Regarding the choice of when to use UDP or TCP, Section 6.1.3.2 of RFC 1123 also
                says: <list><t><vspace/>... a DNS resolver or server that is sending a
                non-zone-transfer query MUST send a UDP query first.</t></list></t>

            <t> This requirement is hereby relaxed. Stub resolvers and recursive resolvers MAY elect 
                to send either TCP or UDP
                queries depending on local operational reasons. TCP MAY be used before sending any
                UDP queries. If a resolver already has an open TCP connection to the server, it
                SHOULD reuse this connection. In essence, TCP ought to be considered a valid
                alternative transport to UDP, not purely a fallback option.</t>

            <t> In addition it is noted that all Recursive and Authoritative servers
                MUST send responses using the same transport as the query arrived on. In
                the case of TCP this MUST also be the same connection. </t>
        </section>

        <section title="Connection Handling" anchor="connections">

            <section title="Current practices" anchor="current">
                <t> Section 4.2.2 of <xref target="RFC1035"/> says:<vspace/>
                    <list style="symbols">
                        <t>The server should assume that the client will
                            initiate connection closing, and should delay closing its end of the
                            connection until all outstanding client requests have been
                            satisfied.</t>
                        <t>If the server needs to close a dormant connection to reclaim
                           resources, it should wait until the connection has been idle for a period
                           on the order of two minutes. In particular, the server should allow the
                           SOA and AXFR request sequence (which begins a refresh operation) to be
                           made on a single connection. Since the server would be unable to answer
                           queries anyway, a unilateral close or reset may be used instead of
                           graceful close.</t></list></t>

                <t> Other more modern protocols (e.g., <xref target="RFC7230">HTTP/1.1</xref>)
                    have support by default for persistent TCP connections for all
                    requests. Connections are then normally closed via a 'connection close' signal
                    from one party. </t>

                <t> The description in <xref target="RFC1035"/> is clear that servers should view
                    connections as persistent (particularly after receiving an SOA), but
                    unfortunately does not provide enough detail for an unambiguous interpretation
                    of client behaviour for queries other than an SOA. Additionally, DNS does not yet
                    have a signalling mechanism for connection timeout or close, although some have
                    been proposed. </t>

                <section title="Clients" anchor="currentclients">
                    <t> There is no clear guidance today in any RFC as to when a DNS
                        client should close a TCP connection, and there are no specific
                        recommendations with regard to DNS client idle timeouts. However, it is
                        common practice for clients to close the TCP connection after sending a
                        single request (apart from the SOA/AXFR case). </t>
                </section>

                <section title="Servers" anchor="currentservers">
                    <t> Many DNS server implementations use a long fixed idle timeout and default to
                        a small number of TCP connections. They also offer little by way of TCP
                        connection management options. The disadvantages of this include:
                        <list style="symbols">
                            <t> Operational experience has shown that long server timeouts can
                                easily cause resource exhaustion and poor response under heavy
                                load.</t>
                            <t> Intentionally opening many connections and leaving them idle can
                                trivially create a TCP "denial-of-service" attack as many DNS servers
                                are poorly equipped to defend against this by modifying their idle
                                timeouts or other connection management policies. </t>
                            <t> A modest number of clients that all concurrently attempt to
                                use persistent connections with non-zero idle timeouts to such a
                                server could unintentionally cause the same "denial-of-service"
                                problem.</t></list></t>
                    <t> Note that this denial-of-service is only on the TCP service. However,
                        in these cases it affects not only clients
                        wishing to use TCP for their queries for operational reasons, but all
                        clients who choose to fall back to TCP from UDP after receiving a TC=1 flag.</t>
                </section>
            </section>

            <section title="Recommendations" anchor="recommedations">
                <t> The following sections include recommendations that are intended to result in
                    more consistent and scalable implementations of DNS-over-TCP.</t>

                <section title="Connection Re-use" anchor="reuse">
                    <t> One perceived disadvantage to DNS over TCP is the added connection setup
                        latency, generally equal to one RTT.  To amortize connection setup costs,
                        both clients and servers SHOULD support connection reuse by sending multiple
                        queries and responses over a single persistent TCP connection.</t>

                    <t> When sending multiple queries over a TCP connection clients MUST
                        take care to avoid Message ID collisions. In other words, they
                        MUST NOT reuse the DNS
                        Message ID of an in-flight query on the same TCP connection. 
                        This is especially important if the server
                        could be performing out-of-order processing (see <xref target="re-ordering"/>).</t>

                    <section title="Query Pipelining" anchor="pipelining">
                        <t> Due to the historical use of TCP primarily for zone transfer and truncated
                            responses, no existing RFC discusses the idea of pipelining DNS
                            queries over a TCP connection.</t>

                        <t> In order to achieve performance on par with UDP, DNS clients SHOULD
                            pipeline their queries. When a DNS client sends multiple queries to a
                            server, it SHOULD NOT wait for an outstanding reply before sending the
                            next query. Clients SHOULD treat TCP and UDP equivalently when
                            considering the time at which to send a particular query.</t>
                        <t> It is likely that DNS servers need to process pipelined
                            queries concurrently and also send out-of-order responses
                            over TCP in order to provide the level of performance
                            possible with UDP transport. If TCP performance is
                            of importance, clients might find it useful to use server processing times
                            as input to server and transport selection algorithms.</t>
                        <t> DNS servers (especially recursive) SHOULD expect to receive
                            pipelined queries.  The server SHOULD process TCP queries concurrently,
                            just as it would for UDP.  The server SHOULD answer all pipelined 
                            queries, even if they are sent in quick succession. The handling of
                            responses to pipelined queries is covered in 
                            <xref target="re-ordering"/>.</t>
                    </section>
                </section>

                <section title="Concurrent connections" anchor="concurrency">
                    <t> To mitigate the risk of unintentional server overload, DNS clients MUST take
                        care to minimize the number of concurrent TCP connections made to any
                        individual server. It is RECOMMENDED that for any given client/server
                        interaction there SHOULD be no more than one connection for regular queries,
                        one for zone transfers and one for each protocol that is being used on top
                        of TCP, for example, if the resolver was using TLS. It is however noted that certain 
                        primary/secondary configurations with many busy zones might need to use more than
                        one TCP connection for zone transfers for operational reasons.</t>

                    <t> Similarly, servers MAY impose limits on the number of concurrent TCP
                        connections being handled for any particular client IP address or subnet. These limits SHOULD be
                        much looser than the client guidelines above, because the server does not
                        know, for example, if a client IP address belongs to a single client,
                        is multiple resolvers on a single machine, or is multiple clients behind a device performing
                        Network Address Translation (NAT).
                     </t>
                </section>

                <section title="Idle Timeouts" anchor="idle">
                    <t> To mitigate the risk of unintentional server overload, DNS clients MUST take
                        care to minimize the idle time of established DNS-over-TCP sessions made to any
                        individual server. DNS clients SHOULD close the TCP
                        connection of an idle session, unless an idle timeout has been established
                        using some other signalling mechanism, for example, <xref target="edns-tcp-keepalive"/>.</t>

                    <t> To mitigate the risk of unintentional server overload
                        it is RECOMMENDED that the default server application-level idle period
                        be of the order of seconds, but no particular value is specified. In
                        practice, the idle period can vary dynamically, and servers MAY allow idle
                        connections to remain open for longer periods as resources permit. A timeout
                        of at least a few seconds is advisable for normal operations
                        to support those clients that expect the SOA and AXFR request sequence to be
                        made on a single connection as originally specified in
                        <xref target="RFC1035"/>. Servers MAY use zero timeouts when they are
                        experiencing heavy load or are under attack.</t>
                        
                    <t>DNS messages delivered over TCP might arrive in multiple segments.
                        A DNS server that resets its idle timeout after receiving a single segment might
                        be vulnerable to a "slow read attack." For this reason, servers SHOULD
                        apply the idle timeout to the receipt of a full DNS message, rather than to
                        receipt of any part of a DNS message.</t>
                </section>

                <section title="Tear Down" anchor="timeouts">
                    <t> Under normal operation clients typically initiate connection closing on idle
                        connections; however, servers can close the connection if their local idle
                        timeout policy is exceeded. Connections can also be closed by either end
                        under unusual conditions such as defending against an attack or system
                        failure/reboot.</t>

                    <t> Clients SHOULD retry unanswered queries if the connection closes before 
                        receiving all outstanding responses. No specific retry algorithm is specified
                        in this document. </t>

                    <t> If a server finds that a client has closed a TCP session, or if the
                        session has been otherwise interrupted, before all pending responses
                        have been sent then the server MUST NOT attempt to send those
                        responses. Of course the server MAY cache those responses.</t>
                </section>
            </section>
        </section>

        <section title="Response Reordering" anchor="re-ordering">
            <t> RFC 1035 is ambiguous on the question of whether TCP responses may be reordered --
                the only relevant text is in Section 4.2.1, which relates to UDP:
                <list><t><vspace/>
                Queries or their responses may be reordered by the network, or by processing in name
                servers, so resolvers should not depend on them being returned in order.
                </t></list></t>

            <t> For the avoidance of future doubt, this requirement is clarified.
                Authoritative servers and recursive resolvers are RECOMMENDED to support the preparing
                of responses in parallel and sending them out-of-order, regardless of the transport protocol
                in use. Stub and recursive resolvers MUST be able to process
                responses that arrive in a different order to that in which the requests were
                sent, regardless of the transport protocol in use.</t>

            <t> In order to achieve performance on par with UDP, recursive
                resolvers SHOULD process TCP queries in parallel and return
                individual responses as soon as they are available, possibly
                out-of-order.</t>

            <t> Since pipelined responses can arrive out-of-order, clients MUST match responses
                to outstanding queries on the same TCP connection using the Message ID. If
                the response contains a question section, the client MUST match the QNAME, QCLASS and 
                QTYPE fields.
                Failure by clients to properly match responses to outstanding
                queries can have serious consequences for interoperability.</t>
        </section>

        <section title="TCP Message Length Field" anchor="messagelength">
            <t> For reasons of efficiency, DNS clients and servers SHOULD pass the two-octet length
                field, and the message described by that length field, to the TCP layer at the same
                time (e.g., in a single "write" system call) to make it more likely that all the
                data will be transmitted in a single TCP segment.</t>

            <t> This additionally avoids problems due
                to some DNS servers being very sensitive to timeout conditions on receiving messages 
                (they might abort a TCP session if the first TCP segment does not contain both
                the length field and the entire message). Such behavior is certainly undesirable.
                As described in <xref target="idle"/>, servers SHOULD apply
                connection timeouts to the receipt of a full message and MUST NOT close a connection
                simply because the first "read" from the TCP layer does not contain the entire message.</t>
        </section>

        <section title="TCP Fast Open" anchor="fastopen">
            <t> This section is non-normative.</t>

            <t> TCP Fast Open <xref target="RFC7413"/> (TFO) allows data to be carried in the SYN
                packet, reducing the cost of re-opening TCP connections. It also saves up to one 
                RTT compared to standard TCP.</t>

            <t> TFO mitigates the security vulnerabilities inherent in sending data in the SYN,
                especially on a system like DNS where amplification attacks are possible, by use of
                a server-supplied cookie.  TFO clients request a server cookie in the initial SYN
                packet at the start of a new connection.  The server returns a cookie in its
                SYN-ACK. The client caches the cookie and reuses it when opening subsequent
                connections to the same server.</t>

            <t> The cookie is stored by the client's TCP stack (kernel) and persists if either the
                client or server processes are restarted.  TFO also falls back to a regular TCP
                handshake gracefully.</t>

            <t> DNS services taking advantage of IP anycast <xref target="RFC4786"/> might need to
                take additional steps when enabling TFO. From <xref target="RFC7413"/>:
                <list><t><vspace/>Servers that accept connection requests to the same server IP
                    address should use the same key such that they generate identical Fast Open
                    Cookies for a particular client IP address. Otherwise a client may get different
                    cookies across connections; its Fast Open attempts would fall back to regular
                    3WHS. </t></list></t>
        </section>

        <section anchor="IANA" title="IANA Considerations">
            <t>This memo includes no request to IANA.</t>
        </section>

        <section title="Security Considerations" anchor="security">
            <t> Some DNS server operators have expressed concern that wider promotion and use of DNS over TCP will
                expose them to a higher risk of denial-of-service (DoS) attacks on TCP
                (both accidental and deliberate).</t>

            <t> Although there is a higher risk of some specific attacks against TCP-enabled servers,
                techniques for the mitigation of DoS attacks at the network level have improved
                substantially since DNS was first designed.</t>

            <t> Readers are advised to familiarise themselves with <xref target="CPNI-TCP"/>,
                a security assessment of TCP detailing known TCP attacks and countermeasures which
                references most of the relevant RFCs on this topic.</t>

            <t> To mitigate the risk of DoS attacks, DNS servers are advised to engage in TCP connection
                management. This could include maintaining state on existing connections,
                re-using existing connections and controlling request queues to enable fair use.
                It is likely to be advantageous to provide configurable connection management 
                options, for example:
                <list style="symbols">
                    <t> total number of TCP connections</t>
                    <t> maximum TCP connections per source IP address or subnet</t>
                    <t> TCP connection idle timeout </t>
                    <t> maximum DNS transactions per TCP connection</t>
                    <t> maximum TCP connection duration</t></list>
                No specific values are recommended for these parameters.
            </t>

            <t>Operators are advised to familiarise themselves with the configuration and tuning
                parameters available in the operating system TCP stack. However, detailed advice on
                this is outside the scope of this document.</t>

            <t> Operators of recursive servers are advised to ensure that they only accept connections from
                expected clients (for example by the use of an ACL), and do not accept them from 
                unknown sources. In the case of UDP
                traffic, this will help protect against <xref target="RFC5358">reflection
                attacks</xref> and in the case of TCP traffic it will prevent an unknown client from
                exhausting the server's limits on the number of concurrent connections.</t>
        </section>

        <section anchor="Acknowledgements" title="Acknowledgements">
            <t> The authors would like to thank Francis Dupont and Paul Vixie for detailed review,
                and Andrew Sullivan, Tony Finch, Stephane Bortzmeyer, Joe Abley, Tatuya Jinmei and
                the many others who contributed
                to the mailing list discussion. We also thank Liang Zhu, Zi
                Hu, and John Heidemann for extensive DNS-over-TCP discussions and code, and Lucie
                Guiraud and Danny McPherson for reviewing early versions of this document.
                We would also like to thank all those who contributed to RFC 5966.</t>
        </section>
    </middle>

    <back>
        <references title="Normative References">
            &RFC2119;
            &RFC5966;
            &RFC5625;
            &RFC0793;
            &RFC0768;
            &RFC1123;
            &RFC1034;
            &RFC6891;
            &RFC4033;
            &RFC5155;
            &RFC1035;
            &RFC5358;
            &RFC4786;
            &RFC7230;
        </references>
        <references title="Informative References">
            &RFC7413;
            &RFC6824;
            &RFC5405;
	     <reference anchor="RRL">
	        <front>
	          <title>DNS Response Rate Limiting (DNS RRL)</title>

	          <author initials="P." surname="Vixie" fullname="Paul Vixie">
	            <organization>ISC</organization>
	            <address>
	              <email>vixie@isc.org</email>
	            </address>
	          </author>

	          <author initials="V." surname="Schryver" fullname="Vernon Schryver">
	            <organization>Rhyolite</organization>
	            <address>
	              <email>vjs@rhyolite.com</email>
	            </address>
	          </author>

	          <date year="2012" month="April"/>
	        </front>
	        <seriesInfo name="ISC-TN" value="2012-1-Draft1"/>
	        <format type="TXT" target="http://ss.vix.su/~vixie/isc-tn-2012-1.txt"/>
	      </reference>
            <reference anchor='edns-tcp-keepalive'> 
               <front> 
               <title>The edns-tcp-keepalive EDNS0 Option</title>
               <author initials='P' surname='Wouters' fullname='P. Wouters'> 
                   <organization>Red Hat</organization>
               </author>
               <author initials='J' surname='Abley' fullname='J. Abley'> 
                  <organization>Dyn, Inc.</organization>
               </author>
               <author initials='S' surname='Dickinson' fullname='S. Dickinson'> 
                <organization>Sinodun Internet Technologies</organization>
                </author>
               <author initials='R' surname='Bellis' fullname='R. Bellis'> 
               <organization>ISC</organization>
               </author>
               <date month='Oct' day='20' year='2015' /> 
               </front> 
               <seriesInfo name='Internet-Draft' value='draft-ietf-dnsop-edns-tcp-keepalive-04' /> 
               <format type='TXT' 
                     target='https://tools.ietf.org/html/draft-ietf-dnsop-edns-tcp-keepalive-04' /> 
            </reference>
            <reference anchor="CPNI-TCP"
                       target="http://www.gont.com.ar/papers/tn-03-09-security-assessment-TCP.pdf">
                <front>
                    <title>Security Assessment of the Transmission Control Protocol (TCP)</title>
                    <author>
                        <organization>CPNI</organization>
                    </author>
                    <date year="2009"/>
                </front>
            </reference>
            <reference anchor="fragmentation-considered-poisonous"
                       target="http://arxiv.org/abs/1205.4011">
                <front>
                    <title>Fragmentation Considered Poisonous</title>
                    <author initials="A." surname="Herzberg" fullname="Amir Herzberg">
                        <organization>Dept. of Computer Science, Bar Ilan University</organization>
                    </author>
                    <author initials="H." surname="Shulman" fullname="Haya Shulman">
                        <organization>Dept. of Computer Science, Bar Ilan University</organization>
                    </author>
                    <date month="May" year="2012"/>
                </front>
            </reference>

            <reference anchor="Connection-Oriented-DNS"
            target="http://www.isi.edu/~johnh/PAPERS/Zhu15b.pdf">
                <front>
                    <title>Connection-Oriented DNS to Improve Privacy and Security</title>
                    <author initials="L." surname="Zhu" fullname="Liang Zhu">
                        <organization>University of Southern California</organization>
                    </author>
                    <author initials="Z." surname="Hu" fullname="Zi Hu">
                        <organization>University of Southern California</organization>
                    </author>
                    <author initials="J." surname="Heidemann" fullname="John Heidemann">
                        <organization>University of Southern California</organization>
                    </author>
                    <author fullname="Duane Wessels" initials="D." surname="Wessels">
                        <organization>Verisign Labs</organization>
                    </author>
                    <author fullname="Allison Mankin" initials="A." surname="Mankin">
                        <organization>Verisign Labs</organization>
                    </author>
                    <author initials="N." surname="Somaiya" fullname="Nikita Somaiya">
                        <organization>University of Southern California</organization>
                    </author>
                    <date/>
                </front>
            </reference>
    </references>

        <section anchor="Appendix-A" title="Summary of Advantages and Disadvantages to using TCP for DNS">
            <t> The TCP handshake generally prevents address spoofing and, therefore, the
                reflection/amplification attacks which plague UDP.</t>

            <t> IP fragmentation is less of a problem for TCP than it is for UDP.
                TCP stacks generally implement Path MTU Discovery so they can avoid
                IP fragmentation of TCP segments.  UDP, on the other hand, does not provide
                reassembly, which means datagrams that exceed the path MTU size must
                experience fragmentation <xref target="RFC5405"/>.
                Middleboxes are known to
                block IP fragments, leading to timeouts and forcing client implementations to "hunt"
                for EDNS0 reply size values supported by the network path. Additionally,
                fragmentation may lead to cache poisoning
                <xref target="fragmentation-considered-poisonous"/>.
              </t>

            <t> TCP setup costs an additional RTT compared to UDP queries. Setup costs can be
                amortized by reusing connections, pipelining queries, and enabling TCP Fast
                Open.</t>

            <t> TCP imposes additional state-keeping requirements on clients and servers.  The use
                of TCP Fast Open reduces the cost of closing and re-opening TCP connections.</t>

            <t> Long-lived TCP connections to anycast servers might be disrupted due to routing
                changes. Clients utilizing TCP for DNS need to always be prepared to re-establish
                connections or otherwise retry outstanding queries. It might also be possible for TCP
                Multipath <xref target="RFC6824"/> to allow a server to hand a connection over from
                the anycast address to a unicast address.</t>

            <t> There are many "middleboxes" in use today that interfere with TCP over port 53
                <xref target="RFC5625"/>.  This document does not propose any solutions, other than
                to make it absolutely clear that TCP is a valid transport for DNS and support for
                it is a requirement for all implementations.</t>

            <t> A more in-depth discussion of connection-oriented DNS can be found elsewhere
                <xref target="Connection-Oriented-DNS"/>.</t>
        </section>

        <section title="Changes between revisions">
            <t>[Note to RFC Editor: please remove this section prior to publication.]</t>
            <section title="Changes -03 to -04">
                    <t><list style="symbols">
                        <t>Re-stated how messages received over TCP should be mapped to queries.</t>
                        <t>Added wording to cover server-side timeout behaviour
                            when receiving TCP messages.</t>
                        <t>Added sentence to abstract stating this obsoletes RFC5966.</t>
                        <t>Moved reference to RFC6891 earlier in Discussion section.</t>
                        <t>Several minor wording updates to improve clarity.</t>
                        <t>Corrected nits and updated references.</t>
                    </list></t>
             </section>
             <section title="Changes -02 to -03">
                 <t>
                    <list style="symbols">
                        <t>Replaced certain lower case RFC2119 keywords to improve clarity.</t>

                        <t>Updated section 6.2.2 to recognise requirements for concurrent zone transfers.</t>

                        <t>Changed 'client IP address' to 'client IP address or subnet' when discussing
                            restrictions on TCP connections from clients.</t>

                        <t>Added reference to edns-tcp-keepalive draft.</t>

                        <t>Added wording to introduction to reference Appendix A and state TCP is a valid
                            transport alternative for DNS.</t>

                        <t>Improved description of CPNI-TCP as a general reference source on TCP security related RFCs.</t>
                    </list>
                </t>
            </section>

            <section title="Changes -01 to -02">
                <t>
                    <list style="symbols">
                        <t> Added more text to Introduction as background to TCP use.</t>

                        <t>Added definitions of Persistent connection and Idle session
                            to Terminology section.</t>

                        <t>Separated Connection Handling section into Current Practice and
                            Recommendations. Provided more detail on current practices and 
                            divided Recommendations up into more granular sub-sections.</t>

                        <t>Added section on Idle time with new text on recommendations for 
                            client idle behaviour.</t>

                        <t>Moved TCP message field length discussion to separate section.</t>

                        <t>Removed references to system calls in TFO section.</t>

                        <t>Added more discussion on DoS mitigation in Security Considerations section.</t>

                        <t>Added statement that servers MAY use 0 idle timeout.</t>

                        <t>Re-stated position of TCP as an alternative to UDP in Discussion. </t>
                    
                        <t>Updated text on server limits on concurrent connections from a particular client. </t>
                    
                        <t>Added text that client retry logic is outside the scope of this document.</t>
                    
                        <t>Clarified that servers should answer all pipelined queries even if sent very close together.</t>
                    
                    </list>
                </t>
            </section>

            <section title="Changes -00 to -01">
                <t>
                    <list style="symbols">
                        <t> Changed updates to obsoletes RFC 5966.</t>

                        <t> Improved text in Section 4 Transport Protocol Selection to change "TCP
                            SHOULD NOT be used only for the transfers and as a fallback" to make the
                            intention clearer and more consistent.</t>

                        <t> Reference to TCP FASTOPEN updated now that it is an RFC.</t>

                        <t> Added paragraph to say that implementations MUST NOT send the TCP framing 2
                            byte length field in a separate packet to the DNS message.</t>

                        <t> Added Terminology section.</t>

                        <t> Changed should and RECOMMENDED in reference to parallel processing
                            to SHOULD in sections 7 and 8.</t>

                        <t> Added text to address what a server should do when a client closes the TCP
                            connection before pending responses are sent.</t>
                        <t> Moved the Advantages and Disadvantages section to an appendix.</t>
                    </list>
                </t>
            </section>

            <section title="Changes to RFC 5966">
                <t> This document differs from RFC 5966 in four additions:<list style="numbers"><t>
                    DNS implementations are recommended not only to support TCP but to support it on an
                    equal footing with UDP</t>
                <t> DNS implementations are recommended to support reuse of TCP connections</t>
              <t> DNS implementations are recommended to support pipelining and out-of-order processing
                  of the query stream</t>
              <t> A non-normative discussion of use of TCP Fast Open is added</t></list></t>
            </section>
        </section>
    </back>
</rfc>
