<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE rfc SYSTEM "http://xml.resource.org/authoring/rfc2629.dtd"
[
  <!ENTITY RFC1166 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.1166.xml'>
  <!ENTITY RFC2119 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml'>
  <!ENTITY RFC3986 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3986.xml'>
  <!ENTITY RFC5890 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.5890.xml'>
  <!ENTITY RFC5952 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.5952.xml'>
  <!ENTITY RFC6927 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.6927.xml'>
  <!ENTITY RFC7230 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.7230.xml'>
  <!ENTITY RFC7480 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.7480.xml'>
  <!ENTITY RFC7481 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.7481.xml'>
  <!ENTITY RFC7482 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.7482.xml'>
  <!ENTITY RFC7483 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.7483.xml'>
  <!ENTITY RFC7942 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.7942.xml'>
  <!ENTITY POSIX PUBLIC '' 'http://xml2rfc.ietf.org/public/rfc/bibxml6/reference.IEEE.1003.1_2013_EDITION.xml'>
]>
<?xml-stylesheet type="text/xsl" href="rfc2629.xslt"?>

<?rfc toc="yes"?>
<?rfc tocompact="yes"?>
<?rfc tocdepth="4"?>
<?rfc compact="yes"?>
<?rfc subcompact="yes"?>
<?rfc sortrefs="yes"?>
<?rfc symrefs="yes"?>
<?rfc iprnotified="no"?>

<rfc category="std" docName="draft-fregly-regext-rdap-search-regex-04" ipr="trust200902">
  <front>
    <title abbrev="RDAP Search using Regex">Registration Data Access Protocol (RDAP) Search Using POSIX Regular Expressions</title>

    <author initials="A." surname="Fregly" fullname="Andrew Fregly">
      <organization>Verisign Labs</organization>
      <address>
        <postal>
          <street>12061 Bluemont Way</street>
          <city>Reston</city>
          <region>VA</region>
          <code>20190</code>
          <country>USA</country>
        </postal>
        <email>afregly@verisign.com</email>
        <uri>http://www.verisignlabs.com/</uri>
      </address>
    </author>
    <author initials="S." surname="Sheth" fullname="Swapneel Sheth">
      <organization>Verisign Labs</organization>
      <address>
        <postal>
          <street>12061 Bluemont Way</street>
          <city>Reston</city>
          <region>VA</region>
          <code>20190</code>
          <country>USA</country>
        </postal>
        <email>ssheth@verisign.com</email>
        <uri>http://www.verisignlabs.com/</uri>
      </address>
    </author>
    <author initials="S." surname="Hollenbeck" fullname="Scott Hollenbeck">
      <organization>Verisign Labs</organization>
      <address>
        <postal>
          <street>12061 Bluemont Way</street>
          <city>Reston</city>
          <region>VA</region>
          <code>20190</code>
          <country>USA</country>
        </postal>
        <email>shollenbeck@verisign.com</email>
        <uri>http://www.verisignlabs.com/</uri>
      </address>
    </author>

    <date/>
    <area>Applications</area>
    <workgroup>Internet Engineering Task Force</workgroup>
    <keyword>RDAP</keyword>
    <keyword>POSIX</keyword>
    <keyword>REGEX</keyword>
    <keyword>regular expression</keyword>
    <keyword>Search</keyword>

    <abstract>
      <t>The Registration Data Access Protocol (RDAP) provides limited search functionality based on pattern matching. This document describes an RDAP query extension that provides additional search functionality using POSIX extended regular expressions.
      </t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>The search patterns for Registration Data Access Protocol (RDAP) search as described in RFC 7482 <xref target="RFC7482"/> are limited. The protocol described in this specification extends RDAP search capabilities by adding path segments for RDAP search functions using a RESTful web service and POSIX <xref target="IEEE.1003.1_2013_EDITION"/> extended regular expressions. The service is implemented using the Hypertext Transfer Protocol (HTTP) <xref target="RFC7230"/> and the conventions described in RFC 7480 <xref target="RFC7480"/>.
      </t>

      <section title="Conventions Used in This Document">
        <t>The key words &quot;MUST&quot;, &quot;MUST NOT&quot;, &quot;REQUIRED&quot;, &quot;SHALL&quot;, &quot;SHALL NOT&quot;,
          &quot;SHOULD&quot;, &quot;SHOULD NOT&quot;, &quot;RECOMMENDED&quot;, &quot;MAY&quot;, and &quot;OPTIONAL&quot; in this document are to be interpreted as described in RFC 2119 <xref target="RFC2119"/>.</t>
      </section>
    </section>

    <section anchor="pathseg" title="RDAP Path Segment Specification">

      <t>The path segments defined in this section are OPTIONAL extensions of path segments defined in RFC 7482 <xref target="RFC7482"/>.
        The resource type path segments for search are:
        <list style="symbols">
          <t>'domains': Used to identify a domain name information search using a pattern to match a fully-qualified domain name.</t>
          <t>'nameservers': Used to identify a name server information search using a pattern to match a host name.</t>
          <t>'entities': Used to identify an entity information search using a pattern to match a string identifier.</t>
        </list>
        The search patterns in the path segments MUST be POSIX extended regular expressions. Non-URL-safe characters in search patterns MUST be percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. Percent-encoding will eliminate errors that might occur due to web-server or app-server interpretation of certain unsafe characters and will eliminate errors due to inconsistent encoding and decoding semantics for certain characters. For instance, the space character may be encoded as "+" when submitted through an HTML form and encoded as "%20" when submitted through the address bar of a Web browser. Detailed results can be retrieved using the HTTP GET method and the path segments specified here.</t>
        <t>This document defines an RDAP query parameter, "searchtype", that is used to identify search requests that require specialized processing beyond the limited functionality described in RFC 7482 <xref target="RFC7482"/>. Search processing using POSIX <xref target="IEEE.1003.1_2013_EDITION"/> extended regular expressions is indicated with a query string parameter value of "regex", e.g. "searchtype=regex". Other forms of search processing are possible and can be described in other specifications using other values for the "searchtype" query parameter. See <xref target="new_segments"/> for additional information.</t>

      <section title="Domain Search" anchor="domain_search">
        <t>Syntax: domains?name=&lt;domain search pattern&gt;&amp;searchtype=regex</t>
        <t>Syntax: domains?nsLdhName=&lt;domain search pattern&gt;&amp;searchtype=regex</t>
        <t>Syntax: domains?nsIp=&lt;domain search pattern&gt;&amp;searchtype=regex</t>

        <t>Searches for domain information by name are specified using this form:</t>

        <t>domains?name=XXXX&amp;searchtype=regex</t>

        <t>If the URL query string parameter "searchtype" has a value of "regex", then XXXX MUST be a POSIX extended regular expression. Non-URL-safe characters in XXXX MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against domains in a name space administered by the server operator. Domain names are as defined by RFC 5890 <xref target="RFC5890"/> in "letters, digits, hyphen" format. The following URL would be used to find information for domain names matching the "e[a-z]ample\.com" pattern:
        </t>

        <t>https://example.com/rdap/domains?name=e%5Ba-z%5Dample%5C.com&amp;searchtype=regex</t>

        <t>Internationalized Domain Names (IDNs) in U-label format <xref target="RFC5890"/> can also be matched by POSIX extended regular expression search patterns. Search patterns for these names are of the form /domains?name=XXXX&amp;searchtype=regex, where XXXX is a POSIX extended regular expression. Non-URL-safe characters in XXXX MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against domain names in U-label format. See section 6.1 of RFC 7482 <xref target="RFC7482"/> for information describing U-label character encoding. See <xref target="i18n"/> for other considerations relative to regular expression matching of IDNs.
        </t>

        <t>Searches for domain information by name server name are specified using this form:</t>

        <t>domains?nsLdhName=YYYY&amp;searchtype=regex</t>

        <t>If the URL query string parameter "searchtype" has a value of "regex", then YYYY MUST be a POSIX extended regular expression. Non-URL-safe characters in YYYY MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against host names in a name space administered by the server operator. Host names are as defined by RFC 5890 <xref target="RFC5890"/> in "letters, digits, hyphen" format. The following URL would be used to search for domains delegated to name servers matching the "ns[1-9]\.e[a-z]ample\.com" pattern:
        </t>

        <t>https://example.com/rdap/domains?nsLdhName=ns%5B1-9%5D%5C.e%5Ba-z%5Dample%5C.com&amp;searchtype=regex</t>

        <t>Searches for domain information by name server IP address are specified using this form:</t>

        <t>domains?nsIp=ZZZZ&amp;searchtype=regex</t>

        <t>If the URL query string parameter "searchtype" has a value of "regex", then ZZZZ MUST be a POSIX extended regular expression. Non-URL-safe characters in ZZZZ MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against IPv4 addresses <xref target="RFC1166"/> and IPv6 addresses <xref target="RFC5952"/> associated with specific name servers. The following URL would be used to search for domains that have been delegated to name servers that have IP addresses matching the "192\.0\.[1-9]\.0" pattern:
        </t>

        <t>https://example.com/rdap/domains?nsIp=192%5C.0%5C.%5B1-9%5D%5C.0&amp;searchtype=regex</t>
      </section>

      <section title="Name Server Search" anchor="ns_search">
       <t>Syntax: nameservers?name=&lt;name server search pattern&gt;&amp;searchtype=regex</t>
       <t>Syntax: nameservers?ip=&lt;name server search pattern&gt;&amp;searchtype=regex</t>

       <t>Searches for name server information by name server name are specified using this form:</t>

       <t>nameservers?name=XXXX&amp;searchtype=regex</t>

       <t>If the URL query string parameter "searchtype" has a value of "regex", then XXXX MUST be a POSIX extended regular expression. Non-URL-safe characters in XXXX MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against name server names in a name space administered by the server operator. Name server names are as defined in RFC 5890 <xref target="RFC5890"/> in "letters, digits, hyphen" format. Matches will return information for the matching name servers. The following URL would be used to find information for name server names matching the "ns[1-9]\.e[a-z]ample\.com" pattern:</t>

       <t>https://example.com/rdap/nameservers?name=ns%5B1-9%5D%5C.e%5Ba-z%5Dample%5C.com&amp;searchtype=regex</t>

       <t>Internationalized name server names in U-label format <xref target="RFC5890"/> can also be matched by POSIX extended regular expression search patterns. Search patterns for these names are of the form /nameservers?name=XXXX&amp;searchtype=regex, where XXXX is a POSIX extended regular expression. Non-URL-safe characters in XXXX MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against name server names in U-label format. See section 6.1 of RFC 7482 <xref target="RFC7482"/> for information describing U-label character encoding. See <xref target="i18n"/> for other considerations relative to regular expression matching of U-labels.
       </t>

       <t>Searches for name server information by name server IP address are specified using this form:</t>

       <t>nameservers?ip=YYYY&amp;searchtype=regex</t>

       <t>If the URL query string parameter "searchtype" has a value of "regex", then YYYY MUST be a POSIX extended regular expression. Non-URL-safe characters in YYYY MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against IPv4 addresses <xref target="RFC1166"/> and IPv6 addresses <xref target="RFC5952"/> associated with specific name servers. The following URL would be used to search for name server names that resolve to addresses matching the "192\.0\.[1-9]\.0" pattern:</t>

       <t>https://example.com/rdap/nameservers?ip=192%5C.0%5C.%5B1-9%5D%5C.0&amp;searchtype=regex</t>
     </section>

     <section title="Entity Search" anchor="entity_search">
       <t>Syntax: entities?fn=&lt;entity name search pattern&gt;&amp;searchtype=regex</t>
       <t>Syntax: entities?handle=&lt;entity handle search pattern&gt;&amp;searchtype=regex</t>

       <t>Searches for entity information by name are specified using this form:</t>

       <t>entities?fn=XXXX&amp;searchtype=regex</t>

       <t>If the URL query string parameter "searchtype" has a value of "regex", then XXXX MUST be a POSIX extended regular expression. Non-URL-safe characters in XXXX MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against the "FN" property of an entity (such as a contact, registrant, or registrar) name as specified in Section 5.1 of RFC 7483 <xref target="RFC7483"/>. The following URL would be used to find information for entity names matching the "Bobby[[:space:]]Joe[a-z]*" pattern:</t>

       <t>https://example.com/rdap/entities?fn=Bobby%5B%5B%3Aspace%3A%5D%5DJoe%5Ba-z%5D%2A&amp;searchtype=regex</t>

       <t>Searches for entity information by handle are specified using this form:</t>

       <t>entities?handle=XXXX&amp;searchtype=regex</t>

       <t>If the URL query string parameter "searchtype" has a value of "regex", then XXXX MUST be a POSIX extended regular expression. Non-URL-safe characters in XXXX MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against an entity (such as a contact, registrant, or registrar) identifier whose syntax is specific to the registration provider. The following URL would be used to find information for entity handles matching the "CID-4[0-9]*" pattern:</t>

       <t>https://example.com/rdap/entities?handle=CID-4%5B0-9%5D%2A&amp;searchtype=regex</t>

     </section>
     <section anchor="new_segments" title="Future Path Segments">
       <t>OPTIONAL extensions to new RDAP path segments defined in future RDAP specifications MAY be implemented to support POSIX extended regular expression search capability. The syntax for such OPTIONAL extensions MUST be modeled on the syntax defined in <xref target="domain_search"/>, <xref target="ns_search"/>, and <xref target="entity_search"/>. The following syntax template MUST be followed:
        </t>
        <t>Syntax: {path_segment}?{property}=XXXX&amp;searchtype=regex</t>
        <t>If the URL query string parameter "searchtype" has a value of "regex", then XXXX MUST be a POSIX extended regular expression. Non-URL-safe characters in XXXX MUST be  percent-encoded. Percent-encoding MUST be as described in section 2.1 of RFC 3986 <xref target="RFC3986"/>. The supplied regular expression will be matched against the property specified by {property} for the path segment specified by {path_segment}. For example, if a new RDAP path segment "foo" is defined and has a property "bar", the following URL would be used to find information for the "foo" resource type with a "bar" property matching the "widget:.*mech.*" pattern:</t>

        <t>https://example.com/rdap/foo?bar=widget%3A.%2Amech.%2A&amp;searchtype=regex</t>
      </section>
    </section>

    <section anchor="syntax" title="Search Pattern Syntax">
      <t>POSIX extended regular expression search pattern syntax is defined in Section 9 of IEEE Std 1003.1, 2013 Edition <xref target="IEEE.1003.1_2013_EDITION"/>. An RDAP service implementation MAY implement a subset of the extended regular expression syntax and capabilities defined by the specification. An RDAP service implementation MUST specify the regular expression syntax and capabilities it supports in response to a query to the /help path segment as specified in section 3.1.6 of RFC 7482 <xref target="RFC7482"/>.
      </t>
      <t>Characters within a regular expression search pattern may be URI reserved characters. To avoid ambiguity in parsing a URL containing a regular expression search pattern, non-URL-safe characters in the regular expression search pattern MUST be percent-encoded as described in RFC 3986 <xref target="RFC3986"/>.
      </t>
    </section>

    <section anchor="query_proc" title="Query Processing">
      <t>RDAP clients using regular expression search patterns MUST percent-encode non-URL-safe characters in the regular expression search pattern as described in RFC 3986 <xref target="RFC3986"/>. The regular expression SHOULD be consistent with the regular expression syntax and capabilities supported by the RDAP service implementation that is being queried in order to provide predictable results. The use of a regular expression that is not consistent with the capabilities of the RDAP service implementation MUST result in the return of an HTTP 400 response code as described in section 5.4 of RFC 7480 <xref target="RFC7480"/>.
      </t>
      <t>An RDAP service implementation will receive regular expression search patterns that contain percent-encoded characters. Prior to processing a regular expression, the RDAP service MUST decode the received percent-encoded characters in regular expressions as described in RFC 3986 <xref target="RFC3986"/>. After decoding the received regular expression, the regular expression MUST be matched as described in <xref target="domain_search"/>, <xref target="ns_search"/> and <xref target="entity_search"/>. Matching records related to the search are then returned to the client.
      </t>
      <t>Server operating systems are typically configured to use a collection of regional and language rules that describe default processing conventions, such as sort order, date format, etc., as part of a "locale" setting.  The regular expression library used for an RDAP service implementation will typically acquire all the information it requires for the current locale from the underlying operating system.  The locale used by a regular expression library may impact the results of regular expression searches based on locale-specific processing. For example, a POSIX locale can have collating sequences to describe how certain characters or groups of characters can be ordered. In the Czech language, for example, "ch" can be treated as if it were one character. You can use the collating sequence element [.ch.] inside a bracketed expression to match "ch" when the Czech locale (cs-CZ) is active, but a similar collating sequence would not match the string if the system locale was, for example, en_US. Users submitting regular expression searches that do not take into account locale-specific processing may receive misleading or inaccurate results.
      As such, it is RECOMMENDED to identify the underlying locale in the "help" path segment as specified in section 3.1.6 of RFC 7482 <xref target="RFC7482"/>. This will help RDAP clients construct regular expressions that can be processed in a predictable way.
      </t>
      <t>The POSIX regular expression specification <xref target="IEEE.1003.1_2013_EDITION"/> allows implementations to provide case insensitive searching. RDAP service implementations SHOULD implement case insensitive searching as described in the specification. This will allow for consistency in search results regardless of the case of the RDAP data being searched. For example, some RDAP service implementations may represent domain names in upper case during searching while other RDAP service implementations may represent domain names in lower case or mixed case during searching. Case insensitive searching will alleviate the need for search clients to know how each RDAP service implementation represents the case of searchable data. RDAP service implementations that do not perform case insensitive searching may produce unexpected search results for entities that are not aware of how the service represents the case of searchable data.
      </t>
      <t>An RDAP service implementation MUST specify its support or lack of support for case insensitive searching in response to a query to the /help path segment as specified in section 3.1.6 of RFC 7482 <xref target="RFC7482"/>.
      </t>
      <t>Servers indicate the success or failure of query processing of a regular expression search pattern by returning an appropriate HTTP response code to the client.  Response codes not specifically identified in this document are described in RFC 7480 <xref target="RFC7480"/>.
      </t>
    </section>

    <section anchor="i18n" title="Internationalization Considerations">
      <t>An RDAP service implementation that supports regular expression search patterns MUST support pattern construction and pattern matching using UTF-8 encoded character strings. Other character encoding considerations are described in section 6.1 of RFC 7482 <xref target="RFC7482"/>.</t>
    </section>

    <section anchor="Implementation" title="Implementation Considerations">
      <t>The set of related records that may be returned in response to a search with a regular expression search pattern is subject to the constraints specified in section 4.2 of RFC 7482 <xref target="RFC7482"/>.
      </t>
      <t>An RDAP service implementation MAY choose to limit the scope of searches to RDAP data that is managed by the RDAP service implementation. For example, an RDAP response to a query that could be matched against multiple TLDs or data in related RDAP repositories (such as those distributed between domain registry and domain registrar) need only return matches for the data managed by the RDAP service implementation.
      </t>
      <t>Server operating systems are typically configured to use a collection of regional and language rules that describe default processing conventions, such as sort order, date format, etc., as part of a 'locale' setting.  The regular expression library used for an RDAP service implementation will typically acquire all the information it requires for the current locale from the underlying operating system.  The locale used by a regular expression library may impact the results of regular expression searches based on locale-specific processing. For example, a POSIX locale can have collating sequences to describe how certain characters or groups of characters can be ordered. In the Czech language, for example, 'ch' can be treated as if it were one character. You can use the collating sequence element [.ch.] inside a bracketed expression to match 'ch' when the Czech locale (cs-CZ) is active, but a similar collating sequence would not match the string if the system locale was, for example, en_US. Users submitting regular expression searches that do not take into account locale-specific processing may receive misleading or inaccurate results. As such, it is RECOMMENDED to identify the underlying locale in the 'help' path segment as defined in section 3.1.6 of RFC 7482 <xref target="RFC7482"/>. This will help RDAP clients construct regular expressions that can be processed in a predictable way.
      </t>
      <t>Implementers should take care to ensure that decoding of percent-encoded characters in a received regular expression is only performed once. Standard APIs for processing HTTP requests will likely perform decoding of percent-encoded characters prior to providing a received regular expression to the RDAP service implementation code. In such cases, the RDAP service implementation code should not attempt to perform decoding for percent-encoded characters.
      </t>
      <t>Regular expression matching results for some search patterns may vary based on the regular expression search engine used, the version of the engine used, and configuration of the search engine. For example, POSIX <xref target="IEEE.1003.1_2013_EDITION"/> defines different semantics based on whether a search is using Basic Regular Expressions (BRE) or Extended Regular Expressions (ERE). Search mechanisms that perform search processing compliant with Perl Compatible Regular Expressions (PCRE) as defined by pcre.org <xref target="PCRE"/> and in Perl 5 <xref target="PERLRE"/> may also produce matches that differ from matches produced by POSIX compatible regular expression matching. Differences in regular expression matching between POSIX BRE, POSIX ERE and PCRE are illustrated in the examples below, where the "sed" command without the "-E" option is used for POSIX BRE matching, the "sed" command with the "-E" option is used for POSIX ERE matching, and the "perl" command is used for PCRE matching.</t>
      <figure>
        <artwork><![CDATA[

         $ echo 'abcdef' | sed 's/ab(cd)?(cdef)?/[xxxx]/'
         abcdef
         $ echo 'abcdef' | sed -E 's/ab(cd)?(cdef)?/[xxxx]/'
         [xxxx]
         $ echo 'abcdef' | perl -p -e 's/ab(cd)?(cdef)?/[xxxx]/'
         [xxxx]ef

         $ echo 'aaa' | sed 's/a\{3,\}/[xxxx]/'
         [xxxx]
         $ echo 'aaa' | sed 's/a{3,}/[xxxx]/'
         aaa
         $ echo 'aaa' | sed -E 's/a\{3,\}/[xxxx]/'
         aaa
         $ echo 'aaa' | sed -E 's/a{3,}/[xxxx]/'
         [xxxx]
         $ echo 'aaa' | perl -p -e 's/a\{3,\}/[xxxx]/'
         aaa
         $ echo 'aaa' | perl -p -e 's/a{3,}/[xxxx]/'
         [xxxx]

       ]]></artwork>
      </figure>
      <t>Use of POSIX extended regular expressions is motivated by broad support in the form of API availability <xref target="GNU"/> and database support, with the following major databases supporting POSIX extended regular expressions:
        <list hangIndent="8" style="hanging">
          <t>Oracle <xref target="ORACLE"/></t>
          <t>MySQL <xref target="MYSQL"/></t>
          <t>Postgres <xref target="POSTGRES"/></t>
        </list>
      </t>
    </section>

    <section anchor="IANA" title="IANA Considerations">
      <t>FOR DISCUSSION: The URL query parameter "searchtype" with a value of "regex" is specified herein as syntax for specifying that the RDAP query search pattern is a POSIX extended regular expression. The same approach could be used for specifying future OPTIONAL RDAP search mechanisms. An IANA-maintained registry of RDAP search mechanisms is recommended for recording a list of allowable values for the "searchtype" query parameter.</t>
    </section>

    <section anchor="ImplementationStatus" title="Implementation Status">
      <t>Note to RFC Editor: Please remove this entire section before publication along with the reference to RFC 7942 <xref target="RFC7942"/>.
      </t>
      <t>This section records the status of known implementations of the protocol defined by this specification at the time of posting of this Internet-Draft, and is based on a proposal described in RFC 7942. The description of implementations in this section is intended to assist the IETF in its decision processes in progressing drafts to RFCs. Please note that the listing of any individual implementation here does not imply endorsement by the IETF. Furthermore, no effort has been spent to verify the information presented here that was supplied by IETF contributors. This is not intended as, and must not be construed to be, a catalog of available implementations or their features. Readers are advised to note that other implementations may exist.</t>

      <t>According to RFC 7942, "this will allow reviewers and working groups to assign due consideration to documents that have the benefit of running code, which may serve as evidence of valuable experimentation and feedback that have made the implemented protocols more mature. It is up to the individual working groups to use this information as they see fit".</t>

      <section anchor="vlabs" title="Verisign Labs">
        <t><list style="none">
          <t>Responsible Organization: Verisign Labs</t>
          <t>Location: https://rdap.verisignlabs.com/</t>
          <t>Description: This implementation includes support for POSIX extended regular expression domain registry RDAP queries using live data from the .cc and .tv country code top-level domains.  This implementation also supports federated authentication using OpenID Connect providers as described in <xref target="RDAPOPENID"/>. Three access levels are provided based on the authenticated identity of the client:
            <list style="numbers">
              <t>Unauthenticated: Limited information is returned in response to queries from unauthenticated clients.</t>
              <t>Basic: Clients who authenticate using a publicly available identity provider like Google Gmail or Microsoft Hotmail will receive all of the information available to an unauthenticated client plus additional registration metadata, but no personally identifiable information associated with entities.</t>
              <t>Advanced: Clients who authenticate using a more restrictive identity provider will receive all of the information available to a Basic client plus whatever information the server operator deems appropriate for a fully authorized client. Currently supported identity providers include those developed by Verisign Labs (https://testprovider.rdap.verisignlabs.com/) and CZ.NIC (https://www.mojeid.cz/).</t>
            </list></t>
          <t>Level of Maturity: This is a "proof of concept" research implementation.</t>
          <t>Coverage: This implementation includes all of the features described in this specification.</t>
          <t>Contact Information: Swapneel Sheth, ssheth@verisign.com</t>
          </list>
        </t>
      </section>
      <section anchor="APNIC" title="APNIC RDAP Service">
        <t><list style="none">
          <t>Responsible Organization: Asia-Pacific Network Information Centre (APNIC)</t>
          <t>Location: https://testrdap.apnic.net/</t>
          <t>Description: This implementation includes support for POSIX extended regular expression RDAP queries for the domain and entity object classes. The data source is a subset of a snapshot of the production registry data.</t>
          <t>Level of Maturity: This is a "proof of concept" research implementation.</t>
          <t>Coverage: Aside from character class expressions, collating symbols, and equivalence class expressions, all of the features described in this specification are implemented.</t>
          <t>Contact Information: Tom Harrison, tomh@apnic.net</t>
          </list>
        </t>
      </section>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>Security services for the operations specified in this document are described in RFC 7481 <xref target="RFC7481"/>.
      </t>

      <t>Search functionality typically requires more server resources (such as memory, CPU cycles, and network bandwidth) when compared to basic lookup functionality. This increases the risk of server resource exhaustion and subsequent denial of service due to abuse. This risk can be mitigated by developing and implementing controls to restrict search functionality to identified and authorized clients. If those clients behave badly, their search privileges can be suspended or revoked. Rate limiting as described in Section 5.5 of RFC 7480 <xref target="RFC7480"/> can also be used to control the rate of received search requests. Server operators can also reduce their risk by restricting the amount of information returned in response to a search request.
      </t>

      <t>Search functionality also increases the privacy risk of disclosing object relationships that might not otherwise be obvious. For example, a search that returns IDN variants <xref target="RFC6927"/> that do not explicitly match a client-provided search pattern can disclose information about registered domain names that might not be otherwise available. Implementers need to consider the policy and privacy implications of returning information that was not explicitly requested.
      </t>

      <t>Note that there might not be a single, static information return policy that applies to all clients equally. Client identity and associated authorizations can be a relevant factor in determining how broad the response set will be for any particular query.
      </t>
    </section>

    <section title="Acknowledgements">
      <t>The author would like to acknowledge the following individuals for their contributions to the development of this document: TBD.</t>
    </section>
  </middle>

  <back>
    <references title="Normative References">
      &RFC2119;
      &RFC7480;
      &RFC7481;
      &RFC7482;
      &RFC7483;
      &RFC7230;
      &RFC5890;
      &RFC5952;
      &RFC3986;
      &RFC7942;
      &POSIX;
    </references>

    <references title="Informative References">
      &RFC1166;
      &RFC6927;
      <reference anchor="PERLRE" target="http://perldoc.perl.org/perlre.html">
        <front>
          <title>Perl regular expressions</title>
          <author>
            <organization>perl.org</organization>
          </author>
          <date month="" year="" />
        </front>
      </reference>
      <reference anchor="PCRE" target="http://www.pcre.org/">
        <front>
          <title>Perl Compatible Regular Expressions</title>
          <author>
            <organization>pcre.org</organization>
          </author>
          <date month="" year="" />
        </front>
      </reference>
      <reference anchor="GNU" target="https://www.gnu.org/software/libc/manual/html_node/Regular-Expressions.html">
        <front>
          <title>GNU Regular Expression Matching</title>
          <author>
            <organization>gnu.org</organization>
          </author>
          <date month="" year="" />
        </front>
      </reference>
      <reference anchor="ORACLE" target="https://docs.oracle.com/database/121/ADFNS/adfns_regexp.htm#ADFNS231">
        <front>
          <title>Oracle SQL and POSIX Regular Expression Standard</title>
          <author>
            <organization>Oracle Corporation</organization>
          </author>
          <date month="" year="" />
        </front>
      </reference>
      <reference anchor="MYSQL" target="http://dev.mysql.com/doc/refman/5.7/en/regexp.html">
        <front>
          <title>MySQL Regular Expressions</title>
          <author>
            <organization>mysql.com</organization>
          </author>
          <date month="" year="" />
        </front>
      </reference>
      <reference anchor="POSTGRES" target="https://www.postgresql.org/docs/9.3/static/functions-matching.html">
        <front>
          <title>PostgreSQL POSIX Regular Expressions</title>
          <author>
            <organization>postgresql.org</organization>
          </author>
          <date month="" year="" />
        </front>
      </reference>
      <reference anchor="RDAPOPENID" target="https://tools.ietf.org/html/draft-hollenbeck-regext-rdap-openid-01.txt">
        <front>
          <title>Federated Authentication for the Registration Data Access Protocol (RDAP) using OpenID Connect</title>
          <author>
            <organization>ietf.org</organization>
          </author>
          <date month="" year="" />
        </front>
      </reference>
    </references>

    <section title="Change Log">
      <t>
        <list style="hanging">
          <t hangText="00:">Initial version.</t>
          <t hangText="01:">Renewed and moved invalid Normative References to Informative References</t>
          <t hangText="02:">Specified use of percent encoding for URL reserved characters in regular expressions and removed specification of base64url encoding for regular expressions</t>
          <t hangText="03:">Added information related to the implications of system locale on the processing of regular expression searches. Also updated the implementation status section with APNIC's information.</t>
          <t hangText="04:">Keepalive Refresh and minor spelling fixes</t>
        </list>
      </t>
    </section>
  </back>
</rfc>
