<?xml version="1.0" encoding="US-ASCII"?>
<!-- This template is for creating an Internet Draft using xml2rfc,
    which is available here: http://xml.resource.org. -->
<!DOCTYPE rfc SYSTEM "rfc2629.dtd">
<?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
<!-- used by XSLT processors -->
<!-- For a complete list and description of processing instructions (PIs), 
    please see http://xml.resource.org/authoring/README.html. -->
<!-- Below are generally applicable Processing Instructions (PIs) that most I-Ds might want to use.
    (Here they are set differently than their defaults in xml2rfc v1.32) -->
<?rfc strict="yes" ?>
<!-- give errors regarding ID-nits and DTD validation -->
<!-- control the table of contents (ToC) -->
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space 
    (using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="std" docName="draft-zhang-trans-ct-binary-codes-04"
     ipr="trust200902">
  <!-- category values: std, bcp, info, exp, and historic
    ipr values: trust200902, noModificationTrust200902, noDerivativesTrust200902,
       or pre5378Trust200902
    you can add the attributes updates="NNNN" and obsoletes="NNNN" 
    they will automatically be output with "(if approved)" -->

  <!-- ***** FRONT MATTER ***** -->

  <front>
    <!-- The abbreviated title is used in the page header - it is only necessary if the 
        full title is longer than 39 characters -->

    <title abbrev="CT for Binary Codes">CT for Binary Codes</title>

    <!-- add 'role="editor"' below for the editors if appropriate -->

    <!-- Another author who claims to be an editor -->

    <author fullname="Liang Xia" initials="L" role="editor" surname="Xia">
      <organization>Huawei</organization>

      <address>
        <postal>
          <street/>

          <!-- Reorder these if your country does things differently -->

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <email>frank.xialiang@huawei.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Dacheng Zhang" initials="D" surname="Zhang">
      <organization>Huawei</organization>

      <address>
        <postal>
          <street/>

          <!-- Reorder these if your country does things differently -->

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <email>dacheng.zhang@huawei.com</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Daniel Kahn Gillmor" initials="D" surname="Gillmor">
      <organization>CMRG</organization>

      <address>
        <postal>
          <street/>

          <!-- Reorder these if your country does things differently -->

          <city/>

          <region/>

          <code/>

          <country/>
        </postal>

        <phone/>

        <email>dkg@fifthhorseman.net</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <author fullname="Behcet Sarikaya" initials="B" surname="Sarikaya">
      <organization>Huawei USA</organization>

      <address>
        <postal>
          <street>5340 Legacy Dr. Building 3</street>

          <!-- Reorder these if your country does things differently -->

          <city>Plano</city>

          <region>TX</region>

          <code>75024</code>

          <country/>
        </postal>

        <phone/>

        <email>sarikaya@ieee.org</email>

        <!-- uri and facsimile elements may also be added -->
      </address>
    </author>

    <date year="2017"/>

    <!-- If the month and year are both specified and are the current ones, xml2rfc will fill 
        in the current day for you. If only the current year is specified, xml2rfc will fill 
	 in the current day and month for you. If the year is not the current one, it is 
	 necessary to specify at least a month (xml2rfc assumes day="1" if not specified for the 
	 purpose of calculating the expiry date).  With drafts it is normally sufficient to 
	 specify just the year. -->

    <!-- Meta-data Declarations -->

    <area>Security</area>

    <workgroup>TRANS</workgroup>

    <!-- WG name at the upperleft corner of the doc,
        IETF is fine for individual submissions.  
	 If this element is not present, the default is "Network Working Group",
        which is used by the RFC Editor as a nod to the history of the IETF. -->

    <keyword>Binary Transparency</keyword>

    <!-- Keywords will be incorporated into HTML output
        files in a meta tag but they have no effect on text or nroff
        output. If you submit your draft to the RFC Editor, the
        keywords will be used for the search engine. -->

    <abstract>
      <t>This document proposes a solution extending the Certificate
      Transparency protocol [I-D.ietf-trans-rfc6962-bis] for transparently
      logging software binary codes (BC) or their digests, together with
      their signatures, to enable anyone to monitor and audit software
      provider activity and notice the distribution of suspect software, as
      well as to audit the BC logs themselves. The solution is called "Binary
      Transparency" in this document.</t>
    </abstract>
  </front>

  <middle>
    <section title="Introduction">
      <t>Digital signatures have been widely used in software distribution to
      prove the authenticity of software. By verifying the signature, an end
      user can ensure that the obtained software was developed by a legitimate
      provider (e.g., Microsoft) and has not been tampered with during
      distribution. If an end user does not have a direct trust relationship
      with the software provider, a certificate chain to a trust anchor that
      the user trusts should be provided. That is why many signature
      mechanisms for software distribution are based on public key
      infrastructure (PKI). However, signature mechanisms cannot prevent a
      software provider from distributing software with customized
      backdoors/drawbacks, or software that it does not have the right to
      distribute. Besides, it may be hard for a user to detect the differences
      between the software it got and the software provided to other
      users.</t>

      <t>This draft describes the Binary Transparency mechanism, which extends
      the Certificate Transparency (CT) protocol specified in
      [I-D.ietf-trans-rfc6962-bis] to support logging binary codes. A software
      provider can submit its software Binary Codes (BC) (or digests of the
      codes in order to, e.g., save space or avoid violating license
      restrictions) with the associated signature to one or more CT logs.
      Therefore, a user can easily detect the existence of software BC with
      customized backdoors by comparing it with the corresponding CT log
      entries. The software provider can monitor the logs all the time to
      detect whether there are tampered copies of its software in the log, or
      whether its software has been submitted to the log by other software
      providers without authority. In summary, the end users should be
      informed when any of the above situations happens; how to achieve this
      is beyond the scope of this document.</t>

      <t>With this mechanism, when a section of binary codes and the
      associated signature have been submitted to a log, if the provided
      certificate chain ends with a trust anchor that is accepted by the log,
      the log will accept the submission and return a Signed Binary Timestamp
      (SBT) to the software provider as evidence of its acceptance, to be
      provided to the users later. Thus, the users should only trust software
      that is accompanied by an SBT; software without an SBT should not be
      trusted even if it is associated with a proper signature. This approach
      then forces the software providers to submit their binary codes to logs
      before distributing them.</t>

      <t>Binary Transparency is an extension to Certificate Transparency,
      and complies with most of the specification in
      [I-D.ietf-trans-rfc6962-bis]. This document focuses only on the
      extension part of the Binary Transparency mechanism.</t>

      <section title="Requirements Language">
        <t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
        "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
        document are to be interpreted as described in <xref
        target="RFC2119">RFC 2119</xref>.</t>
      </section>
    </section>

    <section title="Cryptographic Components of Binary Transparency">
      <t>When applying CT for binary codes, a log is a single, ever-growing,
      append-only binary Merkle Hash Tree of software BC, with associated
      signature and certificate chain, complying with the Merkle Hash Tree
      specification in Section 2 of [I-D.ietf-trans-rfc6962-bis]. </t>
    </section>

    <section title="Motivation Scenarios">
      <t>The documents disclosed by Edward Snowden have raised people's
      concerns about the vulnerability of network devices to the passive
      attacks performed by the NSA or other organizations. Meanwhile, network
      device vendors are also concerned about their foreign markets because
      their products are suspected of having customized backdoors for
      adversaries to perform attacks. It is desirable for vendors to publish
      the design details of their products and provide sufficient facilities
      for clients to check whether certain hardware or software of a device
      has been improperly modified. There are various techniques that could be
      used for this purpose. One way is to force a vendor to submit the binary
      codes of its firmware to public CT logs. Then, anyone can verify the
      correctness of each log entry and monitor when new software BCs are
      added to the log. In particular, customers can easily detect whether the
      vendor is releasing the same firmware to everyone. In addition, with the
      assistance of Binary Transparency, customers will have more confidence
      in the quality of the firmware. Since the same codes are used by
      different customers all over the world, drawbacks in the firmware will
      be easier to detect.</t>

      <t>There are similar requirements to detect customized backdoors or
      misdistribution in the software market. Besides the software itself, a
      user may also be concerned about whether there are customized backdoors
      in the patches. Binary Transparency can help address such concerns in
      the same way. In addition, this mechanism also offers advantages in
      scenarios where signers are not aware that their keys have been
      compromised. If their update system is required to use a CT log, they
      have the chance to find out about the compromise.</t>
    </section>

    <!-- This PI places the pagebreak correctly (before the section title) in the text output. -->

    <?rfc needLines="8" ?>

    <section title="Log Format and Operation Extensions">
      <t>The software provider can submit the software and the associated
      signature to any preferred CT logs before distributing it. In some
      cases, the software provider may choose to submit only the signed digest
      of the software because of license restrictions or the space
      restrictions of log entries. In order to verify the attribution of each
      log entry, a log SHALL publish the set of certificates that it trusts,
      to help a software provider construct a certificate chain connecting a
      trust anchor and the certificate containing the key used to sign the
      software.</t>

      <t>A log needs to verify the certificate chain provided by the software
      provider, and MUST refuse to accept the signed software/digest if the
      chain cannot lead back to a trusted anchor. If the software/digest and
      the signature are accepted by a log and an SBT is issued, the log MUST
      store the entire chain and MUST present this chain for auditing upon
      request. </t>

      <t>Complying with the log format definition in
      [I-D.ietf-trans-rfc6962-bis], some definitions remain the same: "Log
      ID", "Merkle Tree Head", "Signed Tree Head", "Merkle Consistency
      Proofs", "Merkle Inclusion Proofs", "Shutting down a log", etc. The
      other required log format extensions for Binary Transparency are
      specified in the following sections:</t>

      <section title="Log Entries">
        <t>Each software entry in a log MUST include a "BinaryChainEntryV2"
        structure as below: </t>

        <figure align="center">
          <artwork align="left">
    enum { binary(TBD1), binary_digest(TBD2) } BIN_Signed_Type;

    opaque BINARY&lt;1..2^24-1&gt;;
    opaque ASN.1Cert&lt;1..2^24-1&gt;;
    struct {
       BIN_Signed_Type bin_signed_type;
       BINARY signed_software;      
       ASN.1Cert certificate_chain&lt;1..2^24-1&gt;;
    } BinaryChainEntryV2;
      </artwork>
        </figure>

        <t>"bin_signed_type" indicates whether the signature is generated
        based on the software or its digest.</t>

        <t>"signed_software" consists of a ContentInfo structure specified in
        CMS [RFC5652]. Specifically, this field includes the binary code or
        its digest, the signature, and any other additional information used
        to describe the software and the issuer publishing the software. The
        software SHOULD be encapsulated and signed following the ways
        specified in CMS [RFC5652]. If "bin_signed_type" is "binary" (TBD1),
        the software binary code is encapsulated in this field. If
        "bin_signed_type" is "binary_digest" (TBD2), the SHA-256 digest of the
        software binary code is encapsulated in this field.</t>

        <t>"certificate_chain" includes the certificates constructing a chain
        from the certificate of software provider to a certificate trusted by
        the log. The first certificate MUST be the certificate of software
        provider. Each following certificate MUST directly certify the one
        preceding it. The final certificate MUST either be, or be issued by, a
        root certificate accepted by the log. If the certificate chain is
        provided in the "signed_software" field structure, this field is set
        to empty. </t>
      </section>

      <section title="TransItem Structure">
        <t>The extended "TransItem" structure is defined as below:</t>

        <figure align="center">
          <artwork align="left">
    enum {
           reserved(0),
           x509_entry_v2(1), precert_entry_v2(2),
           x509_sct_v2(3), precert_sct_v2(4),
           signed_tree_head_v2(5), consistency_proof_v2(6),
           inclusion_proof_v2(7), x509_sct_with_proof_v2(8),
           precert_sct_with_proof_v2(9), BIN_entry_v2(TBD3),
           BIN_sbt_v2(TBD4), BIN_sbt_with_proof_v2(TBD5),
           (65535)
         } VersionedTransType;
    

    
   struct {
           VersionedTransType versioned_type;
           select (versioned_type) {
               case x509_entry_v2: TimestampedCertificateEntryDataV2;
               case precert_entry_v2: TimestampedCertificateEntryDataV2;
               case x509_sct_v2: SignedCertificateTimestampDataV2;
               case precert_sct_v2: SignedCertificateTimestampDataV2;
               case signed_tree_head_v2: SignedTreeHeadDataV2;
               case consistency_proof_v2: ConsistencyProofDataV2;
               case inclusion_proof_v2: InclusionProofDataV2;
               case x509_sct_with_proof_v2: SCTWithProofDataV2;
               case precert_sct_with_proof_v2: SCTWithProofDataV2;
               case BIN_entry_v2: TimestampedBinaryEntryDataV2;
               case BIN_sbt_v2: SignedBinaryTimestampDataV2;
               case BIN_sbt_with_proof_v2: SBTWithProofDataV2;
           } data;
       } TransItem;
   </artwork>
        </figure>

        <t>"versioned_type" is the type of the encapsulated data structure of
        TransItem. Three new values are added to it -- BIN_entry_v2(TBD3),
        BIN_sbt_v2(TBD4), BIN_sbt_with_proof_v2(TBD5).</t>

        <t>For the "data" structure, three corresponding new structure types
        are added -- TimestampedBinaryEntryDataV2,
        SignedBinaryTimestampDataV2, and SBTWithProofDataV2.</t>
      </section>

      <section title="Merkle Tree Leaves">
        <t>Each Merkle Tree leaf is defined as the hash value of a "TransItem"
        structure of according type. Here, a new type ("BIN_entry_v2") of
        "TransItem" structure is created, which encapsulates a new
        "TimestampedBinaryEntryDataV2" structure defined as below:</t>

        <figure align="center">
          <artwork align="left">
    opaque TBSSignedSoftware&lt;1..2^24-1&gt;;
    struct {
              uint64 timestamp;
              opaque issuer_key_hash&lt;32..2^8-1&gt;;
              BIN_Signed_Type bin_signed_type;
              TBSSignedSoftware tbs_signed_software;
              SbtExtension sbt_extensions&lt;0..2^16-1&gt;;
           } TimestampedBinaryEntryDataV2;
  </artwork>
        </figure>

        <t>"timestamp" is the NTP Time [RFC5905] at which the software binary
        code was accepted by the log, measured in milliseconds since the epoch
        (January 1, 1970, 00:00 UTC), ignoring leap seconds. Note that the
        leaves of a log&rsquo;s Merkle Tree are not required to be in strict
        chronological order. </t>

        <t>"issuer_key_hash" is the HASH of the public key of the software
        provider that signed the software, calculated over the DER encoding of
        the key represented as SubjectPublicKeyInfo [RFC5280]. This is needed
        to bind the software provider to the software binary code, making it
        impossible for the corresponding SBT to be valid for any other
        software whose TBSSignedSoftware matches "tbs_signed_software". The
        length of the "issuer_key_hash" MUST match HASH_SIZE. </t>

        <t>"bin_signed_type" indicates whether the signature is generated
        based on the software or its digest.</t>

        <t>"tbs_signed_software" is the DER encoded TBSSignedSoftware from the
        "signed_software" in the case of a "BinaryChainEntryV2". </t>

        <t/>
      </section>

      <section title="Structure of the Signed Binary Timestamp">
        <t>An SBT is a "TransItem" structure of type "BIN_sbt_v2", which
        encapsulates a "SignedBinaryTimestampDataV2" structure:</t>

        <figure align="center">
          <artwork align="left">
    enum {
           reserved(65535)
         } SbtExtensionType;

  struct {
           SbtExtensionType sbt_extension_type;
           opaque sbt_extension_data&lt;0..2^16-1&gt;;
         } SbtExtension;

  struct {
           LogID log_id;
           uint64 timestamp;
           SbtExtension sbt_extensions&lt;0..2^16-1&gt;;
           digitally-signed struct {
               TransItem timestamped_entry;
           } signature;
         } SignedBinaryTimestampDataV2;
</artwork>
        </figure>

        <t>"log_id" is this log&rsquo;s unique ID, encoded in an opaque
        vector.</t>

        <t>"timestamp" is equal to the timestamp from the
        "TimestampedBinaryEntryDataV2" structure encapsulated in the
        "timestamped_entry".</t>

        <t>"sbt_extension_type" identifies a single extension from the IANA
        registry in <xref target="IANA"/>. At the time of writing, no
        extensions are specified.</t>

        <t>The interpretation of the "sbt_extension_data" field is determined
        solely by the value of the "sbt_extension_type" field. Each document
        that registers a new "sbt_extension_type" must describe how to
        interpret the corresponding "sbt_extension_data".</t>

        <t>"sbt_extensions" is a vector of 0 or more SBT extensions. This
        vector MUST NOT include more than one extension with the same
        "sbt_extension_type". The extensions in the vector MUST be ordered by
        the value of the "sbt_extension_type" field, smallest value first. If
        an implementation sees an extension that it does not understand, it
        SHOULD ignore that extension. Furthermore, an implementation MAY
        choose to ignore any extension(s) that it does understand.</t>

        <t>The encoding of the digitally-signed element is defined in
        [RFC5246].</t>

        <t>"timestamped_entry" is a "TransItem" structure that MUST be of type
        "BIN_entry_v2".</t>
      </section>
    </section>

    <section title="Log Client Messages">
      <t>In Section 5 of [I-D.ietf-trans-rfc6962-bis], a set of messages is
      defined for clients to query and verify the correctness of the log
      entries they are interested in. In this document, a new message is
      defined and an existing message is extended for CT to support Binary
      Transparency. </t>

      <section title="Add Binary Code and Certificate Chain to Log">
        <figure align="center">
          <artwork align="left">
   POST https://&lt;log server&gt;/ct/v2/add-Binary-chain

   Inputs:
    bin_signed_type: indicates whether the input parameter "software" 
                     is constructed by the binary code or its digest.
    software: the binary code (or digest), the signature, and the 
              information used to describe the software and the software
              provider publishing the software, which are encapsulated 
              following the way specified in CMS [RFC5652]. The submitter
              desires an SBT for this element.
    chain:  An array of base64-encoded certificates.  The first element is
            the certificate used to sign the binary code (or digest); the
            second certifies the first and so on to the last, which either is,
            or is certified by, an accepted trust anchor. If the certificate
            chain information has been included in the "software" field, this
            field could be empty.

  Outputs:
    sbt:  A base64 encoded "TransItem" of type "BIN_sbt_v2", signed by this 
          log, that corresponds to the submitted software.

  Error codes:
    Identical to the corresponding part of Section 5.1 (Add Chain to Log) of
    [I-D.ietf-trans-rfc6962-bis].
</artwork>
        </figure>
      </section>

      <section title="Retrieve Entries and STH from Log">
        <figure align="center">
          <artwork align="left">
   GET https://&lt;log server&gt;/ct/v2/get-entries
   Inputs:
      start:  0-based index of first entry to retrieve, in decimal.
      end:  0-based index of last entry to retrieve, in decimal.
   Outputs:
      entries:  An array of objects, each consisting of
      leaf_input:  The base64 encoded "TransItem" structure of type 
                   "x509_entry_v2" or "precert_entry_v2" or "BIN_entry_v2" 
                   (see Section 4.3).
      log_entry:  The base64 encoded log entry (see Section 4.1).  In the 
                  case of an "x509_entry_v2" entry, this is the whole
                  "X509ChainEntry"; and in the case of a "precert_entry_v2",
                  this is the whole "PrecertChainEntryV2"; and in the case of a 
                  "BIN_entry_v2", this is the whole "BinaryChainEntryV2".
      sct:  The base64 encoded "TransItem" of type "x509_sct_v2" or "precert_sct_v2" 
            or "BIN_sbt_v2" corresponding to this log entry.
      sth:  A base64 encoded "TransItem" of type "signed_tree_head_v2", signed 
            by this log.
</artwork>
        </figure>

        <t>Further details are identical to those in Section 5.7 of
        [I-D.ietf-trans-rfc6962-bis].</t>
      </section>

      <section title="Summary">
        <t>In summary, the above extensions of Binary Transparency enable the
        software providers, the end users, and anyone to monitor and audit the
        CT logs to mitigate the possible attacks induced by tampered software,
        or software misdistribution.</t>

        <t>This section gives a brief introduction to all the other aspects of
        Binary Transparency mechanisms for the reason of completeness, since
        they comply with the basic CT protocol specification. For more details
        please refer to the corresponding sections of
        [I-D.ietf-trans-rfc6962-bis].</t>

        <t>Software providers act the same as TLS servers in the CT protocol.
        They present one or more SBTs from one or more logs to each end user
        while distributing the software, where each SBT corresponds to the
        software. Software providers SHOULD also present the corresponding
        inclusion proofs and STHs. How the software providers present this
        information is beyond the scope of this document.</t>

        <t>The end users of software act the same as clients of logs described
        in the CT protocol. They can perform various functions, such as:
        getting log metadata, exchanging the STHs they see, receiving and
        validating SBTs, and validating inclusion proofs.</t>

        <t>Binary Transparency also provides monitoring and auditing functions
        with the same algorithms defined for the CT protocol.</t>

        <t>Binary Transparency supports the same algorithm agility feature for
        the signature algorithm and hash algorithm as the CT protocol.</t>
      </section>
    </section>

    <section anchor="Acknowledgements" title="Acknowledgements">
      <t/>
    </section>

    <!-- Possibly a 'Contributors' section ... -->

    <section anchor="IANA" title="IANA Considerations">
      <t>To be added.</t>
    </section>

    <section anchor="Security" title="Security Considerations">
      <t>To be added.</t>
    </section>
  </middle>

  <!--  *****BACK MATTER ***** -->

  <back>
    <!-- References split into informative and normative -->

    <!-- There are 2 ways to insert reference entries from the citation libraries:
    1. define an ENTITY at the top, and use "ampersand character"RFC2629; here (as shown)
    2. simply use a PI "less than character"?rfc include="reference.RFC.2119.xml"?> here
       (for I-Ds: include="reference.I-D.narten-iana-considerations-rfc2434bis.xml")

    Both are cited textually in the same manner: by using xref elements.
    If you use the PI option, xml2rfc will, by default, try to find included files in the same
    directory as the including file. You can also define the XML_LIBRARY environment variable
    with a value containing a set of directories to search.  These can be either in the local
    filing system or remote ones accessed by http (http://domain/dir/... ).-->

    <references title="Normative References">
      <?rfc include="reference.RFC.2119"?>

      <?rfc include="reference.RFC.5246"?>

      <?rfc include="reference.RFC.5280"?>

      <?rfc include="reference.RFC.5652"?>

      <?rfc include="reference.RFC.5905"?>
    </references>

    <references title="Informative References">
      <?rfc include='reference.I-D.ietf-trans-rfc6962-bis'?>
    </references>

    <!-- Change Log

v00 2006-03-15  EBD   Initial version

v01 2006-04-03  EBD   Moved PI location back to position 1 -
                     v3.1 of XMLmind is better with them at this location.
v02 2007-03-07  AH    removed extraneous nested_list attribute,
                     other minor corrections
v03 2007-03-09  EBD   Added comments on null IANA sections and fixed heading capitalization.
                     Modified comments around figure to reflect non-implementation of
                     figure indent control.  Put in reference using anchor="DOMINATION".
                     Fixed up the date specification comments to reflect current truth.
v04 2007-03-09 AH     Major changes: shortened discussion of PIs,
                     added discussion of rfc include.
v05 2007-03-10 EBD    Added preamble to C program example to tell about ABNF and alternative 
                     images. Removed meta-characters from comments (causes problems).

v06 2010-04-01 TT     Changed ipr attribute values to latest ones. Changed date to
                     year only, to be consistent with the comments. Updated the 
                     IANA guidelines reference from the I-D to the finished RFC.  -->
  </back>
</rfc>
