% Copyright (c) 2014, Taylor R. Campbell
%
% Verbatim copying and distribution of this entire article are
% permitted worldwide, without royalty, in any medium, provided this
% notice, and the copyright notice, are preserved.

\documentclass{beamer}

\usepackage{amsmath}
\usepackage{graphicx}

\newcommand{\devrandom}{\texttt{/dev/random}}
\newcommand{\devurandom}{\texttt{/dev/urandom}}
\newcommand{\devxrandom}{\texttt{/dev/u?random}}

\title{The entropic principle}
\subtitle{\devxrandom\ and NetBSD}
\author{Taylor `Riastradh' Campbell \\
  \texttt{campbell@mumble.net} \\
  \texttt{riastradh@NetBSD.org}}
\date{EuroBSDcon 2014 \\
  Sofia, Bulgaria \\
  September 28, 2014}

\begin{document}

\frame{\titlepage
  {\let\thefootnote\relax
   \footnotetext{\tiny Copyright (c) 2014, Taylor R. Campbell.\\
     Verbatim copying and distribution of this entire article are
      permitted worldwide, without royalty, in any medium, provided
      this notice, and the copyright notice, are preserved.}}
}

\begin{frame}
  \begin{center}
    \LARGE
    Use \devurandom.
  \end{center}
  \note{If you take nothing else away from this talk, just remember
     that applications should use \devurandom\ and the OS needs to
     support that.}
\end{frame}

\begin{frame}
  \frametitle{What is \devxrandom?}

  \begin{itemize}
    \item \devrandom\ and \devurandom\ are device files on all modern
       Unix systems.
    \item Can't predict what you will read from them.
    \item (Writes influence future reads.)
    \item Reading from \devrandom\ sometimes blocks.
  \end{itemize}

  Much cargo-cult voodoo surrounds \devxrandom.
\end{frame}

\begin{frame}
  \frametitle{Unpredictability matters}

  \begin{itemize}
    \item There are bad guys on the internet.
    \item They want to eavesdrop on our conversations.
    \item They want to intercept our conversations.
  \end{itemize}

  \begin{center}
    \includegraphics[height=3cm]{james-clapper}
  \end{center}

  We need crypto to defend against this, and crypto needs unpredictable
   secrets.

  {\let\thefootnote\relax
   \footnotetext{\tiny Photo copyright (c) by Kit Fox/Medill DC under
     CC-BY~2.0, cropped from original at
     \url{https://www.flickr.com/photos/medilldc/6797228431/}\\}}
\end{frame}

\begin{frame}
  \frametitle{Unpredictability matters}

  What happens if you use crypto with predictable secrets?

  \begin{itemize}
    \item Smart cards generated keys for Taiwan's national identity
       database with broken RNGs leading to repeated and factorable
       keys: \url{http://smartfacts.cr.yp.to/}
    \item Sony used ECDSA with a broken RNG to sign Playstation
       firmware updates, revealing the signing key.
    \item Millions of embedded devices on the internet have private RSA
       keys with factors in common generated from the same RNG states:
       \url{https://factorable.net/} (Mining Your P's and Q's).
    \item The NSA chose to backdoor the US's pseudorandom number
       generation standard, NIST SP800-90A, with Dual\_EC\_DRBG.
  \end{itemize}
   \note{Sensible crypto systems don't need many unpredictable secrets:
      a 32-byte one when you start doing crypto --- e.g., generating
      session keys or long-term keys --- ought to be enough.
     ECDSA is not a sensible crypto system.}
\end{frame}

\begin{frame}
  \frametitle{Security modelling}

  \begin{itemize}
    \item APPLICATION: Generate bits with uniform distribution for user
       programs.
    \item THREAT MODEL(S):
      \begin{itemize}
        \item Attacker may read other bits from \devxrandom.
        \item Attacker may influence \devxrandom.
        \item Attacker may compromise the kernel and get the internal
           state of \devxrandom.
      \end{itemize}
    \item SECURITY PROPERTY: Attacker must not predict any outputs not
       witnessed!
  \end{itemize}

  (Can't attain the security property against attacker who compromises
   the kernel, but some people worry about theoretically approximating
   it for some reason.)
\end{frame}

\begin{frame}
  \frametitle{Formalizing unpredictability}

  \begin{itemize}
    \item Random variable $X$: observable physical system which can
       take on possible values $x_0$, $x_1$, \ldots, $x_{n - 1}$.
    \item Probability that we observe $X$ to have value $x$: $\Pr[X =
       x]$.
    \item Observation of $X$ may not be predictable, but how do we
       formalize measuring how unpredictable?
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Formalizing unpredictability: Shannon entropy}

  \begin{itemize}
    \item Popular approach to measure unpredictability of $X$ is
       \emph{Shannon entropy}, in units of bits:
      \begin{equation*}
        H[X] = -\sum_i \Pr[X = x_i] \log_2 \Pr[X = x_i],
      \end{equation*}
       giving average bits of information per bit of observation of
       $X$.
    \item If $X$ is drawn from $\{00,01,10,11\}$, and
      \begin{gather*}
        \Pr[X = 00] = \Pr[X = 11] = 1/2, \\
        \Pr[X = 01] = \Pr[X = 10] = 0,
      \end{gather*}
       then $H[X] = 1$, so since an observation of $X$ has two bits,
       $X$ has half a bit of information per bit of observation.
    \item Guessing this is only as hard as guessing what a single coin
       flip was, not two coin flips in a row.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Formalizing unpredictability: not Shannon entropy}

  \begin{itemize}
    \item Shannon entropy is no good for crypto: if there are $2^{255}
       + 1$ possibilities for $X$, and
      \begin{equation}
        \Pr[X = x] = \begin{cases}
          1/2, & \text{if $x = \text{`hunter2'}$}; \\
          1/2^{256} & \text{otherwise},
        \end{cases}
      \end{equation}
       then $H[X] \approx 128$, but I don't have to try $2^{127}$
       possibilities before I can probably guess your password --- I
       have a pretty good guess what it is!
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Formalizing unpredictability: min-entropy}

  \begin{itemize}
    \item Crypto instead uses \emph{min-entropy}:
      \begin{equation}
        H_\infty[X] = -\max_i \log_2 \Pr[X = x_i].
      \end{equation}
    \item Min-entropy estimates the difficulty of the \emph{best
       strategy} at guessing $X$.
    \item If $X$ is drawn uniformly from $k$-bit strings, then
       $H_\infty[X] = k$, which is as good as you can get!
    \item Standard crypto practice is to use uniform distributions with
       $k = 128$ or $k = 256$ for key material.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{\devxrandom\ as random variable}

  \begin{itemize}
    \item Kernel's job is to make the random variable of reading $k$
       bits from \devxrandom\ have $k$ bits of entropy.
    \item How does it choose the bits?
    \item Computer programs (single-threaded) are supposed to be
       deterministic!
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Entropy sources}

  \begin{itemize}
    \item Computers make nondeterministic observations: clock skew,
       network packets, keyboard input, disk seek times.
       \begin{center}
\begin{verbatim}
% gpg --gen-key
...
Please bang on the keyboard like a monkey
to ensure there's enough entropy!
\end{verbatim}
       \end{center}
    \item These \emph{entropy sources} are random variables with highly
       nonuniform distribution.
    \item Attacker may influence them: send regular network packets,
       bang on the keyboard like a robot, \&c.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Entropy pooling and distribution}

  \begin{itemize}
    \item Kernel combines entropy sources with crypto magic called
       entropy extractors.
    \item Kernel uses output as seed for deterministic pseudorandom
       number generator when you read from \devxrandom.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{What if there's not much entropy?}

  \begin{itemize}
    \item Your system autogenerated \texttt{sshd} keys from
       \devxrandom\ before you've had a chance to bang on the keyboard
       like a monkey!
    \item Keys are predictable!
      Bad guys can guess them and log in!
      Even Debian is laughing at you!
      How do we prevent this?
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{What if there's not much entropy?}

  \begin{itemize}
    \item Na\"ive answer: wait until system is unpredictable enough.
    \item \devrandom\ traditionally estimates how much entropy the
       system has observed, and blocks until it reaches a threshold.
    \item Use it as an `unpredictability barrier': read once from
       \devrandom\ and once it unblocks, use \devurandom\ to generate
       keys.
    \item \devurandom\ never blocks: whatever has been fed into the
       entropy pool, the kernel uses it to seed a pseudorandom number
       generator and generate output.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{What if there's not much entropy?}

  Problem:
  Can't say whether a \emph{state} is unpredictable.
  Can only say whether a \emph{process} is unpredictable.

  (Obligatory Dilbert reference.)

  \begin{center}
    \texttt{http://dilbert.com/strips/comic/2001-10-25/}

    {\tiny (Dilbert meets the accounting department's random number
      generator, a troll who only says `nine' over and over again.
     Scott Adams won't let me include his strip verbatim for this
      noncommercial use without paying him money.)}
    %\includegraphics[height=3cm]{dilbert-random}
  \end{center}

  Estimating this is a hard problem!
  No good solution.
  Typical approaches are \textit{ad hoc}.
\end{frame}

\begin{frame}
  \frametitle{Running out of entropy?}

  \begin{itemize}
    \item \devrandom\ \emph{also} traditionally blocks sometimes long
       after boot.
    \item Original theory was if you read too much from it you run out
       of entropy, so you'd better wait for more.
    \item But entropy is \emph{not a scarce resource} like oil.
    \item Entropy is a \emph{property of a physical process}.
    \item So why block after boot?
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Topping off the entropy tank}

  \begin{itemize}
    \item \devrandom\ blocking after boot is still useful!
    \item If you use it as an unpredictability barrier, you keep your
       blocking code paths exercised.
    \item If you use it to generate keys always, you will notice when
       your application blocks --- instead of being told two years from
       now that it doesn't work on an embedded system you never even
       heard of because it doesn't have enough entropy at boot!
    \item But most applications just need \devurandom.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{What if there is \emph{no} entropy?}

  \begin{itemize}
    \item What if there are no entropy sources?
    \item No disk, no mouse, no keyboard, no monkey.
    \item Kernel is totally deterministic: can't be unpredictable.
    \item Can't usefully serve \devxrandom.
    \item Example: embedded appliances (Mining P's \& Q's).
    \item Solution: save entropy from the factory installer onto small
       (32-byte) nonvolatile storage on install and shutdown, restore
       on boot.
    \item This system engineering avoids need for \devrandom\ as
       unpredictability barrier.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Exotic threat models}

  \begin{itemize}
    \item Attacker can influence network packet timings?  (Easy.)
    \item Attacker can influence keytrokes and timings?
    \item Attacker can compromise your CPU?  (Paranoid view of Intel
       \texttt{\textsc{rdrand}}!)
  \end{itemize}

  Good entropy extractors thwart manipulation of one entropy source or
   another.
\end{frame}

\begin{frame}
  \frametitle{Hardware random number generators}

  \begin{itemize}
    \item PCI devices: HIFN 7751, Broadcom BCM58xx.
    \item SoC on-board devices: Broadcom BCM2385 (Raspberry Pi).
    \item CPU instructions: Intel \texttt{\textsc{rdrand}},
       VIA~PadLock.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Hardware random number generators}

  \begin{itemize}
    \item \ldots The coin in your trouser pocket:
      \begin{center}
        \texttt{\% echo hhhtttthhththttthhht... >> /dev/random}
      \end{center}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{NetBSD}

  Current code written mainly by Thor Lancelot Simon and me.

  \begin{itemize}
    \item \devxrandom\ uses per-open or per-CPU pseudorandom number
       generator state, so it scales.
    \item Kernel uses slow NIST CTR\_DRBG with AES-128 for key material
       and \devxrandom: attacker must never predict unseen outputs.
    \item Kernel uses fast ChaCha8 without backtracking resistance for
       non-key material, e.g.\ NFS transaction ids: attacker must not
       predict new outputs ahead of time, but may predict old ones.
    \item Userland \texttt{arc4random(3)} API soon to be reimplemented
       with per-thread state and ChaCha8 instead of global RC4.
    \item (Let me know if you've heard of \texttt{arc4random(3)} being
       used for key material!
      I wouldn't recommend it!)
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Questions?}

  (Use /dev/urandom!)
\end{frame}

\begin{frame}[fragile]
  \frametitle{Appendix: Entropy game!}

  \url{http://www.loper-os.org/bad-at-entropy/manmach.html}

  \vskip 1cm

  My password manager:
  \begin{center}
\begin{verbatim}
{ tr -cd '[:graph:]' < /dev/urandom | head -c 20; } | \
scrypt enc /dev/stdin password.scrypt
\end{verbatim}
  \end{center}
\end{frame}

\end{document}
