From 11da511c784eca003deb90c23570f0873954e0de Mon Sep 17 00:00:00 2001 From: Duncan Wilkie Date: Sat, 18 Nov 2023 06:11:09 -0600 Subject: Initial commit. --- ic-reals-6.3/doc/implementation-notes/README | 2 + .../doc/implementation-notes/decimal_precision.tex | 704 +++++++++++++++++++++ 2 files changed, 706 insertions(+) create mode 100644 ic-reals-6.3/doc/implementation-notes/README create mode 100644 ic-reals-6.3/doc/implementation-notes/decimal_precision.tex (limited to 'ic-reals-6.3/doc/implementation-notes') diff --git a/ic-reals-6.3/doc/implementation-notes/README b/ic-reals-6.3/doc/implementation-notes/README new file mode 100644 index 0000000..b70ba06 --- /dev/null +++ b/ic-reals-6.3/doc/implementation-notes/README @@ -0,0 +1,2 @@ +This directory will eventually contain documents describing the +implementation. Regretfully, most are incomplete at present. diff --git a/ic-reals-6.3/doc/implementation-notes/decimal_precision.tex b/ic-reals-6.3/doc/implementation-notes/decimal_precision.tex new file mode 100644 index 0000000..7d8d14d --- /dev/null +++ b/ic-reals-6.3/doc/implementation-notes/decimal_precision.tex @@ -0,0 +1,704 @@ +\documentclass[11pt,a4paper]{article} + +\usepackage{amsfonts} +\usepackage{amsmath} + +\newtheorem{tm}{Theorem} +\newtheorem{df}[tm]{Def{i}nition} +\newtheorem{lemma}[tm]{Lemma} +\newtheorem{prop}[tm]{Proposition} +\newtheorem{cor}[tm]{Corollary} + +%\newenvironment{proof}{\textbf{Proof:} \footnotesize}{} +\newenvironment{proof} + { \begin{description} \item[Proof:] \footnotesize } + { \end{description} } + + +\begin{document} + +\title{Obtaining a Required Absolute Precision from an Exact Floating Point + Number} +\author{Marko Krznari\'{c}} +\date{\today} %\date{21 March 2000} +\maketitle + + +\section{Introduction} +A real number may be represented as a shrinking sequence of nested, + closed intervals with rational end--points whose length tends to + zero. We will work in the one--point compactification $\mathbb{R}^* + = \mathbb{R} \cup \{ \infty \}$ of the real line, which is usually + represented by the unit circle and the stereographic projection. In + the LFT approach to Exact Real Arithmetic the sequence of intervals + is generated by a sequence of one--dimensional linear fractional + transformations (LFTs) applied to a base interval, \cite{vui90, + niekor95, edapot97, pot98, hec99}. + +These intervals (LFTs applied to the base interval) are better and + better approximations to the real number. Knowing the length of the + interval we may tell how good the approximations are. Except of + one proposition in \cite{pot98} (Proposition 40, page 129), there + are no other references which tell us how to calculate the length + of the intervals. Here, we show how to determine the length of the + intervals and especially, how to obtain a required decimal + precision of a real number. + + +\section{Representation of Real Numbers} +Let us denote the set of matrices with integer coefficients by: + \[ \mathbb{M} = + \left\{ \left( \begin{array}{cc} a&c\\b&d \end{array} + \right)\ |\ a,b,c,d \in \mathbb{Z} \right\}. \] + A matrix induces an 1--dimensional LFT (a function from + $\mathbb{R}^*$ to $\mathbb{R}^*$) which is given by: + \[ \Theta \left( \begin{array}{cc} a&c\\b&d \end{array} + \right)(x) = \displaystyle { \frac{ax+c}{bx+d} }. \] + We can identify an LFT $\Theta(M)$ with the matrix $M$ and this + identification is unique up to scaling by a non--zero + integer. The composition of two 1--dimensional LFTs correspond + to matrix multiplication. A non--singular matrix $M$ maps an + interval to an interval: the interval $[p,q]$ is mapped to + $[Mp, Mq]$ for $\det M > 0$ and $[Mq,Mp]$ for $\det M < 0$. + In $\mathbb{R}^*$, the interval $[p,q]$ is the set of all of + the points which belong to the arc starting from $p$ and going + anti--clockwise to $q$ on the unit circle. For example, $[1,-1] = + \{ x\ |\ |x| \ge 1 \}$. + +One can easily verify that for any two intervals I and J with rational + end--points, there exists an LFT $M$ such that $M(I) = J$. This implies + that all rational intervals can be encoded as an LFT applied to a + fixed interval, which is called the \textsc{base interval}. Although + the choice of the base interval is essentially not relevant (whatever + holds for one, will hold, with minor adjustments, for any other + base interval), we should choose it in a way to make computations + as efficient as possible. There are two base intervals which have + been used, namely $[-1,1]$ and $[0,\infty]$. Check \cite{eda97, + edapot97, pot98, hec99} for more details. + +For a base interval $[a,b]$, we say that a matrix $M$ is \textsc{refining} + if $M[a,b] \subseteq [a,b]$. The set of all refining matrices is + denoted by $\mathbb{M}^+$. We also define the \textsc{information} of + an LFT $M$ by $\textbf{Info}_{[a,b]}(M) = {\bf Info}(M)=M[a,b]$. + +As we mentioned earlier, a real number can be represented as a + shrinking sequence of nested, closed intervals with rational + end--points whose length tends to zero. Or, using the facts given + above, as a sequence of 1--dimensional LFTs: + \[ \begin{array}{rccl} + & \{ x \} &=& \displaystyle{ \bigcap_n [p_n,q_n], + \qquad p_n,q_n \in \mathbb{Q}, } \vspace{1em} \\ + & [p_n,q_n] &=& \displaystyle{ M_0 M_1 \ldots M_n [a,b], + \qquad \forall n, } \vspace{1em} \\ + \Rightarrow & \{ x \} &=& \displaystyle{ \bigcap_n M_0 M_1 + \ldots M_n [a,b], } + \end{array} \] + where $[a,b]$ is the base interval, $M_0 \in \mathbb{M}$, and + $M_n \in \mathbb{M}^+,\ n>0$. This representation is called + \textsc{normal product}. The first matrix, $M_0$, determines an + interval which contains the real number $x$, while all other matrices + refine that interval more and more. By analogy with the usual + representation of the real numbers, the first matrix of the normal + product, $M_0 \in \mathbb{M}$, is called a \textsc{sign} matrix, + while the matrices $M_n \in \mathbb{M}^+$ are called \textsc{digit} + matrices. + + Edalat and Potts in~\cite{eda97,edapot97} proposed a standard form, + called \textsc{exact floating point}, EFP, where both, sign and digit + matrices, belong to predetermined finite sets of matrices. The + information in the sign matrices should overlap and cover + $\mathbb{R}^*$. The four sign matrices proposed by Edalat and Potts + correspond to rotations of the unit circle by $0^\circ$ (i.e identity), + $90^\circ, \ 180^\circ$ and $270^\circ$, and form a cyclic group. + Digit matrices should overlap, cover the base interval $[a,b]$, and + contract distances in $[a,b]$. + + +\section{The Base Interval $[0,\infty]$} +The main reason to choose $[0,\infty]$ as a base interval is that it is + very easy to calculate the information of a matrix $M=\begin{pmatrix} + a&c\\b&d \end{pmatrix}$: ${\bf Info}M=[\frac{c}{d},\frac{a}{b}]$ if + $\det M > 0$ or ${\bf Info}M=[\frac{a}{b},\frac{c}{d}]$ if $\det M < 0$. + In the base interval $[0,\infty]$, the four sign matrices are named + as follows: + \[ \begin{array}{rccclcrcl} + S_+ &=& \begin{pmatrix} 1&0 \\ 0&1 \end{pmatrix} &=& + S_\infty^{4k} = \textrm{Id}, &\qquad& + \textbf{Info}(S_+) &=& [0,\infty], \vspace{0.5em} \\ + S_{\infty} &=& \begin{pmatrix} 1&1 \\ -1&1 \end{pmatrix} &=& + S_\infty^{4k+1}, &\qquad& + \textbf{Info}(S_\infty) &=& [1,-1], \vspace{0.5em} \\ + S_- &=& \begin{pmatrix} 0&-1 \\ 1&0 \end{pmatrix} &=& + S_\infty^{4k+2}, &\qquad& + \textbf{Info}(S_-) &=& [\infty,0], \vspace{0.5em} \\ + S_0 &=& \begin{pmatrix} 1&-1 \\ 1&1 \end{pmatrix} &=& + S_\infty^{4k+3}, &\qquad& + \textbf{Info}(S_0) &=& [-1,1]. + \end{array} \] + It is easy to check that, for example, $S_0 S_\infty = + S_\infty S_0 = S_+ = \textrm{Id}$. + + For an integer $b \ge 2$ we define digit matrices in base $b$ by: + \[ D_k = \begin{pmatrix} b+k+1 & b+k-1 \\ b-k-1 & b-k+1 \end{pmatrix} + = S_\infty \begin{pmatrix} 1&k\\0&b \end{pmatrix} S_0, \] + where $k$ is an integer with $ |k| < b$. Products of digit matrices + can be obtained as follows: + \[ D_{d_1} D_{d_2} \ldots D_{d_n} = \begin{pmatrix} + b^n+c+1 & b^n+c-1 \\ b^n-c-1 & b^n-c+1 \end{pmatrix} =: + \mathfrak{D}_c^n, \] + where + \[ c = c(d_1,d_2,\ldots,d_n) = \sum_{i=1}^n d_i b^{n-i}. \] + Furthermore, $\mathfrak{D}_c^n D_d = \mathfrak{D}_{bc+d}^{n+1}$. The + number $c$ provides a compressed representation for this product + of digit matrices. However, the original sequence of digits usually + cannot be recovered from $\mathfrak{D}_c^n$. + +The most used base $b$ in both, theory and practise, is the base $b=2$. + In that case, we get the following three digit matrices: + \[ \begin{array}{rccclcrcl} + D_{-1} &=& \begin{pmatrix} 1&0 \\ 1&2 \end{pmatrix} &=& + S_\infty \begin{pmatrix} 1&-1 \\0&2 \end{pmatrix} S_0, &\qquad& + \textbf{Info}(D_{-1}) &=& [0,1], \vspace{0.5em} \\ + D_0 &=& \begin{pmatrix} 3&1 \\ 1&3 \end{pmatrix} &=& + S_\infty \begin{pmatrix} 1&0\\0&2 \end{pmatrix} S_0, &\qquad& + \textbf{Info}(D_0) &=& [\frac{1}{3},3], \vspace{0.5em} \\ + D_1 &=& \begin{pmatrix} 2&1 \\ 0&1 \end{pmatrix} &=& + S_\infty \begin{pmatrix} 1&1\\0&2 \end{pmatrix} S_0, &\qquad& + \textbf{Info}(D_1) &=& [1,\infty]. + \end{array} \] + Any sequence of $n$ digits in base $2$, $D_{d_1},\ D_{d_2},\ \ldots,\ + D_{d_n}$ can be compressed into the number $c(d_1,d_2,\ldots,d_n)$, + which can be represented in only $n+1$ bits of memory. + + +\subsection{$S_0 D_{d_1} D_{d_2} \ldots$} +The information of the sign matrix $S_0$ is the interval $[-1,1]$, + i.e. $\textbf{Info}(S_0) = [-1,1]$. Any digit in base $b$ will + contract distances on the real line by a factor $\frac{1}{b}$. + We have the following: + \begin{prop} \label{prop:Szero and width of information with base b} + \[ {\sf width} ( {\bf Info} ( S_0 D_{d_1} D_{d_2} + \ldots D_{d_n} ) ) = \frac{2}{b^n}, \] + where $D_{d_i},\ i=1 \ldots n$, are digits in the base $b$. + \end{prop} + \begin{proof} As $D_i = S_\infty \begin{pmatrix} 1&d\\0&b \end{pmatrix} + S_0$ and $S_\infty S_0 = \textrm{Id}$ we have: + \[ \begin{array}{rcl} + S_0 D_{d_1} D_{d_2} \ldots D_{d_n} &=& S_0 S_\infty + \begin{pmatrix} 1&d_1\\0&b \end{pmatrix} S_0 S_\infty + \begin{pmatrix} 1&d_2\\0&b \end{pmatrix} S_0 \ldots S_\infty + \begin{pmatrix} 1&d_n\\0&b \end{pmatrix} S_0 \\ + &=& \begin{pmatrix} 1&d_1\\0&b \end{pmatrix} + \begin{pmatrix} 1&d_2\\0&b \end{pmatrix} \ldots + \begin{pmatrix} 1&d_n\\0&b \end{pmatrix} S_0 \\ + &=& \begin{pmatrix} 1&c\\0&b^n \end{pmatrix} S_0, + \end{array} \] + where $c = \sum_{i=1}^n d_i b^{n-i}$. + Then, + \[ \begin{array}{rcl} + \textsf{width} (\textbf{Info}(S_0 D_{d_1} D_{d_2}\ldots D_{d_n})) + &=& \textsf{width} \left( \begin{pmatrix} 1&c\\0&b^n + \end{pmatrix} S_0 [0,\infty] \right) \\ + &=& \textsf{width} \left( + \begin{pmatrix} 1&c\\0&b^n \end{pmatrix} [-1,1] \right) \\ + &=& \displaystyle{ \textsf{width} \left( + \left[ \frac{-1+c}{b^n}, \frac{1+c}{b^n} \right] \right) } \\ + &=& \displaystyle{ \frac{2}{b^n} }. + \end{array} \] + \end{proof} + Knowing that, it is easy to calculate a sufficient number of digits + which will achieve any required accuracy. + + +\subsection{$S_+ D_{d_1} D_{d_2} \ldots$} \label{subsection spos} +Real numbers which can be represented by the product $S_+ D_{d_1} + D_{d_2} \ldots = D_{d_1} D_{d_2} \ldots$ are points of the + interval $[0,\infty]$. An infinite product of digits $D_{b-1}$ in + base $b$ represents $\infty$. Any other infinite product + can be written as a finite product of $D_{b-1}$ (exponent), followed + by an infinite product of digits starting with $D_k$ where $-(b-1) + \le k \le (b-2)$ (mantissa). Any such product represents a + non--negative real number $x \in [0,\infty)$. + +If the product represents $\infty$, as it happens with $\frac{1}{0}$, the + computation of the interval lengths will not finish in finite + time (unless we put a time limit). In all other cases, using + Proposition~\ref{prop:Spos and width of information with base b}, we + will be able to determine how many digit matrices will be sufficient + to satisfy the required accuracy. Proposition~\ref{prop:Spos and + width of information with base b} is a generalisation of Potts's + Proposition~\ref{prop:width of information with base 2}, + \cite{pot98}, which we present later. + \begin{prop} \label{prop:Spos and width of information with base b} + Let $D_{b-1}^e D_{d_1} D_{d_2} \ldots D_{d_n}$ be a finite product + of digits in base $b$ with $d_1 \ne (b-1)$. + Then: + \[ b^{e-n} < {\sf width} ( {\bf Info} ( D_{b-1}^e D_{d_1} + D_{d_2} \ldots D_{d_n} ) ) < 4 b^{e-n+2}. \] + \end{prop} + \begin{proof} + The product $D_{b-1}^e D_{d_1} D_{d_2} \ldots D_{d_n} = D_{b-1}^e + \mathfrak{D}_c^n$ can be compressed into $\mathfrak{D}_{c'}^{n'}$ where + \[ \begin{array}{rcl} + n' &=& e+n, \\ + c' &=& \left[ (b-1)b^{e+n-1} + \ldots + (b-1)b^n \right] + \left[ + d_1 b^{n-1} + \ldots + d_{n-1} b + d_n \right] \\ + &=& \displaystyle{ b^n (b-1) \frac{b^e-1}{b-1} + c } \\ + &=& b^{e+n} - b^n + c + \end{array} \] + Information of $D_{b-1}^e \mathfrak{D}_c^n$ is given by: + \[ \begin{array}{rcl} + {\bf Info} (D_{b-1}^e \mathfrak{D}_c^n) &=& \begin{pmatrix} + b^{n'}+c'+1 & b^{n'}+c'-1 \\ b^{n'}-c'-1 & b^{n'}-c'+1 + \end{pmatrix} [0,\infty] \\ + &=& \begin{pmatrix} 2b^{e+n}-(b^n-c-1) & 2b^{e+n}-(b^n-c+1) + \\ b^n-c-1 & b^n-c+1 \end{pmatrix} [0,\infty] \\ + &=& \left[ \displaystyle{ \frac{2b^{e+n}}{b^n-c+1} - 1, + \frac{2b^{e+n}}{b^n-c-1} - 1 } \right]. + \end{array} \] + Therefore, + \[ \begin{array}{rcl} + {\sf width} ( {\bf Info} ( D_{b-1}^e \mathfrak{D}_c^n ) ) &=& + \displaystyle{ \frac{2b^{e+n}}{b^n-c-1} - + \frac{2b^{e+n}}{b^n-c+1} } \\ + &=& \displaystyle{ \frac{4b^{e+n}}{(b^n-c)^2-1}. } + \end{array} \] + This holds for any sequence of digits. If, for example, all of the + $n$ digits compressed into $\mathfrak{D}_c^n$ are digits $D_{b-1}$, + then, + \[ \begin{array}{rcl} + \displaystyle{ \frac{4b^{e+n}}{(b^n-c)^2-1} } &=& + \displaystyle{ \frac{4b^{e+n}}{(b^n-(b^n-1))^2-1} } \\ + &=& \displaystyle{ \frac{4b^{e+n}}{0} } \\ + &=& \infty, + \end{array} \] + which corresponds to the length of the interval: + \[ \begin{array}{rcl} + {\bf Info} ( D_{b-1}^e \mathfrak{D}_{b^n-1}^n ) &=& + \displaystyle{ \left[ \frac{2b^{e+n}}{b^n-c+1} - 1, + \frac{2b^{e+n}}{b^n-c-1} -1 \right] } \\ + &=& [b^{e+n}-1, \infty]. + \end{array} \] + If $d_1 \ne b-1$, i.e. $-(b-1) \le d_1 < (b-1)$, we have: + \[ \begin{array}{rcl} + c &\ge& -(b-1) b^{n-1} - (b-1) b^{n-2} - \ldots - (b-1) b^0 \\ + &=& -b^n + 1 \vspace{1em} \\ + c &\le& (b-2) b^{n-1} + (b-1) b^{n-2} + (b-1) b^{n-3} + + \ldots + (b-1) b^0 \\ + &=& (b-2) b^{n-1} + (b^{n-1}-1) \\ + &=& (b-1) b^{n-1} - 1. + \end{array} \] + Therefore, + \[ \begin{array}{rcl} + {\sf width} ( {\bf Info} ( D_{b-1}^e \mathfrak{D}_c^n ) ) &=& + \displaystyle{ \frac{4b^{e+n}}{(b^n-c)^2-1} } \\ + &\le& \displaystyle{ \frac{4b^{e+n}}{(b^n-(b-1)b^{n-1}+1)^2 - 1} } \\ + &=& \displaystyle{ \frac{4b^{e+n}}{b^{2(n-1)}+2b^{n-1}} } \\ + &=& \displaystyle{ \frac{4b^{e+1}}{b^{n-1}+2} } \\ + &<& 4b^{e-n+2} \vspace{1em} \\ + {\sf width} ( {\bf Info} ( D_{b-1}^e \mathfrak{D}_c^n ) ) &\ge& + \displaystyle{ \frac{4b^{e+n}}{(b^n+b^n-1)^2-1} } \\ + &=& \displaystyle{ \frac{4b^{e+n}}{4b^{2n}-4b^n} } \\ + &=& \displaystyle{ \frac{b^e}{b^n-1} } \\ + &>& b^{e-n}. + \end{array} \] + \end{proof} + +Therefore, as soon as we get a digit in base $b$ which is not $D_{b-1}$, + i.e. when we determine a natural number $e$, we may use the formula + above to calculate the total number of digit matrices which will + guarantee the absolute tolerance we wish to achieve. + + +\subsection{$S_- D_{d_1} D_{d_2} \ldots$} \label{subsection sneg} +This case is basically the same as in the previous section. In the base + $b$, the sequence $S_- D_{-(b-1)} D_{-(b-1)} D_{-(b-1)} \ldots$ will + represent $\infty$. Hence, any attempt to obtain an absolute + precision will be futile. For any other sequence we can use a + variation of Proposition~\ref{prop:Spos and width of information with + base b}, in which $D_{-(b-1)}$ takes the position of $D_{b-1}$. + + +\subsection{$S_\infty D_{d_1} D_{d_2} \ldots$} +As we have seen earlier in Sections \ref{subsection spos} and + \ref{subsection sneg} the information of an EFP may contain $\infty$. + Of course, that will prevent us obtaining any absolute precision. That + is also the case when the sign digit of an EFP is $S_\infty$. We have + the following: + \begin{prop} \label{prop:the information containing infty} + The information of an EFP $S_\infty \mathfrak{D}_c^n$ will contain + $\infty$ if and only if $|c| \le 1$, i.e. $c=-1,0,1$. + \end{prop} + \begin{proof} + We have: + \[ \begin{array}{rcl} + {\bf Info} (S_\infty \mathfrak{D}_c^n) &=& \begin{pmatrix} 1&1\\-1&1 + \end{pmatrix} \begin{pmatrix} b^n+c+1 & b^n+c-1\\ b^n-c-1 & b^n-c+1 + \end{pmatrix} [0,\infty] \\ + &=& \begin{pmatrix} b^n & b^n \\ -1-c & 1-c \end{pmatrix} [0,\infty] \\ + &=& \displaystyle{ \left[ \frac{b^n}{1-c}, \frac{b^n}{-1-c} \right] }. + \end{array} \] + If $c \ge 2$ then $-1-c < 1-c <0$ which implies + \[ -\infty < \frac{b^n}{1-c} < \frac{b^n}{-1-c} < 0. \] + Similarly, if $c \le -2$ the interval ${\bf Info} (S_\infty + \mathfrak{D}_c^n )$ does not contain $\infty$. On the other hand, each + of the three intervals below: + \[ \begin{array}{rcl} + {\bf Info} (S_\infty \mathfrak{D}_{-1}^n) &=& \displaystyle{ \left[ + \frac{b^n}{2}, \infty \right] }, \\ + {\bf Info} (S_\infty \mathfrak{D}_0^n) &=& [b^n,-b^n], \\ + {\bf Info} (S_\infty \mathfrak{D}_1^n) &=& \displaystyle{ \left[ + \infty, -\frac{b^n}{2} \right] }, + \end{array} \] + contain $\infty$. This completes the proof. + \end{proof} + Note that $c(d_1,d_2,\ldots,d_n)=0$ iff $d_1=d_2=\ldots=d_n=0$. + Furthermore, $c(d_1,d_2,\ldots,d_n)$ is $1$, respectively $-1$, iff the + product of digit matrices $D_{d_1} D_{d_2} \ldots D_{d_n}$ is of the form + $D_0^e D_1 D_{-(b-1)}^{n-e-1}$, respectively $D_0^e D_{-1} D_{b-1}^{n-e-1}$, + for some $e \in \mathbb{N},\ e < n$. + \begin{prop} \label{prop:dneg db-1 = dzer dneg} + The following holds: + \[ \begin{array}{c} + D_{-1} D_{b-1} = D_0 D_{-1} \\ + D_1 D_{-(b-1)} = D_0 D_1 + \end{array} \] + \end{prop} + \begin{proof} + As $-1 \cdot b + (b-1) = -1$ we have that $D_{-1} D_{b-1} = + \mathfrak{D}_{-1}^2$. Similarly, $D_0 D_{-1} = \mathfrak{D}_{-1}^2$. + \end{proof} + +When the information of $S_\infty D_{d_1} D_{d_2} \ldots$ becomes bounded + ($|c|>1$) we can calculate the number of digits which will guarantee + any required absolute precision. We can do that with the help of the + following two propositions: + \begin{prop} \label{prop:Sinf and width of information with base b 1} + Let $S_\infty D_0^e D_{-1} D_{b-1}^f D_{d_1} D_{d_2} \ldots D_{d_n}$ + be a finite product of matrices in base $b$ with $d_1 \ne (b-1)$ + (that is, the information of such a product is a bounded interval). Then: + \[ \frac{1}{2} b^{e+f-n+1} < {\sf width} ( {\bf Info} ( S_\infty + D_0^e D_{-1} D_{b-1}^f \mathfrak{D}_c^n ) ) < 2b^{e+f-n+3}, \] + where $\mathfrak{D}_c^n = D_{d_1} D_{d_2} \ldots D_{d_n}$. + \end{prop} + \begin{proof} + Because of Proposition \ref{prop:dneg db-1 = dzer dneg} we have + $D_{-1} D_{b-1}^f = D_0^f D_{-1}$ and hence we can assume $f=0$ + and at the end, replace $e$ by $e+f$. We have: + \[ \begin{array}{rcl} + {\bf Info} (S_\infty D_0^e D_{-1} \mathfrak{D}_c^n) &=& + \begin{pmatrix} b^{e+n+1} & b^{e+n+1} \\ b^n-c-1 & b^n-c+1 + \end{pmatrix} [0,\infty] \\ + &=& \displaystyle{ \left[ \frac{b^{e+n+1}}{b^n-c+1}, + \frac{b^{e+n+1}}{b^n-c-1} \right]. } + \end{array} \] + Therefore, + \[ \begin{array}{rcl} + {\sf width} ( {\bf Info} ( S_\infty D_0^e D_{-1} + \mathfrak{D}_c^n ) ) &=& \displaystyle{ \frac{b^{e+n+1}}{b^n-c-1} + - \frac{b^{e+n+1}}{b^n-c+1} } \\ + &=& \displaystyle{ \frac{2b^{e+n+1}}{(b^n-c)^2-1}. } + \end{array} \] + As in the proof of the Proposition \ref{prop:Spos and width of + information with base b} we show that $-(b^n-1) \le c \le + (b-1)b^{n-1}-1$, which implies: + \[ \begin{array}{rcl} + {\sf width} ( {\bf Info} ( S_\infty D_0^e D_{-1} + \mathfrak{D}_c^n ) ) &=& \displaystyle{ + \frac{2b^{e+n+1}}{(b^n-c)^2-1} } \\ + &\le& \displaystyle{ \frac{2b^{e+n+1}}{(b^n-(b-1)b^{n-1}+1)^2 - 1} } \\ + &=& \displaystyle{ \frac{2b^{e+n+1}}{b^{2(n-1)}+2b^{n-1}} } \\ + &=& \displaystyle{ \frac{2b^{e+2}}{b^{n-1}+2} } \\ + &<& 2b^{e-n+3} \vspace{1em} \\ + {\sf width} ( {\bf Info} ( S_\infty D_0^e D_{-1} + \mathfrak{D}_c^n ) ) &\ge& \displaystyle{ + \frac{2b^{e+n+1}}{(b^n+b^n-1)^2-1} } \\ + &=& \displaystyle{ \frac{2b^{e+n+1}}{4b^{2n}-4b^n} } \\ + &=& \displaystyle{ \frac{b^{e+1}}{2(b^n-1)} } \\ + &>& \displaystyle{ \frac{1}{2} b^{e-n+1}. } + \end{array} \] + \end{proof} + \begin{prop} \label{prop:Sinf and width of information with base b 2} + Let $S_\infty D_0^e D_1 D_{-(b-1)}^f D_{d_1} D_{d_2} \ldots D_{d_n}$ + be a finite product of matrices in base $b$ with $d_1 \ne + -(b-1)$ (that is, the information of such a product is a + bounded interval). Then: + \[ \frac{1}{2} b^{e+f-n+1} < {\sf width} ( {\bf Info} ( S_\infty + D_0^e D_1 D_{-(b-1)}^f \mathfrak{D}_c^n ) ) < 2b^{e+f-n+3}, \] + where $\mathfrak{D}_c^n = D_{d_1} D_{d_2} \ldots D_{d_n}$. + \end{prop} + + +\subsection{Absolute Decimal Precision in Base $b=2$} +Usually we want to obtain the decimal precision of a given number. + In addition, base $b=2$ is the most used base in both theory + and practise. Therefore, the issue of obtaining the absolute decimal + precision in base $b=2$ is of a great importance \cite{errhec00}. + +Of course, we can use the propositions proved above, but we will use, + for performance reasons, better bounds, which we can obtain + because we take more digits in base $b=2$ into account. + + +\subsubsection{$S_0 D_{d_1} D_{d_2} \ldots$} +This case is quite easy. If we choose + \begin{equation} + n=\lceil 1 + 3.322 k \rceil, + \end{equation} + where $k$ is the required number of correct decimal digits, we have: + \[ \begin{array}{lrl} + & n & = \lceil 1 + 3.322 k \rceil, \\ + \Rightarrow & n & > 1 + k \log_2 10, \\ + \Rightarrow & -k \log_2 10 & > 1 - n, \\ + \Rightarrow & 10^{-k} & > 2^{1-n} \\ + && = {\sf width} ( {\bf Info} (S_0 D_{d_1} D_{d_2} \ldots D_{d_n} ) ). + \end{array} \] + + +\subsubsection{$S_+ D_{d_1} D_{d_2} \ldots,\ S_- D_{d_1} D_{d_2} \ldots$} +The following proposition is given in \cite{pot98}, page 129. ??????? + Check if it is correct or not ????????? + \begin{prop} \label{prop:width of information with base 2} + For any $e \in \mathbb{N},\ \alpha \in \{-1,0\},\ \beta \in + \{-1,0,1\}$ and $\gamma \in \mathbb{Z}$, if + \[ n = e - 1 - \gamma + (1+\alpha)(1+\beta) \] + then + \[ 2^{\gamma-1} < {\sf width} ( {\bf Info} (D_1^e D_\alpha + D_\beta \mathfrak{D}_c^n )) < 2^{\gamma+1} \] + for all $c \in \mathbb{Z}(2^n)$. + \end{prop} + +Suppose that we have a real number whose EFP represenation is given by + $S_+ D_{d_1} D_{d_2} \ldots$. Provided that $e$, the number of leading + digit matrices $D_1$ is finite (i.e. the number is not $\infty$), we + will calculate correctly the number up to $k$ decimal digits by + emitting not more than $e+2+n$ digit matrices, where + \begin{equation} + n = \lceil e + 3.322 k + 2 \rceil. + \end{equation} + We use $3.322$ as an upper bound for $\log_2 10$. The reasoning is + as follows: + \[ \begin{array}{lrl} + & n & = \lceil e + 3.322 k + 2 \rceil, \\ + \Rightarrow & n & > e + k \log_2 10 + 2, \\ + \Rightarrow & -k \log_2 10 & > e - n + 2, \\ + \Rightarrow & 10^{-k} & > 2^{e-n+2} \\ + && \ge 2^{e-n+(1+\alpha)(1+\beta)} \\ + && = 2^{\gamma + 1}, + \end{array} \] + where $\gamma = e-n-1+(1+\alpha)(1+\beta)$. By Proposition + \ref{prop:width of information with base 2}: + \[ {\sf width} ( {\bf Info} (D_1^e D_\alpha D_\beta + \mathfrak{D}_c^n )) < 2^{\gamma+1} < 10^{-k} \] + for all $c \in \mathbb{Z}(2^n)$. + +The same conclusion can be used in the case $S_- D_{d_1} D_{d_2} \ldots$. + The only difference is that $e$ is the number of leading digit matrices + $D_{-1}$. + + +\subsubsection{$S_\infty D_{d_1} D_{d_2} \ldots$} +If the sign digit is $S_0$, we do not have problems to yield any required + absolute precision. The information of $S_0$ is bounded interval and + every digit matrix will half the previous interval. In the case when + the sign digit is $S_+$ any conclusion is postponed until we get one + {\sc good} digit. By {\em good} digit in this case we mean a digit which + is not $D_1$ (recall that $S_+ D_1 D_1 \ldots$ represents $\infty$). + Then we will get a bounded interval (one which does not contain + $\infty$) and every subsequent digit will refine that interval. + Basically, we require that $e$, the number of leading digits $D_1$, is + finite. Similarly, if the sign is $S_-$ a {\em good} digit is a digit + which is not $D_{-1}$. + +Using Proposition \ref{prop:the information containing infty} is easy to + check that in the case when the leading matrix is $S_\infty$, we need two + {\em good} digits. The first {\em good} digit is $D_{-1}$, respectively + $D_1$, while the second one is either $D_{-1}$ or $D_0$, respectively + $D_0$ or $D_1$. The reason is that all of the sequences: + \[ \begin{array}{ll} + S_\infty D_0 D_0 \ldots &(\Leftrightarrow c=0) \\ + S_\infty D_0^e D_{-1} D_1 D_1 \ldots &(\Leftrightarrow c=-1) \\ + S_\infty D_0^e D_1 D_{-1} D_{-1} \ldots &(\Leftrightarrow c=1) + \end{array} \] + represent $\infty$. + +Once the information of $S_\infty D_{d_1} D_{d_2} \ldots$ is either + positive or negative (we got the first {\em good} digit) we can make + use of the proposition below. + \begin{prop} For $e,f \in \mathbb{N}$ we have: + \[ \begin{array}{c} + {\sf width} ( {\bf Info} ( S_\infty D_0^e D_{-1} D_1^f + \mathfrak{D}_c^n ) ) = {\sf width} ( {\bf Info} ( S_+ D_1^{e+f} + \mathfrak{D}_c^n ) ), \\ + {\sf width} ( {\bf Info} ( S_\infty D_0^e D_1 D_{-1}^f + \mathfrak{D}_c^n ) ) = {\sf width} ( {\bf Info} ( S_- D_{-1}^{e+f} + \mathfrak{D}_c^n ) ). + \end{array} \] + \end{prop} + \begin{proof} + Let us first prove the first equality. From proofs of + Proposition~\ref{prop:Spos and width of information with base b} and + Proposition~\ref{prop:Sinf and width of information with base b 1} we get: + \[ \begin{array}{rcl} + {\bf Info} (S_+ D_1^{e+f} \mathfrak{D}_c^n) &=& [A-1,B-1], \\ + {\bf Info} (S_\infty D_0^{e+f} D_{-1} \mathfrak{D}_c^n ) + &=& [A,B] + \end{array} \] + where + \[ A=\frac{2^{n+e+f+1}}{2^n-c+1} \qquad B=\frac{2^{n+e+f+1}}{2^n-c-1}. \] + This implies the first equality. Similarly, we prove the second one. + \end{proof} + The proposition above enables us to use Proposition \ref{prop:width of + information with base 2} in order to determine the number of necessary + digits which will produce required absolute decimal precision. + Provided that $e$ and $f$ are finite numbers, i.e. we got two {\em good} + digits, in order to achieve absolute decimal precision of $10^{-k},\ k + \in \mathbb{N}$ we need not more than $e+f+2+n$ digit matrices, where + \[ n = \lceil e+f+ 3.322k + 2 \rceil. \] + + +\section{The Base Interval $[-1,1]$} +Comparing with $[0,\infty]$, the base interval $[-1,1]$ has an obvious + disadvantage in amount of effort required to calculate the information + of a matrix $M=\begin{pmatrix} a&c\\b&d \end{pmatrix}$: ${\bf Info}M= + [\frac{c-a}{d-b},\frac{c+a}{d+b}]$ if $\det M > 0$ or ${\bf Info}M= + [\frac{c+a}{d+b}, \frac{c-a}{d-b}]$ if $\det M < 0$. Furthermore, + testing the refining property is more complex with $[-1,1]$ as the + base interval ($|M(-1)| = |\frac{c-a}{d-b}| \le 1$ and $|M(1)| = + |\frac{c+a}{d+b}| \le 1$), comparing it with $[0,\infty]$ as the base + interval ($a,b,c$ and $d$ are of the same sign). + +Despite drawbacks above, it seems that $[-1,1]$ as the base interval + pays off at later stages, since digit matrices, which are given below, + the representation of the functions by tensors, emission and absorbtion, + \cite{edapot97, pot98}, are simpler due to more zeros which appear in + such representations. Pros and cons are discussed in more details in + \cite{hec99}. + +The four possible sign matrices are given as follows: + \[ \begin{array}{rclcrcl} + S^0 &=& \begin{pmatrix} 1&0 \\ 0&1 \end{pmatrix} = \textrm{Id}, &\qquad& + \textbf{Info}(S^0) &=& [-1,1], \vspace{0.5em} \\ + S^1 &=& \begin{pmatrix} 1&1 \\ -1&1 \end{pmatrix}, &\qquad& + \textbf{Info}(S^1) &=& [0,\infty], \vspace{0.5em} \\ + S^2 &=& \begin{pmatrix} 0&-1 \\ 1&0 \end{pmatrix}, &\qquad& + \textbf{Info}(S^2) &=& [1,-1], \vspace{0.5em} \\ + S^3 &=& \begin{pmatrix} 1&-1 \\ 1&1 \end{pmatrix}, &\qquad& + \textbf{Info}(S^3) &=& [\infty,0]. + \end{array} \] + The indices may be observed as exponents as $S^i S^j = S^{(i+j) \mod 4}$. + Digit matrices in base $b$ are given by: + \[ A_k = \begin{pmatrix} 1&k\\ 0&b \end{pmatrix}, \] + for integer $k$ such that $|k| < b$. $A_k$ maps $[-1,1]$ into + the interval$[\frac{k-1}{b},\frac{k+1}{b}]$, whose length is $2/b$. + A product $A_{d_1} A_{d_2} \ldots A_{d_n}$ corresponds to a real number + $r = \sum_{i=1}^n d_i b^{-i} \in [-1,1]$. + +When $b=2$ we get three matrices ($k=-1,0,1$): + \[ \begin{array}{rclcl} + A_{-1} &=& \begin{pmatrix} 1&-1 \\0&2 \end{pmatrix}, &\qquad& + \textbf{Info}(A_{-1}) = [-1,0], \vspace{0.5em} \\ + A_0 &=& \begin{pmatrix} 1&0\\0&2 \end{pmatrix}, &\qquad& + \textbf{Info}(A_0) = [-\frac{1}{2},-\frac{1}{2}], \vspace{0.5em} \\ + A_1 &=& \begin{pmatrix} 1&1\\0&2 \end{pmatrix}, &\qquad& + \textbf{Info}(A_1) = [0,1]. + \end{array} \] + Then, the product of digits $A_{d_1} A_{d_2} \ldots A_{d_n}$ corresponds + to well--known signed binary representation of a number from $[-1,1]$. + +It is easy to check that there exists isomorphism between representations + with base intervals $[0,\infty$ and $[-1,1]$. Every EFP with base + interval $[-1,1]$ can be easily translated into EFP with base $[0,\infty]$: + \[ \begin{array}{rcl} + S^i A_{d_1} \ldots A_{d_n} &=& S^i A_{d_1} \ldots A_{d_n} \\ + &=& S^i (S^3 S^1) A_{d_1} (S^3 S^1) \ldots (S^3 S^1) A_{d_n} (S^3 S^1) \\ + &=& (S^i S^3) (S^1 A_{d_1} S^3) (S^1 \ldots S^3) (S^1 A_{d_n} S^3) S^1 \\ + &=& S^{i+3} D_{d_1} \ldots D_{d_n} S^1. + \end{array} \] + We used the facts that $S^3 S^1 = S^0 = {\textrm Id}$, $D_{d_i} = + S^1 A_{d_i} S^3$. As $S^1 [-1,1] = [0,\infty]$ we have that + \[ \begin{array}{rcl} + {\bf Info}_{[-1,1]} (S^i A_{d_1} \ldots A_{d_n}) &=& + (S^i A_{d_1} \ldots A_{d_n}) [-1,1] \\ + &=& S^{i+3} D_{d_1} \ldots D_{d_n} S^1 [-1,1] \\ + &=& {\bf Info}_{[0,\infty]} (S^{i+3} D_{d_1} \ldots D_{d_n}). + \end{array} \] + +Therefore, trying to obtain any absolute accuracy from EFP which is given + by $S^i A_{d_1} A_{d_2} \ldots$ with the base interval $[-1,1]$ is + equivalent to problem of obtaining the same absolute accuracy from + $S^{i+3} D_{d_1} D_{d_2} \ldots$ with $[0,\infty]$ as the base interval. + We have: + \[ \begin{array}{ccc} + S^0 A_{d_1} A_{d_2} \ldots A_{d_n} [-1,1] &\Longleftrightarrow& + S_0 D_{d_1} D_{d_2} \ldots D_{d_n} [0,\infty], \vspace{0.5em} \\ + S^1 A_{d_1} A_{d_2} \ldots A_{d_n} [-1,1] &\Longleftrightarrow& + S_+ D_{d_1} D_{d_2} \ldots D_{d_n} [0,\infty], \vspace{0.5em} \\ + S^2 A_{d_1} A_{d_2} \ldots A_{d_n} [-1,1] &\Longleftrightarrow& + S_\infty D_{d_1} D_{d_2} \ldots D_{d_n} [0,\infty], \vspace{0.5em} \\ + S^3 A_{d_1} A_{d_2} \ldots A_{d_n} [-1,1] &\Longleftrightarrow& + S_- D_{d_1} D_{d_2} \ldots D_{d_n} [0,\infty]. + \end{array} \] + + +\subsection{Alternative Approaches with Base Interval $[-1,1]$} +We will mention another two approaches, namely {\sc mantissa--exponent + approach} and {\sc integer part--fractional part approach}. In the + former, any real number $x \in \mathbb{R}$ can be represented in base + $b$ as $x=b^e u$, where $e$ is a non--negative integer and $u = + \sum d_i b^{-i} \in [-1,1]$. This corresponds to the product of matrices + $\begin{pmatrix} b^e&0 \\ 0&1 \end{pmatrix} A_{d_1} A_{d_2} \ldots$. + Obtaining any required absolute precision in this approach is + straightforward. As each of digit matrices in base $b$, $A_{d_i}$, + contracts distances by $1/b$, the product: + \[ \begin{pmatrix} b^e&0 \\ 0&1 \end{pmatrix} A_{d_1} A_{d_2} \ldots + A_{d_{e+k}} \] + will induce an interval whose length is $2b^{-k}$, for any $k \in + \mathbb{N}$. + +In the integer part--fractional part approach, a real number $x \in + \mathbb{R}$ is represented as $x=e+u$, where $e$ is an integer and + $u = \sum d_i b^{-i} \in [-1,1]$. Obtaining an absolute precision for $x$ + is even simpler. Calculating $u$ within the same absolute precision will + solve the problem. The product of $k$ digit matrices, $A_{d_1} A_{d_2} + \ldots A_{d_k}$ will induce an interval of the length $2b^{-k}$. Hence, + $x=e+u$ will be calculated with error not greater than $2b^{-k}$. + + +\section*{Acknowledgements} +I would like to thank Reinhold Heckmann for all his help. + + +\begin{thebibliography}{99} + \bibitem{eda97} Edalat, A.: + Domains for Computation in Mathemetics, Physics and Exact + Real Arithmetic. + Bulletin of Symbolic Logic, Vol.~3 (1997). + + \bibitem{edapot97} Edalat, A., Potts, P.~J.: + A New Representation for Exact Real Numbers. + Electronic Notes in Theoretical Computer Science, Vol.~6 (1997). + + \bibitem{errhec00} Errington, L., Heckmann, R.: + Using the C--LFT Library (2000). + + \bibitem{hec99} Heckmann, R.: + How Many Argument Digits are Needed to Produce $n$ + Result Digits? Electronic Notes in Theoretical Computer Science, + Vol.~24 (2000). + + \bibitem{niekor95} Nielsen, A., Kornerup P.: + MSB--First Digit Serial Arithmetic. + Journal of Univ. Comp. Science, 1(7):523--543 (1995). + + \bibitem{pot98} Potts, P.~J.: + Exact Real Arithmetic Using M\"obius Transformations. + PhD Thesis, University of London, Imperial College (1998). + + \bibitem{vui90} Vuillemin, J.~E.: + Exact Real Computer Arithmetic with Continued Fractions. + IEEE Transactions on Computers, 39(8):1087--1105 (1990). +\end{thebibliography} + + +\end{document} \ No newline at end of file -- cgit v1.2.3