diff options
author | Michael Baudin <michael.baudin@scilab.org> | 2010-04-21 14:34:13 +0200 |
---|---|---|
committer | Michael Baudin <michael.baudin@scilab.org> | 2010-04-21 14:34:13 +0200 |
commit | 0e47449e28a13005b74fcec7058d606b57ec5654 (patch) | |
tree | 2a4e6b595e844386e575a2bc4f156af9b8a0585e /scilab_doc | |
parent | 834b412acd9048119e86c1cb87f63deeb54bb7e4 (diff) | |
download | scilab-0e47449e28a13005b74fcec7058d606b57ec5654.zip scilab-0e47449e28a13005b74fcec7058d606b57ec5654.tar.gz |
Move to the forge
Change-Id: I837eac548d6f51a8e10b1f1836d110964ec3a495
Diffstat (limited to 'scilab_doc')
-rw-r--r-- | scilab_doc/scilabisnotnaive/Makefile | 34 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/complexdivision.tex | 540 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/conclusion.tex | 21 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/introduction.tex | 83 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/numericalderivative.tex | 431 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/pythagoreansum.tex | 180 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/quadratic.tex | 511 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/references.tex | 10 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/scilabisnotnaive.bib | 359 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/scilabisnotnaive.pdf | bin | 313874 -> 0 bytes | |||
-rw-r--r-- | scilab_doc/scilabisnotnaive/scilabisnotnaive.tex | 146 | ||||
-rw-r--r-- | scilab_doc/scilabisnotnaive/simpleexperiments.tex | 149 |
12 files changed, 0 insertions, 2464 deletions
diff --git a/scilab_doc/scilabisnotnaive/Makefile b/scilab_doc/scilabisnotnaive/Makefile deleted file mode 100644 index 26ec0e1..0000000 --- a/scilab_doc/scilabisnotnaive/Makefile +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | #!/bin/sh | ||
2 | |||
3 | RAPPORT = scilabisnotnaive | ||
4 | |||
5 | LT = pdflatex | ||
6 | |||
7 | pdf: | ||
8 | $(LT) $(RAPPORT) | ||
9 | bibtex $(RAPPORT) | ||
10 | $(LT) $(RAPPORT) | ||
11 | $(LT) $(RAPPORT) | ||
12 | |||
13 | |||
14 | dvi: | ||
15 | $(LT) ${RAPPORT} | ||
16 | bibtex ${RAPPORT} | ||
17 | $(LT) ${RAPPORT} | ||
18 | |||
19 | clean: | ||
20 | rm -f *.aux | ||
21 | rm -f *.bbl | ||
22 | rm -f *.blg | ||
23 | rm -f *.log | ||
24 | rm -f *.out | ||
25 | rm -f *.toc | ||
26 | |||
27 | spell: | ||
28 | ispell -t ${RAPPORT}.tex | ||
29 | |||
30 | distclean: | ||
31 | make clean | ||
32 | rm -f ${RAPPORT}.pdf | ||
33 | rm -f ${RAPPORT}.dvi | ||
34 | |||
diff --git a/scilab_doc/scilabisnotnaive/complexdivision.tex b/scilab_doc/scilabisnotnaive/complexdivision.tex deleted file mode 100644 index e09ab56..0000000 --- a/scilab_doc/scilabisnotnaive/complexdivision.tex +++ /dev/null | |||
@@ -1,540 +0,0 @@ | |||
1 | \section{Complex division} | ||
2 | |||
3 | In that section, we analyse the problem of the complex division in Scilab. | ||
4 | We especially detail the difference between the mathematical, straightforward | ||
5 | formula and the floating point implementation. In the first part, we briefly report | ||
6 | the formulas which allow to | ||
7 | compute the real and imaginary parts of the division of two complex numbers. | ||
8 | We then present the naïve algorithm based on these mathematical formulas. | ||
9 | In the second part, we make some experiments in Scilab and compare our | ||
10 | naïve algorithm with the \emph{/} Scilab operator. | ||
11 | In the third part, we analyse | ||
12 | why and how floating point numbers must be taken into account when the | ||
13 | implementation of such division is required. | ||
14 | |||
15 | \subsection{Theory} | ||
16 | |||
17 | The formula which allows to compute the | ||
18 | real and imaginary parts of the division of two | ||
19 | complex numbers is | ||
20 | \begin{eqnarray} | ||
21 | \frac{a + ib}{c + id} = \frac{ac + bd}{c^2 + d^2} + i \frac{bc - ad}{c^2 + d^2} | ||
22 | \end{eqnarray} | ||
23 | |||
24 | The naive algorithm for the computation of the complex division | ||
25 | is presented in figure \ref{naive-complexdivision}. | ||
26 | |||
27 | \begin{figure}[htbp] | ||
28 | \begin{algorithmic} | ||
29 | \STATE $den \gets c^2 + d^2$ | ||
30 | \STATE $e \gets (ac + bd)/ den$ | ||
31 | \STATE $f \gets (bc - ad)/ den$ | ||
32 | \end{algorithmic} | ||
33 | \caption{Naive algorithm to compute the complex division} | ||
34 | \label{naive-complexdivision} | ||
35 | \end{figure} | ||
36 | |||
37 | |||
38 | \subsection{Experiments} | ||
39 | |||
40 | The following Scilab function is a straightforward implementation | ||
41 | of the previous formulas. | ||
42 | |||
43 | \lstset{language=Scilab} | ||
44 | \lstset{numbers=left} | ||
45 | \lstset{basicstyle=\footnotesize} | ||
46 | \lstset{keywordstyle=\bfseries} | ||
47 | \begin{lstlisting} | ||
48 | // | ||
49 | // naive -- | ||
50 | // Compute the complex division with a naive method. | ||
51 | // | ||
52 | function [cr,ci] = naive (ar , ai , br , bi ) | ||
53 | den = br * br + bi * bi; | ||
54 | cr = (ar * br + ai * bi) / den; | ||
55 | ci = (ai * br - ar * bi) / den; | ||
56 | endfunction | ||
57 | \end{lstlisting} | ||
58 | |||
59 | In the following script, one compares the naive implementation | ||
60 | against the Scilab implementation with two cases. | ||
61 | |||
62 | \lstset{language=Scilab} | ||
63 | \lstset{numbers=left} | ||
64 | \lstset{basicstyle=\footnotesize} | ||
65 | \lstset{keywordstyle=\bfseries} | ||
66 | \begin{lstlisting} | ||
67 | // Check that no obvious bug is in mathematical formula. | ||
68 | [cr ci] = naive ( 1.0 , 2.0 , 3.0 , 4.0 ) | ||
69 | (1.0 + 2.0 * %i)/(3.0 + 4.0 * %i) | ||
70 | // Check that mathematical formula does not perform well | ||
71 | // when large number are used. | ||
72 | [cr ci] = naive ( 1.0 , 1.0 , 1.0 , 1.e307 ) | ||
73 | (1.0 + 1.0 * %i)/(1.0 + 1.e307 * %i) | ||
74 | \end{lstlisting} | ||
75 | |||
76 | That prints out the following messages. | ||
77 | |||
78 | \begin{verbatim} | ||
79 | --> // Check that no obvious bug is in mathematical formula. | ||
80 | --> [cr ci] = naive ( 1.0 , 2.0 , 3.0 , 4.0 ) | ||
81 | ci = | ||
82 | 0.08 | ||
83 | cr = | ||
84 | 0.44 | ||
85 | --> (1.0 + 2.0 * %i)/(3.0 + 4.0 * %i) | ||
86 | ans = | ||
87 | 0.44 + 0.08i | ||
88 | --> // Check that mathematical formula does not perform well | ||
89 | --> // when large number are used. | ||
90 | --> [cr ci] = naive ( 1.0 , 1.0 , 1.0 , 1.e307 ) | ||
91 | ci = | ||
92 | 0. | ||
93 | cr = | ||
94 | 0. | ||
95 | --> (1.0 + 1.0 * %i)/(1.0 + 1.e307 * %i) | ||
96 | ans = | ||
97 | 1.000-307 - 1.000-307i | ||
98 | \end{verbatim} | ||
99 | |||
100 | The simple calculation confirms that there is no bug in the | ||
101 | naive implementation. But differences are appearing when | ||
102 | large numbers are used. In the second case, the naive | ||
103 | implementation does not give a single exact digit ! | ||
104 | |||
105 | To make more complete tests, the following script allows to | ||
106 | compare the results of the naive and the Scilab methods. | ||
107 | We use three kinds of relative errors | ||
108 | \begin{enumerate} | ||
109 | \item the relative error on the complex numbers, as a whole $e_1=\frac{|r - c|}{|r|}$, | ||
110 | \item the relative error on the real part $e_2=\frac{|r_r - c_r|}{|r_r|}$, | ||
111 | \item the relative error on the imaginary part $e_3=\frac{|r_i - c_i|}{|r_i|}$. | ||
112 | \end{enumerate} | ||
113 | |||
114 | \lstset{language=Scilab} | ||
115 | \lstset{numbers=left} | ||
116 | \lstset{basicstyle=\footnotesize} | ||
117 | \lstset{keywordstyle=\bfseries} | ||
118 | \begin{lstlisting} | ||
119 | // | ||
120 | // compare -- | ||
121 | // Compare 3 methods for complex division: | ||
122 | // * naive method | ||
123 | // * Smith method | ||
124 | // * C99 method | ||
125 | // | ||
126 | function compare (ar, ai, br, bi, rr, ri) | ||
127 | printf("****************\n"); | ||
128 | printf(" a = %10.5e + %10.5e * I\n" , ar , ai ); | ||
129 | printf(" b = %10.5e + %10.5e * I\n" , br , bi ); | ||
130 | [cr ci] = naive ( ar, ai, br, bi); | ||
131 | printf("Naive --> c = %10.5e + %10.5e * I\n" , cr , ci ); | ||
132 | c = cr + %i * ci | ||
133 | r = rr + %i * ri; | ||
134 | error1 = abs(r - c)/abs(r); | ||
135 | if (rr==0.0) then | ||
136 | error2 = abs(rr - cr); | ||
137 | else | ||
138 | error2 = abs(rr - cr)/abs(rr); | ||
139 | end | ||
140 | if (ri==0.0) then | ||
141 | error3 = abs(ri - ci); | ||
142 | else | ||
143 | error3 = abs(ri - ci)/abs(ri); | ||
144 | end | ||
145 | printf(" e1 = %10.5e, e2 = %10.5e, e3 = %10.5e\n", error1, error2, error3); | ||
146 | |||
147 | a = ar + ai * %i; | ||
148 | b = br + bi * %i; | ||
149 | c = a/b; | ||
150 | cr = real(c); | ||
151 | ci = imag(c); | ||
152 | printf("Scilab --> c = %10.5e + %10.5e * I\n" , cr , ci ); | ||
153 | c = cr + %i * ci | ||
154 | error1 = abs(r - c)/abs(r); | ||
155 | if (rr==0.0) then | ||
156 | error2 = abs(rr - cr); | ||
157 | else | ||
158 | error2 = abs(rr - cr)/abs(rr); | ||
159 | end | ||
160 | if (ri==0.0) then | ||
161 | error3 = abs(ri - ci); | ||
162 | else | ||
163 | error3 = abs(ri - ci)/abs(ri); | ||
164 | end | ||
165 | printf(" e1 = %10.5e, e2 = %10.5e, e3 = %10.5e\n", error1, error2, error3); | ||
166 | endfunction | ||
167 | \end{lstlisting} | ||
168 | |||
169 | In the following script, we compare the naive and the Scilab | ||
170 | implementations of the complex division with 4 couples of | ||
171 | complex numbers. The first instruction "ieee(2)" configures the | ||
172 | IEEE system so that Inf and Nan numbers are generated instead | ||
173 | of Scilab error messages. | ||
174 | |||
175 | \lstset{language=Scilab} | ||
176 | \lstset{numbers=left} | ||
177 | \lstset{basicstyle=\footnotesize} | ||
178 | \lstset{keywordstyle=\bfseries} | ||
179 | \begin{lstlisting} | ||
180 | ieee(2); | ||
181 | // Check that naive implementation does not have a bug | ||
182 | ar = 1; | ||
183 | ai = 2; | ||
184 | br = 3; | ||
185 | bi = 4; | ||
186 | rr = 11/25; | ||
187 | ri = 2/25; | ||
188 | compare (ar, ai, br, bi, rr, ri); | ||
189 | |||
190 | // Check that naive implementation is not robust with respect to overflow | ||
191 | ar = 1; | ||
192 | ai = 1; | ||
193 | br = 1; | ||
194 | bi = 1e307; | ||
195 | rr = 1e-307; | ||
196 | ri = -1e-307; | ||
197 | compare (ar, ai, br, bi, rr, ri); | ||
198 | |||
199 | // Check that naive implementation is not robust with respect to underflow | ||
200 | ar = 1; | ||
201 | ai = 1; | ||
202 | br = 1e-308; | ||
203 | bi = 1e-308; | ||
204 | rr = 1e308; | ||
205 | ri = 0.0; | ||
206 | compare (ar, ai, br, bi, rr, ri); | ||
207 | |||
208 | \end{lstlisting} | ||
209 | |||
210 | The script then prints out the following messages. | ||
211 | |||
212 | \begin{verbatim} | ||
213 | **************** | ||
214 | a = 1.00000e+000 + 2.00000e+000 * I | ||
215 | b = 3.00000e+000 + 4.00000e+000 * I | ||
216 | Naive --> c = 4.40000e-001 + 8.00000e-002 * I | ||
217 | e1 = 0.00000e+000, e2 = 0.00000e+000, e3 = 0.00000e+000 | ||
218 | Scilab --> c = 4.40000e-001 + 8.00000e-002 * I | ||
219 | e1 = 0.00000e+000, e2 = 0.00000e+000, e3 = 0.00000e+000 | ||
220 | **************** | ||
221 | a = 1.00000e+000 + 1.00000e+000 * I | ||
222 | b = 1.00000e+000 + 1.00000e+307 * I | ||
223 | Naive --> c = 0.00000e+000 + -0.00000e+000 * I | ||
224 | e1 = 1.00000e+000, e2 = 1.00000e+000, e3 = 1.00000e+000 | ||
225 | Scilab --> c = 1.00000e-307 + -1.00000e-307 * I | ||
226 | e1 = 2.09614e-016, e2 = 1.97626e-016, e3 = 1.97626e-016 | ||
227 | **************** | ||
228 | a = 1.00000e+000 + 1.00000e+000 * I | ||
229 | b = 1.00000e-308 + 1.00000e-308 * I | ||
230 | Naive --> c = Inf + Nan * I | ||
231 | e1 = Nan, e2 = Inf, e3 = Nan | ||
232 | Scilab --> c = 1.00000e+308 + 0.00000e+000 * I | ||
233 | e1 = 0.00000e+000, e2 = 0.00000e+000, e3 = 0.00000e+000 | ||
234 | \end{verbatim} | ||
235 | |||
236 | Cases \#2 and \#3 show very surprising results. | ||
237 | With case \#2, the relative errors shows that the naive | ||
238 | implementation does not give any correct digits. | ||
239 | In case \#3, the naive implementation produces Nan and Inf results. | ||
240 | In both cases, the Scilab command "/" gives accurate results, i.e., | ||
241 | with at least 16 significant digits. | ||
242 | |||
243 | \subsection{Explanations} | ||
244 | |||
245 | In this section, we analyse the reason why the naive implementation | ||
246 | of the complex division leads to inaccurate results. | ||
247 | In the first section, we perform algebraic computations | ||
248 | and show the problems of the naive formulas. | ||
249 | In the second section, we present the Smith's method. | ||
250 | |||
251 | \subsubsection{Algebraic computations} | ||
252 | |||
253 | Let's analyse the second test and check the division of test \#2 : | ||
254 | |||
255 | \begin{eqnarray} | ||
256 | \frac{1 + I}{1 + 10^{307} I } \approx 10^{-307} - 10^{-307} I | ||
257 | \end{eqnarray} | ||
258 | |||
259 | The naive formulas leads to the following results. | ||
260 | |||
261 | \begin{eqnarray} | ||
262 | den &=& c^2 + d^2 = 1^2 + (10^{307})^2 = 1 + 10^{614} \approx 10^{614} \\ | ||
263 | e &=& (ac + bd)/ den = (1*1 + 1*10^{307})/10^{614} \approx 10^{307}/10^{614} \approx 10^{-307}\\ | ||
264 | f &=& (bc - ad)/ den = (1*1 - 1*10^{307})/10^{614} \approx -10^{307}/10^{614} \approx -10^{-307} | ||
265 | \end{eqnarray} | ||
266 | |||
267 | To understand what happens with the naive implementation, one should | ||
268 | focus on the intermediate numbers. | ||
269 | If one uses the naive formula with double precision numbers, then | ||
270 | |||
271 | \begin{eqnarray} | ||
272 | den = c^2 + d^2 = 1^2 + (10^{307})^2 = Inf | ||
273 | \end{eqnarray} | ||
274 | |||
275 | This generates an overflow, because $(10^{307})^2 = 10^{614}$ is not representable | ||
276 | as a double precision number. | ||
277 | |||
278 | The $e$ and $f$ terms are then computed as | ||
279 | |||
280 | \begin{eqnarray} | ||
281 | e = (ac + bd)/ den = (1*1 + 1*10^{307})/Inf = 10^{307}/Inf = 0\\ | ||
282 | f = (bc - ad)/ den = (1*1 - 1*10^{307})/Inf = -10^{307}/Inf = 0 | ||
283 | \end{eqnarray} | ||
284 | |||
285 | The result is then computed without any single correct digit, | ||
286 | even though the initial numbers are all representable as double precision | ||
287 | numbers. | ||
288 | |||
289 | Let us check that the case \#3 is associated with an underflow. | ||
290 | We want to compute the following complex division : | ||
291 | |||
292 | \begin{eqnarray} | ||
293 | \frac{1 + I}{10^{-308} + 10^{-308} I}= 10^{308} | ||
294 | \end{eqnarray} | ||
295 | |||
296 | The naive mathematical formula gives | ||
297 | |||
298 | \begin{eqnarray} | ||
299 | den &=& c^2 + d^2 = (10^{-308})^2 + (10^{-308})^2 = 10^{-616} + 10^{-616} = 2 \times 10^{-616} \\ | ||
300 | e &=& (ac + bd)/ den = (1*10^{-308} + 1*10^{-308})/(2 \times 10^{-616}) \\ | ||
301 | &\approx& (2 \times 10^{-308})/(2 \times 10^{-616}) \approx 10^{308} \\ | ||
302 | f &=& (bc - ad)/ den = (1*10^{-308} - 1*10^{-308})/(2 \times 10^{-616}) \approx 0/10^{-616} \approx 0 | ||
303 | \end{eqnarray} | ||
304 | |||
305 | With double precision numbers, the computation is not performed this way. | ||
306 | Terms which are lower than $10^{-308}$ are too small to be representable | ||
307 | in double precision and will be reduced to 0 so that an underflow occurs. | ||
308 | |||
309 | \begin{eqnarray} | ||
310 | den &=& c^2 + d^2 = (10^{-308})^2 + (10^{-308})^2 = 10^{-616} + 10^{-616} = 0 \\ | ||
311 | e &=& (ac + bd)/ den = (1*10^{-308} + 1*10^{-308})/0 \approx 2\times 10^{-308}/0 \approx Inf \\ | ||
312 | f &=& (bc - ad)/ den = (1*10^{-308} - 1*10^{-308})/0 \approx 0/0 \approx NaN \\ | ||
313 | \end{eqnarray} | ||
314 | |||
315 | \subsubsection{The Smith's method} | ||
316 | |||
317 | In this section, we analyse the Smith's method and present the detailed | ||
318 | steps of this algorithm in the cases \#2 and \#3. | ||
319 | |||
320 | In Scilab, the algorithm which allows to perform the complex | ||
321 | division is done by the the \emph{wwdiv} routine, which implements the | ||
322 | Smith's method \cite{368661}, \cite{WhatEveryComputerScientist}. | ||
323 | The Smith's algorithm is based on normalization, which allow to | ||
324 | perform the division even if the terms are large. | ||
325 | |||
326 | The starting point of the method is the mathematical definition | ||
327 | |||
328 | \begin{eqnarray} | ||
329 | \frac{a + ib}{c + id} = \frac{ac + bd}{c^2 + d^2} + i \frac{bc - ad}{c^2 + d^2} | ||
330 | \end{eqnarray} | ||
331 | |||
332 | The method of Smith is based on the rewriting of this formula in | ||
333 | two different, but mathematically equivalent formulas. The basic | ||
334 | trick is to make the terms $d/c$ or $c/d$ appear in the formulas. | ||
335 | When $c$ is larger than $d$, the formula involving $d/c$ is used. | ||
336 | Instead, when $d$ is larger than $c$, the formula involving $c/d$ is | ||
337 | used. That way, the intermediate terms in the computations rarely | ||
338 | exceeds the overflow limits. | ||
339 | |||
340 | Indeed, the complex division formula can be written as | ||
341 | \begin{eqnarray} | ||
342 | \frac{a + ib}{c + id} = \frac{a + b(d/c)}{c + d(d/c)} + i \frac{b - a(d/c)}{c + d(d/c)} \\ | ||
343 | \frac{a + ib}{c + id} = \frac{a(c/d) + b}{c(c/d) + d} + i \frac{b(c/d) - a}{c(c/d) + d} | ||
344 | \end{eqnarray} | ||
345 | |||
346 | These formulas can be simplified as | ||
347 | |||
348 | \begin{eqnarray} | ||
349 | \frac{a + ib}{c + id} = \frac{a + br}{c + dr} + i \frac{b - ar}{c + dr}, \qquad r = d/c \\ | ||
350 | \frac{a + ib}{c + id} = \frac{ar + b}{cr + d} + i \frac{br - a}{cr + d} , \qquad r = c/d | ||
351 | \end{eqnarray} | ||
352 | |||
353 | The Smith's method is based on the following algorithm. | ||
354 | |||
355 | \begin{algorithmic} | ||
356 | \IF {$( |d| <= |c| )$} | ||
357 | \STATE $r \gets d / c$ | ||
358 | \STATE $den \gets c + r * d$ | ||
359 | \STATE $e \gets (a + b * r)/ den $ | ||
360 | \STATE $f \gets (b - a * r)/ den $ | ||
361 | \ELSE | ||
362 | \STATE $r \gets c / d$ | ||
363 | \STATE $den \gets d + r * c$ | ||
364 | \STATE $e \gets (a * r + b)/den $ | ||
365 | \STATE $f \gets (b * r - a)/den$ | ||
366 | \ENDIF | ||
367 | \end{algorithmic} | ||
368 | |||
369 | As we are going to check immediately, the Smith's method | ||
370 | performs very well in cases \#2 and \#3. | ||
371 | |||
372 | In the case \#2 $\frac{1+i}{1+10^{307} i}$, the Smith's method is | ||
373 | |||
374 | \begin{verbatim} | ||
375 | If ( |1e307| <= |1| ) > test false | ||
376 | Else | ||
377 | r = 1 / 1e307 = 1e-307 | ||
378 | den = 1e307 + 1e-307 * 1 = 1e307 | ||
379 | e = (1 * 1e-307 + 1) / 1e307 = 1e-307 | ||
380 | f = (1 * 1e-307 - 1) / 1e307 = -1e-307 | ||
381 | \end{verbatim} | ||
382 | |||
383 | In the case \#3 $\frac{1+i}{10^{-308}+10^{-308} i}$, the Smith's method is | ||
384 | |||
385 | \begin{verbatim} | ||
386 | If ( |1e-308| <= |1e-308| ) > test true | ||
387 | r = 1e-308 / 1e-308 = 1 | ||
388 | den = 1e-308 + 1 * 1e-308 = 2e-308 | ||
389 | e = (1 + 1 * 1) / 2e-308 = 1e308 | ||
390 | f = (1 - 1 * 1) / 2e-308 = 0 | ||
391 | \end{verbatim} | ||
392 | |||
393 | \subsection{One more step} | ||
394 | |||
395 | In that section, we show the limitations of the Smith's method. | ||
396 | |||
397 | Suppose that we want to perform the following division | ||
398 | |||
399 | \begin{eqnarray} | ||
400 | \frac{10^{307} + i 10^{-307}}{10^{205} + i 10^{-205}} = 10^{102} - i\, 10^{-308} | ||
401 | \end{eqnarray} | ||
402 | |||
403 | The following Scilab script allows to compare the naive implementation | ||
404 | and Scilab's implementation based on Smith's method. | ||
405 | |||
406 | \lstset{language=Scilab} | ||
407 | \lstset{numbers=left} | ||
408 | \lstset{basicstyle=\footnotesize} | ||
409 | \lstset{keywordstyle=\bfseries} | ||
410 | \begin{lstlisting} | ||
411 | // Check that Smith method is not robust in complicated cases | ||
412 | ar = 1e307; | ||
413 | ai = 1e-307; | ||
414 | br = 1e205; | ||
415 | bi = 1e-205; | ||
416 | rr = 1e102; | ||
417 | ri = -1e-308; | ||
418 | compare (ar, ai, br, bi, rr, ri); | ||
419 | \end{lstlisting} | ||
420 | |||
421 | When executed, the script produces the following output. | ||
422 | |||
423 | \begin{verbatim} | ||
424 | **************** | ||
425 | a = 1.00000e+307 + 1.00000e-307 * I | ||
426 | b = 1.00000e+205 + 1.00000e-205 * I | ||
427 | Naive --> c = Nan + -0.00000e+000 * I | ||
428 | e1 = 0.00000e+000, e2 = Nan, e3 = 1.00000e+000 | ||
429 | Scilab --> c = 1.00000e+102 + 0.00000e+000 * I | ||
430 | e1 = 0.00000e+000, e2 = 0.00000e+000, e3 = 1.00000e+000 | ||
431 | \end{verbatim} | ||
432 | |||
433 | As expected, the naive method produces a Nan. | ||
434 | More surprisingly, the Scilab output is also quite approximated. | ||
435 | More specifically, the imaginary part is computed as zero, although | ||
436 | we know that the exact result is $10^{-308}$, which is representable | ||
437 | as a double precision number. | ||
438 | The relative error based on the norm of the complex number is | ||
439 | accurate ($e1=0.0$), but the relative error based on the imaginary | ||
440 | part only is wrong ($e3=1.0$), without any correct digits. | ||
441 | |||
442 | The reference \cite{1667289} cites an analysis by Hough which | ||
443 | gives a bound for the relative error produced by the Smith's method | ||
444 | |||
445 | \begin{eqnarray} | ||
446 | |z_{comp} - z_{ref}| \leq \epsilon |z_{ref}| | ||
447 | \end{eqnarray} | ||
448 | |||
449 | The paper \cite{214414} (1985), though, makes a distinction between | ||
450 | the norm $|zcomp - zref|$ and the relative error for the | ||
451 | real and imaginary parts. It especially gives an example | ||
452 | where the imaginary part is wrong. | ||
453 | |||
454 | In the following paragraphs, we detail the derivation | ||
455 | of an example inspired by \cite{214414}, but which | ||
456 | shows the problem with double precision numbers (the example | ||
457 | in \cite{214414} is based on an abstract machine with | ||
458 | exponent range $\pm 99$). | ||
459 | |||
460 | Suppose that $m,n$ are integers so that the following | ||
461 | conditions are satisfied | ||
462 | \begin{eqnarray} | ||
463 | m >> 0\\ | ||
464 | n >> 0\\ | ||
465 | n >> m | ||
466 | \end{eqnarray} | ||
467 | |||
468 | One can easily prove that the complex division can be approximated | ||
469 | as | ||
470 | \begin{eqnarray} | ||
471 | \frac{10^n + i 10^{-n}}{10^m + i 10^{-m}} &=& | ||
472 | \frac{10^{n+m} + 10^{-(m+n)}}{10^{2m} + 10^{-2m}} + | ||
473 | i \frac{10^{m-n} - 10^{n-m}}{10^{2m} + 10^{-2m}} | ||
474 | \end{eqnarray} | ||
475 | |||
476 | Because of the above assumptions, that leads to the following | ||
477 | approximation | ||
478 | \begin{eqnarray} | ||
479 | \frac{10^n + i 10^{-n}}{10^m + i 10^{-m}} \approx 10^{n-m} - i 10^{n-3m} | ||
480 | \end{eqnarray} | ||
481 | which is correct up to approximately several 100 digits. | ||
482 | |||
483 | One then consider $m,n<308$ but so that | ||
484 | \begin{eqnarray} | ||
485 | n - 3 m = -308 | ||
486 | \end{eqnarray} | ||
487 | |||
488 | For example, the couple $m=205$, $n=307$ satisfies all conditions. | ||
489 | That leads to the complex division | ||
490 | |||
491 | \begin{eqnarray} | ||
492 | \frac{10^{307} + i 10^{-307}}{10^{205} + i 10^{-205}} = 10^{102} - i 10^{-308} | ||
493 | \end{eqnarray} | ||
494 | |||
495 | It is easy to check that the naive implementation does not | ||
496 | prove to be accurate on that example. | ||
497 | We have already shown that the Smith's method is failing to | ||
498 | produce a non zero imaginary part. Indeed, the steps of the | ||
499 | Smith algorithm are the following | ||
500 | |||
501 | \begin{verbatim} | ||
502 | If ( |1e-205| <= |1e205| ) > test true | ||
503 | r = 1e-205 / 1e205 = 0 | ||
504 | den = 1e205 + 0 * 1e-205 = 1e205 | ||
505 | e = (10^307 + 10^-307 * 0) / 1e205 = 1e102 | ||
506 | f = (10^-307 - 10^307 * 0) / 1e205 = 0 | ||
507 | \end{verbatim} | ||
508 | |||
509 | The real part is accurate, but the imaginary part has no | ||
510 | correct digit. One can also check that the inequality $|z_{comp} - z_{ref}| \leq \epsilon |z_{ref}|$ | ||
511 | is still true. | ||
512 | |||
513 | The limits of Smith's method have been reduced in Stewart's paper \cite{214414}. | ||
514 | The new algorithm is based on the theorem which states that if $x_1 \ldots x_n$ | ||
515 | are $n$ floating point representable numbers then $\min_{i=1,n}(x_i) \cdot \max_{i=1,n}(x_i)$ | ||
516 | is also representable. The algorithm uses that theorem to perform a | ||
517 | correct computation. | ||
518 | |||
519 | Stewart's algorithm is superseded by the one by Li et al. \cite{567808}, but | ||
520 | also by Kahan's \cite{KAHAN1987}, which, from \cite{1039814}, is the one implemented | ||
521 | in the C99 standard. | ||
522 | |||
523 | \subsection{References} | ||
524 | |||
525 | The 1962 paper by R. Smith \cite{368661} describes the algorithm which is used in | ||
526 | Scilab. The Goldberg paper \cite{WhatEveryComputerScientist} introduces many | ||
527 | of the subjects presented in this document, including the problem of the | ||
528 | complex division. The 1985 paper by Stewart \cite{214414} gives insight to | ||
529 | distinguish between the relative error of the complex numbers and the relative | ||
530 | error made on real and imaginary parts. It also gives an algorithm based | ||
531 | on min and max functions. Knuth's bible \cite{artcomputerKnuthVol2} presents | ||
532 | the Smith's method in section 4.2.1, as exercise 16. Knuth also gives | ||
533 | references \cite{Wynn:1962:AAP} and \cite{DBLP:journals/cacm/Friedland67}. | ||
534 | The 1967 paper by Friedland \cite{DBLP:journals/cacm/Friedland67} describes | ||
535 | two algorithms to compute the absolute value of a complex number | ||
536 | $|x+iy| = \sqrt{x^2+y^2}$ and the square root of a | ||
537 | complex number $\sqrt{x+iy}$. | ||
538 | |||
539 | |||
540 | |||
diff --git a/scilab_doc/scilabisnotnaive/conclusion.tex b/scilab_doc/scilabisnotnaive/conclusion.tex deleted file mode 100644 index 04d191a..0000000 --- a/scilab_doc/scilabisnotnaive/conclusion.tex +++ /dev/null | |||
@@ -1,21 +0,0 @@ | |||
1 | \section{Conclusion} | ||
2 | |||
3 | We have presented several cases where the mathematically perfect | ||
4 | algorithm (i.e. without obvious bugs) do not produce accurate results | ||
5 | with the computer in particular situations. | ||
6 | In this paper, we have shown that specific methods can be used to | ||
7 | cure some of the problems. We have also shown that these methods | ||
8 | do not cure all the problems. | ||
9 | |||
10 | All Scilab algorithms take floating point values as inputs, | ||
11 | and returns floating point values as output. Problems arrive when the | ||
12 | intermediate calculations involve terms which are | ||
13 | not representable as floating point values. | ||
14 | |||
15 | That article should not discourage us | ||
16 | from implementing our own algorithms. Rather, it should warn | ||
17 | us that some specific work is to be done when we translate the | ||
18 | mathematical material into an algorithm. | ||
19 | That article shows us that accurate results can be obtained with | ||
20 | floating point numbers, provided that we are less \emph{naïve}. | ||
21 | |||
diff --git a/scilab_doc/scilabisnotnaive/introduction.tex b/scilab_doc/scilabisnotnaive/introduction.tex deleted file mode 100644 index db7affb..0000000 --- a/scilab_doc/scilabisnotnaive/introduction.tex +++ /dev/null | |||
@@ -1,83 +0,0 @@ | |||
1 | \section{Introduction} | ||
2 | |||
3 | Scilab takes care of your numbers. | ||
4 | While most mathematics books deal with exact formulas, | ||
5 | Scilab uses algorithms which are specifically designed for | ||
6 | computers. | ||
7 | |||
8 | As a practical example of the problem considered in this | ||
9 | document, consider the following experiments. The following is an example of | ||
10 | a Scilab 5.1 session, where we compute 0.1 by two ways. | ||
11 | |||
12 | \begin{verbatim} | ||
13 | -->format(25) | ||
14 | -->0.1 | ||
15 | ans = | ||
16 | 0.1000000000000000055511 | ||
17 | -->1.0-0.9 | ||
18 | ans = | ||
19 | 0.0999999999999999777955 | ||
20 | \end{verbatim} | ||
21 | |||
22 | I guess that for a person who has never heard of these problems, | ||
23 | this experiment may be a shock. To get things clearer, let's | ||
24 | check that the sine function is approximated. | ||
25 | |||
26 | \begin{verbatim} | ||
27 | -->format(25) | ||
28 | -->sin(0.0) | ||
29 | ans = | ||
30 | 0. | ||
31 | -->sin(%pi) | ||
32 | ans = | ||
33 | 0.0000000000000001224647 | ||
34 | \end{verbatim} | ||
35 | |||
36 | The difficulty is generated by the fact that, while | ||
37 | the mathematics treat with \emph{real} numbers, the | ||
38 | computer deals with their \emph{floating point representations}. | ||
39 | This is the difference between the | ||
40 | \emph{naive}, mathematical, approach, and the \emph{numerical}, | ||
41 | floating-point aware, implementation. | ||
42 | (The detailed explanations of the previous examples are presented | ||
43 | in the appendix of this document.) | ||
44 | |||
45 | In this article, we will show examples of these problems by | ||
46 | using the following theoretical and experimental approach. | ||
47 | \begin{enumerate} | ||
48 | \item First, we will derive the basic theory at the core of a numerical | ||
49 | formula. | ||
50 | \item Then we will implement it in Scilab and compare with the | ||
51 | result given by the primitive provided by Scilab. | ||
52 | As we will see, some particular cases do not work well | ||
53 | with our formula, while the Scilab primitive computes a correct | ||
54 | result. | ||
55 | \item Then we will analyse the \emph{reasons} of the differences. | ||
56 | \end{enumerate} | ||
57 | |||
58 | When we compute errors, we use the relative error formula | ||
59 | \begin{eqnarray} | ||
60 | e_r=\frac{|x_c-x_e|}{|x_e|}, \qquad x_e\neq 0 | ||
61 | \end{eqnarray} | ||
62 | where $x_c\in\RR$ is the computed value, and $x_e\in\RR$ is the | ||
63 | expected value, i.e. the mathematically exact result. | ||
64 | The relative error is linked with the number of significant | ||
65 | digits in the computed value $x_c$. For example, if the relative | ||
66 | error $e_r=10^{-6}$, then the number of significant digits is 6. | ||
67 | |||
68 | When the expected value is zero, the relative error cannot | ||
69 | be computed, and we then use the absolute error instead | ||
70 | \begin{eqnarray} | ||
71 | e_a=|x_c-x_e|. | ||
72 | \end{eqnarray} | ||
73 | |||
74 | Before getting into the details, it is important to | ||
75 | know that real variables in the Scilab language are stored in | ||
76 | \emph{double precision} variables. Since Scilab is | ||
77 | following the IEEE 754 standard, that means that real | ||
78 | variables are stored with 64 bits precision. | ||
79 | As we shall see later, this has a strong influence on the | ||
80 | results. | ||
81 | |||
82 | |||
83 | |||
diff --git a/scilab_doc/scilabisnotnaive/numericalderivative.tex b/scilab_doc/scilabisnotnaive/numericalderivative.tex deleted file mode 100644 index 7ce0768..0000000 --- a/scilab_doc/scilabisnotnaive/numericalderivative.tex +++ /dev/null | |||
@@ -1,431 +0,0 @@ | |||
1 | \section{Numerical derivatives} | ||
2 | |||
3 | In this section, we detail the computation of the numerical derivative of | ||
4 | a given function. | ||
5 | |||
6 | In the first part, we briefly report the first order forward formula, which | ||
7 | is based on the Taylor theorem. | ||
8 | We then present the naïve algorithm based on these mathematical formulas. | ||
9 | In the second part, we make some experiments in Scilab and compare our | ||
10 | naïve algorithm with the \emph{derivative} Scilab primitive. | ||
11 | In the third part, we analyse | ||
12 | why and how floating point numbers must be taken into account when the | ||
13 | numerical derivatives are to be computed. | ||
14 | |||
15 | \subsection{Theory} | ||
16 | |||
17 | The basic result is the Taylor formula with one variable \cite{dixmier} | ||
18 | |||
19 | \begin{eqnarray} | ||
20 | f(x+h) &=& f(x) | ||
21 | + h f^\prime(x) | ||
22 | +\frac{h^2}{2} f^{\prime \prime}(x) | ||
23 | +\frac{h^3}{6} f^{\prime \prime \prime}(x) | ||
24 | +\frac{h^4}{24} f^{\prime \prime \prime \prime}(x) + \mathcal{O}(h^5) | ||
25 | \end{eqnarray} | ||
26 | |||
27 | If we write the Taylor formulae of a one variable function $f(x)$ | ||
28 | \begin{eqnarray} | ||
29 | f(x+h) &\approx& f(x) + h f^\prime(x) + \frac{h^2}{2} f^{\prime \prime}(x) | ||
30 | \end{eqnarray} | ||
31 | we get the forward difference which approximates the first derivative at order 1 | ||
32 | \begin{eqnarray} | ||
33 | f^\prime(x) &\approx& \frac{f(x+h) - f(x)}{h} - \frac{h}{2} f^{\prime \prime}(x) | ||
34 | \end{eqnarray} | ||
35 | |||
36 | The naive algorithm to compute the numerical derivative of | ||
37 | a function of one variable is presented in figure \ref{naive-numericalderivative}. | ||
38 | |||
39 | \begin{figure}[htbp] | ||
40 | \begin{algorithmic} | ||
41 | \STATE $f'(x) \gets (f(x+h)-f(x))/h$ | ||
42 | \end{algorithmic} | ||
43 | \caption{Naive algorithm to compute the numerical derivative of a function of one variable} | ||
44 | \label{naive-numericalderivative} | ||
45 | \end{figure} | ||
46 | |||
47 | \subsection{Experiments} | ||
48 | |||
49 | The following Scilab function is a straightforward implementation | ||
50 | of the previous algorithm. | ||
51 | |||
52 | \lstset{language=Scilab} | ||
53 | \lstset{numbers=left} | ||
54 | \lstset{basicstyle=\footnotesize} | ||
55 | \lstset{keywordstyle=\bfseries} | ||
56 | \begin{lstlisting} | ||
57 | function fp = myfprime(f,x,h) | ||
58 | fp = (f(x+h) - f(x))/h; | ||
59 | endfunction | ||
60 | \end{lstlisting} | ||
61 | |||
62 | In our experiments, we will compute the derivatives of the | ||
63 | square function $f(x)=x^2$, which is $f'(x)=2x$. | ||
64 | The following Scilab script implements the square function. | ||
65 | |||
66 | \lstset{language=Scilab} | ||
67 | \lstset{numbers=left} | ||
68 | \lstset{basicstyle=\footnotesize} | ||
69 | \lstset{keywordstyle=\bfseries} | ||
70 | \begin{lstlisting} | ||
71 | function y = myfunction (x) | ||
72 | y = x*x; | ||
73 | endfunction | ||
74 | \end{lstlisting} | ||
75 | |||
76 | The most naïve idea is that the computed relative error | ||
77 | is small when the step $h$ is small. Because \emph{small} | ||
78 | is not a priori clear, we take $\epsilon\approx 10^{-16}$ | ||
79 | in double precision as a good candidate for \emph{small}. | ||
80 | In the following script, we compare the computed | ||
81 | relative error produced by our naïve method with step | ||
82 | $h=\epsilon$ and the \emph{derivative} primitive with | ||
83 | default step. | ||
84 | |||
85 | \lstset{language=Scilab} | ||
86 | \lstset{numbers=left} | ||
87 | \lstset{basicstyle=\footnotesize} | ||
88 | \lstset{keywordstyle=\bfseries} | ||
89 | \begin{lstlisting} | ||
90 | x = 1.0; | ||
91 | fpref = derivative(myfunction,x,order=1); | ||
92 | e = abs(fpref-2.0)/2.0; | ||
93 | mprintf("Scilab f''=%e, error=%e\n", fpref,e); | ||
94 | h = 1.e-16; | ||
95 | fp = myfprime(myfunction,x,h); | ||
96 | e = abs(fp-2.0)/2.0; | ||
97 | mprintf("Naive f''=%e, h=%e, error=%e\n", fp,h,e); | ||
98 | \end{lstlisting} | ||
99 | |||
100 | When executed, the previous script prints out : | ||
101 | |||
102 | \begin{verbatim} | ||
103 | Scilab f'=2.000000e+000, error=7.450581e-009 | ||
104 | Naive f'=0.000000e+000, h=1.000000e-016, error=1.000000e+000 | ||
105 | \end{verbatim} | ||
106 | |||
107 | Our naïve method seems to be quite inaccurate and does not have | ||
108 | even 1 significant digit! | ||
109 | The Scilab primitive, instead, has 9 significant digits. | ||
110 | |||
111 | Since our faith is based on the truth of the mathematical | ||
112 | theory, some deeper experiments must be performed. | ||
113 | We then make the following experiment, by taking an | ||
114 | initial step $h=1.0$ and then dividing $h$ by 10 at each | ||
115 | step of a loop with 20 iterations. | ||
116 | |||
117 | \lstset{language=Scilab} | ||
118 | \lstset{numbers=left} | ||
119 | \lstset{basicstyle=\footnotesize} | ||
120 | \lstset{keywordstyle=\bfseries} | ||
121 | \begin{lstlisting} | ||
122 | x = 1.0; | ||
123 | fpref = derivative(myfunction,x,order=1); | ||
124 | e = abs(fpref-2.0)/2.0; | ||
125 | mprintf("Scilab f''=%e, error=%e\n", fpref,e); | ||
126 | h = 1.0; | ||
127 | for i=1:20 | ||
128 | h=h/10.0; | ||
129 | fp = myfprime(myfunction,x,h); | ||
130 | e = abs(fp-2.0)/2.0; | ||
131 | mprintf("Naive f''=%e, h=%e, error=%e\n", fp,h,e); | ||
132 | end | ||
133 | \end{lstlisting} | ||
134 | |||
135 | Scilab then produces the following output. | ||
136 | |||
137 | \begin{verbatim} | ||
138 | Scilab f'=2.000000e+000, error=7.450581e-009 | ||
139 | Naive f'=2.100000e+000, h=1.000000e-001, error=5.000000e-002 | ||
140 | Naive f'=2.010000e+000, h=1.000000e-002, error=5.000000e-003 | ||
141 | Naive f'=2.001000e+000, h=1.000000e-003, error=5.000000e-004 | ||
142 | Naive f'=2.000100e+000, h=1.000000e-004, error=5.000000e-005 | ||
143 | Naive f'=2.000010e+000, h=1.000000e-005, error=5.000007e-006 | ||
144 | Naive f'=2.000001e+000, h=1.000000e-006, error=4.999622e-007 | ||
145 | Naive f'=2.000000e+000, h=1.000000e-007, error=5.054390e-008 | ||
146 | Naive f'=2.000000e+000, h=1.000000e-008, error=6.077471e-009 | ||
147 | Naive f'=2.000000e+000, h=1.000000e-009, error=8.274037e-008 | ||
148 | Naive f'=2.000000e+000, h=1.000000e-010, error=8.274037e-008 | ||
149 | Naive f'=2.000000e+000, h=1.000000e-011, error=8.274037e-008 | ||
150 | Naive f'=2.000178e+000, h=1.000000e-012, error=8.890058e-005 | ||
151 | Naive f'=1.998401e+000, h=1.000000e-013, error=7.992778e-004 | ||
152 | Naive f'=1.998401e+000, h=1.000000e-014, error=7.992778e-004 | ||
153 | Naive f'=2.220446e+000, h=1.000000e-015, error=1.102230e-001 | ||
154 | Naive f'=0.000000e+000, h=1.000000e-016, error=1.000000e+000 | ||
155 | Naive f'=0.000000e+000, h=1.000000e-017, error=1.000000e+000 | ||
156 | Naive f'=0.000000e+000, h=1.000000e-018, error=1.000000e+000 | ||
157 | Naive f'=0.000000e+000, h=1.000000e-019, error=1.000000e+000 | ||
158 | Naive f'=0.000000e+000, h=1.000000e-020, error=1.000000e+000 | ||
159 | \end{verbatim} | ||
160 | |||
161 | We see that the relative error begins by decreasing, and then | ||
162 | is increasing. | ||
163 | Obviously, the optimum step is approximately $h=10^{-8}$, where the | ||
164 | relative error is approximately $e_r=6\times 10^{-9}$. | ||
165 | We should not be surprised to see that Scilab has computed | ||
166 | a derivative which is near the optimum. | ||
167 | |||
168 | \subsection{Explanations} | ||
169 | |||
170 | \subsubsection{Floating point implementation} | ||
171 | |||
172 | With a floating point computer, the total | ||
173 | error that we get from the forward difference approximation | ||
174 | is (skipping the multiplication constants) the sum of the | ||
175 | linearization error $E_l = h$ (i.e. the $\mathcal{O}(h)$ term) | ||
176 | and the rounding error $rf(x)$ on the difference $f(x+h) - f(x)$ | ||
177 | \begin{eqnarray} | ||
178 | E = \frac{rf(x)}{h} + \frac{h}{2} f^{\prime \prime}(x) | ||
179 | \end{eqnarray} | ||
180 | When $h\rightarrow 0$, the error is then the sum of a | ||
181 | term which converges toward $+\infty$ and a term which converges toward 0. | ||
182 | The total error is minimized when both terms are equal. | ||
183 | With a single precision computation, the rounding error is $r = 10^{-7}$ | ||
184 | and with a double precision computation, the rounding error is $r = 10^{-16}$. | ||
185 | We make here the assumption that the values $f(x)$ and | ||
186 | $f^{\prime \prime}(x)$ are near 1 so that the error can be written | ||
187 | \begin{eqnarray} | ||
188 | E = \frac{r}{h} + h | ||
189 | \end{eqnarray} | ||
190 | We want to compute the step $h$ from the rounding error $r$ with a | ||
191 | step satisfying | ||
192 | \begin{eqnarray} | ||
193 | h = r^\alpha | ||
194 | \end{eqnarray} | ||
195 | for some $\alpha > 0$. | ||
196 | The total error is therefore | ||
197 | \begin{eqnarray} | ||
198 | E = r^{1-\alpha} + r^\alpha | ||
199 | \end{eqnarray} | ||
200 | The total error is minimized when both terms are equal, that is, | ||
201 | when the exponents are equal $1-\alpha = \alpha$ which leads to | ||
202 | \begin{eqnarray} | ||
203 | \alpha = \frac{1}{2} | ||
204 | \end{eqnarray} | ||
205 | We conclude that the step which minimizes the error is | ||
206 | \begin{eqnarray} | ||
207 | h = r^{1/2} | ||
208 | \end{eqnarray} | ||
209 | and the associated error is | ||
210 | \begin{eqnarray} | ||
211 | E = 2 r^{1/2} | ||
212 | \end{eqnarray} | ||
213 | |||
214 | Typical values with single precision are $h = 10^{-4}$ and $E=2\times 10^{-4}$ | ||
215 | and with double precision $h = 10^{-8}$ and $E=2\times 10^{-8}$. | ||
216 | These are the minimum errors which are achievable with a forward difference | ||
217 | numerical derivative. | ||
218 | |||
219 | To get a significant value of the step $h$, the step is computed | ||
220 | with respect to the point where the derivative is to be computed | ||
221 | \begin{eqnarray} | ||
222 | h = r^{1/2} x | ||
223 | \end{eqnarray} | ||
224 | |||
225 | One can generalize the previous computation with the | ||
226 | assumption that the scaling parameter from the Taylor | ||
227 | expansion is $h^{\alpha_1}$ and the order of the formula | ||
228 | is $\mathcal{O}(h^{\alpha_2})$. The total error is then | ||
229 | \begin{eqnarray} | ||
230 | E = \frac{r}{h^{\alpha_1}} + h^{\alpha_2} | ||
231 | \end{eqnarray} | ||
232 | The optimal step is then | ||
233 | \begin{eqnarray} | ||
234 | h = r^{\frac{1}{\alpha_1 + \alpha_2}} | ||
235 | \end{eqnarray} | ||
236 | and the associated error is | ||
237 | \begin{eqnarray} | ||
238 | E = 2 r^{\frac{\alpha_2}{\alpha_1 + \alpha_2}} | ||
239 | \end{eqnarray} | ||
240 | |||
241 | |||
242 | An additional trick \cite{NumericalRecipes} is to compute the | ||
243 | step $h$ so that the rounding error for the sum $x+h$ is minimum. | ||
244 | This is performed by the following algorithm, which implies a temporary | ||
245 | variable $t$ | ||
246 | \begin{eqnarray} | ||
247 | t = x + h\\ | ||
248 | h = t - x | ||
249 | \end{eqnarray} | ||
250 | |||
251 | |||
252 | \subsubsection{Results} | ||
253 | |||
254 | In the following results, the variable $x$ is either a | ||
255 | scalar $x\in \RR$ or a vector $x\in \RR^n$. | ||
256 | When $x$ is a vector, the step $h_i$ is defined by | ||
257 | \begin{eqnarray} | ||
258 | h_i = (0,\ldots,0,1,0,\ldots,0) | ||
259 | \end{eqnarray} | ||
260 | so that the only non-zero component of $h_i$ is the $i$-th component. | ||
261 | |||
262 | \begin{itemize} | ||
263 | |||
264 | \item First derivative : forward 2 points | ||
265 | \begin{eqnarray} | ||
266 | f^\prime(x) &\approx& \frac{f(x+h) - f(x)}{h} + \mathcal{O}(h) | ||
267 | \end{eqnarray} | ||
268 | Optimal step : $h = r^{1/2}$ and error $E=2r^{1/2}$.\\ | ||
269 | Single precision : $h \approx 10^{-4}$ and $E\approx 10^{-4}$.\\ | ||
270 | Double precision $h \approx 10^{-8}$ and $E\approx 10^{-8}$. | ||
271 | |||
272 | \item First derivative : backward 2 points | ||
273 | \begin{eqnarray} | ||
274 | f^\prime(x) &\approx& \frac{f(x) - f(x-h)}{h} + \mathcal{O}(h) | ||
275 | \end{eqnarray} | ||
276 | Optimal step : $h = r^{1/2}$ and error $E=2r^{1/2}$.\\ | ||
277 | Single precision : $h \approx 10^{-4}$ and $E\approx 10^{-4}$.\\ | ||
278 | Double precision $h \approx 10^{-8}$ and $E\approx 10^{-8}$. | ||
279 | |||
280 | \item First derivative : centered 2 points | ||
281 | \begin{eqnarray} | ||
282 | f^\prime(x) &\approx& \frac{f(x+h) - f(x-h)}{2h} + \mathcal{O}(h^2) | ||
283 | \end{eqnarray} | ||
284 | Optimal step : $h = r^{1/3}$ and error $E=2r^{2/3}$.\\ | ||
285 | Single precision : $h \approx 10^{-3}$ and $E\approx 10^{-5}$.\\ | ||
286 | Double precision $h \approx 10^{-5}$ and $E\approx 10^{-10}$. | ||
287 | |||
288 | \end{itemize} | ||
289 | |||
290 | \subsubsection{Robust algorithm} | ||
291 | |||
292 | The robust algorithm to compute the numerical derivative of | ||
293 | a function of one variable is presented in figure \ref{robust-numericalderivative}. | ||
294 | |||
295 | \begin{figure}[htbp] | ||
296 | \begin{algorithmic} | ||
297 | \STATE $h \gets \sqrt{\epsilon}$ | ||
298 | \STATE $f'(x) \gets (f(x+h)-f(x))/h$ | ||
299 | \end{algorithmic} | ||
300 | \caption{A more robust algorithm to compute the numerical derivative of a function of one variable} | ||
301 | \label{robust-numericalderivative} | ||
302 | \end{figure} | ||
303 | |||
304 | \subsection{One more step} | ||
305 | |||
306 | In this section, we analyse the behaviour of \emph{derivative} | ||
307 | when the point $x$ is either large $x \rightarrow \infty$, | ||
308 | when $x$ is small $x \rightarrow 0$ and when $x = 0$. | ||
309 | We compare these results with the \emph{numdiff} command, | ||
310 | which does not use the same step strategy. As we are going | ||
311 | to see, both commands perform the same when $x$ is near 1, but | ||
312 | perform very differently when $x$ is large or small. | ||
313 | |||
314 | We have already explained the theory of the floating | ||
315 | point implementation of the \emph{derivative} command. | ||
316 | Is it completely \emph{bulletproof} ? Not exactly. | ||
317 | |||
318 | See for example the following Scilab session, where one computes the | ||
319 | numerical derivative of $f(x)=x^2$ for $x=10^{-100}$. The | ||
320 | expected result is $f'(x) = 2. \times 10^{-100}$. | ||
321 | |||
322 | \begin{verbatim} | ||
323 | -->fp = derivative(myfunction,1.e-100,order=1) | ||
324 | fp = | ||
325 | 0.0000000149011611938477 | ||
326 | -->fe=2.e-100 | ||
327 | fe = | ||
328 | 2.000000000000000040-100 | ||
329 | -->e = abs(fp-fe)/fe | ||
330 | e = | ||
331 | 7.450580596923828243D+91 | ||
332 | \end{verbatim} | ||
333 | |||
334 | The result does not have any significant digits. | ||
335 | |||
336 | The explanation is that the step is computed with $h = \sqrt{\epsilon}\approx 10^{-8}$. | ||
337 | Then $f(x+h)=f(10^{-100} + 10^{-8}) \approx f(10^{-8}) = 10^{-16}$, because the | ||
338 | term $10^{-100}$ is much smaller than $10^{-8}$. The result of the | ||
339 | computation is therefore $(f(x+h) - f(x))/h = (10^{-16} - 10^{-200})/10^{-8} \approx 10^{-8}$. | ||
340 | |||
341 | The additional experiment | ||
342 | |||
343 | \begin{verbatim} | ||
344 | -->sqrt(%eps) | ||
345 | ans = | ||
346 | 0.0000000149011611938477 | ||
347 | \end{verbatim} | ||
348 | |||
349 | allows one to check that the result of the computation simply is $\sqrt{\epsilon}$. | ||
350 | That experiment shows that the \emph{derivative} command uses a | ||
351 | wrong default step $h$ when $x$ is very small. | ||
352 | |||
353 | To improve the accuracy of the computation, one can take control of the | ||
354 | step $h$. A reasonable solution is to use $h=\sqrt{\epsilon}|x|$ so that the | ||
355 | step is scaled depending on $x$. | ||
356 | The following script illustrates that method, which produces | ||
357 | results with 8 significant digits. | ||
358 | |||
359 | \begin{verbatim} | ||
360 | -->fp = derivative(myfunction,1.e-100,order=1,h=sqrt(%eps)*1.e-100) | ||
361 | fp = | ||
362 | 2.000000013099139394-100 | ||
363 | -->fe=2.e-100 | ||
364 | fe = | ||
365 | 2.000000000000000040-100 | ||
366 | -->e = abs(fp-fe)/fe | ||
367 | e = | ||
368 | 0.0000000065495696770794 | ||
369 | \end{verbatim} | ||
370 | |||
371 | But when $x$ is exactly zero, the scaling method cannot work, because | ||
372 | it would produce the step $h=0$, and therefore a division by zero | ||
373 | exception. In that case, the default step provides a good accuracy. | ||
374 | |||
375 | Another command is available in Scilab to compute the | ||
376 | numerical derivatives of a given function, that is \emph{numdiff}. | ||
377 | The \emph{numdiff} command uses the step | ||
378 | \begin{eqnarray} | ||
379 | h=\sqrt{\epsilon}(1+10^{-3}|x|). | ||
380 | \end{eqnarray} | ||
381 | In the following paragraphs, we try to analyse why this formula | ||
382 | has been chosen. As we are going to check experimentally, this step | ||
383 | formula performs better than \emph{derivative} when $x$ is | ||
384 | large. | ||
385 | |||
386 | As we can see the following session, the behaviour is approximately | ||
387 | the same when the value of $x$ is 1. | ||
388 | |||
389 | \begin{verbatim} | ||
390 | -->fp = numdiff(myfunction,1.0) | ||
391 | fp = | ||
392 | 2.0000000189353417390237 | ||
393 | -->fe=2.0 | ||
394 | fe = | ||
395 | 2. | ||
396 | -->e = abs(fp-fe)/fe | ||
397 | e = | ||
398 | 9.468D-09 | ||
399 | \end{verbatim} | ||
400 | |||
401 | The accuracy is slightly decreased with respect to the optimal | ||
402 | value 7.450581e-009 which was produced by derivative. But the number | ||
403 | of significant digits is approximately the same, i.e. 9 digits. | ||
404 | |||
405 | The goal of this step is to produce good accuracy when the value of $x$ | ||
406 | is large, where the \emph{numdiff} command produces accurate | ||
407 | results, while \emph{derivative} performs poorly. | ||
408 | |||
409 | \begin{verbatim} | ||
410 | -->numdiff(myfunction,1.e10) | ||
411 | ans = | ||
412 | 2.000D+10 | ||
413 | -->derivative(myfunction,1.e10,order=1) | ||
414 | ans = | ||
415 | 0. | ||
416 | \end{verbatim} | ||
417 | |||
418 | This step is a trade-off because it allows to keep a good accuracy with | ||
419 | large values of $x$, but produces a slightly sub-optimal step size when | ||
420 | $x$ is near 1. The behaviour near zero is the same, i.e. both commands | ||
421 | produce wrong results when $x \rightarrow 0$ and $x\neq 0$. | ||
422 | |||
423 | \subsection{References} | ||
424 | |||
425 | A reference for numerical derivatives | ||
426 | is \cite{AbramowitzStegun1972}, chapter 25. "Numerical Interpolation, | ||
427 | Differentiation and Integration" (p. 875). | ||
428 | The webpage \cite{schimdtnd} and the book \cite{NumericalRecipes} give | ||
429 | results about the rounding errors. | ||
430 | |||
431 | |||
diff --git a/scilab_doc/scilabisnotnaive/pythagoreansum.tex b/scilab_doc/scilabisnotnaive/pythagoreansum.tex deleted file mode 100644 index 5833c69..0000000 --- a/scilab_doc/scilabisnotnaive/pythagoreansum.tex +++ /dev/null | |||
@@ -1,180 +0,0 @@ | |||
1 | \section{The Pythagorean sum} | ||
2 | |||
3 | In this section, we analyse the computation of the Pythagorean sum, | ||
4 | which is used in two different computations, that is the norm of a complex | ||
5 | number and the 2-norm of a vector of real values. | ||
6 | |||
7 | In the first part, we briefly present the mathematical formulas for these | ||
8 | two computations. | ||
9 | We then present the naïve algorithm based on these mathematical formulas. | ||
10 | In the second part, we make some experiments in Scilab and compare our | ||
11 | naïve algorithm with the \emph{abs} and \emph{norm} Scilab primitives. | ||
12 | In the third part, we analyse | ||
13 | why and how floating point numbers must be taken into account when the | ||
14 | Pythagorean sum is to be computed. | ||
15 | |||
16 | \subsection{Theory} | ||
17 | |||
18 | \subsection{Experiments} | ||
19 | |||
20 | % TODO : compare both abs and norm. | ||
21 | |||
22 | \lstset{language=Scilab} | ||
23 | \lstset{numbers=left} | ||
24 | \lstset{basicstyle=\footnotesize} | ||
25 | \lstset{keywordstyle=\bfseries} | ||
26 | \begin{lstlisting} | ||
27 | // Straightforward implementation | ||
28 | function mn2 = mynorm2(a,b) | ||
29 | mn2 = sqrt(a^2+b^2) | ||
30 | endfunction | ||
31 | // With scaling | ||
32 | function mn2 = mypythag1(a,b) | ||
33 | if (a==0.0) then | ||
34 | mn2 = abs(b); | ||
35 | elseif (b==0.0) then | ||
36 | mn2 = abs(a); | ||
37 | else | ||
38 | if (abs(b)>abs(a)) then | ||
39 | r = a/b; | ||
40 | t = abs(b); | ||
41 | else | ||
42 | r = b/a; | ||
43 | t = abs(a); | ||
44 | end | ||
45 | mn2 = t * sqrt(1 + r^2); | ||
46 | end | ||
47 | endfunction | ||
48 | // With Moler & Morrison's | ||
49 | // At most 7 iterations are required. | ||
50 | function mn2 = mypythag2(a,b) | ||
51 | p = max(abs(a),abs(b)) | ||
52 | q = min(abs(a),abs(b)) | ||
53 | //index = 0 | ||
54 | while (q<>0.0) | ||
55 | //index = index + 1 | ||
56 | //mprintf("index = %d, p = %e, q = %e\n",index,p,q) | ||
57 | r = (q/p)^2 | ||
58 | s = r/(4+r) | ||
59 | p = p + 2*s*p | ||
60 | q = s * q | ||
61 | end | ||
62 | mn2 = p | ||
63 | endfunction | ||
64 | function compare(x) | ||
65 | mprintf("Re(x)=%e, Im(x)=%e\n",real(x),imag(x)); | ||
66 | p = abs(x); | ||
67 | mprintf("%20s : %e\n","Scilab",p); | ||
68 | p = mynorm2(real(x),imag(x)); | ||
69 | mprintf("%20s : %e\n","Naive",p); | ||
70 | p = mypythag1(real(x),imag(x)); | ||
71 | mprintf("%20s : %e\n","Scaling",p); | ||
72 | p = mypythag2(real(x),imag(x)); | ||
73 | mprintf("%20s : %e\n","Moler & Morrison",p); | ||
74 | endfunction | ||
75 | // Test #1 : all is fine | ||
76 | x = 1 + 1 * %i; | ||
77 | compare(x); | ||
78 | // Test #2 : more difficult when x is large | ||
79 | x = 1.e200 + 1 * %i; | ||
80 | compare(x); | ||
81 | // Test #3 : more difficult when x is small | ||
82 | x = 1.e-200 + 1.e-200 * %i; | ||
83 | compare(x); | ||
84 | \end{lstlisting} | ||
85 | |||
86 | \begin{verbatim} | ||
87 | *************************************** | ||
88 | Example #1 : simple computation with Scilab 5.1 | ||
89 | x(1)=1.000000e+000, x(2)=1.000000e+000 | ||
90 | Scilab : 1.414214e+000 | ||
91 | Naive : 1.414214e+000 | ||
92 | Scaling : 1.414214e+000 | ||
93 | Moler & Morrison : 1.414214e+000 | ||
94 | *************************************** | ||
95 | Example #2 : with large numbers ? | ||
96 | Scilab : Inf | ||
97 | Naive : Inf | ||
98 | Scaling : 1.000000e+200 | ||
99 | Moler & Morrison : 1.000000e+200 | ||
100 | *************************************** | ||
101 | Example #3 : with small numbers ? | ||
102 | x(1)=1.000000e-200, x(2)=1.000000e-200 | ||
103 | Scilab : 0.000000e+000 | ||
104 | Naive : 0.000000e+000 | ||
105 | Scaling : 1.414214e-200 | ||
106 | Moler & Morrison : 1.414214e-200 | ||
107 | *************************************** | ||
108 | > Conclusion : Scilab is naive ! | ||
109 | Octave 3.0.3 | ||
110 | *************************************** | ||
111 | octave-3.0.3.exe:29> compare(x); | ||
112 | *************************************** | ||
113 | x(1)=1.000000e+000, x(2)=1.000000e+000 | ||
114 | Octave : 1.414214e+000 | ||
115 | Naive : 1.414214e+000 | ||
116 | Scaling : 1.414214e+000 | ||
117 | Moler & Morrison : 1.414214e+000 | ||
118 | *************************************** | ||
119 | x(1)=1.000000e+200, x(2)=1.000000e+000 | ||
120 | Octave : 1.000000e+200 | ||
121 | Naive : Inf | ||
122 | Scaling : 1.000000e+200 | ||
123 | Moler & Morrison : 1.000000e+200 | ||
124 | *************************************** | ||
125 | octave-3.0.3.exe:33> compare(x) | ||
126 | x(1)=1.000000e-200, x(2)=1.000000e-200 | ||
127 | Octave : 1.414214e-200 | ||
128 | Naive : 0.000000e+000 | ||
129 | Scaling : 1.414214e-200 | ||
130 | Moler & Morrison : 1.414214e-200 | ||
131 | *************************************** | ||
132 | > Conclusion : Octave is not naive ! | ||
133 | |||
134 | With complex numbers. | ||
135 | *************************************** | ||
136 | |||
137 | Re(x)=1.000000e+000, Im(x)=1.000000e+000 | ||
138 | Scilab : 1.414214e+000 | ||
139 | Naive : 1.414214e+000 | ||
140 | Scaling : 1.414214e+000 | ||
141 | Moler & Morrison : 1.414214e+000 | ||
142 | *************************************** | ||
143 | Re(x)=1.000000e+200, Im(x)=1.000000e+000 | ||
144 | Scilab : 1.000000e+200 | ||
145 | Naive : Inf | ||
146 | Scaling : 1.000000e+200 | ||
147 | Moler & Morrison : 1.000000e+200 | ||
148 | *************************************** | ||
149 | Re(x)=1.000000e-200, Im(x)=1.000000e-200 | ||
150 | Scilab : 1.414214e-200 | ||
151 | Naive : 0.000000e+000 | ||
152 | Scaling : 1.414214e-200 | ||
153 | Moler & Morrison : 1.414214e-200 | ||
154 | *************************************** | ||
155 | > Conclusion : Scilab is not naive ! | ||
156 | \end{verbatim} | ||
157 | |||
158 | \subsection{Explanations} | ||
159 | |||
160 | \subsection{References} | ||
161 | |||
162 | The paper by Moler and Morrison 1983 \cite{journals/ibmrd/MolerM83} gives an | ||
163 | algorithm to compute the Pythagorean sum $a\oplus b = \sqrt{a^2 + b^2}$ | ||
164 | without computing their squares or their square roots. Their algorithm is based on a cubically | ||
165 | convergent sequence. | ||
166 | The BLAS linear algebra suite of routines \cite{900236} includes the SNRM2, DNRM2 | ||
167 | and SCNRM2 routines which compute the Euclidean norm of a vector. | ||
168 | These routines are based on Blue \cite{355771} and Cody \cite{Cody:1971:SEF}. | ||
169 | In his 1978 paper \cite{355771}, James Blue gives an algorithm to compute the | ||
170 | Euclidean norm of an $n$-vector $\|x\| = \sqrt{\sum_{i=1,n}x_i^2}$. | ||
171 | The exceptional values of the \emph{hypot} operator are defined as the | ||
172 | Pythagorean sum in the IEEE 754 standard \cite{P754:2008:ISF,ieee754-1985}. | ||
173 | The \emph{\_\_ieee754\_hypot(x,y)} C function is implemented in the | ||
174 | Fdlibm software library \cite{fdlibm} developed by Sun Microsystems and | ||
175 | available at netlib. This library is used by Matlab \cite{matlab-hypot} | ||
176 | and its \emph{hypot} command. | ||
177 | |||
178 | |||
179 | |||
180 | |||
diff --git a/scilab_doc/scilabisnotnaive/quadratic.tex b/scilab_doc/scilabisnotnaive/quadratic.tex deleted file mode 100644 index 1a888f9..0000000 --- a/scilab_doc/scilabisnotnaive/quadratic.tex +++ /dev/null | |||
@@ -1,511 +0,0 @@ | |||
1 | \section{Quadratic equation} | ||
2 | |||
3 | In this section, we detail the computation of the roots of a quadratic polynomial. | ||
4 | As we shall see, there is a whole world from the mathematics formulas to the | ||
5 | implementation of such computations. In the first part, we briefly report the formulas which allow to | ||
6 | compute the real roots of a quadratic equation with real coefficients. | ||
7 | We then present the naïve algorithm based on these mathematical formulas. | ||
8 | In the second part, we make some experiments in Scilab and compare our | ||
9 | naïve algorithm with the \emph{roots} Scilab primitive. | ||
10 | In the third part, we analyse | ||
11 | why and how floating point numbers must be taken into account when the | ||
12 | implementation of such roots is required. | ||
13 | |||
14 | \subsection{Theory} | ||
15 | |||
16 | We consider the following quadratic equation, with real | ||
17 | coefficients $a, b, c \in \RR$ \cite{wikipediaquadratic,wikipedialossofsign,mathworldquadratic} : | ||
18 | |||
19 | \begin{eqnarray} | ||
20 | a x^2 + b x + c = 0. | ||
21 | \end{eqnarray} | ||
22 | |||
23 | The real roots of the quadratic equations are | ||
24 | \begin{eqnarray} | ||
25 | x_- &=& \frac{-b- \sqrt{b^2-4ac}}{2a}, \label{real:x-}\\ | ||
26 | x_+ &=& \frac{-b+ \sqrt{b^2-4ac}}{2a}, \label{real:x+} | ||
27 | \end{eqnarray} | ||
28 | with the hypothesis that the discriminant $\Delta=b^2-4ac$ | ||
29 | is positive. | ||
30 | |||
31 | The naive, simplified, algorithm which computes the roots of the | ||
32 | quadratic is presented in figure \ref{naive-quadratic}. | ||
33 | |||
34 | \begin{figure}[htbp] | ||
35 | \begin{algorithmic} | ||
36 | \STATE $\Delta\gets b^2-4ac$ | ||
37 | \STATE $s\gets \sqrt{\Delta}$ | ||
38 | \STATE $x_-\gets (-b-s)/(2a)$ | ||
39 | \STATE $x_+\gets (-b+s)/(2a)$ | ||
40 | \end{algorithmic} | ||
41 | \caption{Naive algorithm to compute the real roots of a quadratic equation} | ||
42 | \label{naive-quadratic} | ||
43 | \end{figure} | ||
44 | |||
45 | \subsection{Experiments} | ||
46 | |||
47 | The following Scilab function is a straightforward implementation | ||
48 | of the previous formulas. | ||
49 | |||
50 | \lstset{language=Scilab} | ||
51 | \lstset{numbers=left} | ||
52 | \lstset{basicstyle=\footnotesize} | ||
53 | \lstset{keywordstyle=\bfseries} | ||
54 | \begin{lstlisting} | ||
55 | function r=myroots(p) | ||
56 | c=coeff(p,0); | ||
57 | b=coeff(p,1); | ||
58 | a=coeff(p,2); | ||
59 | r=zeros(2,1); | ||
60 | r(1)=(-b+sqrt(b^2-4*a*c))/(2*a); | ||
61 | r(2)=(-b-sqrt(b^2-4*a*c))/(2*a); | ||
62 | endfunction | ||
63 | \end{lstlisting} | ||
64 | |||
65 | The goal of this section is to show that some additional | ||
66 | work is necessary to compute the roots of the quadratic equation | ||
67 | with sufficient accuracy. | ||
68 | We will especially pay attention to rounding errors and | ||
69 | overflow problems. | ||
70 | In this section, we show that the \emph{roots} command | ||
71 | of the Scilab language is not \emph{naive}, in the sense that it | ||
72 | takes into account for the floating point implementation details | ||
73 | that we will see in the next section. | ||
74 | |||
75 | |||
76 | |||
77 | \subsubsection{Rounding errors} | ||
78 | |||
79 | We analyse the rounding errors which are | ||
80 | appearing when the discriminant of the quadratic equation | ||
81 | is such that $b^2\approx 4ac$. | ||
82 | We consider the following quadratic equation | ||
83 | \begin{eqnarray} | ||
84 | \epsilon x^2 + (1/\epsilon)x - \epsilon = 0 | ||
85 | \end{eqnarray} | ||
86 | with $\epsilon=0.0001=10^{-4}$. | ||
87 | |||
88 | The two real solutions of the quadratic equation are | ||
89 | \begin{eqnarray} | ||
90 | x_- &=& \frac{-1/\epsilon- \sqrt{1/\epsilon^2+4\epsilon^2}}{2\epsilon} \approx -1/\epsilon^2, \\ | ||
91 | x_+ &=& \frac{-1/\epsilon+ \sqrt{1/\epsilon^2+4\epsilon^2}}{2\epsilon} \approx \epsilon^2 | ||
92 | \end{eqnarray} | ||
93 | |||
94 | The following Scilab script shows an example of the computation | ||
95 | of the roots of such a polynomial with the \emph{roots} | ||
96 | primitive and with a naive implementation. | ||
97 | Only the positive root $x_+ \approx \epsilon^2$ is considered in this | ||
98 | test (the $x_-$ root is so that $x_- \rightarrow -\infty$ in both | ||
99 | implementations). | ||
100 | |||
101 | \lstset{language=Scilab} | ||
102 | \lstset{numbers=left} | ||
103 | \lstset{basicstyle=\footnotesize} | ||
104 | \lstset{keywordstyle=\bfseries} | ||
105 | \begin{lstlisting} | ||
106 | p=poly([-0.0001 10000.0 0.0001],"x","coeff"); | ||
107 | e1 = 1e-8; | ||
108 | roots1 = myroots(p); | ||
109 | r1 = roots1(1); | ||
110 | roots2 = roots(p); | ||
111 | r2 = roots2(1); | ||
112 | error1 = abs(r1-e1)/e1; | ||
113 | error2 = abs(r2-e1)/e1; | ||
114 | printf("Expected : %e\n", e1); | ||
115 | printf("Naive method : %e (error=%e)\n", r1,error1); | ||
116 | printf("Scilab method : %e (error=%e)\n", r2, error2); | ||
117 | \end{lstlisting} | ||
118 | |||
119 | The script then prints out : | ||
120 | |||
121 | \begin{verbatim} | ||
122 | Expected : 1.000000e-008 | ||
123 | Naive method : 9.094947e-009 (error=9.050530e-002) | ||
124 | Scilab method : 1.000000e-008 (error=1.654361e-016) | ||
125 | \end{verbatim} | ||
126 | |||
127 | The result is surprising, since the naive root has | ||
128 | no correct digit and a relative error which is 14 orders | ||
129 | of magnitude greater than the relative error of the Scilab root. | ||
130 | |||
131 | The explanation for such a behaviour is that the expression of the | ||
132 | positive root is the following | ||
133 | |||
134 | \begin{eqnarray} | ||
135 | x_+ &=& \frac{-1/\epsilon+ \sqrt{1/\epsilon^2+4\epsilon^2}}{2\epsilon} | ||
136 | \end{eqnarray} | ||
137 | |||
139 | and is numerically evaluated as | ||
139 | |||
140 | \begin{verbatim} | ||
141 | \sqrt{1/\epsilon^2+4\epsilon^2} = 10000.000000000001818989 | ||
142 | \end{verbatim} | ||
143 | |||
144 | As we see, the first digits are correct, but the last digits | ||
145 | are polluted with rounding errors. When the expression $-1/\epsilon+ \sqrt{1/\epsilon^2+4\epsilon^2}$ | ||
146 | is evaluated, the following computations are performed~: | ||
147 | |||
148 | \begin{verbatim} | ||
149 | -1/\epsilon+ \sqrt{1/\epsilon^2+4\epsilon^2} | ||
150 | = -10000.0 + 10000.000000000001818989 | ||
151 | = 0.0000000000018189894035 | ||
152 | \end{verbatim} | ||
153 | |||
154 | The user may think that the result is extreme, but it | ||
155 | is not. Reducing further the value of $\epsilon$ down to | ||
156 | $\epsilon=10^{-11}$, we get the following output : | ||
157 | |||
158 | \begin{verbatim} | ||
159 | Expected : 1.000000e-022 | ||
160 | Naive method : 0.000000e+000 (error=1.000000e+000) | ||
161 | Scilab method : 1.000000e-022 (error=1.175494e-016) | ||
162 | \end{verbatim} | ||
163 | |||
164 | The relative error is this time 16 orders of magnitude | ||
165 | greater than the relative error of the Scilab root. | ||
166 | In fact, the naive implementation computes a false root $x_+$ even for | ||
167 | a value of epsilon equal to $\epsilon=10^{-3}$, where the relative | ||
168 | error is 7 times greater than the relative error produced by the | ||
169 | \emph{roots} primitive. | ||
170 | |||
171 | \subsubsection{Overflow} | ||
172 | |||
173 | In this section, we analyse the overflow exception which is | ||
174 | appearing when the discriminant of the quadratic equation | ||
175 | is such that $b^2>> 4ac$. | ||
176 | We consider the following quadratic equation | ||
177 | \begin{eqnarray} | ||
178 | x^2 + (1/\epsilon)x + 1 = 0 | ||
179 | \end{eqnarray} | ||
180 | with $\epsilon\rightarrow 0$. | ||
181 | |||
182 | The roots of this equation are | ||
183 | \begin{eqnarray} | ||
184 | x_- &\approx& -1/\epsilon \rightarrow -\infty, \qquad \epsilon \rightarrow 0\\ | ||
185 | x_+ &\approx& -\epsilon \rightarrow 0^-, \qquad \epsilon \rightarrow 0 | ||
186 | \end{eqnarray} | ||
187 | To create a difficult case, we search $\epsilon$ so that | ||
188 | $1/\epsilon^2 = 10^{310}$, because we know that $10^{308}$ | ||
189 | is the maximum value available with double precision floating | ||
190 | point numbers. One possible solution is $\epsilon=10^{-155}$. | ||
191 | |||
192 | The following Scilab script shows an example of the computation | ||
193 | of the roots of such a polynomial with the \emph{roots} | ||
194 | primitive and with a naive implementation. | ||
195 | |||
196 | \lstset{language=Scilab} | ||
197 | \lstset{numbers=left} | ||
198 | \lstset{basicstyle=\footnotesize} | ||
199 | \lstset{keywordstyle=\bfseries} | ||
200 | \begin{lstlisting} | ||
201 | // Test #3 : overflow because of b | ||
202 | e=1.e-155 | ||
203 | a = 1; | ||
204 | b = 1/e; | ||
205 | c = 1; | ||
206 | p=poly([c b a],"x","coeff"); | ||
207 | expected = [-e;-1/e]; | ||
208 | roots1 = myroots(p); | ||
209 | roots2 = roots(p); | ||
210 | error1 = abs(roots1-expected)/norm(expected); | ||
211 | error2 = abs(roots2-expected)/norm(expected); | ||
212 | printf("Expected : %e %e\n", expected(1),expected(2)); | ||
213 | printf("Naive method : %e %e (error=%e)\n", roots1(1),roots1(2),error1); | ||
214 | printf("Scilab method : %e %e (error=%e)\n", roots2(1),roots2(2), error2); | ||
215 | \end{lstlisting} | ||
216 | |||
217 | The script then prints out : | ||
218 | |||
219 | \begin{verbatim} | ||
220 | Expected : -1.000000e-155 -1.000000e+155 | ||
221 | Naive method : Inf Inf (error=Nan) | ||
222 | Scilab method : -1.000000e-155 -1.000000e+155 (error=0.000000e+000) | ||
223 | \end{verbatim} | ||
224 | |||
225 | As we see, the $b^2-4ac$ term has been evaluated as $1/\epsilon^2-4$, | ||
226 | which is approximately equal to $10^{310}$. This number cannot | ||
227 | be represented in a floating point number. It therefore produces the | ||
228 | IEEE overflow exception and set the result as \emph{Inf}. | ||
229 | |||
230 | \subsection{Explanations} | ||
231 | |||
232 | The following tricks are extracted from the | ||
233 | \emph{quad} routine of the \emph{RPOLY} algorithm by | ||
234 | Jenkins \cite{Jenkins1975}. This algorithm is used by Scilab in the | ||
235 | roots primitive, where a special case is handled when the | ||
236 | degree of the equation is equal to 2, i.e. a quadratic equation. | ||
237 | |||
238 | \subsubsection{Properties of the roots} | ||
239 | |||
240 | One can easily show that the sum and the product of the roots | ||
241 | allow to recover the coefficients of the equation which was solved. | ||
242 | One can show that | ||
243 | \begin{eqnarray} | ||
244 | x_- + x_+ &=&\frac{-b}{a}\\ | ||
245 | x_- x_+ &=&\frac{c}{a} | ||
246 | \end{eqnarray} | ||
247 | Put in another form, one can state that the computed roots are | ||
248 | solution of the normalized equation | ||
249 | \begin{eqnarray} | ||
250 | x^2 - \left(x_- + x_+\right) x + x_- x_+ &=&0 | ||
251 | \end{eqnarray} | ||
252 | |||
253 | Other transformation leads to an alternative form for the roots. | ||
254 | The original quadratic equation can be written as a quadratic | ||
255 | equation on $1/x$ | ||
256 | \begin{eqnarray} | ||
257 | c(1/x)^2 + b (1/x) + a &=&0 | ||
258 | \end{eqnarray} | ||
259 | Using the previous expressions for the solution of $ax^2+bx+c=0$ leads to the | ||
260 | following expression of the roots of the quadratic equation when the | ||
261 | discriminant is positive | ||
262 | \begin{eqnarray} | ||
263 | x_- &=& \frac{2c}{-b+ \sqrt{b^2-4ac}}, \label{real:x-inverse}\\ | ||
264 | x_+ &=& \frac{2c}{-b- \sqrt{b^2-4ac}} \label{real:x+inverse} | ||
265 | \end{eqnarray} | ||
266 | These roots can also be computed from \ref{real:x-}, with the | ||
267 | multiplication by $-b+ \sqrt{b^2-4ac}$. | ||
268 | |||
269 | \subsubsection{Conditioning of the problem} | ||
270 | |||
271 | The conditioning of the problem may be evaluated with the | ||
272 | computation of the partial derivatives of the roots of the | ||
273 | equations with respect to the coefficients. | ||
274 | These partial derivatives measure the sensitivity of the | ||
275 | roots of the equation with respect to small errors which might | ||
276 | pollute the coefficients of the quadratic equations. | ||
277 | |||
278 | In the following, we note $x_-=\frac{-b- \sqrt{\Delta}}{2a}$ | ||
279 | and $x_+=\frac{-b+ \sqrt{\Delta}}{2a}$ when $a\neq 0$. | ||
280 | If the discriminant is strictly positive and $a\neq 0$, i.e. if the roots | ||
281 | of the quadratic are real, the partial derivatives of the | ||
282 | roots are the following : | ||
283 | \begin{eqnarray} | ||
284 | \frac{\partial x_-}{\partial a} &=& \frac{c}{a\sqrt{\Delta}} + \frac{b+\sqrt{\Delta}}{2a^2}, \qquad a\neq 0, \qquad \Delta\neq 0\\ | ||
285 | \frac{\partial x_+}{\partial a} &=& -\frac{c}{a\sqrt{\Delta}} + \frac{b-\sqrt{\Delta}}{2a^2}\\ | ||
286 | \frac{\partial x_-}{\partial b} &=& \frac{-1-b/\sqrt{\Delta}}{2a}\\ | ||
287 | \frac{\partial x_+}{\partial b} &=& \frac{-1+b/\sqrt{\Delta}}{2a}\\ | ||
288 | \frac{\partial x_-}{\partial c} &=& \frac{1}{\sqrt{\Delta}}\\ | ||
289 | \frac{\partial x_+}{\partial c} &=& -\frac{1}{\sqrt{\Delta}} | ||
290 | \end{eqnarray} | ||
291 | |||
292 | If the discriminant is zero, the partial derivatives of the | ||
293 | double real root are the following : | ||
294 | \begin{eqnarray} | ||
295 | \frac{\partial x_\pm}{\partial a} &=& \frac{b}{2a^2}, \qquad a\neq 0\\ | ||
296 | \frac{\partial x_\pm}{\partial b} &=& \frac{-1}{2a}\\ | ||
297 | \frac{\partial x_\pm}{\partial c} &=& 0 | ||
298 | \end{eqnarray} | ||
299 | |||
300 | The partial derivatives indicate that if $a\approx 0$ or $\Delta\approx 0$, | ||
301 | the problem is ill-conditioned. | ||
302 | |||
303 | |||
304 | |||
305 | \subsubsection{Floating-Point implementation : fixing rounding error} | ||
306 | |||
307 | In this section, we show how to compute the roots of a | ||
308 | quadratic equation with protection against rounding | ||
309 | errors, protection against overflow and a minimum | ||
310 | amount of multiplications and divisions. | ||
311 | |||
312 | Few but important references deals with floating point | ||
313 | implementations of the roots of a quadratic polynomial. | ||
314 | These references include the important paper \cite{WhatEveryComputerScientist} by Goldberg, | ||
315 | the Numerical Recipes \cite{NumericalRecipes}, chapter 5, section 5.6 | ||
316 | and \cite{FORSYTHE1991}, \cite{Nievergelt2003}, \cite{Kahan2004}. | ||
317 | |||
318 | The starting point is the mathematical solution of the quadratic equation, | ||
319 | depending on the sign of the discriminant $\Delta=b^2 - 4ac$ : | ||
320 | \begin{itemize} | ||
321 | \item If $\Delta> 0$, there are two real roots, | ||
322 | \begin{eqnarray} | ||
323 | x_\pm &=& \frac{-b\pm \sqrt{\Delta}}{2a}, \qquad a\neq 0 | ||
324 | \end{eqnarray} | ||
325 | \item If $\Delta=0$, there is one double root, | ||
326 | \begin{eqnarray} | ||
327 | x_\pm &=& -\frac{b}{2a}, \qquad a\neq 0 | ||
328 | \end{eqnarray} | ||
329 | \item If $\Delta< 0$, | ||
330 | \begin{eqnarray} | ||
331 | x_\pm &=&\frac{-b}{2a} \pm i \frac{\sqrt{-\Delta}}{2a}, \qquad a\neq 0 | ||
332 | \end{eqnarray} | ||
333 | \end{itemize} | ||
334 | |||
335 | |||
336 | In the following, we make the hypothesis that $a\neq 0$. | ||
337 | |||
338 | The previous experiments suggest that the floating point implementation | ||
339 | must deal with two different problems : | ||
340 | \begin{itemize} | ||
341 | \item rounding errors when $b^2\approx 4ac$ because of the cancellation of the | ||
342 | terms which have opposite signs, | ||
343 | \item overflow in the computation of the discriminant $\Delta$ when $b$ is | ||
344 | large in magnitude with respect to $a$ and $c$. | ||
345 | \end{itemize} | ||
346 | |||
347 | When $\Delta>0$, the rounding error problem can be split into two cases | ||
348 | \begin{itemize} | ||
349 | \item if $b<0$, then $-b+\sqrt{b^2-4ac}$ may suffer of rounding errors, | ||
350 | \item if $b>0$, then $-b-\sqrt{b^2-4ac}$ may suffer of rounding errors. | ||
351 | \end{itemize} | ||
352 | |||
353 | Obviously, the rounding problem will not appear when $\Delta<0$, | ||
354 | since the complex roots do not use the sum $-b+\sqrt{b^2-4ac}$. | ||
355 | When $\Delta=0$, the double root does not cause further trouble. | ||
356 | The rounding error problem must be solved only when $\Delta>0$ and the | ||
357 | equation has two real roots. | ||
358 | |||
359 | A possible solution may be found in combining the following expressions for the | ||
360 | roots | ||
361 | \begin{eqnarray} | ||
362 | x_- &=& \frac{-b- \sqrt{b^2-4ac}}{2a}, \label{real:x-2}\\ | ||
363 | x_- &=& \frac{2c}{-b+ \sqrt{b^2-4ac}}, \label{real:x-inverse2}\\ | ||
364 | x_+ &=& \frac{-b+ \sqrt{b^2-4ac}}{2a}, \label{real:x+2}\\ | ||
365 | x_+ &=& \frac{2c}{-b- \sqrt{b^2-4ac}} \label{real:x+inverse2} | ||
366 | \end{eqnarray} | ||
367 | |||
368 | The trick is to pick the formula so that the sign of $b$ is the | ||
369 | same as the sign of the square root. | ||
370 | |||
371 | The following choice allow to solve the rounding error problem | ||
372 | \begin{itemize} | ||
373 | \item compute $x_-$ : if $b<0$, then compute $x_-$ from \ref{real:x-inverse2}, else | ||
374 | (if $b>0$), compute $x_-$ from \ref{real:x-2}, | ||
375 | \item compute $x_+$ : if $b<0$, then compute $x_+$ from \ref{real:x+2}, else | ||
376 | (if $b>0$), compute $x_+$ from \ref{real:x+inverse2}. | ||
377 | \end{itemize} | ||
378 | |||
379 | The rounding error problem can be addressed by considering the | ||
380 | modified Fagnano formulas | ||
381 | \begin{eqnarray} | ||
382 | x_1 &=& -\frac{2c}{b+sgn(b)\sqrt{b^2-4ac}}, \\ | ||
383 | x_2 &=& -\frac{b+sgn(b)\sqrt{b^2-4ac}}{2a}, | ||
384 | \end{eqnarray} | ||
385 | where | ||
386 | \begin{eqnarray} | ||
387 | sgn(b)=\left\{\begin{array}{l} | ||
388 | 1, \textrm{ if } b\geq 0,\\ | ||
389 | -1, \textrm{ if } b< 0, | ||
390 | \end{array}\right. | ||
391 | \end{eqnarray} | ||
392 | The roots $x_{1,2}$ correspond to $x_{+,-}$ so that if $b<0$, $x_1=x_-$ and | ||
393 | if $b>0$, $x_1=x_+$. On the other hand, if $b<0$, $x_2=x_+$ and | ||
394 | if $b>0$, $x_2=x_-$. | ||
395 | |||
396 | An additional remark is that the division by two (and the multiplication | ||
397 | by 2) is exact with floating point numbers so these operations | ||
398 | cannot be a source of problem. But it is | ||
399 | interesting to use $b/2$, which involves only one division, instead | ||
400 | of the three multiplications $2*c$, $2*a$ and $4*a*c$. | ||
401 | This leads to the following expressions of the real roots | ||
402 | \begin{eqnarray} | ||
403 | x_- &=& -\frac{c}{(b/2)+sgn(b)\sqrt{(b/2)^2-ac}}, \\ | ||
404 | x_+ &=& -\frac{(b/2)+sgn(b)\sqrt{(b/2)^2-ac}}{a}, | ||
405 | \end{eqnarray} | ||
406 | which can be simplified into | ||
407 | \begin{eqnarray} | ||
408 | b'&=&b/2\\ | ||
409 | h&=& -\left(b'+sgn(b)\sqrt{b'^2-ac}\right)\\ | ||
410 | x_1 &=& \frac{c}{h}, \\ | ||
411 | x_2 &=& \frac{h}{a}, | ||
412 | \end{eqnarray} | ||
413 | where the discriminant is positive, i.e. $b'^2-ac>0$. | ||
414 | |||
415 | One can use the same value $b'=b/2$ with the complex roots in the | ||
416 | case where the discriminant is negative, i.e. $b'^2-ac<0$ : | ||
417 | \begin{eqnarray} | ||
418 | x_1 &=& -\frac{b'}{a} - i \frac{\sqrt{ac-b'^2}}{a}, \\ | ||
419 | x_2 &=& -\frac{b'}{a} + i \frac{\sqrt{ac-b'^2}}{a}, | ||
420 | \end{eqnarray} | ||
421 | |||
422 | A more robust algorithm, based on the previous analysis is presented in figure \ref{robust-quadratic}. | ||
423 | By comparing \ref{naive-quadratic} and \ref{robust-quadratic}, we can see that | ||
424 | the algorithms are different in many points. | ||
425 | |||
426 | \begin{figure}[htbp] | ||
427 | \begin{algorithmic} | ||
428 | \IF {$a=0$} | ||
429 | \IF {$b=0$} | ||
430 | \STATE $x_-\gets 0$ | ||
431 | \STATE $x_+\gets 0$ | ||
432 | \ELSE | ||
433 | \STATE $x_-\gets -c/b$ | ||
434 | \STATE $x_+\gets 0$ | ||
435 | \ENDIF | ||
436 | \ELSIF {$c=0$} | ||
437 | \STATE $x_-\gets -b/a$ | ||
438 | \STATE $x_+\gets 0$ | ||
439 | \ELSE | ||
440 | \STATE $b'\gets b/2$ | ||
441 | \STATE $\Delta\gets b'^2 - ac$ | ||
442 | \IF {$\Delta<0$} | ||
443 | \STATE $s\gets \sqrt{-\Delta}$ | ||
444 | \STATE $x_1^R\gets -b'/a$ | ||
445 | \STATE $x_1^I\gets -s/a$ | ||
446 | \STATE $x_2^R\gets x_1^R$ | ||
447 | \STATE $x_2^I\gets -x_1^I$ | ||
448 | \ELSIF {$\Delta=0$} | ||
449 | \STATE $x_1\gets -b'/a$ | ||
450 | \STATE $x_2\gets x_1$ | ||
451 | \ELSE | ||
452 | \STATE $s\gets \sqrt{\Delta}$ | ||
453 | \IF {$b>0$} | ||
454 | \STATE $g=1$ | ||
455 | \ELSE | ||
456 | \STATE $g=-1$ | ||
457 | \ENDIF | ||
458 | \STATE $h=-(b'+g*s)$ | ||
459 | \STATE $x_1\gets c/h$ | ||
460 | \STATE $x_2\gets h/a$ | ||
461 | \ENDIF | ||
462 | \ENDIF | ||
463 | \end{algorithmic} | ||
464 | \caption{A more robust algorithm to compute the roots of a quadratic equation} | ||
465 | \label{robust-quadratic} | ||
466 | \end{figure} | ||
467 | |||
468 | \subsubsection{Floating-Point implementation : fixing overflow problems} | ||
469 | |||
470 | The remaining problem is to compute $b'^2-ac$ without creating | ||
471 | unnecessary overflows. | ||
472 | |||
473 | Notice that a small improvement | ||
474 | has already been done : if $|b|$ is close to the upper bound $10^{154}$, | ||
475 | then $|b'|$ may be less difficult to process since $|b'|=|b|/2 < |b|$. | ||
476 | One can then compute the square root by using normalization methods, | ||
477 | so that the overflow problem can be drastically reduced. | ||
478 | The method is based on the fact that the term $b'^2-ac$ can be | ||
479 | evaluated with two equivalent formulas | ||
480 | \begin{eqnarray} | ||
481 | b'^2-ac &=& b'^2\left[1-(a/b')(c/b')\right] \\ | ||
482 | b'^2-ac &=& c\left[b'(b'/c) - a\right] | ||
483 | \end{eqnarray} | ||
484 | |||
485 | \begin{itemize} | ||
486 | \item If $|b'|>|c|>0$, then the expression involving $\left(1-(a/b')(c/b')\right)$ | ||
487 | is so that no overflow is possible since $|c/b'| < 1$ and the problem occurs | ||
488 | only when $b$ is large in magnitude with respect to $a$ and $c$. | ||
489 | \item If $|c|>|b'|>0$, then the expression involving $\left(b'(b'/c) - a\right)$ | ||
490 | should limit the possible overflows since $|b'/c| < 1$. | ||
491 | \end{itemize} | ||
492 | These normalization tricks are similar to the one used by Smith in the | ||
493 | algorithm for the division of complex numbers \cite{Smith1962}. | ||
494 | |||
495 | \subsection{References} | ||
496 | |||
497 | The 1966 technical report by G. Forsythe \cite{Forsythe1966} | ||
498 | presents the floating point system and the possible large error | ||
499 | in using mathematical algorithms blindly. An accurate way of solving | ||
500 | a quadratic is outlined. A few general remarks are made about | ||
501 | computational mathematics. The 1991 paper by Goldberg | ||
502 | \cite{WhatEveryComputerScientist} is a general presentation of the floating | ||
503 | point system and its consequences. It begins with background on floating point | ||
504 | representation and rounding errors, continues with a discussion | ||
505 | of the IEEE floating point standard and concludes with examples of how | ||
506 | computer system builders can better support floating point. The section | ||
507 | 1.4, "Cancellation", specifically considers the computation of the roots | ||
508 | of a quadratic equation. | ||
509 | One can also consult the experiments performed by Nievergelt in \cite{Nievergelt2003}. | ||
510 | |||
511 | |||
diff --git a/scilab_doc/scilabisnotnaive/references.tex b/scilab_doc/scilabisnotnaive/references.tex deleted file mode 100644 index 8f7463e..0000000 --- a/scilab_doc/scilabisnotnaive/references.tex +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | \section{References} | ||
2 | |||
3 | A central reference on this subject is the article | ||
4 | by Goldberg, "What Every Computer Scientist Should Know About Floating-Point Arithmetic", | ||
5 | \cite{WhatEveryComputerScientist}. | ||
6 | If one focuses on numerical algorithms, the "Numerical Recipes" \cite{NumericalRecipes}, | ||
7 | is another good source of solutions for that problem. | ||
8 | The work of Kahan is also central in this domain, for example \cite{Kahan2004}. | ||
9 | |||
10 | |||
diff --git a/scilab_doc/scilabisnotnaive/scilabisnotnaive.bib b/scilab_doc/scilabisnotnaive/scilabisnotnaive.bib deleted file mode 100644 index 36384a0..0000000 --- a/scilab_doc/scilabisnotnaive/scilabisnotnaive.bib +++ /dev/null | |||
@@ -1,359 +0,0 @@ | |||
1 | # | ||
2 | # polynomialroots.bib -- | ||
3 | # | ||
4 | |||
5 | |||
6 | @book{AbramowitzStegun1972, | ||
7 | author = {Abramowitz, M. and Stegun, I. A.}, | ||
8 | title = {Handbook of Mathematical Functions with Formulas, Graphs, and Mathematical Tables}, | ||
9 | year = {1972}, | ||
10 | publisher= {}} | ||
11 | |||
12 | @UNPUBLISHED{wikipediaquadratic, | ||
13 | title = {Quadratic Equation}, | ||
14 | note = {\url{http://en.wikipedia.org/wiki/Quadratic_equation}} | ||
15 | } | ||
16 | |||
17 | @UNPUBLISHED{wikipediacubic, | ||
18 | title = {Cubic Equation}, | ||
19 | note = {\url{http://en.wikipedia.org/wiki/Cubic_equation}} | ||
20 | } | ||
21 | |||
22 | @UNPUBLISHED{wikipedialossofsign, | ||
23 | title = {Loss of significance}, | ||
24 | note = {\url{http://en.wikipedia.org/wiki/Loss_of_significance}} | ||
25 | } | ||
26 | |||
27 | @UNPUBLISHED{mathworldcubic, | ||
28 | title = {Cubic Equation}, | ||
29 | note = {\url{http://mathworld.wolfram.com/CubicFormula.html}} | ||
30 | } | ||
31 | |||
32 | @UNPUBLISHED{mathworldquadratic, | ||
33 | title = {Quadratic Equation}, | ||
34 | note = {\url{http://mathworld.wolfram.com/QuadraticEquation.html}} | ||
35 | } | ||
36 | |||
37 | @book{NumericalRecipes, | ||
38 | author = {W. H. Press and Saul A. Teukolsky and William T. Vetterling and Brian P. Flannery}, | ||
39 | title = {Numerical Recipes in C, Second Edition}, | ||
40 | year = {1992}, | ||
41 | publisher= {}} | ||
42 | |||
43 | @book{WhatEveryComputerScientist, | ||
44 | author = {David Goldberg}, | ||
45 | title = {What Every Computer Scientist Should Know About Floating-Point Arithmetic}, | ||
46 | month={March}, | ||
47 | year = {1991}, | ||
48 | publisher= {Association for Computing Machinery, Inc.}, | ||
49 | note = {\url{http://www.physics.ohio-state.edu/~dws/grouplinks/floating_point_math.pdf}}, | ||
50 | } | ||
51 | |||
52 | @article{Nievergelt2003, | ||
53 | jstor_articletype = {primary_article}, | ||
54 | title = {How (Not) to Solve Quadratic Equations}, | ||
55 | author = {Nievergelt, Yves}, | ||
56 | journal = {The College Mathematics Journal}, | ||
57 | jstor_issuetitle = {}, | ||
58 | volume = {34}, | ||
59 | number = {2}, | ||
60 | jstor_formatteddate = {Mar., 2003}, | ||
61 | pages = {90--104}, | ||
62 | url = {http://www.jstor.org/stable/3595780}, | ||
63 | ISSN = {07468342}, | ||
64 | abstract = {}, | ||
65 | publisher = {Mathematical Association of America}, | ||
66 | language = {}, | ||
67 | copyright = {Copyright © 2003 Mathematical Association of America}, | ||
68 | year = {2003}, | ||
69 | } | ||
70 | |||
71 | @book{FORSYTHE1991, | ||
72 | author = {George E. Forsythe}, | ||
73 | title = {How Do You Solve A Quadratic Equation ?}, | ||
74 | month={JUNE}, | ||
75 | year = {1966}, | ||
76 | publisher= {Computer Science Department, School of Humanities and Sciences, Stanford University}, | ||
77 | note = {\url{ftp://reports.stanford.edu/pub/cstr/reports/cs/tr/66/40/CS-TR-66-40.pdf}} | ||
78 | } | ||
79 | |||
80 | @article{Kahan2004, | ||
81 | title = {On the Cost of Floating-Point Computation Without Extra-Precise Arithmetic}, | ||
82 | author = {W. Kahan}, | ||
83 | note = {\url{http://www.cs.berkeley.edu/~wkahan/Qdrtcs.pdf}}, | ||
84 | year = {2004}, | ||
85 | } | ||
86 | |||
87 | @article{Smith1962, | ||
88 | author = {Smith,, Robert L.}, | ||
89 | title = {Algorithm 116: Complex division}, | ||
90 | journal = {Commun. ACM}, | ||
91 | volume = {5}, | ||
92 | number = {8}, | ||
93 | year = {1962}, | ||
94 | issn = {0001-0782}, | ||
95 | pages = {435}, | ||
96 | doi = {http://doi.acm.org/10.1145/368637.368661}, | ||
97 | publisher = {ACM}, | ||
98 | address = {New York, NY, USA}, | ||
99 | } | ||
100 | |||
101 | @article{Jenkins1975, | ||
102 | author = {Jenkins,, M. A.}, | ||
103 | title = {Algorithm 493: Zeros of a Real Polynomial [C2]}, | ||
104 | journal = {ACM Trans. Math. Softw.}, | ||
105 | volume = {1}, | ||
106 | number = {2}, | ||
107 | year = {1975}, | ||
108 | issn = {0098-3500}, | ||
109 | pages = {178--189}, | ||
110 | doi = {http://doi.acm.org/10.1145/355637.355643}, | ||
111 | publisher = {ACM}, | ||
112 | address = {New York, NY, USA}, | ||
113 | } | ||
114 | |||
115 | @article{368661, | ||
116 | author = {Smith,, Robert L.}, | ||
117 | title = {Algorithm 116: Complex division}, | ||
118 | journal = {Commun. ACM}, | ||
119 | volume = {5}, | ||
120 | number = {8}, | ||
121 | year = {1962}, | ||
122 | issn = {0001-0782}, | ||
123 | pages = {435}, | ||
124 | doi = {http://doi.acm.org/10.1145/368637.368661}, | ||
125 | publisher = {ACM}, | ||
126 | address = {New York, NY, USA}, | ||
127 | } | ||
128 | @article{214414, | ||
129 | author = {Stewart,, G. W.}, | ||
130 | title = {A note on complex division}, | ||
131 | journal = {ACM Trans. Math. Softw.}, | ||
132 | volume = {11}, | ||
133 | number = {3}, | ||
134 | year = {1985}, | ||
135 | issn = {0098-3500}, | ||
136 | pages = {238--241}, | ||
137 | doi = {http://doi.acm.org/10.1145/214408.214414}, | ||
138 | publisher = {ACM}, | ||
139 | address = {New York, NY, USA}, | ||
140 | } | ||
141 | @article{567808, | ||
142 | author = {Li,, Xiaoye S. and Demmel,, James W. and Bailey,, David H. and Henry,, Greg and Hida,, Yozo and Iskandar,, Jimmy and Kahan,, William and Kang,, Suh Y. and Kapur,, Anil and Martin,, Michael C. and Thompson,, Brandon J. and Tung,, Teresa and Yoo,, Daniel J.}, | ||
143 | title = {Design, implementation and testing of extended and mixed precision BLAS}, | ||
144 | journal = {ACM Trans. Math. Softw.}, | ||
145 | volume = {28}, | ||
146 | number = {2}, | ||
147 | year = {2002}, | ||
148 | issn = {0098-3500}, | ||
149 | pages = {152--205}, | ||
150 | doi = {http://doi.acm.org/10.1145/567806.567808}, | ||
151 | publisher = {ACM}, | ||
152 | address = {New York, NY, USA}, | ||
153 | } | ||
154 | @article{KAHAN1987, | ||
155 | author = {KAHAN, W.}, | ||
156 | title = {Branch cuts for complex elementary functmns, or much ado about nothing's sign bit.}, | ||
157 | year = {1987}, | ||
158 | pages = {165--211}, | ||
159 | publisher = {In The State of the Art m Numerzcal Analysts. Proceedings of the Joint IMA/SIAM Conference, A. Iserles and M J. D Powell, Eds. Clarendon Press, Oxford, England}, | ||
160 | } | ||
161 | @article{1039814, | ||
162 | author = {Priest,, Douglas M.}, | ||
163 | title = {Efficient scaling for complex division}, | ||
164 | journal = {ACM Trans. Math. Softw.}, | ||
165 | volume = {30}, | ||
166 | number = {4}, | ||
167 | year = {2004}, | ||
168 | issn = {0098-3500}, | ||
169 | pages = {389--401}, | ||
170 | doi = {http://doi.acm.org/10.1145/1039813.1039814}, | ||
171 | publisher = {ACM}, | ||
172 | address = {New York, NY, USA}, | ||
173 | } | ||
174 | @UNPUBLISHED{schimdtnd, | ||
175 | title = {Numerical Derivatives}, | ||
176 | note = {\url{http://fermi.la.asu.edu/PHY531/intro/node1.html}}, | ||
177 | author = {K.E. Schmidt} | ||
178 | } | ||
179 | @book{dixmier, | ||
180 | author = {J. Dixmier, P. Dugac}, | ||
181 | title = {Cours de Mathématiques du premier cycle, 1ère année}, | ||
182 | year = {1969}, | ||
183 | publisher= {Gauthier-Villars}} | ||
184 | |||
185 | @article{Forsythe1966, | ||
186 | author = {George E. Forsythe}, | ||
187 | title = {How Do You Solve A Quadratic Equation ?}, | ||
188 | year = {1966}, | ||
189 | publisher = {TECHNICAL REPORT NO. CS40}, | ||
190 | note = {\url{ftp://reports.stanford.edu/pub/cstr/reports/cs/tr/66/40/CS-TR-66-40.pdf}}, | ||
191 | } | ||
192 | |||
193 | |||
194 | @ARTICLE{1667289, | ||
195 | title={Underflow and the Denormalized Numbers}, | ||
196 | author={Coonen, J.T.}, | ||
197 | journal={Computer}, | ||
198 | year={1981}, | ||
199 | month={March }, | ||
200 | volume={14}, | ||
201 | number={3}, | ||
202 | pages={75-87}, | ||
203 | doi={10.1109/C-M.1981.220382}, | ||
204 | ISSN={0018-9162}, } | ||
205 | |||
206 | @book{artcomputerKnuthVol2, | ||
207 | author = {D. E. Knuth}, | ||
208 | title = {The Art of Computer Programming, Volume 2, Seminumerical Algorithms}, | ||
209 | year = {1998}, | ||
210 | publisher= {Third Edition, Addison Wesley, Reading, MA}} | ||
211 | |||
212 | @Article{Wynn:1962:AAP, | ||
213 | author = "P. Wynn", | ||
214 | title = "An Arsenal of {ALGOL} Procedures for Complex | ||
215 | Arithmetic", | ||
216 | journal = j-NORDISK-TIDSKR-INFORM-BEHAND, | ||
217 | volume = "2", | ||
218 | number = "4", | ||
219 | pages = "232--255", | ||
220 | month = dec, | ||
221 | year = "1962", | ||
222 | CODEN = "BITTEL, NBITAB", | ||
223 | DOI = "http://www.springerlink.com/openurl.asp?genre=article&id=doi:10.1007/BF01940171", | ||
224 | ISSN = "0006-3835 (print), 1572-9125 (electronic)", | ||
225 | bibdate = "Wed Jan 4 18:52:07 MST 2006", | ||
226 | bibsource = "ftp://ftp.math.utah.edu/pub/tex/bib/bit.bib; | ||
227 | http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0006-3835&volume=2&issue=4", | ||
228 | URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0006-3835&volume=2&issue=4&spage=232", | ||
229 | acknowledgement = ack-nhfb, | ||
230 | } | ||
231 | @article{DBLP:journals/cacm/Friedland67, | ||
232 | author = {Paul Friedland}, | ||
233 | title = {Algorithm 312: Absolute value and square root of a complex | ||
234 | number}, | ||
235 | journal = {Commun. ACM}, | ||
236 | volume = {10}, | ||
237 | number = {10}, | ||
238 | year = {1967}, | ||
239 | pages = {665}, | ||
240 | ee = {http://doi.acm.org/10.1145/363717.363780}, | ||
241 | bibsource = {DBLP, http://dblp.uni-trier.de} | ||
242 | } | ||
243 | |||
244 | @article{journals/ibmrd/MolerM83, | ||
245 | title = {Replacing Square Roots by Pythagorean Sums.}, | ||
246 | author = {Cleve B. Moler and Donald Morrison}, | ||
247 | journal = {IBM Journal of Research and Development}, | ||
248 | number = {6}, | ||
249 | pages = {577-581}, | ||
250 | url = {http://dblp.uni-trier.de/db/journals/ibmrd/ibmrd27.html#MolerM83}, | ||
251 | volume = {27}, | ||
252 | year = {1983}, | ||
253 | description = {dblp}, | ||
254 | date = {2002-01-03}, | ||
255 | keywords = {dblp } | ||
256 | } | ||
257 | |||
258 | @techreport{900236, | ||
259 | author = {Lawson, C. L. and Hanson, R. J. and Kincaid, D. R. and Krogh, F. T.}, | ||
260 | title = {Basic Linear Algebra Subprograms for FORTRAN Usage}, | ||
261 | year = {1977}, | ||
262 | source = {http://www.ncstrl.org:8900/ncstrl/servlet/search?formname=detail\&id=oai%3Ancstrlh%3Autexas_cs%3AUTEXAS_CS%2F%2FCS-TR-77-72}, | ||
263 | publisher = {University of Texas at Austin}, | ||
264 | address = {Austin, TX, USA}, | ||
265 | } | ||
266 | |||
267 | @article{355771, | ||
268 | author = {Blue, James L.}, | ||
269 | title = {A Portable Fortran Program to Find the Euclidean Norm of a Vector}, | ||
270 | journal = {ACM Trans. Math. Softw.}, | ||
271 | volume = {4}, | ||
272 | number = {1}, | ||
273 | year = {1978}, | ||
274 | issn = {0098-3500}, | ||
275 | pages = {15--23}, | ||
276 | doi = {http://doi.acm.org/10.1145/355769.355771}, | ||
277 | publisher = {ACM}, | ||
278 | address = {New York, NY, USA}, | ||
279 | } | ||
280 | @InCollection{Cody:1971:SEF, | ||
281 | author = "W. J. Cody", | ||
282 | title = "Software for the Elementary Functions", | ||
283 | crossref = "Rice:1971:MS", | ||
284 | pages = "171--186", | ||
285 | year = "1971", | ||
286 | bibdate = "Thu Sep 15 18:56:47 1994", | ||
287 | bibsource = "garbo.uwasa.fi:/pc/doc-soft/fpbiblio.txt", | ||
288 | acknowledgement = ack-nj, | ||
289 | } | ||
290 | |||
291 | @techreport{ieee754-1985, | ||
292 | author = {Stevenson, David }, | ||
293 | booktitle = {ANSI/IEEE Std 754-1985}, | ||
294 | citeulike-article-id = {1677576}, | ||
295 | journal = {ANSI/IEEE Std 754-1985}, | ||
296 | keywords = {floating-point, para08}, | ||
297 | month = {August}, | ||
298 | posted-at = {2008-04-21 12:47:40}, | ||
299 | priority = {1}, | ||
300 | title = {IEEE standard for binary floating-point arithmetic}, | ||
301 | url = {http://ieeexplore.ieee.org/xpls/abs\_all.jsp?arnumber=30711}, | ||
302 | year = {1985} | ||
303 | } | ||
304 | |||
305 | @Book{P754:2008:ISF, | ||
306 | author = "{IEEE Task P754}", | ||
307 | title = "{IEEE 754-2008, Standard for Floating-Point | ||
308 | Arithmetic}", | ||
309 | publisher = pub-IEEE-STD, | ||
310 | address = pub-IEEE-STD:adr, | ||
311 | pages = "58", | ||
312 | day = "29", | ||
313 | month = aug, | ||
314 | year = "2008", | ||
315 | DOI = "http://doi.ieeecomputersociety.org/10.1109/IEEESTD.2008.4610935", | ||
316 | ISBN = "0-7381-5753-8 (paper), 0-7381-5752-X (electronic)", | ||
317 | ISBN-13 = "978-0-7381-5753-5 (paper), 978-0-7381-5752-8 | ||
318 | (electronic)", | ||
319 | bibdate = "Thu Sep 25 09:50:30 2008", | ||
320 | URL = "http://ieeexplore.ieee.org/servlet/opac?punumber=4610933; | ||
321 | http://en.wikipedia.org/wiki/IEEE_754-2008", | ||
322 | abstract = "This standard specifies interchange and arithmetic | ||
323 | formats and methods for binary and decimal | ||
324 | floating-point arithmetic in computer programming | ||
325 | environments. This standard specifies exception | ||
326 | conditions and their default handling. An | ||
327 | implementation of a floating-point system conforming to | ||
328 | this standard may be realized entirely in software, | ||
329 | entirely in hardware, or in any combination of software | ||
330 | and hardware. For operations specified in the normative | ||
331 | part of this standard, numerical results and exceptions | ||
332 | are uniquely determined by the values of the input | ||
333 | data, sequence of operations, and destination formats, | ||
334 | all under user control.", | ||
335 | acknowledgement = ack-nhfb, | ||
336 | } | ||
337 | |||
338 | @article{matlab-hypot, | ||
339 | note={\url{http://www.mathworks.com/access/helpdesk/help/techdoc/index.html?/access/helpdesk/help/techdoc/ref/hypot.html&http://www.google.fr/search?q=hypot&ie=utf-8&oe=utf-8&aq=t&rls=org.mozilla:fr:official&client=firefox-a}}, | ||
340 | title={Matlab - hypot : square root of sum of squares}, | ||
341 | } | ||
342 | |||
343 | @article{fdlibm, | ||
344 | title = {A Freely Distributable C Math Library}, | ||
345 | author = {Sun Microsystems, Inc.}, | ||
346 | year = {1993}, | ||
347 | note = {\url{http://www.netlib.org/fdlibm}}, | ||
348 | } | ||
349 | |||
350 | @book{261217, | ||
351 | author = {Muller, Jean-Michel}, | ||
352 | title = {Elementary functions: algorithms and implementation}, | ||
353 | year = {1997}, | ||
354 | isbn = {0-8176-3990-X}, | ||
355 | publisher = {Birkhauser Boston, Inc.}, | ||
356 | address = {Secaucus, NJ, USA}, | ||
357 | } | ||
358 | |||
359 | |||
diff --git a/scilab_doc/scilabisnotnaive/scilabisnotnaive.pdf b/scilab_doc/scilabisnotnaive/scilabisnotnaive.pdf deleted file mode 100644 index 7ef8eaa..0000000 --- a/scilab_doc/scilabisnotnaive/scilabisnotnaive.pdf +++ /dev/null | |||
Binary files differ | |||
diff --git a/scilab_doc/scilabisnotnaive/scilabisnotnaive.tex b/scilab_doc/scilabisnotnaive/scilabisnotnaive.tex deleted file mode 100644 index 331fce0..0000000 --- a/scilab_doc/scilabisnotnaive/scilabisnotnaive.tex +++ /dev/null | |||
@@ -1,146 +0,0 @@ | |||
1 | % | ||
2 | % scilabisnotnaive.tex -- | ||
3 | % Some notes about floating point issues in Scilab. | ||
4 | % | ||
5 | % Copyright 2008 Michael Baudin | ||
6 | % | ||
7 | \documentclass[12pt]{article} | ||
8 | |||
9 | %% Good fonts for PDF | ||
10 | \usepackage[cyr]{aeguill} | ||
11 | |||
12 | %% Package for page headers | ||
13 | \usepackage{fancyhdr} | ||
14 | |||
15 | %% Package to include graphics | ||
16 | %% Comment for DVI | ||
17 | \usepackage[pdftex]{graphicx} | ||
18 | |||
19 | %% Figures formats: jpeg or pdf | ||
20 | %% Comment for DVI | ||
21 | \DeclareGraphicsExtensions{.jpg,.pdf} | ||
22 | |||
23 | %% Package to create Hyperdocuments | ||
24 | %% Comment for DVI | ||
25 | \usepackage[pdftex,colorlinks=true,linkcolor=blue,citecolor=blue,urlcolor=blue]{hyperref} | ||
26 | |||
27 | %% Package to control printed area size | ||
28 | \usepackage{anysize} | ||
29 | %% ...by defining margins {left}{right}{top}{bottom} | ||
30 | \marginsize{22mm}{14mm}{12mm}{25mm} | ||
31 | |||
32 | %% Package used to include a bibliography | ||
33 | \usepackage{natbib} | ||
34 | |||
35 | %% R for real numbers | ||
36 | \usepackage{amssymb} | ||
37 | |||
38 | %% User defined commands | ||
39 | |||
40 | %% Figure reference | ||
41 | \newcommand{\figref}[1]{figure~\ref{#1}} | ||
42 | |||
43 | %% Equation reference | ||
44 | \newcommand{\Ref}[1]{(\ref{#1})} | ||
45 | |||
46 | %% Emphasize a word or a group of words | ||
47 | \newcommand{\empha}[1]{\textit{\textbf{#1}}} | ||
48 | |||
49 | %% Derivation operators | ||
50 | \newcommand{\D}{\partial} | ||
51 | \newcommand{\Dt}{\partial_t} | ||
52 | \newcommand{\Dx}{\partial_x} | ||
53 | \newcommand{\Dy}{\partial_y} | ||
54 | |||
55 | \usepackage{url} | ||
56 | |||
57 | % Scilab macros | ||
58 | \newcommand{\scimacro}[1]{\textit{#1}} | ||
59 | \newcommand{\scicommand}[1]{\textit{#1}} | ||
60 | |||
61 | % To highlight source code | ||
62 | \usepackage{listings} | ||
63 | \usepackage{algorithmic} | ||
64 | |||
65 | % Define theorem environments | ||
66 | \newtheorem{theorem}{Theorem}[section] | ||
67 | \newtheorem{lemma}[theorem]{Lemma} | ||
68 | \newtheorem{proposition}[theorem]{Proposition} | ||
69 | \newtheorem{corollary}[theorem]{Corollary} | ||
70 | |||
71 | \newenvironment{proof}[1][Proof]{\begin{trivlist} | ||
72 | \item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}} | ||
73 | \newenvironment{definition}[1][Definition]{\begin{trivlist} | ||
74 | \item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}} | ||
75 | \newenvironment{example}[1][Example]{\begin{trivlist} | ||
76 | \item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}} | ||
77 | \newenvironment{remark}[1][Remark]{\begin{trivlist} | ||
78 | \item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}} | ||
79 | |||
80 | \newcommand{\qed}{\nobreak \ifvmode \relax \else | ||
81 | \ifdim\lastskip<1.5em \hskip-\lastskip | ||
82 | \hskip1.5em plus0em minus0.5em \fi \nobreak | ||
83 | \vrule height0.75em width0.5em depth0.25em\fi} | ||
84 | |||
85 | % Maths shortcuts | ||
86 | \newcommand{\RR}{\mathbb{R}} | ||
87 | |||
88 | % Algorithms | ||
89 | \usepackage{algorithm2e} | ||
90 | %%\usepackage{algorithmic} | ||
91 | |||
92 | \begin{document} | ||
93 | \author{Michael Baudin} | ||
94 | \date{February 2009} | ||
95 | \title{Scilab is not naive} | ||
96 | \maketitle | ||
97 | \begin{abstract} | ||
98 | Most of the time, the mathematical formula is | ||
99 | directly used in the Scilab source code. | ||
100 | But, in many algorithms, some additional work is | ||
101 | performed, which takes into account the fact that | ||
102 | the computer does not process mathematical real values, | ||
103 | but performs computations with their floating | ||
104 | point representation. | ||
105 | The goal of this article is to show that, in many | ||
106 | situations, Scilab is not naive and uses algorithms | ||
107 | which have been specifically tailored for floating point | ||
108 | computers. We analyse in this article the | ||
109 | particular case of the quadratic equation, the | ||
110 | complex division and the numerical derivatives. | ||
111 | In each example, we show that the naive algorithm | ||
112 | is not sufficiently accurate, while Scilab's implementation | ||
113 | is much more accurate. | ||
114 | \end{abstract} | ||
115 | |||
116 | \tableofcontents | ||
117 | |||
118 | \include{introduction} | ||
119 | |||
120 | \include{quadratic} | ||
121 | |||
122 | \include{numericalderivative} | ||
123 | |||
124 | \include{complexdivision} | ||
125 | |||
126 | |||
127 | \include{conclusion} | ||
128 | |||
129 | \clearpage | ||
130 | |||
131 | \appendix | ||
132 | |||
133 | \include{simpleexperiments} | ||
134 | |||
135 | %\include{pythagoreansum} | ||
136 | |||
137 | %% Bibliography | ||
138 | |||
139 | |||
140 | \addcontentsline{toc}{section}{Bibliography} | ||
141 | \bibliographystyle{plain} | ||
142 | \bibliography{scilabisnotnaive} | ||
143 | |||
144 | |||
145 | \end{document} | ||
146 | |||
diff --git a/scilab_doc/scilabisnotnaive/simpleexperiments.tex b/scilab_doc/scilabisnotnaive/simpleexperiments.tex deleted file mode 100644 index bea9803..0000000 --- a/scilab_doc/scilabisnotnaive/simpleexperiments.tex +++ /dev/null | |||
@@ -1,149 +0,0 @@ | |||
1 | \section{Simple experiments} | ||
2 | |||
3 | In this section, we analyse the examples given in the introduction of this | ||
4 | article. | ||
5 | |||
6 | \subsection{Why $0.1$ is rounded} | ||
7 | |||
8 | In this section, we present a brief explanation for the | ||
9 | following Scilab session. | ||
10 | |||
11 | \begin{verbatim} | ||
12 | -->format(25) | ||
13 | -->x1=0.1 | ||
14 | x1 = | ||
15 | 0.1000000000000000055511 | ||
16 | -->x2 = 1.0-0.9 | ||
17 | x2 = | ||
18 | 0.0999999999999999777955 | ||
19 | -->x1==x2 | ||
20 | ans = | ||
21 | F | ||
22 | \end{verbatim} | ||
23 | |||
24 | In fact, only the first 17 digits $0.100000000000000005$ are | ||
25 | significant and the last digits are an artifact of Scilab's | ||
26 | display system. | ||
27 | |||
28 | The number $0.1$ can be represented as the normalized number | ||
29 | $1.0 \times 10^{-1}$. But the binary floating point representation | ||
30 | of $0.1$ is approximately \cite{WhatEveryComputerScientist} | ||
31 | $1.100110011001100110011001... \times 2^{-4}$. As you see, the decimal | ||
32 | representation is made of a finite number of digits while the | ||
33 | binary representation is made of an infinite sequence of | ||
34 | digits. Because Scilab computations are based on double precision numbers | ||
35 | and because those numbers only have 64 bits to represent the number, | ||
36 | some \emph{rounding} must be performed. | ||
37 | |||
38 | In our example, it happens that $0.1$ falls between two | ||
39 | different binary floating point numbers. After rounding, | ||
40 | the binary floating point number is associated with the decimal | ||
41 | representation "0.100000000000000005", that is "rounding up" | ||
42 | in this case. On the other side, $0.9$ is also not representable | ||
43 | as an exact binary floating point number (but 1.0 is exactly represented). | ||
44 | It happens that, after the subtraction "1.0-0.9", the decimal representation of the | ||
45 | result is "0.09999999999999997", which is different from the rounded | ||
46 | value of $0.1$. | ||
47 | |||
48 | \subsection{Why $sin(\pi)$ is rounded} | ||
49 | |||
50 | In this section, we present a brief explanation of the following | ||
51 | Scilab 5.1 session, where the sine function is applied to the | ||
52 | number $\pi$. | ||
53 | |||
54 | \begin{verbatim} | ||
55 | -->format(25) | ||
56 | -->sin(0.0) | ||
57 | ans = | ||
58 | 0. | ||
59 | -->sin(%pi) | ||
60 | ans = | ||
61 | 0.0000000000000001224647 | ||
62 | \end{verbatim} | ||
63 | |||
64 | Two kinds of approximations are associated with the previous | ||
65 | result | ||
66 | \begin{itemize} | ||
67 | \item $\pi=3.1415926...$ is approximated by Scilab | ||
68 | as the value returned by $4*atan(1.0)$, | ||
69 | \item the $sin$ function is approximated by a polynomial. | ||
70 | \end{itemize} | ||
71 | |||
72 | This article is too short to make a complete presentation | ||
73 | of the computation of elementary functions. The interested | ||
74 | reader may consider the direct analysis of the Fdlibm library | ||
75 | as very instructive \cite{fdlibm}. | ||
76 | The "Elementary Functions" book by Muller \cite{261217} | ||
77 | is a complete reference on this subject. | ||
78 | |||
79 | In Scilab, the "sin" function is directly performed by a | ||
80 | fortran source code (sci\_f\_sin.f) and no additional | ||
81 | algorithm is performed directly by Scilab. | ||
82 | At the compiler level, though, the "sin" function is | ||
83 | provided by a library which is compiler-dependent. | ||
84 | The main structure of the algorithm which computes | ||
85 | "in" is probably the following | ||
86 | |||
87 | \begin{itemize} | ||
88 | \item scale the input $x$ so that it lies in a restricted | ||
89 | interval, | ||
90 | \item use a polynomial approximation of the local | ||
91 | behaviour of "sin" in the neighbourhood of 0, with a guaranteed | ||
92 | precision. | ||
93 | \end{itemize} | ||
94 | |||
95 | In the Fdlibm library for example, the scaling interval is | ||
96 | $[-\pi/4,\pi/4]$. | ||
97 | The polynomial approximation of the sine function has the general form | ||
98 | |||
99 | \begin{eqnarray} | ||
100 | sin(x) &\approx& x + a_3x^3 + \ldots + a_{2n+1} x^{2n+1}\\ | ||
101 | &\approx & x + x^3 p(x^2) | ||
102 | \end{eqnarray} | ||
103 | |||
104 | In the Fdlibm library, 6 terms are used. | ||
105 | |||
106 | For the inverse tan "atan" function, which is | ||
107 | used to compute an approximated value of $\pi$, the process is the same. | ||
108 | All these operations are guaranteed with some precision. | ||
109 | For example, suppose that the functions are guaranteed with 14 significant | ||
110 | digits. That means that $17 - 14 = 3$ digits may be rounded in the process. | ||
111 | In our current example, the value of $sin(\pi)$ is approximated | ||
112 | with 17 digits after the point as "0.00000000000000012". That means that | ||
113 | 2 digits have been rounded. | ||
114 | |||
115 | \subsection{One more step} | ||
116 | |||
117 | In fact, it is possible to reduce the number of | ||
118 | significant digits of the sine function to as low as 0 significant digits. | ||
119 | The mathematical theory is $sin(2^n \pi) = 0$, but that is not true with | ||
120 | floating point numbers. In the following Scilab session, we | ||
121 | |||
122 | \begin{verbatim} | ||
123 | -->for i = 1:5 | ||
124 | -->k=10*i; | ||
125 | -->n = 2^k; | ||
126 | -->sin(n*%pi) | ||
127 | -->end | ||
128 | ans = | ||
129 | - 0.0000000000001254038322 | ||
130 | ans = | ||
131 | - 0.0000000001284135242063 | ||
132 | ans = | ||
133 | - 0.0000001314954487872237 | ||
134 | ans = | ||
135 | - 0.0001346513391512239052 | ||
136 | ans = | ||
137 | - 0.1374464882277985633419 | ||
138 | \end{verbatim} | ||
139 | |||
140 | For $sin(2^{50}\pi)$, all significant digits are lost. This computation | ||
141 | may sound \emph{extreme}, but it must be noticed that it is inside the | ||
142 | double precision possibility, since $2^{50}\pi \approx 3 \times 10^{15} \ll 10^{308}$. | ||
143 | The solution may be to use multiple precision numbers, such as in the | ||
144 | Gnu Multiple Precision system. | ||
145 | |||
146 | If you know a better algorithm, based on double precision only, | ||
147 | which allows to compute accurately such kind of values, the Scilab | ||
148 | team will surely be interested to hear from you ! | ||
149 | |||