% Copyright INRIA
%\documentstyle{book}
%\begin{document}
%\def\verbatok#1{\expandafter \begin{verbatim} %\input{#1}}
\chapter{Stochastic realization}
\label{stochastic realization}
\index{stochastic!realization}
Let $y_k$ be a wide-sense stationary\index{stationary} gaussian\index{gaussian!process} process
with covariance function $\{R_n;n\in {\Bbb Z} \}$. It is well-known that $y_k$ may be considered
as the output of a filter $F$ with a white noise $e_k$ as input.
The {\em Stochastic Realization} problem for $y_k$ is the construction of an algorithm giving
$F$; in addition, $F$ is usually required to be causal and minimum delay\index{minimum!delay}
(i.e. with causal inverse), for reasons of realizability. One major consequence of this
additional constraint is that $F$, when it exists, is minimal and unique, up to a transformation
of coordinates. The filter $F$ is called the {\em Modeling Filter}\index{modeling filter} and
its inverse $F^{-1}$ the {\em Whitening Filter}\index{whitening filter}.
Let us consider the following examples, in which the information on $y_k$ is given in two
different ways. First, suppose that $S_y(z)$, the spectrum\index{spectrum} of $y_k$, is given.
Then, the whitening filter of $y_k$ is the solution $E(z)$ of the
{\em Spectral Factorization}\index{spectral!factorization} problem for $S_y(z)$, that is, the
solution of:
\begin{eqnarray}
E(z)S_y(z)E^{T}(z^{-1})=I
\label{e.ricc.1}
\end{eqnarray}
such that $E(z)$ and $E^{-1}(z)$ are analytic in the unit disc, that is, such that the modeling
filter of $y_k$ is causal and minimum delay. The stochastic realization problem is then the
computation of $E(z)$, given $S_y(e^{i\theta})$. Solutions to this problem will be given in
section \ref{spfact} with a direct factorization of matrix polynomials\index{matrix polynomial}
and in section \ref{ricca} via a state-space approach.
Another example is when the covariance function $R_n=E(y_ky_{k-n}^{T})$ of $y_k$ is given
(the information on $y_k$ is then equivalent to that in the previous example). The whitening
filter giving the innovation\index{innovation}, or prediction error, is obtained by minimizing,
with respect to the coefficients $A_j$, the mean-square prediction error:
\begin{eqnarray}
E(\| e_k \| ^2)=E\{ \| y_k - \sum_{j>0}A_j y_{k-j}\|^2\}
\label{e.ricc.2}
\end{eqnarray}
The stochastic realization problem is then the computation of the $A_j$ as the solution of the
following {\em Yule-Walker}\index{yule-walker} normal equations:
\begin{equation}
\left[
\begin{array}[c]{ccc}
R_0 & R_1 & R_2 \ldots \\
R_1^T & R_0 & R_1 \ldots \\
R_2^T & R_1^T & R_0 \ldots \\
\vdots
\end{array}
\right]
\left[
\begin{array}[c]{c}
A_1 \\A_2 \\A_3 \\ \vdots
\end{array}
\right]
=
\left[
\begin{array}[c]{c}
R_1 \\ R_2 \\ R_3 \\ \vdots
\end{array}
\right]
\label{e.ricc.3}
\end{equation}
This system being To\"{e}plitz,\index{toeplitz} it may be efficiently solved using a
Levinson-type algorithm, as will be shown in section \ref{levin}.
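
As a simple illustration of these two viewpoints (a textbook scalar example, not one of the
Scilab diaries reproduced in this chapter), consider the first-order autoregression
$y_k=a\,y_{k-1}+e_k$ with $|a|<1$ and $e_k$ a standard white noise. Its covariance function is
$R_n=a^{|n|}/(1-a^2)$ and its spectrum factors as
\begin{eqnarray*}
S_y(z)=\frac{1}{(1-az)(1-az^{-1})}
\end{eqnarray*}
so that $E(z)=1-az$ solves (\ref{e.ricc.1}) and the modeling filter
$E^{-1}(z)=\sum_{n\geq 0}a^nz^n$ is causal and minimum delay. On the other hand, since
$R_{n+1}=aR_n$ for $n\geq 0$, the Yule-Walker system (\ref{e.ricc.3}) is solved by
$A_1=R_1/R_0=a$ and $A_j=0$ for $j\geq 2$, which recovers the same whitening filter
$e_k=y_k-a\,y_{k-1}$.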
\section{The {\tt sfact} primitive}
\label{spfact}
\index{macro,{\tt sfact}}
Given a matrix polynomial $H(z)$ with size $n \times m$ and rank $n$, the Scilab function
{\tt sfact} gives the spectral factorization\index{spectral!factorization} of $H(z)$, that is,
a matrix polynomial $D$ such that:
\begin{equation}
H(z)=D(z)D(1/z)z^{n}
\label{e.spfact.1}
\end{equation}
Consider for example the following $2\times 2$ matrix polynomial $a(z)$, generated from the
simulation of a two-dimensional process with three poles followed by a Levinson\index{levinson}
filtering (see section \ref{levin}):
\verbatok{\Diary spfact.dia}
\end{verbatim}
\section{Spectral Factorization via state-space models}
\label{ricca}
As mentioned in the introduction, the stochastic realization problem may be formulated in terms
of the factorization of rational matrices (leading to polynomial factorizations) or in a
state-variable framework. In this section, we will concentrate on the state-space approach, with
the underlying markovian representation, following the treatment of \cite{ruckebusch}, which
uses a gaussian space\index{gaussian!space} formulation of this problem. Thus, given $y_k$ a
zero-mean, stationary gaussian\index{gaussian!process} process, we seek markovian
models\index{markovian!model} of the following type:
\begin{equation}
\left\{
\begin{array}{ll}
x_{k+1}=Fx_k + Lu_{k+1}\\
y_{k+1}=Hx_k + Ju_{k+1}
\end{array}
\right.
\label{e.ricc.4}
\end{equation}
with the following hypotheses:\\
$x_k$ is $q$-dimensional.\\
$u_k$ is a $q$-dimensional, standard, gaussian\index{gaussian!white noise} white noise.\\
$F$, $H$, $J$, $L$ are arbitrary matrices, with $F$ asymptotically stable. Furthermore, we shall
restrict ourselves to minimal models, that is, with $F$ having minimum dimensions.
\subsection{Spectral Study}
Let $\{R_k; k\in {\Bbb Z}\}$ be the covariance function of $y_k$. Defining $G$ as:
\begin{eqnarray}
G=E(x_0 y_0^T)
\label{e.ricc.5}
\end{eqnarray}
we have that
\begin{eqnarray}
\forall n \geq 1, & R_n=HF^{n-1}G
\label{e.ricc.6}
\end{eqnarray}
Letting $Y(z)$ and $U(z)$ be the z-transforms of $y_k$ and $u_k$ respectively, it follows from
(\ref{e.ricc.4}) that:
\begin{eqnarray}
Y(z)=\Gamma (z)U(z)
\label{e.ricc.7}
\end{eqnarray}
where
\begin{eqnarray}
\Gamma (z)=J+Hz(I-Fz)^{-1}L
\label{e.ricc.8}
\end{eqnarray}
is a rational matrix without poles in a vicinity of the unit disc. Thus, the
spectrum\index{spectrum} $S_y$ of $y_k$ may be written in the following factored form:
\begin{eqnarray}
S_y(\theta)=\Gamma (e^{i\theta})\Gamma ^{*}(e^{i\theta}) & \theta \in [-\pi, \pi]
\label{e.ricc.9}
\end{eqnarray}
where $\Gamma ^{*}$ denotes the transposed conjugate of $\Gamma$. Furthermore, from
(\ref{e.ricc.7}), we can write:
\begin{eqnarray}
U(z)=\Gamma ^{-1}(z)Y(z)
\label{e.ricc.10}
\end{eqnarray}
and when $J>0$ (in the sense of positive matrices):
\begin{eqnarray}
\Gamma ^{-1}(z)=J^{-1}-J^{-1}Hz(I-(F-LJ^{-1}H)z)^{-1}LJ^{-1}
\label{e.ricc.11}
\end{eqnarray}
so that $u_k$ may be obtained from $y_k$ by a Laurent expansion of $\Gamma ^{-1}$ in the vicinity
of the unit circle, leading to the whitening filter. It is worth noting that when $y_k$ is
scalar, then $\Gamma (z)=B(z)/A(z)$, where $A$ and $B$ are coprime polynomials and $B$ has no
zero in a vicinity of the unit disc; in other words, $y_k$ is an ARMA process\index{arma process}.
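
The expansion (\ref{e.ricc.11}) is easy to check numerically: the Laurent coefficients of
$\Gamma(z)$ are $J,HL,HFL,HF^2L,\ldots$ and those of $\Gamma^{-1}(z)$ are
$J^{-1},-J^{-1}HLJ^{-1},-J^{-1}H(F-LJ^{-1}H)LJ^{-1},\ldots$, and their convolution must give the
identity. The following Scilab lines sketch this check with small test matrices chosen here for
illustration only (they are not taken from the diaries of this chapter):
\begin{verbatim}
// numerical check of the whitening filter formula (e.ricc.11)
F = [0.5 0.1; 0 0.3]; L = [1 0; 0 1]; H = [1 1; 0 1]; J = [2 0; 0 1];
n  = 10;                      // truncation order of the two expansions
Fc = F - L*inv(J)*H;          // closed-loop matrix appearing in (e.ricc.11)
g  = J;  gi = inv(J);         // z^0 coefficients of Gamma and Gamma^{-1}
for k = 1:n
  g  = [g,  H*F^(k-1)*L];
  gi = [gi, -inv(J)*H*Fc^(k-1)*L*inv(J)];
end
for m = 0:2                   // coefficient of z^m in Gamma(z)*Gamma^{-1}(z)
  c = zeros(2,2);
  for k = 0:m
    c = c + g(:,2*k+1:2*k+2)*gi(:,2*(m-k)+1:2*(m-k)+2);
  end
  disp(c)                     // identity for m=0, zero matrices for m=1,2
end
\end{verbatim}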
%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%
\subsection{The Filter Model}
\label{filter}
\index{filter!model}
Among the many markovian representations of $y_k$, one is of particular importance, as we shall
see later on: the {\em Filter Model} or {\em Innovations Model}\index{innovations!model}. To
introduce it, we need some definitions: first, let us define the {\em filter} $\tilde{x}_k$ of
$x_k$ as:
\begin{eqnarray}
\tilde{x}_k=E(x_k/ Y^-_{k-1})
\label{e.ricc.12}
\end{eqnarray}
where $Y^-_{k}$ is the gaussian space\index{gaussian!space} generated by the coordinates of
$y_n$, $n\leq k$. It is the filter of the process $x_k$ by the process $y_k$. We also need the
innovations process\index{innovations!process} $\tilde{w}_k$, defined as follows:
\begin{eqnarray}
\tilde{w}_k=y_k-E(y_k/Y^-_{k-1})
\label{e.ricc.13}
\end{eqnarray}
$\tilde{w}_k$ is a stationary, gaussian white noise with covariance $\tilde{R}$. From
$\tilde{w}_k$, the standard gaussian white noise $\tilde{u}_k$ may be obtained as:
$\tilde{u}_k=\tilde{R}^{-1/2}\tilde{w}_k$.\\
We are now able to introduce the innovations model:
\begin{equation}
\left\{
\begin{array}{ll}
\tilde{x}_{k+1}=F\tilde{x}_k + T\tilde{w}_{k+1}\\
y_{k+1}=H\tilde{x}_k + \tilde{w}_{k+1}
\end{array}
\right.
\label{e.ricc.14}
\end{equation}
where
\begin{eqnarray}
T=E(x_k\tilde{w}_k^T)\tilde{R}^{-1}
\label{e.ricc.15}
\end{eqnarray}
From (\ref{e.ricc.14}), we also get the following model:
\begin{equation}
\left\{
\begin{array}{ll}
\tilde{x}_{k+1}=F\tilde{x}_k + T\tilde{R}^{1/2}\tilde{u}_{k+1}\\
y_{k+1}=H\tilde{x}_k + \tilde{R}^{1/2}\tilde{u}_{k+1}
\end{array}
\right.
\label{e.ricc.16}
\end{equation}
which is of the desired type (\ref{e.ricc.4}). The transfer function matrix $\tilde{\Gamma }(z)$
of the model (\ref{e.ricc.16}) is:
\begin{eqnarray}
\tilde{\Gamma }(z)=[I+Hz(I-Fz)^{-1}T]\tilde{R}^{1/2}
\label{e.ricc.17}
\end{eqnarray}
and is a maximal factorization of the spectral density\index{spectral!density} of $y_k$, known
as the {\em minimum-delay factorization}\index{minimum!delay!factorization}. One consequence of
this fact is that the innovation\index{innovation} may be calculated as:
\begin{eqnarray}
\tilde{w}_{k+1}=y_{k+1}-H\tilde{x}_k
\label{e.ricc.18}
\end{eqnarray}
The importance of the filter model lies in the fact that {\em all the minimal
markovian\index{markovian!representation} representations of $y_k$ have the same filter}, so
that the problem we are faced with is to find this filter, given the statistics of $y_k$. For
this reason of uniqueness, we will say that $\tilde{x}_k$ is {\em the} filter of $y_k$.
\section{Computing the solution}
We now assume that the covariance sequence of the process $y_k$ has been obtained in some way.
The models we consider being minimal with dimension $q$, it is a well-known fact in automatic
control that the observability matrix\index{observability matrix}:
\begin{equation}
O=\left[
\begin{array}{c}H\\HF\\\vdots\\HF^{q-1}\end{array}\right]
\label{e.ricc.19}
\end{equation}
has rank $q$ and is defined up to a similarity transformation; thus the pair $(H, F)$ is unique
in the same sense. For evident reasons, $G$, defined in (\ref{e.ricc.5}), shares the same
property. Thus we can conclude that the triple $(H, F, G)$ is unique up to a similarity
transformation. It may be easily obtained from the empirical covariance function $\{R_k\}$ with
algorithms such as Ho's \cite{ho} or Rissanen's \cite{rissanen}. It is this point that we shall
investigate now.
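
A candidate triple $(H,F,G)$, however it has been obtained, is easy to check against the data:
its observability matrix (\ref{e.ricc.19}) must have rank $q$ and the reconstructed covariances
$HF^{n-1}G$ must match the empirical $R_n$, up to estimation errors. A minimal Scilab sketch of
this check (the matrices {\tt H}, {\tt F}, {\tt G} and the empirical covariances are assumed to
be already available) could read:
\begin{verbatim}
// sanity check of a candidate triple (H,F,G)
q = size(F, 1);
O = H;                          // observability matrix (e.ricc.19)
for k = 1:q-1
  O = [O; H*F^k];
end
if rank(O) < q then disp('the candidate model is not minimal'); end
for n = 1:5
  disp(H*F^(n-1)*G);            // to be compared with the empirical R_n
end
\end{verbatim}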
\subsection{Estimation of the matrices $H$, $F$ and $G$}
Numerous algorithms exist for solving this problem and we shall focus on just one of them: the
so-called {\em Principal Hankel Component} (PHC)\index{principal hankel component} \cite{kung}
approximation method, which is singular value decomposition (SVD)
based\index{singular value decomposition}. It is as follows: from the covariance sequence, form
the Hankel matrix:\\
\index{hankel}
\begin{equation}
{\cal H}_{M,N}=\left[
\begin{array}[c]{ccccc}
R_1 & R_2 & R_3 & \ldots & R_{N}\\
R_2 & R_3 & R_4 & \ldots & R_{N+1}\\
R_3 & R_4 & R_5 & \ldots & R_{N+2}\\
\vdots & \vdots & \vdots & & \vdots\\
R_{M} & R_{M+1} & R_{M+2} & \ldots & R_{M+N-1}
\end{array}
\right]
\label{e.ricc.20}
\end{equation}
The reason for the wide use of this matrix is Kronecker's theorem, which says that its rank is
theoretically equal to the order of the associated system, i.e. to the dimension of the state
vector of any minimal state-space realization. Defining the matrix $C$ as:
\begin{equation}
C=\left[ \begin{array}{cccc} G & FG & \ldots & F^{q-1}G\end{array}\right]
\label{e.ricc.21}
\end{equation}
we have that:
\begin{eqnarray}
{\cal H}_{M,N}=OC
\label{e.ricc.22}
\end{eqnarray}
Now, from the observability matrix $O$, define the two following matrices:
\begin{equation}
O^{'}=\left[
\begin{array}{c}H\\HF\\ \vdots\\ HF^{q-2}\end{array}\right]
\label{e.ricc.23}
\end{equation}
and
\begin{equation}
O^{\uparrow}=\left[
\begin{array}{c}HF\\ \vdots \\HF^{q-1}\end{array}\right]
\label{e.ricc.24}
\end{equation}
It is straightforward that:
\begin{eqnarray}
O^{\uparrow}=O^{'}F
\label{e.ricc.25}
\end{eqnarray}
so that the matrix $F$ is obtained as the least-squares solution of (\ref{e.ricc.25}). $H$ is
obtained as the first block-row of $O$ and $G$ as the first block-column of $C$: this is the PHC
approximation method. Numerically, the factorization (\ref{e.ricc.22}) is done via
singular-value decomposition:
\begin{eqnarray}
{\cal H}_{M,N}=USV^T
\label{e.ricc.26}
\end{eqnarray}
$O$ and $C$ are obtained as:
\begin{eqnarray}
O=US^{1/2} & & C=S^{1/2}V^{T}
\label{e.ricc.27}
\end{eqnarray}
\paragraph{The {\tt phc} macro}
This macro implements the preceding algorithm to find the triple $(H, F, G)$. In the following
example, a 64-point covariance sequence has been generated for a two-dimensional process, the
first component of which is the sum of two sinusoids with discrete frequencies $\pi/10$ and
$2\pi/10$, while the second component is the sum of two sinusoids with frequencies $\pi/10$ and
$1.9\pi/10$, both components being observed in additive, gaussian white noise. This is done as
follows:
\verbatok{\Diary simul1.dia}
\end{verbatim}
Then the Hankel matrix ${\cal H}_{M,N}$ is built with the function {\tt hank}. Finally, the
Scilab function {\tt phc} gives the desired triple $(H, F, G)$.
\subsection{Computation of the filter}
Having obtained the triple $(H, F, G)$, we now turn to the matrices $T$ and $\tilde{R}$ that
still have to be computed to completely determine the filter model (\ref{e.ricc.16}).
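
A bare-bones version of this SVD step is easy to write down. In the following hedged Scilab
sketch (which is not the {\tt phc} macro itself), the block Hankel matrix {\tt Hk} of
(\ref{e.ricc.20}) is assumed to have been built already, for instance with {\tt hank}; {\tt d}
denotes the dimension of $y_k$ and {\tt q} the desired order:
\begin{verbatim}
// PHC factorization sketch: Hk ~ O*C, truncated at order q
[U, S, V] = svd(Hk);
U = U(:, 1:q); S = S(1:q, 1:q); V = V(:, 1:q);
O = U * sqrt(S);                // observability factor (e.ricc.27)
C = sqrt(S) * V';               // controllability factor (e.ricc.27)
H = O(1:d, :);                  // first block-row of O
G = C(:, 1:d);                  // first block-column of C
Oup   = O(d+1:$, :);            // O with its first block-row removed
Odown = O(1:$-d, :);            // O with its last block-row removed
F = Odown \ Oup;                // least-squares solution of (e.ricc.25)
\end{verbatim}
We now return to the determination of $T$ and $\tilde{R}$.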
From the filter model (\ref{e.ricc.16}), let us compute the relevant covariances:\\
\begin{eqnarray}
R_0=H\tilde{P}H^T+\tilde{R}
\label{e.ricc.28}
\end{eqnarray}
\begin{eqnarray}
G=F\tilde{P}H^T+T\tilde{R}
\label{e.ricc.29}
\end{eqnarray}
\begin{eqnarray}
\tilde{P}=F\tilde{P}F^T+T\tilde{R}T^T
\label{e.ricc.30}
\end{eqnarray}
from which the following relations hold:
\begin{eqnarray}
\tilde{R}=R_0-H\tilde{P}H^T
\label{e.ricc.31}
\end{eqnarray}
\begin{eqnarray}
T=(G-F\tilde{P}H^T)(R_0-H\tilde{P}H^T)^{-1}
\label{e.ricc.32}
\end{eqnarray}
Noting that $\tilde{R}$ and $T$ depend solely on $\tilde{P}$ and supposing that $\tilde{R}$ is
positive, we can write, after eliminating $\tilde{R}$ and $T$ from (\ref{e.ricc.28}),
(\ref{e.ricc.29}) and (\ref{e.ricc.30}):
\begin{eqnarray}
\tilde{P}=F\tilde{P}F^T+(G-F\tilde{P}H^T)(R_0-H\tilde{P}H^T)^{-1}(G^T-H\tilde{P}F^T)
\label{e.ricc.33}
\end{eqnarray}
which is the well-known {\em algebraic Riccati equation}\index{riccati equation}. A matrix
$\tilde{P}$ is called a solution of the Riccati equation if it is positive definite, satisfies
$R_0-H\tilde{P}H^T>0$ and satisfies equation (\ref{e.ricc.33}). Although this equation has
several solutions, the following result gives an answer to our problem: {\em the covariance
$\tilde{P}$ of the filter is the minimal solution of the algebraic Riccati equation}. We shall
now give two algorithms computing this minimal solution: the Faurre
algorithm\index{Faurre algorithm} and the Lindquist algorithm\index{Lindquist algorithm}.
\paragraph{The Faurre algorithm}
\cite{faurre}: in this method, the solution $\tilde{P}$ is obtained as the increasing limit of
the sequence of matrices $P_N$ defined by:
\begin{equation}
P_{N+1}=FP_NF^T+(G-FP_NH^T)(R_0-HP_NH^T)^{-1}(G^T-HP_NF^T)
\label{e.ricc.34}
\end{equation}
with the initial condition:
\begin{equation}
P_0=GR_0^{-1}G^T
\label{e.ricc.35}
\end{equation}
Setting $P_0=0$ is also allowed, for this leads to $P_1=GR_0^{-1}G^T$, hence simply to a one-step
delay in the iterations.
%The matrices $T_N$ and $R_N$ are obtained by the following recursions:
%\begin{equation}
%R_N=R_0-HP_NH^T
%\label{e.ricc.36}
%\end{equation}
%\begin{equation}
%T_N=(G-FP_NH^T)(R_0-HP_NH^T)^{-1}
%\label{e.ricc.37}
%\end{equation}
To conclude, having obtained $\tilde{P}$ via the recursion (\ref{e.ricc.34}), the matrices
$\tilde{R}$ and $T$ are computed with equations (\ref{e.ricc.31}) and (\ref{e.ricc.32})
respectively. This is done with the macro {\tt srfaur}. The recursion for $P_N$ is not
implemented as in equation (\ref{e.ricc.34}), for it may not converge, especially when the
matrix $F$ has eigenvalues near the unit circle. To overcome this difficulty, a factored-form,
Chandrasekhar-type\index{Chandrasekhar} recursion has been implemented, leading to the
sought-after solution even in the aforementioned unfavourable situation\footnote{A mistake was
found in the initialization of the algorithm and could not be corrected in time for the present
document.}. The Scilab function {\tt srfaur} implements this square-root algorithm. Finally, the
filter and the corresponding estimated output are calculated with the help of the model
(\ref{e.ricc.16}) by using the macro {\tt ltitr}, which simulates linear systems.
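
The plain recursion (\ref{e.ricc.34}) itself takes only a few lines; here is a hedged Scilab
sketch of it (it is not the square-root implementation used by {\tt srfaur}, and the number of
iterations {\tt niter} is left to the user):
\begin{verbatim}
// plain Faurre iteration (e.ricc.34), followed by (e.ricc.31)-(e.ricc.32)
function [P, Rt, T] = faurre_sketch(H, F, G, R0, niter)
  P = G * inv(R0) * G';                    // initial condition (e.ricc.35)
  for k = 1:niter
    W = inv(R0 - H*P*H');
    P = F*P*F' + (G - F*P*H') * W * (G' - H*P*F');
  end
  Rt = R0 - H*P*H';                        // estimate of Rtilde (e.ricc.31)
  T  = (G - F*P*H') * inv(Rt);             // estimate of T      (e.ricc.32)
endfunction
\end{verbatim}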
To summarize, the preceding example has been generated in the following way:
\verbatok{\Diary simul3.dia}
\end{verbatim}
\paragraph{The Lindquist algorithm}
\cite{faurre}: This algorithm makes use of the fact that $\tilde{R}$ and $T$ may be computed
from $K=\tilde{P}H^T$ instead of $\tilde{P}$ itself, leading to substantial computational
savings when the observation has a much lower dimension than the state, which is the most
frequent situation. Referring the reader to \cite{faurre} for the derivations, we now give the
algorithm itself:
\begin{equation}
\left\{
\begin{array}{l}
K_{N+1}=K_N+ \Gamma _N \tilde{R}^{-1}_N \Gamma ^T_N H^T \\
\Gamma_ {N+1}= [F-(G-FK_N)(R_0-HK_N)^{-1}H]\Gamma _N \\
\tilde{R}_{N+1}=\tilde{R}_N-\Gamma ^T_NH^T(R_0-HK_N)^{-1}H\Gamma _N
\label{e.ricc.36}
\end{array}
\right.
\end{equation}
with the initial conditions:
\begin{eqnarray}
K_0=0, & \Gamma _0=G, & \tilde{R}_0=R_0
\label{e.ricc.37}
\end{eqnarray}
Then the expression for $T$ is the following:
\begin{eqnarray}
T=(G-FK)(R_0-HK)^{-1}
\label{e.ricc.38}
\end{eqnarray}
and {\tt lindquist} is the corresponding Scilab function.
\section{Levinson filtering}
\label{levin}
\index{levinson filters!lattice filters}
%\cite{anderson}
We still consider here $y_k$, a stationary, vector-valued time series\index{time series}, from
which we have available a sample $y_{k-1}, y_{k-2}, \ldots , y_{k-N}$; the goal here is to
estimate $y_k$ from this sample, by some $\hat{y}_k$, say, which may then be written as a linear
combination of the $y_{k-j}$, $j=1, \ldots , N$: $\hat{y}_k=\sum_{j=1}^{N}A^N_j y_{k-j}$. As
mentioned in the introduction, this problem may be stated as a stochastic realization problem:
attempting to minimize the mean-square prediction error of order $N$ at time $k$:
\begin{eqnarray}
E(\| e_k(N) \| ^2)=E\{ \| y_k - \sum_{j=1}^NA^N_j y_{k-j}\|^2\}
\label{e.lev.1}
\end{eqnarray}
the filter coefficients $A^N_j$, where the superscript $N$ indicates the dependence on the
sample length, are found to be the solutions of the following To\"{e}plitz
system\index{toeplitz}:
\begin{equation}
\left[
\begin{array}[c]{ccccc}
R_0 & R_1 & R_2 & \ldots & R_{N-1}\\
R_1^T & R_0 & R_1 & \ldots & R_{N-2}\\
R_2^T & R_1^T & R_0 & \ldots & R_{N-3}\\
\vdots & \vdots & \vdots & \ddots & \vdots \\
R_{N-1}^T & R_{N-2}^T & R_{N-3}^T & \ldots & R_0
\end{array}
\right]
\left[
\begin{array}[c]{c}
A_1 \\A_2 \\A_3 \\ \vdots \\A_N
\end{array}
\right]
=
\left[
\begin{array}[c]{c}
R_1 \\ R_2 \\ R_3 \\ \vdots \\R_N
\end{array}
\right]
\label{e.lev.2}
\end{equation}
The mean-square error $\Sigma_N$ is easily calculated:
\begin{eqnarray}
\Sigma_N&=&E((y_k-\hat{y}_k)(y_k-\hat{y}_k)^T) \nonumber \\
&=&E((y_k-\hat{y}_k)y^T_k) \nonumber \\
&=&E(y_ky^T_k)-\sum_{j=1}^N A^N_{j}E(y_{k-j}y^T_k) \nonumber \\
&=&R_0-\sum_{j=1}^N A^N_{j}R_{-j}
\label{e.lev.3}
\end{eqnarray}
where the second equality follows from the orthogonality between the prediction error and the
estimate $\hat{y}_k$. Classically, $\Sigma_N$ is taken as a measure of the quality of the
estimation, since it is a monotone decreasing function of $N$ (the longer the sample, the better
the estimation). One may for example compare it to a preassigned mean-square estimation error.
So, the problem is to find a procedure which calculates, successively for each $N$, the
coefficients $A^N_j$ and $\Sigma_N$.
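
For moderate values of $N$, the system (\ref{e.lev.2}) can of course be solved directly; the
following hedged Scilab sketch does so in the scalar case (the vector {\tt R}, assumed to
contain the empirical covariances $R_0,\ldots,R_N$, and the order {\tt N} are illustrative
names):
\begin{verbatim}
// direct solution of the Toeplitz system (e.lev.2), scalar case
N    = 4;                           // assumed prediction order
Toep = zeros(N, N);
for i = 1:N
  for j = 1:N
    Toep(i, j) = R(abs(i-j) + 1);   // scalar case: R_{-n} = R_n
  end
end
rhs   = R(2:N+1)';                  // right-hand side [R_1;...;R_N]
A     = Toep \ rhs;                 // coefficients A^N_1,...,A^N_N
Sigma = R(1) - A' * rhs;            // mean-square error (e.lev.3)
\end{verbatim}
This brute-force approach, however, starts from scratch for every new order $N$; the recursive
procedure described below avoids this.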
An answer is given with Levinson-type\index{levinson} algorithms, an important property of which
is their recursivity in the order: one particularity, relative to other input-output
representations such as state-space models or rational matrices, is that in this approach the
filter structure (its order, for example) is considered as a parameter like any other, leading
to the idea of the lattice filter and to cascade-type realizations of transfer functions. Let us
now describe the Levinson algorithm in the case of a vector-valued time series, noting that in
the scalar case the backward prediction error is of no use because $R_{-k}=R_k$, leading to
simpler, though similar, derivations. For the exposition we shall make use of a Hilbert space
approach, following the treatment given in section \ref{levalgor}.
\subsection{The Levinson algorithm}
\label{levalgor}
Suppose we are given the vector-valued time series $y_k$. We begin with some definitions:\\
$e_k(N)$ and $f_k(N)$ will denote respectively the forward and backward prediction errors of
order $N$ at time $k$:
\begin{eqnarray}
e_k(N)=y_k-\sum_{j=1}^N A^N_jy_{k-j}
\label{e.lev.4}
\end{eqnarray}
\begin{eqnarray}
f_k(N)=y_{k-N}-\sum_{j=1}^N B^N_jy_{k-N+j}
\label{e.lev.5}
\end{eqnarray}
with the convention that: $e_k(0)=f_k(0)=y_k$. We shall also need the following linear space:
$Y^q_p=\mbox{span}\{y_p, \ldots , y_q\}$. In the present geometrical approach, the covariance
$E(xy^T)$ of $x$ and $y$ will be denoted $[x,y]$ (scalar product) and if $E(xy^T)=0$, we shall
write $x\; \bot \; y$ ($x$ orthogonal to $y$), as well as $A\; \bot \; B$ for two orthogonal
linear spaces. In view of these definitions, the following relations hold:
\begin{eqnarray}
e_k(N)\in Y^k_{k-N} & \mbox{and} & e_k(N) \; \bot \; Y^{k-1}_{k-N}
\label{e.lev.6}
\end{eqnarray}
\begin{eqnarray}
f_k(N)\in Y^k_{k-N} & \mbox{and} & f_k(N) \; \bot \; Y^{k}_{k-N+1}
\label{e.lev.7}
\end{eqnarray}
From (\ref{e.lev.4}), we get:
\begin{eqnarray}
e_k(N+1)-e_k(N)\in Y^{k-1}_{k-N-1}
\label{e.lev.8}
\end{eqnarray}
while $e_k(N+1) \; \bot \; Y^{k-1}_{k-N-1}$ and $e_k(N) \; \bot \; Y^{k-1}_{k-N}$ imply that:
\begin{eqnarray}
e_k(N+1)-e_k(N) \; \bot \; Y^{k-1}_{k-N}
\label{e.lev.9}
\end{eqnarray}
Recalling (\ref{e.lev.7}), relations (\ref{e.lev.8}) and (\ref{e.lev.9}) characterize the space
spanned by $f_{k-1}(N)$; hence there exists a matrix $K_N$ such that:
\begin{eqnarray}
e_k(N+1)-e_k(N)=-K_Nf_{k-1}(N)
\label{e.lev.10}
\end{eqnarray}
$K_N$ is determined with the help of the orthogonality condition:
\begin{eqnarray}
e_k(N+1) \; \bot \; y_{k-N-1}
\label{e.lev.11}
\end{eqnarray}
This relation implies that:
\begin{equation}
\begin{array}{l}
[e_k(N+1),y_{k-N-1}] \\
=[e_k(N),y_{k-N-1}] \; -K_N\,[f_{k-1}(N),y_{k-N-1}] \\
=0
\end{array}
\label{e.lev.12}
\end{equation}
hence giving:
\begin{eqnarray}
K_N=[e_k(N),y_{k-N-1}][f_{k-1}(N),y_{k-N-1}]^{-1}
\label{e.lev.13}
\end{eqnarray}
We recognize the second scalar product as the backward mean-square error $\Gamma_N$.
Relations for the backward prediction error may be obtained in the same way; they are:
\begin{eqnarray}
f_k(N+1)-f_{k-1}(N)\in Y^{k}_{k-N} \; \mbox{and} \; \bot \; Y^{k-1}_{k-N}
\label{e.lev.14}
\end{eqnarray}
which characterize the space spanned by $e_k(N)$; thus there exists a matrix $K^{*}_N$ such that:
\begin{eqnarray}
f_k(N+1)-f_{k-1}(N)=-K^{*}_Ne_{k}(N)
\label{e.lev.15}
\end{eqnarray}
$K^{*}_N$ is determined by the orthogonality condition:
\begin{eqnarray}
f_k(N+1) \; \bot \; y_k
\label{e.lev.16}
\end{eqnarray}
which leads to:
\begin{eqnarray}
K^{*}_N=[f_{k-1}(N),y_{k}][e_{k}(N),y_{k}]^{-1}
\label{e.lev.17}
\end{eqnarray}
Here too the second scalar product is seen to be the forward mean-square error $\Sigma_N$.
Relations (\ref{e.lev.10}), (\ref{e.lev.13}), (\ref{e.lev.15}), (\ref{e.lev.17}) give the
sought-after recursions; their lattice structure may be made explicit with the help of the
following matrix polynomials:
\begin{eqnarray}
A_N(z)=I-\sum_{j=1}^N A^N_jz^j\\
B_N(z)=z^NI-\sum_{j=1}^NB^N_jz^{N-j}
\label{e.lev.18}
\end{eqnarray}
and the covariance matrices $R_n=[y_k,y_{k-n}]$, from which $K_N$ and $K^{*}_N$ may be expressed:
\begin{eqnarray}
K_N&=&(R_{N+1}-\sum_{j=1}^NA^N_jR_{N+1-j})(R_0-\sum_{j=1}^NB^N_jR_j)^{-1} \nonumber \\
&=&\beta_N \Gamma^{-1}_N
\label{e.lev.19}
\end{eqnarray}
\begin{eqnarray}
K^{*}_N&=&(R_{-N-1}-\sum_{j=1}^NB^N_jR_{-N-1+j})(R_0-\sum_{j=1}^NA^N_jR_{-j})^{-1} \nonumber \\
&=&\alpha_N \Sigma^{-1}_N
\label{e.lev.20}
\end{eqnarray}
with evident definitions. The last recursion to be given is that for $\Sigma_N$, for which we
will use the expressions of $K_N$ in (\ref{e.lev.19}) and $K^{*}_N$ in (\ref{e.lev.20}), and the
definition of $\Sigma_N$:
\begin{eqnarray}
\Sigma_N=[e_k(N),y_k]
\label{e.lev.21}
\end{eqnarray}
Then we can compute $\Sigma_{N+1}$ as follows:
\begin{eqnarray}
\Sigma_{N+1}&=&[e_k(N+1),y_k] \nonumber \\
&=&[e_k(N)-\beta_N\Gamma^{-1}_Nf_{k-1}(N),y_k] \nonumber \\
&=&[e_k(N),y_k] - \beta_N\Gamma^{-1}_N[f_{k-1}(N),y_k] \nonumber \\
&=&\Sigma_N- \beta_N\Gamma^{-1}_N\alpha_N
\label{e.lev.22}
\end{eqnarray}
In the same fashion, $\Gamma_{N+1}$ will be written as:
\begin{eqnarray}
\Gamma_{N+1}=\Gamma_N- \alpha_N\Sigma^{-1}_N\beta_N
\label{e.lev.23}
\end{eqnarray}
To summarize, the algorithm is as follows:\\
\begin{equation}
\left\{
\begin{array}{l}
\left[
\begin{array}{c}
A_{N+1}(z) \\
B_{N+1}(z)
\end{array}
\right]=\left[
\begin{array}{cc}
I & -K_N\; z \\
-K^{*}_N & z\; I
\end{array}
\right]
\left[
\begin{array}{c}
A_{N}(z) \\
B_{N}(z)
\end{array}
\right]
\\ \\
K_N=\beta_N \Gamma^{-1}_N \\ \\
K^{*}_N=\alpha_N \Sigma^{-1}_N \\ \\
\Sigma_{N+1}=\Sigma_N- \beta_N\Gamma^{-1}_N\alpha_N \\ \\
\Gamma_{N+1}=\Gamma_N- \alpha_N\Sigma^{-1}_N\beta_N \\ \\
\alpha_N=(R_{-N-1}-\sum_{j=1}^NB^N_jR_{-N-1+j}) \\ \\
\beta_N=(R_{N+1}-\sum_{j=1}^NA^N_jR_{N+1-j})
\end{array}
\right.
\label{e.lev.24}
\end{equation}
with the initial conditions:
\begin{equation}
\left\{
\begin{array}{l}
\alpha_0=R_{-1}; \; \beta_0=R_{1}\\
\Sigma_0=\Gamma_0=R_0; \; A_0(z)=B_0(z)=I
\end{array}
\right.
\label{e.lev.25}
\end{equation}
The corresponding Scilab function is {\tt levin}.
%\end{document}
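
For completeness, here is a hedged Scilab sketch of the recursion (\ref{e.lev.24}) in the scalar
case, where $\alpha_N=\beta_N$ and $\Gamma_N=\Sigma_N$; the macro {\tt levin} itself handles the
general matrix case, and the vector {\tt R}, assumed to contain $R_0,\ldots,R_N$, is an
illustrative name:
\begin{verbatim}
// scalar Levinson recursion: A holds A^N_1,...,A^N_N, Sigma the error
function [A, Sigma] = levinson_sketch(R, N)
  A = [];
  Sigma = R(1);                       // Sigma_0 = R_0
  for n = 0:N-1
    bn = R(n+2);                      // beta_n = R_{n+1} - sum_j A^n_j R_{n+1-j}
    for j = 1:n
      bn = bn - A(j) * R(n+2-j);
    end
    K = bn / Sigma;                   // reflection coefficient K_n
    if n == 0 then
      A = K;
    else
      A = [A - K*A($:-1:1), K];       // order update given by (e.lev.24)
    end
    Sigma = Sigma - bn^2 / Sigma;     // Sigma_{n+1}, cf. (e.lev.22)
  end
endfunction
\end{verbatim}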