From 3b4e35900feeb72cd829c5d91b419926644560cd Mon Sep 17 00:00:00 2001
From: "Radu C. Martin" <radu.c.martin@gmail.com>
Date: Mon, 21 Jun 2021 19:24:29 +0200
Subject: [PATCH] WIP: Thesis update

---
 30_Gaussian_Processes_Background.tex |   4 +-
 40_CARNOT_model.tex                  |   2 +
 70_Implementation.tex                | 150 ++++++++++++++++++++++++---
 glossaries.tex                       |   7 +-
 main.tex                             |   6 +-
 references.bib                       |  65 ++++++++++++
 6 files changed, 214 insertions(+), 20 deletions(-)

diff --git a/30_Gaussian_Processes_Background.tex b/30_Gaussian_Processes_Background.tex
index 74fbd19..7cf4e1a 100644
--- a/30_Gaussian_Processes_Background.tex
+++ b/30_Gaussian_Processes_Background.tex
@@ -1,4 +1,4 @@
-\section{Gaussian Processes Background}
+\section{Gaussian Processes Background}\label{sec:gaussian_processes}
 
 The \acrfull{gp} is a member of the \textit{kernel machines} class of algorithms
 in the field of machine learning.
@@ -250,7 +250,7 @@ sparse prior:
     \right)
 \end{equation}
 
-\subsubsection{Evidence Lower Bound}
+\subsubsection{Evidence Lower Bound}\label{sec:elbo}
 
 Computing the log likelihood is one of the most expensive parts of model
 training, due to inversion of the kernel matrix term ${\left(K +
diff --git a/40_CARNOT_model.tex b/40_CARNOT_model.tex
index 36a1b76..a37db86 100644
--- a/40_CARNOT_model.tex
+++ b/40_CARNOT_model.tex
@@ -45,6 +45,8 @@ The final Simulink schema is presented in Figure~\ref{fig:CARNOT_complete}:
     \label{fig:CARNOT_complete}
 \end{figure}
 
+% TODO: [CARNOT] Redo this part
+
 \clearpage
 
 \subsection{Physical dimensions}\label{sec:Physical_dimensions}
diff --git a/70_Implementation.tex b/70_Implementation.tex
index 5e82442..d6be215 100644
--- a/70_Implementation.tex
+++ b/70_Implementation.tex
@@ -1,5 +1,15 @@
 \section{Implementation}
 
+This section goes into the details of the implementation of the Simulink plant
+and Python controller setup.
+
+A high-level view of the setup is presented in Figure~\ref{fig:setup_diagram}.
+The Simulink model's main responsibility is running the CARNOT simulation. It
+also has the task of providing the \acrshort{mpc} with information on the
+weather forecast, since the weather information for the simulation comes from a
+CARNOT \acrshort{wdb} object. A detailed view of all the information available
+in the \acrshort{wdb} object is given in Section~\ref{sec:CARNOT_WDB}.
+
 
 \begin{figure}[ht]
     \centering
@@ -8,19 +18,133 @@
     \label{fig:setup_diagram}
 \end{figure}
 
+\subsection{Simulink Model}
 
-% TODO: [Implementation] Reference implementation details for CARNOT and WDB
+
+% TODO: [Implementation] Move the simulink schema here, with explanations of tcp
+
+The final Simulink schema is presented in Figure~\ref{fig:Simulink_complete}:
+
+\begin{figure}[ht]
+    \centering
+    \includegraphics[width = \textwidth]{Images/polydome_python.pdf}
+    \caption{Simulink Schema of the Complete Simulation}
+    \label{fig:Simulink_complete}
+\end{figure}
+
+The secondary functions of the Simulink model are the weather prediction, as well
+as communication with the Python controller.
+
+The communication between Simulink and the controller is done using three
+separate TCP/IP sockets: one for sending the control signal, one for reading the
+temperature measurement, and one for reading the weather forecast. This is
+mainly due to a Simulink limitation: only one signal can be transferred over a
+single socket. This implementation also has the benefit of providing an
+additional layer of abstraction for the controller and the controlled plant: as
+long as the measurements, the actuators and the weather prediction can
+communicate over TCP, these elements can all be implemented completely
+separately, which is much more similar to a real-life implementation.
+
+With this structure, the only information received and sent by the Python
+controller is the actual sampled data, without any additional information. And
+while the controller needs information on the control horizon in order to read
+the correct amount of data for the weather predictions and to properly generate
+the optimization problem, the discrete/continuous transition and vice-versa
+happens on the Simulink side. This simplifies the adjustment of the sampling
+time, with the downside of harder inclusion of meta-data such as hour of the
+day, day of the week, etc.\ in the \acrlong{gp} Model.
+
+The weather prediction is done using the information present in the CARNOT
+\acrshort{wdb} object. Since the sampling time and control horizon of the
+controller can be adjusted, the required weather predictions can lie within an
+arbitrary time interval. At each sampling point, the weather measurements are
+linearly interpolated for the span of time ranging from the most recent
+measurement to the next measurement after the last required prediction time.
+This provides a better approximation than pure linear interpolation over the
+starting and ending points, while retaining a simple implementation.
 
 \subsection{Gaussian Processes}
 
-% TODO: [Implementation] Cite Tensorflow
-% TODO: [Implementation] Cite GPflow
+As described in Section~\ref{sec:gaussian_processes}, both training and
+evaluating a \acrshort{gp} have an algorithmic complexity of $\mathcal{O}(n^3)$.
+This means that naive implementations can get too expensive in terms of
+computation time very quickly.
 
-\subsection{Classical Gaussian Process training}
-\subsection{Sparse and Variational Gaussian Process training}
+In order to have as small a bottleneck as possible when dealing with
+\acrshort{gp}s, a very optimized implementation of \acrlong{gp} Models was
+used, in the form of GPflow~\cite{matthewsGPflowGaussianProcess2017}. It is
+based on TensorFlow~\cite{tensorflow2015-whitepaper}, which has very efficient
+implementations of all the necessary Linear Algebra operations. Another benefit of
+this implementation is the very simple use of any additional computational
+resources, such as a GPU, TPU, etc.
 
+\subsubsection{Classical Gaussian Process training}
+
+For the training of the classical \acrshort{gp} models, the Scipy optimizer
+provided by GPflow was used. By default, it uses the `L-BFGS-B' optimization
+method, which runs until a local minimum of the negative log likelihood is
+found. Since in the present implementation the \acrshort{gp} models are trained
+only once, with a small amount of initial data, this approach was not only
+sufficient, but also faster than the more complex implementation of training
+used for \acrshort{svgp} models.
+
+\subsubsection{Sparse and Variational Gaussian Process training}
+
+The \acrshort{svgp} models have a more involved optimization procedure due to
+several factors. First, when training an \acrshort{svgp} model, the optimization
+objective is the value of the \acrshort{elbo} (cf.\ Section~\ref{sec:elbo}).
+After several implementations, the more complex \textit{Adam} optimizer turned
+out to provide much faster convergence compared to other optimizers. Second, in
+the case of updating the model once per day with all the historical information,
+the training dataset keeps getting larger each time. In order to combat this,
+the sparse model was trained on minibatches of 1000 datapoints for 10000
+iterations. Evaluating the \acrshort{elbo} on minibatches provides an unbiased
+estimate of the actual value, given enough training iterations. This
+implementation has the benefit of taking constant training time, which becomes
+important later in the simulation, where the training dataset is much larger
+than the initial amount of data.  
 
 \subsection{Optimal Control Problem}
+
+The \acrlong{ocp} has been implemented using the
+CasADi~\cite{anderssonCasADiSoftwareFramework2019} algorithmic differentiation
+framework. It provides an interface between a high-level definition of the
+optimization problems, and the very efficient low-level solvers. 
+
+The implementation of the \acrshort{ocp} defined in
+Equation~\ref{eq:optimal_control_problem} has a few particularities, discussed
+in the following subsections.
+
+\subsubsection{Integrating GPflow models in CasADi}
+
+The first particularity of implementing the \acrshort{ocp} with CasADi is
+the use of the CasADi callback objects. The purpose of these objects is
+integration of external functions into CasADi. Generally, using callbacks is not
+advised because each call to the external function incurs additional overhead.
+These callbacks usually can't directly provide information on the
+forward/reverse sensitivities, used by CasADi to drastically reduce computation
+time. In the absence of this information, CasADi has to do many evaluations
+around a point in order to approximate the gradients. TensorFlow keeps track of
+all the computational graph's gradients, which can be accessed at a cost
+slightly higher than the evaluation cost of the function.
+
+Integrating the gradient information into the CasADi callbacks reduces the
+number of function calls by around an order of magnitude, which already
+drastically reduces computation time.
+
+Another significant speed improvement comes from transforming the Python calls
+to TensorFlow into native tf-functions. This change incurs a small overhead the
+first time the optimization problem is run since all the TensorFlow functions
+have to be compiled before execution, but afterwards speeds up the execution by
+around another order of magnitude.
+
+The last optimization done to the CasADi implementation is the use of the MA27
+linear solver provided by the HSL optimization
+library~\cite{HSLCollectionFortran}. This change results in a speedup of around
+10\% compared to using the default MUMPS solver. While not as drastic as the
+other improvements, this still provides a significant reduction in the runtime
+of the whole year simulation.
+
 \subsubsection{Sparse Implementation of the Optimization Problem}
 
 The optimization problem as presented in
@@ -36,6 +160,9 @@ much faster to solve than the original problem.
 
 Let $w_l$, $u_l$, and $y_l$ be the lengths of the state vector components
 $\mathbf{w}$, $\mathbf{u}$, $\mathbf{y}$ (cf. Equation~\ref{eq:components}).
+Also, let X be the matrix of all the system states over the optimization horizon
+and W be the matrix of the predicted disturbances for all the future steps. The
+original \acrlong{ocp} can be rewritten as:
 
 \begin{subequations}\label{eq:sparse_optimal_control_problem}
     \begin{align}
@@ -51,15 +178,12 @@ $\mathbf{w}$, $\mathbf{u}$, $\mathbf{y}$ (cf. Equation~\ref{eq:components}).
     \end{align}
 \end{subequations}
 
-where X is the matrix of all the system states and W is the matrix of the
-disturbances.
+\subsection{Python server}
 
-\subsubsection{RENAME: Python implementation of the control problem}
-% TODO: [Implementation] Cite CasADi
-% TODO: [Implementation] Cite HSL solvers for using MA27
-
-
-\subsection{Python server and controller objects}
+The Python server is responsible for the control part of the simulation. It
+decides which controller is active, is responsible for training and updating
+the \acrshort{gp} and \acrshort{svgp} models, as well as keeping track of all
+the intermediate results for analysis.
 
 
 \clearpage
diff --git a/glossaries.tex b/glossaries.tex
index 85db4cf..3111019 100644
--- a/glossaries.tex
+++ b/glossaries.tex
@@ -4,6 +4,11 @@
 
 % Acronyms
 
+\newacronym{ocp}{OCP}{Optimal Control Problem}
+\newacronym{mpc}{MPC}{Model Predictive Control}
+
+\newacronym{wdb}{WDB}{Weather Data Bus}
+
 \newacronym{hvac}{HVAC}{Heating and Ventilation System}
 \newacronym{dni}{DNI}{Direct Normal Irradiance}
 \newacronym{dhi}{DHI}{Diffuse Horizontal Irradiance}
@@ -34,5 +39,3 @@
 \newacronym{noe}{NOE}{Nonlinear output error}
 \newacronym{narmax}{NARMAX}{Nonlinear autoregressive and moving average model
 with exogenous input}
-
-\newacronym{ocp}{OCP}{Optimal Control Problem}
diff --git a/main.tex b/main.tex
index b444bb7..5d6e2a2 100644
--- a/main.tex
+++ b/main.tex
@@ -89,7 +89,8 @@
 }
 \renewcommand{\familydefault}{\sfdefault}
 
-\title{Inter-seasonal performance of Gaussian Process models for building temperature prediction}
+\title{Multi-seasonal performance of Gaussian Process models for building
+temperature control}
 \author{Radu C. Martin}
 
 %header
@@ -97,8 +98,7 @@
 \usepackage{fancyhdr}
 \pagestyle{fancy}
 \setlength\headheight{35pt}
-%\fancyhf{Inter-seasonal performance of GP models for building temperature prediction}
-\fancyhf{Inter-seasonal GP performance for HVAC}
+\fancyhf{Multi-season GP performance for buildings}
 \rhead{\includegraphics[width=2cm]{Logo-EPFL.png}}
 \lhead{}
 \cfoot{\thepage}
diff --git a/references.bib b/references.bib
index c7a0229..0620b63 100644
--- a/references.bib
+++ b/references.bib
@@ -238,6 +238,22 @@
   langid = {english}
 }
 
+@article{kabzanLearningBasedModelPredictive2019,
+  title = {Learning-{{Based Model Predictive Control}} for {{Autonomous Racing}}},
+  author = {Kabzan, Juraj and Hewing, Lukas and Liniger, Alexander and Zeilinger, Melanie N.},
+  date = {2019-10},
+  journaltitle = {IEEE Robotics and Automation Letters},
+  volume = {4},
+  pages = {3363--3370},
+  issn = {2377-3766},
+  doi = {10.1109/LRA.2019.2926677},
+  abstract = {In this letter, we present a learning-based control approach for autonomous racing with an application to the AMZ Driverless race car gotthard. One major issue in autonomous racing is that accurate vehicle models that cover the entire performance envelope of a race car are highly nonlinear, complex, and complicated to identify, rendering them impractical for control. To address this issue, we employ a relatively simple nominal vehicle model, which is improved based on measurement data and tools from machine learning. The resulting formulation is an online learning data-driven model predictive controller, which uses Gaussian processes regression to take residual model uncertainty into account and achieve safe driving behavior. To improve the vehicle model online, we select from a constant in-flow of data points according to a criterion reflecting the information gain, and maintain a small dictionary of 300 data points. The framework is tested on the full-size AMZ Driverless race car, where it is able to improve the vehicle model and reduce lap times by 10\% while maintaining safety of the vehicle.},
+  eventtitle = {{{IEEE Robotics}} and {{Automation Letters}}},
+  file = {/home/radu/Zotero/storage/CUJ9MGH7/Kabzan et al. - 2019 - Learning-Based Model Predictive Control for Autono.pdf;/home/radu/Zotero/storage/77YZEZQB/8754713.html},
+  keywords = {Adaptive systems,autonomous racing,Autonomous vehicles,learning and adaptive systems,Learning systems,Model learning for control,model predictive control,Predictive control,Vehicle dynamics},
+  number = {4}
+}
+
 @online{KernelCookbooka,
   title = {Kernel {{Cookbook}}},
   url = {https://www.cs.toronto.edu/~duvenaud/cookbook/},
@@ -266,6 +282,26 @@
   series = {Advances in {{Industrial Control}}}
 }
 
+@article{liuExperimentalAnalysisSimulated2006,
+  title = {Experimental Analysis of Simulated Reinforcement Learning Control for Active and Passive Building Thermal Storage Inventory: {{Part}} 2: {{Results}} and Analysis},
+  shorttitle = {Experimental Analysis of Simulated Reinforcement Learning Control for Active and Passive Building Thermal Storage Inventory},
+  author = {Liu, Simeng and Henze, Gregor P.},
+  date = {2006-02-01},
+  journaltitle = {Energy and Buildings},
+  shortjournal = {Energy and Buildings},
+  volume = {38},
+  pages = {148--161},
+  issn = {0378-7788},
+  doi = {10.1016/j.enbuild.2005.06.001},
+  url = {https://www.sciencedirect.com/science/article/pii/S0378778805000861},
+  urldate = {2021-06-20},
+  abstract = {This paper is the second part of a two-part investigation of a novel approach to optimally control commercial building passive and active thermal storage inventory. The proposed building control approach is based on simulated reinforcement learning, which is a hybrid control scheme that combines features of model-based optimal control and model-free learning control. An experimental study was carried out to analyze the performance of a hybrid controller installed in a full-scale laboratory facility. The first paper introduced the theoretical foundation of this investigation including the fundamental theory of reinforcement learning control. This companion paper presents a discussion and analysis of the experiment results. The results confirm the feasibility of the proposed control approach. Operating cost savings were attained with the proposed control approach compared with conventional building control; however, the savings are lower than for the case of model-based predictive optimal control. As for the case of model-based predictive control, the performance of the hybrid controller is largely affected by the quality of the training model, and extensive real-time learning is required for the learning controller to eliminate any false cues it receives during the initial training period. Nevertheless, compared with standard reinforcement learning, the proposed hybrid controller is much more readily implemented in a commercial building.},
+  file = {/home/radu/Zotero/storage/S7QXQJVH/Liu and Henze - 2006 - Experimental analysis of simulated reinforcement l.pdf;/home/radu/Zotero/storage/I3GBEBHA/S0378778805000861.html},
+  keywords = {Learning control,Load shifting,Optimal control,Reinforcement learning,Thermal Energy Storage (TES)},
+  langid = {english},
+  number = {2}
+}
+
 @article{liuUnderstandingComparingScalable2019,
   title = {Understanding and Comparing Scalable {{Gaussian}} Process Regression for Big Data},
   author = {Liu, Haitao and Cai, Jianfei and Ong, Yew-Soon and Wang, Yi},
@@ -449,4 +485,33 @@
   organization = {{Medium}}
 }
 
+@online{zengAdaptiveMPCScheme2021,
+  title = {An Adaptive {{MPC}} Scheme for Energy-Efficient Control of Building {{HVAC}} Systems},
+  author = {Zeng, Tingting and Barooah, Prabir},
+  date = {2021-02-07},
+  url = {http://arxiv.org/abs/2102.03856},
+  urldate = {2021-06-20},
+  abstract = {An autonomous adaptive MPC architecture is presented for control of heating, ventilation and air condition (HVAC) systems to maintain indoor temperature while reducing energy use. Although equipment use and occupant changes with time, existing MPC methods are not capable of automatically relearning models and computing control decisions reliably for extended periods without intervention from a human expert. We seek to address this weakness. Two major features are embedded in the proposed architecture to enable autonomy: (i) a system identification algorithm from our prior work that periodically re-learns building dynamics and unmeasured internal heat loads from data without requiring re-tuning by experts. The estimated model is guaranteed to be stable and has desirable physical properties irrespective of the data; (ii) an MPC planner with a convex approximation of the original nonconvex problem. The planner uses a descent and convergent method, with the underlying optimization problem being feasible and convex. A year long simulation with a realistic plant shows that both of the features of the proposed architecture - periodic model and disturbance update and convexification of the planning problem - are essential to get the performance improvement over a commonly used baseline controller. Without these features, though MPC can outperform the baseline controller in certain situations, the benefits may not be substantial enough to warrant the investment in MPC.},
+  archiveprefix = {arXiv},
+  eprint = {2102.03856},
+  eprinttype = {arxiv},
+  file = {/home/radu/Zotero/storage/5DGTWGXU/Zeng and Barooah - 2021 - An adaptive MPC scheme for energy-efficient contro.pdf;/home/radu/Zotero/storage/TYEAZ4EJ/2102.html},
+  keywords = {Electrical Engineering and Systems Science - Systems and Control},
+  primaryclass = {cs, eess},
+  version = {1}
+}
+
+@inproceedings{zengAutonomousMPCScheme2020,
+  title = {An Autonomous {{MPC}} Scheme for Energy-Efficient Control of Building {{HVAC}} Systems},
+  booktitle = {2020 {{American Control Conference}} ({{ACC}})},
+  author = {Zeng, Tingting and Barooah, Prabir},
+  date = {2020-07},
+  pages = {4213--4218},
+  issn = {2378-5861},
+  doi = {10.23919/ACC45564.2020.9147753},
+  abstract = {Model Predictive Control (MPC) is a promising technique for energy efficient control of Heating, Ventilation, and Air Conditioning (HVAC) systems. However, the need for human involvement limits current MPC strategies from widespread deployment, since (i) model identification algorithms require re-tuning of hyper-parameters, and (ii) optimizers may fail to converge within the available control computation time, or get stuck in a local minimum. In this work we propose an autonomous MPC scheme to overcome these issues. Two major features are embedded in this architecture to enable autonomy: (i) a convex identification algorithm with adaptation to time-varying building dynamics, and (ii) a convex optimizer. The model identification algorithm re-runs periodically so as to handle changes in the building's dynamics. The estimated model is guaranteed to be stable and has desirable physical properties. The optimizer uses a descent and convergent algorithm, with the underlying optimization problem being feasible and convex. Numerical results show that the proposed convex formulation is more reliable in control computation compared to the nonconvex one, and the proposed autonomous MPC architecture reduces energy consumption significantly over a conventional controller.},
+  eventtitle = {2020 {{American Control Conference}} ({{ACC}})},
+  keywords = {Adaptation models,Architecture,Atmospheric modeling,Buildings,Computational modeling,Computer architecture,Heuristic algorithms}
+}
+