Updated Notebook comments

This commit is contained in:
Radu C. Martin 2021-07-31 12:57:47 +02:00
parent 12c879f016
commit 633c4d12d3
5 changed files with 216 additions and 2413 deletions

View file

@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Bayesian Optimisation of starting Gaussian Process hyperparameters"
"# Gaussian Process Model Training and Performance Evaluation"
]
},
{
@@ -213,7 +213,7 @@
"id": "0aba0df5-b0e3-4738-bb61-1dad869d1ea3"
},
"source": [
"## Load previously exported data"
"## Load previously exported CARNOT 'experimental' data"
]
},
{
@@ -226,6 +226,13 @@
"dfs_test = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Separate into training and testing data sets:"
]
},
{
"cell_type": "code",
"execution_count": 16,
@@ -249,6 +256,13 @@
" dfs_test.append(pd.read_csv(f\"../Data/Good_CARNOT/{exp}_table.csv\").rename(columns = {'Power': 'SimulatedHeat'}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Separate columns into exogenous inputs, controlled inputs and outputs:"
]
},
{
"cell_type": "code",
"execution_count": 18,
@@ -262,6 +276,13 @@
"y_cols = ['SimulatedTemp']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Impose the autoregressive lags for each input group:"
]
},
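The lag construction itself is not visible in this hunk; a minimal sketch of how such autoregressive regressors could be built with pandas (the helper name and lag counts are illustrative, only the `SimulatedTemp` column name comes from the notebook):

```python
import pandas as pd

def add_lags(df, cols, n_lags):
    """Append lagged copies of the given columns (t-1 ... t-n_lags)."""
    out = df.copy()
    for col in cols:
        for lag in range(1, n_lags + 1):
            out[f"{col}_l{lag}"] = out[col].shift(lag)
    return out

# toy output series; rows with incomplete history are dropped
df = pd.DataFrame({"SimulatedTemp": [20.0, 20.5, 21.0, 21.2]})
df_lagged = add_lags(df, ["SimulatedTemp"], n_lags=2).dropna()
```

The same pattern would be applied separately to the exogenous (`w`), controlled (`u`) and output (`y`) column groups, each with its own lag order.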
{
"cell_type": "code",
"execution_count": 19,
@@ -511,6 +532,13 @@
" return df_gpr"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Merge all the training dataframes:"
]
},
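The merge step presumably stacks the per-experiment dataframes into one training set; a hedged sketch (the list and column names are illustrative):

```python
import pandas as pd

# each experiment contributes one dataframe; stack them into a single training set
dfs_gpr_train = [pd.DataFrame({"SimulatedTemp": [20.0, 21.0]}),
                 pd.DataFrame({"SimulatedTemp": [22.0, 23.0]})]
df_gpr_train = pd.concat(dfs_gpr_train, ignore_index=True)
```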
{
"cell_type": "code",
"execution_count": 28,
@@ -667,13 +695,19 @@
"df_gpr_train.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select all points in the training dataset:"
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"train_dataset_size = 15 * 96\n",
"train_dataset_size = -1"
]
},
@@ -1401,6 +1435,13 @@
"y_range = np.arange(1,6)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Iterate over all combinations of lags and compute the RMSE, SMSE, LPD and MSLL errors for each:"
]
},
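The notebook's metric implementations are not shown in the diff; a minimal sketch following the standard definitions (e.g. Rasmussen and Williams): RMSE, SMSE (MSE normalised by the test-target variance), LPD (mean Gaussian log predictive density) and MSLL (log loss standardised against a trivial Gaussian fitted to the training targets):

```python
import numpy as np

def rmse(y, mu):
    return np.sqrt(np.mean((y - mu) ** 2))

def smse(y, mu):
    # MSE normalised by the variance of the test targets
    return np.mean((y - mu) ** 2) / np.var(y)

def lpd(y, mu, var):
    # mean log density of y under the Gaussian predictive distribution N(mu, var)
    return np.mean(-0.5 * np.log(2 * np.pi * var) - (y - mu) ** 2 / (2 * var))

def msll(y, mu, var, y_train):
    # model log loss minus the log loss of a trivial Gaussian
    # fitted to the training targets (negative is better than trivial)
    model_nll = 0.5 * np.log(2 * np.pi * var) + (y - mu) ** 2 / (2 * var)
    trivial_nll = (0.5 * np.log(2 * np.pi * np.var(y_train))
                   + (y - np.mean(y_train)) ** 2 / (2 * np.var(y_train)))
    return np.mean(model_nll - trivial_nll)
```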
{
"cell_type": "code",
"execution_count": null,
@@ -1545,6 +1586,13 @@
"## Multistep prediction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select the dataset which will be used for multistep prediction:"
]
},
{
"cell_type": "code",
"execution_count": 54,
@@ -1556,6 +1604,13 @@
"df_output = dfs_gpr_test[test_dataset_idx][dict_cols['y'][1]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select the starting index in the test dataset and the number of consecutive points to simulate:"
]
},
{
"cell_type": "code",
"execution_count": 55,
@@ -1617,26 +1672,6 @@
"plt.title(f\"Multi step prediction over {N_pred} steps for Test dataset {test_dataset_idx}\")\n",
"plt.savefig(f\"../Thesis/Plots/GP_{w_lags}{u_lags}{y_lags}_{train_dataset_size}pts_test_prediction_{N_pred}_steps.pdf\")"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TensorShape([2612, 7])"
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m.data[0].shape"
]
}
],
"metadata": {

View file

@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Bayesian Optimisation of starting Gaussian Process hyperparameters"
"# Sparse and Variational Gaussian Process Model Training and Performance Evaluation"
]
},
{
@@ -78,9 +78,6 @@
"cell_type": "markdown",
"metadata": {
"id": "90fdac33-eed4-4ab4-b2b1-de0f1f27727b",
"jupyter": {
"source_hidden": true
},
"tags": []
},
"source": [
@@ -199,7 +196,7 @@
"id": "0aba0df5-b0e3-4738-bb61-1dad869d1ea3"
},
"source": [
"## Load previously exported data"
"## Load previously exported CARNOT 'experimental' data"
]
},
{
@@ -212,6 +209,13 @@
"dfs_test = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Separate into training and testing data sets:"
]
},
{
"cell_type": "code",
"execution_count": 13,
@@ -235,6 +239,13 @@
" dfs_test.append(pd.read_csv(f\"../Data/Good_CARNOT/{exp}_table.csv\").rename(columns = {'Power': 'SimulatedHeat'}))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Separate columns into exogenous inputs, controlled inputs and outputs:"
]
},
{
"cell_type": "code",
"execution_count": 15,
@@ -248,6 +259,13 @@
"y_cols = ['SimulatedTemp']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Impose the autoregressive lags for each input group:"
]
},
{
"cell_type": "code",
"execution_count": 16,
@@ -497,6 +515,13 @@
" return df_gpr"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Merge all the training dataframes:"
]
},
{
"cell_type": "code",
"execution_count": 25,
@@ -647,15 +672,6 @@
"df_gpr_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"#df_gpr_train = df_gpr_train.sample(n = 500)"
]
},
{
"cell_type": "code",
"execution_count": 27,
@@ -1245,6 +1261,13 @@
"y_lags = 5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Iterate over all combinations of lags and compute the RMSE, SMSE, LPD and MSLL errors for each:"
]
},
{
"cell_type": "code",
"execution_count": 54,
@@ -1470,6 +1493,13 @@
"## Multistep prediction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select the dataset which will be used for multistep prediction:"
]
},
{
"cell_type": "code",
"execution_count": 47,
@@ -1481,6 +1511,13 @@
"df_output = dfs_gpr_test[test_dataset_idx][dict_cols['y'][1]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select the starting index in the test dataset and the number of consecutive points to simulate:"
]
},
{
"cell_type": "code",
"execution_count": 48,

View file

@@ -75,6 +75,13 @@
"## GP model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define the underlying function that will be modeled:"
]
},
{
"cell_type": "code",
"execution_count": 8,
@@ -108,6 +115,13 @@
"})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sample `n` points and add measurement noise:"
]
},
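The notebook's underlying test function is hidden in this diff; the sampling-plus-noise step might look like the following sketch (the function, bounds and noise level are placeholders, only the `X_sampled`/`Y_sampled` names come from the notebook):

```python
import numpy as np

rng = np.random.default_rng(0)

def f(x):
    # illustrative 1-D test function, not the notebook's actual one
    return np.sin(3 * x) + 0.5 * x

n = 20
X_sampled = rng.uniform(-2, 2, size=(n, 1))
noise = rng.normal(0, 0.1, size=(n, 1))  # additive measurement noise
Y_sampled = f(X_sampled) + noise
```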
{
"cell_type": "code",
"execution_count": 11,
@@ -142,6 +156,13 @@
"Y_sampled = Y_sampled + noise"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Plot the function and the sampled points:"
]
},
{
"cell_type": "code",
"execution_count": 14,
@@ -167,6 +188,13 @@
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train the GP"
]
},
{
"cell_type": "code",
"execution_count": 15,
@@ -362,6 +390,13 @@
"## CasADi part"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define the CasADi callback:"
]
},
{
"cell_type": "code",
"execution_count": 23,
@@ -1074,6 +1109,13 @@
"grads"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define another CasADi callback, which directly uses the GPflow gradients:"
]
},
{
"cell_type": "code",
"execution_count": 29,

File diff suppressed because one or more lines are too long

View file

@@ -169,6 +169,14 @@
"### Compute the time index"
]
},
{
"cell_type": "markdown",
"id": "0a39bb25-f673-488b-99a4-57ee0d789f54",
"metadata": {},
"source": [
"The time index is computed by adding the elapsed time (`sample nr.` * `Tsample`) to the dataset start time. Since the CARNOT weather set represents the year 2010, the starting time is taken as 2010-01-01 at midnight."
]
},
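The computation described above can be sketched as follows (a hedged illustration; the 15 min `Tsample` is taken from the surrounding notebooks, the variable names are made up):

```python
import numpy as np
import pandas as pd

Tsample = 15  # sampling period in minutes (assumed from the text)
t0 = pd.Timestamp("2010-01-01 00:00")  # CARNOT weather data covers 2010

# time index = start time + sample nr. * Tsample
sample_nr = np.arange(4)
time_index = t0 + pd.to_timedelta(sample_nr * Tsample, unit="min")
```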
{
"cell_type": "code",
"execution_count": 14,
@@ -278,6 +286,14 @@
"### Get reference temperature"
]
},
{
"cell_type": "markdown",
"id": "8b53a7c0-966f-4c75-8857-1a96ec3135b0",
"metadata": {},
"source": [
"The reference temperature is computed according to the SIA norm, as a function of the last 48 h of outside temperature (at a `Tsample` of 15 min this comes out to 2 * 96 sample points)."
]
},
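The exact SIA weighting is not visible in the diff; as a sketch of the mechanics, a plain 48 h running mean stands in for it below (the series is toy data, the `shift(1)` matches the notebook's "info up to t-1" comment):

```python
import numpy as np
import pandas as pd

N = 2 * 96  # 48 h of history at a 15 min sampling period

# toy outside-temperature series; the actual SIA formula is not shown here
t_out = pd.Series(np.linspace(0.0, 10.0, 4 * 96))
t_mean_48h = t_out.rolling(window=N).mean()
# the reference at time t only uses information up to t-1
df_tref = t_mean_48h.shift(1)
```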
{
"cell_type": "code",
"execution_count": 16,
@@ -292,6 +308,14 @@
"df_tref = df_tref.shift(1) # The reference at time t is computed using info up to t-1"
]
},
{
"cell_type": "markdown",
"id": "5d2e03c9-044a-454b-8a6b-c1a2a6adf6cc",
"metadata": {},
"source": [
"Compute the mean value and standard deviation of the tracking error:"
]
},
{
"cell_type": "code",
"execution_count": 17,
@@ -358,6 +382,14 @@
"}"
]
},
{
"cell_type": "markdown",
"id": "b0fc6fbc-ed75-4724-a5cb-8896a89f5039",
"metadata": {},
"source": [
"Scale the dataset and compute the autoregressive inputs that are passed to the GP:"
]
},
{
"cell_type": "code",
"execution_count": 20,
@@ -548,6 +580,14 @@
"df_output = df_gpr[dict_cols['y'][1]]"
]
},
{
"cell_type": "markdown",
"id": "ddbfd08f-7d5d-4f2e-b43e-a282a0503931",
"metadata": {},
"source": [
"Load the only trained model in the GP case, and the first trained model in the SVGP case:"
]
},
{
"cell_type": "code",
"execution_count": 23,
@@ -571,6 +611,14 @@
" m = model"
]
},
{
"cell_type": "markdown",
"id": "c18135f0-06d8-44e3-b86c-4a84b2508f79",
"metadata": {},
"source": [
"Plot the multistep prediction performance for 25 consecutive points:"
]
},
{
"cell_type": "code",
"execution_count": 25,
@@ -1091,6 +1139,14 @@
"output_notebook()"
]
},
{
"cell_type": "markdown",
"id": "3ec059bc-9ce6-4271-b5cd-5003d1330a0b",
"metadata": {},
"source": [
"### Plot evolution of reference/measured temperature"
]
},
{
"cell_type": "code",
"execution_count": 22,
@@ -1393,6 +1449,14 @@
"plt.savefig(f\"../Thesis/Plots/{sim_id}_abserr.pdf\", bbox_inches='tight')"
]
},
{
"cell_type": "markdown",
"id": "4b0595d2-59d7-4536-b8a3-8c8aced19dc4",
"metadata": {},
"source": [
"### Plot evolution of hyperparameters"
]
},
{
"cell_type": "code",
"execution_count": 29,