{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# AI Tools for Actuaries\n", "## Chapter 5: LocalGLMnet in Python - PyTorch\n", "### Author: Marco Maggi, Mario Wuthrich\n", "### Version Summer School August 2025" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Import required libraries\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import statsmodels.api as sm\n", "from statsmodels.formula.api import glm\n", "\n", "pd.options.mode.chained_assignment = None\n", "\n", "# Set random seed\n", "rng = np.random.default_rng(500)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load the data (load the data with the entity embeddings)\n", "df = pd.read_parquet(\"../../Data/freMTPL2freqEmb.parquet\")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Add random component\n", "df[\"RandN\"] = rng.normal(0, 1, size=len(df))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learn = df[df[\"LearnTest\"] == \"L\"]\n", "test = df[df[\"LearnTest\"] == \"T\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pre-process data for LocalGLMnet" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.compose import ColumnTransformer\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import (\n", " FunctionTransformer,\n", " OrdinalEncoder,\n", " StandardScaler,\n", ")\n", "\n", "\n", "def clip_and_scale(upper):\n", " pipe = Pipeline(\n", " steps=[\n", " (\"clip\", FunctionTransformer(lambda x: np.clip(x, a_min=0, a_max=upper))),\n", " (\"scale\", StandardScaler()),\n", " ]\n", " )\n", " return pipe\n", "\n", "\n", "density = Pipeline(\n", " steps=[\n", " (\"log\", FunctionTransformer(lambda x: np.log(x).round(2))),\n", " (\"scale\", StandardScaler()),\n", " ]\n", ")\n", "\n", "area = Pipeline(\n", " steps=[\n", " (\"encode\", OrdinalEncoder()),\n", " (\"scale\", StandardScaler()),\n", " ]\n", ")\n", "\n", "preprocessor = ColumnTransformer(\n", " transformers=[\n", " (\n", " \"clip_and_scale\",\n", " clip_and_scale([20, 90, 150, 15]),\n", " [\"VehAge\", \"DrivAge\", \"BonusMalus\", \"VehPower\"],\n", " ),\n", " (\n", " \"scale\",\n", " StandardScaler(),\n", " [\"RegionEmb1\", \"RegionEmb2\", \"VehBrandEmb1\", \"VehBrandEmb2\"],\n", " ),\n", " (\"area\", area, [\"Area\"]),\n", " (\"density\", density, [\"Density\"]),\n", " (\n", " \"veh_gas\",\n", " FunctionTransformer(lambda x: (x == \"Diesel\").astype(np.float32)),\n", " [\"VehGas\"],\n", " ),\n", " (\"veh_brand\", OrdinalEncoder(), [\"VehBrand\", \"Region\"]),\n", " (\"passthrough\", \"passthrough\", [\"RandN\", \"ClaimNb\"]),\n", " ],\n", " verbose_feature_names_out=False,\n", ")\n", "\n", "\n", "# Just a check: Fit preprocessor to training data and apply to some lines from test\n", "preprocessor.set_output(transform=\"pandas\").fit(learn)\n", "preprocessor.transform(test.head())\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(f\"Learning set size: {len(learn)}\")\n", "print(f\"Test set size: {len(test)}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_learn = preprocessor.fit_transform(learn)\n", "X_learn.head(3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### GLM baseline analysis" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We start with a baseline GLM to initialize the LocalGLMnet suitably\n", "\n", "# Features\n", "features = [\n", " \"Area\",\n", " \"VehPower\",\n", " \"VehAge\",\n", " \"DrivAge\",\n", " \"BonusMalus\",\n", " \"VehGas\",\n", " \"Density\",\n", " \"VehBrandEmb1\",\n", " \"VehBrandEmb2\",\n", " \"RegionEmb1\",\n", " \"RegionEmb2\",\n", " \"RandN\",\n", "]\n", "\n", "# Fit a Poisson GLM using the package statsmodels.\n", "# Set `ClaimNb` as response variable and `features` as covariates.\n", "# Use log `Exposure` as offset.\n", "model = None # replace None with your code\n", "\n", "glm_results = model.fit()\n", "\n", "# Display model summary\n", "glm_results.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Calculate deviance losses" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X_test = preprocessor.transform(test)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_poisson_deviance\n", "\n", "# Get predictions\n", "learn[\"GLM\"] = glm_results.predict(X_learn)\n", "test[\"GLM\"] = glm_results.predict(X_test)\n", "\n", "# Calculate in-sample and out-of-sample deviance\n", "learn_deviance = 100 * mean_poisson_deviance(\n", " learn[\"ClaimNb\"] / learn[\"Exposure\"], learn[\"GLM\"], sample_weight=learn[\"Exposure\"]\n", ")\n", "test_deviance = 100 * mean_poisson_deviance(\n", " test[\"ClaimNb\"] / test[\"Exposure\"], test[\"GLM\"], sample_weight=test[\"Exposure\"]\n", ")\n", "\n", "print(\"Deviance Losses:\")\n", "print(f\"Learning sample: {learn_deviance:.3f}\")\n", "print(f\"Test sample: {test_deviance:.3f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## LocalGLMnet\n", "We have performed all the preparatory work above, and now we dive into the LocalGLMnet" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define the LocalGLMnet arichtecture (of depth 3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "from torch import nn\n", "from torch.nn import init\n", "\n", "simpel = nn.Linear(2, 4)\n", "simpel.weight" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "from torch import nn\n", "from torch.nn import init\n", "\n", "\n", "class FNN(nn.Module):\n", " def __init__(self, seed, n_features, hidden_layers, intercept, glm_coefs):\n", " super().__init__()\n", " torch.manual_seed(seed)\n", " self.hidden_layers = nn.ModuleList()\n", " for i in range(len(hidden_layers)):\n", " if i == 0:\n", " self.hidden_layers.append(nn.Linear(n_features, hidden_layers[i]))\n", " else:\n", " self.hidden_layers.append(\n", " nn.Linear(hidden_layers[i - 1], hidden_layers[i])\n", " )\n", " # Define the last layer of the neural network which fits the\n", " # `attention weights`\n", " self.local_glm = None # replace None with your code\n", " # Initialize the weights and biases of the last layer such that the SGD\n", " # will start from the MLE estimates of the GLM.\n", " self.local_glm.bias.data = None # replace None with your code\n", " init.constant_(None, 0.0) # replace None with your code\n", " # Define the intercept as trainable parameter and initialize it with the GLM intercept.\n", " self.intercept = nn.Parameter(None) # replace None with your code\n", "\n", " def forward(self, design, v, get_attentions=False):\n", " # Implement the forward pass of the LocalGLMnet.\n", " x = torch.tanh(self.hidden_layers[0](design))\n", " for layer in self.hidden_layers[1:]:\n", " x = None # replace None with your code\n", " x = None # replace None with your code\n", " if get_attentions:\n", " pass # replace pass with your code\n", " skip_connection = torch.einsum(\"ij,ij->i\", x, design).unsqueeze(1)\n", " x = None # replace None with your code\n", " return torch.exp(x).flatten() * v\n", "\n", "\n", "SEED = 21456783\n", "M_FEAT = len(features) # number of features\n", "HIDDEN = [20, 15, 10]\n", "# Create model with three hidden layers\n", "model = FNN(\n", " SEED,\n", " n_features=M_FEAT,\n", " hidden_layers=HIDDEN,\n", " intercept=glm_results.params.iloc[0],\n", " glm_coefs=glm_results.params.to_numpy()[1:],\n", ")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Check that the LocalGLMnet before training replicates the GLM." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Backtest the initalization\n", "model.eval()\n", "X_test_tensor = torch.tensor(X_test[features].values, dtype=torch.float32)\n", "X_learn_tensor = torch.tensor(X_learn[features].values, dtype=torch.float32)\n", "exposure_test_tensor = torch.tensor(\n", " test[\"Exposure\"].astype(\"float32\").values, dtype=torch.float32\n", ")\n", "exposure_learn_tensor = torch.tensor(\n", " learn[\"Exposure\"].astype(\"float32\").values, dtype=torch.float32\n", ")\n", "\n", "# call the LocalGLMnet to get predictions on test and learn data.\n", "# replace None with your code.\n", "test_GLM = None.detach().numpy()\n", "learn_GLM = None.detach().numpy()\n", "\n", "# Exposure\n", "V_learn = learn[\"Exposure\"]\n", "V_test = test[\"Exposure\"]\n", "\n", "# Response\n", "Y_learn = learn[\"ClaimNb\"]\n", "Y_test = test[\"ClaimNb\"]\n", "\n", "poisson_deviance_train_glm = 100 * mean_poisson_deviance(\n", " Y_learn / V_learn, learn_GLM / V_learn, sample_weight=V_learn\n", ")\n", "poisson_deviance_test_glm = 100 * mean_poisson_deviance(\n", " Y_test / V_test, test_GLM / V_test, sample_weight=V_test\n", ")\n", "print(\n", " \"Poisson Deviance (Train, Test):\",\n", " round(poisson_deviance_train_glm, 3),\n", " round(poisson_deviance_test_glm, 3),\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Train the LocalGLMnet model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from copy import deepcopy\n", "\n", "\n", "def train_model(\n", " model,\n", " X_train,\n", " y_train,\n", " v_train,\n", " X_val,\n", " y_val,\n", " v_val,\n", " optimizer,\n", " checkpoint_path,\n", " batch_size,\n", " n_epochs=100,\n", "):\n", " loss_fn = nn.PoissonNLLLoss(log_input=False, reduction=\"sum\")\n", " best_val_loss = float(\"inf\")\n", " history = {\"loss\": [], \"val_loss\": []}\n", "\n", " # Create dataset indices for batching\n", " num_batches = (len(X_train) + batch_size - 1) // batch_size\n", "\n", " for epoch in range(n_epochs):\n", " # Training phase\n", " model.train()\n", " epoch_loss = 0.0\n", " # Indices can be shuffled for each epoch. We don't do it here.\n", " indices = torch.arange(len(X_train))\n", "\n", " for i in range(num_batches):\n", " # Get batch indices\n", " batch_indices = indices[\n", " i * batch_size : min((i + 1) * batch_size, len(X_train))\n", " ]\n", "\n", " # Get batch data\n", " X_batch = X_train[batch_indices]\n", " v_batch = v_train[batch_indices]\n", " y_batch = y_train[batch_indices]\n", "\n", " # Forward pass\n", " pred_batch = model(X_batch, v_batch)\n", " loss = loss_fn(pred_batch, y_batch)\n", "\n", " # Backward pass and optimize\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " epoch_loss += loss.item()\n", "\n", " # Average loss for the epoch\n", " epoch_loss /= v_train.sum().item()\n", " history[\"loss\"].append(epoch_loss)\n", "\n", " # Validation phase\n", " model.eval()\n", " with torch.no_grad():\n", " pred_val = model(X_val, v_val)\n", " val_loss = (loss_fn(pred_val, y_val) / v_val.sum()).item()\n", " history[\"val_loss\"].append(val_loss)\n", "\n", " # Store best model\n", " if val_loss < best_val_loss and isinstance(checkpoint_path, str):\n", " best_val_loss = val_loss\n", " best_model = deepcopy(model)\n", "\n", " # Print progress\n", " if (epoch + 1) % 10 == 0:\n", " print(\n", " f\"Epoch {epoch + 1}/{n_epochs}, Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}\"\n", " )\n", " torch.save(best_model.state_dict(), checkpoint_path)\n", " return history\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "convert_to_tensor = lambda x: torch.tensor(x.values, dtype=torch.float32)\n", "train, val = train_test_split(learn, test_size=0.1, random_state=125548)\n", "\n", "X_learn = convert_to_tensor(preprocessor.transform(learn)[features])\n", "X_train = convert_to_tensor(preprocessor.transform(train)[features])\n", "X_val = convert_to_tensor(preprocessor.transform(val)[features])\n", "X_test = convert_to_tensor(preprocessor.transform(test)[features])\n", "\n", "y_learn, v_learn = convert_to_tensor(learn.ClaimNb), convert_to_tensor(learn.Exposure)\n", "y_train, v_train = convert_to_tensor(train.ClaimNb), convert_to_tensor(train.Exposure)\n", "y_val, v_val = convert_to_tensor(val.ClaimNb), convert_to_tensor(val.Exposure)\n", "y_test, v_test = convert_to_tensor(test.ClaimNb), convert_to_tensor(test.Exposure)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "optimizer = torch.optim.NAdam(model.parameters())\n", "checkpoint_path = f\"./Networks/LocalGLMnet_{SEED}.pt\"\n", "history = train_model(\n", " model,\n", " X_train,\n", " y_train,\n", " v_train,\n", " X_val,\n", " y_val,\n", " v_val,\n", " optimizer,\n", " checkpoint_path,\n", " batch_size=5_000,\n", " n_epochs=100,\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Plot training history (vertical line at best validation loss)\n", "fig = (\n", " pd.DataFrame({\"loss\": history[\"loss\"], \"val_loss\": history[\"val_loss\"]})\n", " .rename(columns={\"loss\": \"Training\", \"val_loss\": \"Validation\"})\n", " .plot(xlabel=\"Epoch - 1\", ylabel=\"Loss\", title=\"Loss During Training\", grid=True)\n", ")\n", "_ = fig.axvline(np.argmin(history[\"val_loss\"]), color=\"black\", linestyle=\"--\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### LocalGLMnet results" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_poisson_deviance\n", "\n", "\n", "# Helper functions to evaluate the model via average Poisson deviance\n", "def score(model, X, y, v):\n", " \"\"\"Evaluate the model using sklearn's mean_poisson_deviance.\"\"\"\n", " pred = model(X, v).detach().numpy()\n", " return 100 * mean_poisson_deviance(\n", " y.detach().numpy() / v, pred / v, sample_weight=v\n", " )\n", "\n", "\n", "# Load best weights and evaluate\n", "model.load_state_dict(torch.load(checkpoint_path))\n", "model.eval()\n", "\n", "print(\"===GLM===\")\n", "print(\n", " \"Poisson Deviance (Train, Test):\",\n", " round(poisson_deviance_train_glm, 3),\n", " round(poisson_deviance_test_glm, 3),\n", ")\n", "print(\"===LocalGLMnet===\")\n", "print(f\"Poisson Deviance (Learn): {score(model, X_learn, y_learn, v_learn):.3f}\")\n", "print(f\"Poisson Deviance (Test): {score(model, X_test, y_test, v_test):.3f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Illustrate the LocalGLMnet results: extract attention weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# replace None with your code\n", "attention_weights = None.detach().numpy()\n", "attention_df = pd.DataFrame(attention_weights, columns=features)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Boxplot of the attention weights (RandNX is unrelated to the response)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Calculate standard deviation of RandNX (which does not impact the response)\n", "randnx_std = attention_df[\"RandN\"].std()\n", "threshold = 2.576 * randnx_std\n", "\n", "# Plot attention weights\n", "plt.figure(figsize=(12, 6))\n", "attention_df.boxplot()\n", "plt.xticks(rotation=45)\n", "plt.title(\"boxplot of attention weights\")\n", "plt.axhline(y=threshold, color=\"r\", linestyle=\"--\", label=\"+2.576 std dev (99% CI)\")\n", "plt.axhline(y=-threshold, color=\"r\", linestyle=\"--\", label=\"-2.576 std dev (99% CI)\")\n", "plt.axhline(y=0, color=\"k\", linestyle=\"-\", label=\"zero line\")\n", "plt.legend()\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compute importance measure for all variables/terms" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Calculate and plot the variable importance measure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plot individual attention weights for selected variables" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Function to create individual attention weight plots\n", "def plot_attention_weights(feature_name, alpha):\n", " # Get unique values for x-axis\n", " x_values = np.sort(test[feature_name].unique())\n", "\n", " # Create the plot\n", " plt.figure(figsize=(10, 6))\n", "\n", " # Plot the attention weights\n", " plt.scatter(\n", " test[feature_name],\n", " attention_df[feature_name],\n", " alpha=0.5,\n", " s=20,\n", " label=\"attention weights\",\n", " )\n", "\n", " # Add reference lines\n", " plt.axhline(y=0, color=\"cyan\", linestyle=\"-\", label=\"zero line\")\n", " plt.axhline(\n", " y=0.674 * randnx_std,\n", " color=\"orange\",\n", " linestyle=\"-\",\n", " label=\"0.674 std.dev. (50%)\",\n", " )\n", " plt.axhline(y=-0.674 * randnx_std, color=\"orange\", linestyle=\"-\")\n", " plt.axhline(\n", " y=2.576 * randnx_std, color=\"red\", linestyle=\"-\", label=\"2.576 std.dev. (99%)\"\n", " )\n", " plt.axhline(y=-2.576 * randnx_std, color=\"red\", linestyle=\"-\")\n", "\n", " # Add shaded area\n", " plt.fill_between(\n", " [test[feature_name].min(), test[feature_name].max()],\n", " [-0.674 * randnx_std, -0.674 * randnx_std],\n", " [0.674 * randnx_std, 0.674 * randnx_std],\n", " color=\"orange\",\n", " alpha=0.3,\n", " )\n", "\n", " # Add local regression fit\n", " from statsmodels.nonparametric.smoothers_lowess import lowess\n", "\n", " # Sort the data for local regression\n", " sorted_indices = np.argsort(test[feature_name])\n", " x_sorted = test[feature_name].iloc[sorted_indices]\n", " y_sorted = attention_df[feature_name].iloc[sorted_indices]\n", "\n", " # Fit local regression\n", " lowess_fit = lowess(y_sorted, x_sorted, frac=alpha, it=3)\n", "\n", " # Plot the local regression fit\n", " plt.plot(\n", " lowess_fit[:, 0],\n", " lowess_fit[:, 1],\n", " color=\"lightgreen\",\n", " label=\"local regression fit\",\n", " )\n", "\n", " # Customize the plot\n", " plt.title(f\"attention weights: {feature_name}\", fontsize=14)\n", " plt.xlabel(feature_name, fontsize=12)\n", " plt.ylabel(\"attention weights\", fontsize=12)\n", " plt.legend(loc=\"lower right\")\n", "\n", " # Set y-axis limits\n", " # ylim0 = max(abs(attention_df[feature_name]))\n", " ylim0 = np.max(np.abs(attention_df))\n", " plt.ylim(-ylim0, ylim0)\n", "\n", " plt.tight_layout()\n", " plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This is the purely random variable not impacting the response\n", "# we perform a local regression which is a bit time consuming\n", "plot_attention_weights(\"RandN\", 0.3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# BonusMalus is the most significant term\n", "# the local regression is not fully sensible because BonusMalus values cluster at the lowest level\n", "plot_attention_weights(\"BonusMalus\", 0.6)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Driver age variabel\n", "plot_attention_weights(\"DrivAge\", 0.3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "col1 = \"DrivAge\"\n", "col2 = \"BonusMalus\"\n", "y = attention_df[col1]\n", "mask = rng.choice(range(attention_df.shape[0]), size=5000, replace=False)\n", "x = test[col1]\n", "color = test[col2]\n", "color_median = 60\n", "color_binary = (color >= color_median).astype(int)\n", "q1, q2, q3 = 51, 65, 85\n", "color_bins = pd.cut(\n", " color, bins=[-np.inf, q1, q2, q3, np.inf], labels=[0, 1, 2, 3], include_lowest=True\n", ").astype(int)\n", "fig, ax = plt.subplots(figsize=(8, 6))\n", "# ax.set_title(col)\n", "scatter = ax.scatter(\n", " x.values[mask],\n", " y.values[mask],\n", " s=1,\n", " c=color_bins.values[mask],\n", " cmap=\"RdYlBu\",\n", " alpha=0.8,\n", ")\n", "cbar = plt.colorbar(scatter, ax=ax)\n", "cbar.set_ticks([0, 1, 2, 3])\n", "cbar.set_ticklabels(\n", " [\n", " f\"< {q1:.1f}\",\n", " f\"{q1:.1f}-{q2:.1f}\",\n", " f\"{q2:.1f}-{q3:.1f}\",\n", " f\">= {q3:.1f}\",\n", " ]\n", ")\n", "cbar.set_label(f\"{col2}\")\n", "ax.set_ylabel(\"$\\\\beta(\\\\boldsymbol{x})$\")\n", "ax.set_xlabel(col1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Gradient of interactions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch.autograd as autograd\n", "\n", "n, p = X_test.shape\n", "gradients = np.empty((p, n, p))\n", "input_tensor = X_test\n", "\n", "input_tensor.requires_grad = True\n", "attentions = model(X_test, v_test, get_attentions=True)\n", "for i in range(p):\n", " grad_scaling = torch.ones_like(attentions[:, i])\n", " gradient_i = autograd.grad(\n", " attentions[:, i], input_tensor, grad_scaling, create_graph=True\n", " )\n", " gradients[i, :, :] = gradient_i[0].numpy(force=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# plot the gradient of the attention for the variable `DrivAge`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Ensemble" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Train the same neural network using different random seeds, which impact the initial weights of the hidden layers." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "seeds = [\n", " 1752305036,\n", " 4284935567,\n", " 909886011,\n", " 4253642063,\n", " 3875387572,\n", " 2984734056,\n", " 56601707,\n", " 803726624,\n", " 215740934,\n", " 1236640324,\n", "]\n", "for seed in seeds:\n", " model = None # replace None with your code\n", " optimizer = torch.optim.NAdam(model.parameters())\n", " checkpoint_path = f\"./Networks/LocalGLMnet_{seed}.pt\"\n", " history = None # replace None with your code\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get the derivatives of the attentions with respect to the input features for each trained network, then take the average over all trained networks." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "n, p = X_test.shape\n", "gradients = np.empty((10, p, n, p))\n", "input_tensor = X_test\n", "input_tensor.requires_grad = True\n", "for k, seed in enumerate(seeds):\n", " checkpoint_path = f\"./Networks/LocalGLMnet_{seed}.pt\"\n", " model.load_state_dict(torch.load(checkpoint_path))\n", " attentions = model(X_test, v_test, get_attentions=True)\n", " for i in range(p):\n", " grad_scaling = torch.ones_like(attentions[:, i])\n", " gradient_i = autograd.grad(\n", " attentions[:, i], input_tensor, grad_scaling, create_graph=True\n", " )\n", " gradients[k, i, :, :] = gradient_i[0].numpy(force=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# plot the gradient of the attention for the variable `DrivAge`, now based on the ensemble model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# plot the derivative of the attention for the variable `DrivAge` with respect to `BonusMalus`, showing the results of each model of the ensemble" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" } }, "nbformat": 4, "nbformat_minor": 4 }