ml_exercises/notebooks/5b_quality_prediction_backend.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Case Study: Backend\n",
    "\n",
    "This notebook contains the backend code for the app. We first load the model you've trained in the previous notebook `a` (and saved as `\"strength_model.pkl\"`) and then use it to predict the compressive strength and optimize the water content of the concrete mixture.\n",
    "\n",
    "The backend is built using [FastAPI](https://fastapi.tiangolo.com/), a modern, fast (high-performance), web framework for building APIs with Python. Please note that normally this code would be run as a regular script, not in a notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import joblib\n",
    "import nest_asyncio\n",
    "import uvicorn\n",
    "from fastapi import Body, FastAPI\n",
    "from pydantic import BaseModel, Field\n",
    "from scipy.optimize import minimize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load your trained model - make sure it's saved under the right name!\n",
    "# (the relative path is resolved against the current working directory of the kernel)\n",
    "# NOTE: joblib.load unpickles the file, which can execute arbitrary code -> only load files you trust\n",
    "model = joblib.load(\"strength_model.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# where the server will listen; pick the host according to your setup:\n",
    "# \"127.0.0.1\" when running locally, \"0.0.0.0\" when running inside a docker container\n",
    "host, port = \"127.0.0.1\", 8000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# helper function to compute the optimized water content\n",
    "def optimize_water(model, x, value_min=120., value_max=250., target_strength=42.5):\n",
    "    \"\"\"\n",
    "    Optimize the water content for a concrete mixture.\n",
    "\n",
    "    Only the water content is varied (within [value_min, value_max]); all other\n",
    "    values in x stay fixed. The optimization searches for the water content where\n",
    "    the model's strength prediction is as close as possible to the target strength.\n",
    "\n",
    "    Inputs:\n",
    "        - model: the trained model (its `predict` method is called with a dataframe)\n",
    "        - x: pandas dataframe with one row (= one data point); must contain a \"water\" column\n",
    "        - value_min: minimum bound for water content (default: 120.)\n",
    "        - value_max: maximum bound for water content (default: 250.)\n",
    "        - target_strength: what we would like the output to be (default: 42.5)\n",
    "    Returns (all as built-in floats):\n",
    "        - water_org: original water content\n",
    "        - water_new: optimized water content\n",
    "        - pred_org: original strength prediction of the model\n",
    "        - pred_new: strength prediction with optimized water content\n",
    "    \"\"\"\n",
    "    # original situation\n",
    "    # (cast to built-in floats, since numpy scalars like np.float32 are not JSON serializable)\n",
    "    water_org = float(x[\"water\"].values[0])\n",
    "    pred_org = float(model.predict(x)[0])\n",
    "    print(f\"original prediction with water content {water_org:.1f}: {pred_org:.2f} MPa\")\n",
    "\n",
    "    def _loss_fun(water_value):\n",
    "        \"\"\"\n",
    "        Nested function (i.e., has access to all variables from the enclosing function)\n",
    "        to compute the squared error between the model's strength prediction with the given\n",
    "        water value and our target strength value.\n",
    "\n",
    "        Inputs:\n",
    "            - water_value: np.array with a single value, the proposed water content\n",
    "        Returns:\n",
    "            - loss: the squared error between the predicted and target strength (a scalar)\n",
    "        \"\"\"\n",
    "        # insert the new value into a copy of our original data point\n",
    "        new_x = x.copy()\n",
    "        new_x[\"water\"] = water_value[0]\n",
    "        # predict strength with new water content; the model returns an array with\n",
    "        # one prediction, but the optimizer expects a scalar loss -> take the first element\n",
    "        pred_strength = float(model.predict(new_x)[0])\n",
    "        # optimization loss = squared difference to target value\n",
    "        return (target_strength - pred_strength)**2\n",
    "\n",
    "    # use scipy's minimize function to find a value for 'water'\n",
    "    # where the model predicts something close to our target value.\n",
    "    # to get realistic values, we additionally specify bounds\n",
    "    # based on the actual min/max values for the water content.\n",
    "    # the start value for the optimization is the original water content, clipped\n",
    "    # to the bounds, since the original value is not guaranteed to lie within them\n",
    "    water_start = float(np.clip(water_org, value_min, value_max))\n",
    "    res = minimize(_loss_fun, np.array([water_start]), bounds=[(value_min, value_max)], method=\"Powell\")\n",
    "    # the optimized water content is stored in res.x (again a np.array)\n",
    "    water_new = float(res.x[0])\n",
    "    # check the final strength prediction\n",
    "    new_x = x.copy()\n",
    "    new_x[\"water\"] = water_new\n",
    "    pred_new = float(model.predict(new_x)[0])\n",
    "    print(f\"new prediction with water content {water_new:.1f}: {pred_new:.2f} MPa\")\n",
    "    return water_org, water_new, pred_org, pred_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# FastAPI app instance: the endpoints below are registered on it via decorators\n",
    "# and it is later passed to uvicorn to be served\n",
    "app = FastAPI()\n",
    "\n",
    "# pydantic model used for data validation of the request body sent to /predict:\n",
    "# every field must be a number within the given bounds (ge/le = inclusive lower/upper bound)\n",
    "# note: the allowed range for water (50-500) is wider than the default\n",
    "# optimization bounds used by optimize_water (120-250)\n",
    "class ConcreteRecipe(BaseModel):\n",
    "    # ingredient amounts of the original recipe (no defaults, i.e., all of them are required)\n",
    "    cement: float = Field(ge=50., le=650., description=\"Cement amount in kg/m^3\")\n",
    "    slag: float = Field(ge=0., le=500., description=\"Slag amount in kg/m^3\")\n",
    "    fly_ash: float = Field(ge=0., le=300., description=\"Fly ash amount in kg/m^3\")\n",
    "    fine_aggregate: float = Field(ge=400., le=1100., description=\"Fine aggregate amount in kg/m^3\")\n",
    "    coarse_aggregate: float = Field(ge=500., le=1500., description=\"Coarse aggregate amount in kg/m^3\")\n",
    "    plasticizer: float = Field(ge=0., le=50., description=\"Plasticizer amount in kg/m^3\")\n",
    "    water: float = Field(ge=50., le=500., description=\"Water amount in kg/m^3\")\n",
    "    # optional: if not given, the default of 42.5 is used as the target\n",
    "    target_strength: float = Field(default=42.5, ge=0., le=100., description=\"Desired compressive strength in MPa\")\n",
    "\n",
    "# root endpoint: a GET request to the base URI (\"/\") returns a fixed message,\n",
    "# which is handy to check that the server is up and reachable\n",
    "@app.get(\"/\")\n",
    "def home():\n",
    "    greeting = \"Congratulations! Your API is working.\"\n",
    "    return greeting\n",
    "\n",
    "# endpoint for /predict, which can be queried with a POST request to send the concrete recipe data\n",
    "@app.post(\"/predict\")\n",
    "def predict_and_optimize(concrete_recipe: ConcreteRecipe):\n",
    "    \"\"\"\n",
    "    Predict the compressive strength for the given concrete recipe and optimize its\n",
    "    water content such that the predicted strength gets close to the target strength.\n",
    "\n",
    "    Returns the original and optimized water content (`water_org`, `water_new`)\n",
    "    together with the corresponding strength predictions (`pred_org`, `pred_new`).\n",
    "    \"\"\"\n",
    "    # transform the given data into a pandas dataframe with one row\n",
    "    # NOTE(review): the column order presumably has to match the one used for training the model - verify\n",
    "    features = [\"cement\", \"slag\", \"fly_ash\", \"water\", \"plasticizer\", \"coarse_aggregate\", \"fine_aggregate\"]\n",
    "    # read the values directly from the validated object; this works with all pydantic\n",
    "    # versions (the `.dict()` method is deprecated in pydantic v2)\n",
    "    x = pd.DataFrame({c: [getattr(concrete_recipe, c)] for c in features}, columns=features)\n",
    "    # make predictions and get optimized water content\n",
    "    water_org, water_new, pred_org, pred_new = optimize_water(\n",
    "        model, x, target_strength=concrete_recipe.target_strength\n",
    "    )\n",
    "    # return the computed values as a JSON;\n",
    "    # cast to built-in floats first, since numpy scalars that are not a subclass\n",
    "    # of float (e.g., np.float32 predictions) can't be serialized\n",
    "    return {\n",
    "        \"water_org\": float(water_org),\n",
    "        \"water_new\": float(water_new),\n",
    "        \"pred_org\": float(pred_org),\n",
    "        \"pred_new\": float(pred_new),\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By running the following cell you will spin up the server!\n",
    "\n",
    "This causes the notebook to block (no other cells/code can run) until you manually interrupt the kernel. You can do this either by clicking on the Kernel tab and then on Interrupt, or by pressing the `ESC` key to enter Jupyter's command mode and then tapping the `I` key twice."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# allow the server to be run in this interactive environment:\n",
    "# the notebook kernel already runs an asyncio event loop, so asyncio needs to be\n",
    "# patched to allow the server to start its own loop inside of it\n",
    "nest_asyncio.apply()\n",
    "# spin up the server (this call blocks until the kernel is interrupted)\n",
    "uvicorn.run(app, host=host, port=port)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}