Update MixUp Example to Keras 3 #1595

Merged · 2 commits · Nov 11, 2023
Binary file modified examples/vision/img/mixup/mixup_15_1.png
99 changes: 56 additions & 43 deletions examples/vision/ipynb/mixup.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Sayak Paul](https://twitter.com/RisingSayak)<br>\n",
"**Date created:** 2021/03/06<br>\n",
"**Last modified:** 2021/03/06<br>\n",
"**Last modified:** 2023/07/24<br>\n",
"**Description:** Data augmentation using the mixup technique for image classification."
]
},
@@ -47,9 +47,7 @@
"mixup is specifically useful when we are not sure about selecting a set of augmentation\n",
"transforms for a given dataset, medical imaging datasets, for example. mixup can be\n",
"extended to a variety of data modalities such as computer vision, naturallanguage\n",
"processing, speech, and so on.\n",
"\n",
"This example requires TensorFlow 2.4 or higher."
"processing, speech, and so on."
]
},
{
@@ -63,16 +61,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
"\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import keras\n",
"import matplotlib.pyplot as plt\n",
"from tensorflow.keras import layers"
"\n",
"from keras import layers\n",
"\n",
"# TF imports related to tf.data preprocessing\n",
"from tensorflow import data as tf_data\n",
"from tensorflow import image as tf_image\n",
"from tensorflow.random import gamma as tf_random_gamma\n",
""
]
},
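A note on the imports above: with Keras 3, the `KERAS_BACKEND` environment variable is read once at import time, so it must be set before `keras` is imported. A minimal sketch of switching backends (assuming the target framework, e.g. JAX, is installed; the `tf.data` preprocessing in this example remains TensorFlow-based either way):

```python
import os

# Must run before `import keras`; the variable is read at import time.
os.environ["KERAS_BACKEND"] = "jax"  # or "tensorflow", "torch"

import keras

print(keras.backend.backend())  # prints the active backend, e.g. "jax"
```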
{
@@ -83,27 +92,27 @@
"source": [
"## Prepare the dataset\n",
"\n",
"In this example, we will be using the [FashionMNIST](https://github.com/zalandoresearch/fashion-mnist/) dataset. But this same recipe can\n",
"In this example, we will be using the [FashionMNIST](https://github.com/zalandoresearch/fashion-mnist) dataset. But this same recipe can\n",
"be used for other classification datasets as well."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()\n",
"(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()\n",
"\n",
"x_train = x_train.astype(\"float32\") / 255.0\n",
"x_train = np.reshape(x_train, (-1, 28, 28, 1))\n",
"y_train = tf.one_hot(y_train, 10)\n",
"y_train = keras.ops.one_hot(y_train, 10)\n",
"\n",
"x_test = x_test.astype(\"float32\") / 255.0\n",
"x_test = np.reshape(x_test, (-1, 28, 28, 1))\n",
"y_test = tf.one_hot(y_test, 10)"
"y_test = keras.ops.one_hot(y_test, 10)"
]
},
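As a quick sanity check (illustrative, not part of the notebook), the reshaped arrays and one-hot labels should now look like this:

```python
print(x_train.shape)  # (60000, 28, 28, 1): single-channel 28x28 images
print(y_train.shape)  # (60000, 10): one vector of 10 class probabilities per image
print(x_test.shape)   # (10000, 28, 28, 1)
```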
{
@@ -117,13 +126,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"AUTO = tf.data.AUTOTUNE\n",
"AUTO = tf_data.AUTOTUNE\n",
"BATCH_SIZE = 64\n",
"EPOCHS = 10"
]
@@ -139,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -151,22 +160,22 @@
"new_x_train, new_y_train = x_train[val_samples:], y_train[val_samples:]\n",
"\n",
"train_ds_one = (\n",
" tf.data.Dataset.from_tensor_slices((new_x_train, new_y_train))\n",
" tf_data.Dataset.from_tensor_slices((new_x_train, new_y_train))\n",
" .shuffle(BATCH_SIZE * 100)\n",
" .batch(BATCH_SIZE)\n",
")\n",
"train_ds_two = (\n",
" tf.data.Dataset.from_tensor_slices((new_x_train, new_y_train))\n",
" tf_data.Dataset.from_tensor_slices((new_x_train, new_y_train))\n",
" .shuffle(BATCH_SIZE * 100)\n",
" .batch(BATCH_SIZE)\n",
")\n",
"# Because we will be mixing up the images and their corresponding labels, we will be\n",
"# combining two shuffled datasets from the same training data.\n",
"train_ds = tf.data.Dataset.zip((train_ds_one, train_ds_two))\n",
"train_ds = tf_data.Dataset.zip((train_ds_one, train_ds_two))\n",
"\n",
"val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(BATCH_SIZE)\n",
"val_ds = tf_data.Dataset.from_tensor_slices((x_val, y_val)).batch(BATCH_SIZE)\n",
"\n",
"test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)"
"test_ds = tf_data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)"
]
},
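Each element of the zipped `train_ds` is a pair of independently shuffled batches, which is exactly what the `mix_up` function below expects. A small sketch of how an element could be inspected:

```python
# Each element pairs two batches drawn from the same data but shuffled
# independently of each other.
(images_one, labels_one), (images_two, labels_two) = next(iter(train_ds))
print(images_one.shape, labels_one.shape)  # (64, 28, 28, 1) (64, 10)
```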
{
@@ -179,41 +188,42 @@
"\n",
"To perform the mixup routine, we create new virtual datasets using the training data from\n",
"the same dataset, and apply a lambda value within the [0, 1] range sampled from a [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)\n",
" such that, for example, `new_x = lambda * x1 + (1 - lambda) * x2` (where\n",
"\u2014 such that, for example, `new_x = lambda * x1 + (1 - lambda) * x2` (where\n",
"`x1` and `x2` are images) and the same equation is applied to the labels as well."
]
},
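To make the equation concrete, here is a tiny worked example with illustrative values (lambda = 0.3 and two one-hot labels):

```python
import numpy as np

lam = 0.3
y1 = np.array([1.0, 0.0])  # one-hot label for class 0
y2 = np.array([0.0, 1.0])  # one-hot label for class 1

new_y = lam * y1 + (1 - lam) * y2
print(new_y)  # [0.3 0.7], a soft label blending both classes
```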
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"\n",
"def sample_beta_distribution(size, concentration_0=0.2, concentration_1=0.2):\n",
" gamma_1_sample = tf.random.gamma(shape=[size], alpha=concentration_1)\n",
" gamma_2_sample = tf.random.gamma(shape=[size], alpha=concentration_0)\n",
" gamma_1_sample = tf_random_gamma(shape=[size], alpha=concentration_1)\n",
" gamma_2_sample = tf_random_gamma(shape=[size], alpha=concentration_0)\n",
" return gamma_1_sample / (gamma_1_sample + gamma_2_sample)\n",
"\n",
"\n",
"def mix_up(ds_one, ds_two, alpha=0.2):\n",
" # Unpack two datasets\n",
" images_one, labels_one = ds_one\n",
" images_two, labels_two = ds_two\n",
" batch_size = tf.shape(images_one)[0]\n",
" batch_size = keras.ops.shape(images_one)[0]\n",
"\n",
" # Sample lambda and reshape it to do the mixup\n",
" l = sample_beta_distribution(batch_size, alpha, alpha)\n",
" x_l = tf.reshape(l, (batch_size, 1, 1, 1))\n",
" y_l = tf.reshape(l, (batch_size, 1))\n",
" x_l = keras.ops.reshape(l, (batch_size, 1, 1, 1))\n",
" y_l = keras.ops.reshape(l, (batch_size, 1))\n",
"\n",
" # Perform mixup on both images and labels by combining a pair of images/labels\n",
" # (one from each dataset) into one image/label\n",
" images = images_one * x_l + images_two * (1 - x_l)\n",
" labels = labels_one * y_l + labels_two * (1 - y_l)\n",
" return (images, labels)\n"
" return (images, labels)\n",
""
]
},
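The `sample_beta_distribution` helper relies on the identity that if `G1 ~ Gamma(a)` and `G2 ~ Gamma(b)` are independent, then `G1 / (G1 + G2) ~ Beta(a, b)`. A NumPy sketch (illustrative, not part of the notebook) checking this against direct Beta sampling:

```python
import numpy as np

rng = np.random.default_rng(0)
g1 = rng.gamma(shape=0.2, size=100_000)
g2 = rng.gamma(shape=0.2, size=100_000)

via_gamma = g1 / (g1 + g2)             # gamma-ratio construction
direct = rng.beta(0.2, 0.2, 100_000)   # direct Beta(0.2, 0.2) samples

# Both should have mean ~0.5, with most mass near 0 and 1 for alpha = 0.2.
print(via_gamma.mean(), direct.mean())
```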
{
@@ -238,15 +248,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"# First create the new dataset using our `mix_up` utility\n",
"train_ds_mu = train_ds.map(\n",
" lambda ds_one, ds_two: mix_up(ds_one, ds_two, alpha=0.2), num_parallel_calls=AUTO\n",
" lambda ds_one, ds_two: mix_up(ds_one, ds_two, alpha=0.2),\n",
" num_parallel_calls=AUTO,\n",
")\n",
"\n",
"# Let's preview 9 samples from the dataset\n",
@@ -270,27 +281,29 @@
},
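The plotting code of the preview cell is collapsed in this diff; a minimal sketch of such a 9-sample preview, assuming `train_ds_mu`, `matplotlib`, and NumPy from above:

```python
sample_images, sample_labels = next(iter(train_ds_mu))

plt.figure(figsize=(6, 6))
for i in range(9):
    plt.subplot(3, 3, i + 1)
    # Squeeze the channel axis so imshow gets a 2D grayscale array.
    plt.imshow(np.array(sample_images[i]).squeeze(), cmap="gray")
    plt.axis("off")
plt.show()
```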
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"\n",
"def get_training_model():\n",
" model = tf.keras.Sequential(\n",
" model = keras.Sequential(\n",
" [\n",
" layers.Conv2D(16, (5, 5), activation=\"relu\", input_shape=(28, 28, 1)),\n",
" layers.Input(shape=(28, 28, 1)),\n",
" layers.Conv2D(16, (5, 5), activation=\"relu\"),\n",
" layers.MaxPooling2D(pool_size=(2, 2)),\n",
" layers.Conv2D(32, (5, 5), activation=\"relu\"),\n",
" layers.MaxPooling2D(pool_size=(2, 2)),\n",
" layers.Dropout(0.2),\n",
" layers.GlobalAvgPool2D(),\n",
" layers.GlobalAveragePooling2D(),\n",
" layers.Dense(128, activation=\"relu\"),\n",
" layers.Dense(10, activation=\"softmax\"),\n",
" ]\n",
" )\n",
" return model\n"
" return model\n",
""
]
},
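Two details of the updated model are worth noting: the input is now declared with an explicit `layers.Input` rather than an `input_shape` argument on the first layer, matching Keras 3 conventions, and the pooling layer uses the canonical `GlobalAveragePooling2D` spelling. A quick way to inspect the result:

```python
# Illustrative: print the architecture defined above.
model = get_training_model()
model.summary()  # small CNN: two conv/pool stages, global pooling, two dense layers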
{
Expand All @@ -305,14 +318,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"initial_model = get_training_model()\n",
"initial_model.save_weights(\"initial_weights.h5\")"
"initial_model.save_weights(\"initial_weights.weights.h5\")"
]
},
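The new filename is not cosmetic: Keras 3's `save_weights` requires checkpoint paths to end in `.weights.h5`, while whole models use the `.keras` format instead. A small sketch:

```python
model = get_training_model()
model.save_weights("ckpt.weights.h5")  # weights-only checkpoint (suffix enforced)
model.save("full_model.keras")         # full model: architecture and weights
```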
{
Expand All @@ -326,14 +339,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model = get_training_model()\n",
"model.load_weights(\"initial_weights.h5\")\n",
"model.load_weights(\"initial_weights.weights.h5\")\n",
"model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n",
"model.fit(train_ds_mu, validation_data=val_ds, epochs=EPOCHS)\n",
"_, test_acc = model.evaluate(test_ds)\n",
@@ -351,14 +364,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model = get_training_model()\n",
"model.load_weights(\"initial_weights.h5\")\n",
"model.load_weights(\"initial_weights.weights.h5\")\n",
"model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n",
"# Notice that we are NOT using the mixed up dataset here\n",
"model.fit(train_ds_one, validation_data=val_ds, epochs=EPOCHS)\n",
@@ -387,7 +400,7 @@
"source": [
"## Notes\n",
"\n",
"* With mixup, you can create synthetic examples especially when you lack a large\n",
"* With mixup, you can create synthetic examples \u2014 especially when you lack a large\n",
"dataset - without incurring high computational costs.\n",
"* [Label smoothing](https://www.pyimagesearch.com/2019/12/30/label-smoothing-with-keras-tensorflow-and-deep-learning/) and mixup usually do not work well together because label smoothing\n",
"already modifies the hard labels by some factor.\n",
@@ -430,4 +443,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}
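For the label-smoothing comparison mentioned in the Notes cell, Keras applies smoothing through the loss rather than by editing the labels. A sketch of how that experiment could be set up (values illustrative, using the names defined in the notebook):

```python
model = get_training_model()
model.compile(
    loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.1),
    optimizer="adam",
    metrics=["accuracy"],
)
# Train on the un-mixed dataset; combining label smoothing with mixup is
# usually redundant, since both soften the hard labels.
model.fit(train_ds_one, validation_data=val_ds, epochs=EPOCHS)
```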