diff --git a/13_loading_and_preprocessing_data.ipynb b/13_loading_and_preprocessing_data.ipynb index 0c82bb2..0986941 100644 --- a/13_loading_and_preprocessing_data.ipynb +++ b/13_loading_and_preprocessing_data.ipynb @@ -2040,8 +2040,8 @@ "outputs": [], "source": [ "train_set = mnist_dataset(train_filepaths, shuffle_buffer_size=60000)\n", - "valid_set = mnist_dataset(train_filepaths)\n", - "test_set = mnist_dataset(train_filepaths)" + "valid_set = mnist_dataset(valid_filepaths)\n", + "test_set = mnist_dataset(test_filepaths)" ] }, { @@ -2274,7 +2274,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "But let's pretend the dataset does not fit in memory, just to make things more interesting. Luckily, each review fits on just one line (they use `<br />` to indicate line breaks), so we can read the reviews using a `TextLineDataset`. If they didn't we would have to preprocess the input files (e.g., converting them to TFRecords). For very large datasets, it would make sense a tool like Apache Beam for that." + "But let's pretend the dataset does not fit in memory, just to make things more interesting. Luckily, each review fits on just one line (they use `<br />` to indicate line breaks), so we can read the reviews using a `TextLineDataset`. If they didn't we would have to preprocess the input files (e.g., converting them to TFRecords). For very large datasets, it would make sense to use a tool like Apache Beam for that." ] }, { @@ -2473,7 +2473,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's run it on the same `X_example`, just to make sure the word IDs are larger now, since the vocabulary bigger:" + "Let's run it on the same `X_example`, just to make sure the word IDs are larger now, since the vocabulary is bigger:" ] }, { @@ -2540,7 +2540,7 @@ "source": [ "class BagOfWords(keras.layers.Layer):\n", " def __init__(self, n_tokens, dtype=tf.int32, **kwargs):\n", - " super().__init__(dtype=tf.int32, **kwargs)\n", + " super().__init__(dtype=dtype, **kwargs)\n", " self.n_tokens = n_tokens\n", " def call(self, inputs):\n", " one_hot = tf.one_hot(inputs, self.n_tokens)\n",