· 5 years ago · Mar 20, 2020, 03:18 AM
1# -*- coding: utf-8 -*-
2"""tpu.ipynb
3
4Automatically generated by Colaboratory.
5
6Original file is located at
7 https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/guide/tpu.ipynb
8
9##### Copyright 2018 The TensorFlow Authors.
10"""
11
12#@title Licensed under the Apache License, Version 2.0 (the "License");
13# you may not use this file except in compliance with the License.
14# You may obtain a copy of the License at
15#
16# https://www.apache.org/licenses/LICENSE-2.0
17#
18# Unless required by applicable law or agreed to in writing, software
19# distributed under the License is distributed on an "AS IS" BASIS,
20# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21# See the License for the specific language governing permissions and
22# limitations under the License.
23
24"""# Use a TPU
25
26<table class="tfo-notebook-buttons" align="left">
27 <td>
28 <a target="_blank" href="https://www.tensorflow.org/guide/tpu"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
29 </td>
30 <td>
31 <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/guide/tpu.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
32 </td>
33 <td>
34 <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/guide/tpu.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
35 </td>
36 <td>
37 <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/guide/tpu.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
38 </td>
39</table>
40
Experimental support for Cloud TPUs is currently available for Keras and Google Colab. Before you run this Colab notebook, ensure that your hardware accelerator is a TPU by checking your notebook settings: Runtime > Change runtime type > Hardware accelerator > TPU.
42"""
43
44# Commented out IPython magic to ensure Python compatibility.
45from __future__ import absolute_import, division, print_function, unicode_literals
46
try:
  # %tensorflow_version only exists in Colab.
  # %tensorflow_version 2.x
  # The IPython magic above is commented out so this file is valid plain
  # Python; the explicit `pass` keeps the `try` suite non-empty (a suite made
  # up only of comments is a syntax error).
  pass
except Exception:
  pass
52import tensorflow as tf
53
54import os
55import tensorflow_datasets as tfds
56
57"""## Distribution strategies
58This guide demonstrates how to use the distribution strategy `tf.distribute.experimental.TPUStrategy` to drive a Cloud TPU and train a Keras model. A distribution strategy is an abstraction that can be used to drive models on CPU, GPUs or TPUs. Simply swap out the distribution strategy and the model will run on the given device. See the [distribution strategy guide](./distributed_training.ipynb) for more information.
59
Below is the code that connects to a TPU and creates the `TPUStrategy` object. Note that the `tpu` argument to `TPUClusterResolver` is a special address just for Colab. If you are running on Google Compute Engine (GCE), you should instead pass in the name of your Cloud TPU.
61"""
62
# The TPU address is read from the COLAB_TPU_ADDR environment variable and
# prefixed with the gRPC scheme; a missing variable raises KeyError here.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
    tpu='grpc://{}'.format(os.environ['COLAB_TPU_ADDR']))

# Connect to the cluster described by the resolver, then initialize the TPU
# system, both using the same resolver.
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
66
67"""Below is a simple MNIST model, unchanged from what you would use on CPU or GPU."""
68
def create_model():
  """Build the small convolutional MNIST classifier used in this guide.

  The network takes inputs of shape (28, 28, 1) and stacks a 3x3 convolution
  with 32 filters, a flatten step, a 128-unit ReLU dense layer and a final
  10-unit dense layer. The last layer has no activation.

  Returns:
    An uncompiled `tf.keras.Sequential` model.
  """
  layers = tf.keras.layers
  stack = [
      layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
      layers.Flatten(),
      layers.Dense(128, activation='relu'),
      layers.Dense(10),
  ]
  return tf.keras.Sequential(stack)
75
76"""## Input datasets
77Efficient use of the `tf.data.Dataset` API is critical when using a Cloud TPU, as it is impossible to use the Cloud TPUs unless you can feed them data quickly enough. See [Input Pipeline Performance Guide](./data_performance.ipynb) for details on dataset performance.
78
79For all but the simplest experimentation (using `tf.data.Dataset.from_tensor_slices` or other in-graph data) you will need to store all data files read by the Dataset in Google Cloud Storage (GCS) buckets.
80
81For most use-cases, it is recommended to convert your data into `TFRecord` format and use a `tf.data.TFRecordDataset` to read it. See [TFRecord and tf.Example tutorial](../tutorials/load_data/tfrecord.ipynb) for details on how to do this. This, however, is not a hard requirement and you can use other dataset readers (`FixedLengthRecordDataset` or `TextLineDataset`) if you prefer.
82
83Small datasets can be loaded entirely into memory using `tf.data.Dataset.cache`.
84
85Regardless of the data format used, it is strongly recommended that you use large files, on the order of 100MB. This is especially important in this networked setting as the overhead of opening a file is significantly higher.
86
87Here you should use the `tensorflow_datasets` module to get a copy of the MNIST training data. Note that `try_gcs` is specified to use a copy that is available in a public GCS bucket. If you don't specify this, the TPU will not be able to access the data that is downloaded.
88"""
89
def get_dataset(batch_size=200):
  """Load MNIST through TensorFlow Datasets and build batched input pipelines.

  Args:
    batch_size: Number of examples per batch, used for both the training and
      the test pipeline.

  Returns:
    A `(train_dataset, test_dataset)` tuple. Both yield `(image, label)`
    batches with images cast to `tf.float32` and divided by 255. The training
    pipeline is additionally shuffled with a 10000-element buffer before
    batching; the test pipeline keeps its original order.
  """
  # `try_gcs=True` asks TFDS to use the copy hosted in its public GCS bucket
  # rather than a local download. Dataset metadata is not requested because
  # nothing in this function uses it.
  datasets = tfds.load(name='mnist', as_supervised=True, try_gcs=True)
  mnist_train, mnist_test = datasets['train'], datasets['test']

  def scale(image, label):
    """Cast the image to float32 and divide by 255; pass the label through."""
    image = tf.cast(image, tf.float32)
    image /= 255.0

    return image, label

  train_dataset = mnist_train.map(scale).shuffle(10000).batch(batch_size)
  test_dataset = mnist_test.map(scale).batch(batch_size)

  return train_dataset, test_dataset
105
106"""## Create and train a model
107
Nothing here is TPU specific; you would write the same code below if you had multiple GPUs and were using a `MirroredStrategy` rather than a `TPUStrategy`.
109"""
110
strategy = tf.distribute.experimental.TPUStrategy(resolver)

# The model is created and compiled inside the strategy scope.
with strategy.scope():
  model = create_model()
  model.compile(
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      optimizer='adam',
      metrics=['sparse_categorical_accuracy'])

# The datasets are built and `fit` is called outside the scope.
train_dataset, test_dataset = get_dataset()

model.fit(train_dataset, validation_data=test_dataset, epochs=5)
123
124"""## Next steps
125
126* [Google Cloud TPU Documentation](https://cloud.google.com/tpu/docs/) - Set up and run a Google Cloud TPU.
* [Distributed training with TensorFlow](./distributed_training.ipynb) - How to use distribution strategy and links to many examples showing best practices.
128* [TensorFlow Official Models](https://github.com/tensorflow/models/tree/master/official) - Examples of state of the art TensorFlow 2.x models that are Cloud TPU compatible.
129* [The Google Cloud TPU Performance Guide](https://cloud.google.com/tpu/docs/performance-guide) - Enhance Cloud TPU performance further by adjusting Cloud TPU configuration parameters for your application.
130"""