import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import math
import tensorflow as tf
from collections import deque
from keras.applications.xception import Xception
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.models import Model
from keras.callbacks import TensorBoard
from threading import Thread
from tqdm import tqdm

try:
    sys.path.append(glob.glob('../../../Carla/Carla_0.9.11/CARLA_0.9.11/WindowsNoEditor/PythonAPI/carla/dist/carla-*%d.%d-%s.egg' % (
        sys.version_info.major,
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    pass

import carla

actor_list = []
IM_WIDTH = 640
IM_HEIGHT = 480
SHOW_PREVIEW = False  # whether or not to display the live camera feed from the vehicle
SECONDS_PER_EPISODE = 10
REPLAY_MEMORY_SIZE = 5_000
MIN_REPLAY_MEMORY_SIZE = 1_000
MINIBATCH_SIZE = 16
PREDICTION_BATCH_SIZE = 1
TRAINING_BATCH_SIZE = MINIBATCH_SIZE // 4
UPDATE_TARGET_EVERY = 5
MODEL_NAME = "Xception"
MIN_REWARD = -200

MEMORY_FRACTION = 0.4  # cap GPU memory usage; without it the RTX card tries to allocate more memory than it has

DISCOUNT = 0.99
EPISODES = 100

epsilon = 1
EPSILON_DECAY = 0.95  # 0.9975 0.99975
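# note: epsilon is decayed once per episode; with 0.95 it takes roughly 135 episodes to fall
# below MIN_EPSILON (0.95**135 ≈ 0.001), so over 100 episodes it only reaches about 0.006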
MIN_EPSILON = 0.001
AGGREGATE_STATS_EVERY = 10

# Own TensorBoard class
class ModifiedTensorBoard(TensorBoard):
    # Override init to set the initial step and writer (we want one log file for all .fit() calls)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.create_file_writer(self.log_dir)

    # Override this method to stop creation of the default log writer
    def set_model(self, model):
        pass

    # Overridden to save logs with our own step number
    # (otherwise every .fit() would start writing from the 0th step)
    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    # Overridden
    # We train for one batch only, no need to save anything at epoch end
    def on_batch_end(self, batch, logs=None):
        pass

    # Overridden so the writer is not closed
    def on_train_end(self, _):
        pass

    # Custom method for saving our own metrics
    # Writes the custom metrics at the current step using the writer created in __init__
    def update_stats(self, **stats):
        with self.writer.as_default():
            for key, value in stats.items():
                tf.summary.scalar(key, value, step=self.step)
            self.writer.flush()

class CarEnv:
    SHOW_CAM = SHOW_PREVIEW
    STEER_AMT = 1.0  # for now, steer fully in either direction every time
    im_width = IM_WIDTH
    im_height = IM_HEIGHT
    front_camera = None

    def __init__(self):
        self.client = carla.Client("localhost", 2000)
        self.client.set_timeout(10.0)
        self.world = self.client.get_world()
        self.blueprint_library = self.world.get_blueprint_library()
        self.model_3 = self.blueprint_library.filter('model3')[0]

    def reset(self):
        self.collision_hist = []
        # a collision is sometimes registered for harmless contacts, e.g. when speeding over a crest in the road,
        # so later we could filter by collision magnitude and keep only significant impacts
        # (see the sketch in collision_data below)
        self.actor_list = []
        self.transform = random.choice(self.world.get_map().get_spawn_points())
        self.vehicle = self.world.spawn_actor(self.model_3, self.transform)

        self.actor_list.append(self.vehicle)

        self.rgb_cam = self.blueprint_library.find("sensor.camera.rgb")
        self.rgb_cam.set_attribute("image_size_x", f"{self.im_width}")
        self.rgb_cam.set_attribute("image_size_y", f"{self.im_height}")
        self.rgb_cam.set_attribute("fov", "110")

        transform = carla.Transform(carla.Location(x=2.5, z=0.7))
        self.sensor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle)
        self.actor_list.append(self.sensor)
        self.sensor.listen(lambda data: self.process_img(data))

        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))
        time.sleep(4)  # give the car and sensors a few seconds to spawn and settle

        colsensor = self.blueprint_library.find("sensor.other.collision")
        self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle)
        self.actor_list.append(self.colsensor)
        self.colsensor.listen(lambda event: self.collision_data(event))

        while self.front_camera is None:
            time.sleep(0.01)

        self.episode_start = time.time()
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))

        return self.front_camera

    def collision_data(self, event):
        self.collision_hist.append(event)
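
        # Possible refinement mentioned in reset() (untested sketch, not used by the training
        # loop): ignore light contacts by thresholding the impulse that CARLA reports with the
        # collision event; COLLISION_FILTER_THRESHOLD would be a tuning constant we would have
        # to pick ourselves.
        #
        #   impulse = event.normal_impulse
        #   intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
        #   if intensity > COLLISION_FILTER_THRESHOLD:
        #       self.collision_hist.append(event)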

    def process_img(self, image):
        # image.raw_data is a flat buffer of BGRA pixel values
        i = np.array(image.raw_data)
        # reshape it into (height, width, 4); the fourth channel is alpha
        i2 = i.reshape((self.im_height, self.im_width, 4))
        # drop the alpha channel and keep only the colour channels
        i3 = i2[:, :, :3]

        if self.SHOW_CAM:
            cv2.imshow("", i3)
            cv2.waitKey(1)

        self.front_camera = i3

    def step(self, action):
        if action == 0:  # left
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.STEER_AMT))
        elif action == 1:  # straight
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=0))
        elif action == 2:  # right
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=1*self.STEER_AMT))

        v = self.vehicle.get_velocity()
        # speed in km/h: 3.6 converts the velocity magnitude from m/s
        kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))

        # if there was a collision
        if len(self.collision_hist) != 0:
            done = True
            reward = -200
        elif kmh < 50:
            done = False
            reward = -1
        else:
            done = False
            reward = 1

        if self.episode_start + SECONDS_PER_EPISODE < time.time():
            done = True

        return self.front_camera, reward, done, None

class DQNAgent:
    def __init__(self):
        self.model = self.create_model()
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")

        self.target_update_counter = 0
        self.graph = tf.compat.v1.get_default_graph()

        self.terminate = False
        self.last_logged_episode = 0
        self.training_initialized = False

    def create_model(self):
        base_model = Xception(weights=None, include_top=False, input_shape=(IM_HEIGHT, IM_WIDTH, 3))

        x = base_model.output
        x = GlobalAveragePooling2D()(x)

        predictions = Dense(3, activation='linear')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.compile(loss='mse', optimizer=Adam(lr=0.001), metrics=['accuracy'], run_eagerly=True)
        return model

    def update_replay_memory(self, transition):
        # transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)

    def train(self):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        current_states = np.array([transition[0] for transition in minibatch])/255
        with self.graph.as_default():
            current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE)

        new_current_states = np.array([transition[3] for transition in minibatch])/255
        with self.graph.as_default():
            future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE)

        X = []
        y = []

        # only add a discounted future term when a future state exists;
        # for terminal transitions (e.g. a crash) the target is just the reward
        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
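            # Q-learning target: new_q = reward + DISCOUNT * max_a' Q_target(new_state, a') for non-terminal steps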
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(current_state)
            y.append(current_qs)

        log_this_step = False
        if self.tensorboard.step > self.last_logged_episode:
            log_this_step = True
            self.last_logged_episode = self.tensorboard.step

        with self.graph.as_default():
            self.model.fit(np.array(X)/255, np.array(y), batch_size=TRAINING_BATCH_SIZE, verbose=0, shuffle=False, callbacks=[self.tensorboard] if log_this_step else None)

        if log_this_step:
            self.target_update_counter += 1

        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

    def get_qs(self, state):
        return self.model.predict(np.array(state).reshape(-1, *state.shape)/255)[0]

    def train_in_loop(self):
        # the first prediction/fit call is slow because a lot of one-off initialization happens then,
        # so we warm the model up once on dummy data before entering the actual training loop
        X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32)
        y = np.random.uniform(size=(1, 3)).astype(np.float32)

        with self.graph.as_default():
            self.model.fit(X, y, verbose=False, batch_size=1)

        self.training_initialized = True

        while True:
            if self.terminate:
                return
            self.train()
            time.sleep(0.01)


if __name__ == '__main__':
    FPS = 20
    # For stats
    ep_rewards = [-200]

    # For more repeatable results
    random.seed(1)
    np.random.seed(1)
    tf.random.set_seed(1)

    # Memory fraction, used mostly when training multiple agents
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)))
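    # Alternative sketch (untested): on a pure TF2 setup, a similar effect (stopping TensorFlow
    # from grabbing all GPU memory up front) can be had without the compat.v1 session:
    #
    #   for gpu in tf.config.list_physical_devices('GPU'):
    #       tf.config.experimental.set_memory_growth(gpu, True)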

    # Create models folder
    if not os.path.isdir("models"):
        os.makedirs("models")

    # Create agent and environment
    agent = DQNAgent()
    env = CarEnv()

    # Start training thread and wait for training to be initialized
    trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
    trainer_thread.start()
    while not agent.training_initialized:
        time.sleep(0.01)

    # Warm up predictions - the first call is slow because of one-off initialization,
    # so it is better to get it out of the way before we start iterating over episode steps
    agent.get_qs(np.ones((env.im_height, env.im_width, 3)))

    # Iterate over episodes
    for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit="episodes"):
        # collision history
        env.collision_hist = []

        # Update tensorboard step every episode
        agent.tensorboard.step = episode

        # Restarting episode - reset episode reward and step number
        episode_reward = 0
        step = 1

        # Reset environment and get initial state
        current_state = env.reset()

        # Reset flag and start iterating until episode ends
        done = False
        episode_start = time.time()

        # Play for the given number of seconds only
        while True:
            if np.random.random() > epsilon:
                # Get action from the Q network
                action = np.argmax(agent.get_qs(current_state))
            else:
                # Get random action
                action = np.random.randint(0, 3)
                # A random action takes no time to compute, so add a delay of 1/FPS seconds
                # (the prediction in the other branch takes roughly that long anyway)
                time.sleep(1/FPS)

            new_state, reward, done, _ = env.step(action)

            # Count the reward
            episode_reward += reward

            # Every step we update replay memory
            agent.update_replay_memory((current_state, action, reward, new_state, done))

            current_state = new_state
            step += 1

            if done:
                break

        # End of episode - destroy actors
        for actor in env.actor_list:
            actor.destroy()

        # Append episode reward to a list and log stats (every given number of episodes)
        ep_rewards.append(episode_reward)
        if not episode % AGGREGATE_STATS_EVERY or episode == 1:
            average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
            min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
            max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
            agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward,
                                           epsilon=epsilon)

            # Save model, but only when min reward is greater than or equal to a set value
            if min_reward >= MIN_REWARD:
                agent.model.save(
                    f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

        # Decay epsilon
        if epsilon > MIN_EPSILON:
            epsilon *= EPSILON_DECAY
            epsilon = max(MIN_EPSILON, epsilon)


    # Set termination flag for training thread and wait for it to finish
    agent.terminate = True
    trainer_thread.join()
    agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')
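
# Loading a saved model later for evaluation might look roughly like this (untested sketch;
# the actual filename depends on the rewards and timestamp recorded during training, and
# `frame` stands for a single camera image of shape (IM_HEIGHT, IM_WIDTH, 3)):
#
#   from keras.models import load_model
#   model = load_model('models/<saved_model_name>.model')
#   qs = model.predict(np.expand_dims(frame, axis=0) / 255)[0]
#   action = np.argmax(qs)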