import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import math
import tensorflow as tf
from collections import deque
from keras.applications.xception import Xception
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.models import Model
from keras.callbacks import TensorBoard
from threading import Thread
from tqdm import tqdm

try:
    sys.path.append(glob.glob('../../../Carla/Carla_0.9.11/CARLA_0.9.11/WindowsNoEditor/PythonAPI/carla/dist/carla-*%d.%d-%s.egg' % (
        sys.version_info.major,
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    pass

import carla

actor_list = []
IM_WIDTH = 640
IM_HEIGHT = 480
SHOW_PREVIEW = False  # whether or not to display the live camera feed from the vehicle
SECONDS_PER_EPISODE = 10
REPLAY_MEMORY_SIZE = 5_000
MIN_REPLAY_MEMORY_SIZE = 1_000
MINIBATCH_SIZE = 16
PREDICTION_BATCH_SIZE = 1
TRAINING_BATCH_SIZE = MINIBATCH_SIZE // 4
UPDATE_TARGET_EVERY = 5
MODEL_NAME = "Xception"
MIN_REWARD = -200

MEMORY_FRACTION = 0.4  # cap GPU memory usage; without it the RTX card tries to allocate more memory than it has

DISCOUNT = 0.99
EPISODES = 100

epsilon = 1
EPSILON_DECAY = 0.95  # 0.9975 0.99975
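# note: epsilon is decayed once per episode; with 0.95 it takes roughly 135 episodes to fall
# below MIN_EPSILON (0.95**135 ≈ 0.001), so over 100 episodes it only reaches about 0.006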
MIN_EPSILON = 0.001
AGGREGATE_STATS_EVERY = 10

# Own TensorBoard class
class ModifiedTensorBoard(TensorBoard):
    # Override init to set the initial step and writer (we want one log file for all .fit() calls)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.create_file_writer(self.log_dir)

    # Override this method to stop creation of the default log writer
    def set_model(self, model):
        pass

    # Overridden to save logs with our own step number
    # (otherwise every .fit() would start writing from the 0th step)
    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    # Overridden
    # We train for one batch only, no need to save anything at epoch end
    def on_batch_end(self, batch, logs=None):
        pass

    # Overridden so the writer is not closed
    def on_train_end(self, _):
        pass

    # Custom method for saving our own metrics
    # Writes the custom metrics at the current step using the writer created in __init__
    def update_stats(self, **stats):
        with self.writer.as_default():
            for key, value in stats.items():
                tf.summary.scalar(key, value, step=self.step)
            self.writer.flush()

class CarEnv:
    SHOW_CAM = SHOW_PREVIEW
    STEER_AMT = 1.0  # for now, steer fully in either direction every time
    im_width = IM_WIDTH
    im_height = IM_HEIGHT
    front_camera = None

    def __init__(self):
        self.client = carla.Client("localhost", 2000)
        self.client.set_timeout(10.0)
        self.world = self.client.get_world()
        self.blueprint_library = self.world.get_blueprint_library()
        self.model_3 = self.blueprint_library.filter('model3')[0]

    def reset(self):
        self.collision_hist = []
        # a collision is sometimes registered for harmless contacts, e.g. when speeding over a crest in the road,
        # so later we could filter by collision magnitude and keep only significant impacts
        # (see the sketch in collision_data below)
        self.actor_list = []
        self.transform = random.choice(self.world.get_map().get_spawn_points())
        self.vehicle = self.world.spawn_actor(self.model_3, self.transform)

        self.actor_list.append(self.vehicle)

        self.rgb_cam = self.blueprint_library.find("sensor.camera.rgb")
        self.rgb_cam.set_attribute("image_size_x", f"{self.im_width}")
        self.rgb_cam.set_attribute("image_size_y", f"{self.im_height}")
        self.rgb_cam.set_attribute("fov", "110")

        transform = carla.Transform(carla.Location(x=2.5, z=0.7))
        self.sensor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle)
        self.actor_list.append(self.sensor)
        self.sensor.listen(lambda data: self.process_img(data))

        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))
        time.sleep(4)  # give the car and sensors a few seconds to spawn and settle

        colsensor = self.blueprint_library.find("sensor.other.collision")
        self.colsensor = self.world.spawn_actor(colsensor, transform, attach_to=self.vehicle)
        self.actor_list.append(self.colsensor)
        self.colsensor.listen(lambda event: self.collision_data(event))

        while self.front_camera is None:
            time.sleep(0.01)

        self.episode_start = time.time()
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))

        return self.front_camera

    def collision_data(self, event):
        self.collision_hist.append(event)
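
        # Possible refinement mentioned in reset() (untested sketch, not used by the training
        # loop): ignore light contacts by thresholding the impulse that CARLA reports with the
        # collision event; COLLISION_FILTER_THRESHOLD would be a tuning constant we would have
        # to pick ourselves.
        #
        #   impulse = event.normal_impulse
        #   intensity = math.sqrt(impulse.x**2 + impulse.y**2 + impulse.z**2)
        #   if intensity > COLLISION_FILTER_THRESHOLD:
        #       self.collision_hist.append(event)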

    def process_img(self, image):
        # image.raw_data is a flat buffer of BGRA pixel values
        i = np.array(image.raw_data)
        # reshape it into (height, width, 4); the fourth channel is alpha
        i2 = i.reshape((self.im_height, self.im_width, 4))
        # drop the alpha channel and keep only the colour channels
        i3 = i2[:, :, :3]

        if self.SHOW_CAM:
            cv2.imshow("", i3)
            cv2.waitKey(1)

        self.front_camera = i3

    def step(self, action):
        if action == 0:  # left
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=-1*self.STEER_AMT))
        elif action == 1:  # straight
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=0))
        elif action == 2:  # right
            self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer=1*self.STEER_AMT))

        v = self.vehicle.get_velocity()
        # speed in km/h: 3.6 converts the velocity magnitude from m/s
        kmh = int(3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2))

        # if there was a collision
        if len(self.collision_hist) != 0:
            done = True
            reward = -200
        elif kmh < 50:
            done = False
            reward = -1
        else:
            done = False
            reward = 1

        if self.episode_start + SECONDS_PER_EPISODE < time.time():
            done = True

        return self.front_camera, reward, done, None

class DQNAgent:
    def __init__(self):
        self.model = self.create_model()
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")

        self.target_update_counter = 0
        self.graph = tf.compat.v1.get_default_graph()

        self.terminate = False
        self.last_logged_episode = 0
        self.training_initialized = False

    def create_model(self):
        base_model = Xception(weights=None, include_top=False, input_shape=(IM_HEIGHT, IM_WIDTH, 3))

        x = base_model.output
        x = GlobalAveragePooling2D()(x)

        predictions = Dense(3, activation='linear')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.compile(loss='mse', optimizer=Adam(lr=0.001), metrics=['accuracy'], run_eagerly=True)
        return model

    def update_replay_memory(self, transition):
        # transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)

    def train(self):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        current_states = np.array([transition[0] for transition in minibatch])/255
        with self.graph.as_default():
            current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE)

        new_current_states = np.array([transition[3] for transition in minibatch])/255
        with self.graph.as_default():
            future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE)

        X = []
        y = []

        # only add a discounted future term when a future state exists;
        # for terminal transitions (e.g. a crash) the target is just the reward
        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
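            # Q-learning target: new_q = reward + DISCOUNT * max_a' Q_target(new_state, a') for non-terminal steps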
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(current_state)
            y.append(current_qs)

        log_this_step = False
        if self.tensorboard.step > self.last_logged_episode:
            log_this_step = True
            self.last_logged_episode = self.tensorboard.step

        with self.graph.as_default():
            self.model.fit(np.array(X)/255, np.array(y), batch_size=TRAINING_BATCH_SIZE, verbose=0, shuffle=False, callbacks=[self.tensorboard] if log_this_step else None)

        if log_this_step:
            self.target_update_counter += 1

        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

    def get_qs(self, state):
        return self.model.predict(np.array(state).reshape(-1, *state.shape)/255)[0]

    def train_in_loop(self):
        # the first prediction/fit call is slow because a lot of one-off initialization happens then,
        # so we warm the model up once on dummy data before entering the actual training loop
        X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32)
        y = np.random.uniform(size=(1, 3)).astype(np.float32)

        with self.graph.as_default():
            self.model.fit(X, y, verbose=False, batch_size=1)

        self.training_initialized = True

        while True:
            if self.terminate:
                return
            self.train()
            time.sleep(0.01)


if __name__ == '__main__':
    FPS = 20
    # For stats
    ep_rewards = [-200]

    # For more repeatable results
    random.seed(1)
    np.random.seed(1)
    tf.random.set_seed(1)

    # Memory fraction, used mostly when training multiple agents
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=MEMORY_FRACTION)
    tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)))
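    # Alternative sketch (untested): on a pure TF2 setup, a similar effect (stopping TensorFlow
    # from grabbing all GPU memory up front) can be had without the compat.v1 session:
    #
    #   for gpu in tf.config.list_physical_devices('GPU'):
    #       tf.config.experimental.set_memory_growth(gpu, True)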

    # Create models folder
    if not os.path.isdir("models"):
        os.makedirs("models")

    # Create agent and environment
    agent = DQNAgent()
    env = CarEnv()

    # Start training thread and wait for training to be initialized
    trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
    trainer_thread.start()
    while not agent.training_initialized:
        time.sleep(0.01)

    # Warm up predictions - the first call is slow because of one-off initialization,
    # so it is better to get it out of the way before we start iterating over episode steps
    agent.get_qs(np.ones((env.im_height, env.im_width, 3)))

    # Iterate over episodes
    for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit="episodes"):
        # collision history
        env.collision_hist = []

        # Update tensorboard step every episode
        agent.tensorboard.step = episode

        # Restarting episode - reset episode reward and step number
        episode_reward = 0
        step = 1

        # Reset environment and get initial state
        current_state = env.reset()

        # Reset flag and start iterating until episode ends
        done = False
        episode_start = time.time()

        # Play for the given number of seconds only
        while True:
            if np.random.random() > epsilon:
                # Get action from the Q network
                action = np.argmax(agent.get_qs(current_state))
            else:
                # Get random action
                action = np.random.randint(0, 3)
                # A random action takes no time to compute, so add a delay of 1/FPS seconds
                # (the prediction in the other branch takes roughly that long anyway)
                time.sleep(1/FPS)

            new_state, reward, done, _ = env.step(action)

            # Count the reward
            episode_reward += reward

            # Every step we update replay memory
            agent.update_replay_memory((current_state, action, reward, new_state, done))

            current_state = new_state
            step += 1

            if done:
                break

        # End of episode - destroy actors
        for actor in env.actor_list:
            actor.destroy()

        # Append episode reward to a list and log stats (every given number of episodes)
        ep_rewards.append(episode_reward)
        if not episode % AGGREGATE_STATS_EVERY or episode == 1:
            average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
            min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
            max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
            agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward,
                                           epsilon=epsilon)

            # Save model, but only when min reward is greater than or equal to a set value
            if min_reward >= MIN_REWARD:
                agent.model.save(
                    f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

        # Decay epsilon
        if epsilon > MIN_EPSILON:
            epsilon *= EPSILON_DECAY
            epsilon = max(MIN_EPSILON, epsilon)


    # Set termination flag for training thread and wait for it to finish
    agent.terminate = True
    trainer_thread.join()
    agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')
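
# Loading a saved model later for evaluation might look roughly like this (untested sketch;
# the actual filename depends on the rewards and timestamp recorded during training, and
# `frame` stands for a single camera image of shape (IM_HEIGHT, IM_WIDTH, 3)):
#
#   from keras.models import load_model
#   model = load_model('models/<saved_model_name>.model')
#   qs = model.predict(np.expand_dims(frame, axis=0) / 255)[0]
#   action = np.argmax(qs)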