# Snapshot archived Feb 28, 2020, 07:00 PM
1#!/usr/bin/env python3.7
2# Copyright (c) 2014-present, Facebook, Inc.
3
4import asyncio
5import atexit
6import logging
7import sys
8from datetime import datetime
9from json import dumps
10from os import getpid
11from pathlib import Path
12from shutil import rmtree
13from subprocess import run
14from tempfile import gettempdir
15from typing import Awaitable, List, Optional, Sequence, Union
16
17import aiohttp
18import click
19import docker
20
21from fbc_sp_client import SPClient
22
23from fbc_sp.cli.releases import blacklist_release
24from fbc_sp.cli.token import add_token_to_db
25from fbc_sp.db import AioDB, create_tables, read_db_conf
26from fbc_sp.utils import calc_blob_sha
27
28
29LOG = logging.getLogger(__name__)
30JSON_INDENT = 2
31EXPECTED_DOWNLOAD_SHA = (
32 "1c9760d133d9664386720901681372cbd3a0daa5b4ba368ec168bebe8ae0e226"
33)
34
35# CONFIG
36CI_ROOT = Path(__file__).parent
37CI_NETWORK_NAME = "fbc_sp_network"
38CI_DB_CONF = {
39 "storage_engine": "posix",
40 "base_dir": Path(gettempdir()) / f"sp_base_dir_{getpid()}",
41 "staging_dir": Path(gettempdir()) / f"sp_staging_dir_{getpid()}",
42 "load_balancer_hostname": "sw.terragraph.ci",
43 "local_private_key_file": CI_ROOT / "id_25519",
44 "private_key_file": "/certs/id_25519",
45 "statsd_hostname": "localhost",
46 "statsd_port": "6969",
47}
48PSQL_CI_CONTAINER_NAME = "fbc_sp_ci_db"
49PSQL_ENV = {"POSTGRES_DB": "tg_software", "POSTGRES_PASSWORD": "developtg123"}
50PSQL_IMAGE = "postgres:latest"
51PSQL_PORTS = {"5432/tcp": 5432}
52SP_CI_CONTAINER_NAME = "fbc_sp_ci"
53SP_CI_TAG = "ci"
54SP_ENV = {"DB_CONF": "/conf/db-ci.conf"}
55SP_PORTS = {"8000/tcp": 8000}
56TEST_SUITE = "ci_suite"
57TEST_DEV_SUITE = "ci_suite_dev"
58TEST_RELEASE = "ci_release"
59TEST_TAG = "ci_tag"
60TEST_URL = "http://localhost:8000/"
61
62SAMPLE_MD_PATHS = (
63 Path(__file__).parent / "fbc_sp" / "tests" / "SAMPLE.md",
64 Path(__file__).parent / "fbc_sp" / "tests" / "SAMPLE2.md",
65)
66
67
def _handle_debug(
    ctx: click.core.Context,
    param: Union[click.core.Option, click.core.Parameter],
    debug: Union[bool, int, str],
) -> Union[bool, int, str]:
    """Click option callback: configure root logging (DEBUG when set, else INFO)."""
    logging.basicConfig(
        format="[%(asctime)s] %(levelname)s: %(message)s (%(filename)s:%(lineno)d)",
        level=logging.DEBUG if debug else logging.INFO,
    )
    return debug
80
81
def _check_if_container_running(
    dc: docker.client.DockerClient, container_name: str
) -> None:
    """Best effort: stop, wait on, and remove `container_name` if it is running.

    Any docker error (most commonly the container not existing) is swallowed
    and logged at DEBUG level.
    """
    try:
        existing = dc.containers.get(container_name)
        if existing.status != "running":
            return
        existing.stop()
        existing.wait()
        existing.remove()
    except Exception as e:
        LOG.debug(f"Did not find {container_name} to stop: ({e})")
93
94
def _cleanup_instances(
    containers: Sequence[docker.models.containers.Container],
) -> None:
    """Stop, wait on, and remove every running container given (atexit hook)."""
    for instance in containers:
        instance.reload()
        if instance.status != "running":
            continue
        LOG.debug(f"{instance.image} is running. Stopping.")
        instance.stop()
        instance.wait()
        instance.remove()
105
106
def _cleanup_storage() -> None:
    """Delete the CI base and staging directory trees if they exist (atexit hook)."""
    for storage_dir in (CI_DB_CONF["base_dir"], CI_DB_CONF["staging_dir"]):
        if storage_dir.exists():
            rmtree(storage_dir)
111
112
def _create_storage_dirs() -> None:
    """Ensure the CI base and staging directories exist."""
    for storage_dir in (CI_DB_CONF["base_dir"], CI_DB_CONF["staging_dir"]):
        storage_dir.mkdir(exist_ok=True)
116
117
def create_keypair(private_key_file: Path) -> bool:
    """Generate an ed25519 keypair at `private_key_file` (+ `.pub` sibling).

    Runs ssh-keygen with an empty passphrase; stdin "y" answers any overwrite
    prompt. Returns True when the private key file exists afterwards.
    Raises CalledProcessError if ssh-keygen exits non-zero.
    """
    keygen_cmd = ["ssh-keygen", "-t", "ed25519", "-N", "", "-f", str(private_key_file)]
    run(keygen_cmd, input=b"y", check=True)
    return private_key_file.exists()
125
126
async def _wait_for_image_to_start(
    container: docker.models.containers.Container, attempts_max: int = 10
) -> bool:
    """Poll `container` until docker reports it "running".

    Re-checks status up to `attempts_max` times, sleeping 2s and reloading
    container state between checks. Returns True (after a 1s settle sleep)
    once running, False if it never reaches "running".
    """
    attempt = 0
    while container.status != "running":
        LOG.info(
            f"{container.image} has not started (Status = {container.status}). "
            + "Sleeping 2 seconds to give it time to start."
        )

        attempt += 1
        if attempt >= attempts_max:
            LOG.error(f"{container.name} / {container.image} never == 'running'")
            return False

        await asyncio.sleep(2)
        container.reload()

    # Brief settle time so the service inside the container can finish booting
    await asyncio.sleep(1)
    return True
149
150
async def create_test_network(
    dc: docker.client.DockerClient,
) -> docker.models.networks.Network:
    """Return the CI bridge network, creating it if it does not exist.

    Bug fix: the "Used pre-existing" message was previously logged BEFORE the
    lookup, so a run that had to create the network logged both messages.
    Now each message is logged only on its actual path.
    """
    try:
        network = dc.networks.get(CI_NETWORK_NAME)
    except docker.errors.NotFound:
        LOG.info(f"Created {CI_NETWORK_NAME} network")
        return dc.networks.create(CI_NETWORK_NAME, driver="bridge")
    LOG.info(f"Used pre-existing {CI_NETWORK_NAME} network")
    return network
160
161
async def check_key_signing(spc: SPClient) -> bool:
    """Generate a throwaway client keypair and verify /sign returns a cert.

    The temporary key files are always removed, even when signing raises.
    Returns True when the response contains a "public_cert" entry.
    """
    tmp = Path(gettempdir())
    priv_key = tmp / "unittest_id25519"
    pub_key = tmp / "unittest_id25519.pub"
    if not create_keypair(priv_key):
        LOG.error("Unable to create a client id25519 keypair")
        return False

    public_key = pub_key.read_text()

    try:
        public_cert_json = await spc.sign(public_key)
    finally:
        priv_key.unlink()
        pub_key.unlink()

    print(f"/sign JSON:\n{public_cert_json}")
    return "public_cert" in public_cert_json
180
181
async def init_db() -> Optional[Path]:
    """Create the portal's DB tables from the checked-in db.conf.

    Returns the conf path on success, None on any failure (logged).
    """
    db_conf_path = CI_ROOT / "db.conf"
    try:
        db_conf = read_db_conf(db_conf_path)
        LOG.debug(f"Creating DB Tables with {db_conf['sa_connect_str']}")
        create_tables(db_conf["sa_connect_str"])
    except Exception as e:
        LOG.error(f"Failed DB init: {e}")
        return None
    return db_conf_path
192
193
async def load_ci_conf(aiodb: AioDB) -> bool:
    """Write every CI_DB_CONF entry into the portal's config table.

    All inserts share one timestamp and run concurrently; True only if
    every insert reports success.
    """
    now = datetime.utcnow()
    results = await asyncio.gather(
        *(
            aiodb.add_config_key(conf_key, str(conf_value), "ci", now)
            for conf_key, conf_value in CI_DB_CONF.items()
        )
    )
    return all(results)
201
202
async def http_test(
    url: str, expected_status: int = 200, timeout: float = 2.0, post: bool = False
) -> bool:
    """Hit `url` (GET, or POST when `post`) and compare the response status.

    Returns True when the status equals `expected_status`; False on a
    timeout (logged with traceback).
    """
    async with aiohttp.ClientSession() as session:
        request = session.post if post else session.get
        try:
            async with request(url, timeout=timeout) as response:
                return response.status == expected_status
        except asyncio.TimeoutError:
            LOG.exception(f"http_test to {url} took longer than {timeout}s")

    return False
215
216
async def start_ci_pgsql(
    dc: docker.client.DockerClient,
) -> Optional[docker.models.containers.Container]:
    """Launch a fresh postgres container for the CI run.

    Any stale container with the same name is stopped/removed first.
    Returns the container once it reports running, else None.
    """
    _check_if_container_running(dc, PSQL_CI_CONTAINER_NAME)

    # Start a fresh test instance on the selected version
    LOG.info(f"Pulling {PSQL_IMAGE} image")
    dc.images.pull(PSQL_IMAGE)
    LOG.info(f"Starting {PSQL_CI_CONTAINER_NAME} with {PSQL_IMAGE} container")
    psql_container = dc.containers.run(
        detach=True,
        environment=PSQL_ENV,
        image=PSQL_IMAGE,
        name=PSQL_CI_CONTAINER_NAME,
        network=CI_NETWORK_NAME,
        ports=PSQL_PORTS,
    )
    if psql_container and await _wait_for_image_to_start(psql_container):
        return psql_container
    return None
240
241
async def start_sp(
    dc: docker.client.DockerClient,
) -> Optional[docker.models.containers.Container]:
    """Build the Software Portal image from CI_ROOT and run it.

    CI_ROOT is bind-mounted read-only at both /conf (db-ci.conf) and
    /certs (generated ssh key). Any stale container with the same name is
    removed first. Returns the container once running, else None.
    """
    _check_if_container_running(dc, SP_CI_CONTAINER_NAME)

    LOG.info("Building software_portal image")
    sp_image, _build_logs = dc.images.build(
        path=CI_ROOT.as_posix(), pull=True, tag=SP_CI_TAG
    )

    LOG.info("Starting software_portal image")
    bind_source = CI_ROOT.resolve().as_posix()
    sp_mounts = [
        docker.types.Mount(
            target=mount_target,
            source=bind_source,
            type="bind",
            read_only=True,
        )
        for mount_target in ("/conf", "/certs")
    ]
    sp_container = dc.containers.run(
        detach=True,
        environment=SP_ENV,
        image=sp_image.id,
        mounts=sp_mounts,
        name=SP_CI_CONTAINER_NAME,
        network=CI_NETWORK_NAME,
        ports=SP_PORTS,
    )
    if sp_container and await _wait_for_image_to_start(sp_container):
        return sp_container
    return None
282
283
async def async_main(debug: bool, do_not_cleanup: bool) -> int:  # noqa: C901
    """Run the full CI flow end to end and return a process exit code.

    Brings up postgres + the Software Portal on a shared docker network, then
    exercises the HTTP API: /upload, tagging, /list, release blacklisting,
    /download and /sign. Each failing step returns its own non-zero code so
    logs identify the failure point; 0 means every check passed.

    `debug` is consumed by the click callback (_handle_debug) and unused here;
    it arrives only because main() forwards all CLI kwargs.
    """
    docker_client = docker.from_env()

    # Ensure we have a test network for our two containers
    # TODO: Potentially clean it up ...
    await create_test_network(docker_client)

    psql_container = await start_ci_pgsql(docker_client)
    if not psql_container:
        LOG.error("Unable to start psql")
        return 1

    db_conf_path = await init_db()
    if not db_conf_path:
        LOG.error("Error initializing the pgsql database")
        return 2

    aiodb = AioDB(db_conf_path)
    await aiodb.create_engine()
    if not await load_ci_conf(aiodb):
        LOG.error("Failed to load DB Configuration settings for CI Run")
        return 2

    # Create SSH Keys so we can test /sign endpoint
    if not create_keypair(CI_DB_CONF["local_private_key_file"]):
        LOG.error("Unable to generate server unittest ed25519 keys")
        return 3

    # Build fresh software_portal_ci container
    sp_container = await start_sp(docker_client)
    if not sp_container:
        LOG.error("Unable to start Software Portal")
        return 3

    if not do_not_cleanup:
        atexit.register(_cleanup_instances, (sp_container, psql_container))
        atexit.register(_cleanup_storage)
        # Cleanup Generated SSH Keys
        atexit.register(CI_DB_CONF["local_private_key_file"].unlink)
        atexit.register(
            (CI_DB_CONF["local_private_key_file"].parent / "id_25519.pub").unlink
        )

    # Check we can hit / route of aiohttp service
    if not await http_test(TEST_URL):
        LOG.error(f"Unable to get a successful response from {TEST_URL}")
        return 4

    # TODO: Populate Database so this test has more meaning
    # An unauthenticated POST to /upload must be rejected with a 401
    upload_url = f"{TEST_URL}upload"
    if not await http_test(upload_url, expected_status=401, post=True):
        # bug fix: message previously pointed at TEST_URL, not the URL tested
        LOG.error(f"Unable to get a successful response from {upload_url}")
        return 5

    # Make suites and error if one fails
    if not all(
        await asyncio.gather(
            aiodb.add_suite(TEST_SUITE, "CI"), aiodb.add_suite(TEST_DEV_SUITE, "CI")
        )
    ):
        LOG.error("Unable to create a suite")
        return 6

    # We should not be playing directly with the DB anymore
    # NOTE(review): this close task is never awaited — presumably it finishes
    # before loop teardown; confirm no "task was destroyed" warnings appear
    asyncio.create_task(aiodb.close("ci.py"))

    # Make a token - opens its own DB connection
    token_data = await add_token_to_db(
        db_conf_path, 20, True, "CI Token", "ci", [TEST_SUITE, TEST_DEV_SUITE]
    )
    if not token_data:
        LOG.error("Unable to make CI API Token")
        return 7
    LOG.info(f"Made a token: {token_data}")

    # Get a FBC SP Client Instance for prod suite and dev only suite
    spc = SPClient(
        {
            "api_id": token_data.token_id,
            "api_token": token_data.token,
            "api_url": TEST_URL,
        },
        TEST_SUITE,
    )
    spc_dev = SPClient(
        {
            "api_id": token_data.token_id,
            "api_token": token_data.token,
            "api_url": TEST_URL,
        },
        TEST_DEV_SUITE,
    )

    # Test TEST_SUITE's /suites returns that we are in TEST_SUITE
    suites_accessible = await spc.suites()
    if TEST_SUITE not in suites_accessible["suites"]:
        LOG.error(
            f"Did not get {TEST_SUITE} returned from in /suites call: "
            + f"{suites_accessible}"
        )
        return 8

    # Create storage dirs
    _create_storage_dirs()

    # Upload a sample file + create release with upload + TAG
    # Do twice to test original TAG + TAG moving
    last_test_release = ""
    for suite, prod_rel, client in (
        (TEST_SUITE, True, spc),
        (TEST_DEV_SUITE, False, spc_dev),
    ):
        for rel_num, sample_md_path in enumerate(SAMPLE_MD_PATHS):
            last_test_release = f"{TEST_RELEASE}_{rel_num}"

            # bug fix: log the actual release name, not the bare TEST_RELEASE prefix
            LOG.info(
                f"Upload {sample_md_path} to suite {suite} release {last_test_release}"
            )
            upload_response = await client.upload(
                last_test_release,
                sample_md_path,
                production=(bool(rel_num) and prod_rel),
            )
            print(dumps(upload_response, indent=JSON_INDENT))
            # bug fix: was "and", which only errored when BOTH fields were bad
            if upload_response["code"] != 0 or upload_response["status"] != 200:
                LOG.error(
                    "Unable to upload to localhost Software Portal. Response: "
                    + f" {upload_response}"
                )
                return 9

            # TAG the release to TEST_TAG
            LOG.info(f"Attempting to tag {suite} {last_test_release} with {TEST_TAG}")
            tag_response = await client.tag(last_test_release, [TEST_TAG])
            print(dumps(tag_response, indent=JSON_INDENT))
            if not tag_response or tag_response["code"] != 0:
                LOG.error(f"Unable to tag {suite} {last_test_release} with {TEST_TAG}")
                return 10

    # List - Check production + all releases
    # TEST_DEV_SUITE should show all release for both production + dev/all
    list_params = {
        TEST_SUITE: {"production": (True, 2), "all": (False, 3)},
        TEST_DEV_SUITE: {"production": (True, 3), "all": (False, 3)},
    }
    for suite, client in ((TEST_SUITE, spc), (TEST_DEV_SUITE, spc_dev)):
        for rel_type, data in list_params[suite].items():
            LOG.info(f"Listing {rel_type} releases in {suite}")
            list_json = await client.list(production=data[0])
            print(dumps(list_json, indent=JSON_INDENT))
            if last_test_release not in list_json:
                LOG.error(f"{last_test_release} not found in returned /list JSON")
                return 11
            # TODO: Evaluate if we want status in this dict. Seems pointless!
            prod_keys = list(list_json.keys())
            if len(prod_keys) != data[1]:
                LOG.error(
                    f"{last_test_release} has more than {data[1] - 1} "
                    + f"{rel_type} release"
                )
                return 11

    # There is no HTTPS API so lets directly flag a release as blacklisted and check
    # Then remove and check - remove == Remove blacklist
    for remove in (False, True):
        await blacklist_release(
            db_conf_path, TEST_SUITE, last_test_release, remove=remove
        )
        releases = await spc.list()
        if not remove and last_test_release in releases:
            LOG.error(f"Blacklisted release {last_test_release} was in /list output")
            return 12
        if remove and last_test_release not in releases:
            LOG.error(
                f"Release {last_test_release} not in /list output after "
                + "being removed from blacklist"
            )
            return 12

    # Check tags exists and correctly tagged
    # (list_json / last_test_release intentionally carry over from the loops above)
    # bug fix: was "and", which never checked TEST_TAG membership when the
    # "tags" key existed (and would have raised KeyError when it did not)
    if (
        "tags" not in list_json[last_test_release]
        or TEST_TAG not in list_json[last_test_release]["tags"]
    ):
        LOG.error(f"{TEST_TAG} is not in the /list JSON")
        return 13

    # Download sample file via release and via tag
    for the_type, reltag in (("release", last_test_release), ("tag", TEST_TAG)):
        download_dest = Path(gettempdir()) / f"sp_test_download_{reltag}_{getpid()}.md"
        LOG.info(
            f"Downloading via {reltag} {the_type} {SAMPLE_MD_PATHS[1].name} "
            + f"to {download_dest}"
        )

        if not await spc.download(reltag, SAMPLE_MD_PATHS[1].name, download_dest):
            LOG.error(f"Failed to download {download_dest} using reltag {reltag}")
            return 14

        downloaded_sha = calc_blob_sha(download_dest)
        download_dest.unlink()
        if downloaded_sha != EXPECTED_DOWNLOAD_SHA:
            LOG.error(f"{download_dest} has a bad shasum - {downloaded_sha}")
            return 15
        LOG.info(f"{download_dest} has correct sha256 sum")

    # Check we can sign a key
    if not await check_key_signing(spc):
        LOG.error("Problems signing a key")
        return 16

    LOG.info("All CI checks have passed. Commence building official Docker Images")
    return 0
495
496
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.option(
    "--do-not-cleanup",
    is_flag=True,
    show_default=True,
    help="Don't stop and cleanup containers",
)
@click.option(
    "--debug",
    is_flag=True,
    callback=_handle_debug,
    show_default=True,
    help="Turn on debug logging",
)
@click.pass_context
def main(ctx: click.core.Context, **kwargs) -> None:
    """CLI entry point: run the async CI flow and exit with its return code."""
    LOG.debug(f"Starting {sys.argv[0]}")
    exit_code = asyncio.run(async_main(**kwargs))
    ctx.exit(exit_code)
515
516
if __name__ == "__main__":
    # sys.exit over the interactive-only exit() builtin (site module) in scripts;
    # click's standalone mode normally raises SystemExit from main() itself.
    sys.exit(main())