· 5 years ago · Jan 17, 2021, 10:38 AM
1from requests import exceptions
2import argparse
3import requests
4import cv2
5import os
6
def search_images():
    """Query the Bing Image Search v7 API for images matching a search term.

    Command-line arguments (parsed from ``sys.argv``):
        -q / --query:  search term sent to the API (required).
        -o / --output: path to the output directory of images (required;
            parsed but not used yet because the download step is a stub).

    A first request probes ``totalEstimatedMatches``. Result pages are then
    requested in batches of ``BATCH_SIZE`` until ``MAX_RESULTS`` (or the
    estimated number of matches, whichever is smaller) has been covered.

    Raises:
        requests.exceptions.HTTPError: if the API answers with an error
            status (via ``raise_for_status``).
        requests.exceptions.RequestException: on connection failures or
            timeouts while talking to the API.
        SystemExit: raised by argparse when a required argument is missing.
    """
    # Set up the command-line arguments.
    ap = argparse.ArgumentParser()
    ap.add_argument("-q", "--query", required=True,
                    help="search query to search Bing Image API for")
    ap.add_argument("-o", "--output", required=True,
                    help="path to output directory of images")
    args = vars(ap.parse_args())

    # Bing Search v7 API call settings (API key & endpoint URL).
    # NOTE(review): a subscription key is hard-coded as the fallback value.
    # Secrets should not live in source control; prefer the environment
    # variable and consider rotating this key.
    API_KEY = os.environ.get("BING_SEARCH_V7_SUBSCRIPTION_KEY",
                             "09f968b4c3a8468782bcd68cd7383bc9")
    SEARCH_URL = "https://api.bing.microsoft.com/v7.0/images/search"
    MAX_RESULTS = 250  # upper bound on the number of results to process
    BATCH_SIZE = 50  # number of results requested per API call
    REQUEST_TIMEOUT = 30  # seconds; without it requests.get can block forever
    # Exception classes that mean "skip this image and move on". Kept as a
    # tuple so it can be passed straight to isinstance().
    EXCEPTIONS = (IOError, FileNotFoundError,
                  exceptions.RequestException, exceptions.HTTPError,
                  exceptions.ConnectionError, exceptions.Timeout)

    headers = {"Ocp-Apim-Subscription-Key": API_KEY}
    search_term = args["query"]
    params = {"q": search_term, "offset": 0, "count": BATCH_SIZE}

    # Send a first request to probe how much data is available.
    print(f"[INFO] 利用 Bing-Search-v7 API 搜尋有關 '{search_term}' 的圖片")
    print("[INFO] (1) 正在發送第一個請求以探測可用資料的資訊: ")
    response = requests.get(SEARCH_URL, headers=headers, params=params,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Read "totalEstimatedMatches" from the JSON answer to learn how many
    # images are available, capped at the self-imposed MAX_RESULTS limit.
    search_results = response.json()
    estimated_result_num = min(search_results["totalEstimatedMatches"], MAX_RESULTS)
    print(f"[INFO] '{search_term}'圖片預計下載資料筆數: {estimated_result_num}")

    # Number of images downloaded so far (to be incremented by the download
    # step once it is implemented).
    total = 0

    # Walk through the results batch by batch; `offset` both tracks progress
    # and tells the API where the current batch starts.
    for offset in range(0, estimated_result_num, BATCH_SIZE):
        # Clamp the end of the batch so the last (partial) batch does not
        # report a range or percentage beyond the total.
        batch_end = min(offset + BATCH_SIZE, estimated_result_num)
        progress = (f"{offset}-{batch_end} / {estimated_result_num} "
                    f"({batch_end * 100 / estimated_result_num} %)")

        print(f"[INFO] 正在發送獲取圖片請求 | 進度: {progress}")
        params["offset"] = offset
        response = requests.get(SEARCH_URL, headers=headers, params=params,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        results = response.json()

        print(f"[INFO] 正在儲存圖片 | 進度: {progress}")
        for v in results["value"]:
            try:
                # TODO: download v["contentUrl"] into args["output"] and
                # increment `total`; the download step is not implemented.
                pass
            except Exception as e:
                # `e` is an instance, so it must be checked with isinstance()
                # rather than looked up among the exception classes.
                if isinstance(e, EXCEPTIONS):
                    print(f'[INFO] 跳過: {v["contentUrl"]}')
                    continue
                # Any other exception is ignored, as in the original
                # best-effort loop.
60
61
# Run the image search only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    search_images()