· this year · Oct 11, 2024, 12:05 AM
1/**
2 * @fileoverview A Google Apps Script to retrieve file names, IDs, and paths from a
3 * specified Google Drive folder and populate them into a Google Sheets document.
4 * Supports recursive retrieval of files from subfolders if enabled, and uses
5 * the Advanced Drive API with pagination for handling large datasets.
6 *
7 * This script is ideal for automating data collection from Google Drive into
8 * Google Sheets, particularly in enterprise environments where scalability and
9 * error handling are critical.
10 *
11 * @version 1.5.1
12 * @date 2024-10-11
13 * @modifiedBy u/IAmMoonie
14 * @see {@link https://developers.google.com/apps-script/advanced/drive} - For using the Advanced Drive API.
15 * @see {@link https://developers.google.com/apps-script/reference/properties/properties-service} - For storing progress using PropertiesService.
16 * @see {@link https://developers.google.com/apps-script/reference/spreadsheet/spreadsheet-app} - For interacting with Google Sheets.
17 * @see {@link https://developers.google.com/apps-script/guides/v8-runtime} - Information on V8 runtime and ES6 compatibility.
18 */
19
20/**
21 * Configuration object for user-defined variables.
22 * @constant {Object} CONFIG
23 * @property {string} folderId - The ID of the Google Drive folder to retrieve files from.
24 * @property {number} rangeStartRow - The starting row number in the Google Sheet where the data should be written.
25 * @property {number} fileNameColumn - The column number in the Google Sheet where file names will be written (e.g., 5 for column E).
26 * @property {number} fileIdColumn - The column number in the Google Sheet where file IDs will be written (e.g., 9 for column I).
27 * @property {number} filePathColumn - The column number in the Google Sheet where file paths will be written.
28 * @property {number} batchSize - The number of files to retrieve per API call for efficient pagination.
29 * @property {string} propertyKey - The key used to store the last processed page token in the script's properties, enabling script continuity.
30 * @property {boolean} recursive - If true, retrieves files from all subfolders recursively.
31 * @property {number} chunkSize - The number of rows to write to the sheet at a time to avoid memory issues.
32 * @property {number} initialBackoff - The initial delay (in milliseconds) for exponential backoff when handling rate limits.
33 */
34const CONFIG = {
35 folderId: "<Google Drive Folder>",
36 rangeStartRow: 2,
37 fileNameColumn: 1,
38 fileIdColumn: 2,
39 filePathColumn: 3,
40 batchSize: 100,
41 propertyKey: "LAST_PAGE_TOKEN",
42 recursive: false,
43 chunkSize: 500,
44 initialBackoff: 1000
45};
46let retryCount = 0;
47
48/**
49 * Retrieves file names, IDs, and paths from a specified Google Drive folder and writes
50 * them to the active Google Sheet. Supports recursive exploration of subfolders.
51 * Uses the Advanced Drive API for pagination and batch operations for large datasets.
52 *
53 * @function getFileIds
54 * @returns {void} This function does not return a value but writes data directly to the Google Sheet.
55 * @throws {Error} Throws an error if an issue occurs while retrieving files or writing data.
56 */
57function getFileIds() {
58 validateConfig_();
59 try {
60 log_("Starting file retrieval process...");
61 const sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
62 let pageToken = PropertiesService.getScriptProperties().getProperty(
63 CONFIG.propertyKey
64 );
65 const names = [];
66 const ids = [];
67 const paths = [];
68 fetchFilesAndSubfolders_(
69 CONFIG.folderId,
70 pageToken,
71 names,
72 ids,
73 paths,
74 0,
75 ""
76 );
77 if (names.length) {
78 const lastRow = sheet.getLastRow() + 1;
79 log_(`Writing ${names.length} files to the sheet in chunks.`);
80 writeInChunks_(sheet, names, ids, paths, lastRow);
81 retryCount = 0;
82 } else {
83 log_(
84 "No files found in the specified folder and its subfolders.",
85 "warn"
86 );
87 }
88 PropertiesService.getScriptProperties().deleteProperty(CONFIG.propertyKey);
89 PropertiesService.getScriptProperties().deleteProperty(
90 "LAST_PROCESSED_FOLDER"
91 );
92 log_("File retrieval and data writing completed successfully.");
93 } catch (error) {
94 handleRateLimit_(error);
95 }
96}
97
98/**
99 * Recursively fetches files from a folder and its subfolders if recursion is enabled.
100 * Uses the Advanced Drive API for paginated results.
101 *
102 * @param {string} folderId - The ID of the Google Drive folder to retrieve files from.
103 * @param {string|null} pageToken - The page token for continuing a previous fetch.
104 * @param {Array} names - The array to store file names.
105 * @param {Array} ids - The array to store file IDs.
106 * @param {Array} paths - The array to store file paths.
107 * @param {number} depth - The current depth of recursion.
108 * @param {string} currentPath - The path of the current folder.
109 * @returns {void}
110 * @private
111 */
112function fetchFilesAndSubfolders_(
113 folderId,
114 pageToken,
115 names,
116 ids,
117 paths,
118 depth,
119 currentPath
120) {
121 do {
122 const response = Drive.Files.list({
123 q: `'${folderId}' in parents and trashed = false`,
124 fields: "nextPageToken, files(id, name, mimeType)",
125 pageSize: CONFIG.batchSize,
126 pageToken: pageToken || null
127 });
128 const files = response.files || [];
129 for (const { mimeType, name, id } of files) {
130 const fullPath = `${currentPath}/${name}`;
131 if (
132 mimeType === "application/vnd.google-apps.folder" &&
133 CONFIG.recursive
134 ) {
135 log_(`Exploring subfolder: ${name} (${id}), Depth: ${depth + 1}`);
136 fetchFilesAndSubfolders_(
137 id,
138 null,
139 names,
140 ids,
141 paths,
142 depth + 1,
143 fullPath
144 );
145 } else {
146 names.push([name]);
147 ids.push([id]);
148 paths.push([fullPath]);
149 }
150 }
151 pageToken = response.nextPageToken;
152 if (pageToken) {
153 PropertiesService.getScriptProperties().setProperty(
154 CONFIG.propertyKey,
155 pageToken
156 );
157 PropertiesService.getScriptProperties().setProperty(
158 "LAST_PROCESSED_FOLDER",
159 folderId
160 );
161 }
162 } while (pageToken);
163}
164
165/**
166 * Writes data in chunks to the Google Sheet to avoid memory issues.
167 *
168 * @param {GoogleAppsScript.Spreadsheet.Sheet} sheet - The Google Sheet to write data to.
169 * @param {Array} names - The array of file names.
170 * @param {Array} ids - The array of file IDs.
171 * @param {Array} paths - The array of file paths.
172 * @param {number} startRow - The starting row for writing data.
173 * @returns {void}
174 * @private
175 */
176function writeInChunks_(sheet, names, ids, paths, startRow) {
177 const { chunkSize, fileNameColumn, fileIdColumn, filePathColumn } = CONFIG;
178 for (let i = 0; i < names.length; i += chunkSize) {
179 const nameChunk = names.slice(i, i + chunkSize);
180 const idChunk = ids.slice(i, i + chunkSize);
181 const pathChunk = paths.slice(i, i + chunkSize);
182 sheet
183 .getRange(startRow, fileNameColumn, nameChunk.length, 1)
184 .setValues(nameChunk);
185 sheet
186 .getRange(startRow, fileIdColumn, idChunk.length, 1)
187 .setValues(idChunk);
188 sheet
189 .getRange(startRow, filePathColumn, pathChunk.length, 1)
190 .setValues(pathChunk);
191 startRow += nameChunk.length;
192 log_(
193 `Wrote ${nameChunk.length} rows to the sheet, starting from row ${
194 startRow - nameChunk.length
195 }.`
196 );
197 }
198}
199
200/**
201 * Handles API rate limit errors with exponential backoff.
202 *
203 * @param {Error} error - The error object.
204 * @private
205 */
206function handleRateLimit_({ message }) {
207 log_(`Error in getFileIds: ${message}`, "error");
208 if (message.includes("Rate Limit Exceeded")) {
209 const delay = Math.min(60000, CONFIG.initialBackoff * 2 ** retryCount);
210 log_(`Rate limit exceeded, retrying in ${delay / 1000} seconds...`);
211 Utilities.sleep(delay);
212 retryCount++;
213 getFileIds();
214 } else {
215 throw new Error(
216 "An error occurred while retrieving files or writing data. Check logs for details."
217 );
218 }
219}
220
221/**
222 * Validates the configuration settings.
223 * @returns {void}
224 * @private
225 */
226function validateConfig_() {
227 if (!CONFIG.folderId)
228 throw new Error(
229 "CONFIG.folderId is not set. Please specify a valid folder ID."
230 );
231 if (CONFIG.chunkSize <= 0)
232 throw new Error("CONFIG.chunkSize must be greater than 0.");
233}
234
235/**
236 * Logs messages with different levels.
237 *
238 * @param {string} message - The message to log.
239 * @param {"info"|"warn"|"error"} [level="info"] - The level of the log message.
240 * @returns {void}
241 * @private
242 */
243function log_(message, level = "info") {
244 console[level](message);
245}
246