· 5 years ago · Nov 08, 2020, 07:34 AM
1/*
2 * This program is composed of two java files:
3 * YoutubeUserCommentHunter.java: Main program, contains the main API logic and
4 * many helper methods to aid in data processing and writeout.
5 * ExtendableGenericArray.java: Custom collections-ish (no extend) to let me
6 * index into anywhere in an array, and store a generic object where I
7 * couldn't directly.
8 *
9 * Everything should be documented relatively well, so if you're wondering how
10 * you can do something like this, feel free to take a look around. There's some
11 * API integration, file IO, and string processing, plus the generics and array
12 * extending semantics in the array class I wrote.
13 */
14/*----------------------------------------------------
15 * YoutubeUserCommentHunter.java
16 *--------------------------------------------------*/
17package YoutubeAPIGradle;
18
19import java.io.BufferedWriter;
20import java.io.FileInputStream;
21import java.io.FileWriter;
22import java.io.IOException;
23import java.security.GeneralSecurityException;
24import java.time.LocalDateTime;
25import java.time.format.DateTimeFormatter;
26import java.util.ArrayList;
27import java.util.Arrays;
28import java.util.List;
29import java.util.Properties;
30
31import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
32import com.google.api.client.googleapis.json.GoogleJsonResponseException;
33import com.google.api.client.http.javanet.NetHttpTransport;
34import com.google.api.client.json.JsonFactory;
35import com.google.api.client.json.jackson2.JacksonFactory;
36import com.google.api.services.youtube.YouTube;
37import com.google.api.services.youtube.model.Comment;
38import com.google.api.services.youtube.model.CommentSnippet;
39import com.google.api.services.youtube.model.CommentThread;
40import com.google.api.services.youtube.model.CommentThreadListResponse;
41
42/**
43 * This class uses the YouTube Data API v3 to scan a youtube video for all
44 * comments by a target channel id (commenter), and compile their comments into
45 * an HTML file.
46 *
47 * @author Melody Griesen
48 *
49 */
50public class YoutubeUserCommentHunter {
51
52 // MODIFIABLE PARAMETERS
53 /** Video that we're scanning for comments. */
54 private static final String VIDEO_ID = "UwxatzcYf9Q";
55 /**
56 * Channel ID of the user whose comments we are isolating. This is Anxley's
57 * channel ID.
58 */
59 private static final String TARGET_CHANNEL_ID = "UCSr1Me3Ls-4_lIpYsK_A7nA";
60
61 // FIXED PARAMETERS
62 // Misc
63 /** Pastebin link where the source code is held. */
64 private static final String CODE_PASTE = "https://pastebin.com/nw59HjjC";
65 /** Date time format for printing out comments. */
66 private static final String DATE_TIME_FORMAT = "MMMM d, uuuu (h:mm a)";
67 /** Name of the application. */
68 private static final String APPLICATION_NAME = "YouTube User Comment Hunter";
69 // Files
70 /** File storing my developer key. Do not steal. */
71 private static final String DEVELOPER_KEY_FILE = "src/main/resources/developer_key.prop";
72 /** Output file for the generated HTML. */
73 private static final String OUT_FILE = "src/main/resources/index.html";
74 // HTML File Contents
75 /** Header string from Melody, developer. */
76 private static final String HEADER_NOTE = "Note from Melody, the gal who created this:\n"
77 + "This paste was generated by a Java program I wrote that uses the YouTube Data API v3.\n"
78 + "I'm going to try to keep this updated with the latest posts roughly every week.\n"
79 + "To see the code that generates these pages, look <a href=" + CODE_PASTE + ">here</a>.\n"
80 + "Much love from your local trans woman,\n" + "\tMelody Griesen <3";
81 /** Start of the HTML document. */
82 private static final String HTML_START = "<!DOCTYPE html>\r\n" + "<html>\r\n" + "<head>\r\n"
83 + " <title>Anxley's 365 Day Challenge</title>\r\n" + "</head>\r\n" + "<body>";
84 /** End of the HTML document. */
85 private static final String HTML_END = "</body>" + "</html>";
86
87 // VARIABLES
88 /** List of comments, organized by day. */
89 private static ExtendableGenericArray<List<Comment>> commentsByDay;
90 /** Collection of all comments that we don't recognize a day for. */
91 private static List<Comment> unrecognizedComments;
92 /** Next page token to use when requesting a page of data. */
93 private static String nextPageToken;
94 /** JSON Factory for creating the YouTube object. */
95 private static final JsonFactory JSON_FACTORY = JacksonFactory.getDefaultInstance();
96
97 /**
98 * Build and return an authorized API client service.
99 *
100 * @return an authorized API client service
101 * @throws GeneralSecurityException, IOException
102 */
103 public static YouTube getService() throws GeneralSecurityException, IOException {
104 final NetHttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport();
105 return new YouTube.Builder(httpTransport, JSON_FACTORY, null).setApplicationName(APPLICATION_NAME).build();
106 }
107
108 /**
109 * Call function to create API service object. Define and execute API request.
110 * Loop over each page of responses, collecting comments. Filter by comments
111 * that match the target channel ID. When done, print out all found comments.
112 *
113 * @throws GeneralSecurityException, IOException, GoogleJsonResponseException
114 */
115 public static void main(String[] args) throws GeneralSecurityException, IOException, GoogleJsonResponseException {
116 // Get the YoTube object we need to perform API calls.
117 YouTube youtubeService = getService();
118
119 // Load our developer key from the file
120 // First, get the property file loaded
121 Properties prop = new Properties();
122 prop.load(new FileInputStream(DEVELOPER_KEY_FILE));
123
124 // Then, load the "key" property from the file (in the file, it looks like
125 // "key=...")
126 final String DEVELOPER_KEY = prop.getProperty("key");
127
128 /*
129 * Define the API request.
130 *
131 * 1. Specify a comment threads request.
132 *
133 * 2. Ask for just the comment snippets, AKA just the top-level comments.
134 *
135 * 3. Set my developer key so that I can authenticate with YouTube.
136 *
137 * 4. Set the video ID for the video that we're scanning.
138 *
139 * 5. Get the max results (100) per page.
140 *
141 * 6. Sort by relevance (as opposed to "time"), because Anxley is popular, so
142 * more of their comments will be at the top by that sorting.
143 */
144 YouTube.CommentThreads.List request = youtubeService.commentThreads()
145 .list(Arrays.asList("snippet"/* , "replies" */)).setKey(DEVELOPER_KEY).setVideoId(VIDEO_ID)
146 .setMaxResults((long) 100).setOrder("relevance");
147
148 // Initialize our comment fields
149 commentsByDay = new ExtendableGenericArray<List<Comment>>();
150 unrecognizedComments = new ArrayList<Comment>();
151
152 // Set up the next page token for the first iteration. For the first page, any
153 // random string will do - we'll set it after we get the first response.
154 nextPageToken = "FIRST PAGE";
155
156 // While there's another page of data
157 while (nextPageToken != null) {
158
159 // Execute the API call to get one page of responses.
160 CommentThreadListResponse response = request.execute();
161
162 // Loop over all of the CommentThreads we got in this page
163 for (CommentThread ct : response.getItems()) {
164 // Call our helper method to handle this CommentThread
165 processComment(ct.getSnippet().getTopLevelComment());
166 }
167
168 // Get the token of the next page of comments to read
169 nextPageToken = response.getNextPageToken();
170 // Use that token to prep the next request to get the next page of comments.
171 request.setPageToken(nextPageToken);
172 }
173 // Done scanning in data from YouTube!
174
175 // Time to print it out all nice and pretty-like into an HTML file.
176 writeToFile(OUT_FILE);
177 }
178
179 /**
180 * Process one CommentThread, a.k.a. top-level comment. If it doesn't match the
181 * target channel ID, skip it. Otherwise, try to figure out what day it is and
182 * add it to that day in the commentsByDay list. Otherwise, add it to the
183 * unrecognizedComments list.
184 *
185 * @param ct the CommentThread to parse and maybe
186 */
187 private static void processComment(Comment comment) {
188 // Get just the top-level snippet - this contains all of the useful data like
189 // author, comment text, likes, date, etc.
190 CommentSnippet topLevelSnippet = comment.getSnippet();
191
192 // Get the Author's channel id
193 String authorChannelId = topLevelSnippet.getAuthorChannelId().getValue();
194
195 // If the channel ID of this comment matches the target ID
196 if (authorChannelId.equals(TARGET_CHANNEL_ID)) {
197
198 // Try to identify which day the comment was written, so that we can place it in
199 // the commentsByDay list
200 try {
201 // Get day from the comment
202 int day = getDayFromComment(comment);
203
204 // Make sure the Days array has a List for that entry
205 if (commentsByDay.get(day) == null) {
206 commentsByDay.set(day, new ArrayList<Comment>());
207 }
208 // Add this comment to the list for this day
209 commentsByDay.get(day).add(comment);
210 // Yay! We registered it by day
211
212 } catch (IllegalArgumentException e) {
213 // Oops! This comment doesn't match the expected format.
214 // Better add it to our unknowns list.
215 unrecognizedComments.add(comment);
216 }
217 }
218 }
219
220 /**
221 * Write the data in the two comment lists to the output file in HTML format.
222 *
223 * @param outfile the file to output the data to
224 * @throws IOException if cannot write to the outfile
225 */
226 private static void writeToFile(String outfile) throws IOException {
227 // Start by opening the file itself
228 BufferedWriter out = new BufferedWriter(new FileWriter(outfile));
229
230 // Write the header HTML information
231 out.write(HTML_START);
232
233 // Write the programmer info header
234 out.write("<pre>" + HEADER_NOTE + "</pre>");
235
236 // Write the start of the list!
237 out.write("<h1>Anxley's 365 Day Challenge</h1>");
238
239 // Start an unordered list
240 out.write("<ul>\n");
241
242 // Loop over our daily list
243 for (int i = 1; i < commentsByDay.length(); i++) {
244
245 // Start one list item
246 out.write("<li>\n");
247
248 // Get the list of comments for each day
249 List<Comment> entries = commentsByDay.get(i);
250
251 // If null, we haven't seen a comment for this day.
252 if (entries == null) {
253 // Oh no!
254 out.write("<em>No comment for day " + i + "!</em>");
255 }
256 // Otherwise, there's at least one comment to print.
257 else {
258 // If we've got more than 1 comment for the day
259 if (entries.size() > 1) {
260 // Let the user know we've got multiple comments for this day.
261 out.write("<em>" + entries.size() + " comments for this day!</em>");
262 }
263
264 // Print out each comment
265 for (Comment c : entries) {
266 out.write("<pre style=\"font-family: Roboto, Arial, sans-serif;\">" + printComment(c) + "</pre>");
267 }
268
269 }
270 // End list item
271 out.write("</li>");
272 }
273
274 // End the unordered list
275 out.write("</ul>");
276
277 // Loop over our unknowns
278 out.write("<p>Unrecognizable comments from Anxley:</p>");
279 if (unrecognizedComments.size() > 0) {
280 // Start another unordered list
281 out.write("<ul>");
282 // For each unrecognized comment, print out just its text
283 for (Comment comm : unrecognizedComments) {
284 out.write(comm.getSnippet().getTextDisplay());
285 }
286 // Finish unordered list
287 out.write("</ul>");
288 }
289 // If we had no unrecognized comments, let the user know
290 else {
291 out.write("<p>(none)</p>");
292 }
293
294 // Write the end of the HTML file
295 out.write(HTML_END);
296 // Close the output stream. All done!
297 out.close();
298 }
299
300 /**
301 * Creates a String holding the data for one Comment. Each string holds the
302 * comment string, then one line each for likes, date, and URL.
303 *
304 * @param comment the comment to print
305 */
306 private static String printComment(Comment comment) {
307 // Use a StringBuilder to compile all of the things
308 StringBuilder sb = new StringBuilder();
309
310 // Get the content of the comment itself
311 String commentStr = comment.getSnippet().getTextDisplay();
312 // Replace special characters
313 commentStr = commentStr.replace("'", "'").replace("<br />", "\n");
314 // Print out comment itself
315 sb.append("<div style=\"font-size: 15px;\">" + commentStr + "</div>");
316
317 // Get the comment's like count
318 Long likeCount = comment.getSnippet().getLikeCount();
319 // Print out comment likes
320 sb.append("\t\t\tLikes: " + likeCount + "\n");
321
322 // Get the comment's (pretty) post date
323 String prettyDate = getPrettyDate(comment);
324 // Get a URL for this comment
325 String url = getURLForComment(comment);
326 // Print out URL
327 sb.append("\t\t\t<a href=" + url + ">" + prettyDate + "</a>");
328
329 // Return built string
330 return sb.toString();
331 }
332
333 /**
334 * Retrieves the publication date of the specified comment, and returns it in a
335 * nice format specified by DATE_TIME_FORMAT.
336 *
337 * @param comment the comment to get the date of
338 * @return a nicely formatted date
339 */
340 private static String getPrettyDate(Comment comment) {
341 // Format to use when reading in data
342 DateTimeFormatter iso8601Format = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
343 // This object stores the date and time so that we can re-format it in a nicer
344 // format
345 LocalDateTime dateTimeData = iso8601Format.parse(comment.getSnippet().getPublishedAt(), LocalDateTime::from);
346 // This is a much nicer format to view data in.
347 DateTimeFormatter niceFormat = DateTimeFormatter.ofPattern(DATE_TIME_FORMAT);
348 // Convert the date and time into that nicer format
349 String dateTimeString = dateTimeData.format(niceFormat) + " GMT";
350 // Return that nice-looking date and time. Lookin' good, datetime.
351 return dateTimeString;
352 }
353
354 /**
355 * Generates a URL for the given Comment object from the video ID and the
356 * comment ID.
357 *
358 * @param comment the Comment to generate a URL for
359 * @return a URL string linking to the given Comment
360 */
361 private static String getURLForComment(Comment comment) {
362 // All URLs are going to start with this general link
363 String prefix = "https://www.youtube.com/watch";
364 // Video argument is the video ID that the comment is on
365 String video = "v=" + comment.getSnippet().getVideoId();
366 // Comment argument is the id of this comment
367 String cid = "lc=" + comment.getId();
368 // Stitch the URL together with ? and & syntax to give variables to the URL
369 String url = prefix + "?" + video + "&" + cid;
370 // All done! Send it back to the user.
371 return url;
372 }
373
374 /**
375 * Gets the day that Anxley posted this log
376 *
377 * @param comment the Anxley comment to check
378 * @throws NumberFormatException if the comment cannot be parsed
379 * @return the day it was logged
380 */
381 private static int getDayFromComment(Comment comment) {
382 // Get the comment string itself
383 String commentStr = comment.getSnippet().getTextDisplay();
384
385 // Special cases
386 // Day 1 has a bunch of stuff at the start before the entry itself
387 if (commentStr.substring(0, 10).equals("Im doing t")) {
388 return 1;
389 }
390 // Day 72 has an extra colon after "Day" and before "72"
391 else if (commentStr.substring(0, 10).equals("Day: 72: I")) {
392 return 72;
393 }
394
395 // Expecting the general format of:
396 // Day ###: ...
397 // Strategy to get that ###: Find the first space and the first colon, get
398 // everything between the two, and convert to an int.
399
400 // Find the first colon in the comment
401 int colonIdx = commentStr.indexOf(":");
402 // Find the first space in the comment
403 int spaceIdx = commentStr.indexOf(" ");
404 // If this doesn't have the format we expect, we might run into trouble
405 try {
406 // Get just the characters we want: those between the space and the colon.
407 String intStr = commentStr.substring(spaceIdx + 1, colonIdx);
408 // Try to read those characters as a number. If successful, return that number
409 // as the day that this comment matches to.
410 return Integer.parseInt(intStr);
411 }
412 // Either getting the substring or parsing the string as a number can throw an
413 // exception
414 catch (StringIndexOutOfBoundsException | NumberFormatException e) {
415 // If either of these problems arise, tell the caller that the comment they
416 // passed us couldn't be read
417 throw new IllegalArgumentException();
418 }
419 }
420}
421
422
423
424
425
426
427
428
429
430
431/*----------------------------------------------------
432 * ExtendableGenericArray.java
433 *--------------------------------------------------*/
434package YoutubeAPIGradle;
435
/**
 * Allows an Array of generic objects to be stored. Any non-negative index can
 * be accessed, and the array will adapt its internal size to hold at least that
 * many elements. Slots that were never set hold {@code null}.
 *
 * @author Melody Griesen
 *
 * @param <E> generic object to store in this custom Array
 */
public class ExtendableGenericArray<E> {

    /** Internal array used for storage. */
    E[] list;
    /** Default initial capacity of 7. */
    public static final int DEFAULT_CAPACITY = 7;
    /**
     * Misspelled original name of {@link #DEFAULT_CAPACITY}, kept so existing
     * callers still compile.
     *
     * @deprecated use {@link #DEFAULT_CAPACITY}
     */
    @Deprecated
    public static final int DEFAUT_CAPACITY = DEFAULT_CAPACITY;
    /**
     * Largest capacity the array will grow to. Kept a little below
     * Integer.MAX_VALUE because arrays of exactly that size may not be
     * allocatable.
     */
    private static final int MAX_CAPACITY = Integer.MAX_VALUE - 8;

    /**
     * Initialize the custom Array with the specified initial capacity.
     *
     * @param capacity initial capacity for the Array. Cannot be negative.
     * @throws IllegalArgumentException if capacity is negative
     */
    @SuppressWarnings("unchecked")
    public ExtendableGenericArray(int capacity) {
        if (capacity < 0) {
            throw new IllegalArgumentException("Array cannot have negative capacity!");
        }
        // Safe: the array only ever holds values of type E that were passed to set
        list = (E[]) new Object[capacity];
    }

    /**
     * Initialize the custom Array with the default initial capacity.
     */
    public ExtendableGenericArray() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Set the element at the specified index. The previous element is returned. If
     * the array is not currently big enough, it will be extended to allow for such
     * an index to be accessed.
     *
     * @param idx  the index of the element to set
     * @param data the new element to place at the index
     * @return the previous element there, or null if there was none
     * @throws ArrayIndexOutOfBoundsException if idx is negative or too large for
     *                                        the array to grow to
     */
    public E set(int idx, E data) {
        // Grow the array to include idx, if needed
        ensureAddressable(idx);

        E old = list[idx];
        list[idx] = data;
        return old;
    }

    /**
     * Retrieves the element at the specified index. If the array is not currently
     * big enough, it will be extended to allow for such an index to be accessed.
     *
     * @param idx the index of the element to retrieve.
     * @return the element at the specified index, or null if none was set
     * @throws ArrayIndexOutOfBoundsException if idx is negative or too large for
     *                                        the array to grow to
     */
    public E get(int idx) {
        // Grow the array to include idx, if needed
        ensureAddressable(idx);
        return list[idx];
    }

    /**
     * Returns the current length of the underlying array. This is the capacity,
     * not the number of elements that have been set.
     *
     * @return the internal array's length.
     */
    public int length() {
        return list.length;
    }

    /**
     * Ensures that the underlying array can accommodate an access to the given
     * index, growing it if needed. Each growth step takes the capacity from
     * {@code c} to {@code 2c + 1}, repeated until the index fits.
     *
     * @param idx the index that must become addressable
     * @throws ArrayIndexOutOfBoundsException if idx is negative or too large for
     *                                        the array to grow to
     */
    private void ensureAddressable(int idx) {
        if (idx < 0) {
            throw new ArrayIndexOutOfBoundsException("Index cannot be negative: " + idx);
        }
        // If we already have enough space, cool, we're done
        if (idx < length()) {
            return;
        }
        if (idx >= MAX_CAPACITY) {
            throw new ArrayIndexOutOfBoundsException(
                    "Index " + idx + " exceeds the largest supported index " + (MAX_CAPACITY - 1));
        }

        // Calculate the new capacity in long arithmetic: with int, doubling past
        // Integer.MAX_VALUE wraps negative and the loop would never terminate.
        long newCapacity = length();
        do {
            newCapacity = newCapacity * 2 + 1;
        } while (idx >= newCapacity);

        // Copy existing data into a larger array; new slots are null
        list = java.util.Arrays.copyOf(list, (int) Math.min(newCapacity, MAX_CAPACITY));
    }
}
546