nw59HjjC

· 5 years ago · Nov 08, 2020, 07:34 AM
/*
 * This program is composed of two Java files:
 * YoutubeUserCommentHunter.java: Main program; contains the main API logic and
 * 		many helper methods to aid in data processing and write-out.
 * ExtendableGenericArray.java: A custom collection-like class (it does not
 * 		extend anything) that lets me index anywhere into an array and store
 * 		a generic object where I couldn't directly.
 * 
 * Everything should be documented relatively well, so if you're wondering how
 * you can do something like this, feel free to take a look around. There's some
 * API integration, file I/O, and string processing, plus the generics and
 * array-extending semantics in the array class I wrote.
 */
14/*----------------------------------------------------
15 * YoutubeUserCommentHunter.java
16 *--------------------------------------------------*/
17package YoutubeAPIGradle;
18
19import java.io.BufferedWriter;
20import java.io.FileInputStream;
21import java.io.FileWriter;
22import java.io.IOException;
23import java.security.GeneralSecurityException;
24import java.time.LocalDateTime;
25import java.time.format.DateTimeFormatter;
26import java.util.ArrayList;
27import java.util.Arrays;
28import java.util.List;
29import java.util.Properties;
30
31import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
32import com.google.api.client.googleapis.json.GoogleJsonResponseException;
33import com.google.api.client.http.javanet.NetHttpTransport;
34import com.google.api.client.json.JsonFactory;
35import com.google.api.client.json.jackson2.JacksonFactory;
36import com.google.api.services.youtube.YouTube;
37import com.google.api.services.youtube.model.Comment;
38import com.google.api.services.youtube.model.CommentSnippet;
39import com.google.api.services.youtube.model.CommentThread;
40import com.google.api.services.youtube.model.CommentThreadListResponse;
41
42/**
43 * This class uses the YouTube Data API v3 to scan a youtube video for all
44 * comments by a target channel id (commenter), and compile their comments into
45 * an HTML file.
46 * 
47 * @author Melody Griesen
48 *
49 */
50public class YoutubeUserCommentHunter {
51
52	// MODIFIABLE PARAMETERS
53	/** Video that we're scanning for comments. */
54	private static final String VIDEO_ID = "UwxatzcYf9Q";
55	/**
56	 * Channel ID of the user whose comments we are isolating. This is Anxley's
57	 * channel ID.
58	 */
59	private static final String TARGET_CHANNEL_ID = "UCSr1Me3Ls-4_lIpYsK_A7nA";
60
61	// FIXED PARAMETERS
62	// Misc
63	/** Pastebin link where the source code is held. */
64	private static final String CODE_PASTE = "https://pastebin.com/nw59HjjC";
65	/** Date time format for printing out comments. */
66	private static final String DATE_TIME_FORMAT = "MMMM d, uuuu (h:mm a)";
67	/** Name of the application. */
68	private static final String APPLICATION_NAME = "YouTube User Comment Hunter";
69	// Files
70	/** File storing my developer key. Do not steal. */
71	private static final String DEVELOPER_KEY_FILE = "src/main/resources/developer_key.prop";
72	/** Output file for the generated HTML. */
73	private static final String OUT_FILE = "src/main/resources/index.html";
74	// HTML File Contents
75	/** Header string from Melody, developer. */
76	private static final String HEADER_NOTE = "Note from Melody, the gal who created this:\n"
77			+ "This paste was generated by a Java program I wrote that uses the YouTube Data API v3.\n"
78			+ "I'm going to try to keep this updated with the latest posts roughly every week.\n"
79			+ "To see the code that generates these pages, look <a href=" + CODE_PASTE + ">here</a>.\n"
80			+ "Much love from your local trans woman,\n" + "\tMelody Griesen <3";
81	/** Start of the HTML document. */
82	private static final String HTML_START = "<!DOCTYPE html>\r\n" + "<html>\r\n" + "<head>\r\n"
83			+ "	<title>Anxley's 365 Day Challenge</title>\r\n" + "</head>\r\n" + "<body>";
84	/** End of the HTML document. */
85	private static final String HTML_END = "</body>" + "</html>";
86
87	// VARIABLES
88	/** List of comments, organized by day. */
89	private static ExtendableGenericArray<List<Comment>> commentsByDay;
90	/** Collection of all comments that we don't recognize a day for. */
91	private static List<Comment> unrecognizedComments;
92	/** Next page token to use when requesting a page of data. */
93	private static String nextPageToken;
94	/** JSON Factory for creating the YouTube object. */
95	private static final JsonFactory JSON_FACTORY = JacksonFactory.getDefaultInstance();
96
97	/**
98	 * Build and return an authorized API client service.
99	 *
100	 * @return an authorized API client service
101	 * @throws GeneralSecurityException, IOException
102	 */
103	public static YouTube getService() throws GeneralSecurityException, IOException {
104		final NetHttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport();
105		return new YouTube.Builder(httpTransport, JSON_FACTORY, null).setApplicationName(APPLICATION_NAME).build();
106	}
107
108	/**
109	 * Call function to create API service object. Define and execute API request.
110	 * Loop over each page of responses, collecting comments. Filter by comments
111	 * that match the target channel ID. When done, print out all found comments.
112	 *
113	 * @throws GeneralSecurityException, IOException, GoogleJsonResponseException
114	 */
115	public static void main(String[] args) throws GeneralSecurityException, IOException, GoogleJsonResponseException {
116		// Get the YoTube object we need to perform API calls.
117		YouTube youtubeService = getService();
118
119		// Load our developer key from the file
120		// First, get the property file loaded
121		Properties prop = new Properties();
122		prop.load(new FileInputStream(DEVELOPER_KEY_FILE));
123
124		// Then, load the "key" property from the file (in the file, it looks like
125		// "key=...")
126		final String DEVELOPER_KEY = prop.getProperty("key");
127
128		/*
129		 * Define the API request.
130		 * 
131		 * 1. Specify a comment threads request.
132		 * 
133		 * 2. Ask for just the comment snippets, AKA just the top-level comments.
134		 * 
135		 * 3. Set my developer key so that I can authenticate with YouTube.
136		 * 
137		 * 4. Set the video ID for the video that we're scanning.
138		 * 
139		 * 5. Get the max results (100) per page.
140		 * 
141		 * 6. Sort by relevance (as opposed to "time"), because Anxley is popular, so
142		 * more of their comments will be at the top by that sorting.
143		 */
144		YouTube.CommentThreads.List request = youtubeService.commentThreads()
145				.list(Arrays.asList("snippet"/* , "replies" */)).setKey(DEVELOPER_KEY).setVideoId(VIDEO_ID)
146				.setMaxResults((long) 100).setOrder("relevance");
147
148		// Initialize our comment fields
149		commentsByDay = new ExtendableGenericArray<List<Comment>>();
150		unrecognizedComments = new ArrayList<Comment>();
151
152		// Set up the next page token for the first iteration. For the first page, any
153		// random string will do - we'll set it after we get the first response.
154		nextPageToken = "FIRST PAGE";
155
156		// While there's another page of data
157		while (nextPageToken != null) {
158
159			// Execute the API call to get one page of responses.
160			CommentThreadListResponse response = request.execute();
161
162			// Loop over all of the CommentThreads we got in this page
163			for (CommentThread ct : response.getItems()) {
164				// Call our helper method to handle this CommentThread
165				processComment(ct.getSnippet().getTopLevelComment());
166			}
167
168			// Get the token of the next page of comments to read
169			nextPageToken = response.getNextPageToken();
170			// Use that token to prep the next request to get the next page of comments.
171			request.setPageToken(nextPageToken);
172		}
173		// Done scanning in data from YouTube!
174
175		// Time to print it out all nice and pretty-like into an HTML file.
176		writeToFile(OUT_FILE);
177	}
178
179	/**
180	 * Process one CommentThread, a.k.a. top-level comment. If it doesn't match the
181	 * target channel ID, skip it. Otherwise, try to figure out what day it is and
182	 * add it to that day in the commentsByDay list. Otherwise, add it to the
183	 * unrecognizedComments list.
184	 * 
185	 * @param ct the CommentThread to parse and maybe
186	 */
187	private static void processComment(Comment comment) {
188		// Get just the top-level snippet - this contains all of the useful data like
189		// author, comment text, likes, date, etc.
190		CommentSnippet topLevelSnippet = comment.getSnippet();
191
192		// Get the Author's channel id
193		String authorChannelId = topLevelSnippet.getAuthorChannelId().getValue();
194
195		// If the channel ID of this comment matches the target ID
196		if (authorChannelId.equals(TARGET_CHANNEL_ID)) {
197
198			// Try to identify which day the comment was written, so that we can place it in
199			// the commentsByDay list
200			try {
201				// Get day from the comment
202				int day = getDayFromComment(comment);
203
204				// Make sure the Days array has a List for that entry
205				if (commentsByDay.get(day) == null) {
206					commentsByDay.set(day, new ArrayList<Comment>());
207				}
208				// Add this comment to the list for this day
209				commentsByDay.get(day).add(comment);
210				// Yay! We registered it by day
211
212			} catch (IllegalArgumentException e) {
213				// Oops! This comment doesn't match the expected format.
214				// Better add it to our unknowns list.
215				unrecognizedComments.add(comment);
216			}
217		}
218	}
219
220	/**
221	 * Write the data in the two comment lists to the output file in HTML format.
222	 * 
223	 * @param outfile the file to output the data to
224	 * @throws IOException if cannot write to the outfile
225	 */
226	private static void writeToFile(String outfile) throws IOException {
227		// Start by opening the file itself
228		BufferedWriter out = new BufferedWriter(new FileWriter(outfile));
229
230		// Write the header HTML information
231		out.write(HTML_START);
232
233		// Write the programmer info header
234		out.write("<pre>" + HEADER_NOTE + "</pre>");
235
236		// Write the start of the list!
237		out.write("<h1>Anxley's 365 Day Challenge</h1>");
238
239		// Start an unordered list
240		out.write("<ul>\n");
241
242		// Loop over our daily list
243		for (int i = 1; i < commentsByDay.length(); i++) {
244
245			// Start one list item
246			out.write("<li>\n");
247
248			// Get the list of comments for each day
249			List<Comment> entries = commentsByDay.get(i);
250
251			// If null, we haven't seen a comment for this day.
252			if (entries == null) {
253				// Oh no!
254				out.write("<em>No comment for day " + i + "!</em>");
255			}
256			// Otherwise, there's at least one comment to print.
257			else {
258				// If we've got more than 1 comment for the day
259				if (entries.size() > 1) {
260					// Let the user know we've got multiple comments for this day.
261					out.write("<em>" + entries.size() + " comments for this day!</em>");
262				}
263
264				// Print out each comment
265				for (Comment c : entries) {
266					out.write("<pre style=\"font-family: Roboto, Arial, sans-serif;\">" + printComment(c) + "</pre>");
267				}
268
269			}
270			// End list item
271			out.write("</li>");
272		}
273
274		// End the unordered list
275		out.write("</ul>");
276
277		// Loop over our unknowns
278		out.write("<p>Unrecognizable comments from Anxley:</p>");
279		if (unrecognizedComments.size() > 0) {
280			// Start another unordered list
281			out.write("<ul>");
282			// For each unrecognized comment, print out just its text
283			for (Comment comm : unrecognizedComments) {
284				out.write(comm.getSnippet().getTextDisplay());
285			}
286			// Finish unordered list
287			out.write("</ul>");
288		}
289		// If we had no unrecognized comments, let the user know
290		else {
291			out.write("<p>(none)</p>");
292		}
293
294		// Write the end of the HTML file
295		out.write(HTML_END);
296		// Close the output stream. All done!
297		out.close();
298	}
299
300	/**
301	 * Creates a String holding the data for one Comment. Each string holds the
302	 * comment string, then one line each for likes, date, and URL.
303	 * 
304	 * @param comment the comment to print
305	 */
306	private static String printComment(Comment comment) {
307		// Use a StringBuilder to compile all of the things
308		StringBuilder sb = new StringBuilder();
309
310		// Get the content of the comment itself
311		String commentStr = comment.getSnippet().getTextDisplay();
312		// Replace special characters
313		commentStr = commentStr.replace("&#39;", "'").replace("<br />", "\n");
314		// Print out comment itself
315		sb.append("<div style=\"font-size: 15px;\">" + commentStr + "</div>");
316
317		// Get the comment's like count
318		Long likeCount = comment.getSnippet().getLikeCount();
319		// Print out comment likes
320		sb.append("\t\t\tLikes: " + likeCount + "\n");
321
322		// Get the comment's (pretty) post date
323		String prettyDate = getPrettyDate(comment);
324		// Get a URL for this comment
325		String url = getURLForComment(comment);
326		// Print out URL
327		sb.append("\t\t\t<a href=" + url + ">" + prettyDate + "</a>");
328
329		// Return built string
330		return sb.toString();
331	}
332
333	/**
334	 * Retrieves the publication date of the specified comment, and returns it in a
335	 * nice format specified by DATE_TIME_FORMAT.
336	 * 
337	 * @param comment the comment to get the date of
338	 * @return a nicely formatted date
339	 */
340	private static String getPrettyDate(Comment comment) {
341		// Format to use when reading in data
342		DateTimeFormatter iso8601Format = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
343		// This object stores the date and time so that we can re-format it in a nicer
344		// format
345		LocalDateTime dateTimeData = iso8601Format.parse(comment.getSnippet().getPublishedAt(), LocalDateTime::from);
346		// This is a much nicer format to view data in.
347		DateTimeFormatter niceFormat = DateTimeFormatter.ofPattern(DATE_TIME_FORMAT);
348		// Convert the date and time into that nicer format
349		String dateTimeString = dateTimeData.format(niceFormat) + " GMT";
350		// Return that nice-looking date and time. Lookin' good, datetime.
351		return dateTimeString;
352	}
353
354	/**
355	 * Generates a URL for the given Comment object from the video ID and the
356	 * comment ID.
357	 * 
358	 * @param comment the Comment to generate a URL for
359	 * @return a URL string linking to the given Comment
360	 */
361	private static String getURLForComment(Comment comment) {
362		// All URLs are going to start with this general link
363		String prefix = "https://www.youtube.com/watch";
364		// Video argument is the video ID that the comment is on
365		String video = "v=" + comment.getSnippet().getVideoId();
366		// Comment argument is the id of this comment
367		String cid = "lc=" + comment.getId();
368		// Stitch the URL together with ? and & syntax to give variables to the URL
369		String url = prefix + "?" + video + "&" + cid;
370		// All done! Send it back to the user.
371		return url;
372	}
373
374	/**
375	 * Gets the day that Anxley posted this log
376	 * 
377	 * @param comment the Anxley comment to check
378	 * @throws NumberFormatException if the comment cannot be parsed
379	 * @return the day it was logged
380	 */
381	private static int getDayFromComment(Comment comment) {
382		// Get the comment string itself
383		String commentStr = comment.getSnippet().getTextDisplay();
384
385		// Special cases
386		// Day 1 has a bunch of stuff at the start before the entry itself
387		if (commentStr.substring(0, 10).equals("Im doing t")) {
388			return 1;
389		}
390		// Day 72 has an extra colon after "Day" and before "72"
391		else if (commentStr.substring(0, 10).equals("Day: 72: I")) {
392			return 72;
393		}
394
395		// Expecting the general format of:
396		// Day ###: ...
397		// Strategy to get that ###: Find the first space and the first colon, get
398		// everything between the two, and convert to an int.
399
400		// Find the first colon in the comment
401		int colonIdx = commentStr.indexOf(":");
402		// Find the first space in the comment
403		int spaceIdx = commentStr.indexOf(" ");
404		// If this doesn't have the format we expect, we might run into trouble
405		try {
406			// Get just the characters we want: those between the space and the colon.
407			String intStr = commentStr.substring(spaceIdx + 1, colonIdx);
408			// Try to read those characters as a number. If successful, return that number
409			// as the day that this comment matches to.
410			return Integer.parseInt(intStr);
411		}
412		// Either getting the substring or parsing the string as a number can throw an
413		// exception
414		catch (StringIndexOutOfBoundsException | NumberFormatException e) {
415			// If either of these problems arise, tell the caller that the comment they
416			// passed us couldn't be read
417			throw new IllegalArgumentException();
418		}
419	}
420}
421
422
423
424
425
426
427
428
429
430
431/*----------------------------------------------------
432 * ExtendableGenericArray.java
433 *--------------------------------------------------*/
434package YoutubeAPIGradle;
435
/**
 * Allows an Array of generic objects to be stored. Any non-negative index can
 * be accessed, and the array will adapt its internal size to hold at least that
 * many elements. Slots that were never set hold {@code null}.
 * 
 * @author Melody Griesen
 *
 * @param <E> generic object to store in this custom Array
 */
public class ExtendableGenericArray<E> {

	/** Internal array used for storage. */
	E[] list;
	/**
	 * Default initial capacity of 7. (The misspelled name is kept so that existing
	 * references to this public constant keep compiling.)
	 */
	public static final int DEFAUT_CAPACITY = 7;

	/**
	 * Initialize the custom Array with the specified initial capacity.
	 * 
	 * @param capacity initial capacity for the Array. Cannot be negative.
	 * @throws IllegalArgumentException if capacity is negative
	 */
	@SuppressWarnings("unchecked")
	public ExtendableGenericArray(int capacity) {
		if (capacity < 0) {
			throw new IllegalArgumentException("Array cannot have negative capacity!");
		}
		// Safe: the array is only ever read and written through E-typed methods.
		list = (E[]) new Object[capacity];
	}

	/**
	 * Initialize the custom Array with the default initial capacity.
	 */
	public ExtendableGenericArray() {
		this(DEFAUT_CAPACITY);
	}

	/**
	 * Set the element at the specified index. The previous element is returned. If
	 * the array is not currently big enough, it will be extended to allow for such
	 * an index to be accessed.
	 * 
	 * @param idx  the index of the element to set
	 * @param data the new element to place at the index
	 * @return the previous element there, or {@code null} if none was set
	 * @throws ArrayIndexOutOfBoundsException if idx is negative
	 */
	public E set(int idx, E data) {
		// Grow the array to include idx, if needed
		ensureAddressable(idx);

		// Swap in the new data and hand back what was there before
		E old = list[idx];
		list[idx] = data;
		return old;
	}

	/**
	 * Retrieves the element at the specified index. If the array is not currently
	 * big enough, it will be extended to allow for such an index to be accessed.
	 * 
	 * @param idx the index of the element to retrieve.
	 * @return the element at the specified index, or {@code null} if none was set
	 * @throws ArrayIndexOutOfBoundsException if idx is negative
	 */
	public E get(int idx) {
		// Grow the array to include idx, if needed
		ensureAddressable(idx);
		// Return the data at the specified index
		return list[idx];
	}

	/**
	 * Returns the current length of the underlying array. This is the capacity,
	 * not the number of elements that have been set.
	 * 
	 * @return the internal array's length.
	 */
	public int length() {
		return list.length;
	}

	/**
	 * Ensures that the underlying array can accommodate an access to the given
	 * index, growing it (repeatedly doubling and adding one) if needed.
	 * 
	 * @param idx the index that must become addressable
	 * @throws ArrayIndexOutOfBoundsException if idx is negative
	 */
	private void ensureAddressable(int idx) {
		// Negative indexes can never be made valid; say so explicitly.
		if (idx < 0) {
			throw new ArrayIndexOutOfBoundsException("Index cannot be negative: " + idx);
		}

		// If we already have enough space, cool, we're done
		if (idx < list.length) {
			return;
		}

		// Calculate the new capacity in long arithmetic: with int arithmetic the
		// doubling can overflow to a negative number and never terminate.
		long newCapacity = list.length;
		do {
			newCapacity = newCapacity * 2 + 1;
		} while (idx >= newCapacity);
		if (newCapacity > Integer.MAX_VALUE) {
			newCapacity = Integer.MAX_VALUE;
		}

		// Create new list object
		@SuppressWarnings("unchecked")
		E[] newList = (E[]) new Object[(int) newCapacity];

		// Copy over existing data
		System.arraycopy(list, 0, newList, 0, list.length);

		// Set the underlying list to be the new list
		list = newList;
	}
}
546