// Paste metadata: posted Jan 13, 2021, 06:28 PM (shown as "5 years ago" when captured).
1using System;
2using System.Net;
3using System.Net.Http;
4using System.Runtime.InteropServices;
5using System.Diagnostics;
6using System.Collections.Generic;
7using HtmlAgilityPack;
8using Newtonsoft.Json;
9using Newtonsoft;
10using Newtonsoft.Json.Serialization;
11using Newtonsoft.Json.Linq;
12using System.Threading;
13using System.Linq;
14using System.IO;
15//using System.Windows.Forms;
16using Form = System.Windows.Forms.Form;
17
18using Microsoft.Win32;
19using System.Text.RegularExpressions;
20
namespace Interop
{
    /// <summary>
    /// Thin P/Invoke wrapper around user32 <c>FlashWindowEx</c>, used to flash the
    /// main window of the current process so the operator notices a prompt.
    /// </summary>
    class Interop
    {
        // To support flashing.
        [DllImport("user32.dll")]
        [return: MarshalAs(UnmanagedType.Bool)]
        static extern bool FlashWindowEx(ref FLASHWINFO pwfi);

        /// <summary>
        /// Flash both the window caption and the taskbar button.
        /// Equivalent to setting the FLASHW_CAPTION | FLASHW_TRAY flags.
        /// </summary>
        public const UInt32 FLASHW_ALL = 3;

        /// <summary>Flash continuously until the window comes to the foreground.</summary>
        public const UInt32 FLASHW_TIMERNOFG = 12;

        /// <summary>Managed mirror of the native structure passed to <c>FlashWindowEx</c>.</summary>
        [StructLayout(LayoutKind.Sequential)]
        public struct FLASHWINFO
        {
            public UInt32 cbSize;
            public IntPtr hwnd;
            public UInt32 dwFlags;
            public UInt32 uCount;
            public UInt32 dwTimeout;
        }

        /// <summary>
        /// Flashes the current process's main window (caption and taskbar button)
        /// until it comes to the foreground.
        /// </summary>
        /// <returns>
        /// The value reported by the native call, or <c>false</c> when the process
        /// has no main window handle and therefore nothing can be flashed.
        /// </returns>
        public static bool FlashWindowEx()
        {
            IntPtr hWnd;

            // Process is IDisposable; release it as soon as the handle has been read.
            using (Process current = Process.GetCurrentProcess())
            {
                hWnd = current.MainWindowHandle;
            }

            // No window to flash: skip the native call instead of passing a null handle.
            if (hWnd == IntPtr.Zero)
                return false;

            FLASHWINFO fInfo = new FLASHWINFO();

            fInfo.cbSize = Convert.ToUInt32(Marshal.SizeOf(fInfo));
            fInfo.hwnd = hWnd;
            fInfo.dwFlags = FLASHW_ALL | FLASHW_TIMERNOFG;
            fInfo.uCount = UInt32.MaxValue;
            fInfo.dwTimeout = 0; // 0 is passed through unchanged from the original call

            return FlashWindowEx(ref fInfo);
        }
    }
}
62
63namespace CurseforgeMirrorer
64{
/// <summary>
/// An image identified by its remote URL and the local path it is stored at.
/// </summary>
public class Image
{
    /// <summary>Local path of the image file; empty until one is assigned.</summary>
    public string Filepath = string.Empty;

    /// <summary>Remote address of the image; empty when unknown.</summary>
    public string URL = string.Empty;

    /// <summary>True when a file currently exists at <see cref="Filepath"/>. Not serialized.</summary>
    [JsonIgnore]
    public bool FileExists => File.Exists(Filepath);
}
74
/// <summary>
/// Bookkeeping record for one downloadable file, identified by <see cref="Id"/>.
/// </summary>
public class DownloadFile
{
    /// <summary>File name as shown on the site; empty until captured.</summary>
    public string Filename = string.Empty;

    /// <summary>Numeric file id (the last path segment of the file link).</summary>
    public int Id = 0;

    /// <summary>Flag recording whether the file itself has been downloaded.</summary>
    public bool Downloaded = false;

    /// <summary>True when <see cref="Url"/> holds a non-empty value. Not serialized.</summary>
    [JsonIgnore]
    public bool UseURL => !string.IsNullOrEmpty(Url);

    /// <summary>Explicit download URL; empty when none is known.</summary>
    public string Url = string.Empty;

    /// <summary>Flag recording whether the file's own download page has been processed.</summary>
    public bool DownloadedFilePage = false;
}
90
/// <summary>
/// A set of mod categories packed into a 64-bit mask: bit <c>i</c> of
/// <see cref="Categories"/> corresponds to <c>CategoriesString[i]</c>.
/// </summary>
public class Category
{
    /// <summary>Known category names; the array index is the bit position in the mask.</summary>
    public static string[] CategoriesString = { //verified 1/13/2021
        "World Gen",
        "Biomes",
        "Ores and Resources",
        "Structures",
        "Dimensions",
        "Mobs",
        "Technology",
        "Processing",
        "Player Transport",
        "Energy, Fluid, and Item Transport",
        "Farming",
        "Energy",
        "Redstone",
        "Genetics",
        "Magic",
        "Storage",
        "API and Library",
        "Adventure and RPG",
        "Map and Information",
        "Cosmetic",
        "Miscellaneous",
        "Addons",
        "Thermal Expansion",
        "Tinker's Construct",
        "Industrial Craft",
        "Thaumcraft",
        "Buildcraft",
        "Forestry",
        "Blood Magic",
        "Lucky Blocks",
        "Applied Energistics 2",
        "CraftTweaker",
        "Armor, Tools, and Weapons",
        "Server Utility",
        "Food",
        "Twitch Integration",
        "Fabric"
    };

    /// <summary>List copy of <see cref="CategoriesString"/>, kept for existing callers.</summary>
    public static List<string> CategoriesStringList = new List<string>(CategoriesString);

    // Number of usable bits in the long-backed mask.
    private const int MaskBits = 64;

    /// <summary>Reports whether bit <paramref name="i"/> is set in the mask.</summary>
    /// <param name="i">Bit position / category index.</param>
    /// <returns>
    /// True when the bit is set; false for any index outside 0..63. The range check
    /// matters because C# masks the shift count, so 1L &lt;&lt; 65 would alias bit 1.
    /// </returns>
    public bool HasCategory(int i)
    {
        if (i < 0 || i >= MaskBits)
            return false;

        return (Categories & (1L << i)) != 0;
    }

    /// <summary>Returns the names of all known categories whose bit is set, in index order.</summary>
    public IEnumerable<string> GetCategories()
    {
        List<string> vs = new List<string>();

        for (int i = 0; i < CategoriesString.Length; i++)
        {
            if (HasCategory(i))
                vs.Add(CategoriesString[i]);
        }

        return vs;
    }

    /// <summary>Sets bit <paramref name="i"/> in the mask.</summary>
    /// <param name="i">Bit position / category index, 0..63.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="i"/> is outside 0..63; previously such a value silently set an unrelated bit.
    /// </exception>
    public void AddCategory(int i)
    {
        if (i < 0 || i >= MaskBits)
            throw new ArgumentOutOfRangeException(nameof(i), i, "Category index must be between 0 and 63.");

        Categories |= 1L << i;
    }

    /// <summary>
    /// Sets the bit for the named category. Names that are not in
    /// <see cref="CategoriesString"/> are ignored.
    /// </summary>
    /// <param name="category">Category name, compared exactly.</param>
    public void AddCategory(string category)
    {
        // Single lookup against the same array GetCategories reads, so the index
        // can never come back as -1 from a second, diverged collection.
        int index = Array.IndexOf(CategoriesString, category);
        if (index >= 0)
            AddCategory(index);
    }

    /// <summary>The raw bit mask; bit i corresponds to CategoriesString[i].</summary>
    public long Categories { get; set; }
}
164
/// <summary>
/// Everything the mirrorer records about a single mod: identity, listing data,
/// info-page data, file ids, and progress/failure flags.
/// </summary>
public class Mod
{
    /// <summary>URL slug of the mod; also used as its directory name and as the key in Program.Mods.</summary>
    public string ModIdentifier = string.Empty;

    /// <summary>Display name taken from the listing page.</summary>
    public string ModName = string.Empty;

    /// <summary>Avatar image (remote URL plus local path).</summary>
    public Image CoverImage = new Image();

    // The legacy `_Files` list of DownloadFile objects was replaced by the id list
    // below; the DownloadFile records themselves live in Program.DownloadRegistry.

    /// <summary>Ids of the files discovered for this mod.</summary>
    public List<int> Files = new List<int>();

    public DateTime CreationDate = DateTime.UnixEpoch;
    public DateTime UpdateDate = DateTime.UnixEpoch;

    /// <summary>Short summary shown on the listing page.</summary>
    public string Synopsis = string.Empty;

    /// <summary>Plain-text description from the info page.</summary>
    public string Description = string.Empty;

    /// <summary>Raw HTML of the description; scanned for embedded image links.</summary>
    public string DescriptionHTML = string.Empty;

    /// <summary>Author shown on the listing page.</summary>
    public string Author = string.Empty;

    /// <summary>All member names found on the info page.</summary>
    public HashSet<string> CompleteAuthorList = new HashSet<string>();

    /// <summary>File-list pages already scraped for this mod.</summary>
    public List<int> ParsedPages = new List<int>();

    /// <summary>Number of file-list pages the mod has.</summary>
    public int PageCount = 0;

    public int DownloadCount = 0;

    public HashSet<string> Images = new HashSet<string>();

    // The legacy string-set `___Categories` was replaced by the bit mask below.

    /// <summary>Category helper backing <see cref="Categories"/>. Not serialized.</summary>
    [JsonIgnore]
    public Category __Categories = new Category();

    /// <summary>Serialized form of the category bit mask held by <see cref="__Categories"/>.</summary>
    public long Categories { get => __Categories.Categories; set => __Categories.Categories = value; }

    public int ProjectId = 0;

    /// <summary>
    /// True when a source URL is known. Null-safe, so a <c>null</c> SourceURL
    /// (e.g. loaded from JSON) reads as "no source" instead of throwing.
    /// </summary>
    [JsonIgnore]
    public bool HasSource => !string.IsNullOrEmpty(SourceURL);

    /// <summary>
    /// True when an avatar URL is known. Null-safe for both CoverImage and its URL.
    /// </summary>
    [JsonIgnore]
    public bool HasImage => CoverImage != null && !string.IsNullOrEmpty(CoverImage.URL);

    /// <summary>Set when the mod page exposes an "Images" tab link.</summary>
    public bool HasScreenshots = false;

    /// <summary>Repository URL scraped from the info page; empty when none.</summary>
    public string SourceURL = string.Empty;

    //public bool SourceTar = false;

    /// <summary>Set when cloning or archiving the source repository failed.</summary>
    public bool FailedToRetrieveSource = false;

    //Extra state
    //Added after starting a good scrape.

    /// <summary>Set once the extra-metadata pass has been attempted for this mod.</summary>
    public bool ModpageRechecked = false;

    /// <summary>Set while/when the extra-metadata pass has not completed successfully.</summary>
    public bool FailedToRetrieveExtraMetadata = false;

    /// <summary>Set while/when the image download pass has not completed successfully.</summary>
    public bool FailedToRetrieveImages = false;
}
236
237 public class Program
238 {
/// <summary>Append-mode log of discovered files ("id:filename" lines), written to files.txt.</summary>
[JsonIgnore]
public static StreamWriter logFile = new StreamWriter(File.Open("files.txt", FileMode.Append, FileAccess.Write, FileShare.Read));

/// <summary>Append-mode log of failures, written to fails.txt.</summary>
[JsonIgnore]
public static StreamWriter failFile = new StreamWriter(File.Open("fails.txt", FileMode.Append, FileAccess.Write, FileShare.Read));

/// <summary>Client used to fetch site HTML pages.</summary>
[JsonIgnore]
public static WebClient ProxyClient = new WebClient();

/// <summary>Client used for direct file/image downloads.</summary>
[JsonIgnore]
public static WebClient CDNClient = new WebClient();

/// <summary>Master list of collected mods, keyed by mod identifier.</summary>
public static Dictionary<string, Mod> Mods = new Dictionary<string, Mod>();

/// <summary>
/// Add mods to scrape here
/// </summary>
public static Queue<Mod> ModsToScrape = new Queue<Mod>();

/// <summary>Identifiers of mods whose info page still has to be captured.</summary>
public static Queue<string> ModsToCaptureInfoPage = new Queue<string>();

//Purge
public static Queue<KeyValuePair<Mod, DownloadFile>> _FilesToDownload = new Queue<KeyValuePair<Mod, DownloadFile>>();

/// <summary>Pending downloads as (mod identifier, file id) pairs.</summary>
public static Queue<KeyValuePair<string, int>> FilesToDownload = new Queue<KeyValuePair<string, int>>();

//Purge
public static Queue<KeyValuePair<Mod, DownloadFile>> _FilesToCaptureDownloadPage = new Queue<KeyValuePair<Mod, DownloadFile>>();

/// <summary>Pending file pages to capture as (mod identifier, file id) pairs.</summary>
public static Queue<KeyValuePair<string, int>> FilesToCaptureDownloadPage = new Queue<KeyValuePair<string, int>>();

/// <summary>File ids that could not be processed, grouped by mod identifier.</summary>
public static Dictionary<string, HashSet<int>> FilesCouldNotDownload = new Dictionary<string, HashSet<int>>();

/// <summary>All known files, keyed by file id.</summary>
public static Dictionary<int, DownloadFile> DownloadRegistry = new Dictionary<int, DownloadFile>();

// NOTE(review): presumably the set of Mirror_* steps driven by the main loop — confirm against the caller.
[JsonIgnore]
public static List<Func<bool>> MirrorActions = new List<Func<bool>>();

/// <summary>
/// Add parsed mod identifers from the main page here
/// </summary>
public static List<string> ParsedModIdentifiers = new List<string>();

public static HashSet<string> CompletedMods = new HashSet<string>();

/// <summary>Listing pages that have already been parsed.</summary>
public static List<int> ParsedPages = new List<int>();

/// <summary>Identifiers of mods whose info page was captured successfully.</summary>
public static HashSet<string> CompletedModInfo = new HashSet<string>();

/// <summary>Identifiers of mods whose info page capture failed.</summary>
public static HashSet<string> FailedToCaptureInfoPage = new HashSet<string>();

/// <summary>Highest listing page number that will be requested.</summary>
[JsonIgnore]
public static int PageCount = 964; //941 Total pages ?page=

/// <summary>Shared random source.</summary>
[JsonIgnore]
public static Random gRandom = new Random();

/// <summary>Time of the last page request; starts 60 seconds in the past so the first request is not delayed.</summary>
public static DateTime lastRequest = DateTime.Now.AddSeconds(-60);
310
/// <summary>
/// Picks a randomized delay, in whole seconds, to wait between page requests.
/// </summary>
/// <returns>
/// A value from 5 to 10: samples near the middle of the random range map to 5,
/// samples near either end approach 10.
/// </returns>
public static int getRandomWaitTime()
{
    const int span = 0 - 5;

    float sample = (float)gRandom.NextDouble();
    float centered = sample - 0.5f;

    // Inverted parabola: 0 at the centre, -0.25 at either end of the sample range.
    float curve = -(centered * centered);
    curve *= 4;     // stretch to [-1, 0]
    curve *= span;  // flip and scale to [0, 5]
    curve += 5;     // shift to [5, 10]

    return (int)curve;
}
336
/// <summary>
/// Downloads and parses an HTML page, first waiting until a randomized delay
/// has elapsed since the previous request.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="tries">Number of retries already made; callers normally leave this at 0.</param>
/// <returns>
/// The parsed document. If the page still has no nodes after the retries are
/// exhausted, the empty document is returned as-is.
/// </returns>
/// <remarks>
/// Exceptions from the web request propagate to the caller. An empty document
/// prompts the operator (it may have been a captcha) before retrying.
/// </remarks>
public static HtmlDocument GetHtmlDocument(string url, int tries = 0)
{
    int randomWaitTime = getRandomWaitTime();//gRandom.Next(15, 75);
    Console.Write("Sleeping for {0}s...", randomWaitTime);

    // Compute the remaining time once per iteration and only sleep on a positive
    // value. Re-reading the clock between the check and the sleep could produce a
    // negative argument (an exception) or exactly -1 (an infinite sleep).
    while (true)
    {
        double remainingMs = (randomWaitTime * 1000.0) - DateTime.Now.Subtract(lastRequest).TotalMilliseconds;
        if (remainingMs <= 0)
            break;
        Thread.Sleep((int)Math.Min(Math.Ceiling(remainingMs), int.MaxValue));
    }

    Console.Write("Making request to {0}...", url);
    HtmlDocument htmlDocument = new HtmlDocument();

    // Dispose the response stream so the connection is released after parsing.
    using (Stream response = ProxyClient.OpenRead(url))
    {
        htmlDocument.Load(response);
    }

    Console.WriteLine("Complete.");
    lastRequest = DateTime.Now;

    if (htmlDocument.DocumentNode.ChildNodes.Count >= 1)
        return htmlDocument;

    if (tries > 2)
    {
        Console.WriteLine($"Could not download page: {url} after 3 tries");
        return htmlDocument;
    }

    // Answering N here exits the process; answering Y falls through to the retry.
    NeedUserAttention(1, "Would you like to retry this request? It could have been a captcha.");
    Console.WriteLine($"Retrying...");

    return GetHtmlDocument(url, tries + 1);
}
373
/// <summary>
/// Manual check of the request throttle: waits until 60 seconds have passed
/// since the last request, then fetches a fixed test page and records the time.
/// </summary>
public static void TimerTest()
{
    Console.Write("Sleeping...");

    // Sleep only for a positive remaining time; recomputing after the loop check
    // could otherwise hand Thread.Sleep a negative value.
    while (true)
    {
        double remainingMs = 60000 - DateTime.Now.Subtract(lastRequest).TotalMilliseconds;
        if (remainingMs <= 0)
            break;
        Thread.Sleep((int)Math.Min(Math.Ceiling(remainingMs), int.MaxValue));
    }

    Console.Write("Making request to {0}...", "http://iansweb.org");
    HtmlDocument htmlDocument = new HtmlDocument();

    // Dispose the response stream once the document has been read.
    using (Stream response = CDNClient.OpenRead("http://iansweb.org"))
    {
        htmlDocument.Load(response);
    }

    Console.WriteLine("Complete.");
    lastRequest = DateTime.Now;
}
388
//REMEMBER TO SET MODPAGERECHECKED
/// <summary>
/// Captures the extra metadata of a mod: whether it has a screenshots tab, its
/// categories, and its images (via <c>DownloadImages</c>).
/// </summary>
/// <param name="mod">Mod to update in place.</param>
/// <param name="doc">Already-downloaded mod page; when null the page is fetched.</param>
/// <returns>True when the pass completed; false when the page could not be fetched or parsed.</returns>
/// <remarks>
/// <c>ModpageRechecked</c> is set on entry regardless of outcome, so a failed mod
/// is not picked up again; <c>FailedToRetrieveExtraMetadata</c> records the result.
/// </remarks>
public static bool DownloadMetadata(Mod mod, HtmlDocument doc = null)
{
    mod.ModpageRechecked = true; //Set here. Do not return to function unless changes are made
    mod.FailedToRetrieveExtraMetadata = true;
    try
    {
        Console.WriteLine($"Downloading metadata of {mod.ModIdentifier}");

        string url = $"http://www.curseforge.com/minecraft/mc-mods/{mod.ModIdentifier}";
        if (doc is null)
        {
            try
            {
                doc = GetHtmlDocument(url);
            }
            catch (Exception e)
            {
                Console.WriteLine("{0}\r\n{1}", e.Message, e.StackTrace);
                NeedUserAttention(1);
                return false;
            }
        }

        if (doc == null)
            return false;

        //Has screenshots page verified 1/13/2021
        //var hSPNodes = (doc.DocumentNode.SelectNodes("//li[@class=' b-list-item p-nav-item px-2 pb-1/10 -mb-1/10 text-gray-500']/a[@class='text-gray-500 hover:no-underline']"));
        var hSPNodes = (doc.DocumentNode.SelectNodes("//li[@class=' border-b-2 border-primary-500 b-list-item p-nav-item px-2 pb-1/10 -mb-1/10 text-gray-500']/a[@class='text-gray-500 hover:no-underline']"));
        var imagesTab = hSPNodes?.FirstOrDefault(n => n.InnerText.Contains("Images"));
        if (imagesTab != null && imagesTab.Attributes.Contains("href"))
            mod.HasScreenshots = true;

        //Categories verified 1/13/2021
        var categoryNodes = (doc.DocumentNode.SelectNodes("//div[@class='flex -mx-1']/div[@class='px-1']/a/figure"));

        // SelectNodes yields null when nothing matches. Still a failure (the layout
        // changed or a captcha page was served), but report it explicitly instead
        // of through a NullReferenceException.
        if (categoryNodes is null)
        {
            Console.WriteLine($"No category nodes found on the page of {mod.ModIdentifier}");
            NeedUserAttention(1);
            return false;
        }

        foreach (var categoryNode in categoryNodes)
        {
            if (categoryNode is null || categoryNode.Attributes is null || categoryNode.Attributes.Count < 1)
                continue;

            // A figure without a title carries no category name; skip it rather
            // than letting one odd node abort the whole pass.
            if (!categoryNode.Attributes.Contains("title"))
                continue;

            mod.__Categories.AddCategory(categoryNode.Attributes["title"].Value);
        }

        DownloadImages(mod);

        mod.ModpageRechecked = true; //Always set or this method will be called again
        mod.FailedToRetrieveExtraMetadata = false;
        return true;
    }
    catch (Exception e)
    {
        Console.WriteLine("{0}\r\n{1}", e.Message, e.StackTrace);
        NeedUserAttention(1);
        return false;
    }
}
454
/// <summary>
/// Downloads a mod's images into "&lt;ModIdentifier&gt;/images": the full-size
/// avatar, every image linked from the description HTML, and, when the mod has a
/// screenshots tab, every screenshot on that page.
/// </summary>
/// <param name="mod">Mod whose images are downloaded.</param>
/// <remarks>
/// <c>FailedToRetrieveImages</c> is set on entry and cleared only when the method
/// runs to the end; if the screenshots page cannot be fetched or parsed the flag
/// stays set. Individual download failures are logged or prompted, not fatal.
/// </remarks>
public static void DownloadImages(Mod mod)
{
    mod.FailedToRetrieveImages = true;

    // CreateDirectory creates any missing parents and is a no-op when the path exists.
    string imageDirectory = mod.ModIdentifier + "/images";
    Directory.CreateDirectory(imageDirectory);

    //Avatar
    string avatarPrompt = $"Download the avatar of {mod.ModIdentifier} manually";
    string coverUrl = mod.CoverImage?.URL;
    if (string.IsNullOrEmpty(coverUrl))
    {
        // No avatar URL known: ask the operator directly instead of throwing to reach the prompt.
        NeedUserAttention(avatarPrompt);
    }
    else
    {
        try
        {
            //https://media.forgecdn.net/avatars/233/345/637071615860203079.png
            //https://media.forgecdn.net/avatars/thumbnails/233/345/64/64/637071615860203079.png
            var url = coverUrl.Replace("/64/64", "");
            url = url.Replace("/thumbnails", "");
            var filename = imageDirectory + "/" + coverUrl.Split('/').Last();
            CDNClient.DownloadFile(url.Replace("_animated", ""), filename);
            Console.WriteLine("{0} -> {1}", url, filename);
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
            NeedUserAttention(avatarPrompt);
        }
    }

    //Images contained in the description
    Regex imageRegex = new Regex(@"(http(s?):)([/|.|\w|\s|-])*\.(?:jpg|gif|png|jpeg)", RegexOptions.IgnoreCase);

    MatchCollection matches = imageRegex.Matches(mod.DescriptionHTML ?? string.Empty);

    foreach (Match match in matches)
    {
        try
        {
            Console.WriteLine(match.Value);
            CDNClient.DownloadFile(match.Value, $"{imageDirectory}/{match.Value.Split('/').Last()}");
            Thread.Sleep(100);
        }
        catch (Exception e)
        {
            // Dead links in descriptions are common; record and move on.
            failFile.WriteLine($"Failed to download match {match.Value} {e.Message}");
            failFile.Flush();
        }
    }

    if (mod.HasScreenshots)
    {
        //Images page
        HtmlDocument doc;
        try
        {
            doc = GetHtmlDocument($"http://www.curseforge.com/minecraft/mc-mods/{mod.ModIdentifier}/screenshots");
        }
        catch (Exception e)
        {
            // Leave FailedToRetrieveImages set, but record why.
            failFile.WriteLine($"Failed to load screenshots page of {mod.ModIdentifier} {e.Message}");
            failFile.Flush();
            return;
        }

        if (doc == null)
            return;

        //verified 1/13/2021
        var imageNodes = doc.DocumentNode.SelectNodes("//div[@class='project-screenshot-page']/div/div/article/div");

        // SelectNodes yields null when nothing matches; treat that as a failed
        // retrieval (flag stays set) instead of throwing out of this method.
        if (imageNodes is null)
        {
            failFile.WriteLine($"No screenshot nodes found for {mod.ModIdentifier}");
            failFile.Flush();
            return;
        }

        foreach (var node in imageNodes)
        {
            try
            {
                if (node.HasAttributes && node.Attributes.Contains("data-featherlight"))
                {
                    var url = node.Attributes["data-featherlight"].Value;
                    var filename = imageDirectory + "/" + url.Split('/').Last();
                    CDNClient.DownloadFile(url, filename);
                    Console.WriteLine("{0} -> {1}", url, filename);
                }
            }
            catch (Exception e)
            {
                NeedUserAttention($"Failed to download {node.InnerHtml} {e.Message}");
            }
        }
    }

    mod.FailedToRetrieveImages = false;
}
548
/// <summary>
/// Runs the extra-metadata pass for the first mod that has not been rechecked yet.
/// One call in four (chosen at random) does nothing, to limit the request rate.
/// </summary>
/// <returns>
/// True only on the randomly skipped calls; false in every other case, including
/// after a metadata pass was performed.
/// </returns>
public static bool Mirror_DownloadMissedMetadata()
{
    Console.WriteLine("Mirror_DownloadMissedMetadata");

    //Only do it every so often (keep from making too many bogus requests)
    bool skipThisRound = gRandom.Next(0, 4) == 0;
    if (skipThisRound)
        return true; //Returning true should keep the scraper going?

    // First mod, in dictionary order, that still needs the recheck; null when none is left.
    Mod pending = Mods.Values.FirstOrDefault(candidate => !candidate.ModpageRechecked);
    if (pending is null)
        return false;

    // Wait until the info page of this mod has been captured.
    bool infoPageOutstanding = ModsToCaptureInfoPage.Contains(pending.ModIdentifier);
    if (!infoPageOutstanding)
        DownloadMetadata(pending);

    return false;
}
577
/// <summary>
/// Captures the info page of the mod at the head of <c>ModsToCaptureInfoPage</c>:
/// description, members, source link, download count, project id and avatar.
/// When a source URL is present the repository is cloned and archived; when an
/// avatar is present it is downloaded and converted into a folder icon.
/// </summary>
/// <returns>
/// True when the page was captured and the mod was dequeued. False when the queue
/// is empty, the mod has not been collected yet, the page could not be fetched
/// (mod stays queued), or parsing failed (mod is dequeued and recorded in
/// <c>FailedToCaptureInfoPage</c>).
/// </returns>
public static bool Mirror_LoadModInfoPage()
{
    if (ModsToCaptureInfoPage.Count < 1)
        return false;

    var modId = ModsToCaptureInfoPage.Peek();

    if (!Mods.TryGetValue(modId, out Mod mod))
    {
        Console.WriteLine("Mirrorer wished to load mod info, but has yet to create an instance of the mod. Aborting for now.");
        return false;
    }

    string url = $"http://www.curseforge.com/minecraft/mc-mods/{modId}";

    HtmlDocument doc;
    try
    {
        doc = GetHtmlDocument(url);
    }
    catch
    {
        Console.WriteLine("Failed to retrieve document");
        return false;
    }

    try
    {
        //Mod description verified 1/13/2021
        var descriptionNode = doc.DocumentNode.SelectSingleNode("//div[@class='box p-4 pb-2 project-detail__content']") ?? HtmlNode.CreateNode("");
        mod.Description = descriptionNode.InnerText.HtmlDecode();
        mod.DescriptionHTML = descriptionNode.InnerHtml;

        //Members verified 1/13/2021
        //foreach (var authorNode in doc.DocumentNode.SelectNodes("//div[@class='flex mb-2']/div[@class='flex flex-col flex-grow']/p[@class='text-sm text-primary-500 flex']/a/span").Select(n => n.InnerText))
        foreach (var authorNode in doc.DocumentNode.SelectNodes("//div[@class='flex mb-2']/div[@class='flex flex-col flex-grow']/p[@class='text-sm flex']/a/span").Select(n => n.InnerText))
        {
            // HashSet.Add already ignores duplicates.
            mod.CompleteAuthorList.Add(authorNode.Trim('\r', '\n', ' ', '\0', '\t'));
        }

        //Source verified 1/13/2021
        var srcNodes = (doc.DocumentNode.SelectNodes("//li[@class=' b-list-item p-nav-item px-2 pb-1/10 -mb-1/10 text-gray-500']/a[@class='text-gray-500 hover:no-underline']"));
        var sourceLink = srcNodes?.FirstOrDefault(n => n.InnerText.Contains("Source"));
        if (sourceLink != null && sourceLink.Attributes.Contains("href"))
            mod.SourceURL = sourceLink.Attributes["href"].Value;

        //Download Count verified 1/13/2021
        //mod.DownloadCount = Convert.ToInt32(doc.DocumentNode.SelectSingleNode("//div[@class='flex flex-col mt-auto mb-auto']/div[@class='flex']/span[@class='mr-2 text-sm text-gray-500']").InnerText.Split(' ')[0].Replace(",", ""));
        mod.DownloadCount = Convert.ToInt32(doc.DocumentNode.SelectSingleNode("//div[@class='pb-4 border-b border-gray--100']/div[@class='flex flex-col mb-3']/div[4]/span[2]").InnerText.Split(' ')[0].Replace(",", ""));

        //Project ID
        mod.ProjectId = Convert.ToInt32(doc.DocumentNode.SelectSingleNode("//div[@class='pb-4 border-b border-gray--100']/div[@class='flex flex-col mb-3']/div[@class='w-full flex justify-between']/span[2]").InnerText);

        var node = (doc.DocumentNode.SelectSingleNode("//div[@class='project-avatar project-avatar-64']/a[@class='bg-white']/img[@class='mx-auto']"));
        if (node != null && node.HasAttributes && node.Attributes.Contains("src"))
            mod.CoverImage.URL = node.Attributes["src"].Value;

        //Print out results
        Console.WriteLine("{0}: {1}", mod.ModIdentifier, mod.ModName);
        Console.WriteLine("Description: {0}", mod.Description);
        Console.WriteLine("Description HTML: {0}", mod.DescriptionHTML);
        Console.WriteLine("Authors: {0}", string.Join(", ", mod.CompleteAuthorList));
        Console.WriteLine("Source: {0}", mod.SourceURL);
        Console.WriteLine("Icon: {0}", mod.CoverImage.URL);
        Console.WriteLine("Downloads: {0}", mod.DownloadCount);
        Console.WriteLine("ProjectID: {0}", mod.ProjectId);

        CompletedModInfo.Add(modId);

        if (mod.HasSource)
            LoadModInfoPage_CloneAndArchiveSource(mod);

        //Get and create the icon
        if (mod.HasImage)
            LoadModInfoPage_CaptureAvatarAndIcon(mod);

        ModsToCaptureInfoPage.Dequeue();
        DownloadMetadata(mod, doc);

        return true;
    }
    catch (Exception e)
    {
        Console.WriteLine("{0}\r\n{1}", e.Message, e.StackTrace);
        NeedUserAttention(1);
        FailedToCaptureInfoPage.Add(mod.ModIdentifier);
        ModsToCaptureInfoPage.Dequeue();
        return false;
    }
}

// Clones the mod's source repository into its directory and, on success, packs the clone into a tar archive.
private static void LoadModInfoPage_CloneAndArchiveSource(Mod mod)
{
    Console.WriteLine("Source preset for {0} ({1})", mod.ModIdentifier, mod.SourceURL);

    // The URL comes from a scraped page and is placed on a command line. Refuse
    // values that could break out of the quoted argument.
    if (mod.SourceURL.IndexOf('"') >= 0 || mod.SourceURL.EndsWith("\\"))
    {
        Console.WriteLine("Refusing to clone {0}: unsafe source URL ({1})", mod.ModIdentifier, mod.SourceURL);
        mod.FailedToRetrieveSource = true;
        return;
    }

    Directory.CreateDirectory(mod.ModIdentifier);

    using (Process gitProcess = new Process())
    {
        gitProcess.StartInfo.UseShellExecute = false;
        gitProcess.StartInfo.FileName = "git.exe";
        gitProcess.StartInfo.WorkingDirectory = mod.ModIdentifier;
        // "--" ends option parsing so a URL starting with '-' cannot be read as a git option.
        gitProcess.StartInfo.Arguments = $"clone --recursive -- \"{mod.SourceURL}\"";
        gitProcess.Start();
        Console.WriteLine("Times out in 120 seconds");
        if (!gitProcess.WaitForExit(120000))
            if (NeedUserAttention("Program failed to exit after wait period. Wait anyway?"))
                gitProcess.WaitForExit();

        if (!gitProcess.HasExited || gitProcess.ExitCode != 0)
        {
            // ExitCode throws while the process is still running, so only read it after exit.
            string exitInfo = gitProcess.HasExited ? gitProcess.ExitCode.ToString() : "still running";
            Console.WriteLine("Failed to clone {0} ({1}) ({2})", mod.ModIdentifier, mod.SourceURL, exitInfo);
            mod.FailedToRetrieveSource = true;
        }
        else
        {
            Console.WriteLine("Cloned {0} ({1}) successfully", mod.ModIdentifier, mod.SourceURL);
        }
    }

    //Add to tar
    if (mod.FailedToRetrieveSource)
        return;

    using (Process _7zProcess = new Process())
    {
        // FirstOrDefault so the "nothing was cloned" case reaches the throw below.
        var path = Directory.GetDirectories(mod.ModIdentifier).FirstOrDefault(n => !n.EndsWith("images"))
            ?? throw new DirectoryNotFoundException($"No cloned repository directory found under {mod.ModIdentifier}");
        var folderName = Path.GetFileName(path);
        _7zProcess.StartInfo = new ProcessStartInfo()
        {
            UseShellExecute = false,
            FileName = "7z.exe",
            WorkingDirectory = mod.ModIdentifier,
            // Quoted so repository names containing spaces stay single arguments.
            Arguments = $"a \"{folderName}-source.tar\" \"{folderName}\" -ttar -y -sdel"
        };
        _7zProcess.Start();
        Console.WriteLine("Times out in 480 seconds");
        if (!_7zProcess.WaitForExit(480000))
            if (NeedUserAttention("Program failed to exit after wait period. Wait anyway?"))
                _7zProcess.WaitForExit();

        if (!_7zProcess.HasExited || _7zProcess.ExitCode != 0)
        {
            Console.WriteLine("Failed to archive repository {0} marking as failed", mod.ModIdentifier);
            mod.FailedToRetrieveSource = true;
        }
        else
        {
            Console.WriteLine("Archived {0} successfully", mod.ModIdentifier);
            mod.FailedToRetrieveSource = false;
        }
    }
}

// Downloads the mod's avatar into its directory, converts it to an .ico and writes a desktop.ini that uses it as the folder icon.
private static void LoadModInfoPage_CaptureAvatarAndIcon(Mod mod)
{
    Console.WriteLine("Avatar preset for {0} ({1})", mod.ModIdentifier, mod.CoverImage.URL);
    var ext = "." + mod.CoverImage.URL.Split('.').Last();
    mod.CoverImage.Filepath = mod.ModIdentifier + "/" + mod.ModIdentifier + ext;

    // The target directory must exist before the download; a mod without a
    // source repository has not had it created yet.
    Directory.CreateDirectory(mod.ModIdentifier);

    //Capture image
    bool downloaded = false;
    if (Uri.TryCreate(mod.CoverImage.URL, UriKind.Absolute, out Uri avatarUri))
    {
        try
        {
            CDNClient.DownloadFile(avatarUri, mod.CoverImage.Filepath);
            downloaded = true;
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
        }
    }

    if (!downloaded)
    {
        Console.WriteLine("Could not download avatar for {0}", mod.ModIdentifier);
        mod.CoverImage.URL = string.Empty;
        return;
    }

    using (Process magickProcess = new Process())
    {
        magickProcess.StartInfo.UseShellExecute = false;
        magickProcess.StartInfo.FileName = "magick.exe";
        magickProcess.StartInfo.WorkingDirectory = mod.ModIdentifier;
        magickProcess.StartInfo.Arguments = $"convert -verbose \"{mod.ModIdentifier + ext}\" \"{mod.ModIdentifier}.ico\"";
        magickProcess.Start();
        Console.WriteLine("Times out in 30 seconds");

        // Only read ExitCode once the process has actually exited; a timeout counts as failure.
        bool exited = magickProcess.WaitForExit(30000);
        if (!exited || magickProcess.ExitCode != 0)
        {
            Console.WriteLine("Failed to capture image or create icon");
            return;
        }

        Console.WriteLine("Captured image and created icon successfully");
        var iniPath = mod.ModIdentifier + "/desktop.ini";

        // An existing desktop.ini is hidden+system; clear those so it can be rewritten.
        if (File.Exists(iniPath))
            File.SetAttributes(iniPath, File.GetAttributes(iniPath) & ~(FileAttributes.Hidden | FileAttributes.System));

        using (StreamWriter stw = new StreamWriter(iniPath, false))
        {
            stw.WriteLine("[.ShellClassInfo]");
            stw.WriteLine("IconResource={0},0", mod.ModIdentifier + ".ico");
            stw.WriteLine("IconFile={0}", mod.ModIdentifier + ".ico");
            stw.WriteLine("IconIndex=0");
            stw.WriteLine("[ViewState]");
            stw.WriteLine("Mode=");
            stw.WriteLine("Vid=");
            stw.WriteLine("FolderType=Generic");
        }

        File.SetAttributes(iniPath, File.GetAttributes(iniPath) | FileAttributes.Hidden | FileAttributes.System);
        //set the folder as system
        File.SetAttributes(mod.ModIdentifier, File.GetAttributes(mod.ModIdentifier) | FileAttributes.System);
    }
}
791
/// <summary>
/// Alerts the operator (console bell plus window flash) and blocks until Y or N is pressed.
/// </summary>
/// <param name="message">Question shown before the prompt.</param>
/// <returns>True for Y, false for N; any other key repeats the prompt.</returns>
public static bool NeedUserAttention(string message = "Program ran into an error, would you like to continue?")
{
    Console.WriteLine(message + "\a\a\a");
    Interop.Interop.FlashWindowEx();

    for (;;)
    {
        Console.WriteLine("[Y/N]? ");
        ConsoleKey answer = Console.ReadKey(false).Key;
        Console.WriteLine();

        switch (answer)
        {
            case ConsoleKey.Y:
                return true;
            case ConsoleKey.N:
                return false;
        }
    }
}
807
/// <summary>
/// Alerts the operator (console bell plus window flash) and blocks until Y or N
/// is pressed. Y returns to the caller; N terminates the process.
/// </summary>
/// <param name="exitCode">Process exit code used when the operator answers N.</param>
/// <param name="message">Question shown before the prompt.</param>
public static void NeedUserAttention(int exitCode, string message = "Program ran into an error, would you like to continue?")
{
    Console.WriteLine(message + "\a\a\a");
    Interop.Interop.FlashWindowEx();

    for (;;)
    {
        Console.Write("[Y/N]? ");
        ConsoleKeyInfo pressed = Console.ReadKey(true);
        // The key is read without echo, so print it explicitly.
        Console.WriteLine(pressed.KeyChar);

        switch (pressed.Key)
        {
            case ConsoleKey.Y:
                return;
            case ConsoleKey.N:
                Console.WriteLine("Exit requested.");
                Environment.Exit(exitCode);
                return;
        }
    }
}
827
/// <summary>
/// Load a new page from curseforge
/// Never repeats a download
/// </summary>
/// <remarks>
/// Picks a random listing page that is not yet in <c>ParsedPages</c>, queues every
/// previously unseen mod on it for scraping and info-page capture, then marks the
/// page as parsed.
/// </remarks>
/// <returns>
/// True when a page was parsed. False when no pages are left, the page could not
/// be fetched, or it contained no mod listings (the page is then not marked parsed).
/// </returns>
public static bool Mirror_LoadPage()
{
    //Select next page to parse
    // Choose directly among the remaining pages: always terminates and needs only
    // one draw from the shared random source.
    List<int> remainingPages = Enumerable.Range(1, PageCount).Except(ParsedPages).ToList();
    if (remainingPages.Count == 0)
        return false;

    int nextPage = remainingPages[gRandom.Next(remainingPages.Count)];

    string url = "http://www.curseforge.com/minecraft/mc-mods?filter-sort=-1";
    if (nextPage > 1)
        url += $"&page={nextPage}";

    //Obtain page
    HtmlDocument doc;
    try
    {
        doc = GetHtmlDocument(url);
    }
    catch
    {
        //Most likely captcha
        Console.WriteLine("Failed to retrieve document");
        return false;
    }

    //Parse page to obtain mods
    string selector = "//div[@class='project-listing-row box py-3 px-4 flex flex-col lg:flex-row lg:items-center']/div[@class='flex flex-col']"; //1/13/2021
    var listingNodes = doc.DocumentNode.SelectNodes(selector);

    // SelectNodes yields null when nothing matches (changed layout or a captcha
    // page). Report it and leave the page unparsed instead of crashing the loop below.
    if (listingNodes is null)
    {
        Console.WriteLine($"No mod listings found on page {nextPage}");
        return false;
    }

    foreach (var node in listingNodes)
    {
        Mod modInit = new Mod();
        modInit.ModName = node.SelectNodes(".//h3[@class='font-bold text-lg']")[0].InnerText.HtmlDecode(); //1/13/2021
        modInit.ModIdentifier = node.SelectNodes(".//a[@class='my-auto']")[0].Attributes["href"].DeEntitizeValue.Split('/').Last(); //1/13/2021
        modInit.Author = node.SelectNodes(".//a[@class='text-base leading-normal font-bold hover:no-underline my-auto']")[0].InnerText.HtmlDecode(); //1/13/2021
        modInit.CreationDate = DateTime.UnixEpoch.AddSeconds(Convert.ToInt64(node.SelectNodes(".//span[@class='text-xs text-gray-500']/abbr")[0].Attributes["data-epoch"].Value)); //1/13/2021

        // The update date is optional; keep the default when the row has none.
        var updateDateNodes = node.SelectNodes(".//span[@class='mr-2 text-xs text-gray-500']/abbr"); //1/13/2021
        if (updateDateNodes != null && updateDateNodes.Count > 0)
        {
            if (updateDateNodes[0] != null && updateDateNodes[0].Attributes.Contains("data-epoch") && updateDateNodes[0].Attributes["data-epoch"].Value != null)
                modInit.UpdateDate = DateTime.UnixEpoch.AddSeconds(Convert.ToInt64(updateDateNodes[0].Attributes["data-epoch"].Value));
        }

        modInit.Synopsis = node.SelectNodes(".//p[@class='text-sm leading-snug']")[0].InnerText.HtmlDecode().Trim('\r', '\n', ' ', '\t'); //This is the synopsis, we do not need leading tabs //1/13/2021

        if (!ParsedModIdentifiers.Contains(modInit.ModIdentifier))
        {
            Console.WriteLine("Enqueued: {0}", modInit.ModIdentifier);
            ModsToScrape.Enqueue(modInit);
            ModsToCaptureInfoPage.Enqueue(modInit.ModIdentifier);
            ParsedModIdentifiers.Add(modInit.ModIdentifier);
        }
        else
        {
            Console.WriteLine("Duplicate identifer: {0}", modInit.ModIdentifier);
        }
    }

    //Tell future instances
    ParsedPages.Add(nextPage);

    return true;
}
906
/// <summary>
/// Placeholder for building a CDN download URL for a file; not implemented yet.
/// </summary>
/// <param name="file">File the URL would be built for (currently unused).</param>
/// <returns>Always an empty string.</returns>
public static string CreateCDNUrl(DownloadFile file) => "";
911
/// <summary>
/// Scrapes the download page of mods that have differing download names
/// This may be considered a waste of time
/// </summary>
/// <remarks>
/// Handles the entry at the head of <c>FilesToCaptureDownloadPage</c>: reads the
/// file name from the file's page, logs it, and moves the entry to
/// <c>FilesToDownload</c>. On any failure the entry is dequeued and recorded in
/// <c>FilesCouldNotDownload</c>.
/// </remarks>
/// <returns>True when the file page was captured; false when the queue is empty or the capture failed.</returns>
public static bool Mirror_DownloadModPage()
{
    //Check if any files need to be captured
    if (FilesToCaptureDownloadPage.Count < 1)
        return false;

    var dlFileId = FilesToCaptureDownloadPage.Peek();

    // Resolve both ids without throwing. A stale entry would otherwise raise
    // KeyNotFoundException and stay at the head of the queue forever.
    Mod mod;
    DownloadFile file = null;
    bool known = Mods.TryGetValue(dlFileId.Key, out mod) && DownloadRegistry.TryGetValue(dlFileId.Value, out file);
    if (!known)
    {
        string er = $"Failed to capture download page {dlFileId.Key}:{dlFileId.Value} (unknown mod or file id)";
        Console.WriteLine(er);
        failFile.WriteLine(er);
        failFile.Flush();
        FilesToCaptureDownloadPage.Dequeue();
        DownloadModPage_RecordFailure(dlFileId);
        return false;
    }

    try
    {
        Console.WriteLine($"Capturing download page of {mod.ModIdentifier}:{file.Id}");
        string dlPageURL = $"http://www.curseforge.com/minecraft/mc-mods/{mod.ModIdentifier}/files/{file.Id}";
        var doc = GetHtmlDocument(dlPageURL);
        string pageXPath = "//div[@class='flex flex-col md:flex-row justify-between border-b border-gray--100 mb-2 pb-4']/div/span[@class='text-sm']"; //1/13/2021
        var filename = doc.DocumentNode.SelectNodes(pageXPath)[0];
        file.Filename = filename.InnerText.HtmlDecode();
        logFile.WriteLine($"{file.Id}:{file.Filename}");
        logFile.Flush();
        file.DownloadedFilePage = true;
        FilesToCaptureDownloadPage.Dequeue();
        FilesToDownload.Enqueue(dlFileId);
    }
    catch (Exception e)
    {
        string er = $"Failed to capture download page {mod.ModIdentifier}:{file.Id}";
        // Marked as processed even on failure so the page is not requested again.
        file.DownloadedFilePage = true;
        Console.WriteLine(er);
        failFile.WriteLine($"{er} {e.Message}");
        failFile.Flush();
        NeedUserAttention(1);
        FilesToCaptureDownloadPage.Dequeue();
        DownloadModPage_RecordFailure(dlFileId);
        return false;
    }

    return true;
}

// Records a (mod identifier, file id) pair in FilesCouldNotDownload, creating the per-mod set on first use.
private static void DownloadModPage_RecordFailure(KeyValuePair<string, int> dlFileId)
{
    HashSet<int> failedIds;
    if (!FilesCouldNotDownload.TryGetValue(dlFileId.Key, out failedIds))
    {
        failedIds = new HashSet<int>();
        FilesCouldNotDownload.Add(dlFileId.Key, failedIds);
    }

    failedIds.Add(dlFileId.Value);
}
958
/// <summary>
/// Performs one step of building the per-mod file lists. Either collects the mod at the
/// head of ModsToScrape (its page count plus the files listed on page 1), or scrapes one
/// not-yet-parsed file-list page of a randomly chosen mod that was already collected.
/// Every file link found is stored in DownloadRegistry and queued in FilesToDownload.
/// </summary>
/// <returns>
/// true if a page was retrieved and processed; false if the page could not be retrieved
/// or if every collected mod is already marked as completed.
/// </returns>
public static bool Mirror_LoadModPage()
{
    //A single generator per call: time-seeded generators created back-to-back can repeat values
    var rnd = new Random();

    //50/50 chance to either collect a new mod or continue scraping a random mod
    if (ModsToScrape.Count > 0 && (rnd.Next(0, 2) == 0 || Mods.Count == 0))
    {
        //Collect a new mod. This means capture the page count and files on the first page
        Mod collect = ModsToScrape.Peek();
        Console.WriteLine("Collecting: {0}", collect.ModIdentifier);

        HtmlDocument doc;
        try
        {
            doc = GetHtmlDocument($"http://www.curseforge.com/minecraft/mc-mods/{collect.ModIdentifier}/files/all");
        }
        catch
        {
            //Most likely captcha. The mod stays at the head of the queue so it is retried later
            Console.WriteLine("Failed to retrieve document");
            return false;
        }

        //Capture page count. No pagination block (or no links inside it) means a single page
        collect.PageCount = 1;
        var pagination = doc.DocumentNode.SelectNodes("//div[@class='pagination pagination-top flex items-center']"); //1/13/2021
        if (pagination != null)
        {
            //SelectNodes returns null, not an empty collection, when nothing matches
            var pageLinks = pagination[0].SelectNodes("./a");
            if (pageLinks != null)
                collect.PageCount = Convert.ToInt32(pageLinks.Last().InnerText);
        }

        //Capture files (some projects have 0 files)
        Mirror_LoadModPage_CaptureFiles(doc, collect);
        logFile.Flush();

        //Dequeue if success
        ModsToScrape.Dequeue();
        //Add to master list
        Mods.Add(collect.ModIdentifier, collect);
        //Mark page 1 as scraped
        collect.ParsedPages.Add(1);
        return true;
    }
    else
    {
        //Scrape a random mod. Capture a random page
        Mod mod;

        //Identify a mod that has pages to be scraped
        while (true)
        {
            if (CompletedMods.Count >= Mods.Count)
            {
                Console.WriteLine("Completed download of all mods (or pages have yet to be parsed). Sleeping for 1 second");
                Thread.Sleep(1000);
                return false;
            }

            mod = Mods.ElementAt(rnd.Next(0, Mods.Count)).Value;

            if (mod.ParsedPages.Count > mod.PageCount - 1)
            {
                //Every page of this mod is parsed: remember that and draw another mod
                if (!CompletedMods.Contains(mod.ModIdentifier))
                {
                    CompletedMods.Add(mod.ModIdentifier);
                    Console.WriteLine($"Completed: {mod.ModIdentifier}");
                }
                continue;
            }

            break;
        }

        //Pick a page that can be scraped. Enumerating the unparsed pages of THIS mod
        //guarantees termination and does not depend on any unrelated page counter
        var unparsedPages = Enumerable.Range(1, mod.PageCount)
            .Where(page => !mod.ParsedPages.Contains(page))
            .ToList();
        if (unparsedPages.Count == 0)
            return false;
        int nextPage = unparsedPages[rnd.Next(0, unparsedPages.Count)];

        string url = $"http://www.curseforge.com/minecraft/mc-mods/{mod.ModIdentifier}/files/all";
        if (nextPage > 1)
            url += $"?page={nextPage}";

        //Obtain page
        HtmlDocument doc;
        try
        {
            doc = GetHtmlDocument(url);
        }
        catch
        {
            //Most likely captcha
            Console.WriteLine("Failed to retrieve document");
            return false;
        }

        //Capture files
        Mirror_LoadModPage_CaptureFiles(doc, mod);
        logFile.Flush();

        if (!mod.ParsedPages.Contains(nextPage))
            mod.ParsedPages.Add(nextPage);

        Console.WriteLine("Scraping: {0}", mod.ModIdentifier);

        return true;
    }
}

/// <summary>
/// Reads every file link of a file-list page, stores the file in DownloadRegistry
/// (replacing an existing entry with the same id), adds its id to the mod's file list
/// and queues it in FilesToDownload. Does nothing when the page lists no files.
/// </summary>
/// <param name="doc">Parsed file-list page.</param>
/// <param name="mod">Mod the page belongs to.</param>
private static void Mirror_LoadModPage_CaptureFiles(HtmlDocument doc, Mod mod)
{
    var files = doc.DocumentNode.SelectNodes("//tr/td/a[@data-action='file-link']"); //1/13/2021
    if (files == null)
        return; //SelectNodes yields null when the page has no file rows

    foreach (var file in files)
    {
        DownloadFile dlFile = new DownloadFile();
        //The file id is the last path segment of the link target
        dlFile.Id = Convert.ToInt32(file.Attributes["href"].Value.Split('/').Last());
        dlFile.Filename = file.InnerText.HtmlDecode();
        Console.WriteLine("{0} : {1}", dlFile.Id, dlFile.Filename);
        logFile.WriteLine("{0}:{1}", dlFile.Id, dlFile.Filename);
        DownloadRegistry[dlFile.Id] = dlFile;
        mod.Files.Add(dlFile.Id);
        FilesToDownload.Enqueue(new KeyValuePair<string, int>(mod.ModIdentifier, dlFile.Id));
    }
}
1119
/// <summary>
/// Repeatedly drains the download queue and then runs one randomly chosen entry of
/// MirrorActions, until an action reports success by returning true.
/// </summary>
public static void PerformRandomMirrorAction()
{
    Random rnd = new Random();
    int nextAction;
    do //Keep trying to perform a mirror action until one returns true or succeeds
    {
        nextAction = rnd.Next(0, MirrorActions.Count);
        //Downloads queued by the previous action are processed before the next action runs
        DownloadFiles();
    } while (!MirrorActions[nextAction]());
}
1131
/// <summary>
/// Downloads every file queued in FilesToDownload, one per second.
/// A file whose download page has not been captured yet is fetched from media.forgecdn.net
/// using the scraped file name; on failure it is moved to FilesToCaptureDownloadPage.
/// A file whose download page was captured is fetched from edge.forgecdn.net; on failure it
/// is recorded in FilesCouldNotDownload and not queued again.
/// </summary>
public static void DownloadFiles()
{
    while (FilesToDownload.Count > 0)
    {
        var pair_ref = FilesToDownload.Peek();
        Mod mod = Mods[pair_ref.Key];
        DownloadFile file = DownloadRegistry[pair_ref.Value];

        string baseName = file.Filename.EndsWith(".jar", StringComparison.Ordinal) ? file.Filename : file.Filename + ".jar";

        //Escape literal '+' BEFORE turning spaces into '+', otherwise the two cannot be told apart.
        //string.Replace returns a new string, so the result has to be kept
        string filenameLink = baseName.Replace("+", "%2B").Replace(' ', '+');

        //CDN paths split the id into everything-but-the-last-three-digits / last-three-digits
        //without leading zeros. Integer arithmetic also covers ids ending in 000 and short ids
        int idHigh = file.Id / 1000;
        int idLow = file.Id % 1000;
        string link = $"https://media.forgecdn.net/files/{idHigh}/{idLow}/{filenameLink}";
        string edgelink = $"https://edge.forgecdn.net/files/{idHigh}/{idLow}/{file.Filename}";

        //The file name comes from a scraped page: strip path separators and other characters
        //Windows rejects so it cannot escape the mod's directory or break file creation
        string directory = mod.ModIdentifier.WindowsEncode();
        string filename = directory + "\\" + file.Id + "-" + baseName.Replace(' ', '+').WindowsEncode();
        Directory.CreateDirectory(directory);

        bool useEdge = file.DownloadedFilePage;
        string url = useEdge ? edgelink : link;
        try
        {
            CDNClient.DownloadFile(url, filename);
            if (useEdge)
                Console.WriteLine("Downloaded {0} -> {1} USING EDGE", url, filename);
            else
                Console.WriteLine("Downloaded {0} -> {1}", url, filename);
            file.Downloaded = true;
        }
        catch (Exception e)
        {
            if (useEdge)
            {
                //Second attempt failed as well: give up on this file
                string er = $"Could not download {edgelink}: {e.Message}";
                Console.WriteLine(er);
                failFile.WriteLine(er);
                failFile.Flush();
                Console.WriteLine("Will not requeue");

                if (!FilesCouldNotDownload.ContainsKey(mod.ModIdentifier))
                    FilesCouldNotDownload.Add(mod.ModIdentifier, new HashSet<int>());
                FilesCouldNotDownload[mod.ModIdentifier].Add(file.Id);
            }
            else
            {
                //First attempt failed: the download page has to be captured to learn the real file name
                string er = $"Error with {link}: {e.Message}";
                Console.WriteLine(er);
                failFile.WriteLine(er);
                failFile.Flush();
                Console.WriteLine("Adding to list");
                FilesToCaptureDownloadPage.Enqueue(pair_ref);
            }
        }

        //Success or handled failure: either way this entry is done
        FilesToDownload.Dequeue();

        Console.WriteLine("Sleeping...");
        Thread.Sleep(1000);
    }
}
1200
/// <summary>
/// Debugging aid: loads an HTML page saved on disk, runs the mod-info XPath queries
/// against it and prints what they extract. A query that matches nothing is reported
/// (or leaves the value at its default) instead of aborting the remaining checks.
/// </summary>
/// <param name="path">Path of the HTML file to load.</param>
public static void TestXPath(string path)
{
    HtmlDocument doc = new HtmlDocument();
    doc.Load(path);
    HtmlNode root = doc.DocumentNode;

    Mod mod = new Mod();

    //Mod description
    HtmlNode descriptionNode = root.SelectSingleNode("//div[@class='box p-4 pb-2 project-detail__content']");
    mod.Description = descriptionNode != null ? descriptionNode.InnerText.HtmlDecode() : string.Empty;

    //Members (SelectNodes returns null when nothing matches)
    var authorNodes = root.SelectNodes("//div[@class='flex mb-2']/div[@class='flex flex-col flex-grow']/p[@class='text-sm text-primary-500 flex']/a/span");
    if (authorNodes != null)
        foreach (HtmlNode authorNode in authorNodes)
        {
            string author = authorNode.InnerText.Trim('\r', '\n', ' ', '\0', '\t');
            if (!mod.CompleteAuthorList.Contains(author))
                mod.CompleteAuthorList.Add(author);
        }

    //Source: first navigation link whose text mentions "Source" and that carries an href
    var srcNodes = root.SelectNodes("//li[@class=' b-list-item p-nav-item px-2 pb-1/10 -mb-1/10 text-gray-500']/a[@class='text-gray-500 hover:no-underline']");
    if (srcNodes != null)
    {
        HtmlNode sourceLink = srcNodes.FirstOrDefault(n => n.InnerText.Contains("Source"));
        if (sourceLink != null && sourceLink.Attributes.Contains("href"))
            mod.SourceURL = sourceLink.Attributes["href"].Value;
    }

    //Download Count: first space-separated token of the text, thousands separators removed
    HtmlNode downloadsNode = root.SelectSingleNode("//div[@class='flex flex-col mt-auto mb-auto']/div[@class='flex']/span[@class='mr-2 text-sm text-gray-500']");
    if (downloadsNode != null)
        mod.DownloadCount = Convert.ToInt32(downloadsNode.InnerText.Split(' ')[0].Replace(",", ""));
    else
        Console.WriteLine("Download count node not found");

    //Project id
    HtmlNode projectIdNode = root.SelectSingleNode("//div[@class='pb-4 border-b border-gray--100']/div[@class='flex flex-col mb-3']/div[@class='w-full flex justify-between']/span[2]");
    if (projectIdNode != null)
        mod.ProjectId = Convert.ToInt32(projectIdNode.InnerText);
    else
        Console.WriteLine("Project id node not found");

    //Print out results
    Console.WriteLine("{0}: {1}", mod.ModIdentifier, mod.ModName);
    Console.WriteLine("Description: {0}", mod.Description);
    //string.Join copes with an empty author list, which Aggregate without a seed does not
    Console.WriteLine("Authors: {0}", string.Join(", ", mod.CompleteAuthorList));
    Console.WriteLine("Source: {0}", mod.SourceURL);
    Console.WriteLine("Downloads: {0}", mod.DownloadCount);
    Console.WriteLine("ProjectID: {0}", mod.ProjectId);
}
1294
//State container that SaveState serializes and Main fills from the saved state file at startup
[JsonIgnore] public static NewContainer data = new NewContainer();
1297
/// <summary>
/// Thread body for autosaving: every 30 minutes writes the state to one of three
/// rotating files (savedState.new.auto.0.json to savedState.new.auto.2.json).
/// Never returns. A failed save is reported on the console and the loop carries on.
/// </summary>
public static void SaveThread()
{
    const int autosaveIntervalMs = 30 * 60 * 1000;

    Console.WriteLine("Autosaving enabled.");
    int current = 0;
    while (true)
    {
        try
        {
            Thread.Sleep(autosaveIntervalMs);
            Console.Write("Saving..."); //Just hope a property isn't being iterated over
            SaveState($"savedState.new.auto.{current++ % 3}.json");
            Console.WriteLine("Saved.");
        }
        catch (Exception e)
        {
            //Keep autosaving, but do not hide the failure: the state is mutated by the
            //main thread while it is being serialized here, so a save can legitimately fail
            Console.WriteLine($"Autosave failed: {e.Message}");
        }
    }
}
1319
/// <summary>
/// See if downloaded mods are present.
/// Placeholder: the body is empty, so calling it currently has no effect.
/// </summary>
public static void Verify() { }
1327
/// <summary>
/// Program entry point. Restores the saved state if savedState.new.json exists, registers
/// the mirror actions, starts the autosave thread and then runs mirror actions forever.
/// Pressing the space bar prints a status line. On an unhandled exception the state is
/// saved to savedState.new.json and the program waits for a key press before returning.
/// </summary>
/// <param name="args">Command line arguments; "--verify" calls Verify() before mirroring starts.</param>
public static void Main(string[] args)
{
    //All relative paths (state files, mod directories) resolve against this directory
    Environment.CurrentDirectory = "E:\\curseforge\\";

    //Load saved state
    if (File.Exists("savedState.new.json"))
    {
        using (FileStream fstream = new FileStream("savedState.new.json", FileMode.Open, FileAccess.Read, FileShare.None, 1048576))
        using (StreamReader sr = new StreamReader(fstream))
        using (JsonReader jr = new JsonTextReader(sr))
        {
            JsonSerializer jsonSerializer = new JsonSerializer();
            //Deserialize yields null for an empty file or a literal "null": start fresh in that case
            data = jsonSerializer.Deserialize<NewContainer>(jr) ?? new NewContainer();
            Mods = data.Mods;
            ModsToScrape = data.ModsToScrape;
            ModsToCaptureInfoPage = data.ModsToCaptureInfoPage;
            CompletedModInfo = data.CompletedModInfo;
            FilesToDownload = data.FilesToDownload;
            FilesToCaptureDownloadPage = data.FilesToCaptureDownloadPage;
            FilesCouldNotDownload = data.FilesCouldNotDownload;
            FailedToCaptureInfoPage = data.FailedToCaptureInfoPage;
            DownloadRegistry = data.DownloadRegistry;
            ParsedModIdentifiers = data.ParsedModIdentifiers;
            CompletedMods = data.CompletedMods;
            ParsedPages = data.ParsedPages;
            lastRequest = data.lastRequest;
        }
    }

    //Point the container at the live collections. This is what makes SaveState persist the
    //working state when no saved state was loaded; after a load these assignments change nothing
    data.Mods = Mods;
    data.ModsToScrape = ModsToScrape;
    data.ModsToCaptureInfoPage = ModsToCaptureInfoPage;
    data.CompletedModInfo = CompletedModInfo;
    data.FilesToDownload = FilesToDownload;
    data.FilesToCaptureDownloadPage = FilesToCaptureDownloadPage;
    data.FilesCouldNotDownload = FilesCouldNotDownload;
    data.FailedToCaptureInfoPage = FailedToCaptureInfoPage;
    data.ParsedModIdentifiers = ParsedModIdentifiers;
    data.DownloadRegistry = DownloadRegistry;
    data.CompletedMods = CompletedMods;
    data.ParsedPages = ParsedPages;
    data.lastRequest = lastRequest;

    //Ctrl+C saves the state before exiting
    Console.CancelKeyPress += Console_CancelKeyPress;

    if (args.Contains("--verify")) Verify();

    MirrorActions.Add(Mirror_LoadPage);
    MirrorActions.Add(Mirror_LoadModPage);
    MirrorActions.Add(Mirror_DownloadModPage);
    MirrorActions.Add(Mirror_LoadModInfoPage);
    MirrorActions.Add(Mirror_DownloadMissedMetadata);

    //Background thread: SaveThread never returns, and a foreground thread would keep the
    //process alive after Main has returned
    var savingThread = new Thread(SaveThread) { IsBackground = true };
    savingThread.Start();

    //Cloudscrape request proxy. NOTE(review): the original comment says it is hosted on a
    //separate machine, yet the address is loopback - confirm which one is intended
    ProxyClient.Proxy = new WebProxy("127.0.0.1", 9097);

    try
    {
        while (true)
        {
            PerformRandomMirrorAction();
            if (Console.KeyAvailable)
                if (Console.ReadKey(true).Key == ConsoleKey.Spacebar)
                    Console.WriteLine($"STATUS: Pages: {ParsedPages.Count}/{PageCount} Mods: {{Σ{DownloadRegistry.Count}:{Mods.Count}:{ModsToScrape.Count}:{CompletedMods.Count}:{ParsedModIdentifiers.Count}}} Files: {{+{DownloadRegistry.Sum(n => n.Value.Downloaded ? 1 : 0)}:{FilesToDownload.Count}:{FilesToCaptureDownloadPage.Count}:!{FilesCouldNotDownload.Sum(n => n.Value.Count)}}} Info: {{+{CompletedModInfo.Count}:!{Mods.Sum(n => n.Value.FailedToRetrieveSource ? 1 : 0)}:-{Mods.Sum(n => n.Value.HasImage ? 1 : 0)}:{Mods.Sum(n => n.Value.HasSource ? 1 : 0)}}}");
        }
    }
    catch (Exception e)
    {
        Console.WriteLine($"Program crash: {e.Message}");
        Console.WriteLine(e.StackTrace);
        //Save
        SaveState("savedState.new.json");
        NeedUserAttention(1);
    }
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey(true);
}
1492
/// <summary>
/// Persisted state of the mirrorer: the object SaveState serializes to JSON and Main
/// reads back at startup. All collections start out empty. The members are public
/// fields, declared in the same order as before.
/// </summary>
public class NewContainer
{
    //Collected mods, keyed by mod identifier
    public Dictionary<string, Mod> Mods = new Dictionary<string, Mod>();

    //Mods waiting to have their first file-list page collected
    public Queue<Mod> ModsToScrape = new Queue<Mod>();

    //(mod identifier, file id) pairs waiting to be downloaded
    public Queue<KeyValuePair<string, int>> FilesToDownload = new Queue<KeyValuePair<string, int>>();

    //(mod identifier, file id) pairs whose direct download failed and whose download page must be captured
    public Queue<KeyValuePair<string, int>> FilesToCaptureDownloadPage = new Queue<KeyValuePair<string, int>>();

    //Mod identifier -> ids of the files that could not be downloaded
    public Dictionary<string, HashSet<int>> FilesCouldNotDownload = new Dictionary<string, HashSet<int>>();

    //Presumably identifiers of mods whose info page capture failed - TODO confirm against its writer
    public HashSet<string> FailedToCaptureInfoPage = new HashSet<string>();

    //File id -> file record
    public Dictionary<int, DownloadFile> DownloadRegistry = new Dictionary<int, DownloadFile>();

    //Presumably identifiers found while parsing the mod listing - TODO confirm against its writer
    public List<string> ParsedModIdentifiers = new List<string>();

    //Identifiers of mods whose file-list pages have all been parsed
    public HashSet<string> CompletedMods = new HashSet<string>();

    //Presumably identifiers of mods whose info page still has to be captured - TODO confirm
    public Queue<string> ModsToCaptureInfoPage = new Queue<string>();

    //Presumably identifiers of mods whose info page was captured - TODO confirm
    public HashSet<string> CompletedModInfo = new HashSet<string>();

    //Presumably numbers of the listing pages already parsed - TODO confirm
    public List<int> ParsedPages = new List<int>();

    //Defaults to one minute before the container was created
    public DateTime lastRequest = DateTime.Now.AddSeconds(-60);
}
1521
/// <summary>
/// The very first state container, kept only for nostalgic reasons.
/// Unlike NewContainer, its file queues hold whole Mod / DownloadFile objects, and its
/// collections are not initialized here.
/// </summary>
public class Container
{
    public Dictionary<string, Mod> Mods;
    public Queue<Mod> ModsToScrape;
    public Queue<KeyValuePair<Mod, DownloadFile>> FilesToDownload;
    public Queue<KeyValuePair<Mod, DownloadFile>> FilesToCaptureDownloadPage;
    public List<string> ParsedModIdentifiers;
    public HashSet<string> CompletedMods;
    public List<int> ParsedPages;

    //Defaults to one minute before the container was created
    public DateTime lastRequest = DateTime.Now - TimeSpan.FromSeconds(60);
}
1534
/// <summary>
/// Serializes the state container (data) as JSON to the given file.
/// The JSON is written to "path.tmp" first and only swapped in once it is complete, so a
/// failure in the middle of serialization leaves an existing state file untouched.
/// </summary>
/// <param name="path">Destination file; created if missing, replaced if present.</param>
private static void SaveState(string path)
{
    string tempPath = path + ".tmp";

    using (FileStream fstream = new FileStream(tempPath, FileMode.Create, FileAccess.Write, FileShare.None, 1048576))
    using (StreamWriter sw = new StreamWriter(fstream))
    using (JsonWriter jw = new JsonTextWriter(sw))
    {
        JsonSerializer jsonSerializer = new JsonSerializer();

        jsonSerializer.Serialize(jw, data);
    }

    //File.Replace needs an existing destination; a first-time save is a plain move
    if (File.Exists(path))
        File.Replace(tempPath, path, null);
    else
        File.Move(tempPath, path);
}
1546
/// <summary>
/// Console.CancelKeyPress handler: writes the state to savedState.new.json and then
/// terminates the process with exit code 0.
/// </summary>
private static void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
{
    const string stateFile = "savedState.new.json";

    SaveState(stateFile);
    Environment.Exit(0); //Program exit
}
1553 }
1554
/// <summary>
/// String extension helpers used by the mirrorer.
/// </summary>
public static class Extensions
{
    //Characters removed by WindowsEncode: the ones Windows does not accept in a file name
    private static readonly char[] ForbiddenNameChars = { '<', '>', ':', '"', '/', '\\', '|', '?', '*' };

    /// <summary>Decodes HTML entities in the string (thin wrapper over WebUtility.HtmlDecode).</summary>
    public static string HtmlDecode(this string str) => WebUtility.HtmlDecode(str);

    /// <summary>
    /// Returns the string with every occurrence of &lt; &gt; : " / \ | ? * removed.
    /// Nothing is substituted for the removed characters.
    /// </summary>
    public static string WindowsEncode(this string str)
    {
        //Splitting on the forbidden characters and gluing the pieces back together drops them
        string[] keptPieces = str.Split(ForbiddenNameChars);
        return string.Concat(keptPieces);
    }
}
1575}
1576