· 7 years ago · Dec 28, 2018, 02:02 PM
1import std.stdio;
2import std.array;
3import std.conv;
4import std.string;
5import std.path;
6import std.file;
7import requests;
8import arsd.database, arsd.sqlite;
9import std.regex;
10import core.time : days;
11import std.datetime;
12import std.zip;
13import std.process;
14
15
// Connection settings for the FTP server the archives are fetched from.
string ftp_url      = "ftp.zakupki.gov.ru";
string root_ftp_dir = "/out/published/";

// Credentials; login and password are the same literal.
string ftp_login = "fz223free", ftp_pass = "fz223free";
20
// Shared request/response objects reused by the listing functions.
Request  rq;
Response rs;

// Database handle; assigned in main().
Database db;

// Local folder that downloaded archives are written to; assigned in main().
string files_folder;

// Prefix prepended to a file path when an archive is downloaded.
string ftp_root_uri = "ftp://ftp.zakupki.gov.ru/";
27
/// One archive file found on the FTP server.
struct MyStruct
{
    /// Name of the region folder the file was found under.
    string region_name;

    /// Name of the section folder (for example "purchaseNotice").
    string section_name;

    /// Path of the archive as returned by the folder listing.
    string full_file_path;

    /// The 8-digit date extracted from the file name (for example 20180711).
    string arch_date;
}

/// Files collected by listOfFinalFolders() and written out by saveToDB().
MyStruct[] mystructs;
37
/// Program entry point: sets up the global paths and the database handle,
/// then processes every pending archive recorded in the database.
void main()
{
    string exe_path = thisExePath();
    writeln(exe_path);

    // Archives are stored in a `files` folder next to the executable.
    files_folder = exe_path.dirName ~ `/files/`;

    // NOTE: "my.db" is a relative path and the handle is created before the
    // chdir below.
    db = new Sqlite("my.db");
    //downloadFile();

    chdir(`D:\code\2018\zakupki-downloader\source2`);

    // Swap the two calls below to rebuild the list of FTP files instead.
    extractFromDBAndProcess();
    //listOfFinalFolders();
}
55
/// Collects the list of archive files (with full paths) and stores it in the DB.
///
/// Reads the region folders under the published root, keeps only the "moskva"
/// region, lists the `daily` folder of every section returned by
/// listOfSectionForProcessing(), and appends one MyStruct to `mystructs` for
/// each file whose name contains an 8-digit date from 2018 onwards.
/// Calls saveToDB() at the end.
void listOfFinalFolders()
{
    rq.verbosity = 3;
    rq.authenticator = new BasicAuthentication(ftp_login, ftp_pass);

    rs = rq.get("ftp://ftp.zakupki.gov.ru/out/published/");

    // Keep the listing in a local: getListOfFolderFiles() reassigns the global `rs`.
    string[] region_lines = splitLines(to!string(rs.responseBody));

    string[] sectionsForProcessing = listOfSectionForProcessing();

    // Compiled once instead of once per file name; matches e.g. 20180711.
    auto date_pattern = regex(r"([0-9]{8})");

    foreach (line; region_lines)
    {
        if (!line.startsWith(`/`))
            continue;

        // baseName(line) is the region name.
        string region = baseName(line);
        if (region.toLower != "moskva") // only Moscow for now
            continue;

        foreach (section; sectionsForProcessing)
        {
            string folder_path = `ftp://ftp.zakupki.gov.ru/out/published/` ~ region ~ `/` ~ section ~ `/daily/`;

            foreach (file_full_name; getListOfFolderFiles(folder_path))
            {
                auto str_date = matchFirst(file_full_name, date_pattern);

                // A name without a date has no match; reading `.hit` on an
                // empty match is invalid, so skip such entries.
                if (str_date.empty)
                    continue;

                // Take only 2018 and later.
                if (to!int(str_date.hit[0 .. 4]) >= 2018)
                {
                    mystructs ~= MyStruct(region, section, file_full_name, str_date.hit);
                }
            }
        }
    }

    saveToDB();
}
110
/// Requests the listing of `folder` and returns every line of the response
/// that starts with `/`.
///
/// Params:
///     folder = URL of the folder to list.
/// Returns: the matching lines, in response order.
string [] getListOfFolderFiles(string folder)
{
    rq.verbosity = 3;
    rq.authenticator = new BasicAuthentication(ftp_login, ftp_pass);
    rs = rq.get(folder); // stored in the global response object

    string[] entries;
    foreach (entry; splitLines(to!string(rs.responseBody)))
    {
        // Only lines that look like absolute paths are kept.
        if (!entry.startsWith(`/`))
            continue;

        entries ~= entry;
    }

    return entries;
}
132
133
/// Creates the `ftp_files` table if needed and stores every entry of
/// `mystructs` in it.
///
/// `ftp_file_full_path` is declared UNIQUE, so a plain INSERT fails as soon as
/// a path that is already stored is saved again. `INSERT OR IGNORE` skips such
/// rows and leaves their existing `processing_status` untouched.
void saveToDB()
{
    writeln("saveToDB");

    string sql_create = `CREATE TABLE IF NOT EXISTS ftp_files (ID INTEGER PRIMARY KEY AUTOINCREMENT, region TEXT NOT NULL, section_name TEXT NOT NULL, ftp_file_full_path TEXT NOT NULL UNIQUE, arch_date TEXT NOT NULL, processing_status TEXT)`;
    db.query(sql_create);

    foreach (mystr; mystructs)
    {
        db.query(`INSERT OR IGNORE INTO ftp_files (region, section_name, ftp_file_full_path, arch_date) VALUES (?, ?, ?, ?);`, mystr.region_name, mystr.section_name, mystr.full_file_path, mystr.arch_date);
    }
}
147
148
/// Downloads and processes every "purchaseNotice" row of `ftp_files` that has
/// no processing status yet.
void extractFromDBAndProcess()
{
    // The section name is a single-quoted SQL string literal. In SQLite a
    // double-quoted token is an identifier and is only treated as a string
    // through a legacy fallback.
    foreach (row; db.query(`SELECT ID, region, section_name, ftp_file_full_path, arch_date, processing_status FROM ftp_files WHERE section_name = 'purchaseNotice' AND processing_status IS NULL;`))
    {
        writeln(row["ftp_file_full_path"]);
        downloadFile(row["ftp_file_full_path"], row["section_name"], to!int(row["ID"]));
    }
}
158
/// Stores the processing status of one `ftp_files` row.
///
/// Params:
///     status = value written to `processing_status`.
///     id     = `ID` of the row to update.
void updateDBStatus(string status, int id)
{
    // Values are passed as query parameters (as saveToDB does) instead of
    // being formatted into the SQL text, so quotes in `status` cannot break
    // or alter the statement.
    db.query(`UPDATE ftp_files SET processing_status = ? WHERE ID = ?`, status, id);
}
164
165
/// Downloads one archive into `files_folder`, hands it to processSingleFile()
/// and removes the local copy afterwards.
///
/// Params:
///     full_file_path = path of the archive on the server.
///     section_name   = section the archive belongs to; passed through to
///                      processSingleFile().
///     id             = `ID` of the corresponding `ftp_files` row.
///
/// A failed download is reported on stderr and skipped (best effort).
/// Archives smaller than 256 bytes are discarded without processing.
void downloadFile(string full_file_path, string section_name, int id )
{
    rq.authenticator = new BasicAuthentication(ftp_login, ftp_pass);
    Response rs;
    try // the request may fail; that is acceptable, the file is just skipped
    {
        rs = rq.get(ftp_root_uri ~ `/` ~ full_file_path);
    }
    catch (Exception e)
    {
        stderr.writeln("Download failed for ", full_file_path, ": ", e.msg);
        return;
    }

    string archive_path = files_folder ~ baseName(full_file_path);
    writeln("archive_path: ", archive_path);

    // Create the folder the archive is actually written to. Checking a bare
    // "files" name would test the current working directory, which need not
    // be the folder `files_folder` points at.
    string target_dir = dirName(archive_path);
    if (!exists(target_dir))
        mkdirRecurse(target_dir);

    {
        File f = File(archive_path, "wb");
        scope(exit) f.close();
        f.rawWrite(rs.responseBody.data);
    }

    // Remove the local copy on every exit path, including when processing throws.
    scope(exit)
    {
        if (exists(archive_path))
            archive_path.remove();
    }

    if (getSize(archive_path) < 256)
    {
        writeln("File too small and deleted: ", archive_path);
        return;
    }

    processSingleFile(archive_path, section_name, id);
}
205
/// Unpacks every member of a zip archive, runs the Python parser on each
/// extracted file and records the overall result in the database.
///
/// Params:
///     archive_path = local path of the downloaded archive.
///     section_name = passed to the parser as its second argument.
///     id           = `ID` of the `ftp_files` row whose status is updated.
///
/// The status is "success" only when the parser exits with code 0 for every
/// member; a single failure marks the whole archive as "fail". An archive
/// without members leaves the status unchanged.
void processSingleFile(string archive_path, string section_name, int id)
{
    // NOTE: machine-specific location of the parser script.
    enum parser_script = `D:\code\2018\zakupki-downloader\source2\main.py`;

    auto zip = new ZipArchive(read(archive_path));

    // Target folder: the archive path with its last two extensions removed.
    string xml_dir = archive_path.stripExtension.stripExtension;

    size_t processed = 0;
    bool all_ok = true;

    foreach (name, am; zip.directory)
    {
        writefln("%10s %08x %s", am.expandedSize, am.crc32, name);
        assert(am.expandedData.length == 0);
        // decompress the archive member
        auto my_xml = zip.expand(am);

        if (!exists(xml_dir))
            mkdirRecurse(xml_dir);

        string xml_full_path = xml_dir ~ `/` ~ name;

        {
            File f = File(xml_full_path, "wb");
            scope(exit) f.close();
            f.rawWrite(my_xml);
        }
        writeln("xml_full_path: ", xml_full_path);

        // Arguments are passed as a list rather than a concatenated shell
        // line, so paths with spaces or shell metacharacters reach the
        // parser intact.
        string[] command = ["python", parser_script, xml_full_path, section_name];
        writeln(command.join(" "));

        try
        {
            auto pid = spawnProcess(command);
            if (wait(pid) != 0)
                all_ok = false;
        }
        catch (ProcessException e)
        {
            // The interpreter could not be started; count it as a failure.
            stderr.writeln("Cannot start parser: ", e.msg);
            all_ok = false;
        }

        ++processed;
    }

    // One status per archive, so a later success cannot hide an earlier failure.
    if (processed > 0)
        updateDBStatus(all_ok ? "success" : "fail", id);
}
244
245
/// Returns the names of the sections whose `daily` folders are listed.
string [] listOfSectionForProcessing()
{
    // Currently disabled: "purchaseNoticeAE", "purchaseProtocol".
    return ["purchaseContract", "purchaseNotice"];
}
251}