· 6 years ago · Dec 30, 2019, 06:32 AM
1/* eslint no-await-in-loop: "off" */
2const debug = require('debug')('apartment-db-migration:siteController');
3const convert = require('../lib/maeshori/index');
4const fs = require('fs');
5const parse = require('csv-parse');
6const axios = require('axios');
7const ConvertCsvException = require('../exceptions/ConvertCsvException');
8const CallApiException = require('../exceptions/CallApiException');
9const logger = require('../config/logger');
10const knex = require('../config/db');
11const Result = require('../models/Result');
12const FormData = require('form-data');
13const ruleCombined = require('../config/Rule_Combined.json');
14
// Collection name sent with every analysis request (read from the environment).
const collection = process.env.COLLECTION;
// Text-analysis endpoint, built from the base URL given in the environment.
const apiUrl = `${process.env.URL}/api/v10/analysis/text`;
// Request timeout in milliseconds. Environment variables are always strings,
// so parse the value; fall back to 5000 when it is unset or not numeric.
const parsedTimeout = Number.parseInt(process.env.TIMEOUT, 10);
const timeout = Number.isNaN(parsedTimeout) ? 5000 : parsedTimeout;
18
/** Own-property check that does not depend on the object's prototype chain. */
const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

/** True when an assigned value contains 無 or 不要. */
const isNegativeValue = value => value.includes('無') || value.includes('不要');

/**
 * Read `field` from a rule. When `preferMention` is set and the rule carries a
 * `mention` object that defines the field, the mention's value is used;
 * otherwise the rule's own top-level value is returned.
 */
const readRuleField = (rule, field, preferMention) => {
  if (preferMention && rule.mention != null && rule.mention[field] !== undefined) {
    return rule.mention[field];
  }

  return rule[field];
};

/**
 * Processes a converted CSV file: stores the conversion results, sends the
 * text columns of the pre-processed CSV to the analysis API, evaluates the
 * returned text facets against the combined rule set and patches the outcome
 * into the matching Result rows.
 */
class ProcessingFileCSVService {
  /**
   * Convert the file, insert the conversion results in batches of 500 and then
   * analyse every row of the `<filePath minus last 4 chars>_前処理.csv` file.
   *
   * @param {string} filePath - Path of the source file.
   * @param {*} csvFileId - Identifier passed to `convert` and used to select Result rows.
   * @throws {ConvertCsvException} When conversion, CSV reading or the insert fails.
   * @throws {CallApiException} When analysing or saving any row fails.
   */
  async handle(filePath, csvFileId) {
    let dataCsv;

    try {
      const results = await convert(filePath, csvFileId);
      dataCsv = await this.readFileCsv(`${filePath.slice(0, -4)}_前処理.csv`);

      const chunkSize = 500;
      for (let i = 0; i < results.length; i += chunkSize) {
        const batch = results.slice(i, i + chunkSize);
        debug(batch.length);
        const res = await knex(Result.tableName).insert(batch);
        debug(res);
      }
    } catch (error) {
      logger.log('error', error.stack);
      throw new ConvertCsvException(error.message);
    }

    try {
      // Rows are processed one at a time (sequential awaits are intentional here).
      for (const row of dataCsv) {
        debug(row);
        await this.saveResult(row, csvFileId);
      }
    } catch (error) {
      logger.log('error', error.stack);
      throw new CallApiException(error.message);
    }
  }

  /**
   * Analyse CSV columns 2-5 of one row and patch the extracted keywords and
   * rule evaluation into the Result rows whose `bukenId` equals `row[1]` and
   * whose `csvFileId` equals the given id.
   *
   * @param {Array} row - One parsed CSV record.
   * @param {*} csvFileId - Identifier of the CSV file the row belongs to.
   */
  async saveResult(row, csvFileId) {
    const updateData = {};
    let textfacets = [];

    // [CSV column index, Result field receiving the keywords of that column]
    const keywordColumns = [
      [2, 'workRequiredInformationKeyword'],
      [3, 'otherInformationKeyword'],
      [4, 'equipmentInformationKeyword'],
      [5, 'shitenArticle1Keyword']
    ];

    for (const [columnIndex, field] of keywordColumns) {
      const res = await this.callApi(row[columnIndex]);
      updateData[field] = res.keyword;
      textfacets = textfacets.concat(res.textfacets);
    }
    updateData.finishTime = Result.fn.now();

    const ruleGroup = this.groupByRuleName(ruleCombined.path, textfacets);
    const data = this.statisticRuleGroup(ruleGroup.rule_group, ruleGroup.mentions);

    // [rule group key, Result field prefix, whether a confidence field is written]
    const resultColumns = [
      ['mdf_key', 'mdfKey', true],
      ['mdf_key_person', 'mdfKeyPerson', true],
      ['advance_contact', 'advanceContact', true],
      ['work_permit_registration', 'workPermitRegistration', true],
      ['immigration_registration', 'immigrationRegistration', true],
      ['other_registration', 'otherRegistration', false],
      ['no_working_day', 'noWorkingDay', true]
    ];

    for (const [group, prefix, hasConfidence] of resultColumns) {
      if (hasOwn(data, group)) {
        updateData[`${prefix}Result`] = data[group].result;
        updateData[`${prefix}Rule`] = data[group].rule;
        if (hasConfidence) {
          updateData[`${prefix}Confidence`] = data[group].confidence;
        }
      }
    }

    const numUpdated = await Result.query()
      .patch(updateData)
      .where('bukenId', '=', row[1])
      .where('csvFileId', '=', csvFileId);

    debug(`update ${numUpdated}`);
  }

  /**
   * Read a CSV file and collect all parsed records.
   *
   * @param {string} filePath - Path of the CSV file.
   * @returns {Promise<Array>} Every record emitted by the parser, in order.
   * @throws {Error} 'File not exists!' when the path does not exist; the
   *   promise also rejects on read-stream or parser errors.
   */
  async readFileCsv(filePath) {
    if (!fs.existsSync(filePath)) {
      throw new Error('File not exists!');
    }

    return new Promise((resolve, reject) => {
      const data = [];
      const parser = parse();

      // Errors do not propagate through pipe(), so the parser needs its own
      // handler; without it malformed CSV would never settle this promise.
      parser
        .on('error', reject)
        .on('data', row => {
          data.push(row);
        })
        .on('end', () => {
          resolve(data);
        });

      fs.createReadStream(filePath)
        .on('error', reject)
        .pipe(parser);
    });
  }

  /**
   * Reduce every non-empty rule group to a single `{ confidence, rule, result }`.
   * Only the rules with the highest 優先度 of a group are considered: one rule
   * is used as is, two rules are combined via `calculateConfidence`, and for
   * more than two the rules before the last are folded with `getRulePriority`
   * and the winner is combined with the last rule.
   *
   * @param {Object<string, Array<Object>>} result - Rules keyed by group name.
   * @param {Array<Object>} mentions - Facets whose first path segment is 'mention'.
   * @returns {Object<string, {confidence: *, rule: string, result: *}>}
   */
  statisticRuleGroup(result, mentions) {
    const data = {};

    for (const key of Object.keys(result)) {
      const rules = result[key];
      if (!rules.length) {
        continue;
      }

      const byPriority = this.groupByPriority(rules);
      const top = byPriority[Math.max(...Object.keys(byPriority))];

      if (top.length === 1) {
        // A matched mention overrides the rule's own confidence and value.
        const usesMention = this.isPatternThird(top[0], mentions);
        data[key] = {
          confidence: readRuleField(top[0], '確信度', usesMention),
          rule: top[0]['rule_name'],
          result: readRuleField(top[0], '代入値', usesMention)
        };
      } else if (top.length === 2) {
        data[key] = this.calculateConfidence(top, mentions);
      } else {
        let firstRes = top[0];
        const secondRes = top[top.length - 1];

        for (let index = 1; index < top.length - 1; index++) {
          firstRes = this.getRulePriority(firstRes, top[index], mentions);
        }
        data[key] = this.calculateConfidence([firstRes, secondRes], mentions);
      }
    }

    return data;
  }

  /**
   * Sort the text facets into rule groups according to the 対象列 entries of
   * the rule they matched, and collect 'mention' facets separately.
   *
   * Each pushed rule is a shallow copy tagged with `rule_name`, so the shared
   * rule configuration is never modified and repeated hits of the same rule do
   * not alias each other.
   *
   * @param {Object} ruleCombinedData - Rule definitions keyed by rule name.
   * @param {Array<Object>} textfacets - Facets with `path`, `keyword` and optionally `content`.
   * @returns {{mentions: Array<Object>, rule_group: Object<string, Array<Object>>}}
   */
  groupByRuleName(ruleCombinedData, textfacets) {
    const mentions = [];
    const ruleGroup = {
      mdf_key: [],
      mdf_key_person: [],
      advance_contact: [],
      work_permit_registration: [],
      immigration_registration: [],
      other_registration: [],
      no_working_day: []
    };

    // [対象列 key in the rule definition, rule group receiving it]
    const targetColumns = [
      ['MDF鍵', 'mdf_key'],
      ['MDF鍵手配者', 'mdf_key_person'],
      ['工事不可日', 'no_working_day'],
      ['事前連絡', 'advance_contact'],
      ['入館届', 'immigration_registration'],
      ['工事許可申請', 'work_permit_registration']
    ];

    textfacets.forEach(facet => {
      const ruleName = facet.path[0];

      if (hasOwn(ruleCombinedData, ruleName)) {
        const columns = ruleCombinedData[ruleName]['対象列'];

        targetColumns.forEach(([column, group]) => {
          if (hasOwn(columns, column)) {
            ruleGroup[group].push(Object.assign({}, columns[column], { rule_name: ruleName }));
          }
        });

        // "Other registration" takes its value from the facet content and is
        // always given confidence 0 and priority 0.
        if (hasOwn(columns, 'その他申請')) {
          ruleGroup.other_registration.push(Object.assign({}, columns['その他申請'], {
            rule_name: ruleName,
            '代入値': facet.content,
            '確信度': 0,
            '優先度': 0
          }));
        }
      }

      if (ruleName === 'mention') {
        mentions.push(facet);
      }
    });

    return {
      mentions: mentions,
      rule_group: ruleGroup
    };
  }

  /**
   * Choose between two rules: the first one wins unless its value contains
   * 無/不要 while the second one's does not. When the first rule matches a
   * mention, mention values are compared; a second rule without a `mention`
   * is then compared by its own top-level value instead of raising.
   *
   * @param {Object} ruleFirst
   * @param {Object} ruleSecond
   * @param {Array<Object>} mentions
   * @returns {Object} Either `ruleFirst` or `ruleSecond` (same reference).
   */
  getRulePriority(ruleFirst, ruleSecond, mentions) {
    const usesMention = this.isPatternThird(ruleFirst, mentions);

    if (!isNegativeValue(readRuleField(ruleFirst, '代入値', usesMention))) {
      return ruleFirst;
    }

    if (!isNegativeValue(readRuleField(ruleSecond, '代入値', usesMention))) {
      return ruleSecond;
    }

    return ruleFirst;
  }

  /**
   * Combine two rules into one `{ confidence, rule, result }`. The rule chosen
   * by `getRulePriority` supplies rule name, value and base confidence `c`;
   * the confidence becomes `1 - (1 - c)^2` when both rules carry the same
   * value and `c * (1 - c)` otherwise, formatted with two decimals.
   *
   * NOTE(review): both formulas use only the chosen rule's confidence, never
   * the other rule's — confirm this is intended.
   *
   * @param {Array<Object>} res - Exactly the two rules to combine.
   * @param {Array<Object>} mentions
   * @returns {{confidence: string, rule: string, result: *}}
   */
  calculateConfidence(res, mentions) {
    const chosen = this.getRulePriority(res[0], res[1], mentions);
    const usesMention = this.isPatternThird(chosen, mentions);

    // Falls back to the top-level value for a rule that has no mention.
    const sameValue = readRuleField(res[0], '代入値', usesMention)
      === readRuleField(res[1], '代入値', usesMention);
    const base = readRuleField(chosen, '確信度', usesMention);
    const combined = sameValue ? 1 - Math.pow((1 - base), 2) : base * (1 - base);

    return {
      confidence: combined.toFixed(2),
      rule: chosen['rule_name'],
      result: readRuleField(chosen, '代入値', usesMention)
    };
  }

  /**
   * Check whether a rule's `mention` is matched by one of the mention facets:
   * the facet's second path segment must equal the first key of `rule.mention`
   * and that entry must include the facet keyword.
   *
   * @param {Object} rule
   * @param {Array<Object>} mentions
   * @returns {boolean}
   */
  isPatternThird(rule, mentions) {
    if (!hasOwn(rule, 'mention') || rule.mention == null) {
      return false;
    }

    const mentionType = Object.keys(rule.mention)[0];

    return mentions.some(mention => mention.path[1] === mentionType
      && rule.mention[mention.path[1]].includes(mention.keyword));
  }

  /**
   * Group rules by their 優先度 value.
   *
   * @param {Array<Object>} data
   * @returns {Object<string, Array<Object>>|Array} Rules keyed by priority; an
   *   empty array when `data` is empty (kept for backward compatibility).
   */
  groupByPriority(data) {
    if (!data.length) {
      return [];
    }

    return data.reduce((groups, rule) => {
      const priority = rule['優先度'];
      (groups[priority] = groups[priority] || []).push(rule);
      return groups;
    }, {});
  }

  /**
   * Send a text to the analysis API and return the facets whose first path
   * segment does not start with '_', plus their keywords joined with '、'.
   * A failed request is logged and retried; the third failure is rethrown.
   *
   * @param {string} text - Text to analyse.
   * @returns {Promise<{keyword: string, textfacets: Array<Object>}>}
   */
  async callApi(text) {
    const maxAttempts = 3;

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // A FormData body is a stream that can be sent only once, so every
      // attempt needs a fresh instance.
      const form = new FormData();
      form.append('collection', collection);
      form.append('locale', 'ja_JP');
      form.append('output', 'application/json');
      form.append('text', text);

      try {
        const res = await axios({
          method: 'post',
          url: apiUrl,
          data: form,
          headers: form.getHeaders(),
          timeout: timeout
        });

        let keyword = '';
        const textfacetFilter = [];

        if (typeof res.data.metadata.textfacets !== 'undefined') {
          res.data.metadata.textfacets.forEach(el => {
            if (typeof el.path[0] !== 'undefined' && !el.path[0].startsWith('_')) {
              keyword = keyword ? `${keyword}、${el.keyword}` : el.keyword;

              // 'otherPerm' facets carry the full analysed content along.
              if (el.path[0] === 'otherPerm') {
                el.content = res.data.content;
              }
              textfacetFilter.push(el);
            }
          });
        }

        return {
          keyword: keyword,
          textfacets: textfacetFilter
        };
      } catch (error) {
        if (attempt === maxAttempts) {
          throw error;
        }
        logger.log('error', error.stack);
      }
    }
  }
}
404
// The module exposes one shared service instance rather than the class itself.
const processingFileCSVService = new ProcessingFileCSVService();

module.exports = processingFileCSVService;