· 5 years ago · Jan 12, 2021, 03:38 PM
const fs = require('fs')
const http = require('http')
const https = require('https')
const path = require('path')

const axios = require('axios')
const camelCase = require('camelcase')
const { mapKeys, isPlainObject, trimEnd, map, find } = require('lodash')
const pMap = require('p-map')
8
// Post-type keys that are combined with the configured `typeName` to build
// collection type names.
const TYPE_AUTHOR = 'author'
// (sic) the identifier spelling is referenced throughout this file.
const TYPE_ATTACHEMENT = 'attachment'

// Scratch directory for in-progress downloads; files are renamed out of it
// once a download has finished.
const TMPDIR = '.temp/downloads'
// Final location of downloaded images. Both directories are passed through
// `path.resolve`, i.e. they are relative to the current working directory.
const DOWNLOAD_DIR = 'src/wp-images'
13
/**
 * Synchronously create a directory together with any missing parents.
 *
 * Delegates to Node's built-in recursive mkdir instead of walking the path by
 * hand, so it works for relative paths and for platforms whose absolute paths
 * do not start with `/`. Calling it for a directory that already exists is a
 * no-op.
 *
 * @param {string} absDirectory Directory to create (a trailing slash is fine).
 * @throws {Error} If the directory cannot be created, e.g. a path component
 *   exists but is not a directory.
 */
function mkdirSyncRecursive (absDirectory) {
  fs.mkdirSync(absDirectory, { recursive: true })
}
24
/**
 * Source plugin that reads post types, users, taxonomies and posts from a
 * WordPress REST API and registers them as collections through the host's
 * `api.loadSource` hook. Optionally downloads images referenced by posts,
 * featured media and ACF fields into DOWNLOAD_DIR.
 */
class WordPressSource {
  /**
   * Default plugin options.
   *
   * @returns {object} A fresh options object on every call.
   */
  static defaultOptions () {
    return {
      baseUrl: '',
      apiBase: 'wp-json',
      perPage: 100,
      concurrent: 10,
      routes: {
        post: '/:slug',
        post_tag: '/tag/:slug',
        category: '/category/:slug',
        author: '/author/:slug'
      },
      typeName: 'WordPress',
      splitPostsIntoFragments: false,
      downloadRemoteImagesFromPosts: false,
      downloadRemoteFeaturedImages: false,
      downloadACFImages: false
    }
  }

  /**
   * @param {object} api Host plugin API; `api.loadSource` and
   *   `api.store.slugify` are used.
   * @param {object} [options] User options, shallowly merged over the defaults.
   * @throws {Error} If `typeName` is empty or `perPage` is outside 1..100.
   */
  constructor (api, options) {
    // `defaultOptions` is a static *method*: it must be invoked. Spreading the
    // function itself contributes no keys, so no defaults would be applied.
    const opts = this.options = { ...WordPressSource.defaultOptions(), ...options }
    this.restBases = { posts: {}, taxonomies: {} }

    if (!opts.typeName) {
      throw new Error(`Missing typeName option.`)
    }

    if (opts.perPage > 100 || opts.perPage < 1) {
      throw new Error(`${opts.typeName}: perPage cannot be more than 100 or less than 1.`)
    }

    const baseUrl = trimEnd(opts.baseUrl, '/')

    this.client = axios.create({
      baseURL: `${baseUrl}/${opts.apiBase}`
    })

    this.routes = opts.routes || {}

    /* Create image directories */
    mkdirSyncRecursive(path.resolve(DOWNLOAD_DIR))
    mkdirSyncRecursive(path.resolve(TMPDIR))
    this.tmpCount = 0
    // Downloads started without being awaited by their caller; drained at the
    // end of source loading so referenced files exist before loading returns.
    this.pendingDownloads = []

    // Slugify a file name, then turn the last `-segment` back into `.segment`
    // (the slugifier replaces the dot in front of the file extension).
    this.slugify = str => api.store.slugify(str).replace(/-([^-]*)$/, '.$1')

    api.loadSource(async actions => {
      this.store = actions

      console.log(`Loading data from ${baseUrl}`)

      await this.getPostTypes(actions)
      await this.getUsers(actions)
      await this.getTaxonomies(actions)
      await this.getPosts(actions)
      await Promise.all(this.pendingDownloads)
    })
  }

  /**
   * Register one collection per WordPress post type and remember each type's
   * REST base for `getPosts`.
   *
   * @param {object} actions Store actions passed to the `loadSource` callback.
   */
  async getPostTypes (actions) {
    const { data } = await this.fetch('wp/v2/types', {}, {})
    const addCollection = actions.addCollection || actions.addContentType

    for (const type in data) {
      const { rest_base: restBase } = data[type]

      this.restBases.posts[type] = restBase

      addCollection({
        typeName: this.createTypeName(type),
        route: this.routes[type] || `/${type}/:slug`
      })
    }
  }

  /**
   * Register the author collection and add one node per WordPress user.
   *
   * @param {object} actions Store actions passed to the `loadSource` callback.
   */
  async getUsers (actions) {
    // Paged, so sites with more users than fit in one response are fully loaded.
    const users = await this.fetchPaged('wp/v2/users')
    const addCollection = actions.addCollection || actions.addContentType

    const authors = addCollection({
      typeName: this.createTypeName(TYPE_AUTHOR),
      route: this.routes.author
    })

    for (const author of users) {
      const fields = this.normalizeFields(author)
      // { 24: url, 48: url } -> { avatar24: url, avatar48: url }
      const avatars = mapKeys(author.avatar_urls, (v, key) => `avatar${key}`)

      authors.addNode({
        ...fields,
        id: author.id,
        title: author.name,
        avatars
      })
    }
  }

  /**
   * Register one collection per taxonomy and add a node for each of its terms.
   *
   * @param {object} actions Store actions passed to the `loadSource` callback.
   */
  async getTaxonomies (actions) {
    const { data } = await this.fetch('wp/v2/taxonomies', {}, {})
    const addCollection = actions.addCollection || actions.addContentType

    for (const type in data) {
      const { rest_base: restBase } = data[type]
      const taxonomy = addCollection({
        typeName: this.createTypeName(type),
        route: this.routes[type]
      })

      this.restBases.taxonomies[type] = restBase

      const terms = await this.fetchPaged(`wp/v2/${restBase}`)

      for (const term of terms) {
        taxonomy.addNode({
          id: term.id,
          title: term.name,
          slug: term.slug,
          content: term.description,
          count: term.count
        })
      }
    }
  }

  /**
   * Collect the `src` and `alt` of every `<img ... src="...">` tag in an HTML
   * string, in document order.
   *
   * The `alt` attribute may appear before or after `src`, or be missing
   * altogether, in which case `alt` is the empty string.
   *
   * @param {string} string HTML to scan.
   * @returns {Array<{url: string, alt: string}>}
   */
  extractImagesFromPostHtml (string) {
    // Same tag pattern as the one `processPostFragments` splits on, so every
    // URL produced by that split is also found here.
    const tagRegex = /<img[^>]* src="([^"]*)"[^>]*>/g
    const altRegex = /\salt="([^"]*)"/

    const images = []
    let tag
    while ((tag = tagRegex.exec(string)) !== null) {
      const alt = altRegex.exec(tag[0])
      images.push({
        url: tag[1],
        alt: alt ? alt[1] : ''
      })
    }

    return images
  }

  /**
   * Download `url` to `destPath/fileName` unless that file already exists.
   *
   * The body is streamed to a temporary file in TMPDIR and renamed into place
   * only after the write stream has been closed, so a partial download never
   * appears under the final name. Both `http:` and `https:` URLs are
   * supported. Redirects are not followed; any status other than 200 is
   * treated as a failure.
   *
   * @param {string} url Remote image URL.
   * @param {string} destPath Destination directory.
   * @param {string} fileName Destination file name.
   * @returns {Promise<void>} Resolves once the file is in place (or already
   *   existed); rejects on request, response, write or rename errors.
   */
  async downloadImage (url, destPath, fileName) {
    const imagePath = path.resolve(destPath, fileName)
    if (fs.existsSync(imagePath)) return

    const tmpPath = path.resolve(TMPDIR, `${++this.tmpCount}.tmp`)
    const transport = /^http:/i.test(url) ? http : https

    return new Promise((resolve, reject) => {
      const file = fs.createWriteStream(tmpPath)
      let request
      let settled = false

      // Single failure path: may be reached from several event sources, so it
      // is idempotent. Cleans up the temp file before rejecting.
      const fail = err => {
        if (settled) return
        settled = true
        console.error(err.message)
        if (request) request.destroy()
        file.destroy()
        // The temp file may not exist yet; a failed unlink is not interesting.
        fs.unlink(tmpPath, () => reject(err))
      }

      file.on('error', fail)
      file.on('finish', () => {
        // Rename only after the descriptor is closed.
        file.close(closeErr => {
          if (closeErr) return fail(closeErr)
          fs.rename(tmpPath, imagePath, renameErr => {
            if (renameErr) return fail(renameErr)
            settled = true
            resolve()
          })
        })
      })

      try {
        request = transport.get(url, response => {
          if (response.statusCode !== 200) {
            response.resume() // discard the body so the socket is released
            return fail(new Error(`Unexpected status ${response.statusCode} - ${url}`))
          }
          response.on('error', fail)
          response.on('aborted', () => fail(new Error(`Download aborted - ${url}`)))
          response.pipe(file)
        })
      } catch (err) {
        // `get` throws synchronously for malformed URLs / unsupported protocols.
        return fail(err)
      }

      request.on('error', fail)
    })
  }

  /**
   * Start downloading an image into DOWNLOAD_DIR without blocking the caller.
   *
   * Failures are logged instead of surfacing as unhandled rejections. The
   * promise is recorded in `pendingDownloads` so the `loadSource` callback can
   * wait for all of them before it returns.
   *
   * @param {string} url Remote image URL.
   * @param {string} fileName Destination file name inside DOWNLOAD_DIR.
   * @returns {Promise<void>} Never rejects.
   */
  queueImageDownload (url, fileName) {
    const download = this.downloadImage(url, DOWNLOAD_DIR, fileName).catch(err => {
      console.warn(`WARNING - Could not download ${url}: ${err.message}`)
    })

    if (!this.pendingDownloads) this.pendingDownloads = []
    this.pendingDownloads.push(download)

    return download
  }

  /**
   * Split post HTML into an ordered list of `html` and `img` fragments.
   *
   * The HTML is split on `<img>` tags. When `downloadRemoteImagesFromPosts`
   * is enabled each image becomes an `img` fragment and its download is
   * queued. Otherwise the image is emitted as an `html` fragment holding only
   * its URL.
   *
   * @param {string} post Rendered post HTML.
   * @returns {Array<object>} Fragments with a `type`, a 1-based `order` and
   *   `fragmentData`.
   */
  processPostFragments (post) {
    // First alt text seen for each URL.
    const altByUrl = new Map()
    for (const { url, alt } of this.extractImagesFromPostHtml(post)) {
      if (!altByUrl.has(url)) altByUrl.set(url, alt)
    }

    // Splitting on a pattern with one capture group yields
    // [html, url, html, url, ..., html]: odd indexes are image URLs.
    const fragments = post.split(/<img[^>]* src="([^"]*)"[^>]*>/)
    const shouldDownload = this.options.downloadRemoteImagesFromPosts

    return fragments.map((fragment, index) => {
      const order = index + 1
      const isImage = index % 2 === 1 && altByUrl.has(fragment)

      if (!isImage || !shouldDownload) {
        return {
          type: 'html',
          order,
          fragmentData: {
            html: fragment
          }
        }
      }

      const fileName = this.slugify(fragment.split('/').pop())
      this.queueImageDownload(fragment, fileName)

      return {
        type: 'img',
        order,
        fragmentData: {
          remoteUrl: fragment,
          fileName,
          image: path.resolve(DOWNLOAD_DIR, fileName),
          alt: altByUrl.get(fragment)
        }
      }
    })
  }

  /**
   * Fetch every post of every registered post type and add it to its
   * collection, with references to author, featured media and taxonomy terms.
   *
   * @param {object} actions Store actions passed to the `loadSource` callback.
   */
  async getPosts (actions) {
    const { createReference } = actions
    const getCollection = actions.getCollection || actions.getContentType

    const authorTypeName = this.createTypeName(TYPE_AUTHOR)
    const attachmentTypeName = this.createTypeName(TYPE_ATTACHEMENT)

    for (const type in this.restBases.posts) {
      const restBase = this.restBases.posts[type]
      const posts = getCollection(this.createTypeName(type))

      const data = await this.fetchPaged(`wp/v2/${restBase}?_embed`)

      for (const post of data) {
        const fields = this.normalizeFields(post)
        fields.author = createReference(authorTypeName, post.author || '0')

        if (post.type !== TYPE_ATTACHEMENT) {
          fields.featuredMedia = createReference(attachmentTypeName, post.featured_media)
        }

        // add references if post has any taxonomy rest bases as properties
        for (const taxonomy in this.restBases.taxonomies) {
          const propName = this.restBases.taxonomies[taxonomy]

          if (Object.prototype.hasOwnProperty.call(post, propName)) {
            fields[camelCase(propName)] = createReference(
              this.createTypeName(taxonomy),
              post[propName]
            )
          }
        }

        // Only string content can be split; anything else is left alone.
        if (this.options.splitPostsIntoFragments && typeof fields.content === 'string' && fields.content) {
          fields.postFragments = this.processPostFragments(fields.content)
        }

        // download the featured image
        const featuredMedia = post._embedded && post._embedded['wp:featuredmedia']
        if (this.options.downloadRemoteFeaturedImages && featuredMedia) {
          try {
            const sourceUrl = featuredMedia[0].source_url
            const featuredImageFileName = this.slugify(sourceUrl.split('/').pop())
            await this.downloadImage(sourceUrl, DOWNLOAD_DIR, featuredImageFileName)
            fields.featuredMediaImage = path.resolve(DOWNLOAD_DIR, featuredImageFileName)
          } catch (err) {
            console.log(err)
            console.log('WARNING - No featured image for post ' + post.slug)
          }
        }

        posts.addNode({
          ...fields,
          id: post.id
        })
      }
    }
  }

  /**
   * Perform a GET request against the REST API.
   *
   * A 401/403 answer is downgraded to a warning and the response is returned
   * with `data` replaced by `fallbackData`. Every other failure is thrown.
   *
   * @param {string} url Path relative to the API base.
   * @param {object} [params] Query parameters.
   * @param {*} [fallbackData] Value used as `data` for 401/403 responses.
   * @returns {Promise<object>} The HTTP client's response object.
   * @throws {Error} `"<code> - <url>"` for network-level failures,
   *   `"<status> - <url>"` for HTTP errors; errors that carry neither a
   *   response nor a code are rethrown unchanged.
   */
  async fetch (url, params = {}, fallbackData = []) {
    try {
      return await this.client.request({ url, params })
    } catch (err) {
      const { response, code, config } = err

      if (!response) {
        if (code) {
          throw new Error(`${code} - ${(config && config.url) || url}`)
        }
        throw err
      }

      const requestUrl = (response.config && response.config.url) || url
      // WordPress error bodies look like { code, message, data: { status } };
      // fall back to the HTTP status for anything else (HTML error pages, ...).
      const body = response.data
      const status = (body && body.data && body.data.status) || response.status

      if ([401, 403].includes(status)) {
        console.warn(`Error: Status ${status} - ${requestUrl}`)
        return { ...response, data: fallbackData }
      }

      throw new Error(`${status} - ${requestUrl}`)
    }
  }

  /**
   * Fetch all pages of a collection endpoint and return the combined items.
   *
   * The first request reveals the page count via the `x-wp-totalpages` header.
   * The remaining pages are fetched with at most `concurrent` requests in
   * flight and appended in page order. A page that fails is logged and
   * skipped.
   *
   * @param {string} path Endpoint path relative to the API base.
   * @returns {Promise<Array>} All items.
   * @throws {Error} If the first request fails or does not yield an array.
   */
  async fetchPaged (path) {
    const { perPage, concurrent } = this.options

    const res = await this.fetch(path, { per_page: perPage })
    const headers = res.headers || {}

    const totalItems = parseInt(headers['x-wp-total'], 10)
    const totalPages = parseInt(headers['x-wp-totalpages'], 10)

    const items = ensureArrayData(path, res.data)

    if (!totalItems || totalPages <= 1) {
      return items
    }

    const queue = []

    for (let page = 2; page <= totalPages; page++) {
      queue.push({ per_page: perPage, page })
    }

    // pMap resolves with results in input order, which keeps pages ordered
    // regardless of which request finishes first.
    const pages = await pMap(queue, async params => {
      try {
        const { data } = await this.fetch(path, params)
        return ensureArrayData(path, data)
      } catch (err) {
        console.log(err.message)
        return []
      }
    }, { concurrency: concurrent })

    for (const page of pages) {
      items.push(...page)
    }

    return items
  }

  /**
   * Normalize an API object: camel-case its keys, drop keys starting with `_`
   * and normalize every value.
   *
   * @param {object} fields Raw object from the API.
   * @param {boolean} [isACF] Whether the object is nested inside an `acf` key.
   * @returns {object}
   */
  normalizeFields (fields, isACF) {
    const res = {}

    for (const key in fields) {
      if (key.startsWith('_')) continue // skip links and embeds etc
      res[camelCase(key)] = this.normalizeFieldValue(fields[key], isACF || key === 'acf')
    }

    return res
  }

  /**
   * Normalize a single value from the API.
   *
   * - `null` / `undefined` become `null`.
   * - Arrays are normalized element-wise.
   * - Plain objects become a local image descriptor (ACF images, when
   *   `downloadACFImages` is on), a store reference (objects carrying
   *   `post_type` or `filename` plus an id), their `rendered` value, or a
   *   recursively normalized object.
   * - ACF strings that look like an https image URL are queued for download
   *   and replaced by the local path (when `downloadACFImages` is on).
   * - Everything else is returned unchanged.
   *
   * @param {*} value Raw value.
   * @param {boolean} [isACF] Whether the value is nested inside an `acf` key.
   * @returns {*}
   */
  normalizeFieldValue (value, isACF) {
    if (value === null || value === undefined) return null

    if (Array.isArray(value)) {
      return value.map(v => this.normalizeFieldValue(v, isACF))
    }

    const downloadACF = Boolean(isACF && this.options.downloadACFImages)

    if (isPlainObject(value)) {
      const id = value.ID || value.id

      if (value.type === 'image' && value.filename && value.url && downloadACF) {
        const filename = this.slugify(value.filename)
        this.queueImageDownload(value.url, filename)

        return {
          src: path.resolve(DOWNLOAD_DIR, filename),
          title: value.title,
          alt: value.description
        }
      }

      if (value.post_type && id) {
        return this.store.createReference(this.createTypeName(value.post_type), id)
      }

      if (value.filename && id) {
        return this.store.createReference(this.createTypeName(TYPE_ATTACHEMENT), id)
      }

      if (Object.prototype.hasOwnProperty.call(value, 'rendered')) {
        return value.rendered
      }

      return this.normalizeFields(value, isACF)
    }

    if (downloadACF && typeof value === 'string' && /^https:\/\/.*\/.*\.(jpg|png|svg|jpeg)($|\?)/i.test(value)) {
      const filename = this.slugify(value.split('/').pop())
      console.log(`Downloading ${filename}`)
      this.queueImageDownload(value, filename)

      return path.resolve(DOWNLOAD_DIR, filename)
    }

    return value
  }

  /**
   * Build a PascalCase type name from the configured `typeName` prefix and
   * `name`.
   *
   * @param {string} [name]
   * @returns {string}
   */
  createTypeName (name = '') {
    return camelCase(`${this.options.typeName} ${name}`, { pascalCase: true })
  }
}
431
/**
 * Return `data` as an array, parsing it as JSON first when it is not one.
 *
 * @param {string} url Request path, used only in the error message.
 * @param {*} data Response body: an array, or a string expected to hold a
 *   JSON array.
 * @returns {Array}
 * @throws {Error} If `data` is neither an array nor JSON text for an array.
 *   The message contains the first 150 characters of what was received.
 */
function ensureArrayData (url, data) {
  if (Array.isArray(data)) return data

  let parsed
  try {
    parsed = JSON.parse(data)
  } catch (err) {
    parsed = undefined
  }

  if (Array.isArray(parsed)) return parsed

  // Build a printable preview without assuming `data` is a string.
  let received
  try {
    received = typeof data === 'string' ? data : JSON.stringify(data)
  } catch (err) {
    received = undefined
  }

  throw new Error(
    `Failed to fetch ${url}\n` +
    `Expected JSON response but received:\n` +
    `${String(received === undefined ? data : received).trim().substring(0, 150)}...\n`
  )
}
446
447module.exports = WordPressSource
448