# Snippet metadata: 4 years ago · Aug 17, 2021, 04:50 PM
1"""Define admin interfaces for :model:`harvester.DataSource`."""
2import os
3import tempfile
4import uuid
5
6from django.contrib import admin, messages
7from django.contrib.auth.mixins import PermissionRequiredMixin
8from django.core.files.storage import FileSystemStorage, default_storage
9from django.db.models import Count
10from django.forms import CharField, ChoiceField, HiddenInput, IntegerField
11from django.http import HttpResponseRedirect
12from django.shortcuts import redirect
13from django.urls import path, reverse
14from django.utils.safestring import mark_safe
15from django.utils.translation import gettext_lazy as _
16from django.views.generic import DetailView
17from django_q.tasks import async_task
18from formtools.wizard.views import SessionWizardView
19
20from harvester.views import DynamicIdentifierSampleView, DynamicUrlRegexSampleView
21from search_engine.functions.es_backend_helpers import mark_resources_for_reindex
22from .actions import hide_child_sets
23from ..common import (
24 DUBLIN_CORE_DEFAULT_ASSOCIATION,
25 DUBLIN_CORE_DEFAULT_ASSOCIATION_INITIAL,
26 MARC_XML_DEFAULT_ASSOCIATION_INITIAL,
27)
28from ..forms import forms
29from ..forms.forms import DataSourceAdminForm, DynamicIdentifierConfigInlineForm
30from ..functions.csv import get_csv_columns
31from ..functions.harvester import (
32 MAPPABLE_FIELDS,
33 BadSource,
34 download_file_and_upload,
35 generate_file_name,
36 get_harvested_sets,
37 get_mapping,
38 get_sets,
39 get_storage_path,
40 prepare_file,
41 upload_to_storage,
42)
43from ..models import DataSource, DynamicIdentifierConfig, Schedule
44
45
class ScheduleInline(admin.TabularInline):
    """Tabular inline that edits Schedule rows from the parent admin page."""

    # Rows are listed by their ``day`` field; at most eight are allowed.
    ordering = ("day",)
    max_num = 8
    model = Schedule
52
53
class DynamicIdentifierConfigInline(admin.StackedInline):
    """Stacked inline for the Dynamic Identifier Configs of a parent object.

    While the parent object reports expired dynamic identifiers, adding,
    changing and deleting configs through this inline is refused; otherwise
    the regular admin permission checks decide.
    """

    model = DynamicIdentifierConfig
    form = DynamicIdentifierConfigInlineForm
    template = "harvester/dynamic_identifier_stacked_inline.html"
    ordering = ("id",)
    extra = 1

    @staticmethod
    def _is_locked(obj):
        """Tell whether ``obj`` is set and has expired dynamic identifiers."""
        return bool(obj and obj.has_expired_dynamic_identifiers())

    def has_add_permission(self, request, obj=None):
        """Return whether new Dynamic Identifier Configs may be added."""
        if self._is_locked(obj):
            return False
        return super().has_add_permission(request, obj=obj)

    def has_change_permission(self, request, obj=None):
        """Return whether existing Dynamic Identifier Configs may be edited."""
        if self._is_locked(obj):
            return False
        return super().has_change_permission(request, obj=obj)

    def has_delete_permission(self, request, obj=None):
        """Return whether Dynamic Identifier Configs may be deleted."""
        if self._is_locked(obj):
            return False
        return super().has_delete_permission(request, obj=obj)
80
81
@admin.register(DataSource)
class DataSourceAdmin(admin.ModelAdmin):
    """Admin configuration for DataSource.

    Besides the usual change list, this admin registers the harvest wizard,
    the reindex confirmation view and two regex sample views under the
    admin URL namespace, and renders per-row buttons that link to them.
    """

    form = DataSourceAdminForm
    inlines = [ScheduleInline, DynamicIdentifierConfigInline]
    actions = [hide_child_sets]
    search_fields = ["name"]
    readonly_fields = ["data_mapping"]
    list_filter = ["updated_at", "exclusive"]
    list_display = [
        "name",
        "sets",
        "exclusive",
        "datasource_actions",
        "updated_at",
        "created_at",
    ]

    def save_related(self, request, form, formsets, change):
        """
        Save related models, then flag resources when identifier configs change.

        After the default save, the formsets are scanned for the one holding
        DynamicIdentifierConfig rows. If that formset deleted, changed or
        created objects, a background task is queued for the parent instance
        and its content resources are marked with
        ``dynamic_identifier_expired=True``.
        """
        super().save_related(request, form, formsets, change)
        for formset in formsets:
            existing = formset.queryset
            # Only a formset that already holds DynamicIdentifierConfig rows counts.
            if not existing or not isinstance(existing[0], DynamicIdentifierConfig):
                continue
            was_touched = (
                formset.deleted_objects
                or formset.changed_objects
                or formset.new_objects
            )
            if not was_touched:
                continue
            owner = formset.instance
            async_task("harvester.helpers.set_dynamic_identifier_expired_task", owner)
            owner.contentresource_set.update(dynamic_identifier_expired=True)
            break

    def get_queryset(self, request):
        """Return the base queryset annotated with the number of related sets."""
        return super().get_queryset(request).annotate(Count("set"))

    def sets(self, obj):
        """Return the annotated set count of ``obj``."""
        return obj.set__count

    sets.admin_order_field = "set__count"

    def get_urls(self):
        """Return the admin URLs, with this admin's custom routes placed first."""
        protect = self.admin_site.admin_view
        # Wizard steps that are shown only when the matching predicate holds.
        step_conditions = {
            "0": NewHarvestWizardView.has_upload_method,
            "1": NewHarvestWizardView.has_csv_format,
            "2": NewHarvestWizardView.has_dublin_core_format,
            "3": NewHarvestWizardView.has_marc_format,
            "5": NewHarvestWizardView.can_select_sets,
        }
        custom_urls = [
            path(
                "<int:pk>/harvest",
                protect(NewHarvestWizardView.as_view(condition_dict=step_conditions)),
                name="harvest",
            ),
            path(
                "<int:pk>/mark_resources_for_reindex",
                protect(MarkDataSourceForReindexView.as_view()),
                name="mark_resources_for_reindex",
            ),
            path(
                "<int:pk>/dynamic_url_regex_sample/",
                protect(DynamicUrlRegexSampleView.as_view()),
                name="dynamic_url_regex_sample",
            ),
            path(
                "<int:pk>/dynamic_identifier_regex_sample/",
                protect(DynamicIdentifierSampleView.as_view()),
                name="dynamic_identifier_regex_sample",
            ),
        ]
        return [*custom_urls, *super().get_urls()]

    def datasource_actions(self, obj):
        """Render the "harvest" and "reindex" buttons for a change-list row."""
        button = '<a href="{}" class="button">{}</a>'
        # Opens the harvest wizard for this data source.
        harvest_url = reverse("admin:harvest", args=[obj.id])
        # Opens the confirmation page that marks its resources for reindex.
        reindex_url = reverse("admin:mark_resources_for_reindex", args=[obj.id])
        rendered = (
            button.format(harvest_url, _("cosechar")),
            button.format(reindex_url, _("reindexar")),
        )
        return mark_safe(" ".join(rendered))

    datasource_actions.short_description = _("acciones")
185
186
class NewHarvestWizardView(PermissionRequiredMixin, SessionWizardView):
    """Session-backed wizard that walks an admin through starting a harvest.

    Steps are identified by the position of their form in ``form_list``
    ("0" to "6"). The ``has_*`` / ``can_select_sets`` predicates at the end
    of the class are meant to be used as ``condition_dict`` entries so that
    steps not applicable to the data source are skipped.

    When the wizard finishes, the chosen mapping is stored on the data
    source and a background harvest task is queued.
    """

    permission_required = ["harvester.can_harvest"]
    template_name = "harvester/harvest_wizard.html"
    data_source = None
    storage_path = ""
    file_path = ""
    form_list = [
        forms.UploadFileForm,  # 0
        forms.MappingAbstractForm,  # 1 <- CsvMappingForm
        forms.MappingAbstractForm,  # 2 <- DublinCoreMappingForm
        forms.MappingAbstractForm,  # 3 <- MarcMappingForm
        forms.MappingAbstractForm,  # 4 <- PositionMappingForm
        forms.SelectSetsForm,  # 5
        forms.ConfirmHarvestForm,  # 6
    ]
    file_storage = default_storage

    # Page title shown for each wizard step.
    _STEP_TITLES = {
        "0": _("subir archivo"),
        "1": _("mapeo por CSV"),
        "2": _("mapeo por Dublin Core"),
        "3": _("mapeo por Marc"),
        "4": _("posición Item Principal"),
        "5": _("selecciona Sets"),
        "6": _("confirmar"),  # Confirmation screen
    }

    def setup(self, request, *args, **kwargs):
        """Initialize view attributes and load the DataSource given by ``pk``."""
        super().setup(request, *args, **kwargs)
        self.data_source = DataSource.objects.get(pk=self.kwargs["pk"])

    def dispatch(self, request, *args, **kwargs):
        """Refuse to run the wizard while the source has expired identifiers.

        In that case a warning message is queued and the user is redirected
        to the data source change list; otherwise the normal dispatch runs.
        """
        if self.data_source.has_expired_dynamic_identifiers():
            messages.add_message(
                self.request,
                messages.WARNING,
                _(
                    'No se puede cosechar "%(fuente)s": Contiene Identificadores'
                    " en proceso de actualización. Intente nuevamente más tarde."
                )
                % {"fuente": self.data_source.name},
            )
            return HttpResponseRedirect(
                reverse("admin:harvester_datasource_changelist")
            )
        return super().dispatch(request, *args, **kwargs)

    def done(self, form_list, **kwargs):
        """Persist the mapping and queue the harvest task.

        ``form_list`` holds the validated forms of every executed step. The
        response is a redirect to the data source change list.
        """
        # The forms are scanned several times below; materialize them once
        # in case the wizard hands over a one-shot iterable.
        form_list = list(form_list)

        # Update Mapping
        self.data_source.data_mapping = get_mapping(form_list)
        self.data_source.save()

        # Get Sets; fall back to every set of the source when no step
        # offered a selection.
        selected_sets = get_selected_sets(form_list)
        if selected_sets is None:
            selected_sets = get_sets(self.data_source)

        # Get Task ID
        group_id = self.storage.extra_data["group_id"]

        # Location of the uploaded file, or None when nothing was uploaded.
        # This assignment had been commented out while the task call below
        # still used the name, which raised NameError on every run.
        # NOTE(review): the upload-to-storage step that used to follow stays
        # disabled; confirm the task worker can read this temporary path.
        temp_file_path = self.get_temp_file_path(form_list)

        async_task(
            "harvester.functions.harvester.harvest_task",
            self.data_source,
            selected_sets,
            group_id,
            temp_file_path,
            task_name=_("Cosechamiento: %(source)s [%(group_id)s]")
            % {"source": self.data_source.name[:40], "group_id": group_id},
            group=group_id,
        )
        messages.add_message(
            self.request,
            messages.SUCCESS,
            _("Tarea de cosechamiento creada correctamente"),
        )
        return HttpResponseRedirect(reverse("admin:harvester_datasource_changelist"))

    def get_context_data(self, form, **kwargs):
        """Return the template context, adding the title of the current step."""
        context = super().get_context_data(form=form, **kwargs)
        title = self._STEP_TITLES.get(self.steps.current)
        if title is not None:
            context.update({"title": title})
        return context

    def get_form(self, step=None, data=None, files=None):
        """Build the form for ``step`` and add its step-specific fields.

        The mapping steps ("1" to "4") receive their fields dynamically, step
        "5" receives the selectable sets, and step "6" receives read-only
        fields summarizing the mappings chosen in steps "1" to "3".
        """
        form = super().get_form(step, data, files)
        if step is None:
            step = self.steps.current

        if step == "1":
            if data:
                # Bound form: reuse the columns cached when the step was built.
                columns = self.storage.extra_data["columns"]
            else:
                columns = self._read_csv_columns()
                self.storage.extra_data["columns"] = columns
            form.fields.update(get_csv_mapping_fields(columns))
            form.initial = self.get_form_initial(step)
        elif step == "2":
            form.fields.update(get_dublin_core_mapping_fields())
        elif step == "3":
            form.fields.update(get_marc_mapping_fields())
        elif step == "4":
            form.fields.update(get_position_mapping_fields())
        elif step == "5":
            try:
                form.fields["sets"].choices = get_sets(self.data_source)
            except BadSource:
                messages.add_message(
                    self.request,
                    messages.ERROR,
                    _(
                        "No fue posible conseguir la lista de Sets."
                        " Revise la configuración de la fuente."
                    ),
                )
        elif step == "6":
            self._add_mapping_summary(form)

        return form

    def _read_csv_columns(self):
        """Read the column names of the uploaded or remote CSV file.

        Queues an error message listing any column reported as invalid and
        returns the columns.
        """
        config = self.data_source.config
        if "0-file_field" in self.request.FILES:
            file_pointer = self.storage.current_step_files["0-file_field"]
        elif config["url"]:
            # The group id is placed in the storage by get_form_initial,
            # which runs while the form is being instantiated.
            file_path = download_file_and_upload(
                config["url"],
                self.data_source.id,
                self.storage.extra_data["group_id"],
            )[0]
            file_pointer = prepare_file(file_path)
        # NOTE(review): with no uploaded file and an empty "url" the name
        # file_pointer is unbound and the next call fails; decide how such a
        # misconfigured source should be reported.
        columns, invalid_columns = get_csv_columns(
            file_pointer,
            config["delimiter"],
            config["quotechar"],
        )
        if invalid_columns:
            messages.add_message(
                self.request,
                messages.ERROR,
                _(
                    "Las siguientes columnas tienen nombres inválidos:"
                    " '%(columns)s'."
                )
                % {"columns": "', '".join(invalid_columns)},
            )
        return columns

    def _add_mapping_summary(self, form):
        """Add disabled fields showing the mappings submitted in steps 1-3."""
        for prev_step in ["1", "2", "3"]:
            prev_data = self.storage.data["step_data"].get(prev_step, None) or {}
            prefix = f"{prev_step}-"
            for key, value in prev_data.items():
                # Drop only the leading step prefix; str.replace would also
                # remove later occurrences inside the field name itself.
                clean_key = key[len(prefix):] if key.startswith(prefix) else key
                clean_val = value[0]
                if clean_val and (
                    clean_val in MAPPABLE_FIELDS or clean_key in MAPPABLE_FIELDS
                ):
                    field = CharField(
                        disabled=True, initial=clean_val, required=False
                    )
                    form.fields.update({clean_key.capitalize(): field})

    def _stored_mapping_initial(self, default):
        """Return initial values from the stored mapping, or ``default``.

        Only mappable fields are taken, each with the first entry of its
        stored value.
        """
        mapping = self.data_source.data_mapping
        if not mapping:
            return default
        return {
            key: value[0] for key, value in mapping.items() if key in MAPPABLE_FIELDS
        }

    def get_form_initial(self, step):
        """Return the initial data for the form of ``step``.

        Also creates the wizard's ``group_id`` on first use and keeps it in
        the wizard storage so every step and the final task share it.
        """
        initial_data = super().get_form_initial(step)

        extra_data = self.storage.extra_data
        if "group_id" not in extra_data:
            extra_data["group_id"] = str(uuid.uuid1())
        initial_data["group_id"] = extra_data["group_id"]

        mapping = self.data_source.data_mapping

        # File path and CSV mapping (column -> model field)
        if step == "1":
            initial_data["file_path"] = self.file_path
            if mapping:
                for model_field, columns in mapping.items():
                    if model_field in MAPPABLE_FIELDS:
                        for column in columns:
                            initial_data[column] = model_field

        # Mapping for dublincore
        if step == "2":
            initial_data.update(
                self._stored_mapping_initial(DUBLIN_CORE_DEFAULT_ASSOCIATION_INITIAL)
            )

        # Mapping for marc
        if step == "3":
            initial_data.update(
                self._stored_mapping_initial(MARC_XML_DEFAULT_ASSOCIATION_INITIAL)
            )

        # Mapping for positions; the form shows them one higher than stored.
        if step == "4" and mapping and "positions" in mapping:
            initial_data.update(
                {
                    f"position_{key}": value + 1
                    for key, value in mapping["positions"].items()
                    if key in MAPPABLE_FIELDS
                }
            )

        # Add Common Elements to Initial #
        # Sets
        if step and "sets" in self.form_list[step].base_fields:
            # The instance loaded in setup() is reused instead of querying
            # the same row again.
            initial_data["sets"] = list(get_harvested_sets(self.data_source))

        # Data Source
        if step and "data_source" in self.form_list[step].base_fields:
            initial_data["data_source"] = self.data_source

        return initial_data

    def get_form_step_files(self, form):
        """Record the temporary path of the uploaded file and return the files.

        Relies on each upload exposing ``temporary_file_path()``.
        """
        for uploaded in form.files.values():
            temp_path = uploaded.temporary_file_path()
            form.cleaned_data["file_path"] = temp_path
            self.file_path = temp_path
        return form.files

    @staticmethod
    def get_temp_file_path(form_list):
        """Return the temporary location of an uploaded file, or None.

        Forms without a ``file_field`` value are skipped.
        """
        for form in form_list:
            uploaded = form.cleaned_data.get("file_field")
            if uploaded:
                return uploaded.file.name
        return None

    def get_temp_file(self):
        """Return the first uploaded file of the request as a file-like object."""
        return next(
            (uploaded.file for uploaded in self.request.FILES.values()), None
        )

    # Conditional Functions
    def has_upload_method(self):
        """Check if the datasource method is upload."""
        return self.data_source.config["method"] == "upload"

    def can_select_sets(self):
        """Check if the datasource allows to select sets."""
        return self.data_source.config["method"] == "api"

    def has_csv_format(self):
        """Check if the datasource format is a csv file."""
        return self.data_source.config["format"] in ["csv", "txt", "tsv"]

    def has_dublin_core_format(self):
        """Check if the datasource format is dublincore."""
        return self.data_source.config["format"] in ["oai_dc"]

    def has_marc_format(self):
        """Check if the datasource format is marc21."""
        return self.data_source.config["format"] in ["marc_xml", "marc_plain"]
485
486
class MarkDataSourceForReindexView(PermissionRequiredMixin, DetailView):
    """Confirmation view that marks a data source's resources for reindex."""

    model = DataSource
    template_name = "harvester/admin/mark_datasource_for_reindex_confirm.html"
    permission_required = ["harvester.can_reindex_datasource"]

    def post(self, request, *args, **kwargs):  # pylint: disable=unused-argument
        """Mark every content resource of the object, then go to its change list."""
        data_source = self.get_object()
        mark_resources_for_reindex(data_source.contentresource_set.all())
        meta = data_source._meta
        changelist_name = "admin:%s_%s_changelist" % (
            meta.app_label,
            meta.model_name,
        )
        return redirect(reverse(changelist_name))
501
502
def get_dublin_core_mapping_fields():
    """Build the form fields used to map a Dublin Core data source.

    Returns a dict with one ChoiceField per mappable field, keyed by the
    field name. Only "title" and "identifier" are required.
    """
    mandatory = ("title", "identifier")
    placeholder = [("", _("Selecciona una opción"))]
    return {
        name: ChoiceField(
            label=_(name),
            choices=placeholder + DUBLIN_CORE_DEFAULT_ASSOCIATION,
            required=name in mandatory,
        )
        for name in MAPPABLE_FIELDS
    }
519
520
def get_marc_mapping_fields():
    """Build the form fields used to map a Marc21 data source.

    Returns a dict with one free-text CharField per mappable field, keyed by
    the field name. Only "title" and "identifier" are required.
    """
    mandatory = ("title", "identifier")
    return {
        name: CharField(
            label=_(name),
            required=name in mandatory,
            help_text=_(
                "Escriba la posición a mapear para Marc21: field$subfield$ind1$ind2$"
            ),
        )
        for name in MAPPABLE_FIELDS
    }
537
538
def get_csv_mapping_fields(columns):
    """Build the form fields used to map the columns of a CSV data source.

    Returns a dict with one optional ChoiceField per entry of ``columns``
    (offering every mappable field), plus a hidden, optional "csv_mapping"
    CharField.
    """
    placeholder = [("", _("Selecciona una opción"))]
    targets = list(zip(MAPPABLE_FIELDS, MAPPABLE_FIELDS))
    fields = {
        column: ChoiceField(
            label=column, choices=placeholder + targets, required=False
        )
        for column in columns
    }
    fields["csv_mapping"] = CharField(required=False, widget=HiddenInput())
    return fields
551
552
def get_selected_sets(validated_form_list):
    """Collect the ``(spec, name)`` pairs of the sets chosen in the wizard.

    Each form whose cleaned data has a "sets" entry contributes the choices
    of its "sets" field whose spec was selected; when several forms qualify
    the last one wins. Returns None when no form has a "sets" entry.
    """
    chosen = None
    for form in validated_form_list:
        try:
            wanted = form.cleaned_data["sets"]
        except KeyError:
            continue
        chosen = [
            (spec, name)
            for spec, name in form.fields["sets"].choices
            if spec in wanted
        ]
    return chosen
564
565
def get_position_mapping_fields():
    """Build the form fields used to choose the Principal Item position.

    Returns a dict with one required IntegerField per mappable field, keyed
    "position_<field>", with a minimum of 1 and an initial value of 1.
    """
    return {
        f"position_{name}": IntegerField(
            label=_(name),
            required=True,
            help_text=_("Seleccione la posición que se usará como Item Principal"),
            min_value=1,
            initial=1,
        )
        for name in MAPPABLE_FIELDS
    }
578