1# !/usr/bin/env python
2# -*- coding: utf-8 -*-
3"""Dataverse data-types data model."""
4from __future__ import absolute_import
5
6from pyDataverse.utils import dict_to_json
7from pyDataverse.utils import read_file_json
8from pyDataverse.utils import write_file_json
9
10"""
11Data-structure to work with data and metadata of Dataverses, Datasets and
12Datafiles - coming from different sources.
13"""
14
15
class Dataverse(object):
    """Base class for the Dataverse data model.

    Holds metadata of a Dataverse and converts between flat dicts,
    Dataverse api upload json and json files.
    """

    # Attributes required for Dataverse metadata json.
    __attr_required_metadata = [
        'alias',
        'name',
        'dataverseContacts'
    ]
    # Attributes valid for Dataverse metadata json.
    __attr_valid_metadata = [
        'alias',
        'name',
        'affiliation',
        'description',
        'dataverseContacts',
        'dataverseType'
    ]
    # Attributes valid for Dataverse class.
    __attr_valid_class = [
        # 'datasets',
        # 'dataverses',
        'pid'
    ] + __attr_valid_metadata

    def __init__(self):
        """Init a Dataverse() class.

        Examples
        -------
        Create a Dataverse::

            >>> from pyDataverse.models import Dataverse
            >>> dv = Dataverse()

        """
        # Misc
        self.datasets = []
        self.dataverses = []
        self.pid = None

        # Metadata
        self.name = None
        self.alias = None
        self.dataverseContacts = []
        self.affiliation = None
        self.description = None
        self.dataverseType = None

    def __str__(self):
        """Return name of Dataverse() class for users."""
        return 'pyDataverse Dataverse() model class.'

    def set(self, data):
        """Set class attributes with a flat dict.

        Parameters
        ----------
        data : dict
            Flat dict with data. Key's must be name the same as the class
            attribute, the data should be mapped to.

        Examples
        -------
        Set Dataverse attributes via flat dict::

            >>> from pyDataverse.models import Dataverse
            >>> dv = Dataverse()
            >>> data = {
            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
            >>>     'name': 'Test pyDataverse',
            >>>     'alias': 'test-pyDataverse'
            >>> }
            >>> dv.set(data)
            >>> dv.name
            'Test pyDataverse'

        """
        for key, val in data.items():
            if key in self.__attr_valid_class:
                self.__setattr__(key, val)
            else:
                # TODO: Raise Exception
                print('Key {0} not valid.'.format(key))

    def import_metadata(self, filename, format='dv_up'):
        """Import Dataverse metadata from file.

        This simply parses in data with valid attribute naming as keys.
        Data must not be complete, and also attributes required for the
        metadata json export can be missing.

        Parameters
        ----------
        filename : string
            Filename with full path.
        format : string
            Data format of input. Available formats are: `dv_up` for Dataverse
            Api upload compatible format.

        Examples
        -------
        Import metadata coming from json file::

            >>> from pyDataverse.models import Dataverse
            >>> dv = Dataverse()
            >>> dv.import_metadata('tests/data/dataverse_min.json')
            >>> dv.name
            'Test pyDataverse'

        """
        data = {}
        if format == 'dv_up':
            metadata = read_file_json(filename)
            # Get first level metadata and parse it automatically.
            for attr in self.__attr_valid_metadata:
                if attr in metadata:
                    data[attr] = metadata[attr]
            self.set(data)
        elif format == 'dv_down':
            metadata = read_file_json(filename)
            # NOTE(review): `metadata` is read but never parsed here, so
            # `data` stays empty and `set()` is a no-op. Kept as-is to
            # preserve behavior; `dv_down` parsing is not implemented yet.
            self.set(data)
        else:
            # TODO: Exception
            print('Data-format not right.')

    def is_valid(self):
        """Check if set attributes are valid for Dataverse api metadata creation.

        The attributes required are listed in `__attr_required_metadata`.

        Returns
        -------
        bool
            True, if creation of metadata json is possible. False, if not.

        Examples
        -------
        Check if metadata is valid for Dataverse api upload::

            >>> from pyDataverse.models import Dataverse
            >>> dv = Dataverse()
            >>> data = {
            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
            >>>     'name': 'Test pyDataverse',
            >>>     'alias': 'test-pyDataverse'
            >>> }
            >>> dv.set(data)
            >>> dv.is_valid()
            True
            >>> dv.name = None
            >>> dv.is_valid()
            False

        """
        is_valid = True
        for attr in self.__attr_required_metadata:
            # Falsy values (None, '', []) count as missing.
            if not getattr(self, attr):
                is_valid = False
                print('attribute \'{0}\' missing.'.format(attr))
        return is_valid

    def dict(self, format='dv_up'):
        """Create dicts in different data formats.

        `dv_up`: Checks if data is valid for the different dict formats.

        Parameters
        ----------
        format : string
            Data format for dict creation. Available formats are: `dv_up` with
            all metadata for Dataverse api upload, and `all` with all attributes
            set.

        Returns
        -------
        dict
            Data as dict, or None if the format is unknown or the data is
            not valid for `dv_up`.

        Examples
        -------
        Get dict of Dataverse metadata::

            >>> from pyDataverse.models import Dataverse
            >>> dv = Dataverse()
            >>> data = {
            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
            >>>     'name': 'Test pyDataverse',
            >>>     'alias': 'test-pyDataverse'
            >>> }
            >>> dv.set(data)
            >>> data = dv.dict()
            >>> data['name']
            'Test pyDataverse'

        Todo
        -------
        Validate standards.

        """
        data = {}
        if format == 'dv_up':
            if self.is_valid():
                for attr in self.__attr_valid_metadata:
                    if getattr(self, attr) is not None:
                        data[attr] = getattr(self, attr)
                # TODO: check whether required attributes are set -> Exception
                return data
            else:
                print('dict can not be created. Data is not valid for format')
                return None
        elif format == 'all':
            for attr in self.__attr_valid_class:
                if getattr(self, attr) is not None:
                    data[attr] = getattr(self, attr)
            return data
        else:
            # TODO: Exception
            print('Format not right for dict.')
            return None

    def json(self, format='dv_up'):
        r"""Create json from attributes.

        Parameters
        ----------
        format : string
            Data format of input. Available formats are: `dv_up` for Dataverse
            Api upload compatible format and `all` with all attributes named in
            `__attr_valid_class`.

        Returns
        -------
        string
            json-formatted string of Dataverse metadata for api upload, or
            None if the underlying dict could not be created.

        Examples
        -------
        Get dict of Dataverse metadata::

            >>> from pyDataverse.models import Dataverse
            >>> dv = Dataverse()
            >>> data = {
            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
            >>>     'name': 'Test pyDataverse',
            >>>     'alias': 'test-pyDataverse'
            >>> }
            >>> dv.set(data)
            >>> data = dv.json()
            >>> data
            '{\n  "name": "Test pyDataverse",\n  "dataverseContacts": [\n    {\n      "contactEmail": "test@example.com"\n    }\n  ],\n  "alias": "test-pyDataverse"\n}'

        Todo
        -------
        Validate standards.

        """
        if format == 'dv_up':
            data = self.dict('dv_up')
            if data:
                return dict_to_json(data)
            else:
                return None
        elif format == 'all':
            data = self.dict('all')
            if data:
                return dict_to_json(data)
            else:
                return None
        else:
            # TODO Exception
            print('data format not valid.')

    def export_metadata(self, filename, format='dv_up'):
        """Export Dataverse metadata to Dataverse api upload json.

        Parameters
        ----------
        filename : string
            Filename with full path.
        format : string
            Data format for export. Available format is: `dv_up` with all
            metadata for Dataverse api upload.

        Examples
        -------
        Export Dataverse metadata::

            >>> from pyDataverse.models import Dataverse
            >>> dv = Dataverse()
            >>> data = {
            >>>     'dataverseContacts': [{'contactEmail': 'test@example.com'}],
            >>>     'name': 'Test pyDataverse',
            >>>     'alias': 'test-pyDataverse'
            >>> }
            >>> dv.set(data)
            >>> dv.export_metadata('tests/data/dataverse_export.json')

        """
        if format == 'dv_up':
            return write_file_json(filename, self.dict())
        else:
            # TODO: Exception
            print('Data-format not right.')
320
321
class Dataset(object):
    """Base class for the Dataset data model."""

    # Attributes required for Dataset metadata json.
    __attr_required_metadata = [
        'title',
        'author',
        'datasetContact',
        'dsDescription',
        'subject'
    ]

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion'].
    __attr_valid_metadata_datasetVersion = [
        'license',
        'termsOfUse',
        'termsOfAccess'
    ]

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion']['metadataBlocks']['citation'] with a plain value.
    # BUGFIX: 'kindOfData' was listed twice; the duplicate caused dict('dv_up')
    # to append the citation field twice.
    __attr_valid_metadata_citation_dicts = [
        'title',
        'subtitle',
        'alternativeTitle',
        'alternativeURL',
        'subject',
        'notesText',
        'productionDate',
        'productionPlace',
        'distributionDate',
        'depositor',
        'dateOfDeposit',
        'kindOfData',
        'seriesName',
        'seriesInformation',
        'relatedMaterial',
        'relatedDatasets',
        'otherReferences',
        'dataSources',
        'originOfSources',
        'characteristicOfSources',
        'accessToSources'
    ]

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion']['metadataBlocks']['citation']['fields'] that hold
    # compound (list-of-dict) values, mapped to their valid sub-field keys.
    __attr_valid_metadata_citation_arrays = {
        'otherId': ['otherIdAgency', 'otherIdValue'],
        'author': ['authorName', 'authorAffiliation', 'authorIdentifierScheme',
                   'authorIdentifier'],
        'datasetContact': ['datasetContactName', 'datasetContactAffiliation',
                           'datasetContactEmail'],
        'dsDescription': ['dsDescriptionValue', 'dsDescriptionDate'],
        'keyword': ['keywordValue', 'keywordVocabulary',
                    'keywordVocabularyURI'],
        'producer': ['producerName', 'producerAffiliation',
                     'producerAbbreviation', 'producerURL', 'producerLogoURL'],
        'contributor': ['contributorType', 'contributorName'],
        'grantNumber': ['grantNumberAgency', 'grantNumberValue'],
        'topicClassification': ['topicClassValue', 'topicClassVocab'],
        'publication': ['publicationCitation', 'publicationIDType',
                        'publicationIDNumber', 'publicationURL'],
        'distributor': ['distributorName', 'distributorAffiliation',
                        'distributorAbbreviation', 'distributorURL',
                        'distributorLogoURL'],
        'timePeriodCovered': ['timePeriodCoveredStart',
                              'timePeriodCoveredEnd'],
        'dateOfCollection': ['dateOfCollectionStart', 'dateOfCollectionEnd'],
        'software': ['softwareName', 'softwareVersion']
    }

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion']['metadataBlocks']['geospatial'] with a plain value.
    __attr_valid_metadata_geospatial_dicts = [
        'geographicUnit'
    ]

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion']['metadataBlocks']['geospatial']['fields'] (compound).
    __attr_valid_metadata_geospatial_arrays = {
        'geographicCoverage': ['country', 'state', 'city',
                               'otherGeographicCoverage'],
        'geographicBoundingBox': ['westLongitude', 'eastLongitude',
                                  'northLongitude', 'southLongitude']
    }

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion']['metadataBlocks']['socialscience'] with a plain value.
    __attr_valid_metadata_socialscience_dicts = [
        'unitOfAnalysis',
        'universe',
        'timeMethod',
        'dataCollector',
        'collectorTraining',
        'frequencyOfDataCollection',
        'samplingProcedure',
        'deviationsFromSampleDesign',
        'collectionMode',
        'researchInstrument',
        'dataCollectionSituation',
        'actionsToMinimizeLoss',
        'controlOperations',
        'weighting',
        'cleaningOperations',
        'datasetLevelErrorNotes',
        'responseRate',
        'samplingErrorEstimates',
        'otherDataAppraisal',
    ]

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion']['metadataBlocks']['journal'] with a plain value.
    __attr_valid_metadata_journal_dicts = [
        'journalArticleType'
    ]

    # Dataset metadata attributes of Dataverse api upload inside
    # ['datasetVersion']['metadataBlocks']['journal']['fields'] (compound).
    __attr_valid_metadata_journal_arrays = {
        'journalVolumeIssue': ['journalVolume', 'journalIssue',
                               'journalPubDate']
    }

    # Attributes valid for Dataset class (all metadata blocks plus misc).
    __attr_valid_class = (
        ['datafiles']
        + __attr_valid_metadata_datasetVersion
        + __attr_valid_metadata_citation_dicts
        + list(__attr_valid_metadata_citation_arrays.keys())
        + __attr_valid_metadata_geospatial_dicts
        + list(__attr_valid_metadata_geospatial_arrays.keys())
        + __attr_valid_metadata_socialscience_dicts
        + __attr_valid_metadata_journal_dicts
        + list(__attr_valid_metadata_journal_arrays.keys())
    )

    # Attributes and their typeClass for the Dataverse api upload json.
    __attr_type_class = {
        'subject': 'controlledVocabulary',
        'otherId': 'compound',
        'author': 'compound',
        'datasetContact': 'compound',
        'dsDescription': 'compound',
        'keyword': 'compound',
        'producer': 'compound',
        'contributor': 'compound',
        'grantNumber': 'compound',
        'topicClassification': 'compound',
        'publication': 'compound',
        'distributor': 'compound',
        'timePeriodCovered': 'compound',
        'dateOfCollection': 'compound',
        'software': 'compound',
    }
494
495 def __init__(self):
496 """Init a Dataset() class.
497
498 Examples
499 -------
500 Create a Dataverse::
501
502 >>> from pyDataverse.models import Dataset
503 >>> ds = Dataset()
504
505 """
506 """Misc"""
507 self.datafiles = []
508
509 """Metadata: dataset"""
510 self.license = None
511 self.termsOfUse = None
512 self.termsOfAccess = None
513
514 """Metadata: citation"""
515 self.citation_displayName = None
516 self.title = None
517 self.subtitle = None
518 self.alternativeTitle = None
519 self.alternativeURL = None
520 self.otherId = None
521 self.author = None
522 self.datasetContact = None
523 self.dsDescription = None
524 self.subject = None
525 self.keyword = None
526 self.topicClassification = None
527 self.publication = None
528 self.notesText = None
529 self.producer = None
530 self.productionDate = None
531 self.productionPlace = None
532 self.contributor = None
533 self.grantNumber = None
534 self.distributor = None
535 self.distributionDate = None
536 self.depositor = None
537 self.dateOfDeposit = None
538 self.timePeriodCovered = None
539 self.dateOfCollection = None
540 self.kindOfData = None
541 self.seriesName = None
542 self.seriesInformation = None
543 self.software = None
544 self.relatedMaterial = None
545 self.relatedDatasets = None
546 self.otherReferences = None
547 self.dataSources = None
548 self.originOfSources = None
549 self.characteristicOfSources = None
550 self.accessToSources = None
551
552 """Metadata: geospatial"""
553 self.geospatial_displayName = None
554 self.geographicCoverage = None
555 self.geographicUnit = None
556 self.geographicBoundingBox = None
557
558 """Metadata: socialscience"""
559 self.socialscience_displayName = None
560 self.unitOfAnalysis = None
561 self.universe = None
562 self.timeMethod = None
563 self.dataCollector = None
564 self.collectorTraining = None
565 self.frequencyOfDataCollection = None
566 self.samplingProcedure = None
567 self.targetSampleActualSize = None
568 self.targetSampleSizeFormula = None
569 self.socialScienceNotesType = None
570 self.socialScienceNotesSubject = None
571 self.socialScienceNotesText = None
572 self.deviationsFromSampleDesign = None
573 self.collectionMode = None
574 self.researchInstrument = None
575 self.dataCollectionSituation = None
576 self.actionsToMinimizeLoss = None
577 self.controlOperations = None
578 self.weighting = None
579 self.cleaningOperations = None
580 self.datasetLevelErrorNotes = None
581 self.responseRate = None
582 self.samplingErrorEstimates = None
583 self.otherDataAppraisal = None
584
585 """Metadata: journal"""
586 self.journal_displayName = None
587 self.journalVolumeIssue = None
588 self.journalArticleType = None
589
590 def __str__(self):
591 """Return name of Dataset() class for users."""
592 return 'pyDataverse Dataset() model class.'
593
594 def set(self, data):
595 """Set class attributes with a flat dict as input.
596
597 Parameters
598 ----------
599 data : dict
600 Flat dict with data. Key's must be name the same as the class
601 attribute, the data should be mapped to.
602
603 Examples
604 -------
605 Set Dataverse attributes via flat dict::
606
607 >>> from pyDataverse.models import Dataset
608 >>> ds = Dataset()
609 >>> data = {
610 >>> 'title': 'pyDataverse study 2019',
611 >>> 'dsDescription': 'New study about pyDataverse usage in 2019'
612 >>> }
613 >>> ds.set(data)
614 >>> ds.title
615 'pyDataverse study 2019'
616
617 """
618 for key, val in data.items():
619 if key in self.__attr_valid_class or key == 'citation_displayName' or key == 'geospatial_displayName' or key == 'socialscience_displayName' or key == 'journal_displayName' or key == 'targetSampleActualSize' or key == 'targetSampleSizeFormula' or key == 'socialScienceNotesType' or key == 'socialScienceNotesText' or key == 'socialScienceNotesSubject':
620 self.__setattr__(key, val)
621 else:
622 # TODO: Raise Exception
623 print('Key {0} not valid.'.format(key))
624
    def import_metadata(self, filename, format='dv_up'):
        """Import Dataset metadata from file.

        Reads a Dataverse api json file, flattens the nested metadata blocks
        (citation, geospatial, socialscience, journal) into a flat dict and
        passes it to `set()`.

        Parameters
        ----------
        filename : string
            Filename with full path.
        format : string
            Data format of input. Available formats are: `dv_up` for Dataverse
            api upload compatible format.

        Examples
        -------
        Set Dataverse attributes via flat dict::

            >>> from pyDataverse.models import Dataset
            >>> ds = Dataset()
            >>> ds.import_metadata('tests/data/dataset_full.json')
            >>> ds.title
            'Replication Data for: Title'

        """
        data = {}
        if format == 'dv_up':
            metadata = read_file_json(filename)
            """dataset"""
            # get first level metadata and parse it automatically
            for key, val in metadata['datasetVersion'].items():
                if key in self.__attr_valid_metadata_datasetVersion:
                    data[key] = val

            # get nested metadata and parse it manually
            # NOTE(review): 'dataverseContacts' is looked up on the top-level
            # json, not under 'datasetVersion' — presumably copied from the
            # Dataverse model; confirm this key actually occurs in dataset
            # json. Also 'contactEmail' is not a valid Dataset key for set().
            if 'dataverseContacts' in metadata:
                data['contactEmail'] = []
                for contact in metadata['dataverseContacts']:
                    for key, val in contact.items():
                        if key == 'contactEmail':
                            data['contactEmail'].append(val)

            """citation"""
            if 'citation' in metadata['datasetVersion']['metadataBlocks']:
                citation = metadata['datasetVersion']['metadataBlocks']['citation']
                if 'displayName' in citation:
                    data['citation_displayName'] = citation['displayName']

                for field in citation['fields']:
                    # simple (plain value) citation attributes
                    if field['typeName'] in self.__attr_valid_metadata_citation_dicts:
                        data[field['typeName']] = field['value']

                    # compound attributes: list of sub-field dicts
                    if field['typeName'] in self.__attr_valid_metadata_citation_arrays:
                        data[field['typeName']] = self.__parse_dicts(
                            field['value'],
                            self.__attr_valid_metadata_citation_arrays[field['typeName']])

                    # 'series' is a compound dict (not a list): flatten manually
                    if field['typeName'] == 'series':
                        if 'seriesName' in field['value']:
                            data['seriesName'] = field['value']['seriesName']['value']
                        if 'seriesInformation' in field['value']:
                            data['seriesInformation'] = field['value']['seriesInformation']['value']
            else:
                # TODO: Exception
                print('citation not in json')

            """geospatial"""
            if 'geospatial' in metadata['datasetVersion']['metadataBlocks']:
                geospatial = metadata['datasetVersion']['metadataBlocks']['geospatial']
                # NOTE(review): displayName is set directly on self here,
                # unlike the citation block which routes it through `data`.
                if 'displayName' in geospatial:
                    self.__setattr__('geospatial_displayName',
                                     geospatial['displayName'])

                for field in geospatial['fields']:
                    # simple (plain value) geospatial attributes
                    if field['typeName'] in self.__attr_valid_metadata_geospatial_dicts:
                        data[field['typeName']] = field['value']

                    # compound attributes: list of sub-field dicts
                    if field['typeName'] in self.__attr_valid_metadata_geospatial_arrays:
                        data[field['typeName']] = self.__parse_dicts(
                            field['value'],
                            self.__attr_valid_metadata_geospatial_arrays[field['typeName']])
            else:
                # TODO: Exception
                print('geospatial not in json')

            """socialscience"""
            if 'socialscience' in metadata['datasetVersion']['metadataBlocks']:
                socialscience = metadata['datasetVersion']['metadataBlocks']['socialscience']
                if 'displayName' in socialscience:
                    self.__setattr__('socialscience_displayName',
                                     socialscience['displayName'])

                for field in socialscience['fields']:
                    # simple (plain value) socialscience attributes
                    if field['typeName'] in self.__attr_valid_metadata_socialscience_dicts:
                        data[field['typeName']] = field['value']

                    # 'targetSampleSize' is a compound dict: flatten manually
                    if field['typeName'] == 'targetSampleSize':
                        if 'targetSampleActualSize' in field['value']:
                            data['targetSampleActualSize'] = field['value']['targetSampleActualSize']['value']
                        if 'targetSampleSizeFormula' in field['value']:
                            data['targetSampleSizeFormula'] = field['value']['targetSampleSizeFormula']['value']

                    # 'socialScienceNotes' is a compound dict: flatten manually
                    if field['typeName'] == 'socialScienceNotes':
                        if 'socialScienceNotesType' in field['value']:
                            data['socialScienceNotesType'] = field['value']['socialScienceNotesType']['value']
                        if 'socialScienceNotesSubject' in field['value']:
                            data['socialScienceNotesSubject'] = field['value']['socialScienceNotesSubject']['value']
                        if 'socialScienceNotesText' in field['value']:
                            data['socialScienceNotesText'] = field['value']['socialScienceNotesText']['value']
            else:
                # TODO: Exception
                print('socialscience not in json')

            """journal"""
            if 'journal' in metadata['datasetVersion']['metadataBlocks']:
                journal = metadata['datasetVersion']['metadataBlocks']['journal']
                if 'displayName' in journal:
                    self.__setattr__('journal_displayName',
                                     journal['displayName'])

                for field in journal['fields']:
                    # simple (plain value) journal attributes
                    if field['typeName'] in self.__attr_valid_metadata_journal_dicts:
                        data[field['typeName']] = field['value']

                    # compound attributes: list of sub-field dicts
                    if field['typeName'] in self.__attr_valid_metadata_journal_arrays:
                        data[field['typeName']] = self.__parse_dicts(
                            field['value'],
                            self.__attr_valid_metadata_journal_arrays[field['typeName']])
            else:
                # TODO: Exception
                print('journal not in json')

            self.set(data)
        elif format == 'dv_down':
            metadata = read_file_json(filename)
            # NOTE(review): `metadata` is read but never parsed, so `data`
            # stays empty and `set()` is a no-op — dv_down not implemented.
            self.set(data)
        else:
            # TODO: Exception
            print('Data-format not right')
761
762 def __parse_dicts(self, data, attr_list):
763 """Parse out Dataverse api metadata dicts.
764
765 Parameters
766 ----------
767 data : list
768 List of Dataverse api metadata fields.
769 attr_list : list
770 List of attributes to be parsed.
771
772 Returns
773 -------
774 list
775 List of dicts with parsed out key-value pairs.
776
777 """
778 data_tmp = []
779
780 for d in data:
781 tmp_dict = {}
782 for key, val in d.items():
783 if key in attr_list:
784 tmp_dict[key] = val['value']
785 else:
786 print('Key \'{0}\' not in attribute list'.format(key))
787 data_tmp.append(tmp_dict)
788
789 return data_tmp
790
791 def is_valid(self):
792 """Check if attributes available are valid for Dataverse api metadata creation.
793
794 The attributes required are listed in `__attr_required_metadata`.
795
796 Returns
797 -------
798 bool
799 True, if creation of metadata json is possible. False, if not.
800
801 Examples
802 -------
803 Check if metadata is valid for Dataverse api upload::
804
805 >>> from pyDataverse.models import Dataset
806 >>> ds = Dataset()
807 >>> data = {
808 >>> 'title': 'pyDataverse study 2019',
809 >>> 'dsDescription': 'New study about pyDataverse usage in 2019'
810 >>> }
811 >>> ds.set(data)
812 >>> ds.is_valid()
813 False
814 >>> ds.author = [{'authorName': 'LastAuthor1, FirstAuthor1'}]
815 >>> ds.datasetContact = [{'datasetContactName': 'LastContact1, FirstContact1'}]
816 >>> ds.subject = ['Engineering']
817 >>> ds.is_valid()
818 True
819
820 Todo
821 -------
822 Test out required fields or ask Harvard.
823
824 """
825 is_valid = True
826
827 # check if all required attributes are set
828 for attr in self.__attr_required_metadata:
829 if not self.__getattribute__(attr):
830 is_valid = False
831 print('Metadata not valid: attribute \'{0}\' missing.'.format(attr))
832
833 # check if attribute sets are complete where necessary
834 tp_cov = self.__getattribute__('timePeriodCovered')
835 if tp_cov:
836 for tp in tp_cov:
837 if 'timePeriodCoveredStart' in tp or 'timePeriodCoveredEnd' in tp:
838 if not ('timePeriodCoveredStart' in tp and 'timePeriodCoveredEnd' in tp):
839 is_valid = False
840
841 d_coll = self.__getattribute__('dateOfCollection')
842 if d_coll:
843 for d in d_coll:
844 if 'dateOfCollectionStart' in d or 'dateOfCollectionEnd' in d:
845 if not ('dateOfCollectionStart' in d and 'dateOfCollectionEnd' in d):
846 is_valid = False
847
848 authors = self.__getattribute__('author')
849 if authors:
850 for a in authors:
851 if 'authorAffiliation' in a or 'authorIdentifierScheme' in a or 'authorIdentifier' in a:
852 if 'authorName' not in a:
853 is_valid = False
854
855 ds_contac = self.__getattribute__('datasetContact')
856 if ds_contac:
857 for c in ds_contac:
858 if 'datasetContactAffiliation' in c or 'datasetContactEmail' in c:
859 if 'datasetContactName' not in c:
860 is_valid = False
861
862 producer = self.__getattribute__('producer')
863 if producer:
864 for p in producer:
865 if 'producerAffiliation' in p or 'producerAbbreviation' in p or 'producerURL' in p or 'producerLogoURL' in p:
866 if not p['producerName']:
867 is_valid = False
868
869 contributor = self.__getattribute__('contributor')
870 if contributor:
871 for c in contributor:
872 if 'contributorType' in c:
873 if 'contributorName' not in c:
874 is_valid = False
875
876 distributor = self.__getattribute__('distributor')
877 if distributor:
878 for d in distributor:
879 if 'distributorAffiliation' in d or 'distributorAbbreviation' in d or 'distributorURL' in d or 'distributorLogoURL' in d:
880 if 'distributorName' not in d:
881 is_valid = False
882
883 bbox = self.__getattribute__('geographicBoundingBox')
884 if bbox:
885 for b in bbox:
886 if b:
887 if not (
888 'westLongitude' in b and 'eastLongitude' in b and 'northLongitude' in b and 'southLongitude' in b):
889 is_valid = False
890
891 return is_valid
892
893 def dict(self, format='dv_up'):
894 """Create dicts in different data formats.
895
896 Parameters
897 ----------
898 format : string
899 Data format for dict creation. Available formats are: `dv_up` with
900 all metadata for Dataverse api upload, and `all` with all attributes
901 set.
902
903 Returns
904 -------
905 dict
906 Data as dict.
907
908 Examples
909 -------
910 Get dict of Dataverse metadata::
911
912 >>> from pyDataverse.models import Dataset
913 >>> ds = Dataset()
914 >>> data = {
915 >>> 'title': 'pyDataverse study 2019',
916 >>> 'dsDescription': 'New study about pyDataverse usage in 2019'
917 >>> }
918 >>> ds.set(data)
919 >>> data = dv.dict()
920 >>> data['title']
921 'pyDataverse study 2019'
922
923 Todo
924 -------
925 Validate standard
926
927 """
928 if format == 'dv_up':
929 if self.is_valid():
930 data = {}
931 data['datasetVersion'] = {}
932 data['datasetVersion']['metadataBlocks'] = {}
933 citation = {}
934 citation['fields'] = []
935 geospatial = {}
936 geospatial['fields'] = []
937 socialscience = {}
938 socialscience['fields'] = []
939 journal = {}
940 journal['fields'] = []
941
942 """dataset"""
943 # Generate first level attributes
944 for attr in self.__attr_valid_metadata_datasetVersion:
945 if self.__getattribute__(attr) is not None:
946 data['datasetVersion'][attr] = self.__getattribute__(attr)
947
948 """citation"""
949 if self.citation_displayName:
950 citation['displayName'] = self.citation_displayName
951
952 # Generate first level attributes
953 for attr in self.__attr_valid_metadata_citation_dicts:
954 if self.__getattribute__(attr) is not None:
955 value = self.__getattribute__(attr)
956 citation['fields'].append({
957 'typeName': attr,
958 'value': value,
959 'multiple': self.__is_multiple(value),
960 'typeClass': self.__get_attr_class_type(attr)
961 })
962
963 # Generate fields attributes
964 for key, val in self.__attr_valid_metadata_citation_arrays.items():
965 if self.__getattribute__(key) is not None:
966 value = self.__generate_dicts(key, val)
967 citation['fields'].append({
968 'typeName': key,
969 'value': value,
970 'multiple': self.__is_multiple(value),
971 'typeClass': self.__get_attr_class_type(key)
972 })
973
974 # Generate series attributes
975 if self.__getattribute__('seriesName') is not None or self.__getattribute__(
976 'seriesInformation') is not None:
977 tmp_dict = {}
978 tmp_dict['value'] = {}
979 if self.__getattribute__('seriesName') is not None:
980 tmp_dict['value']['seriesName'] = {}
981 tmp_dict['value']['seriesName']['typeName'] = 'seriesName'
982 tmp_dict['value']['seriesName']['value'] = self.__getattribute__('seriesName')
983 if self.__getattribute__('seriesInformation') is not None:
984 tmp_dict['value']['seriesInformation'] = {}
985 tmp_dict['value']['seriesInformation']['typeName'] = 'seriesInformation'
986 tmp_dict['value']['seriesInformation']['value'] = self.__getattribute__('seriesInformation')
987 citation['fields'].append({
988 'typeName': 'series',
989 'value': tmp_dict
990 })
991
992 """geospatial"""
993 # Generate first level attributes
994 for attr in self.__attr_valid_metadata_geospatial_dicts:
995 if self.__getattribute__(attr) is not None:
996 value = self.__getattribute__(attr)
997 geospatial['fields'].append({
998 'typeName': attr,
999 'value': value,
1000 'multiple': self.__is_multiple(value),
1001 'typeClass': self.__get_attr_class_type(attr)
1002 })
1003
1004 # Generate fields attributes
1005 for key, val in self.__attr_valid_metadata_geospatial_arrays.items():
1006 # check if attribute exists
1007 if self.__getattribute__(key) is not None:
1008 value = self.__generate_dicts(key, val)
1009 geospatial['fields'].append({
1010 'typeName': key,
1011 'value': value,
1012 'multiple': self.__is_multiple(value),
1013 'typeClass': self.__get_attr_class_type(attr)
1014 })
1015
1016 """socialscience"""
1017 # Generate first level attributes
1018 for attr in self.__attr_valid_metadata_socialscience_dicts:
1019 if self.__getattribute__(attr) is not None:
1020 value = self.__getattribute__(attr)
1021 socialscience['fields'].append({
1022 'typeName': attr,
1023 'value': value,
1024 'multiple': self.__is_multiple(value),
1025 'typeClass': self.__get_attr_class_type(attr)
1026 })
1027
1028 # Generate targetSampleSize attributes
1029 if self.__getattribute__('targetSampleActualSize') is not None or self.__getattribute__(
1030 'targetSampleSizeFormula') is not None:
1031 tmp_dict = {}
1032 tmp_dict['value'] = {}
1033 if 'targetSampleActualSize' in self.__getattribute__('targetSampleSize'):
1034 if self.__getattribute__('targetSampleActualSize') is not None:
1035 tmp_dict['value']['targetSampleActualSize'] = {}
1036 tmp_dict['value']['targetSampleActualSize']['typeName'] = 'targetSampleActualSize'
1037 tmp_dict['value']['targetSampleActualSize']['value'] = self.__getattribute__(
1038 'targetSampleActualSize')
1039 if 'targetSampleSizeFormula' in self.__getattribute__('targetSampleSize'):
1040 if self.__getattribute__('targetSampleSizeFormula') is not None:
1041 tmp_dict['value']['targetSampleSizeFormula'] = {}
1042 tmp_dict['value']['targetSampleSizeFormula']['typeName'] = 'targetSampleSizeFormula'
1043 tmp_dict['value']['targetSampleSizeFormula']['value'] = self.__getattribute__(
1044 'targetSampleSizeFormula')
1045 socialscience['fields'].append({
1046 'typeName': 'targetSampleSize',
1047 'value': tmp_dict
1048 })
1049
1050 # Generate socialScienceNotes attributes
1051 if self.__getattribute__('socialScienceNotesType') is not None or self.__getattribute__(
1052 'socialScienceNotesSubject') is not None or self.__getattribute__(
1053 'socialScienceNotesText') is not None:
1054 tmp_dict = {}
1055 tmp_dict['value'] = {}
1056 if self.__getattribute__('socialScienceNotesType') is not None:
1057 tmp_dict['value']['socialScienceNotesType'] = {}
1058 tmp_dict['value']['socialScienceNotesType']['typeName'] = 'socialScienceNotesType'
1059 tmp_dict['value']['socialScienceNotesType']['value'] = self.__getattribute__(
1060 'socialScienceNotesType')
1061 if self.__getattribute__('socialScienceNotesSubject') is not None:
1062 tmp_dict['value']['socialScienceNotesSubject'] = {}
1063 tmp_dict['value']['socialScienceNotesSubject']['typeName'] = 'socialScienceNotesSubject'
1064 tmp_dict['value']['socialScienceNotesSubject']['value'] = self.__getattribute__(
1065 'socialScienceNotesSubject')
1066 if self.__getattribute__('socialScienceNotesText') is not None:
1067 tmp_dict['value']['socialScienceNotesText'] = {}
1068 tmp_dict['value']['socialScienceNotesText']['typeName'] = 'socialScienceNotesText'
1069 tmp_dict['value']['socialScienceNotesText']['value'] = self.__getattribute__(
1070 'socialScienceNotesText')
1071 tmp_dict['value']['socialScienceNotesText']['muliple'] = self.__is_multiple(
1072 tmp_dict['value']['socialScienceNotesText']['value']),
1073 tmp_dict['value']['socialScienceNotesText']['typeClass'] = self.__get_attr_class_type(attr)
1074 socialscience['fields'].append({
1075 'typeName': 'socialScienceNotes',
1076 'value': tmp_dict
1077 })
1078
1079 """journal"""
1080 # Generate first level attributes
1081 for attr in self.__attr_valid_metadata_journal_dicts:
1082 if self.__getattribute__(attr) is not None:
1083 value = self.__getattribute__(attr)
1084 journal['fields'].append({
1085 'typeName': attr,
1086 'value': value,
1087 'multiple': self.__is_multiple(value),
1088 'typeClass': self.__get_attr_class_type(attr)
1089 })
1090
1091 # Generate fields attributes
1092 for key, val in self.__attr_valid_metadata_journal_arrays.items():
1093 if self.__getattribute__(key) is not None:
1094 value = self.__generate_dicts(key, val)
1095 journal['fields'].append({
1096 'typeName': key,
1097 'value': value,
1098 'multiple': self.__is_multiple(value),
1099 'typeClass': self.__get_attr_class_type(attr)
1100 })
1101
1102 # TODO: prüfen, ob required attributes gesetzt sind. wenn nicht = Exception!
1103 data['datasetVersion']['metadataBlocks']['citation'] = citation
1104 data['datasetVersion']['metadataBlocks']['socialscience'] = socialscience
1105 data['datasetVersion']['metadataBlocks']['geospatial'] = geospatial
1106 data['datasetVersion']['metadataBlocks']['journal'] = journal
1107
1108 return data
1109 else:
1110 print('dict can not be created. Data is not valid for format')
1111 return None
1112 elif format == 'all':
1113 for attr in self.__attr_valid_class:
1114 if self.__getattribute__(attr) is not None:
1115 data[attr] = self.__getattribute__(attr)
1116 return data
1117
1118 else:
1119 print('dict can not be created. Format is not valid')
1120 return None
1121
1122 def __generate_dicts(self, key, val):
1123 """Generate dicts for array attributes of Dataverse api metadata upload.
1124
1125 Parameters
1126 ----------
1127 key : string
1128 Name of attribute
1129 val : string
1130 Value of attribute.
1131
1132 Returns
1133 -------
1134 list
1135 List of filled dicts of metadata for Dataverse api upload.
1136
1137 """
1138 # check if attribute exists
1139 tmp_list = []
1140 if self.__getattribute__(key):
1141 # loop over list of attribute dicts()
1142 for d in self.__getattribute__(key):
1143 tmp_dict = {}
1144 # iterate over key-value pairs
1145 for k, v in d.items():
1146 # check if key is in attribute list
1147 if k in val:
1148 tmp_dict[k] = {}
1149 tmp_dict[k]['typeName'] = k
1150 tmp_dict[k]['value'] = v
1151 tmp_dict[k]['multiple'] = self.__is_multiple(v)
1152 tmp_dict[k]['typeClass'] = self.__get_attr_class_type(k)
1153 tmp_list.append(tmp_dict)
1154
1155 return tmp_list
1156
1157 def json(self, format='dv_up'):
1158 """Create Dataset json from attributes.
1159
1160 Parameters
1161 ----------
1162 format : string
1163 Data format of input. Available formats are: `dv_up` for Dataverse
1164 Api upload compatible format and `all` with all attributes named in
1165 `__attr_valid_class`.
1166
1167 Returns
1168 -------
1169 string
1170 json-formatted string of Dataverse metadata for api upload.
1171
1172 Examples
1173 -------
1174 Get json of Dataverse api upload::
1175
1176 >>> from pyDataverse.models import Dataset
1177 >>> ds = Dataset()
1178 >>> data = {
1179 >>> 'title': 'pyDataverse study 2019',
1180 >>> 'dsDescription': 'New study about pyDataverse usage in 2019'
1181 >>> 'author': [{'authorName': 'LastAuthor1, FirstAuthor1'}],
1182 >>> 'datasetContact': [{'datasetContactName': 'LastContact1, FirstContact1'}],
1183 >>> 'subject': ['Engineering'],
1184 >>> }
1185 >>> ds.set(data)
1186 >>> data = ds.json()
1187
1188 Todo
1189 -------
1190 TODO: Validate standard
1191 TODO: Link to default json file
1192
1193 """
1194 if format == 'dv_up':
1195 return dict_to_json(self.dict())
1196 elif format == 'all':
1197 return dict_to_json(self.dict('all'))
1198 else:
1199 # TODO Exception
1200 print('data format not valid.')
1201
1202 def export_metadata(self, filename, format='dv_up'):
1203 """Export Dataset metadata to Dataverse api upload json.
1204
1205 Parameters
1206 ----------
1207 filename : string
1208 Filename with full path.
1209 format : string
1210 Data format for export. Available format is: `dv_up` with all
1211 metadata for Dataverse api upload.
1212
1213 Examples
1214 -------
1215 Export metadata to json file::
1216
1217 >>> from pyDataverse.models import Dataset
1218 >>> ds = Dataset()
1219 >>> data = {
1220 >>> 'title': 'pyDataverse study 2019',
1221 >>> 'dsDescription': 'New study about pyDataverse usage in 2019'
1222 >>> 'author': [{'authorName': 'LastAuthor1, FirstAuthor1'}],
1223 >>> 'datasetContact': [{'datasetContactName': 'LastContact1, FirstContact1'}],
1224 >>> 'subject': ['Engineering'],
1225 >>> }
1226 >>> ds.export_metadata('tests/data/export_dataset.json')
1227
1228 """
1229 if format == 'dv_up':
1230 return write_file_json(filename, self.dict())
1231 else:
1232 # TODO: Exception
1233 print('Data-format not right.')
1234
1235 def __get_attr_class_type(self, attr):
1236 return self.__attr_type_class[attr] if attr in self.__attr_type_class else 'primitive'
1237
1238 def __is_multiple(self, value):
1239 return isinstance(value, list) and bool(len(value))
1240
1241
class Datafile(object):
    """Base class for the Datafile model.

    Parameters
    ----------
    filename : string
        Filename with full path.
    pid : string
        Persistent identifier, e.g. DOI (the default is None).

    Attributes
    ----------
    description : string
        Description of datafile.
    restrict : bool
        Restrict flag passed to the Dataverse api upload (presumably
        restricts access to the datafile — confirm against api docs).
    __attr_required_metadata : list
        List with required metadata.
    __attr_valid_metadata : list
        List with valid metadata for Dataverse api upload.
    __attr_valid_class : list
        List of all attributes.
    pid
    filename

    """

    """Attributes required for Datafile metadata json."""
    __attr_required_metadata = [
        'filename',
        'pid'
    ]

    """Attributes on first level of Datafile metadata json."""
    __attr_valid_metadata = [
        'description',
        'pid',
        'restrict'
    ]
    """Attributes valid for Datafile class."""
    __attr_valid_class = [
        'filename'
    ] + __attr_valid_metadata
1285
1286 def __init__(self, filename=None, pid=None):
1287 """Init a Datafile() class.
1288
1289 Parameters
1290 ----------
1291 filename : string
1292 Filename with full path.
1293 pid : string
1294 Persistend identifier, e.g. DOI.
1295
1296 Examples
1297 -------
1298 Create a Datafile::
1299
1300 >>> from pyDataverse.models import Datafile
1301 >>> df = Datafile()
1302 >>> df
1303 <pyDataverse.models.Datafile at 0x7f4dfc0466a0>
1304
1305 """
1306 """Misc"""
1307 self.pid = pid
1308 self.filename = filename
1309
1310 """Metadata"""
1311 self.description = None
1312 self.restrict = None
1313
1314 def __str__(self):
1315 """Return name of Datafile() class for users."""
1316 return 'pyDataverse Datafile() model class.'
1317
1318 def set(self, data):
1319 """Set class attributes with a flat dict.
1320
1321 Parameters
1322 ----------
1323 data : dict
1324 Flat dict with data. Key's must be name the same as the class
1325 attribute, the data should be mapped to.
1326
1327 Examples
1328 -------
1329 Set Datafile attributes via flat dict::
1330
1331 >>> from pyDataverse.models import Datafile
1332 >>> df = Datafile()
1333 >>> data = {
1334 >>> 'pid': 'doi:10.11587/EVMUHP',
1335 >>> 'description': 'Test file',
1336 >>> 'filename': 'tests/data/datafile.txt'
1337 >>> }
1338 >>> df.set(data)
1339 >>> df.pid
1340 'doi:10.11587/EVMUHP',
1341
1342 """
1343 for key, val in data.items():
1344 if key in self.__attr_valid_class:
1345 self.__setattr__(key, val)
1346 else:
1347 # TODO: Raise Exception
1348 print('Key {0} not valid.'.format(key))
1349
1350 def is_valid(self):
1351 """Check if set attributes are valid for Dataverse api metadata creation.
1352
1353 Returns
1354 -------
1355 bool
1356 True, if creation of metadata json is possible. False, if not.
1357
1358 Examples
1359 -------
1360 Check if metadata is valid for Dataverse api upload::
1361
1362 >>> from pyDataverse.models import Datafile
1363 >>> df = Datafile()
1364 >>> data = {
1365 >>> 'pid': 'doi:10.11587/EVMUHP',
1366 >>> 'description': 'Test file',
1367 >>> 'filename': 'tests/data/datafile.txt'
1368 >>> }
1369 >>> df.set(data)
1370 >>> df.is_valid
1371 True
1372 >>> df.filename = None
1373 >>> df.is_valid
1374 False
1375
1376 """
1377 is_valid = True
1378
1379 for attr in self.__attr_required_metadata:
1380 if self.__getattribute__(attr) is None:
1381 is_valid = False
1382 print('attribute \'{0}\' missing.'.format(attr))
1383
1384 return is_valid
1385
1386 def dict(self, format='dv_up'):
1387 """Create dict in different data formats.
1388
1389 Parameters
1390 ----------
1391 format : string
1392 Data format for dict creation. Available formats are: `dv_up` with
1393 all metadata for Dataverse api upload, and `all` with all attributes
1394 set.
1395
1396 Returns
1397 -------
1398 dict
1399 Data as dict.
1400
1401 Examples
1402 -------
1403 Check if metadata is valid for Dataverse api upload::
1404
1405 >>> from pyDataverse.models import Datafile
1406 >>> df = Datafile()
1407 >>> data = {
1408 >>> 'pid': 'doi:10.11587/EVMUHP',
1409 >>> 'description': 'Test file',
1410 >>> 'filename': 'tests/data/datafile.txt'
1411 >>> }
1412 >>> df.set(data)
1413 >>> data = df.dict()
1414 >>> data['description']
1415 'Test file'
1416
1417 Todo
1418 -------
1419 Validate standards.
1420
1421 """
1422 data = {}
1423 if format == 'dv_up':
1424 if self.is_valid():
1425 for attr in self.__attr_valid_metadata:
1426 if self.__getattribute__(attr) is not None:
1427 data[attr] = self.__getattribute__(attr)
1428
1429 return data
1430 else:
1431 print('dict can not be created. Data is not valid')
1432 return None
1433 elif format == 'all':
1434 for attr in self.__attr_valid_class:
1435 if self.__getattribute__(attr) is not None:
1436 data[attr] = self.__getattribute__(attr)
1437 return data
1438 else:
1439 # TODO: Exception
1440 print('Format not right for dict.')
1441 return None
1442
1443 def json(self, format='dv_up'):
1444 r"""Create json from attributes.
1445
1446 Parameters
1447 ----------
1448 format : string
1449 Data format of input. Available formats are: `dv_up` for Dataverse
1450 Api upload compatible format and `all` with all attributes named in
1451 `__attr_valid_class`.
1452
1453 Returns
1454 -------
1455 string
1456 json-formatted string of Dataverse metadata for api upload.
1457
1458 Examples
1459 -------
1460 Get dict of Dataverse metadata::
1461
1462 >>> from pyDataverse.models import Datafile
1463 >>> df = Datafile()
1464 >>> data = {
1465 >>> 'pid': 'doi:10.11587/EVMUHP',
1466 >>> 'description': 'Test file',
1467 >>> 'filename': 'tests/data/datafile.txt'
1468 >>> }
1469 >>> df.set(data)
1470 >>> df.dict()
1471 {'description': 'Test file',
1472 'directoryLabel': None,
1473 'restrict': None}
1474
1475 Todo
1476 -------
1477 Validate standards.
1478 Link to default json file
1479
1480 """
1481 if format == 'dv_up':
1482 data = self.dict('dv_up')
1483 if data:
1484 return dict_to_json(data)
1485 else:
1486 print('Dict can not be created')
1487 return None
1488 elif format == 'all':
1489 data = self.dict('all')
1490 if data:
1491 return dict_to_json(data)
1492 else:
1493 print('Dict can not be created')
1494 return None
1495 else:
1496 # TODO Exception
1497 print('data format not valid.')
1498 return None