· 5 years ago · Mar 26, 2020, 02:24 PM
1import os
2from pprint import pprint
3import sqlite3
4from sqlite3 import OperationalError
5
6import pydicom
7from pydicom.filereader import read_dicomdir
8
# Base folder that the listed file paths are joined onto. Set the NELSON
# environment variable to override the default location.
base_folder = os.environ.get("NELSON", "/media/lubuntu/EOL/archive")
10
# These are the fields we wish to extract.
# Each name is used twice: as the attribute looked up on the DICOM dataset and
# as the column name in the dicom_files table, so it must be valid for both.
headers_to_extract = [
    "AccessionNumber",
    "AcquisitionDate",
    "AcquisitionNumber",
    "AcquisitionTime",
    "ContentDate",
    "ContentTime",
    "InstanceNumber",
    "PatientBirthDate",
    "PatientID",
    "PatientName",
    "PatientSex",
    "SeriesDate",
    "SeriesDescription",
    "SeriesInstanceUID",
    "SeriesNumber",
    "SeriesTime",
    "StudyDate",
    "StudyDescription",
    "StudyID",
    "StudyInstanceUID",
    "StudyTime",
]
35
36
# Connect to the database.
# The path is relative, so the file is resolved against the current working
# directory.
conn = sqlite3.connect("records.sql")
c = conn.cursor()
40
41
# Create the table if it does not exist yet.
def create_table():
    """
    Create the dicom_files table with one TEXT column per extracted header.

    The statement uses IF NOT EXISTS, so calling this against a database that
    already holds the table is a no-op instead of an OperationalError.
    """
    columns = ",".join(f"{header} TEXT" for header in headers_to_extract)
    c.execute(f"CREATE TABLE IF NOT EXISTS dicom_files ({columns})")
    conn.commit()
47
48
# Make sure the table is present before any rows are inserted.
try:
    create_table()
except OperationalError as exc:
    # A table left over from an earlier run is expected and harmless. Any
    # other operational failure (locked database, unwritable file, ...) must
    # not be silently ignored.
    if "already exists" not in str(exc):
        raise
53
# Collect the files to process: the list file holds one path per line, and
# each path is joined onto base_folder.
# (An earlier variant listed every regular file directly inside base_folder.)
filelist = '/home/lubuntu/Desktop/archive.lst'
with open(filelist) as listing:
    files = [
        os.path.join(base_folder, line)
        for line in listing.read().splitlines()
        # A blank line would turn into the base folder itself, which can
        # never be read as a DICOM file.
        if line.strip()
    ]
62
def get_header_values(dcm, headers=None):
    """
    Extract header values, when available.

    dcm is any object exposing the headers as attributes. headers is the
    sequence of attribute names to read; it defaults to headers_to_extract.

    Returns a list of strings in the same order as headers. An attribute that
    is missing, or whose value is None, yields an empty string.
    """
    if headers is None:
        headers = headers_to_extract
    values = []
    for key in headers:
        value = getattr(dcm, key, None)
        # str(None) would store the literal text "None"; keep absent and
        # empty values indistinguishable as "".
        values.append("" if value is None else str(value))
    return values
75
76
def header_values_to_database(values):
    """
    Store one row of header values in the database.

    values must be ordered like headers_to_extract, whose entries are used as
    the column names. The values are bound through "?" placeholders rather
    than formatted into the SQL text.
    """
    columns = ",".join(headers_to_extract)
    placeholders = ",".join("?" for _ in values)
    query = f"INSERT INTO dicom_files ({columns}) VALUES ({placeholders})"
    # The connection context manager commits when the insert succeeds and
    # rolls back when it raises, so a failed insert leaves no open
    # transaction behind.
    with conn:
        c.execute(query, values)
85
86
previous_patient_id = None
# Upper bound on the number of listed files to visit; 0 or less means no
# limit. Files that fail to load count towards the limit as well.
MAX_ITER = 10
# Extract headers from each DICOM file and store them in the database.
try:
    for i, file in enumerate(files):
        # Check the limit before doing any work so that exactly MAX_ITER
        # files are visited (i is zero-based).
        if MAX_ITER > 0 and i >= MAX_ITER:
            print('')
            print('Maximum reached')
            break

        # 1) Read dicom file.
        try:
            # Only header elements are needed, so skip loading pixel data.
            dcm = pydicom.dcmread(file, stop_before_pixels=True)
        except Exception as e:
            # Best effort: report the unreadable file and carry on.
            print(e)
            continue

        values = get_header_values(dcm)

        # 2) Store in database.
        header_values_to_database(values)

        # 3) Print progress: the patient ID once, then a dot per further
        #    file of the same patient. A dataset without PatientID must not
        #    abort the run, so fall back to an empty ID.
        patient_id = getattr(dcm, "PatientID", "")
        if patient_id != previous_patient_id:
            print("")
            print(f"{patient_id}", flush=True, end="")
            previous_patient_id = patient_id
        else:
            print(".", flush=True, end="")
finally:
    # Release the cursor and the connection even when the loop fails.
    c.close()
    conn.close()