· 6 years ago · Jun 25, 2019, 02:36 PM
1/*
2** This file was created by Laurens Bogaardt, Advisor Data Analytics at EY Amsterdam on 2016-11-03.
3** This script allows you to import multiple delimited text files into a SQL database. The tables
4** into which the data is imported, including all required columns, are created automatically. This
5** script uses tab-delimited (tsv) files and SQL Server Management Studio. The script may need some
6** minor adjustments for other formats and tools. The scripts makes several assumptions which need
7** to be valid before it can run properly. First of all, it assumes none of the output tables exist
8** in the SQL tool before starting. Therefore, it may be necessary to clean the database and delete
9** all the existing tables. Secondly, the script assumes that, if multiple text files are imported
10** into the same output table, the number and order of the columns of these files is identical. If
11** this is not the case, some manual work may need to be done to the text files before importing.
12** Finally, please note that this script only imports data as strings (to be precise, as NVARCHAR's
13** of length 255). It does not allow you to specify the datatype per column. This would need to be
14** done using another script after importing the data as strings.
15*/
16
17-- 1. Import Multiple Delimited Text Files into a SQL Database
18
19-- 1.1 Define the path to the input and define the terminators
20
21/*
22** In this section, some initial parameters are set. Obviously, the 'DatabaseName' refers to the
23** database in which you want to create new tables. The '@Path' parameter sets the folder in
24** which the text files are located which you want to import. Delimited files are defined by
25** two characters: one which separates columns and one which separates rows. Usually, the
26** row-terminator is the newline character CHAR(10), also given by 'n'. When files are created
27** in Windows, the row-terminator often includes a carriage return CHAR(13), also given by 'rn'.
28** Often, a tab is used to separate each column. This is given by CHAR(9) or by the character 't'.
29** Other useful characters include the comma CHAR(44), the semi-colon CHAR(59) and the pipe
30** CHAR(124).
31*/
32
33USE [DatabaseName]
34DECLARE @Path NVARCHAR(255) = 'C:\PathToFiles\'
35DECLARE @RowTerminator NVARCHAR(5) = CHAR(13) + CHAR(10)
36DECLARE @ColumnTerminator NVARCHAR(5) = CHAR(9)
37
38-- 1.2 Define the list of input and output in a temporary table
39
40/*
41** In this section, a temporary table is created which lists all the filenames of the delimited
42** files which need to be imported, as well as the names of the tables which are created and into
43** which the data is imported. Multiple files may be imported into the same output table. Each row
44** is prepended with an integer which increments up starting from 1. It is essential that this
45** number follows this logic. The temporary table is deleted at the end of this script.
46*/
47
48IF OBJECT_ID('[dbo].[Files_Temporary]', 'U') IS NOT NULL
49DROP TABLE [dbo].[Files_Temporary];
50CREATE TABLE [dbo].[Files_Temporary]
51(
52 [ID] INT
53 , [FileName] NVARCHAR(255)
54 , [TableName] NVARCHAR(255)
55);
56
57INSERT INTO [dbo].[Files_Temporary] SELECT 1, 'MyFileA.txt', 'NewTable1'
58INSERT INTO [dbo].[Files_Temporary] SELECT 2, 'MyFileB.txt', 'NewTable2'
59INSERT INTO [dbo].[Files_Temporary] SELECT 3, 'MyFileC.tsv', 'NewTable2'
60INSERT INTO [dbo].[Files_Temporary] SELECT 4, 'MyFileD.csv', 'NewTable2'
61INSERT INTO [dbo].[Files_Temporary] SELECT 5, 'MyFileE.dat', 'NewTable2'
62INSERT INTO [dbo].[Files_Temporary] SELECT 6, 'MyFileF', 'NewTable3'
63INSERT INTO [dbo].[Files_Temporary] SELECT 7, 'MyFileG.text', 'NewTable4'
64INSERT INTO [dbo].[Files_Temporary] SELECT 8, 'MyFileH.txt', 'NewTable5'
65INSERT INTO [dbo].[Files_Temporary] SELECT 9, 'MyFileI.txt', 'NewTable5'
66INSERT INTO [dbo].[Files_Temporary] SELECT 10, 'MyFileJ.txt', 'NewTable5'
67INSERT INTO [dbo].[Files_Temporary] SELECT 11, 'MyFileK.txt', 'NewTable6'
68
69-- 1.3 Loop over the list of input and output and import each file to the correct table
70
71/*
72** In this section, the 'WHILE' statement is used to loop over all input files. A counter is defined
73** which starts at '1' and increments with each iteration. The filename and tablename are retrieved
74** from the previously defined temporary table. The next step of the script is to check whether the
75** output table already exists or not.
76*/
77
78DECLARE @Counter INT = 1
79
80WHILE @Counter <= (SELECT COUNT(*) FROM [dbo].[Files_Temporary])
81BEGIN
82 PRINT 'Counter is ''' + CONVERT(NVARCHAR(5), @Counter) + '''.'
83
84 DECLARE @FileName NVARCHAR(255)
85 DECLARE @TableName NVARCHAR(255)
86 DECLARE @Header NVARCHAR(MAX)
87 DECLARE @SQL_Header NVARCHAR(MAX)
88 DECLARE @CreateHeader NVARCHAR(MAX) = ''
89 DECLARE @SQL_CreateHeader NVARCHAR(MAX)
90
91 SELECT @FileName = [FileName], @TableName = [TableName] FROM [dbo].[Files_Temporary] WHERE [ID] = @Counter
92
93 IF OBJECT_ID('[dbo].[' + @TableName + ']', 'U') IS NULL
94 BEGIN
95/*
96** If the output table does not yet exist, it needs to be created. This requires the list of all
97** columnnames for that table to be retrieved from the first line of the text file, which includes
98** the header. A piece of SQL code is generated and executed which imports the header of the text
99** file. A second temporary table is created which stores this header as a single string.
100*/
101 PRINT 'Creating new table with name ''' + @TableName + '''.'
102
103 IF OBJECT_ID('[dbo].[Header_Temporary]', 'U') IS NOT NULL
104 DROP TABLE [dbo].[Header_Temporary];
105 CREATE TABLE [dbo].[Header_Temporary]
106 (
107 [Header] NVARCHAR(MAX)
108 );
109
110 SET @SQL_Header = '
111 BULK INSERT [dbo].[Header_Temporary]
112 FROM ''' + @Path + @FileName + '''
113 WITH
114 (
115 FIRSTROW = 1,
116 LASTROW = 1,
117 MAXERRORS = 0,
118 FIELDTERMINATOR = ''' + @RowTerminator + ''',
119 ROWTERMINATOR = ''' + @RowTerminator + '''
120 )'
121 EXEC(@SQL_Header)
122
123 SET @Header = (SELECT TOP 1 [Header] FROM [dbo].[Header_Temporary])
124 PRINT 'Extracted header ''' + @Header + ''' for table ''' + @TableName + '''.'
125/*
126** The columnnames in the header are separated using the column-terminator. This can be used to loop
127** over each columnname. A new piece of SQL code is generated which will create the output table
128** with the correctly named columns.
129*/
130 WHILE CHARINDEX(@ColumnTerminator, @Header) > 0
131 BEGIN
132 SET @CreateHeader = @CreateHeader + '[' + LTRIM(RTRIM(SUBSTRING(@Header, 1, CHARINDEX(@ColumnTerminator, @Header) - 1))) + '] NVARCHAR(255), '
133 SET @Header = SUBSTRING(@Header, CHARINDEX(@ColumnTerminator, @Header) + 1, LEN(@Header))
134 END
135 SET @CreateHeader = @CreateHeader + '[' + @Header + '] NVARCHAR(255)'
136
137 SET @SQL_CreateHeader = 'CREATE TABLE [' + @TableName + '] (' + @CreateHeader + ')'
138 EXEC(@SQL_CreateHeader)
139 END
140
141/*
142** Finally, the data from the text file is imported into the newly created table. The first line,
143** including the header information, is skipped. If multiple text files are imported into the same
144** output table, it is essential that the number and the order of the columns is identical, as the
145** table will only be created once, using the header information of the first text file.
146*/
147 PRINT 'Inserting data from ''' + @FileName + ''' to ''' + @TableName + '''.'
148 DECLARE @SQL NVARCHAR(MAX)
149 SET @SQL = '
150 BULK INSERT [dbo].[' + @TableName + ']
151 FROM ''' + @Path + @FileName + '''
152 WITH
153 (
154 FIRSTROW = 2,
155 MAXERRORS = 0,
156 FIELDTERMINATOR = ''' + @ColumnTerminator + ''',
157 ROWTERMINATOR = ''' + @RowTerminator + '''
158 )'
159 EXEC(@SQL)
160
161 SET @Counter = @Counter + 1
162END;
163
164-- 1.4 Cleanup temporary tables
165
166/*
167** In this section, the temporary tables which were created and used by this script are deleted.
168** Alternatively, the script could have used 'real' temporary table (identified by the '#' character
169** in front of the name) or a table variable. These would have deleted themselves once they were no
170** longer in use. However, the end result is the same.
171*/
172
173IF OBJECT_ID('[dbo].[Files_Temporary]', 'U') IS NOT NULL
174DROP TABLE [dbo].[Files_Temporary];
175
176IF OBJECT_ID('[dbo].[Header_Temporary]', 'U') IS NOT NULL
177DROP TABLE [dbo].[Header_Temporary];
178
179select *
180into SQLServerTable FROM OPENROWSET('Microsoft.Jet.OLEDB.4.0',
181 'Excel 8.0;Database=C:your_path_heretest.xls;HDR=YES',
182 'SELECT * FROM [Sheet1$]')
183
184Private Declare Function SetCurrentDirectoryA Lib _
185 "kernel32" (ByVal lpPathName As String) As Long
186
187Public Function ChDirNet(szPath As String) As Boolean
188'based on Rob Bovey's code
189 Dim lReturn As Long
190 lReturn = SetCurrentDirectoryA(szPath)
191 ChDirNet = CBool(lReturn <> 0)
192End Function
193
194Sub Get_TXT_Files()
195'For Excel 2000 and higher
196 Dim Fnum As Long
197 Dim mysheet As Worksheet
198 Dim basebook As Workbook
199 Dim TxtFileNames As Variant
200 Dim QTable As QueryTable
201 Dim SaveDriveDir As String
202 Dim ExistFolder As Boolean
203
204 'Save the current dir
205 SaveDriveDir = CurDir
206
207 'You can change the start folder if you want for
208 'GetOpenFilename,you can use a network or local folder.
209 'For example ChDirNet("C:UsersRontest")
210 'It now use Excel's Default File Path
211
212 ExistFolder = ChDirNet("C:your_path_hereText")
213 If ExistFolder = False Then
214 MsgBox "Error changing folder"
215 Exit Sub
216 End If
217
218 TxtFileNames = Application.GetOpenFilename _
219 (filefilter:="TXT Files (*.txt), *.txt", MultiSelect:=True)
220
221 If IsArray(TxtFileNames) Then
222
223 On Error GoTo CleanUp
224
225 With Application
226 .ScreenUpdating = False
227 .EnableEvents = False
228 End With
229
230 'Add workbook with one sheet
231 Set basebook = Workbooks.Add(xlWBATWorksheet)
232
233 'Loop through the array with txt files
234 For Fnum = LBound(TxtFileNames) To UBound(TxtFileNames)
235
236 'Add a new worksheet for the name of the txt file
237 Set mysheet = Worksheets.Add(After:=basebook. _
238 Sheets(basebook.Sheets.Count))
239 On Error Resume Next
240 mysheet.Name = Right(TxtFileNames(Fnum), Len(TxtFileNames(Fnum)) - _
241 InStrRev(TxtFileNames(Fnum), "", , 1))
242 On Error GoTo 0
243
244 With ActiveSheet.QueryTables.Add(Connection:= _
245 "TEXT;" & TxtFileNames(Fnum), Destination:=Range("A1"))
246 .TextFilePlatform = xlWindows
247 .TextFileStartRow = 1
248
249 'This example use xlDelimited
250 'See a example for xlFixedWidth below the macro
251 .TextFileParseType = xlDelimited
252
253 'Set your Delimiter to true
254 .TextFileTabDelimiter = True
255 .TextFileSemicolonDelimiter = False
256 .TextFileCommaDelimiter = False
257 .TextFileSpaceDelimiter = False
258
259 'Set the format for each column if you want (Default = General)
260 'For example Array(1, 9, 1) to skip the second column
261 .TextFileColumnDataTypes = Array(1, 9, 1)
262
263 'xlGeneralFormat General 1
264 'xlTextFormat Text 2
265 'xlMDYFormat Month-Day-Year 3
266 'xlDMYFormat Day-Month-Year 4
267 'xlYMDFormat Year-Month-Day 5
268 'xlMYDFormat Month-Year-Day 6
269 'xlDYMFormat Day-Year-Month 7
270 'xlYDMFormat Year-Day-Month 8
271 'xlSkipColumn Skip 9
272
273 ' Get the data from the txt file
274 .Refresh BackgroundQuery:=False
275 End With
276 ActiveSheet.QueryTables(1).Delete
277 Next Fnum
278
279 'Delete the first sheet of basebook
280 On Error Resume Next
281 Application.DisplayAlerts = False
282 basebook.Worksheets(1).Delete
283 Application.DisplayAlerts = True
284 On Error GoTo 0
285
286CleanUp:
287
288 ChDirNet SaveDriveDir
289
290 With Application
291 .ScreenUpdating = True
292 .EnableEvents = True
293 End With
294 End If
295End Sub