· 7 years ago · Sep 30, 2018, 02:54 AM
1def dataFrameToSQL(DSN, df, tablename, csvpath, convert, encodingformat = None):
2 #Dumps dataframe (df) to sql server (DSN) table (tablename) using bulk insert from csv (csvpath)
3 #Forces conversion to existing table's column datatypes if convert == True
4 #If convert == False, insert will fail if datatypes do not align properly
5 df.to_csv(csvpath, index = False, header = False, sep = '~', encoding = encodingformat)
6
7 conversiondictionary = {'int64' : 'int', 'datetime64[ns]' : 'smalldatetime', 'datetime32[ns]' : 'smalldatetime', 'object' : 'text', 'float64' : 'float', 'float32' : 'float', '<M8[ns]' : 'smalldatetime', 'bool' : 'varchar(5)'}
8 alltables = sqlToDataFrame(DSN, "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE' AND TABLE_CATALOG = 'DATABASE'")
9 tableexists = False
10 for table in alltables['TABLE_NAME']:
11 if tablename.lower() == table.lower():
12 tableexists = True
13
14 if not tableexists:
15 print 'Table does not already exist. Creating new table and inserting data.'
16 sql = 'create table ' + tablename + '('
17 for column in range(0, len(df.columns)):
18 if column == len(df.columns) - 1:
19 sql = sql + '"' + str(df.columns[column]) + '" ' + conversiondictionary[str(df[df.columns[column]].dtype)]
20 else:
21 sql = sql + '"' + str(df.columns[column]) + '" ' + conversiondictionary[str(df[df.columns[column]].dtype)] + ','
22
23 sql = sql + ');'
24 executeSQL(DSN, sql)
25
26 else:
27 print 'Table already exists. Inserting data into existing table.'
28
29 if convert == False:
30 sql = 'create table #' + tablename + '('
31 for column in range(0, len(df.columns)):
32 if column == len(df.columns) - 1:
33 sql = sql + '"' + str(df.columns[column]) + '" ' + conversiondictionary[str(df[df.columns[column]].dtype)]
34 else:
35 sql = sql + '"' + str(df.columns[column]) + '" ' + conversiondictionary[str(df[df.columns[column]].dtype)] + ','
36
37 sql = sql + '); bulk insert #' + tablename + " from '" + csvpath + "' with (FIELDTERMINATOR = '~') insert into " + tablename + ' select '
38 for column in range(0, len(df.columns)):
39 if column == len(df.columns) - 1:
40 sql = sql + 'cast("' + str(df.columns[column]) + '" as ' + conversiondictionary[str(df[df.columns[column]].dtype)] + ')'
41 else:
42 sql = sql + 'cast("' + str(df.columns[column]) + '" as ' + conversiondictionary[str(df[df.columns[column]].dtype)] + '),'
43
44 sql = sql + ' from #' + tablename + '; drop table #' + tablename + ';'
45 executeSQL(DSN, sql)
46
47 else:
48 existingdtypes = sqlToDataFrame(DSN, "SELECT DATA_TYPE, CHARACTER_MAXIMUM_LENGTH FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '" + tablename + "'")
49 sql = 'create table #' + tablename + '('
50 for column in range(0, len(df.columns)):
51 if column == len(df.columns) - 1:
52 if pd.isnull(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column]) or existingdtypes['DATA_TYPE'][column] == 'text': #No maxmimum length for this datatype
53 sql = sql + '"' + str(df.columns[column]) + '" ' + existingdtypes['DATA_TYPE'][column]
54 else:
55 sql = sql + '"' + str(df.columns[column]) + '" ' + existingdtypes['DATA_TYPE'][column] + '(' + str(int(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column])) + ')'
56 else:
57 if pd.isnull(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column]) or existingdtypes['DATA_TYPE'][column] == 'text': #No maxmimum length for this datatype
58 sql = sql + '"' + str(df.columns[column]) + '" ' + existingdtypes['DATA_TYPE'][column] + ','
59 else:
60 sql = sql + '"' + str(df.columns[column]) + '" ' + existingdtypes['DATA_TYPE'][column] + '(' + str(int(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column])) + '),'
61
62 sql = sql + '); bulk insert #' + tablename + " from '" + csvpath + "' with (FIELDTERMINATOR = '~') insert into " + tablename + ' select '
63 for column in range(0, len(df.columns)):
64 if column == len(df.columns) - 1:
65 if pd.isnull(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column]) or existingdtypes['DATA_TYPE'][column] == 'text': #No maxmimum length for this datatype
66 sql = sql + 'cast("' + str(df.columns[column]) + '" as ' + existingdtypes['DATA_TYPE'][column] + ')'
67 else:
68 sql = sql + 'cast("' + str(df.columns[column]) + '" as ' + existingdtypes['DATA_TYPE'][column] + '(' + str(int(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column])) + '))'
69 else:
70 if pd.isnull(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column]) or existingdtypes['DATA_TYPE'][column] == 'text': #No maxmimum length for this datatype
71 sql = sql + 'cast("' + str(df.columns[column]) + '" as ' + existingdtypes['DATA_TYPE'][column] + '),'
72 else:
73 sql = sql + 'cast("' + str(df.columns[column]) + '" as ' + existingdtypes['DATA_TYPE'][column] + '(' + str(int(existingdtypes['CHARACTER_MAXIMUM_LENGTH'][column])) + ')),'
74
75 sql = sql + ' from #' + tablename + '; drop table #' + tablename + ';'
76 executeSQL(DSN, sql)