· 4 years ago · May 12, 2021, 08:02 AM
1USE [Dev]
2
3GO
4
5/****** Object: StoredProcedure [dbo].[FindBadData] Script Date: 05/31/2016 16:49:03 ******/
6
7SET ANSI_NULLS ON
8
9GO
10
11SET QUOTED_IDENTIFIER ON
12
13GO
14
15
16
17
18
19
20
21/**********************************************************************************************************************
22
23**
24
25** Procedure : FindBadData
26
27**
28
29** Purpose : To locate a column within a given table that contains unsupported data, and output said data.
30
31**
32
33** Author : Scott Tippett
34
35**
36
37** Date : 06/05/2016
38
39**``
40
41** Version History : 06/05/2016 Initial version created to speed up finding crap
42
43
44
45
46
47**********************************************************************************************************************/
48
49
50
51/*
52
53
54
55 exec dev.dbo.FindBadData 'spt_temp', 'WorkEmail'
56
57
58
59*/
60
61
62
63ALTER PROCEDURE [dbo].[FindBadData]
64
65 @TableName nvarchar(255), --the table to search for bad stuff
66
67 @identifier varchar(255) --a column in the table that we can use to identify a row
68
69AS
70
71
72
73
74
75declare @MaxOrdinal int
76
77declare @CurrentOrdinal int
78
79declare @sSQL varchar(5000)
80
81declare @currentCol nvarchar(max)
82
83--We could probably declare an email variable so we can send a report straight out to that person.
84
85
86
87--select @TableName = 'spt_test'--SET THIS
88
89--select @identifier = 'id' --This needs to be something that exists in the table that you can use to ID the messed up row,
90
91--Note this is a varchar, so using a float will give you screwy results
92
93select @CurrentOrdinal = 0
94
95select @currentCol = 0
96
97select @MaxOrdinal = 0 --the three '0's just ensure it will run if for some reason they don't get set further down.
98
99
100
101IF OBJECT_ID ( @TableName , N'U') IS NOT NULL --verify the table exists
102
103begin
104
105
106
107create table #numbers
108
109(
110
111 number int
112
113)
114
115insert into #numbers values (1)
116
117while (select max(number) from #numbers) < 1000
118
119begin
120
121 insert into #numbers
122
123 select max(number) + 1 from #numbers
124
125/*Create a numbers table, as i dont know if we have one in the db. We use this to loop through the substring of a column character by character
126
127looking at the ASCII values of each character and assessing if the character is supported or not.*/
128
129end
130
131
132
133create table #BadData
134
135(
136
137 IdentifierRow nvarchar(255),
138
139 UnsupportedValueColumn nvarchar(255),
140
141 UnsupportedValue nvarchar(max) --Short but sweet, a table to store and identify the bad data
142
143)
144
145
146
147
148
149--'select * from information_schema.columns where table_name = ''' + @TableName + ''' and Ordinal_position = ' + convert(varchar(5),@CurrentOrdinal) + '
150
151
152
153select @MaxOrdinal = MAX(ordinal_position) from information_schema.columns where table_name = ''+ @TableName + ''
154
155--To get out loop counter we look at the schema of the table and find the max ordinal value (how many columns there are in the table)
156
157--so we don't try to loop past that number
158
159
160
161
162
163 while @CurrentOrdinal < @MaxOrdinal --start loopin'
164
165 begin
166
167 select @currentCol = Column_name from information_schema.columns where table_name ='' + @TableName + '' and Ordinal_position = @CurrentOrdinal
168
169 --Get the current column name, so we can output if that column has unsupported values
170
171
172
173 set @sSQL = --dynamic sql. seriously, its made of hate
174
175 'SELECT
176
177 CONVERT(varchar(255),'+@identifier+'), ''[' + @currentCol + ']'' AS [UnsupportedValueColumn], [' + Convert(varchar(100),@currentCol) + '] AS [UnsupportedValue]
178
179 FROM ' + @TableName + ' y
180
181 INNER JOIN #numbers n ON n.Number <= LEN( ['+Convert(varchar(100),@currentCol)+'] )
182
183 WHERE ASCII(SUBSTRING(['+Convert(varchar(100),@currentCol)+'], n.Number, 1))<32 OR ASCII(SUBSTRING(['+Convert(varchar(100),@currentCol)+'], n.Number, 1))>127
184
185
186
187
188
189 OPTION (MAXRECURSION 1000)
190
191 '
192
193 /*
194
195 Essentially we select the contents of the ID row, the name of the current column, and the contents of the current column that contains unsupported data.
196
197 We do it in dynamic SQL because other wise we would only select either the contents or the name of the row.
198
199 We join it on to the numbers table and ensure we know how many characters there are in to so we dont substring past that many.
200
201 We also need to let the server know it can use recursion as many times as we have numbers (1000), this bypasses the default recursion which is lower
202
203 The ASCII Values I look for are outside of teh standard alphabet, however some characters are accepted in marrs, perhaps replacing the ASCII codes in the
204
205 below statement would be good
206
207 OR ASCII(SUBSTRING(['+Convert(varchar(100),@currentCol)+'], n.Number, 1)) not in (
208
209 129,130,131,132,133,136,`37,138,139,140,142,144,147,148,149,150,151,152,153,160,161,162,
210
211 )
212
213 */
214
215
216
217
218
219 --select @sSQL
220
221 if (select DATA_TYPE from information_schema.columns where table_name = ''+@TableName+'' and Ordinal_position = @CurrentOrdinal )
222
223 in ('varchar', 'nvarchar' ,'char' )
224
225 --We insert the rows into our table, but only if the current column is a varchar, nvarchar, or char as we know that others won't contain unsupported ASCII
226
227 begin
228
229 insert into #BadData
230
231 exec (@sSQL) --Simple. Our dynamic SQL is a select statement, so we can insert with that statement
232
233 end
234
235
236
237
238
239
240
241 select @CurrentOrdinal = @CurrentOrdinal + 1 --loop stuff
242
243 end
244
245select distinct * from #BadData-- display all the "unsupported" data
246
247drop table #numbers
248
249end
250
251else
252
253begin
254
255print ('The table ' + DB_NAME() + '.dbo.' + @TableName + ' does not exist')
256
257end
258
259
260
261
262
263--delete d
264
265--from spt_temp d
266
267--join #BadData b
268
269--on b.identifierRow = d.workemail
270
271----If we wanted we could simply delete the crap from the import and continue, then use the #BadData table as a report.
272
273
274
275
276
277
278
279--commit