Have you ever been stuck needing to fix text data that contained unwanted control characters in your database?

Here’s a Microsoft SQL Server 2005/2008/2012 stored procedure that searches text columns for non-ASCII characters, where [sql]CHAR(x)<32 OR CHAR(x)>126 [/sql]. It is meant to run on columns that are CHAR or VARCHAR (ASCII), not NCHAR or NVARCHAR (International). It will replace all control characters with spaces if you wish.

If you don’t change the default parameters, the procedure will check all CHAR and VARCHAR columns in all tables in all schemas in the default database. The procedure is not especially fast and will probably take a long while to finish if you run it against the entire database.

Internally, the procedure builds strings and executes them using the sp_executesql procedure.

Without further ado, here is the procedure. Hope it helps someone. But, use it at your own risk, please back up your data before running this procedure.

[sql]
ALTER procedure [dbo].[FindNonAsciiText]
(
@fix bit = 0, — 1 to replace non-ascii characters with blank
@schema varchar(128) = ‘%’, — percent for all schemas
@table varchar(128) = ‘%’, — percent for all tables
@column varchar(128) = ‘%’, — percent for all columns
@timelimitseconds bigint = 43200 — the procedure should stop after this many seconds have elapsed. 43200 seconds is 12 hours.
)

as

begin

— Database name
declare @dbname varchar(128);
— Schema name
declare @schemaname varchar(128);
— Table name
declare @tablename varchar(128);
— Column name
declare @columnname varchar(128);
— SQL statement to be executed
declare @stmt nvarchar(1024);
— Count of unprintable characters
declare @kount bigint;
— Loop counter, char(@loopcount) is the particular unprintable character being checked/fixed
declare @loopcount integer;
— Table for the counts of unprintable characters
declare @DirtyColumns table (dbname varchar(128), schemaname varchar(128), tablename varchar(128), columnname varchar(128), kount bigint, chardecimalvalue int);
— Timer
declare @starttime datetime;
set @starttime = current_timestamp;

— Loop through all columns in all tables in all schemas in the specified database
declare colcurs cursor for
select
db_name() as dbname,
h.name as schemaname,
t.name as tablename,
c.name as columnname

from sys.tables t join sys.columns c on t.object_id=c.object_id
join sys.types y on c.system_type_id=y.system_type_id
join sys.schemas h on t.schema_id=h.schema_id
where t.type=’U’
and h.name like @schema
and t.name like @table
and c.name like @column
and y.name in (‘text’,’char’,’varchar’)
order by dbname, schemaname, tablename, columnname;

open colcurs;

fetch next from colcurs into @dbname, @schemaname, @tablename, @columnname;

while (@@fetch_status = 0) and (datediff(second, @starttime, current_timestamp) &lt; @timelimitseconds)
begin

— Loop through all unprintable ascii characters, checking the current column for each character one at a time
set @loopcount = 0
while (@loopcount &lt; 256) and (datediff(second, @starttime, current_timestamp) &lt; @timelimitseconds)

begin

set @stmt =
‘select @kountOut=count(*) from [‘ +
rtrim(@dbname) + ‘].[‘ + rtrim(@schemaname) + ‘].[‘ + rtrim(@tablename) +
‘] where charindex(char(‘ + cast(@loopcount as varchar(3)) + ‘),[‘ + rtrim(@columnname) +
‘])&gt;0;’;

execute sp_executesql @stmt, N’@kountOut bigint output’, @kountOut=@kount output;

— Remember the non-zero counts
insert into @DirtyColumns
select @dbname, @schemaname, @tablename, @columnname, @kount, @loopcount
where @kount&gt;0;

— If called with non-zero argument then replace the unprintable characters with spaces
if @fix&lt;&gt;0 and @kount&gt;0
begin
set @stmt =
‘update [‘ +
rtrim(@dbname) + ‘].[‘ + rtrim(@schemaname) + ‘].[‘ + rtrim(@tablename) +
‘] set [‘ + rtrim(@columnname) + ‘]=replace([‘ + rtrim(@columnname) + ‘],char(‘ + cast(@loopcount as varchar(3)) + ‘),” ”);’;
execute sp_executesql @stmt
end;

— Jump from 31 to 127, skipping all the printable chartacters
set @loopcount = @loopcount + 1;
if @loopcount = 32
begin
set @loopcount = 127;
end;

— End of loop through unprintable chartacters for this column
end;

— end of loop through columns
fetch next from colcurs into @dbname, @schemaname, @tablename, @columnname;
end;

close colcurs;
deallocate colcurs;

— Finish by returning the counts in the result set
select * from @DirtyColumns;

end;
[/sql]

One Response to “Cleaning Up Text in a Microsoft SQL Server Database”
  1. Rick Kohler

    Wow, what a fantastically insightful post!

    Reply

Leave a Reply

Your email address will not be published. Required fields are marked *

 characters available