SHA1 ScriptComponent using Metadata

gravatar

Peter Avenant

I use this as an alternative when I cannot use Custom Components or if I need a Hash column for a source where I cannot use T-SQL Hashbytes. This has worked in all my scenarios, but please test it prior to use. I also hook it up to an external metadata repository, but reading from AdventureWorks should allow you to easily replicate it.

published 06.03.14

last updated 06.03.14


Share

                            


<!-- Connections.biml-->
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
	<Connections>
        <OleDbConnection Name="AW" ConnectionString="Server=localhost;Initial Catalog=AdventureWorksDW2012;Provider=SQLNCLI11;Integrated Security=SSPI;" />
    </Connections>
	<Databases>
		<Database Name="AW" ConnectionName="AW" />
    </Databases>
	<Schemas>
		<Schema Name="dbo" DatabaseName="AW" />
    </Schemas>
</Biml>

<!-- Import Tables.biml-->
<#@ import namespace="Varigence.Hadron.CoreLowerer.SchemaManagement" #>

<Biml xmlns="http://schemas.varigence.com/biml.xsd">
<#	var sourceConnection = (AstDbConnectionNode)RootNode.Connections["AW"];
	var importedSchema = sourceConnection.ImportDB("", "", ImportOptions.ExcludeForeignKey | ImportOptions.ExcludeColumnDefault | ImportOptions.ExcludeViews);
	// Filtering a couple of tables.
	var tableNamesToImport = new List<string>() { "DimAccount", "DimCurrency", "DimCustomer", "DimEmployee", "DimOrganization", "DimProduct" };#>
	<Tables>
    <#	foreach (var tableNode in importedSchema.TableNodes.Where(item => tableNamesToImport.Contains(item.Name)).OrderBy(item => item.Name)) { #>
        <#=tableNode.GetBiml()#>
    <#	} #>
    </Tables>
</Biml>

<!-- Use the following code to create the Packages to Extract Data and add SHA1 columns-->
<Biml xmlns="http://schemas.varigence.com/biml.xsd">
	<ScriptProjects>
	<#	foreach(var tableNode in RootNode.Tables) {#>
        <ScriptComponentProject Name="SCP_SHA1_<#=tableNode.Name#>" ProjectCoreName="SCP_SHA1_<#=tableNode.Name#>">
            <AssemblyReferences>
                <AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSPipelineWrap.dll" />
                <AssemblyReference AssemblyPath="Microsoft.SqlServer.DTSRuntimeWrap.dll" />
                <AssemblyReference AssemblyPath="Microsoft.SqlServer.PipelineHost.dll" />
                <AssemblyReference AssemblyPath="Microsoft.SqlServer.TxScript.dll" />
                <AssemblyReference AssemblyPath="System.dll" />
                <AssemblyReference AssemblyPath="System.AddIn.dll" />
                <AssemblyReference AssemblyPath="System.Data.dll" />
                <AssemblyReference AssemblyPath="System.Xml.dll" />
            </AssemblyReferences>
            <Files>
                <File Path="ScriptMain.cs">using System;
using System.Collections.Generic;
using System.Data;
using System.Reflection;
using Microsoft.SqlServer.Dts.Pipeline;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using System.Security.Cryptography;
using System.Text;
using System.IO;
using SCP_SHA1_<#=tableNode.Name#>.csproj;

[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
    private int ColumnCount;
    private ColumnInfo[] inputColumnInfos;
    private PipelineBuffer inputBuffer;
    private SHA1CryptoServiceProvider SHA1Hash;

    private struct ColumnInfo
    {
        public int bufferColumnIndex;
        public string columnName;
        public string dataType;
    }

    public override void PreExecute()
    {
        base.PreExecute();
        SHA1Hash = new SHA1CryptoServiceProvider();
        ColumnCount = ComponentMetaData.InputCollection[0].InputColumnCollection.Count;
        inputColumnInfos = new ColumnInfo[ColumnCount];
        int[] columnIndexes = GetColumnIndexes(ComponentMetaData.InputCollection[0].ID);
        int columnIndex = 0;
        foreach (IDTSInputColumn100 item in ComponentMetaData.InputCollection[0].InputColumnCollection)
        {
            inputColumnInfos[columnIndex] = new ColumnInfo
            {
                columnName = Convert.ToString(item.Name),
                bufferColumnIndex =
                    columnIndexes[ComponentMetaData.InputCollection[0].InputColumnCollection.GetObjectIndexByID(item.ID)
                        ],
                dataType = item.DataType.ToString()
            };
            columnIndex++;
        }
    }

    public override void ProcessInput(int InputID, Microsoft.SqlServer.Dts.Pipeline.PipelineBuffer Buffer)
    {
        inputBuffer = Buffer;
        base.ProcessInput(InputID, Buffer);
    }

    public override void PostExecute()
    {
        base.PostExecute();
    }

    public override void Input0_ProcessInputRow(Input0Buffer Row)
    {
        byte[] inputByteBuffer = new byte[1000];
        Int32 bufferUsed = 0;
        string nullHandling = String.Empty;
        uint blobLength = 0;
        Int32 columnToProcessID = 0;

        for (int i = 0; i &lt; ColumnCount; i++)
        {
            var info = inputColumnInfos[i];
            columnToProcessID = info.bufferColumnIndex;

            if (!inputBuffer.IsNull(columnToProcessID))
            {
                nullHandling += "N";
                switch (inputBuffer.GetColumnInfo(columnToProcessID).DataType)
                {
                    case DataType.DT_BOOL:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetBoolean(columnToProcessID));
                        break;
                    case DataType.DT_IMAGE:
                        blobLength = inputBuffer.GetBlobLength(columnToProcessID);
                        Utility.Append(ref inputByteBuffer, ref bufferUsed,
                            inputBuffer.GetBlobData(columnToProcessID, 0, (int)blobLength));
                        nullHandling += blobLength.ToString();
                        break;
                    case DataType.DT_BYTES:
                        byte[] bytesFromBuffer = inputBuffer.GetBytes(columnToProcessID);
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, bytesFromBuffer);
                        nullHandling += bytesFromBuffer.GetLength(0).ToString();
                        break;
                    case DataType.DT_CY:
                    case DataType.DT_DECIMAL:
                    case DataType.DT_NUMERIC:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDecimal(columnToProcessID));
                        break;
                    case DataType.DT_DBDATE:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDate(columnToProcessID), true);
                        break;
                    case DataType.DT_DATE:
                    case DataType.DT_DBTIMESTAMP:
                    case DataType.DT_DBTIMESTAMP2:
                    case DataType.DT_FILETIME:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDateTime(columnToProcessID), true);
                        break;
                    case DataType.DT_DBTIME:
                    case DataType.DT_DBTIME2:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetTime(columnToProcessID));
                        break;
                    case DataType.DT_GUID:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetGuid(columnToProcessID));
                        break;
                    case DataType.DT_I1:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetSByte(columnToProcessID));
                        break;
                    case DataType.DT_I2:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetInt16(columnToProcessID));
                        break;
                    case DataType.DT_I4:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetInt32(columnToProcessID));
                        break;
                    case DataType.DT_I8:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetInt64(columnToProcessID));
                        break;
                    case DataType.DT_NTEXT:
                    case DataType.DT_STR:
                    case DataType.DT_TEXT:
                    case DataType.DT_WSTR:
                        String stringFromBuffer = inputBuffer.GetString(columnToProcessID);
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, stringFromBuffer, Encoding.UTF8);
                        nullHandling += stringFromBuffer.Length.ToString();
                        break;
                    case DataType.DT_R4:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetSingle(columnToProcessID));
                        break;
                    case DataType.DT_R8:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetDouble(columnToProcessID));
                        break;
                    case DataType.DT_UI1:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetByte(columnToProcessID));
                        break;
                    case DataType.DT_UI2:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetUInt16(columnToProcessID));
                        break;
                    case DataType.DT_UI4:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetUInt32(columnToProcessID));
                        break;
                    case DataType.DT_UI8:
                        Utility.Append(ref inputByteBuffer, ref bufferUsed, inputBuffer.GetUInt64(columnToProcessID));
                        break;
                    case DataType.DT_EMPTY:
                    case DataType.DT_NULL:
                    default:
                        break;
                }
            }
            else
            {
                nullHandling += "Y";
            }
        }

        Utility.Append(ref inputByteBuffer, ref bufferUsed, nullHandling, Encoding.UTF8);
        var sha1Hasher = new SHA1CryptoServiceProvider();
        Row.SHA1 = sha1Hasher.ComputeHash(inputByteBuffer);
    }
}
            </File>
                <File Path="Utility.cs">using System;
using System.IO;
using System.Text;

namespace SCP_SHA1_<#=tableNode.Name#>.csproj
{
    public static class Utility
    {
        #region Types to Byte Arrays

        public static byte[] ToArray(bool value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(decimal value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        private static byte[] ToArray(DateTimeOffset value, Boolean millisecondHandling)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(millisecondHandling
                        ? value.ToString("yyyy-MM-dd HH:mm:ss.fffffff zzz")
                        : value.ToString("u"));
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(DateTime value, Boolean millisecondHandling)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(millisecondHandling
                        ? value.ToString("yyyy-MM-dd HH:mm:ss.fffffff")
                        : value.ToString("u"));
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(TimeSpan value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value.ToString());
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(byte value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(Guid value)
        {
            return value.ToByteArray();
        }

        public static byte[] ToArray(short value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(int value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(long value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(float value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(double value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(ushort value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(uint value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(ulong value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        public static byte[] ToArray(sbyte value)
        {
            using (var stream = new MemoryStream())
            {
                using (var writer = new BinaryWriter(stream))
                {
                    writer.Write(value);
                    return stream.ToArray();
                }
            }
        }

        #endregion

        #region Byte Array Appending

        public static void Append(ref byte[] array, ref Int32 bufferUsed, bool value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        private static void Append(ref byte[] array, ref Int32 bufferUsed, System.DateTimeOffset value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value, true));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, DateTime value, Boolean millisecondHandling)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value, millisecondHandling));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, TimeSpan value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, Guid value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, ulong value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, float value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, byte value)
        {
            if (bufferUsed + 1 &gt;= array.Length)
            {
                System.Array.Resize&lt;byte&gt;(ref array, array.Length + 1000);
            }

            array[bufferUsed++] = value;
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, byte[] value)
        {
            var valueLength = value.Length;
            var arrayLength = array.Length;

            if (bufferUsed + valueLength &gt;= arrayLength)
            {
                if (valueLength &gt; 1000)
                {
                    System.Array.Resize&lt;byte&gt;(ref array, arrayLength + valueLength + 1000);
                }
                else
                {
                    System.Array.Resize&lt;byte&gt;(ref array, arrayLength + 1000);
                }
            }

            System.Array.Copy(value, 0, array, bufferUsed, valueLength);
            bufferUsed += valueLength;
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, sbyte value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, short value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, ushort value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, int value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, long value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, uint value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, double value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, decimal value)
        {
            Utility.Append(ref array, ref bufferUsed, Utility.ToArray(value));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, char value, Encoding encoding)
        {
            Utility.Append(ref array, ref bufferUsed, encoding.GetBytes(new char[] { value }));
        }

        public static void Append(ref byte[] array, ref Int32 bufferUsed, string value, System.Text.Encoding encoding)
        {
            Utility.Append(ref array, ref bufferUsed, encoding.GetBytes(value));
        }
        #endregion
    }
}
</File>
            </Files>
            <OutputBuffers>
                <OutputBuffer Name="Output0">
                    <Columns>
                        <Column Name="SHA1" Length="20" DataType="Binary" />
                    </Columns>
                </OutputBuffer>
            </OutputBuffers>
            <InputBuffer Name="Input0">
                <Columns>
                <#	foreach(var columnNode in tableNode.Columns.Where(c => c.IsUsedInPrimaryKey == false && c.IsIdentityColumn == false && c.Length != -1)) { #>
					<Column Name="<#=columnNode#>" <#=GetBimlDatatype(columnNode.DataType.ToString(), columnNode.Length.ToString(), columnNode.Precision.ToString(), columnNode.Scale.ToString())#> UsageType="ReadOnly"  />
				<#	} #>
                </Columns>
            </InputBuffer>
        </ScriptComponentProject>
	<#	} #>
    </ScriptProjects>
	<Packages>
	<# 	foreach (var tableNode in RootNode.Tables) { #>
		<Package Name="Extract <#=tableNode.SsisSafeScopedName#>" ConstraintMode="Linear" PackageSubpath="AW_EXTRACT">
			<Tasks>
				<Dataflow Name="Copy Table">
					<Transformations>
						<OleDbSource Name="OLE_SRC <#=tableNode.SsisSafeScopedName#>" ConnectionName="AW">
							<DirectInput>SELECT <#=tableNode.GetColumnList() #> FROM <#=tableNode.SchemaQualifiedName#></DirectInput>
                        </OleDbSource>
						<ScriptComponentTransformation Name="SC Add SHA1">
                            <InputPath OutputPathName="OLE_SRC <#=tableNode.SsisSafeScopedName#>.Output"></InputPath>
                            <ScriptComponentProjectReference ScriptComponentProjectName="SCP_SHA1_<#=tableNode.Name#>" />                               
                        </ScriptComponentTransformation>
                    </Transformations>
                </Dataflow>
            </Tasks>
        </Package>
        <# } #>
    </Packages>

	
<#+
private string GetBimlDatatype(string dataType, string length, string precision, string scale)
{
    var bimlDatatype = "DataType=\"Byte\"";

    switch (dataType)
    {
        case "Boolean":
		case "SByte":
		case "Single":
        case "Int16":
		case "Int32":
		case "Int64":
		case "Double":
		case "Currency":
		case "Date":
        case "DateTime":
        case "DateTime2":
        case "DateTimeOffset":
        case "Xml":
        	bimlDatatype = "DataType=\"" + dataType + "\"";
            break;
        case "Decimal":
            bimlDatatype = "DataType=\"" + dataType + "\" Precision=\"" + precision + "\" Scale=\"" + scale + "\"";
            break;
        case "VarNumeric":
		case "String":
		case "StringFixedLength":
			bimlDatatype = "DataType=\"" + dataType + "\" Length=\"" + length + "\"";
            break;
		case "AnsiString":
        case "AnsiStringFixedLength":
			bimlDatatype = "DataType=\"Byte\"";
            break;
		default:
            break;
    }

    return bimlDatatype;
}
#>	
</Biml>
                        
You are not authorized to comment. A verification email has been sent to your email address. Please verify your account.

Comments

gravatar

Craig

2:13am 06.11.14

Great Component Peter! Very useful. I did find one minor bug in your code. In the function GetBimlDatatype, you are missing the "Guid" type from the switch statement. Here is a suggested modification: switch (dataType) { case "Boolean": case "SByte": case "Single": case "Int16": case "Int32": case "Int64": case "Double": case "Currency": case "Date": case "DateTime": case "DateTime2": case "DateTimeOffset": case "Xml": case "Guid": bimlDatatype = "DataType=\"" + dataType + "\""; break; case "Decimal": bimlDatatype = "DataType=\"" + dataType + "\" Precision=\"" + precision + "\" Scale=\"" + scale + "\""; break; case "VarNumeric": case "String": case "StringFixedLength": bimlDatatype = "DataType=\"" + dataType + "\" Length=\"" + length + "\""; break; case "AnsiString": case "AnsiStringFixedLength": bimlDatatype = "DataType=\"Byte\""; break; default: break; }

gravatar

Admin

8:49pm 09.21.16

great solution but I am receiving an error when compiling this.

The data type for input buffer column 'BldgNumber' on the ScriptComponent input buffer 'Input0' does not match the mapped output column from the previous transformation.

Any insight would be greatly appreciated.