Thursday 26 September 2013

MetaData Change Data Capture Solution (CDC)


Change Data Capture, CDC is a feature that is revolutionizing SQL Server in respects to data flow.  Previous to change data capture, triggers or high water marks needed to be used in order to perform delta transfers of data to data warehouses or disparate systems.

This example shows the use of a custom CDC solution that utilizes a definition table which stored table and column names in order to create a meta-data driven CDC subscription.

First create your CDC subscription table

CREATE TABLE [dbo].[Columns_To_Monitor_Sto](
    [ColumnID] [INT] IDENTITY(1,1) NOT NULL,
    [DatabaseName] [VARCHAR](40) NULL,
    [SchemaName] [VARCHAR](20) NULL,
    [TableName] [VARCHAR](255) NULL,
    [ColumnName] [VARCHAR](255) NULL,
    [Ordinal] [INT] NULL,
    [IsActive] [tinyint] NULL
 )

With the table now created, we have a meta data repository of columns. Anything added or removed from this table will automatically get added or removed from the CDC subscription.

    DECLARE @DebugMode INT                                     = 1
    DECLARE @source_schema VARCHAR(20)
    DECLARE @source_name VARCHAR(255)
    DECLARE @role_name VARCHAR(20)              = 'cdc_manager'
    DECLARE @supports_net_changes INT           = 1
    DECLARE @captured_column_list VARCHAR(MAX)  = ''
    DECLARE @TableCount INT
    DECLARE @iOrigCount INT
    DECLARE @capture_instance nvarchar(MAX)
    DECLARE @TableName VARCHAR(255)
    DECLARE @IsCDCEnabled INT
   
    CREATE TABLE #TablesToMonitor
    (
        ID                  INT             IDENTITY(1,1)
        ,SchemaName         VARCHAR(20)
        ,TableName          VARCHAR(255)
        ,ColumnsToMonitor   VARCHAR(MAX)
        ,CaptureInstance    VARCHAR(MAX)
        ,IsCDCEnabled       bit
    )  

    INSERT INTO #TablesToMonitor
    (
        TableName
        ,SchemaName
        ,ColumnsToMonitor
        ,CaptureInstance
        ,IsCDCEnabled
    )
    SELECT DISTINCT
        rcm.TableName
        ,rcm.SchemaName
        ,ColumnsToMonitor   = SUBSTRING((SELECT
                                        ',' + r.ColumnName
                                        FROM(
                                            SELECT DISTINCT TableName,ColumnName
                                            FROM dbo.Columns_To_Monitor_Sto
                                            WHERE IsActive = 1
                                        ) r
                                        WHERE (r.TableName = rcm.TableName)
                                        FOR XML PATH('')   ), 2, 8000)
        ,CaptureInstance    = rcm.SchemaName + '_' + rcm.TableName
        ,IsCDCEnabled       = (CASE WHEN ct.object_id IS NULL THEN 0 ELSE 1 END)
    FROM dbo.Columns_To_Monitor_Sto rcm
    LEFT JOIN cdc.change_tables ct
    ON ct.capture_instance = rcm.SchemaName + '_' + rcm.TableName
               
    IF @DebugMode = 1
    BEGIN
        SELECT * FROM #TablesToMonitor
    END            

    -- Count the number of tables to loop
    SET @TableCount = (SELECT MAX(ID) FROM #TablesToMonitor)
   
    IF @DebugMode = 1
    BEGIN
        SELECT @TableCount
    END
   
    WHILE @TableCount > 0
    BEGIN
       
        -- Populate Variables
        SELECT
            @source_schema          = tm.SchemaName
            ,@source_name           = tm.TableName
            ,@capture_instance      = tm.CaptureInstance
            ,@captured_column_list  = tm.ColumnsToMonitor
        FROM #TablesToMonitor tm
        WHERE tm.ID = @TableCount
       
        BEGIN TRY
            -- Disable the table
            EXEC sys.sp_cdc_disable_table
            @source_schema          = @source_schema --mandatory
            ,@source_name           = @source_name --mandatory
            ,@capture_instance      = @capture_instance;
           
        END TRY
        BEGIN CATCH
            SELECT 'Disable CDC Failed', ERROR_MESSAGE(), @source_name
        END CATCH
       
        BEGIN TRY
            -- Enable the table
            EXEC sys.sp_cdc_enable_table
            @source_schema          = @source_schema --mandatory
            , @source_name          = @source_name --mandatory
            , @role_name            = @role_name --mandatory
            , @supports_net_changes = 1
            , @captured_column_list = @captured_column_list
            , @filegroup_name       = N'PRIMARY';
                       
        END TRY
        BEGIN CATCH
            SELECT 'Enable CDC Failed', ERROR_MESSAGE(), @source_name, @captured_column_list
        END CATCH

        SET @TableCount -= 1
   
    END
   
    UPDATE rcm
    SET Ordinal = sys.fn_cdc_get_column_ordinal(SchemaName + '_' + TableName, ColumnName)
    FROM dbo.Columns_To_Monitor_Sto rcm
   
    IF @DebugMode = 1
    BEGIN
       
        SELECT DISTINCT tablename, columnname, 'not in CDC'
        FROM dbo.Columns_To_Monitor_Sto
        WHERE IsActive = 1
        AND columnname NOT IN(
        SELECT Column_Name
        FROM cdc.captured_columns)
       
        SELECT column_name, 'not in Columns_To_Monitor'
        FROM cdc.captured_columns
        WHERE column_name NOT IN (
        SELECT DISTINCT columnname
        FROM dbo.Columns_To_Monitor_Sto
        WHERE IsActive = 1)
    END


No comments: