Wednesday, October 13, 2010

How to validate a CSV file while importing to a SQL table

This post is based on How to import CSV or Text file to SQL table using SqlBulkCopy in C#, which contain all the explanation about the data import validating excising records and bulk import.

Validating file schema is the next hurdle. For this I have imported the Importer’s column mapping class to handle column type and column width. So that invalid file will be thrown out with an exception.
Based on the table, 
I add column mappings with column types and widths of text fields
///
/// Set column mappings
///
importer.DateTimeFormat = "dd-MMM-yyyy";
importer.ColumnMappings.Add(new ColumnMapping("Id""Id"DataType.Short));
importer.ColumnMappings.Add(new ColumnMapping("Name""Name"DataType.Text, 50));
importer.ColumnMappings.Add(new ColumnMapping("Grade""Grade"DataType.Text, 3));
 Will create a schema file like this,
When we input a CSV file in correct format like this
Importer will import the file. But an Invalid file like this will be thrown with an exception.

Full example
Mark-up:
<%@ Page Language="C#" %>
<html>
<head id="Head1" runat="server">
</head>
<body>
    <form id="form1" runat="server">
        <asp:FileUpload runat="server" ID="fuFile" />
        <asp:Button runat="server" ID="btnImport" OnClick="Import" Text="Import" />
        <asp:DropDownList runat="server" ID="ddlFileType">
            <asp:ListItem Value="CSV" Selected="True">CSV Delimited</asp:ListItem>
            <asp:ListItem Value="TAB">Tab Delimited</asp:ListItem>
        </asp:DropDownList>
    </form>
</body>
</html>
Code:
namespace ActiveTest
{
    public partial class Test : Page
    {
        protected void Import(object sender, EventArgs e)
        {
            String path = Server.MapPath("~/Data/");
            try
            {
                if (this.fuFile.HasFile)
                {
                    /// 
                    /// Upload file
                    ///
                    string fileName = this.fuFile.FileName;
                    string fullName = Path.Combine(path, fileName);
                    if (File.Exists(fullName)) File.Delete(fullName);
                    this.fuFile.PostedFile.SaveAs(fullName);
                    ///
                    /// Instantiate DelimitedDataImporter and set connection string
                    ///
                    DelimiterType type = DelimiterType.CsvDelimited;
                    if (this.ddlFileType.SelectedValue.Equals("TAB")) type = DelimiterType.TabDelimited;
                    DelimitedFileImporter importer = new DelimitedFileImporter(type);
                    importer.ConnectionString =
                        ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString;
                    ///
                    /// Find existing records and set filter statement [OPTIONAL]
                    ///
                    StringBuilder sb = new StringBuilder(string.Empty);
                    using (SqlConnection con = new SqlConnection(
                        ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString))
                    {
                        SqlCommand cmd = new SqlCommand("SELECT Id FROM Student", con);
                        con.Open();
                        SqlDataReader reader = cmd.ExecuteReader();
                        while (reader.Read())
                            sb.AppendFormat("{0},", reader.GetInt32(0).ToString());
                        reader.Close();
                        con.Close();
                    }
                    string ids = string.Empty;
                    if (!string.IsNullOrEmpty(sb.ToString())) ids = sb.ToString().Remove(sb.Length - 1, 1);
                    if (!string.IsNullOrEmpty(ids)) importer.Filter =
                        string.Format("Id NOT IN ({0})", ids);
                    ///
                    /// Set column mappings
                    ///
                    importer.DateTimeFormat = "dd-MMM-yyyy";
                    importer.ColumnMappings.Add(new ColumnMapping("Id""Id"DataType.Short));
                    importer.ColumnMappings.Add(new ColumnMapping("Name""Name"DataType.Text, 50));
                    importer.ColumnMappings.Add(new ColumnMapping("Grade""Grade"DataType.Text, 3));
                    ///
                    /// Import data to sql table
                    ///
                    importer.ImportToDatabase(fullName, "Student");
                }
            }
            catch
            {
                throw new Exception("Import Faild");
            }
        }      
    }
 
    #region EventArgs Specification
 
    public enum DelimiterType
    {
        TabDelimited,
        CsvDelimited,
        CustomDelimited
    }
    public class ImportDelimitedEventArgs : EventArgs
    {
        private ReadOnlyCollection<object> content;
        private int lineNumber;
 
        public ImportDelimitedEventArgs(OleDbDataReader reader, int number)
        {
            object[] columns = new object[reader.FieldCount];
            reader.GetValues(columns);
            content = new ReadOnlyCollection<object>(columns);
            lineNumber = number;
        }
        public bool BreakImport { getset; }
        public int LineNumber
        {
            get { return lineNumber; }
        }
        public ReadOnlyCollection<object> Content
        {
            get { return content; }
        }
    }
 
    #endregion
 
    #region Mappings
 
    public class ColumnMapping
    {
        public string SourceColumn { getset; }
        public string DestinationColumn { getset; }
        public DataType Type { getset; }
        public int Width { getset; }
        public bool IsValidatble { getset; }
        public ColumnMapping(string sourceColumn, string destinationColumn)
        {
            this.SourceColumn = sourceColumn;
            this.DestinationColumn = destinationColumn;
        }
        public ColumnMapping(string sourceColumn, string destinationColumn, DataType type)
        {
            this.SourceColumn = sourceColumn;
            this.DestinationColumn = destinationColumn;
            this.Type = type;
            if (this.Type == DataType.Text)
                throw new ArgumentNullException("Width should be specified when the data type is Text");
            this.IsValidatble = true;
        }
        public ColumnMapping(string sourceColumn, string destinationColumn, DataType type, int width)
        {
            this.SourceColumn = sourceColumn;
            this.DestinationColumn = destinationColumn;
            this.Type = type;
            this.Width = width;
            this.IsValidatble = true;
        }
    }
    public class ColumnMappingCollection : List<ColumnMapping>
    {
        public bool IsValidatable
        {
            get
            {
                foreach (ColumnMapping m in this) 
                    if (!m.IsValidatble) return false;
                return true;
            }
        }
        public bool HasDateFields
        {
            get
            {
                foreach (ColumnMapping m in thisif (m.Type == DataType.DateTime) return true;
                return false;
            }
        }
    }
 
    #endregion
 
    #region Types
 
    public enum DataType
    {
        Bit,
        Byte,
        Short,
        Long,
        Currency,
        Single,
        Double,
        DateTime,
        Text,
        Memo
    }
 
    #endregion
 
    #region Importer
 
    public class DelimitedFileImporter
    {
        #region Event handler
 
        public event EventHandler<ImportDelimitedEventArgs> ProcessLine;
        private bool HandleLine(OleDbDataReader reader, int number)
        {
            bool result = true;
            EventHandler<ImportDelimitedEventArgs> temp = ProcessLine;
            if (temp != null)
            {
                ImportDelimitedEventArgs args = new ImportDelimitedEventArgs(reader, number);
                temp(this, args);
                result = !args.BreakImport;
            }
            return result;
        }
 
        #endregion
 
        #region Constructors
 
        public DelimitedFileImporter()
            : this(DelimiterType.TabDelimited, null)
        {
            this.Initialize();
        }
        public DelimitedFileImporter(DelimiterType delimiter)
            : this(delimiter, null)
        {
            this.Initialize();
        }
        public DelimitedFileImporter(DelimiterType delimiterType, string delimiter)
        {
            this.Delimiter = delimiterType;
            this.CustomDelimiter = delimiter;
            this.Initialize();
        }
        private void Initialize()
        {
            this.ColumnMappings = new ColumnMappingCollection();
        }
 
        #endregion
 
        #region Properties
 
        public DelimiterType Delimiter { getset; }
        public string CustomDelimiter { get;set;}
        public string Filter { getset; }
        public string ConnectionString { getset; }
        public string DateTimeFormat { getset; }
        public ColumnMappingCollection ColumnMappings { getset; }
 
        #endregion
 
        #region Methods
 
        public void ImportToDatabase(string fileName, string tableName)
        {
            if (string.IsNullOrEmpty(this.ConnectionString))
                throw new ArgumentNullException("Connection String");
            FileInfo file = new FileInfo(fileName);
            WriteSchemaIniFile(file);
            using (OleDbConnection con = JetConnection(file))
            {
                using (OleDbCommand cmd = JetCommand(file, con))
                {
                    con.Open();
                    using (OleDbDataReader reader = cmd.ExecuteReader())
                    {
                        SqlBulkCopy sqlBulk = new SqlBulkCopy(this.ConnectionString);
                        sqlBulk.DestinationTableName = tableName;
                        foreach (ColumnMapping map in this.ColumnMappings)
                            sqlBulk.ColumnMappings.Add(map.DestinationColumn, map.SourceColumn);
                        sqlBulk.WriteToServer(reader);
                    }
                }
            }
        }
        private OleDbConnection JetConnection(FileInfo file)
        {
            StringBuilder connection = new StringBuilder("Provider=Microsoft.Jet.OLEDB.4.0");
            connection.AppendFormat(";Data Source=\"{0}\"", file.DirectoryName);
            connection.Append(";Extended Properties='text;HDR=Yes");
            if (this.Delimiter == DelimiterType.CustomDelimited)
            {
                if (this.CustomDelimiter == null)
                    throw new InvalidOperationException("Custom delimiter is not specified");
                connection.AppendFormat(";FMT=Delimited({1})"this.CustomDelimiter);
            }
            connection.Append("';");
            return new OleDbConnection(connection.ToString());
        }
        private OleDbCommand JetCommand(FileInfo file, OleDbConnection con)
        {
            StringBuilder commandText = new StringBuilder("SELECT * FROM ");
            commandText.AppendFormat("[{0}]", file.Name);
            if (this.Filter != null)
            {
                commandText.Append(" WHERE ");
                commandText.Append(this.Filter);
            }
            OleDbCommand cmd = new OleDbCommand(commandText.ToString(), con);
            cmd.CommandTimeout = 60000;
            return cmd;
        }
        private void WriteSchemaIniFile(FileInfo file)
        {
            string schema = Path.Combine(file.DirectoryName, "Schema.ini");
            if (!File.Exists(schema)) File.Delete(schema);
            using (StreamWriter writer = new StreamWriter(schema))
            {
                writer.WriteLine(string.Format(CultureInfo.InvariantCulture, "[{0}]", file.Name));
                if (this.ColumnMappings.IsValidatable) writer.WriteLine("ColNameHeader=True");             
                switch (this.Delimiter)
                {
                    case DelimiterType.CustomDelimited:
                        writer.WriteLine(string.Format(CultureInfo.InvariantCulture,
                            "Format=Delimited({0})"this.CustomDelimiter));
                        break;
                    case DelimiterType.CsvDelimited:
                    case DelimiterType.TabDelimited:
                    default:
                        writer.WriteLine(string.Format(CultureInfo.InvariantCulture,
                            "Format={0}"this.Delimiter));
                        break;
                }
                if (this.ColumnMappings.HasDateFields)
                {
                    if (string.IsNullOrEmpty(this.DateTimeFormat))
                        throw new ArgumentNullException("DateTimeFormat missing");
                    else
                        writer.WriteLine(string.Format(CultureInfo.InvariantCulture, 
                            "DateTimeFormat={0}"this.DateTimeFormat));
                }
                if (this.ColumnMappings.IsValidatable)
                {
                    foreach (ColumnMapping m in this.ColumnMappings)
                    {
                        writer.WriteLine("Col{0}={1} {2}{3}", 
                            this.ColumnMappings.IndexOf(m) + 1, m.SourceColumn, 
                                m.Type, m.Type == DataType.Text ? " Width " + m.Width.ToString() : string.Empty);
                    }
                }
            }
        }
 
        #endregion
    }
 
    #endregion
}

1 comment:

Anonymous said...

Interesting code. Will test soon. Seems like lots of work has gone into it.

Azure Storage Account Types

Defferent Types of Blobs Block blobs store text and binary data. Block blobs are made up of blocks of data that can be managed individually...