Hi Folks,
The built-in Split function in C# is great for splitting a delimited string, or a record when reading text data from files, but what happens when you need to split a string that contains text qualifiers, like this:
Romiko, Derbynew, 29, "52 SurfSide Street, Durban, South Africa"
You are going to run into issues when using the built-in C# string Split function, because it also splits on the commas inside the quoted address.
I checked out a number of places on the net, and many of the samples are SLOW. So I thought it would be best to create a low-level version that is extremely fast.
Here it is:
/// <summary>
/// Splits <paramref name="expression"/> into fields on <paramref name="delimiter"/>
/// (a line feed also ends a field), without splitting inside text enclosed by a
/// pair of <paramref name="qualifier"/> characters.
/// </summary>
/// <param name="expression">The delimited record to split.</param>
/// <param name="delimiter">The character that separates fields.</param>
/// <param name="qualifier">The character that opens and closes a block that must not be split.</param>
/// <param name="ignoreCase">
/// When true, the delimiter and qualifier are matched case-insensitively.
/// The returned fields always keep their original casing.
/// </param>
/// <returns>
/// The fields in order of appearance. Qualifier characters are left in the field text.
/// A delimiter that is the very last character does not produce a trailing empty field.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="expression"/> is null.</exception>
/// <exception cref="ArgumentException">A qualifier is opened but never closed.</exception>
public string[] Split(string expression, char delimiter, char qualifier, bool ignoreCase)
{
    if (expression == null)
    {
        throw new ArgumentNullException("expression");
    }

    if (ignoreCase)
    {
        // Only the comparison keys are folded; the input itself is never
        // lower-cased, so the extracted fields are returned unmodified.
        delimiter = char.ToLower(delimiter);
        qualifier = char.ToLower(qualifier);
    }

    int len = expression.Length;
    List<string> list = new List<string>();

    for (int begin = 0; begin < len; ++begin)
    {
        char symbol = ignoreCase ? char.ToLower(expression[begin]) : expression[begin];

        // A separator found where a field should start means the field is empty.
        if (symbol == delimiter || symbol == '\n')
        {
            list.Add(string.Empty);
            continue;
        }

        // Scan forward to the separator that ends this field (or to the end of input).
        int end = begin;
        for (; end < len; ++end)
        {
            symbol = ignoreCase ? char.ToLower(expression[end]) : expression[end];

            if (symbol == qualifier)
            {
                // Bypass the unsplittable block: jump to the closing qualifier.
                bool foundClosingSymbol = false;
                for (end = end + 1; end < len; ++end)
                {
                    symbol = ignoreCase ? char.ToLower(expression[end]) : expression[end];
                    if (symbol == qualifier)
                    {
                        foundClosingSymbol = true;
                        break;
                    }
                }

                if (!foundClosingSymbol)
                {
                    throw new ArgumentException("expression contains an unclosed qualifier symbol");
                }

                // The loop increment steps past the closing qualifier.
                continue;
            }

            if (symbol == delimiter || symbol == '\n')
            {
                break;
            }
        }

        list.Add(expression.Substring(begin, end - begin));

        // Resume after the separator; the outer loop increment skips it.
        begin = end;
    }

    return list.ToArray();
}
I ran the above with 10,000 or so records and it completed within about 2 seconds. Some slow versions that I found on the net took over 6-7 minutes:
Here are some slow ones that you may run into on the net.
using System.Text.RegularExpressions;
/// <summary>
/// Splits <paramref name="expression"/> on <paramref name="delimiter"/> using a regular
/// expression, ignoring delimiters that sit between a pair of qualifiers.
/// </summary>
/// <param name="expression">The delimited record to split.</param>
/// <param name="delimiter">The text that separates fields (escaped before use in the pattern).</param>
/// <param name="qualifier">
/// The text qualifier (escaped before use in the pattern). It is placed inside a
/// character class, so the pattern is only meaningful for a single-character qualifier.
/// </param>
/// <param name="ignoreCase">When true, the pattern is matched case-insensitively.</param>
/// <returns>The fields in order of appearance; qualifiers are left in the field text.</returns>
/// <remarks>
/// A new <see cref="Regex"/> is constructed with <see cref="RegexOptions.Compiled"/> on every
/// call. Compiling a regex is expensive, which is a large part of why this version is slow
/// when it is called once per record.
/// </remarks>
public string[] Split(string expression, string delimiter, string qualifier, bool ignoreCase)
{
    // The lookahead accepts a delimiter only if the rest of the input contains
    // complete qualifier pairs and no further qualifier after them, i.e. the
    // delimiter is not inside an open qualified block.
    string pattern = String.Format(
        "{0}(?=(?:[^{1}]*{1}[^{1}]*{1})*(?![^{1}]*{1}))",
        Regex.Escape(delimiter),
        Regex.Escape(qualifier));

    RegexOptions options = RegexOptions.Compiled | RegexOptions.Multiline;
    if (ignoreCase)
    {
        options = options | RegexOptions.IgnoreCase;
    }

    Regex splitter = new Regex(pattern, options);
    return splitter.Split(expression);
}
/// <summary>
/// Splits <paramref name="expression"/> on <paramref name="delimiter"/> by scanning it
/// once, ignoring delimiters that appear while a <paramref name="qualifier"/> is open.
/// </summary>
/// <param name="expression">The delimited record to split.</param>
/// <param name="delimiter">
/// The text that separates fields. When null or empty, no splitting is performed.
/// </param>
/// <param name="qualifier">
/// The text that toggles the "inside a qualified block" state. When null or empty,
/// every delimiter splits.
/// </param>
/// <param name="ignoreCase">When true, delimiter and qualifier are matched case-insensitively.</param>
/// <returns>
/// The fields in order of appearance. Qualifiers are left in the field text, and a
/// delimiter at the very end of the input does not produce a trailing empty field.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="expression"/> is null.</exception>
public string[] Split(string expression, string delimiter, string qualifier, bool ignoreCase)
{
    if (expression == null)
    {
        throw new System.ArgumentNullException("expression");
    }

    // Empty tokens are treated like missing ones: an empty token would match at
    // every position and the scan could never advance past it.
    bool hasQualifier = !string.IsNullOrEmpty(qualifier);
    bool hasDelimiter = !string.IsNullOrEmpty(delimiter);

    // Delimiters and qualifiers are structural tokens, so compare them ordinally.
    System.StringComparison comparison = ignoreCase
        ? System.StringComparison.OrdinalIgnoreCase
        : System.StringComparison.Ordinal;

    bool insideQualifier = false;
    int startIndex = 0;
    System.Collections.ArrayList values = new System.Collections.ArrayList();

    int charIndex = 0;
    while (charIndex < expression.Length)
    {
        // The indexed Compare overload compares at most `length` characters and
        // copes with fewer remaining, so no Substring (and no range error) is needed.
        if (hasQualifier
            && string.Compare(expression, charIndex, qualifier, 0, qualifier.Length, comparison) == 0)
        {
            insideQualifier = !insideQualifier;
            charIndex += qualifier.Length;
        }
        else if (!insideQualifier
            && hasDelimiter
            && string.Compare(expression, charIndex, delimiter, 0, delimiter.Length, comparison) == 0)
        {
            values.Add(expression.Substring(startIndex, charIndex - startIndex));

            // Skip the whole delimiter, not just its first character.
            charIndex += delimiter.Length;
            startIndex = charIndex;
        }
        else
        {
            charIndex++;
        }
    }

    // Whatever follows the last delimiter is the final field.
    if (startIndex < expression.Length)
    {
        values.Add(expression.Substring(startIndex, expression.Length - startIndex));
    }

    string[] returnValues = new string[values.Count];
    values.CopyTo(returnValues);
    return returnValues;
}
So I hope you have fun with split functions, and if you are in the mood to make other custom functions, I am keen to see them!
- Uncategorized