Question

I am trying to extract database connection strings from an XML file which looks like this:

<DTS:Property DTS:Name="ConnectionString">
    Data Source=Instance1;
    User ID=;
    Initial Catalog=DB;
    Provider=SQLOLEDB.1;
    Integrated Security=SSPI;
    Auto Translate=False;    
</DTS:Property>
</DTS:ConnectionManager>
    </DTS:ObjectData>
</DTS:ConnectionManager>
<snip>

Now ideally I would like a group to come back with

[0] = Data Source=Instance1;User ID=;Initial Catalog=DB;Provider=SQLOLEDB.1;Integrated Security=SSPI;Auto Translate=False;

[1] = DB

So I have this regex:

<DTS:Property DTS:Name="ConnectionString">(?<cs>.*)(?<cs>)</DTS:Property>

This captures the value in the first group. Is there a way to write a regex that captures both the full connection string and only the database name (the Initial Catalog)?

Was it helpful?

Solution

Sure. Plenty of ways to skin a cat with regexen. See below.

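For production code, though, I recommend a connection string builder (e.g. DbConnectionStringBuilder or OleDbConnectionStringBuilder), which already knows the quoting and escaping rules.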


For fun and education, here's a regexy solution to the question:

Note: I'm deliberately showing only how to tackle the connection string with regex, since the XML itself should be handled with XmlReader, XDocument, XPathReader or XmlDocument (etc.).

See it live on http://ideone.com/I47R8

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Demo program: splits a semicolon-delimited connection string into its
/// key/value pairs with a single regular expression, then prints the pairs,
/// the "Initial Catalog" value and the full connection string.
/// </summary>
/// <remarks>
/// Quoted values that themselves contain ';' are not supported by this regex;
/// use a connection string builder when full fidelity is required.
/// </remarks>
public class Program
{
    // One "key = value" pair. The key may not contain '=' or ';' and the value
    // may not contain ';', so a malformed segment makes the whole match fail
    // instead of being absorbed into a neighbouring key or value by backtracking.
    private const string KvPair = @"(?<pair>(?<key>[^=;]+?)\s*=\s*(?<value>[^;]*?))";

    // Whole connection string: one or more pairs, each terminated by ';' or by
    // the end of the input. The named group "cs" holds the text without leading whitespace.
    private const string CsPattern = @"^\s*(?<cs>(?:" + KvPair + @"\s*(?:;\s*|$))+)$";

    // Built once and reused rather than re-parsing the pattern on every call.
    private static readonly Regex CsRegex = new Regex(CsPattern);

    // Input used when no command-line argument is supplied.
    private const string SampleInput = @"Data Source=Instance1;User ID=;Initial Catalog=DB;Provider=SQLOLEDB.1;Integrated Security=SSPI;Auto Translate=False;";

    /// <summary>
    /// Parses a connection string and writes the parsed pieces to standard output.
    /// </summary>
    /// <param name="args">
    /// Optional. When at least one argument is present, <c>args[0]</c> is parsed;
    /// otherwise the built-in sample connection string is used.
    /// </param>
    public static void Main(string[] args)
    {
        string input = (args != null && args.Length > 0) ? args[0] : SampleInput;

        Match match = CsRegex.Match(input);
        Console.WriteLine("======\nSuccess: {0}\n======", match.Success);
        if (!match.Success)
        {
            // Nothing was captured, so there is nothing further to report.
            return;
        }

        // Every repetition of the pair group records exactly one key capture and
        // one value capture, so the two collections line up index by index.
        CaptureCollection keys = match.Groups["key"].Captures;
        CaptureCollection values = match.Groups["value"].Captures;

        // Keywords are matched case-insensitively; when a keyword is repeated the
        // last occurrence wins instead of raising a duplicate-key exception.
        var dict = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        for (int i = 0; i < keys.Count; i++)
        {
            dict[keys[i].Value] = values[i].Value;
        }

        foreach (var kvp in dict)
        {
            Console.WriteLine(kvp);
        }

        // Getting just the database name (Initial Catalog):
        string catalog;
        if (!dict.TryGetValue("Initial Catalog", out catalog))
        {
            catalog = "<not specified>";
        }
        Console.WriteLine("======\nCatalog: {0}\n======", catalog);

        // Raw access to the sub-captures of the full match:
        foreach (Capture cap in keys)
        {
            Console.WriteLine("Key: '{0}'", cap.Value);
        }
        foreach (Capture cap in values)
        {
            Console.WriteLine("Value: '{0}'", cap.Value);
        }

        Console.WriteLine("======\nConnection string: '{0}'", match.Groups["cs"].Value);
    }
}

With the proper output:

======
Success: True
======
[Data Source, Instance1]
[User ID, ]
[Initial Catalog, DB]
[Provider, SQLOLEDB.1]
[Integrated Security, SSPI]
[Auto Translate, False]
======
Catalog: DB
======
Key: 'Data Source'
Key: 'User ID'
Key: 'Initial Catalog'
Key: 'Provider'
Key: 'Integrated Security'
Key: 'Auto Translate'
Value: 'Instance1'
Value: ''
Value: 'DB'
Value: 'SQLOLEDB.1'
Value: 'SSPI'
Value: 'False'
======
Connection string: 'Data Source=Instance1;User ID=;Initial Catalog=DB;Provider=SQLOLEDB.1;Integrated Security=SSPI;Auto Translate=False;'
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top