Question

I've written the following code in C# for obtaining the length of longest common subsequence of two texts given by use, but it doesn't work with large strings. Could you please help me. I'm really confused.

public Form1()
{
    InitializeComponent();
}

public int lcs(char[] s1, char[] s2, int s1size, int s2size)
{
    if (s1size == 0 || s2size == 0)
    {
        return 0;
    }
    else
    {
        if (s1[s1size - 1] == s2[s2size - 1])
        {
            return (lcs(s1, s2, s1size - 1, s2size - 1) + 1);
        }
        else
        {
            int x = lcs(s1, s2, s1size, s2size - 1);
            int y = lcs(s1, s2, s1size - 1, s2size);
            if (x > y)
            {
                return x;
            }
            else
                return y;
        }
    }
}

private void button1_Click(object sender, EventArgs e)
{
    string st1 = textBox2.Text.Trim(' ');
    string st2 = textBox3.Text.Trim(' ');

    char[] a = st1.ToCharArray();
    char[] b = st2.ToCharArray();

    int s1 = a.Length;
    int s2 = b.Length;

    textBox1.Text = lcs(a, b, s1, s2).ToString(); 
}
Was it helpful?

Solution

Here you are using the Recursion method. So it leads the program to occur performance problems as you mentioned.

Instead of recursion, use the dynamic programming approach.

Here is the C# Code.

public static void LCS(char[] str1, char[] str2)
    {
        int[,] l = new int[str1.Length, str2.Length];
        int lcs = -1;
        string substr = string.Empty;
        int end = -1;

        for (int i = 0; i < str1.Length; i++)
        {
            for (int j = 0; j < str2.Length; j++)
            {
                if (str1[i] == str2[j])
                {
                    if (i == 0 || j == 0)
                    {
                        l[i, j] = 1;
                    }
                    else
                        l[i, j] = l[i - 1, j - 1] + 1;
                    if (l[i, j] > lcs)
                    {
                        lcs = l[i, j];
                        end = i;
                    }

                }
                else
                    l[i, j] = 0;
            }
        }

        for (int i = end - lcs + 1; i <= end; i++)
        {
            substr += str1[i];
        }

        Console.WriteLine("Longest Common SubString Length = {0}, Longest Common Substring = {1}", lcs, substr);
    } 

OTHER TIPS

Here is a solution how to find the longest common substring in C#:

public static string GetLongestCommonSubstring(params string[] strings)
{
    var commonSubstrings = new HashSet<string>(strings[0].GetSubstrings());
    foreach (string str in strings.Skip(1))
    {
        commonSubstrings.IntersectWith(str.GetSubstrings());
        if (commonSubstrings.Count == 0)
            return string.Empty;
    }

    return commonSubstrings.OrderByDescending(s => s.Length).DefaultIfEmpty(string.Empty).First();
}

private static IEnumerable<string> GetSubstrings(this string str)
{
    for (int c = 0; c < str.Length - 1; c++)
    {
        for (int cc = 1; c + cc <= str.Length; cc++)
        {
            yield return str.Substring(c, cc);
        }
    }
}

I found it here: http://www.snippetsource.net/Snippet/75/longest-common-substring

Just for fun, here is one example using Queue<T>:

string LongestCommonSubstring(params string[] strings)
{
    return LongestCommonSubstring(strings[0], new Queue<string>(strings.Skip(1)));
}

string LongestCommonSubstring(string x, Queue<string> strings)
{
    if (!strings.TryDequeue(out var y))
    {
        return x;
    }

    var output = string.Empty;

    for (int i = 0; i < x.Length; i++)
    {
        for (int j = x.Length - i; j > -1; j--)
        {
            string common = x.Substring(i, j);
            if (y.IndexOf(common) > -1 && common.Length > output.Length) output = common;
        }
    }

    return LongestCommonSubstring(output, strings);
}

It's still using recursion though, but it's a nice example of Queue<T>.

I refactored the C++ code from Ashutosh Singh at https://iq.opengenus.org/longest-common-substring-using-rolling-hash/ to create a rolling hash approach in C# - this will find the substring in O(N * log(N)^2) time and O(N) space

using System;
using System.Collections.Generic;
public class RollingHash
{
    private class RollingHashPowers
    {
        // _mod = prime modulus of polynomial hashing
        // any prime number over a billion should suffice
        internal const int _mod = (int)1e9 + 123;
        // _hashBase = base (point of hashing)
        // this should be a prime number larger than the number of characters used
        // in my use case I am only interested in ASCII (256) characters
        // for strings in languages using non-latin characters, this should be much larger
        internal const long _hashBase = 257;
        // _pow1 = powers of base modulo mod
        internal readonly List<int> _pow1 = new List<int> { 1 };
        // _pow2 = powers of base modulo 2^64
        internal readonly List<long> _pow2 = new List<long> { 1L };

        internal void EnsureLength(int length)
        {
            if (_pow1.Capacity < length)
            {
                _pow1.Capacity = _pow2.Capacity = length;
            }
            for (int currentIndx = _pow1.Count - 1; currentIndx < length; ++currentIndx)
            {
                _pow1.Add((int)(_pow1[currentIndx] * _hashBase % _mod));
                _pow2.Add(_pow2[currentIndx] * _hashBase);
            }
        }
    }

    private class RollingHashedString
    {
        readonly RollingHashPowers _pows;
        readonly int[] _pref1; // Hash on prefix modulo mod
        readonly long[] _pref2; // Hash on prefix modulo 2^64

        // Constructor from string:
        internal RollingHashedString(RollingHashPowers pows, string s, bool caseInsensitive = false)
        {
            _pows = pows;
            _pref1 = new int[s.Length + 1];
            _pref2 = new long[s.Length + 1];

            const long capAVal = 'A';
            const long capZVal = 'Z';
            const long aADif = 'a' - 'A';

            unsafe
            {
                fixed (char* c = s)
                {
                    // Fill arrays with polynomial hashes on prefix
                    for (int i = 0; i < s.Length; ++i)
                    {
                        long v = c[i];
                        if (caseInsensitive && capAVal <= v && v <= capZVal)
                        {
                            v += aADif;
                        }
                        _pref1[i + 1] = (int)((_pref1[i] + v * _pows._pow1[i]) % RollingHashPowers._mod);
                        _pref2[i + 1] = _pref2[i] + v * _pows._pow2[i];
                    }
                }
            }
        }

        // Rollingnomial hash of subsequence [pos, pos+len)
        // If mxPow != 0, value automatically multiply on base in needed power.
        // Finally base ^ mxPow
        internal Tuple<int, long> Apply(int pos, int len, int mxPow = 0)
        {
            int hash1 = _pref1[pos + len] - _pref1[pos];
            long hash2 = _pref2[pos + len] - _pref2[pos];
            if (hash1 < 0)
            {
                hash1 += RollingHashPowers._mod;
            }
            if (mxPow != 0)
            {
                hash1 = (int)((long)hash1 * _pows._pow1[mxPow - (pos + len - 1)] % RollingHashPowers._mod);
                hash2 *= _pows._pow2[mxPow - (pos + len - 1)];
            }
            return Tuple.Create(hash1, hash2);
        }
    }

    private readonly RollingHashPowers _rhp;
    public RollingHash(int longestLength = 0)
    {
        _rhp = new RollingHashPowers();
        if (longestLength > 0)
        {
            _rhp.EnsureLength(longestLength);
        }
    }

    public string FindCommonSubstring(string a, string b, bool caseInsensitive = false)
    {
        // Calculate max neede power of base:
        int mxPow = Math.Max(a.Length, b.Length);
        _rhp.EnsureLength(mxPow);
        // Create hashing objects from strings:
        RollingHashedString hash_a = new RollingHashedString(_rhp, a, caseInsensitive);
        RollingHashedString hash_b = new RollingHashedString(_rhp, b, caseInsensitive);

        // Binary search by length of same subsequence:
        int pos = -1;
        int low = 0;
        int minLen = Math.Min(a.Length, b.Length);
        int high = minLen + 1;
        var tupleCompare = Comparer<Tuple<int, long>>.Default;
        while (high - low > 1)
        {
            int mid = (low + high) / 2;
            List<Tuple<int, long>> hashes = new List<Tuple<int, long>>(a.Length - mid + 1);
            for (int i = 0; i + mid <= a.Length; ++i)
            {
                hashes.Add(hash_a.Apply(i, mid, mxPow));
            }
            hashes.Sort(tupleCompare);
            int p = -1;
            for (int i = 0; i + mid <= b.Length; ++i)
            {
                if (hashes.BinarySearch(hash_b.Apply(i, mid, mxPow), tupleCompare) >= 0)
                {
                    p = i;
                    break;
                }
            }
            if (p >= 0)
            {
                low = mid;
                pos = p;
            }
            else
            {
                high = mid;
            }
        }
        // Output answer:
        return pos >= 0
            ? b.Substring(pos, low)
            : string.Empty;
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top