Pergunta

I used to calculate longest common Substring using dynamic programming O(m * n), suffix tree O(m + n), suffix array O(nlog^2 n) according to my need. Recently I have learnt Suffix Automaton which performs in O(n) which is very impressive.

I can write the code by which I can calculate the length of longest common substring easily. For example:

Input:
abcdef
xyzabc

Output:
3

And this is the code:

#include <bits/stdc++.h>
using namespace std;

const int maxN = 250500;
const int maxState = maxN << 1;

struct State {
    State *go[26], *suffix;
    int depth, id;
    long long cnt;
};
State pool[maxState], *point, *root, *sink;
int size;

State *newState(int dep) {
    point->id = size++;
    point->depth = dep;
    return point++;
}

void init() {
    point = pool;
    size = 0;
    root = sink = newState(0);
}

void insert(int a) {
    State *p = newState(sink->depth+1);
    State *cur = sink, *sufState;
    while (cur && !cur->go[a]) {
        cur->go[a] = p;
        cur = cur->suffix;
    }
    if (!cur)
        sufState = root;
    else {
        State *q = cur->go[a];
        if (q->depth == cur->depth + 1)
            sufState = q;
        else {
            State *r = newState(cur->depth+1);
            memcpy(r->go, q->go, sizeof(q->go));
            r->suffix = q->suffix;
            q->suffix = r;
            sufState = r;
            while (cur && cur->go[a] == q) {
                cur->go[a] = r;
                cur = cur->suffix;
            }
        }
    }
    p->suffix = sufState;
    sink = p;
}

int work(char buf[]) {
    //printf("%s", buf);
    int len = strlen(buf);
    int tmp = 0, ans = 0;
    State *cur = root;
    for (int i = 0; i < len; i++) {
        if (cur->go[buf[i]-'a']) {
            tmp++;
            cur = cur->go[buf[i]-'a'];
        } else {
            while (cur && !cur->go[buf[i]-'a'])
                cur = cur->suffix;
            if (!cur) {
                cur = root;
                tmp = 0;
            } else {
                tmp = cur->depth + 1;
                cur = cur->go[buf[i]-'a'];
            }
        }
        ans = max(ans, tmp);

    }
    return ans;
}

char ch[maxN];

int main() {
    scanf("%s", ch);
    init();
    int len = strlen(ch);
    for (int i = 0; i < len; i++)
        insert(ch[i]-'a');
    scanf("%s", ch);
    printf("%d\n", work(ch));
    return 0;
}

But now I need to print the longest Common Substring itself, not the length. But I can't be able to modify my code :( How this code can be modified to print the longest common Substring?

Foi útil?

Solução

When you are at this line:

ans = max(ans, tmp);

The starting position in buf that achieved depth tmp was i - tmp + 1. Now you know the positions of all longest common substrings in the second string. Just pick any and output the result.

Licenciado em: CC-BY-SA com atribuição
Não afiliado a StackOverflow
scroll top