URAL 1713 – Key Substrings [后缀数组+RMQ]

Link

http://acm.timus.ru/problem.aspx?space=1&num=1713

Problem

Although the program committee works as one team, heated debates arise frequently enough. For example, there is no agreement upon which client of the version control system is more convenient to use: a graphic interface program or a console client.
Let us consider some command of a console client. A substring of this command that is not a substring of any other command of this client can be called a key substring because it uniquely identifies the command. In the latest versions of the client, it is not necessary to type the whole command; it is sufficient to type any of its key substrings.

A supporter of the console client wants to convince the program committee to use it. In order to show how fast and convenient the work with this client is, he wants to find a key substring of minimal length for each command. Help him do it.
The first line contains the number n of commands in the console client (2 ≤ n ≤ 1000). Each of the following n lines contains one command of the client. Each command is a nonempty string consisting of lowercase Latin letters and its length is at most 100. No command is a substring of another command.
Output n lines. The i-th line should contain any of the shortest key substrings of the i-th command (the commands are numbered in the order they are given in the input).

Mean

给你N个字符串,每个字符串要找一个最短的字串成为key,使得这个key在其他串中不存在

Analysis

首先把所有的串拼起来求后缀数组,相当于把所有串的后缀进行排序。排序后对于每个串的后缀,找到前后两个部在同一原串中的后缀求最小前缀(RMQ+height数组的常见用法),这个长度+1就是把他作为key的答案。然后对于原串中的所有后缀求最优答案即可。

Code

#include <bits/stdc++.h>
using namespace std;
typedef pair<int, int> P;

const int maxn = 1024*105;

struct SuffixArray {
    static const int MAXN = maxn;
    int *s;
    int rank[MAXN], sa[MAXN], X[MAXN], Y[MAXN], height[MAXN];
    int buc[MAXN];

    void init(int *str) {
        memset(sa, 0, sizeof(sa));
        s = str;
    }
    bool cmp(int *r, int a, int b, int l) {
        return (r[a] == r[b] && r[a + l] == r[b + l]);
    }
    void calc(int n, int m = 128) {
        int i, k, p, *x = X, *y = Y;
        for (i = 0; i < m; i++) buc[i] = 0;
        for (i = 0; i < n; i++) buc[x[i] = s[i]]++;
        for (i = 1; i < m; i++) buc[i] += buc[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--buc[x[i]]] = i;
        for (k = 1, p = 1; p < n; m = p, k *= 2) {
            p = 0;
            for (i = n - k; i < n; i++) y[p++] = i;
            for (i = 0; i < n; i++) if (sa[i] >= k) y[p++] = sa[i] - k;
            for (i = 0; i < m; i++) buc[i] = 0;
            for (i = 0; i < n; i++) buc[x[y[i]]]++;
            for (i = 1; i < m; i++) buc[i] += buc[i - 1];
            for (i = n - 1; i >= 0; i--) sa[--buc[x[y[i]]]] = y[i];
            for (swap(x, y), x[sa[0]] = 0, i = 1, p = 1; i < n; i++)
                x[sa[i]] = cmp(y, sa[i - 1], sa[i], k) ? p - 1 : p++;
        }
        calheight(n - 1);
    }
    void calheight(int n) {
        int i, j, k = 0;
        for (i = 1; i <= n; i++) rank[sa[i]] = i;
        for (i = 0; i < n; height[rank[i++]] = k)
            for (k ? k-- : 0, j = sa[rank[i] - 1]; s[i + k] == s[j + k]; k++);
    }
}sa;

struct RMQ {
    int d[maxn][30];
    void init(const int *A, int n) {
        for(int i = 0; i < n; ++i) d[i][0] = A[i];
        for(int j = 1; (1<<j) <= n; ++j)
            for(int i = 0; i + (1<<j) - 1 < n; ++i)
                d[i][j] = min(d[i][j-1], d[i+(1<<(j-1))][j-1]);
    }
    int query(int L, int R) {
        int k = 0;
        while((1<<(k+1)) <= R-L+1) ++k;
        return min(d[L][k], d[R-(1<<k)+1][k]);
    }
}rmq;

struct Node {
    int id, pos;
    Node(int id, int pos) : id(id), pos(pos) {}
    bool operator < (const Node &rhs) const {
        return pos < rhs.pos;
    }
};

int s[maxn];
char t[maxn];
int N, Len;
int pa[maxn];
int ans[maxn];
vector<P> seg;

stack<Node> qu;
void solve(int i) {
    int curPos = sa.sa[i];
    int curId = pa[curPos];
    while(!qu.empty() && qu.top().id == curId)
        qu.pop();
    int ret = qu.empty() ? -1 : qu.top().pos;
    qu.push(Node(curId, i));
    if(ret != -1) {
        int L = min(ret, i) + 1;
        int R = max(ret, i);
        ans[curPos] = max(ans[curPos], rmq.query(L, R));
    }
}

int main() {
    scanf("%d", &N);
    memset(pa, -1, sizeof(pa));
    for(int i = 1; i <= N; ++i) {
        scanf("%s", t);
        size_t len = strlen(t);
        seg.push_back(P(Len, Len + len));
        for(int j = 0; j < len; ++j) {
            s[Len] = t[j] - 'a' + 1001;
            pa[Len] = i;
            Len += 1;
        }
        s[Len++] = i;
    }
    s[Len++] = 0;
    sa.init(s);
    sa.calc(Len, 1100);
    rmq.init(sa.height, Len + 1);

    memset(ans, 0, sizeof(ans));
    for(int i = N + 1; i < Len; ++i)
        solve(i);
    while(!qu.empty())
        qu.pop();
    for(int i = Len - 1; i >= N + 1; --i)
        solve(i);
    for(auto i : seg) {
        int best = -1, len = 0;
        for(int j = i.first; j < i.second; ++j) {
            if(ans[j] >= i.second-j)
                continue;
            if(best == -1 || ans[j]+1 < len) {
                best = j;
                len = ans[j]+1;
            }
        }
        for(int j = best; j < best + len; ++j)
            putchar(s[j]-1001+'a');
        puts("");
    }
    return 0;
}

欢迎留言