POJ3693 Maximum repetition substring
? 解题记录 ? ? POJ ? ? 后缀数组 ? ? ST表 ?    2018-12-10 08:34:41    668    0    0

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number( beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

Source

题意:找出一个串循环节最多的子串,n<=1e5。

循环串是后缀数组一个很重要的操作,后缀自动机不是很好实现。

我们对原串正反预处理出后缀数组以及ST表,使得我们可以O(1)查询任意两个前缀最长公共后缀以及两个后缀的最长公共前缀。

然后考虑枚举循环节大小x,把原来的字符串每x个分一段,查出相邻段间的最长公共后缀和最长公共前缀。我们发现这样处理后一个循环节长度为x的循环串一定形如:

我们定义x的大循环串为不是任何一个循环节为x的循环串子串的串。

发现我们只需要对每一个x找到所有大循环串即可。

这里就有一堆很恶心的特判和讨论了。

这样我们直接枚举x,再每一次O(n/x)计算,总复杂度就是O(n log n)的。

写代码时细节很多,首先是要找出所有的合法区间。

考虑这种情况,例如枚举循环节是3:

bacbacbacbacba

实际上有三个串都满足要求

bacbacbacbac
acbacbacbacb
cbacbacbacba

我们来观察它划分后求出的东西:

bac bac bac bac ba

发现其实是因为前缀和后缀拼出的长度大于了(n/3),所以其中每一段长度为4*3的区间都满足条件。

我们可以把这些串都找出来,可以证明,一个串所有的大循环串(x=1~n)是O(n log n)的。

#include<cstdio>
#include<cstring>
#include<iostream>
#include<algorithm>
#define For(i, a, b) for(register int i = a; i <= b; ++i)
#define Down(i, a, b) for(register int i = a; i >= b; --i)
using namespace std;
const int maxn = 1e5 + 5;
char s[maxn];
int ccnt;

namespace pre {
	int low[maxn], ST[20][maxn];
	int SA[maxn], SA2[maxn << 1], H[maxn], RK[maxn << 1], tot[maxn];
	
	void Rsort(int n, int mx) {
		For(i, 1, mx) tot[i] = 0;
		For(i, 1, n) ++tot[RK[i]];
		For(i, 1, mx) tot[i] += tot[i - 1];
		Down(i, n, 1) SA[tot[RK[SA2[i]]]--] = SA2[i];
	}
	
	inline bool cmp(int x, int y, int l) {
		return SA2[x] == SA2[y] && SA2[x + l] == SA2[y + l];
	}
	
	int Create(char *s) {
		int n = strlen(s + 1);
		For(i, 1, n) RK[i] = s[i], SA2[i] = i;
		Rsort(n, 128);
		for(register int l = 1, p = 0; l < n; l <<= 1) {
			p = 0;
			For(i, n - l + 1, n) SA2[++p] = i;
			For(i, 1, n) if(SA[i] > l) SA2[++p] = SA[i] - l;
			Rsort(n, p), swap(SA2, RK), RK[SA[1]] = p = 1;
			For(i, 2, n) RK[SA[i]] = (cmp(SA[i], SA[i - 1], l)) ? p : ++p;
		}
		for(register int i = 1, k = 0, j; i <= n; H[RK[i++]] = k)
			for(k ? --k : k, j = SA[RK[i] - 1]; s[i + k] == s[j + k]; ++k);
		For(i, 1, n) ST[0][i] = H[i];
		low[1] = 0;
		for(register int i = 2; i <= n; ++i)
			low[i] = low[(i + 1) / 2] + 1;
		for(register int i = 1, p = 1; i < n; i <<= 1, ++p)
			for(register int j = i; j <= n; ++j)
				ST[p][j] = min(ST[p - 1][j], ST[p - 1][j - i]);
		return n;
	}
	
	inline int Query(int l, int r) {
		int len = low[r - l + 1];
		return min(ST[len][l + (1 << len) - 1], ST[len][r]);
	}
	
	int lcp(int x, int y) {
		if(RK[x] > RK[y]) swap(x, y);
		return Query(RK[x] + 1, RK[y]);
	}
};

namespace suf {
	int low[maxn], ST[20][maxn];
	int SA[maxn], SA2[maxn << 1], H[maxn], RK[maxn << 1], tot[maxn];
	
	void Rsort(int n, int mx) {
		For(i, 1, mx) tot[i] = 0;
		For(i, 1, n) ++tot[RK[i]];
		For(i, 1, mx) tot[i] += tot[i - 1];
		Down(i, n, 1) SA[tot[RK[SA2[i]]]--] = SA2[i];
	}
	
	inline bool cmp(int x, int y, int l) {
		return SA2[x] == SA2[y] && SA2[x + l] == SA2[y + l];
	}
	
	int Create(char *s) {
		int n = strlen(s + 1);
		For(i, 1, n) RK[i] = s[i], SA2[i] = i;
		Rsort(n, 128);
		for(register int l = 1, p = 0; l < n; l <<= 1) {
			p = 0;
			For(i, n - l + 1, n) SA2[++p] = i;
			For(i, 1, n) if(SA[i] > l) SA2[++p] = SA[i] - l;
			Rsort(n, p), swap(SA2, RK), RK[SA[1]] = p = 1;
			For(i, 2, n) RK[SA[i]] = (cmp(SA[i], SA[i - 1], l)) ? p : ++p;
		}
		for(register int i = 1, k = 0, j; i <= n; H[RK[i++]] = k)
			for(k ? --k : k, j = SA[RK[i] - 1]; s[i + k] == s[j + k]; ++k);
		For(i, 1, n) ST[0][i] = H[i];
		low[1] = 0;
		for(register int i = 2; i <= n; ++i)
			low[i] = low[(i + 1) / 2] + 1;
		for(register int i = 1, p = 1; i < n; i <<= 1, ++p)
			for(register int j = i; j <= n; ++j)
				ST[p][j] = min(ST[p - 1][j], ST[p - 1][j - i]);
		return n;
	}
	
	inline int Query(int l, int r) {
		int len = low[r - l + 1];
		return min(ST[len][l + (1 << len) - 1], ST[len][r]);
	}
	
	int lcp(int x, int y) {
		if(RK[x] > RK[y]) swap(x, y);
		return Query(RK[x] + 1, RK[y]);
	}
};

int u, v, ans, con, lst, flag = 0;
int al, ar, L, R, nl, nr, nans;
int main() {
	//freopen("test.txt", "r", stdin);
	//freopen("test.out", "w", stdout);
	int t = 0;
	while(++t) {
		flag = 0, ans = 0;
		scanf("%s", s + 1);
		if(s[1] == '#') break;
		int n = pre::Create(s);
		reverse(s + 1, s + n + 1);
		suf::Create(s);
		reverse(s + 1, s + n + 1);
		for(register int i = 1; i <= n; ++i) {
			L = lst = con = 0; 
			for(register int l = i + 1, r = i + i; l <= n; l += i, r += i) {
				u = pre::lcp(l, l - i);
				if(r <= n) v = suf::lcp(n - r + 1, n - r + i + 1);
				nans = 0;
				if(u >= i && r < n) {
					++con;
				} else {
					nr = l + u - 1;
					nl = nr - ((con + 1) * i) - u - lst + 1;
					lst = v;
					if((nr - nl + 1) / i < 2) continue;
					nans = ((nr - nl + 1) / i);
					if(nans > ans) {
						ans = nans, al = nl, ar = nl + nans * i - 1;
						for(register int j = nl + 1; j + nans * i - 1 <= nr; ++j)
							if(pre::RK[al] > pre::RK[j])
								al = j, ar = j + nans * i - 1;
					} else if(nans == ans)
						for(register int j = nl; j + nans * i - 1 <= nr; ++j)
							if(pre::RK[al] > pre::RK[j])
								al = j, ar = j + nans * i - 1;
					con = 0;
				}
			}
		}
		printf("Case %d: ", t);
		for(register int i = al; i <= ar; ++i)
			putchar(s[i]);
		putchar('\n');
	}
	return 0;
}


上一篇: POJ 3608 Bridge Across Islands

下一篇: HDU5608 function

668 人读过
立即登录, 发表评论.
没有帐号? 立即注册
0 条评论
文档导航