1. 题目链接:P3763 [TJOI2017]DNA2022年01月24日,第十天
A m a z i n g ! ! ! Amazing!!! Amazing!!! 这题居然有四种思路解决。
思路1:多项式解决,时间复杂度 O ( n l o g n ) O(nlog n) O(nlog n),总共跑了 12 12 12 次 F F T FFT FFT ,常数过大,开 O 2 O2 O2 才 A C AC AC。由于 D N A DNA DNA 序列最多有 4 4 4 种不同的字符,我们可以考虑如下做法,分别考虑每一个字符 c c c ,对于两个串 s , t s, t s, t ,长度分别为 n , m n, m n, m,则有 a [ i ] = [ s [ i ] = = c ] , b [ i ] = [ t [ i ] = = c ] a[i] = [s[i]==c], b[i]=[t[i]==c] a[i]=[s[i]==c], b[i]=[t[i]==c] , a [ i ] a[i] a[i] 和 b [ i ] b[i] b[i] 都是多项式,则有每一个位置的匹配数量为 ∑ i = 0 m − 1 a [ i + k ] ⋅ b [ i ] displaystylesum_{i = 0}^{m - 1}a[i+k]cdot b[i] i=0∑m−1a[i+k]⋅b[i] ,我们发现这东西很像 F F T FFT FFT ,我们使用一个技巧,将 b [ i ] b[i] b[i] 翻转,令 T [ i ] = b [ m − 1 − i ] T[i] = b[m - 1-i] T[i]=b[m−1−i] ,那么 ∑ i = 0 m − 1 a [ i + k ] ⋅ T [ m − 1 − i ] displaystylesum_{i = 0}^{m - 1}a[i + k]cdot T[m - 1 - i] i=0∑m−1a[i+k]⋅T[m−1−i] ,下标和为定值 m − 1 + k m - 1 + k m−1+k ,直接上 N T T NTT NTT 或 F F T FFT FFT 即可。
#include#define ull unsigned long long #define ll long long #define re register #define endl 'n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 4e5 + 5; struct node { double x, y; node (double x=0, double y=0): x(x), y(y) {} node operator * (node r) { return node (x*r.x - y*r.y, x*r.y + y*r.x); } node operator - (node r) { return node (x - r.x, y - r.y); } node operator + (node r) { return node (x + r.x, y + r.y); } }a[N], b[N]; int rev[N], bit, lim; void fft (node* f, int flag) { for (int i = 0; i < lim; i ++) if (rev[i] > i) swap (f[i], f[rev[i]]); for (int k = 1; k < lim; k <<= 1) { node wnk = node (cos (Pi/k), flag * sin (Pi/k)); for (int i = 0; i < lim; i += k << 1) { node w = node (1, 0); for (int j = 0; j < k; j ++, w = w * wnk) { node nx = f[i + j], ny = w * f[i + j + k]; f[i + j] = nx + ny; f[i + j + k] = nx - ny; } } } if (flag == -1) for (int i = 0; i < lim; i ++) f[i].x /= lim; } char ch[] = {'A', 'G', 'C', 'T'}; char s[N], t[N]; int ans[N], n, m; int solve () { n = strlen (s), m = strlen (t); for (lim = 1, bit = 0; lim < n + m; lim <<= 1) bit ++; for (int i = 0; i < lim; i ++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1)); for (int i = 0; i < lim; i ++) ans[i] = 0; for (int j = 0; j < 4; j ++) { for (int i = 0; i < n; i ++) a[i] = node (s[i] == ch[j], 0); for (int i = n; i < lim; i ++) a[i] = node (); for (int i = 0; i < m; i ++) b[i] = node (t[m - i - 1] == ch[j], 0); for (int i = m; i < lim; i ++) b[i] = node (); fft (a, 1), fft (b, 1); for (int i = 0; i < lim; i ++) a[i] = a[i] * b[i]; fft (a, -1); for (int i = 0; i < n - m + 1; i ++) ans[i] += int(a[i + m - 1].x + 0.5); } int res = 0; for (int i = 0; i < n - m + 1; i ++) if (m - ans[i] <= 3) res ++; return res; } int main() { int T; scanf ("%d", &T); while (T --) { scanf ("%s%s", s, t); printf ("%dn", solve ()); } return 0; }
思路2:后缀自动机解决,时间复杂度 O ( n ) O(n) O(n) 。根据后缀自动机的性质,从源点出发,得到的状态是原串的子串,每一个点的 e n d p o s endpos endpos 大小就是该子串出现的次数。由此我们直接 d f s dfs dfs ,在允许有 3 3 3 次跳过的前提下线性求出贡献。
#include#define ull unsigned long long #define ll long long #define re register #define endl 'n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 2e5 + 5; struct node { int link, len, ch[26]; }st[N << 1]; int sz, last, sum[N]; void sam_init () { for (int i = 0; i <= sz; i ++) { st[i].len = st[i].link = sum[i] = 0; memset (st[i].ch, 0, sizeof (st[i].ch)); } sz = last = 1; } void sam_extend (int c) { int p = last, cur = ++ sz; st[cur].len = st[p].len + 1, last = cur, sum[cur] = 1; for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur; if (! p) st[cur].link = 1; else { int q = st[p].ch[c]; if (st[p].len + 1 == st[q].len) st[cur].link = q; else { int clone = ++ sz; st[clone].len = st[p].len + 1; st[clone].link = st[q].link; memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch)); for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone; st[cur].link = st[q].link = clone; } } } char ch[] = {'A', 'C', 'G', 'T'}; char s[N], t[N]; int ns, nt, a[N], b[N], ans; void dfs (int u, int j, int tot) { if (j > nt) return void(ans += sum[u]); for (int i = 0; i < 4; i ++) { int v = ch[i] - 'A'; if (st[u].ch[v] == 0) continue; if (ch[i] == t[j]) dfs (st[u].ch[v], j + 1, tot); else if (tot < 3) dfs (st[u].ch[v], j + 1, tot + 1); } } int main() { int T; scanf ("%d", &T); while (T --) { scanf ("%s%s", s + 1, t + 1); sam_init (), ns = strlen (s + 1), nt = strlen (t + 1); for (int i = 1; i <= ns; i ++) sam_extend (s[i] - 'A'); for (int i = 1; i <= ns; i ++) b[i] = 0; for (int i = 1; i <= sz; i ++) b[st[i].len] ++; for (int i = 1; i <= ns; i ++) b[i] += b[i - 1]; for (int i = 1; i <= sz; i ++) a[b[st[i].len] --] = i; for (int i = sz; i >= 1; i --) sum[st[a[i]].link] += sum[a[i]]; dfs (1, 1, ans = 0); printf ("%dn", ans); } return 0; }
思路3:后缀数组解决,时间复杂度 O ( n l o g n ) O(n log n) O(n log n) ,常数稍大,但不开 O 2 O2 O2 也可以过。具体做法是将两个串合并在一起(后缀数组解题常常需要这么做),然后求出 h e i g h t height height 数组以及它的 S T ST ST 表,用于 O ( 1 ) O(1) O(1) 求出两个后缀的 L C P LCP LCP ,依照题目意思我们最多有三次机会可以跳过,即我们要对每一个可能的位置做三次 L C P LCP LCP ,如果可以匹配,那么答案贡献加一。
#include#define ull unsigned long long #define ll long long #define endl 'n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 2e5 + 5; int sa[N], rk[N], hei[N]; int x[N], y[N << 1], c[N]; char s[N], t[N]; int ns, nt, m, n; void get_sa () { for (int i = 1; i <= m; i ++) c[i] = 0; for (int i = 1; i <= n; i ++) c[x[i] = s[i]] ++; for (int i = 2; i <= m; i ++) c[i] += c[i - 1]; for (int i = n; i >= 1; i --) sa[c[x[i]] --] = i; for (int k = 1; k <= n; k <<= 1) { int num = 0; for (int i = n - k + 1; i <= n; i ++) y[++ num] = i; for (int i = 1; i <= n; i ++) if (sa[i] > k) y[++ num] = sa[i] - k; for (int i = 1; i <= m; i ++) c[i] = 0; for (int i = 1; i <= n; i ++) c[x[y[i]]] ++; for (int i = 2; i <= m; i ++) c[i] += c[i - 1]; for (int i = n; i >= 1; i --) sa[c[x[y[i]]] --] = y[i], y[i] = 0; for (int i = 1; i <= n; i ++) swap (x[i], y[i]); x[sa[1]] = num = 1; for (int i = 2; i <= n; i ++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++ num; if (n == num) return ; m = num; } } void get_height () { for (int i = 1; i <= n; i ++) rk[sa[i]] = i; for (int i = 1, k = 0; i <= n; i ++) { if (k) k --; int j = sa[rk[i] - 1]; while (j + k <= n && s[i + k] == s[j + k]) k ++; hei[rk[i]] = k; } } int st[N][19], Lg[N]; void get_st () { for (int i = 1; i <= n; i ++) st[i][0] = hei[i]; for (int i = 2; i <= n; i ++) Lg[i] = Lg[i >> 1] + 1; for (int j = 1; j <= 18; j ++) for (int i = 1; i + (1 << j) - 1 <= n; i ++) st[i][j] = min (st[i][j - 1], st[i + (1 << (j - 1))][j - 1]); } int lcp (int x, int y) { x = rk[x], y = rk[y]; if (x > y) swap (y, x); int k = Lg [y - x]; return min (st[x + 1][k], st[y - (1 << k) + 1][k]); } int main() { int T; scanf ("%d", &T); while (T --) { scanf ("%s%s", s + 1, t + 1); ns = strlen (s + 1), nt = strlen (t + 1); n = ns + nt, m = 256; for (int i = 1; i <= nt; i ++) s[i + ns] = t[i]; int cas = ns - nt + 1; if (cas <= 0) printf ("0n"); else { get_sa (), get_height (), get_st (); int ans = 0; for (int i = 1; i <= cas; i ++) { int re = 0, len = 0; while (re <= 3 && len <= nt) { int num = lcp (i + len, ns + len + 1); len += num + (re < 3), re ++; } if (len >= nt) ans ++; } printf ("%dn", ans); } } return 0; }
思路4:二分加 h a s h hash hash 解决,时间复杂度为 O ( n l o g n ) O(nlog n) O(nlog n) ,常数超小( N B ! ! ! NB!!! NB!!!),易于编程。具体操作和后缀数组那里一致。
#include2. 题目链接:SP1812 LCS2 - Longest Common Substring II#define ull unsigned long long #define ll long long #define re register #define endl 'n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 1e5 + 5; const int k1 = 13331; ull hs1[N], hs2[N], bin[N]; char s[N], t[N]; int n, m; inline ull calc1 (int l, int r) { return hs1[r] - hs1[l - 1] * bin[r - l + 1]; } inline ull calc2 (int l, int r) { return hs2[r] - hs2[l - 1] * bin[r - l + 1]; } inline int lcp (int x, int y) { int l = 1, r = m - y + 1; while (l <= r) { int mid = (l + r) >> 1; if (calc1 (x, x + mid - 1) == calc2 (y, y + mid - 1)) l = mid + 1; else r = mid - 1; } return l - 1; } int main() { ios::sync_with_stdio(false), cin.tie(0), cout.tie(0); int T; cin >> T; bin[0] = 1; while (T --) { cin >> (s + 1) >> (t + 1); n = strlen (s + 1), m = strlen (t + 1); int ans = 0; for (int i = 1; i <= n; i ++) { bin[i] = bin[i - 1] * k1; hs1[i] = hs1[i - 1] * k1 + s[i]; } for (int i = 1; i <= m; i ++) hs2[i] = hs2[i - 1] * k1 + t[i]; for (int i = 1, p = 0; i <= n - m + 1; i ++, p = 0) { for (int cas = 0; cas < 4 && p <= m; cas ++) p += lcp (i + p, 1 + p) + (cas < 3); ans += p >= m; } cout << ans << endl; } return 0; }
思路1:后缀数组解法,时间复杂度 O ( n l o g n ) O(nlog n) O(nlog n) ,扩展两个字符串的最长公共子串到多个字符串的最长公共子串。我们将所有字符串连接起来,并且用一个特殊的字符分隔开,当 s a [ l ⋯ r ] sa[lcdots r] sa[l⋯r] 里包含所有用于分隔的特殊字符,那么这个区间内 h e i g h t height height 数组的最小值就是答案,依照这个思路,我们可以二分答案 O ( n l o g n ) O(nlog n) O(nlog n),或者用单调队列加 t w o − p o i n t e r two-pointer two−pointer O ( n ) O(n) O(n),下面程序使用单调队列维护 h e i g h t height height 数组的最小值。
#include#define ull unsigned long long #define ll long long #define re register #define endl 'n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 1e6 + 5; int sa[N], rk[N], hei[N]; int x[N], y[N << 1], c[N]; char s[N], str[100100]; int n, m, idx; void get_sa () { for (int i = 1; i <= n; i ++) c[x[i] = s[i]] ++; for (int i = 2; i <= m; i ++) c[i] += c[i - 1]; for (int i = n; i >= 1; i --) sa[c[x[i]] --] = i; for (int k = 1; k <= n; k <<= 1) { int num = 0; for (int i = n - k + 1; i <= n; i ++) y[++ num] = i; for (int i = 1; i <= n; i ++) if (sa[i] > k) y[++ num] = sa[i] - k; for (int i = 1; i <= m; i ++) c[i] = 0; for (int i = 1; i <= n; i ++) c[x[y[i]]] ++; for (int i = 2; i <= m; i ++) c[i] += c[i - 1]; for (int i = n; i >= 1; i --) sa[c[x[y[i]]] --] = y[i], y[i] = 0; for (int i = 1; i <= n; i ++) swap (x[i], y[i]); x[sa[1]] = num = 1; for (int i = 2; i <= n; i ++) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++ num; if (n == num) break; m = num; } } void get_height () { for (int i = 1; i <= n; i ++) rk[sa[i]] = i; for (int i = 1, k = 0; i <= n; i ++) { if (k) k --; int j = sa[rk[i] - 1]; while (j + k <= n && s[i + k] == s[j + k]) k ++; hei[rk[i]] = k; } } int vis[N], tot, v[100], q[N], tt, hh; void add (int i) { if (! vis[sa[i]]) return ; v[vis[sa[i]]] ++; tot += v[vis[sa[i]]] == 1; } void del (int i) { if (! vis[sa[i]]) return ; v[vis[sa[i]]] --; tot -= v[vis[sa[i]]] == 0; } int solve () { int ans = 0; add (hh = 1); for (int l = 1, r = 2; r <= n; r ++) { while (hh <= tt && hei[q[tt]] >= hei[r]) tt --; add (r), q[++ tt] = r; if (tot == idx) { while (l < r && tot == idx) del (l ++); add (-- l); while (hh <= tt && q[hh] <= l) hh ++; ans = max (ans, hei[q[hh]]); } } return ans; } int main() { ios::sync_with_stdio(false), cin.tie(0), cout.tie(0); while (cin >> (str + 1)) { s[++ n] = ++ idx; for (int i = 1; str[i]; i ++) s[++ n] = str[i], vis[n] = idx; } m = 256; get_sa (); get_height (); cout << solve () << endl; return 0; }
思路2:后缀自动机解法,时间复杂度 O ( n ) O(n) O(n) ,具体操作见代码注释。
#include3. 题目链接:P5341 [TJOI2019]甲苯先生和大中锋的字符串#define ull unsigned long long #define ll long long #define re register #define endl 'n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 1e5 + 5; struct node { int len, link, ch[26]; }st[N << 1]; int sz, last; void sam_init () { sz = last = 1; } void sam_extend (int c) { int cur = ++ sz, p = last; st[cur].len = st[p].len + 1, last = cur; for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur; if (! p) st[cur].link = 1; else { int q = st[p].ch[c]; if (st[p].len + 1 == st[q].len) st[cur].link = q; else { int clone = ++ sz; st[clone].len = st[p].len + 1; st[clone].link = st[q].link; memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch)); for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone; st[q].link = st[cur].link = clone; } } } char s[N]; int a[N << 1], b[N], n; int g[N << 1], f[N << 1]; int main() { ios::sync_with_stdio(false), cin.tie(0), cout.tie(0); sam_init (); cin >> s, n = strlen (s); for (int i = 0; i < n; i ++) sam_extend (s[i] - 'a'); for (int i = 1; i <= sz; i ++) b[st[i].len] ++; for (int i = 1; i <= n; i ++) b[i] += b[i - 1]; for (int i = sz; i >= 1; i --) a[b[st[i].len] --] = i; for (int i = 1; i <= sz; i ++) g[i] = st[i].len; while (cin >> s) { n = strlen (s); for (int i = 1; i <= sz; i ++) f[i] = 0; for (int i = 0, v = 1, len = 0; i < n; i ++) { int c = s[i] - 'a'; // 到某个位置匹配不出去,我们缩减原串的长度,即找到它的后缀,此时我们往后缀链接跳即可。 while (v && ! st[v].ch[c]) v = st[v].link, len = st[v].len; if (! v) v = 1, len = 0; if (st[v].ch[c]) ++ len, v = st[v].ch[c]; // 匹配成功,那么匹配出去即可。 f[v] = max (f[v], len); // 更新该结点的长度 } // a[i]的endpos 是 st[a[i]].link的endpos 的 子集,往上更新长度大小 for (int i = sz; i >= 1; i --) f[st[a[i]].link] = max (f[st[a[i]].link], f[a[i]]); for (int i = 1; i <= sz; i ++) g[i] = min (f[i], g[i]); } cout << *max_element (g + 1, g + sz + 1) << endl; return 0; }
思路:后缀自动机解决,时间复杂度 O ( n ) O(n) O(n) ,求出每个状态 e n d p o s endpos endpos 的大小 s i z [ i ] siz[i] siz[i],对于每个结点 i i i 表示长度为 l e n [ s t [ i ] . l i n k ] + 1 len[st[i].link]+1 len[st[i].link]+1 到 l e n [ i ] len[i] len[i] 的子串出现次数为 s i z [ i ] siz[i] siz[i] ,对于区间修改,可以考虑差分。
#include#define ull unsigned long long #define ll long long #define re register #define endl 'n' using namespace std; const int MOD = 998244353; const int inf = 0x3f3f3f3f; const double eps = 1e-8; const double Pi = acos(-1.0); const int N = 2e5 + 5; struct node { int len, link, ch[26]; }st[N]; int sz, last, siz[N]; void sam_init () { for (int i = 0; i <= sz; i ++) { st[i].link = st[i].len = siz[i] = 0; memset (st[i].ch, 0, sizeof (st[i].ch)); } sz = last = 1; } void sam_extend (int c) { int cur = ++ sz, p = last; st[cur].len = st[p].len + 1, last = cur, siz[cur] = 1; for ( ; p && ! st[p].ch[c]; p = st[p].link) st[p].ch[c] = cur; if (! p) st[cur].link = 1; else { int q = st[p].ch[c]; if (st[p].len + 1 == st[q].len) st[cur].link = q; else { int clone = ++ sz; st[clone].len = st[p].len + 1; st[clone].link = st[q].link; memcpy (st[clone].ch, st[q].ch, sizeof (st[q].ch)); for ( ; p && st[p].ch[c] == q; p = st[p].link) st[p].ch[c] = clone; st[cur].link = st[q].link = clone; } } } char s[N]; int k, n, a[N], b[N]; int main() { ios::sync_with_stdio(false), cin.tie(0), cout.tie(0); int T; cin >> T; while (T --) { cin >> (s + 1) >> k; sam_init (), n = strlen (s + 1); for (int i = 1; i <= n; i ++) sam_extend (s[i] - 'a'); for (int i = 0; i <= n; i ++) b[i] = 0; for (int i = 1; i <= sz; i ++) b[st[i].len] ++; for (int i = 1; i <= n; i ++) b[i] += b[i - 1]; for (int i = sz; i >= 1; i --) a[b[st[i].len] --] = i; for (int i = sz; i >= 1; i --) siz[st[a[i]].link] += siz[a[i]]; for (int i = 0; i <= n; i ++) b[i] = 0; int ans = -1, cnt = 1; for (int i = 1; i <= sz; i ++) if (siz[i] == k && st[i].len != n) b[st[st[i].link].len + 1] ++, b[st[i].len + 1] --; for (int i = 1; i <= n; i ++) { b[i] += b[i - 1]; if (b[i] >= cnt) cnt = b[i], ans = i; } cout << ans << endl; } return 0; }



