This documentation is automatically generated by online-judge-tools/verification-helper
View the Project on GitHub ebi-fly13/icpc_library
#include "convolution/ntt4.hpp"
#pragma once #include"../utility/modint.hpp" namespace lib { // only for modint998244353 template<typename mint> struct NTT { using uint = unsigned int; static constexpr uint mod = mint::mod(); static constexpr ull mod2 = (ull)mod * mod; static constexpr uint pr = 3; // for modint998244353 static constexpr int level = 23; // for modint998244353 array<mint,level+1> wp, wm; void set_ws(){ mint r = mint(pr).pow((mod-1) >> level); wp[level] = r, wm[level] = r.inv(); for (int i = level-1; i >= 0; i--){ wp[i] = wp[i+1] * wp[i+1]; wm[i] = wm[i+1] * wm[i+1]; } } NTT () { set_ws(); } void fft4(vector<mint> &a, int k){ uint im = wm[2].val(); uint n = 1<<k; uint len = n; int d = k; while (len > 1){ if (d == 1){ for (int i = 0; i < (1<<(k-1)); i++){ a[i*2+0] += a[i*2+1]; a[i*2+1] = a[i*2+0] - a[i*2+1] * 2; } len >>= 1; d -= 1; } else { int len4 = len/4; int nlen = n/len; ull r1 = 1, r2 = 1, r3 = 1, imr1 = im, imr3 = im; for (int i = 0; i < len4; i++){ for (int j = 0; j < nlen; j++){ uint a0 = a[len4*0+i + len*j].val(); uint a1 = a[len4*1+i + len*j].val(); uint a2 = a[len4*2+i + len*j].val(); uint a3 = a[len4*3+i + len*j].val(); uint a0p2 = a0 + a2; uint a1p3 = a1 + a3; ull b0m2 = (a0 + mod - a2) * r1; ull b1m3 = (a1 + mod - a3) * imr1; ull c0m2 = (a0 + mod - a2) * r3; ull c1m3 = (a1 + mod - a3) * imr3; a[len4*0+i + len*j] = a0p2 + a1p3; a[len4*1+i + len*j] = b0m2 + b1m3; a[len4*2+i + len*j] = (a0p2 + mod*2 - a1p3) * r2; a[len4*3+i + len*j] = c0m2 + mod2*2 - c1m3; } r1 = r1 * wm[d].val() % mod; r2 = r1 * r1 % mod; r3 = r1 * r2 % mod; imr1 = im * r1 % mod; imr3 = im * r3 % mod; } len >>= 2; d -= 2; } } } void ifft4(vector<mint> &a, int k){ uint im = wp[2].val(); uint n = 1<<k; uint len = (k & 1 ? 2 : 4); int d = (k & 1 ? 1 : 2); while (len <= n){ if (d == 1){ for (int i = 0; i < (1<<(k-1)); i++){ a[i*2+0] += a[i*2+1]; a[i*2+1] = a[i*2+0] - a[i*2+1] * 2; } len <<= 2; d += 2; } else { int len4 = len/4; int nlen = n/len; ull r1 = 1, r2 = 1, r3 = 1, imr1 = im, imr3 = im; for (int i = 0; i < len4; i++){ for (int j = 0; j < nlen; j++){ ull a0 = a[len4*0+i + len*j].val(); ull a1 = a[len4*1+i + len*j].val() * r1; ull a2 = a[len4*2+i + len*j].val() * r2; ull a3 = a[len4*3+i + len*j].val() * r3; ull b1 = a[len4*1+i + len*j].val() * imr1; ull b3 = a[len4*3+i + len*j].val() * imr3; ull a0p2 = a0 + a2; ull a1p3 = a1 + a3; ull a0m2 = a0 + mod2 - a2; ull b1m3 = b1 + mod2 - b3; a[len4*0+i + len*j] = a0p2 + a1p3; a[len4*1+i + len*j] = a0m2 + b1m3; a[len4*2+i + len*j] = a0p2 + mod2*2 - a1p3; a[len4*3+i + len*j] = a0m2 + mod2*2 - b1m3; } r1 = r1 * wp[d].val() % mod; r2 = r1 * r1 % mod; r3 = r1 * r2 % mod; imr1 = im * r1 % mod; imr3 = im * r3 % mod; } len <<= 2; d += 2; } } } vector<mint> multiply(const vector<mint> &a, const vector<mint> &b){ if (a.empty() || b.empty()) return {}; int d = a.size() + b.size() - 1; if (min<int>(a.size(), b.size()) <= 40){ vector<mint> s(d); rep(i,0,a.size()) rep(j,0,b.size()) s[i+j] += a[i]*b[j]; return s; } int k = 2, M = 4; while (M < d) M <<= 1, ++k; vector<mint> s(M), t(M); rep(i,0,a.size()) s[i] = a[i]; rep(i,0,b.size()) t[i] = b[i]; fft4(s,k); fft4(t,k); rep(i,0,M) s[i] *= t[i]; ifft4(s, k); s.resize(d); mint invm = mint(M).inv(); rep(i,0,d) s[i] *= invm; return s; } }; } // namespace lib
#line 2 "convolution/ntt4.hpp" #line 2 "utility/modint.hpp" #line 2 "template/template.hpp" #include <bits/stdc++.h> #define rep(i, s, n) for (int i = (int)(s); i < (int)(n); i++) #define rrep(i, s, n) for (int i = (int)(n)-1; i >= (int)(s); i--) #define all(v) v.begin(), v.end() using ll = long long; using ld = long double; using ull = unsigned long long; template <typename T> bool chmin(T &a, const T &b) { if (a <= b) return false; a = b; return true; } template <typename T> bool chmax(T &a, const T &b) { if (a >= b) return false; a = b; return true; } namespace lib { using namespace std; } // namespace lib // using namespace lib; #line 4 "utility/modint.hpp" namespace lib { template <ll m> struct modint { using mint = modint; ll a; modint(ll x = 0) : a((x % m + m) % m) {} static constexpr ll mod() { return m; } ll val() const { return a; } ll& val() { return a; } mint pow(ll n) const { mint res = 1; mint x = a; while (n) { if (n & 1) res *= x; x *= x; n >>= 1; } return res; } mint inv() const { return pow(m - 2); } mint& operator+=(const mint rhs) { a += rhs.a; if (a >= m) a -= m; return *this; } mint& operator-=(const mint rhs) { if (a < rhs.a) a += m; a -= rhs.a; return *this; } mint& operator*=(const mint rhs) { a = a * rhs.a % m; return *this; } mint& operator/=(mint rhs) { *this *= rhs.inv(); return *this; } friend mint operator+(const mint& lhs, const mint& rhs) { return mint(lhs) += rhs; } friend mint operator-(const mint& lhs, const mint& rhs) { return mint(lhs) -= rhs; } friend mint operator*(const mint& lhs, const mint& rhs) { return mint(lhs) *= rhs; } friend mint operator/(const mint& lhs, const mint& rhs) { return mint(lhs) /= rhs; } friend bool operator==(const modint &lhs, const modint &rhs) { return lhs.a == rhs.a; } friend bool operator!=(const modint &lhs, const modint &rhs) { return !(lhs == rhs); } mint operator+() const { return *this; } mint operator-() const { return mint() - *this; } }; using modint998244353 = modint<998244353>; using modint1000000007 = modint<1'000'000'007>; } // namespace lib #line 4 "convolution/ntt4.hpp" namespace lib { // only for modint998244353 template<typename mint> struct NTT { using uint = unsigned int; static constexpr uint mod = mint::mod(); static constexpr ull mod2 = (ull)mod * mod; static constexpr uint pr = 3; // for modint998244353 static constexpr int level = 23; // for modint998244353 array<mint,level+1> wp, wm; void set_ws(){ mint r = mint(pr).pow((mod-1) >> level); wp[level] = r, wm[level] = r.inv(); for (int i = level-1; i >= 0; i--){ wp[i] = wp[i+1] * wp[i+1]; wm[i] = wm[i+1] * wm[i+1]; } } NTT () { set_ws(); } void fft4(vector<mint> &a, int k){ uint im = wm[2].val(); uint n = 1<<k; uint len = n; int d = k; while (len > 1){ if (d == 1){ for (int i = 0; i < (1<<(k-1)); i++){ a[i*2+0] += a[i*2+1]; a[i*2+1] = a[i*2+0] - a[i*2+1] * 2; } len >>= 1; d -= 1; } else { int len4 = len/4; int nlen = n/len; ull r1 = 1, r2 = 1, r3 = 1, imr1 = im, imr3 = im; for (int i = 0; i < len4; i++){ for (int j = 0; j < nlen; j++){ uint a0 = a[len4*0+i + len*j].val(); uint a1 = a[len4*1+i + len*j].val(); uint a2 = a[len4*2+i + len*j].val(); uint a3 = a[len4*3+i + len*j].val(); uint a0p2 = a0 + a2; uint a1p3 = a1 + a3; ull b0m2 = (a0 + mod - a2) * r1; ull b1m3 = (a1 + mod - a3) * imr1; ull c0m2 = (a0 + mod - a2) * r3; ull c1m3 = (a1 + mod - a3) * imr3; a[len4*0+i + len*j] = a0p2 + a1p3; a[len4*1+i + len*j] = b0m2 + b1m3; a[len4*2+i + len*j] = (a0p2 + mod*2 - a1p3) * r2; a[len4*3+i + len*j] = c0m2 + mod2*2 - c1m3; } r1 = r1 * wm[d].val() % mod; r2 = r1 * r1 % mod; r3 = r1 * r2 % mod; imr1 = im * r1 % mod; imr3 = im * r3 % mod; } len >>= 2; d -= 2; } } } void ifft4(vector<mint> &a, int k){ uint im = wp[2].val(); uint n = 1<<k; uint len = (k & 1 ? 2 : 4); int d = (k & 1 ? 1 : 2); while (len <= n){ if (d == 1){ for (int i = 0; i < (1<<(k-1)); i++){ a[i*2+0] += a[i*2+1]; a[i*2+1] = a[i*2+0] - a[i*2+1] * 2; } len <<= 2; d += 2; } else { int len4 = len/4; int nlen = n/len; ull r1 = 1, r2 = 1, r3 = 1, imr1 = im, imr3 = im; for (int i = 0; i < len4; i++){ for (int j = 0; j < nlen; j++){ ull a0 = a[len4*0+i + len*j].val(); ull a1 = a[len4*1+i + len*j].val() * r1; ull a2 = a[len4*2+i + len*j].val() * r2; ull a3 = a[len4*3+i + len*j].val() * r3; ull b1 = a[len4*1+i + len*j].val() * imr1; ull b3 = a[len4*3+i + len*j].val() * imr3; ull a0p2 = a0 + a2; ull a1p3 = a1 + a3; ull a0m2 = a0 + mod2 - a2; ull b1m3 = b1 + mod2 - b3; a[len4*0+i + len*j] = a0p2 + a1p3; a[len4*1+i + len*j] = a0m2 + b1m3; a[len4*2+i + len*j] = a0p2 + mod2*2 - a1p3; a[len4*3+i + len*j] = a0m2 + mod2*2 - b1m3; } r1 = r1 * wp[d].val() % mod; r2 = r1 * r1 % mod; r3 = r1 * r2 % mod; imr1 = im * r1 % mod; imr3 = im * r3 % mod; } len <<= 2; d += 2; } } } vector<mint> multiply(const vector<mint> &a, const vector<mint> &b){ if (a.empty() || b.empty()) return {}; int d = a.size() + b.size() - 1; if (min<int>(a.size(), b.size()) <= 40){ vector<mint> s(d); rep(i,0,a.size()) rep(j,0,b.size()) s[i+j] += a[i]*b[j]; return s; } int k = 2, M = 4; while (M < d) M <<= 1, ++k; vector<mint> s(M), t(M); rep(i,0,a.size()) s[i] = a[i]; rep(i,0,b.size()) t[i] = b[i]; fft4(s,k); fft4(t,k); rep(i,0,M) s[i] *= t[i]; ifft4(s, k); s.resize(d); mint invm = mint(M).inv(); rep(i,0,d) s[i] *= invm; return s; } }; } // namespace lib