Library

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub ebi-fly13/Library

:heavy_check_mark: Middle Product (arbitrary mod / mod $2^{64}$)
(fps/middle_product_arbitrary.hpp)

説明

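Computes the middle product of two sequences for an arbitrary modulus (three NTT-friendly primes recombined with Garner's algorithm) and for modulus $2^{64}$ (five NTT-friendly primes). $O(N\log{N})$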

Depends on

Required by

Verified with

Code

#pragma once

#include <algorithm>
#include <cassert>
#include <vector>

#include "../convolution/ntt.hpp"
#include "../fps/middle_product.hpp"
#include "../modint/base.hpp"
#include "../modint/modint.hpp"
#include "../template/int_alias.hpp"

namespace ebi {

template <Modint mint>
std::vector<mint> middle_product_arbitrary(const std::vector<mint> &a,
                                           const std::vector<mint> &b) {
    static constexpr i32 m0 = 167772161;  // 2^25
    static constexpr i32 m1 = 469762049;  // 2^26
    static constexpr i32 m2 = 754974721;  // 2^24
    using mint0 = static_modint<m0>;
    using mint1 = static_modint<m1>;
    using mint2 = static_modint<m2>;
    static constexpr i32 inv01 = mint1(m0).inv().val();
    static constexpr i32 inv02 = mint2(m0).inv().val();
    static constexpr i32 inv12 = mint2(m1).inv().val();
    static constexpr i32 inv02inv12 = i64(inv02) * inv12 % m2;
    static constexpr i64 w1 = m0;
    static constexpr i64 w2 = i64(m0) * m1;
    const i32 mod = mint::mod();

    assert(a.size() >= b.size());
    if (std::min(a.size() - b.size() + 1, b.size()) <= 60) {
        return middle_product_naive<mint>(a, b);
    }
    int n = (int)a.size(), m = (int)b.size();
    std::vector<mint0> a0(n), b0(m);
    std::vector<mint1> a1(n), b1(m);
    std::vector<mint2> a2(n), b2(m);

    for (int i = 0; i < n; i++) {
        a0[i] = a[i].val();
        a1[i] = a[i].val();
        a2[i] = a[i].val();
    }
    for (int i = 0; i < m; i++) {
        b0[i] = b[i].val();
        b1[i] = b[i].val();
        b2[i] = b[i].val();
    }

    auto c0 = middle_product<mint0>(a0, b0);
    auto c1 = middle_product<mint1>(a1, b1);
    auto c2 = middle_product<mint2>(a2, b2);

    std::vector<mint> res(n - m + 1);
    const i32 W1 = w1 % mod;
    const i32 W2 = w2 % mod;
    for (int i = 0; i < n - m + 1; i++) {
        i32 n1 = c1[i].val(), n2 = c2[i].val(), a = c0[i].val();
        i32 b = i64(n1 + m1 - a) * inv01 % m1;
        i32 c = (i64(n2 + m2 - a) * inv02inv12 + i64(m2 - b) * inv12) % m2;
        res[i] = (i64(a) + i64(b) * W1 + i64(c) * W2) % mod;
    }
    return res;
}

std::vector<u64> middle_product_mod_2_64(const std::vector<u64> &a,
                                         const std::vector<u64> &b) {
    static constexpr i32 m0 = 998244353;
    static constexpr i32 m1 = 754974721;
    static constexpr i32 m2 = 167772161;
    static constexpr i32 m3 = 469762049;
    static constexpr i32 m4 = 880803841;
    using mint0 = static_modint<m0>;
    using mint1 = static_modint<m1>;
    using mint2 = static_modint<m2>;
    using mint3 = static_modint<m3>;
    using mint4 = static_modint<m4>;

    static const mint1 inv10 = mint1(m0).inv();
    static const mint2 inv21 = mint2(m1).inv(), inv20 = inv21 / mint2(m0);
    static const mint3 inv32 = mint3(m2).inv(), inv31 = inv32 / mint3(m1),
                       inv30 = inv31 / mint3(m0);
    static const mint4 inv43 = mint4(m3).inv(), inv42 = inv43 / mint4(m2),
                       inv41 = inv42 / mint4(m1), inv40 = inv41 / mint4(m0);

    assert(a.size() >= b.size());
    if (std::min(a.size() - b.size() + 1, b.size()) <= 60) {
        return middle_product_naive(a, b);
    }
    int n = (int)a.size(), m = (int)b.size();
    std::vector<mint0> a0(n), b0(m);
    std::vector<mint1> a1(n), b1(m);
    std::vector<mint2> a2(n), b2(m);
    std::vector<mint3> a3(n), b3(m);
    std::vector<mint4> a4(n), b4(m);

    for (int i = 0; i < n; i++) {
        a0[i] = a[i];
        a1[i] = a[i];
        a2[i] = a[i];
        a3[i] = a[i];
        a4[i] = a[i];
    }
    for (int i = 0; i < m; i++) {
        b0[i] = b[i];
        b1[i] = b[i];
        b2[i] = b[i];
        b3[i] = b[i];
        b4[i] = b[i];
    }

    auto c0 = middle_product<mint0>(a0, b0);
    auto c1 = middle_product<mint1>(a1, b1);
    auto c2 = middle_product<mint2>(a2, b2);
    auto c3 = middle_product<mint3>(a3, b3);
    auto c4 = middle_product<mint4>(a4, b4);

    std::vector<u64> res(n - m + 1);
    for (int i = 0; i < n - m + 1; i++) {
        i64 x0 = c0[i].val();
        i64 x1 = ((c1[i] - x0) * inv10).val();
        i64 x2 = (((c2[i] - x0)) * inv20 - mint2(x1) * inv21).val();
        i64 x3 = ((c3[i] - x0) * inv30 - mint3(x1) * inv31 - mint3(x2) * inv32)
                     .val();
        i64 x4 = ((c4[i] - x0) * inv40 - mint4(x1) * inv41 - mint4(x2) * inv42 -
                  mint4(x3) * inv43)
                     .val();
        res[i] = x0 + m0 * (x1 + m1 * (x2 + m2 * (x3 + m3 * (u64(x4)))));
    }
    return res;
}

}  // namespace ebi
#line 2 "fps/middle_product_arbitrary.hpp"

#include <algorithm>
#include <cassert>
#include <vector>

#line 2 "convolution/ntt.hpp"

#line 4 "convolution/ntt.hpp"
#include <array>
#include <bit>
#line 8 "convolution/ntt.hpp"

#line 2 "math/internal_math.hpp"

#line 4 "math/internal_math.hpp"

namespace ebi {

namespace internal {

// Table lookup of the generator used for a few hard-coded moduli.
// Returns -1 for any modulus that is not listed.
constexpr int primitive_root_constexpr(int m) {
    switch (m) {
        case 2:
            return 1;
        case 167772161:
        case 469762049:
        case 998244353:
            return 3;
        case 754974721:
            return 11;
        case 880803841:
            return 26;
        case 924844033:
            return 5;
        default:
            return -1;
    }
}
// primitive_root<m>: the result of primitive_root_constexpr(m), evaluated at
// compile time (so it is -1 for a modulus that function does not list).
template <int m> constexpr int primitive_root = primitive_root_constexpr(m);

}  // namespace internal

}  // namespace ebi
#line 2 "modint/base.hpp"

#include <concepts>
#include <iostream>
#include <utility>

namespace ebi {

// Syntactic requirements for a modular-integer type: the four binary
// arithmetic operators, inv(), val(), pow(long long) and a static mod().
// Only the validity of each expression is checked, not its result type.
template <class T>
concept Modint = requires(T lhs, T rhs) {
    // Binary arithmetic between two values of the type.
    lhs + rhs;
    lhs - rhs;
    lhs * rhs;
    lhs / rhs;
    // Member operations on a single value.
    lhs.inv();
    lhs.val();
    lhs.pow(std::declval<long long>());
    // The modulus is reachable without an instance.
    T::mod();
};

// Reads an integer from the stream and assigns it to the modular value `a`.
// The stream is returned so that extractions can be chained.
template <Modint mint> std::istream &operator>>(std::istream &os, mint &a) {
    // Zero-initialised: if the stream is already in a failed/EOF state the
    // extraction below is skipped and leaves x untouched, so reading it
    // would otherwise use an indeterminate value.
    long long x = 0;
    os >> x;
    a = x;
    return os;
}

// Writes the representative returned by a.val() to the stream and hands the
// stream back so that insertions can be chained.
template <Modint mint>
std::ostream &operator<<(std::ostream &os, const mint &a) {
    os << a.val();
    return os;
}

}  // namespace ebi
#line 2 "template/int_alias.hpp"

#include <cstdint>

namespace ebi {

// Short aliases for long double and std::size_t.
using ld = long double;
using std::size_t;
// Fixed-width signed (iN) and unsigned (uN) integers from <cstdint>.
using i8 = std::int8_t;
using u8 = std::uint8_t;
using i16 = std::int16_t;
using u16 = std::uint16_t;
using i32 = std::int32_t;
using u32 = std::uint32_t;
using i64 = std::int64_t;
using u64 = std::uint64_t;
// 128-bit integers; __int128_t / __uint128_t are compiler extensions, not
// standard types.
using i128 = __int128_t;
using u128 = __uint128_t;

}  // namespace ebi
#line 12 "convolution/ntt.hpp"

namespace ebi {

namespace internal {

// Precomputed roots of unity for transforms over mint.
// root[k] is g^((mod - 1) >> rank2) squared (rank2 - k) times, i.e. a
// 2^k-th root of unity when g generates the multiplicative group;
// inv_root[k] is its inverse.
template <Modint mint, int g = internal::primitive_root<mint::mod()>>
struct ntt_info {
    // Number of trailing zero bits of mod - 1: the largest supported level.
    static constexpr int rank2 =
        std::countr_zero((unsigned int)(mint::mod() - 1));

    std::array<mint, rank2 + 1> root;
    std::array<mint, rank2 + 1> inv_root;

    ntt_info() {
        // Start from the deepest level and square down to level 0.
        mint forward = mint(g).pow((mint::mod() - 1) >> rank2);
        mint backward = forward.inv();
        for (int level = rank2; level >= 0; level--) {
            root[level] = forward;
            inv_root[level] = backward;
            forward = forward * forward;
            backward = backward * backward;
        }
    }
};

template <Modint mint> void fft2(std::vector<mint>& a) {
    static const ntt_info<mint> info;
    int n = int(a.size());
    int bit_size = std::countr_zero(a.size());
    assert(n == 1 << bit_size);
    for (int bit = bit_size - 1; bit >= 0; bit--) {
        int m = 1 << bit;
        for (int i = 0; i < n; i += 2 * m) {
            mint w = 1;
            for (int j = 0; j < m; j++) {
                mint p1 = a[i + j];
                mint p2 = a[i + j + m];
                a[i + j] = p1 + p2;
                a[i + j + m] = (p1 - p2) * w;
                w *= info.root[bit + 1];
            }
        }
    }
}

template <Modint mint> void ifft2(std::vector<mint>& a) {
    static const ntt_info<mint> info;
    int n = int(a.size());
    int bit_size = std::countr_zero(a.size());
    assert(n == 1 << bit_size);

    for (int bit = 0; bit < bit_size; bit++) {
        for (int i = 0; i < n / (1 << (bit + 1)); i++) {
            mint w = 1;
            for (int j = 0; j < (1 << bit); j++) {
                int idx = i * (1 << (bit + 1)) + j;
                int jdx = idx + (1 << bit);
                mint p1 = a[idx];
                mint p2 = w * a[jdx];
                a[idx] = p1 + p2;
                a[jdx] = p1 - p2;
                w *= info.inv_root[bit + 1];
            }
        }
    }
}

// In-place forward transform built from radix-4 butterflies, with a single
// radix-2 pass when exactly one level remains. a.size() must be a power of
// two (asserted).
template <Modint mint> void fft4(std::vector<mint>& a) {
    static const ntt_info<mint> info;
    const u32 mod = mint::mod();
    // root[2] is the table's level-2 entry (root[rank2] squared down to
    // level 2); it plays the role of the imaginary unit in the butterflies.
    const u64 iw = info.root[2].val();
    int n = int(a.size());
    int bit_size = std::countr_zero(a.size());
    assert(n == 1 << bit_size);
    // len = number of levels still to process; a radix-4 pass consumes two,
    // the radix-2 pass consumes one.
    int len = bit_size;
    while (len > 0) {
        if (len == 1) {
            // Final radix-2 pass over adjacent pairs (no twiddle needed).
            for (int i = 0; i < n; i += 2) {
                mint p0 = a[i];
                mint p1 = a[i + 1];
                a[i] = p0 + p1;
                a[i + 1] = p0 - p1;
            }
            len--;
        } else {
            // m = distance between the four inputs of one butterfly.
            int m = 1 << (len - 2);
            // w1 = root[len]^i, w2 = w1^2, w3 = w1^3; iw1 / iw3 are w1 / w3
            // multiplied by iw.
            u64 w1 = 1, w2 = 1, w3 = 1, iw1 = iw, iw3 = iw;
            for (int i = 0; i < m; i++) {
                for (int j = 0; j < n; j += 4 * m) {
                    int i0 = i + j, i1 = i0 + m, i2 = i1 + m, i3 = i2 + m;
                    u32 a0 = a[i0].val();
                    u32 a1 = a[i1].val();
                    u32 a2 = a[i2].val();
                    u32 a3 = a[i3].val();
                    // Sums and differences are left unreduced; mod (or
                    // 2 * mod) is added before each subtraction so the
                    // unsigned value cannot wrap below zero.
                    // NOTE(review): keeping up to 4 * mod in a u32 presumably
                    // relies on mod < 2^30 — confirm.
                    u32 a0_plus_a2 = a0 + a2;
                    u32 a1_plus_a3 = a1 + a3;
                    u32 a0_minus_a2 = a0 + mod - a2;
                    u32 a1_minus_a3 = a1 + mod - a3;
                    // Assigning the unreduced integer to a[...] goes through
                    // mint's conversion from an unsigned integer.
                    a[i0] = a0_plus_a2 + a1_plus_a3;
                    a[i1] = a0_minus_a2 * w1 + a1_minus_a3 * iw1;
                    a[i2] = (a0_plus_a2 + 2 * mod - a1_plus_a3) * w2;
                    a[i3] = a0_minus_a2 * w3 + (2 * mod - a1_minus_a3) * iw3;
                }
                // Advance the twiddles to the next value of i.
                w1 = w1 * info.root[len].val() % mod;
                w2 = w1 * w1 % mod;
                w3 = w2 * w1 % mod;
                iw1 = iw * w1 % mod;
                iw3 = iw * w3 % mod;
            }
            len -= 2;
        }
    }
}

// In-place inverse transform built from radix-4 butterflies, preceded by a
// single radix-2 pass when the number of levels is odd. a.size() must be a
// power of two (asserted). No division by the length is performed here.
template <Modint mint> void ifft4(std::vector<mint>& a) {
    static const ntt_info<mint> info;
    const u32 mod = mint::mod();
    // mod^2: added before subtracting a product of two residues so the
    // unsigned difference cannot wrap below zero.
    const u64 mod2 = u64(mod) * mod;
    // inv_root[2] is the inverse of the level-2 entry used by fft4.
    const u64 iw = info.inv_root[2].val();
    int n = int(a.size());
    int bit_size = std::countr_zero(a.size());
    assert(n == 1 << bit_size);
    // Start with a radix-2 pass (len == 1) when bit_size is odd, otherwise
    // with the first radix-4 pass (len == 2); len then grows by 2 per pass.
    int len = (bit_size & 1 ? 1 : 2);
    while (len <= bit_size) {
        if (len == 1) {
            // Radix-2 pass over adjacent pairs (no twiddle needed).
            for (int i = 0; i < n; i += 2) {
                mint a0 = a[i];
                mint a1 = a[i + 1];
                a[i] = a0 + a1;
                a[i + 1] = a0 - a1;
            }
        } else {
            // m = distance between the four inputs of one butterfly.
            int m = 1 << (len - 2);
            // w1 = inv_root[len]^i, w2 = w1^2, w3 = w1^3; iw1 / iw3 are
            // w1 / w3 multiplied by iw.
            u64 w1 = 1, w2 = 1, w3 = 1, iw1 = iw, iw3 = iw;
            for (int i = 0; i < m; i++) {
                for (int j = 0; j < n; j += 4 * m) {
                    int i0 = i + j, i1 = i0 + m, i2 = i1 + m, i3 = i2 + m;
                    // Inputs are multiplied by their twiddles up front and
                    // kept as unreduced 64-bit products.
                    u64 a0 = a[i0].val();
                    u64 a1 = w1 * a[i1].val();
                    u64 a2 = w2 * a[i2].val();
                    u64 a3 = w3 * a[i3].val();
                    u64 b1 = iw1 * a[i1].val();
                    u64 b3 = iw3 * a[i3].val();
                    u64 a0_plus_a2 = a0 + a2;
                    u64 a1_plus_a3 = a1 + a3;
                    u64 a0_minus_a2 = a0 + mod2 - a2;
                    u64 b1_minus_b3 = b1 + mod2 - b3;
                    // Assigning the unreduced integer to a[...] goes through
                    // mint's conversion from an unsigned integer.
                    // NOTE(review): the sums reach a few multiples of mod^2,
                    // which presumably relies on mod being small enough for
                    // that to fit in a u64 — confirm.
                    a[i0] = a0_plus_a2 + a1_plus_a3;
                    a[i1] = a0_minus_a2 + b1_minus_b3;
                    a[i2] = a0_plus_a2 + mod2 * 2 - a1_plus_a3;
                    a[i3] = a0_minus_a2 + mod2 * 2 - b1_minus_b3;
                }
                // Advance the twiddles to the next value of i.
                w1 = w1 * info.inv_root[len].val() % mod;
                w2 = w1 * w1 % mod;
                w3 = w2 * w1 % mod;
                iw1 = iw * w1 % mod;
                iw3 = iw * w3 % mod;
            }
        }
        len += 2;
    }
}

}  // namespace internal

}  // namespace ebi
#line 2 "fps/middle_product.hpp"

#line 6 "fps/middle_product.hpp"
#include <ranges>
#line 8 "fps/middle_product.hpp"

#line 2 "fps/fps.hpp"

#line 5 "fps/fps.hpp"
#include <optional>
#line 7 "fps/fps.hpp"

#line 9 "fps/fps.hpp"

namespace ebi {

/**
 * @brief Formal power series / polynomial with coefficients of type mint.
 *
 * Stored as a std::vector<mint> in which index i holds the coefficient of
 * x^i. operator*=(const FPS &), sqrt, fft and ifft are only declared in this
 * struct; their definitions are not part of it.
 */
template <Modint mint> struct FormalPowerSeries : std::vector<mint> {
  private:
    using std::vector<mint>::vector;
    using std::vector<mint>::vector::operator=;
    using FPS = FormalPowerSeries;

  public:
    // Copies the coefficients of a plain vector.
    FormalPowerSeries(const std::vector<mint> &a) {
        *this = a;
    }

    // Binary operators between two series; each forwards to the matching
    // compound assignment on a copy of *this.
    FPS operator+(const FPS &rhs) const noexcept {
        return FPS(*this) += rhs;
    }
    FPS operator-(const FPS &rhs) const noexcept {
        return FPS(*this) -= rhs;
    }
    FPS operator*(const FPS &rhs) const noexcept {
        return FPS(*this) *= rhs;
    }
    FPS operator/(const FPS &rhs) const noexcept {
        return FPS(*this) /= rhs;
    }
    FPS operator%(const FPS &rhs) const noexcept {
        return FPS(*this) %= rhs;
    }

    // Binary operators with a scalar; + and - touch only the constant term,
    // * and / scale every coefficient.
    FPS operator+(const mint &rhs) const noexcept {
        return FPS(*this) += rhs;
    }
    FPS operator-(const mint &rhs) const noexcept {
        return FPS(*this) -= rhs;
    }
    FPS operator*(const mint &rhs) const noexcept {
        return FPS(*this) *= rhs;
    }
    FPS operator/(const mint &rhs) const noexcept {
        return FPS(*this) /= rhs;
    }

    // Coefficient-wise addition; grows *this to rhs.size() when shorter.
    FPS &operator+=(const FPS &rhs) noexcept {
        if (this->size() < rhs.size()) this->resize(rhs.size());
        for (int i = 0; i < (int)rhs.size(); ++i) {
            (*this)[i] += rhs[i];
        }
        return *this;
    }

    // Coefficient-wise subtraction; grows *this to rhs.size() when shorter.
    FPS &operator-=(const FPS &rhs) noexcept {
        if (this->size() < rhs.size()) this->resize(rhs.size());
        for (int i = 0; i < (int)rhs.size(); ++i) {
            (*this)[i] -= rhs[i];
        }
        return *this;
    }

    // Series product (declared only).
    FPS &operator*=(const FPS &);

    // Polynomial quotient: reverse both operands, multiply by the series
    // inverse of the reversed divisor to n - m + 1 terms, and reverse back.
    // The result is empty when *this is shorter than rhs. The last
    // coefficient of rhs must be invertible, because inv() inverts the
    // constant term of rhs.rev().
    FPS &operator/=(const FPS &rhs) noexcept {
        int n = deg() - 1;
        int m = rhs.deg() - 1;
        if (n < m) {
            *this = {};
            return *this;
        }
        *this = (*this).rev() * rhs.rev().inv(n - m + 1);
        (*this).resize(n - m + 1);
        std::reverse((*this).begin(), (*this).end());
        return *this;
    }

    // Polynomial remainder: subtracts quotient * rhs, then drops trailing
    // zero coefficients.
    FPS &operator%=(const FPS &rhs) noexcept {
        *this -= *this / rhs * rhs;
        shrink();
        return *this;
    }

    // Adds rhs to the constant term (creating it when the series is empty).
    FPS &operator+=(const mint &rhs) noexcept {
        if (this->empty()) this->resize(1);
        (*this)[0] += rhs;
        return *this;
    }

    // Subtracts rhs from the constant term (creating it when empty).
    FPS &operator-=(const mint &rhs) noexcept {
        if (this->empty()) this->resize(1);
        (*this)[0] -= rhs;
        return *this;
    }

    // Multiplies every coefficient by rhs.
    FPS &operator*=(const mint &rhs) noexcept {
        for (int i = 0; i < deg(); ++i) {
            (*this)[i] *= rhs;
        }
        return *this;
    }
    // Multiplies every coefficient by rhs.inv().
    FPS &operator/=(const mint &rhs) noexcept {
        mint inv_rhs = rhs.inv();
        for (int i = 0; i < deg(); ++i) {
            (*this)[i] *= inv_rhs;
        }
        return *this;
    }

    // Drops the lowest d coefficients (division by x^d, discarding the
    // remainder); empty when d >= size.
    FPS operator>>(int d) const {
        if (deg() <= d) return {};
        FPS f = *this;
        f.erase(f.begin(), f.begin() + d);
        return f;
    }

    // Prepends d zero coefficients (multiplication by x^d).
    FPS operator<<(int d) const {
        FPS f = *this;
        f.insert(f.begin(), d, 0);
        return f;
    }

    // Negates every coefficient.
    FPS operator-() const {
        FPS g(this->size());
        for (int i = 0; i < (int)this->size(); i++) g[i] = -(*this)[i];
        return g;
    }

    // First min(size, sz) coefficients; never pads.
    FPS pre(int sz) const {
        return FPS(this->begin(), this->begin() + std::min(deg(), sz));
    }

    // Copy with the coefficient order reversed.
    FPS rev() const {
        auto f = *this;
        std::reverse(f.begin(), f.end());
        return f;
    }

    // Formal derivative; one term shorter than the input (empty stays empty).
    FPS differential() const {
        int n = deg();
        FPS g(std::max(0, n - 1));
        for (int i = 0; i < n - 1; i++) {
            g[i] = (*this)[i + 1] * (i + 1);
        }
        return g;
    }

    // Formal integral with constant term 0; one term longer than the input.
    FPS integral() const {
        int n = deg();
        FPS g(n + 1);
        g[0] = 0;
        if (n > 0) g[1] = 1;
        auto mod = mint::mod();
        // g[i] first holds 1 / i, built from the inverse of the smaller
        // index mod % i ...
        for (int i = 2; i <= n; i++) g[i] = (-g[mod % i]) * (mod / i);
        // ... and is then multiplied by the coefficient being integrated.
        for (int i = 0; i < n; i++) g[i + 1] *= (*this)[i];
        return g;
    }

    // Multiplicative inverse to d terms (d < 0: to size() terms), by the
    // doubling iteration g <- g * (2 - g * f). The constant term must be
    // invertible.
    FPS inv(int d = -1) const {
        int n = 1;
        if (d < 0) d = deg();
        FPS g(n);
        g[0] = (*this)[0].inv();
        while (n < d) {
            n <<= 1;
            g = (g * 2 - g * g * this->pre(n)).pre(n);
        }
        g.resize(d);
        return g;
    }

    // Logarithm as the integral of f' / f; the constant term must be 1
    // (asserted). d < 0 means size() terms.
    FPS log(int d = -1) const {
        assert((*this)[0].val() == 1);
        if (d < 0) d = deg();
        return ((*this).differential() * (*this).inv(d)).pre(d - 1).integral();
    }

    // Exponential to d terms by a doubling iteration that uses log(); the
    // constant term must be 0 (asserted). d < 0 means size() terms.
    FPS exp(int d = -1) const {
        assert((*this)[0].val() == 0);
        int n = 1;
        if (d < 0) d = deg();
        FPS g(n);
        g[0] = 1;
        while (n < d) {
            n <<= 1;
            g = (g * (this->pre(n) - g.log(n) + 1)).pre(n);
        }
        g.resize(d);
        return g;
    }

    // k-th power to d terms (d < 0: size() terms), k >= 0 asserted. The
    // lowest non-zero term c * x^low is factored out, the remaining series
    // (constant term 1) is raised with pow_1, and the result is shifted by
    // k * low and scaled by c^k. Returns all zeros when k * low >= d.
    FPS pow(long long k, int d = -1) const {
        assert(k >= 0);
        int n = deg();
        if (d < 0) d = n;
        if (k == 0) {
            FPS f(d);
            if (d > 0) f[0] = 1;
            return f;
        }
        int low = d;
        for (int i = n - 1; i >= 0; i--)
            if ((*this)[i] != 0) low = i;
        if (low >= (d + k - 1) / k) return FPS(d, 0);
        int offset = k * low;
        mint c = (*this)[low];
        FPS g(d - offset);
        for (int i = 0; i < std::min(n - low, d - offset); i++) {
            g[i] = (*this)[i + low];
        }
        g /= c;
        g = g.pow_1(k);
        return (g << offset) * c.pow(k);
    }

    // Power of a series whose constant term is 1 (asserted), computed as
    // exp(k * log f).
    FPS pow_1(mint k, int d = -1) const {
        assert((*this)[0] == 1);
        return ((*this).log(d) * k).exp(d);
    }

    // k-th power to d terms via log/exp on the series normalised by its
    // first non-zero coefficient; k >= 0 asserted. Returns all zeros when
    // the shift i * k would reach d.
    FPS pow_newton(long long k, int d = -1) const {
        assert(k >= 0);
        const int n = deg();
        if (d < 0) d = n;
        if (k == 0) {
            FPS f(d);
            if (d > 0) f[0] = 1;
            return f;
        }
        for (int i = 0; i < n; i++) {
            if ((*this)[i] != 0) {
                mint rev = (*this)[i].inv();
                FPS f = (((*this * rev) >> i).log(d) * k).exp(d);
                f *= (*this)[i].pow(k);
                f = (f << (i * k)).pre(d);
                if (f.deg() < d) f.resize(d);
                return f;
            }
            if (i + 1 >= (d + k - 1) / k) break;
        }
        return FPS(d);
    }

    // Number of stored coefficients (not the polynomial degree).
    int deg() const {
        return (*this).size();
    }

    // Removes trailing zero coefficients.
    void shrink() {
        while ((!this->empty()) && this->back() == 0) this->pop_back();
    }

    // Number of non-zero coefficients.
    int count_terms() const {
        int c = 0;
        for (int i = 0; i < deg(); i++) {
            if ((*this)[i] != 0) c++;
        }
        return c;
    }

    // Square root to d terms, if one exists (declared only).
    std::optional<FPS> sqrt(int d = -1) const;

    // First n coefficients of exp(x), i.e. f[i] = 1 / i!. Only 1 / (n - 1)!
    // is obtained by inversion; the rest follow from 1/(i-1)! = (1/i!) * i.
    // n >= 0 is asserted and n == 0 yields an empty series.
    static FPS exp_x(int n) {
        assert(n >= 0);
        FPS f(n);
        if (n == 0) return f;  // nothing to fill; f[n - 1] would be invalid
        mint fact = 1;
        for (int i = 1; i < n; i++) fact *= i;
        f[n - 1] = fact.inv();
        // Stop at i == 1 so that f[i - 1] never indexes below 0.
        for (int i = n - 1; i >= 1; i--) f[i - 1] = f[i] * i;
        return f;
    }

    // In-place forward / inverse transform (declared only).
    void fft();
    void ifft();
};

}  // namespace ebi
#line 12 "fps/middle_product.hpp"

namespace ebi {

// Quadratic-time middle product: c[i] = sum over j of b[j] * a[i + j] for
// i in [0, a.size() - b.size()]. Requires a.size() >= b.size() (asserted).
template <class T>
std::vector<T> middle_product_naive(const std::vector<T> &a,
                                    const std::vector<T> &b) {
    const int a_len = (int)a.size();
    const int b_len = (int)b.size();
    assert(a_len >= b_len);
    const int out_len = a_len - b_len + 1;
    std::vector<T> c(out_len, 0);
    for (int shift = 0; shift < out_len; shift++) {
        for (int k = 0; k < b_len; k++) {
            c[shift] += b[k] * a[shift + k];
        }
    }
    return c;
}

template <Modint mint>
std::vector<mint> middle_product(const std::vector<mint> &a,
                                 const std::vector<mint> &b) {
    assert(a.size() >= b.size());
    if (std::min(a.size() - b.size() + 1, b.size()) <= 60) {
        return middle_product_naive<mint>(a, b);
    }
    int n = std::bit_ceil(a.size());
    std::vector<mint> fa(n), fb(n);
    std::copy(a.begin(), a.end(), fa.begin());
    std::copy(b.rbegin(), b.rend(), fb.begin());
    internal::fft4(fa);
    internal::fft4(fb);
    for (int i = 0; i < n; i++) {
        fa[i] *= fb[i];
    }
    internal::ifft4(fa);
    mint inv_n = mint(n).inv();
    for (auto &x : fa) {
        x *= inv_n;
    }
    fa.resize(a.size());
    fa.erase(fa.begin(), fa.begin() + b.size() - 1);
    return fa;
}

// FormalPowerSeries overload of middle_product. Small inputs (output length
// or b of at most 60 terms) go to middle_product_naive. Otherwise a and the
// reversed b are zero-padded to the next power of two, multiplied cyclically
// through the series' own fft() / ifft(), divided by the length, and
// positions [b.size() - 1, a.size()) are returned.
// Requires a.size() >= b.size() (asserted).
template <Modint mint>
FormalPowerSeries<mint> middle_product(const FormalPowerSeries<mint> &a,
                                       const FormalPowerSeries<mint> &b) {
    using FPS = FormalPowerSeries<mint>;
    assert(a.size() >= b.size());
    const bool small_case =
        std::min(a.size() - b.size() + 1, b.size()) <= 60;
    if (small_case) {
        return middle_product_naive<mint>(a, b);
    }

    const int fft_len = std::bit_ceil(a.size());
    FPS lhs(fft_len), rhs(fft_len);
    std::copy(a.begin(), a.end(), lhs.begin());
    std::reverse_copy(b.begin(), b.end(), rhs.begin());

    lhs.fft();
    rhs.fft();
    for (int k = 0; k < fft_len; k++) {
        lhs[k] *= rhs[k];
    }
    lhs.ifft();
    lhs /= fft_len;

    // Keep positions [b.size() - 1, a.size()).
    lhs.resize(a.size());
    lhs.erase(lhs.begin(), lhs.begin() + b.size() - 1);
    return lhs;
}

}  // namespace ebi
#line 2 "modint/modint.hpp"

#line 5 "modint/modint.hpp"

#line 7 "modint/modint.hpp"

namespace ebi {

/**
 * @brief Integer modulo the compile-time constant m.
 *
 * The representative is stored in an unsigned int; every constructor and
 * operator keeps it in [0, m), while raw() stores its argument unchecked.
 * inv() is computed as pow(m - 2), so inversion and division are only
 * meaningful when m is prime.
 *
 * All arithmetic and comparison operators, including the binary friend
 * operators, are constexpr. operator/= and unary operator- are implemented
 * through the binary friends, so those must be constexpr as well for
 * division and negation to work in constant expressions.
 */
template <int m> struct static_modint {
  private:
    using modint = static_modint;

  public:
    // The modulus as a signed int.
    static constexpr int mod() {
        return m;
    }

    // Wraps v without reducing it; the caller guarantees 0 <= v < m.
    static constexpr modint raw(int v) {
        modint x;
        x._v = v;
        return x;
    }

    constexpr static_modint() : _v(0) {}

    // From a signed integer: reduced into [0, m), negatives included.
    template <std::signed_integral T> constexpr static_modint(T v) {
        long long x = (long long)(v % (long long)(umod()));
        if (x < 0) x += umod();
        _v = (unsigned int)(x);
    }

    // From an unsigned integer: reduced into [0, m).
    template <std::unsigned_integral T> constexpr static_modint(T v) {
        _v = (unsigned int)(v % umod());
    }

    // The stored representative.
    constexpr unsigned int val() const {
        return _v;
    }

    // Same as val().
    constexpr unsigned int value() const {
        return val();
    }

    // Increment / decrement with wrap-around at m.
    constexpr modint &operator++() {
        _v++;
        if (_v == umod()) _v = 0;
        return *this;
    }
    constexpr modint &operator--() {
        if (_v == 0) _v = umod();
        _v--;
        return *this;
    }

    constexpr modint operator++(int) {
        modint res = *this;
        ++*this;
        return res;
    }
    constexpr modint operator--(int) {
        modint res = *this;
        --*this;
        return res;
    }

    constexpr modint &operator+=(const modint &rhs) {
        _v += rhs._v;
        if (_v >= umod()) _v -= umod();
        return *this;
    }
    constexpr modint &operator-=(const modint &rhs) {
        // A negative difference wraps around as unsigned and is therefore
        // >= m; adding m wraps it back into [0, m).
        _v -= rhs._v;
        if (_v >= umod()) _v += umod();
        return *this;
    }
    constexpr modint &operator*=(const modint &rhs) {
        // Widen to 64 bits so the product cannot overflow before reduction.
        unsigned long long x = _v;
        x *= rhs._v;
        _v = (unsigned int)(x % (unsigned long long)umod());
        return *this;
    }
    constexpr modint &operator/=(const modint &rhs) {
        return *this = *this * rhs.inv();
    }

    constexpr modint operator+() const {
        return *this;
    }
    constexpr modint operator-() const {
        return modint() - *this;
    }

    // Binary exponentiation; n >= 0 is asserted.
    constexpr modint pow(long long n) const {
        assert(0 <= n);
        modint x = *this, res = 1;
        while (n) {
            if (n & 1) res *= x;
            x *= x;
            n >>= 1;
        }
        return res;
    }
    // Inverse as pow(m - 2); the value must be non-zero (asserted).
    constexpr modint inv() const {
        assert(_v);
        return pow(umod() - 2);
    }

    friend constexpr modint operator+(const modint &lhs, const modint &rhs) {
        return modint(lhs) += rhs;
    }
    friend constexpr modint operator-(const modint &lhs, const modint &rhs) {
        return modint(lhs) -= rhs;
    }
    friend constexpr modint operator*(const modint &lhs, const modint &rhs) {
        return modint(lhs) *= rhs;
    }

    friend constexpr modint operator/(const modint &lhs, const modint &rhs) {
        return modint(lhs) /= rhs;
    }
    friend constexpr bool operator==(const modint &lhs, const modint &rhs) {
        return lhs.val() == rhs.val();
    }
    friend constexpr bool operator!=(const modint &lhs, const modint &rhs) {
        return !(lhs == rhs);
    }

  private:
    unsigned int _v = 0;

    // The modulus as an unsigned int, for the unsigned arithmetic above.
    static constexpr unsigned int umod() {
        return m;
    }
};

// Ready-made instantiations of static_modint for the moduli 998244353 and
// 1000000007.
using modint998244353 = static_modint<998244353>;
using modint1000000007 = static_modint<1000000007>;

}  // namespace ebi
#line 12 "fps/middle_product_arbitrary.hpp"

namespace ebi {

template <Modint mint>
std::vector<mint> middle_product_arbitrary(const std::vector<mint> &a,
                                           const std::vector<mint> &b) {
    static constexpr i32 m0 = 167772161;  // 2^25
    static constexpr i32 m1 = 469762049;  // 2^26
    static constexpr i32 m2 = 754974721;  // 2^24
    using mint0 = static_modint<m0>;
    using mint1 = static_modint<m1>;
    using mint2 = static_modint<m2>;
    static constexpr i32 inv01 = mint1(m0).inv().val();
    static constexpr i32 inv02 = mint2(m0).inv().val();
    static constexpr i32 inv12 = mint2(m1).inv().val();
    static constexpr i32 inv02inv12 = i64(inv02) * inv12 % m2;
    static constexpr i64 w1 = m0;
    static constexpr i64 w2 = i64(m0) * m1;
    const i32 mod = mint::mod();

    assert(a.size() >= b.size());
    if (std::min(a.size() - b.size() + 1, b.size()) <= 60) {
        return middle_product_naive<mint>(a, b);
    }
    int n = (int)a.size(), m = (int)b.size();
    std::vector<mint0> a0(n), b0(m);
    std::vector<mint1> a1(n), b1(m);
    std::vector<mint2> a2(n), b2(m);

    for (int i = 0; i < n; i++) {
        a0[i] = a[i].val();
        a1[i] = a[i].val();
        a2[i] = a[i].val();
    }
    for (int i = 0; i < m; i++) {
        b0[i] = b[i].val();
        b1[i] = b[i].val();
        b2[i] = b[i].val();
    }

    auto c0 = middle_product<mint0>(a0, b0);
    auto c1 = middle_product<mint1>(a1, b1);
    auto c2 = middle_product<mint2>(a2, b2);

    std::vector<mint> res(n - m + 1);
    const i32 W1 = w1 % mod;
    const i32 W2 = w2 % mod;
    for (int i = 0; i < n - m + 1; i++) {
        i32 n1 = c1[i].val(), n2 = c2[i].val(), a = c0[i].val();
        i32 b = i64(n1 + m1 - a) * inv01 % m1;
        i32 c = (i64(n2 + m2 - a) * inv02inv12 + i64(m2 - b) * inv12) % m2;
        res[i] = (i64(a) + i64(b) * W1 + i64(c) * W2) % mod;
    }
    return res;
}

std::vector<u64> middle_product_mod_2_64(const std::vector<u64> &a,
                                         const std::vector<u64> &b) {
    static constexpr i32 m0 = 998244353;
    static constexpr i32 m1 = 754974721;
    static constexpr i32 m2 = 167772161;
    static constexpr i32 m3 = 469762049;
    static constexpr i32 m4 = 880803841;
    using mint0 = static_modint<m0>;
    using mint1 = static_modint<m1>;
    using mint2 = static_modint<m2>;
    using mint3 = static_modint<m3>;
    using mint4 = static_modint<m4>;

    static const mint1 inv10 = mint1(m0).inv();
    static const mint2 inv21 = mint2(m1).inv(), inv20 = inv21 / mint2(m0);
    static const mint3 inv32 = mint3(m2).inv(), inv31 = inv32 / mint3(m1),
                       inv30 = inv31 / mint3(m0);
    static const mint4 inv43 = mint4(m3).inv(), inv42 = inv43 / mint4(m2),
                       inv41 = inv42 / mint4(m1), inv40 = inv41 / mint4(m0);

    assert(a.size() >= b.size());
    if (std::min(a.size() - b.size() + 1, b.size()) <= 60) {
        return middle_product_naive(a, b);
    }
    int n = (int)a.size(), m = (int)b.size();
    std::vector<mint0> a0(n), b0(m);
    std::vector<mint1> a1(n), b1(m);
    std::vector<mint2> a2(n), b2(m);
    std::vector<mint3> a3(n), b3(m);
    std::vector<mint4> a4(n), b4(m);

    for (int i = 0; i < n; i++) {
        a0[i] = a[i];
        a1[i] = a[i];
        a2[i] = a[i];
        a3[i] = a[i];
        a4[i] = a[i];
    }
    for (int i = 0; i < m; i++) {
        b0[i] = b[i];
        b1[i] = b[i];
        b2[i] = b[i];
        b3[i] = b[i];
        b4[i] = b[i];
    }

    auto c0 = middle_product<mint0>(a0, b0);
    auto c1 = middle_product<mint1>(a1, b1);
    auto c2 = middle_product<mint2>(a2, b2);
    auto c3 = middle_product<mint3>(a3, b3);
    auto c4 = middle_product<mint4>(a4, b4);

    std::vector<u64> res(n - m + 1);
    for (int i = 0; i < n - m + 1; i++) {
        i64 x0 = c0[i].val();
        i64 x1 = ((c1[i] - x0) * inv10).val();
        i64 x2 = (((c2[i] - x0)) * inv20 - mint2(x1) * inv21).val();
        i64 x3 = ((c3[i] - x0) * inv30 - mint3(x1) * inv31 - mint3(x2) * inv32)
                     .val();
        i64 x4 = ((c4[i] - x0) * inv40 - mint4(x1) * inv41 - mint4(x2) * inv42 -
                  mint4(x3) * inv43)
                     .val();
        res[i] = x0 + m0 * (x1 + m1 * (x2 + m2 * (x3 + m3 * (u64(x4)))));
    }
    return res;
}

}  // namespace ebi
Back to top page