This documentation is automatically generated by online-judge-tools/verification-helper
#include "convolution/multivar_ntt.hpp"
Library Checker: Multivariate Convolution
$\displaystyle f(x_1, x_2, \dots, x_K), \ g(x_1, x_2, \dots, x_K) $
に対して,
$\displaystyle f \cdot g \bmod (x_1^{N_1} x_2^{N_2} \dots x_K^{N_K}) $
を計算(線形畳み込み,「はみ出し」分は無視).
popcount
に着目して要素ごとの格納する位置を決めたが,代わりに上のリンクにある $\chi$ 関数を使って決めてあげると万事うまくいく.
#pragma once
#include "ntt.hpp"
#include <cassert>
#include <numeric>
#include <vector>
// CUT begin
// Multivariate convolution (linear; terms that overflow any dimension are cut off)
// Complexity: $O(kN \log N + k^2 N)$
// Note that the vectors store the information in **column-major order**
// Implementation idea: https://rushcheyo.blog.uoj.ac/blog/6547
// Details of my implementation: https://hitonanode.github.io/cplib-cpp/convolution/multivar_ntt.hpp
//
// Usage: construct with the per-variable sizes and call the object on two coefficient
// vectors of length N (the product of the sizes). The transform itself is delegated to
// the free function `ntt`, applied to vectors of length `fftlen`.
template <typename MODINT> struct multivar_ntt {
    int K;                 // number of variables (dim.size())
    int N;                 // number of coefficients: product of all dimensions
    int fftlen;            // smallest power of two that is >= 2 * N
    std::vector<int> dim;  // copy of the dimensions given to the constructor
    // chi[i] = (sum over j of floor(i / (dim[0] * ... * dim[j]))) reduced modulo K.
    // It decides which of the K layers coefficient i is stored in.
    std::vector<int> chi;
    MODINT invfftlen;      // inverse of fftlen (kept for callers; not read by this struct)

private:
    // Fills K, N, fftlen, invfftlen and the chi table from the dimension list.
    void _initialize(const std::vector<int> &dim_) {
        dim = dim_;
        K = dim_.size();

        // Accumulate in 64 bits and reject sizes for which fftlen (< 4N) would not fit in int.
        long long total = 1;
        for (int d : dim_) {
            assert(d >= 0);
            total *= d;
            assert(total <= (1LL << 29));
        }
        N = int(total);

        fftlen = 1;
        while (fftlen < N * 2) fftlen <<= 1;
        invfftlen = MODINT(fftlen).inv();

        // First mark every multiple of each prefix product, then prefix-sum modulo K.
        chi.assign(fftlen, 0);
        int stride = 1;
        for (int d : dim_) {
            stride *= d;
            // A zero-sized dimension means N == 0; stepping by 0 would never terminate.
            if (stride == 0) break;
            for (int s = stride; s < fftlen; s += stride) chi[s] += 1;
        }
        for (int i = 0; i + 1 < fftlen; i++) {
            chi[i + 1] += chi[i];
            if (chi[i + 1] >= K) chi[i + 1] -= K;
        }
    }

    // Convolves f and g (both of length N) and returns the N retained coefficients.
    std::vector<MODINT> _convolve(const std::vector<MODINT> &f, const std::vector<MODINT> &g) const {
        assert(int(f.size()) == N);
        assert(int(g.size()) == N);
        if (N == 0) return {};                  // some dimension is empty: nothing to compute
        if (dim.empty()) return {f[0] * g[0]};  // zero variables: plain scalar product

        // Scatter each coefficient into the layer selected by chi, then transform every layer.
        std::vector<std::vector<MODINT>> fex(K, std::vector<MODINT>(fftlen)),
            gex(K, std::vector<MODINT>(fftlen));
        for (int i = 0; i < N; i++) {
            fex[chi[i]][i] = f[i];
            gex[chi[i]][i] = g[i];
        }
        for (auto &vec : fex) ntt(vec, false);
        for (auto &vec : gex) ntt(vec, false);

        // Layer indices add modulo K under pointwise multiplication.
        std::vector<std::vector<MODINT>> hex(K, std::vector<MODINT>(fftlen));
        for (int df = 0; df < K; df++) {
            for (int dg = 0; dg < K; dg++) {
                const int dh = (df + dg < K) ? df + dg : df + dg - K;
                for (int i = 0; i < fftlen; i++) hex[dh][i] += fex[df][i] * gex[dg][i];
            }
        }
        for (auto &vec : hex) ntt(vec, true);

        // Only the entry whose layer matches chi[i] is read back for index i.
        std::vector<MODINT> ret(N);
        for (int i = 0; i < N; i++) ret[i] = hex[chi[i]][i];
        return ret;
    }

public:
    // dim_: size of each variable, in storage order.
    multivar_ntt(const std::vector<int> &dim_) { _initialize(dim_); }

    // Returns the truncated product of f and g; both must have exactly N elements.
    std::vector<MODINT>
    operator()(const std::vector<MODINT> &f, const std::vector<MODINT> &g) const {
        return _convolve(f, g);
    }
};
#line 2 "modint.hpp"
#include <cassert>
#include <iostream>
#include <set>
#include <vector>
// Integer modulo the compile-time constant `md`; the stored value is kept in [0, md).
// inv() / facinv() use pow(md - 2) and sqrt() uses a Tonelli-Shanks style loop, so they
// presumably expect `md` to be prime -- the code itself does not check this.
template <int md> struct ModInt {
    using lint = long long;
    constexpr static int mod() { return md; }

    // Returns a primitive root modulo md, searched once and cached; -1 if the search fails.
    static int get_primitive_root() {
        static int primitive_root = 0;
        if (!primitive_root) {
            primitive_root = [&]() {
                // Collect the distinct prime factors of md - 1.
                std::set<int> fac;
                int v = md - 1;
                for (lint i = 2; i * i <= v; i++)
                    while (v % i == 0) fac.insert(i), v /= i;
                if (v > 1) fac.insert(v);
                // g is accepted when g^((md-1)/p) != 1 for every prime factor p.
                for (int g = 1; g < md; g++) {
                    bool ok = true;
                    for (auto i : fac)
                        if (ModInt(g).pow((md - 1) / i) == 1) {
                            ok = false;
                            break;
                        }
                    if (ok) return g;
                }
                return -1;
            }();
        }
        return primitive_root;
    }

    int val_;
    int val() const noexcept { return val_; }
    constexpr ModInt() : val_(0) {}
    // Stores v after one conditional subtraction; callers must pass v in [0, 2 * md).
    constexpr ModInt &_setval(lint v) { return val_ = (v >= md ? v - md : v), *this; }
    // Accepts any 64-bit value, including negatives. val_ is initialised up front so the
    // member is never left indeterminate before _setval assigns it.
    constexpr ModInt(lint v) : val_(0) { _setval(v % md + md); }
    constexpr explicit operator bool() const { return val_ != 0; }

    constexpr ModInt operator+(const ModInt &x) const {
        return ModInt()._setval((lint)val_ + x.val_);
    }
    constexpr ModInt operator-(const ModInt &x) const {
        return ModInt()._setval((lint)val_ - x.val_ + md);
    }
    constexpr ModInt operator*(const ModInt &x) const {
        return ModInt()._setval((lint)val_ * x.val_ % md);
    }
    constexpr ModInt operator/(const ModInt &x) const {
        return ModInt()._setval((lint)val_ * x.inv().val() % md);
    }
    constexpr ModInt operator-() const { return ModInt()._setval(md - val_); }
    constexpr ModInt &operator+=(const ModInt &x) { return *this = *this + x; }
    constexpr ModInt &operator-=(const ModInt &x) { return *this = *this - x; }
    constexpr ModInt &operator*=(const ModInt &x) { return *this = *this * x; }
    constexpr ModInt &operator/=(const ModInt &x) { return *this = *this / x; }
    friend constexpr ModInt operator+(lint a, const ModInt &x) { return ModInt(a) + x; }
    friend constexpr ModInt operator-(lint a, const ModInt &x) { return ModInt(a) - x; }
    friend constexpr ModInt operator*(lint a, const ModInt &x) { return ModInt(a) * x; }
    friend constexpr ModInt operator/(lint a, const ModInt &x) { return ModInt(a) / x; }
    constexpr bool operator==(const ModInt &x) const { return val_ == x.val_; }
    constexpr bool operator!=(const ModInt &x) const { return val_ != x.val_; }
    constexpr bool operator<(const ModInt &x) const {
        return val_ < x.val_;
    } // To use std::map<ModInt, T>

    friend std::istream &operator>>(std::istream &is, ModInt &x) {
        lint t;
        return is >> t, x = ModInt(t), is;
    }
    constexpr friend std::ostream &operator<<(std::ostream &os, const ModInt &x) {
        return os << x.val_;
    }

    // Binary exponentiation; n is expected to be non-negative.
    constexpr ModInt pow(lint n) const {
        ModInt ans = 1, tmp = *this;
        while (n) {
            if (n & 1) ans *= tmp;
            tmp *= tmp, n >>= 1;
        }
        return ans;
    }

    // Values below this limit get their inverse from the `invs` table.
    static constexpr int cache_limit = std::min(md, 1 << 21);
    // facs[i] = i!, facinvs[i] = 1 / i!, invs[i] = 1 / i (invs[0] is 0).
    static std::vector<ModInt> facs, facinvs, invs;

    // Extends the three tables to N entries (capped at md). The tables must be non-empty.
    constexpr static void _precalculation(int N) {
        const int l0 = facs.size();
        if (N > md) N = md;
        if (N <= l0) return;
        facs.resize(N), facinvs.resize(N), invs.resize(N);
        for (int i = l0; i < N; i++) facs[i] = facs[i - 1] * i;
        facinvs[N - 1] = facs.back().pow(md - 2);
        for (int i = N - 2; i >= l0; i--) facinvs[i] = facinvs[i + 1] * (i + 1);
        for (int i = N - 1; i >= l0; i--) invs[i] = facinvs[i] * facs[i - 1];
    }

    // Grows the tables until index idx is valid. The empty-table seed is the guard inv()
    // always had; sharing it here keeps fac() / facinv() from looping forever when the
    // tables are still empty (doubling a size of 0 never makes progress).
    constexpr static void _ensure_cache(int idx) {
        if (facs.empty()) facs = {1}, facinvs = {1}, invs = {0};
        while (idx >= int(facs.size())) _precalculation(facs.size() * 2);
    }

    // Multiplicative inverse: table lookup for small values, pow(md - 2) otherwise.
    constexpr ModInt inv() const {
        if (this->val_ < cache_limit) {
            _ensure_cache(this->val_);
            return invs[this->val_];
        } else {
            return this->pow(md - 2);
        }
    }
    // val_! (table grows on demand).
    constexpr ModInt fac() const {
        _ensure_cache(this->val_);
        return facs[this->val_];
    }
    // 1 / val_! (table grows on demand).
    constexpr ModInt facinv() const {
        _ensure_cache(this->val_);
        return facinvs[this->val_];
    }
    // Double factorial val_!!, expressed through ordinary factorials and powers of two.
    constexpr ModInt doublefac() const {
        lint k = (this->val_ + 1) / 2;
        return (this->val_ & 1) ? ModInt(k * 2).fac() / (ModInt(2).pow(k) * ModInt(k).fac())
                                : ModInt(k).fac() * ModInt(2).pow(k);
    }
    // Binomial coefficient C(val_, r); 0 when r is out of range.
    constexpr ModInt nCr(int r) const {
        if (r < 0 or this->val_ < r) return ModInt(0);
        return this->fac() * (*this - r).facinv() * ModInt(r).facinv();
    }
    // Number of r-permutations of val_ items; 0 when r is out of range.
    constexpr ModInt nPr(int r) const {
        if (r < 0 or this->val_ < r) return ModInt(0);
        return this->fac() * (*this - r).facinv();
    }

    // Binomial coefficient C(n, r). Uses the factorial tables when n is already covered
    // (or once enough brute-force work has been spent), otherwise multiplies r factors.
    static ModInt binom(int n, int r) {
        static long long bruteforce_times = 0;
        if (r < 0 or n < r) return ModInt(0);
        if (n <= bruteforce_times or n < (int)facs.size()) return ModInt(n).nCr(r);
        r = std::min(r, n - r);
        ModInt ret = ModInt(r).facinv();
        for (int i = 0; i < r; ++i) ret *= n - i;
        bruteforce_times += r;
        return ret;
    }

    // Multinomial coefficient, (k_1 + k_2 + ... + k_m)! / (k_1! k_2! ... k_m!)
    // Complexity: O(sum(ks))
    template <class Vec> static ModInt multinomial(const Vec &ks) {
        ModInt ret{1};
        int sum = 0;
        for (int k : ks) {
            assert(k >= 0);
            ret *= ModInt(k).facinv(), sum += k;
        }
        return ret * ModInt(sum).fac();
    }

    // Catalan number, C_n = binom(2n, n) / (n + 1)
    // C_0 = 1, C_1 = 1, C_2 = 2, C_3 = 5, C_4 = 14, ...
    // https://oeis.org/A000108
    // Complexity: O(n)
    static ModInt catalan(int n) {
        if (n < 0) return ModInt(0);
        return ModInt(n * 2).fac() * ModInt(n + 1).facinv() * ModInt(n).facinv();
    }

    // Square root: returns the smaller of the two roots, or 0 when the value is 0 or
    // the Euler-criterion test pow((md - 1) / 2) != 1 rejects it.
    ModInt sqrt() const {
        if (val_ == 0) return 0;
        if (md == 2) return val_;
        if (pow((md - 1) / 2) != 1) return 0;
        // b: first value that fails the same test (a non-residue).
        ModInt b = 1;
        while (b.pow((md - 1) / 2) == 1) b += 1;
        // Write md - 1 = m * 2^e with m odd.
        int e = 0, m = md - 1;
        while (m % 2 == 0) m >>= 1, e++;
        ModInt x = pow((m - 1) / 2), y = (*this) * x * x;
        x *= (*this);
        ModInt z = b.pow(m);
        while (y != 1) {
            int j = 0;
            ModInt t = y;
            while (t != 1) j++, t *= t;
            z = z.pow(1LL << (e - j - 1));
            x *= z, z *= z, y *= z;
            e = j;
        }
        return ModInt(std::min(x.val_, md - x.val_));
    }
};
template <int md> std::vector<ModInt<md>> ModInt<md>::facs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::facinvs = {1};
template <int md> std::vector<ModInt<md>> ModInt<md>::invs = {0};
using ModInt998244353 = ModInt<998244353>;
// using mint = ModInt<998244353>;
// using mint = ModInt<1000000007>;
#line 3 "convolution/ntt.hpp"
#include <algorithm>
#include <array>
#line 7 "convolution/ntt.hpp"
#include <tuple>
#line 9 "convolution/ntt.hpp"
// CUT begin
// Integer convolution for arbitrary mod
// with NTT (and Garner's algorithm) for ModInt / ModIntRuntime class.
// Garner's algorithm is skipped if `skip_garner` is true or the modulus is one of `nttprimes`.
// input: a (size: n), b (size: m)
// return: vector (size: n + m - 1); empty when either input is empty
// (Forward declaration; the definition follows further down in this file.)
template <typename MODINT>
std::vector<MODINT> nttconv(std::vector<MODINT> a, std::vector<MODINT> b, bool skip_garner);
// Moduli handled by the NTT routines: nttconv transforms directly when the modulus is one
// of these, and garner_ntt_ recombines residues taken modulo all three.
constexpr int nttprimes[3] = {998244353, 167772161, 469762049};
// In-place number theoretic transform (integer FFT) over a ModInt-like type.
// a          : data to transform; its size must be a power of two dividing mod - 1
// is_inverse : when true, runs the inverse butterflies and scales by 1 / size
// A vector of size 1 is returned untouched. The forward pass leaves the result in the
// order the inverse pass consumes, so the two are meant to be used as a pair.
template <typename MODINT> void ntt(std::vector<MODINT> &a, bool is_inverse = false) {
    const int len = a.size();
    if (len == 1) return;

    static const int mod = MODINT::mod();
    static const MODINT root = MODINT::get_primitive_root();
    assert(__builtin_popcount(len) == 1 and (mod - 1) % len == 0);

    // Twiddle tables shared by all calls; they only ever grow, doubling each round.
    static std::vector<MODINT> w{1}, iw{1};
    int filled = w.size();
    while (filled < len / 2) {
        const MODINT step = root.pow((mod - 1) / (4 * filled));
        const MODINT step_inv = 1 / step;
        w.resize(filled * 2);
        iw.resize(filled * 2);
        for (int i = 0; i < filled; i++) {
            w[filled + i] = w[i] * step;
            iw[filled + i] = iw[i] * step_inv;
        }
        filled *= 2;
    }

    if (is_inverse) {
        // Inverse: half-width grows 1, 2, 4, ...; twiddle applied after the subtraction.
        for (int half = 1; half < len; half *= 2) {
            int block = 0;
            for (int start = 0; start < len; start += 2 * half) {
                for (int i = start; i < start + half; i++) {
                    const MODINT lo = a[i];
                    const MODINT hi = a[i + half];
                    a[i] = lo + hi;
                    a[i + half] = (lo - hi) * iw[block];
                }
                block++;
            }
        }
        const int scale = MODINT(len).inv().val();
        for (MODINT &elem : a) elem *= scale;
    } else {
        // Forward: half-width shrinks len/2, len/4, ..., 1; twiddle applied before the add.
        for (int half = len >> 1; half != 0; half >>= 1) {
            int block = 0;
            for (int start = 0; start < len; start += 2 * half) {
                for (int i = start; i < start + half; i++) {
                    const MODINT lo = a[i];
                    const MODINT hi = a[i + half] * w[block];
                    a[i] = lo + hi;
                    a[i + half] = lo - hi;
                }
                block++;
            }
        }
    }
}
// Cyclic convolution of two equally sized integer sequences modulo the compile-time MOD.
// Both inputs must already be padded to the same power-of-two length; the result has
// that same length. When the inputs are equal only one forward transform is performed.
template <int MOD>
std::vector<ModInt<MOD>> nttconv_(const std::vector<int> &a, const std::vector<int> &b) {
    using mint = ModInt<MOD>;
    const int len = a.size();
    assert(a.size() == b.size() and __builtin_popcount(len) == 1);

    std::vector<mint> fa(len), fb(len);
    for (int i = 0; i < len; i++) {
        fa[i] = a[i];
        fb[i] = b[i];
    }

    ntt(fa, false);
    if (a != b) {
        ntt(fb, false);
    } else {
        fb = fa;  // identical inputs: reuse the transform already computed
    }

    for (int i = 0; i < len; i++) fa[i] *= fb[i];
    ntt(fa, true);
    return fa;
}
// Garner reconstruction: given residues r0, r1, r2 of one integer modulo nttprimes[0],
// nttprimes[1], nttprimes[2] (each expected in [0, its prime)), returns that integer
// reduced modulo `mod`, always in [0, mod).
// Declared inline because it is a non-template function defined in a header.
inline long long garner_ntt_(int r0, int r1, int r2, int mod) {
    using mint2 = ModInt<nttprimes[2]>;
    static const long long m01 = 1LL * nttprimes[0] * nttprimes[1];
    static const long long m0_inv_m1 = ModInt<nttprimes[1]>(nttprimes[0]).inv().val();
    static const long long m01_inv_m2 = mint2(m01).inv().val();

    // Mixed-radix digit for nttprimes[1]. r1 + p1 - r0 is negative whenever r0 > r1 + p1,
    // and % keeps that sign, so bring the digit back into [0, p1) explicitly; otherwise
    // the final sum (and hence the returned remainder) can come out negative.
    long long v1 = (m0_inv_m1 * (r1 + nttprimes[1] - r0)) % nttprimes[1];
    if (v1 < 0) v1 += nttprimes[1];

    // Mixed-radix digit for nttprimes[2], computed in ModInt arithmetic.
    const auto v2 = (mint2(r2) - r0 - mint2(nttprimes[0]) * v1) * m01_inv_m2;

    // value = r0 + p0 * v1 + p0 * p1 * v2; the last term is reduced early to stay in 64 bits.
    return (r0 + 1LL * nttprimes[0] * v1 + m01 % mod * v2.val()) % mod;
}
// Linear convolution of a and b under MODINT's modulus.
// a (size n), b (size m) -> vector of size n + m - 1; empty if either input is empty.
// Small inputs are multiplied directly. Otherwise a single NTT is used when
// `skip_garner` is set or the modulus is listed in `nttprimes`; in every other case the
// product is computed modulo all three `nttprimes` and recombined with garner_ntt_.
template <typename MODINT>
std::vector<MODINT> nttconv(std::vector<MODINT> a, std::vector<MODINT> b, bool skip_garner) {
    if (a.empty() or b.empty()) return {};

    const int n = a.size();
    const int m = b.size();
    const int out_len = n + m - 1;
    int sz = 1;
    while (sz < n + m) sz <<= 1;

    // Schoolbook multiplication for short inputs.
    if (sz <= 16) {
        std::vector<MODINT> ret(out_len);
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < m; j++) ret[i + j] += a[i] * b[j];
        }
        return ret;
    }

    const int mod = MODINT::mod();
    const bool single_transform =
        skip_garner or
        std::find(std::begin(nttprimes), std::end(nttprimes), mod) != std::end(nttprimes);

    if (!single_transform) {
        // Work on raw residues so the same data can be transformed under each prime.
        std::vector<int> ai(sz), bi(sz);
        for (int i = 0; i < n; i++) ai[i] = a[i].val();
        for (int i = 0; i < m; i++) bi[i] = b[i].val();
        auto ntt0 = nttconv_<nttprimes[0]>(ai, bi);
        auto ntt1 = nttconv_<nttprimes[1]>(ai, bi);
        auto ntt2 = nttconv_<nttprimes[2]>(ai, bi);
        a.resize(out_len);
        for (int i = 0; i < out_len; i++)
            a[i] = garner_ntt_(ntt0[i].val(), ntt1[i].val(), ntt2[i].val(), mod);
        return a;
    }

    a.resize(sz);
    b.resize(sz);
    const bool same_input = (a == b);
    ntt(a, false);
    if (same_input) {
        b = a;  // squaring: one forward transform is enough
    } else {
        ntt(b, false);
    }
    for (int i = 0; i < sz; i++) a[i] *= b[i];
    ntt(a, true);
    a.resize(out_len);
    return a;
}
// Convenience overload of nttconv: forwards to the three-argument version with
// `skip_garner` set to false.
// input: a, b (taken by const reference and copied by the forwarded call)
// return: whatever the three-argument nttconv returns for (a, b, false)
template <typename MODINT>
std::vector<MODINT> nttconv(const std::vector<MODINT> &a, const std::vector<MODINT> &b) {
return nttconv<MODINT>(a, b, false);
}
#line 4 "convolution/multivar_ntt.hpp"
#include <numeric>
#line 6 "convolution/multivar_ntt.hpp"
// CUT begin
// Multivariate convolution (linear; terms that overflow any dimension are cut off)
// Complexity: $O(kN \log N + k^2 N)$
// Note that the vectors store the information in **column-major order**
// Implementation idea: https://rushcheyo.blog.uoj.ac/blog/6547
// Details of my implementation: https://hitonanode.github.io/cplib-cpp/convolution/multivar_ntt.hpp
//
// Usage: construct with the per-variable sizes and call the object on two coefficient
// vectors of length N (the product of the sizes). The transform itself is delegated to
// the free function `ntt`, applied to vectors of length `fftlen`.
template <typename MODINT> struct multivar_ntt {
    int K;                 // number of variables (dim.size())
    int N;                 // number of coefficients: product of all dimensions
    int fftlen;            // smallest power of two that is >= 2 * N
    std::vector<int> dim;  // copy of the dimensions given to the constructor
    // chi[i] = (sum over j of floor(i / (dim[0] * ... * dim[j]))) reduced modulo K.
    // It decides which of the K layers coefficient i is stored in.
    std::vector<int> chi;
    MODINT invfftlen;      // inverse of fftlen (kept for callers; not read by this struct)

private:
    // Fills K, N, fftlen, invfftlen and the chi table from the dimension list.
    void _initialize(const std::vector<int> &dim_) {
        dim = dim_;
        K = dim_.size();

        // Accumulate in 64 bits and reject sizes for which fftlen (< 4N) would not fit in int.
        long long total = 1;
        for (int d : dim_) {
            assert(d >= 0);
            total *= d;
            assert(total <= (1LL << 29));
        }
        N = int(total);

        fftlen = 1;
        while (fftlen < N * 2) fftlen <<= 1;
        invfftlen = MODINT(fftlen).inv();

        // First mark every multiple of each prefix product, then prefix-sum modulo K.
        chi.assign(fftlen, 0);
        int stride = 1;
        for (int d : dim_) {
            stride *= d;
            // A zero-sized dimension means N == 0; stepping by 0 would never terminate.
            if (stride == 0) break;
            for (int s = stride; s < fftlen; s += stride) chi[s] += 1;
        }
        for (int i = 0; i + 1 < fftlen; i++) {
            chi[i + 1] += chi[i];
            if (chi[i + 1] >= K) chi[i + 1] -= K;
        }
    }

    // Convolves f and g (both of length N) and returns the N retained coefficients.
    std::vector<MODINT> _convolve(const std::vector<MODINT> &f, const std::vector<MODINT> &g) const {
        assert(int(f.size()) == N);
        assert(int(g.size()) == N);
        if (N == 0) return {};                  // some dimension is empty: nothing to compute
        if (dim.empty()) return {f[0] * g[0]};  // zero variables: plain scalar product

        // Scatter each coefficient into the layer selected by chi, then transform every layer.
        std::vector<std::vector<MODINT>> fex(K, std::vector<MODINT>(fftlen)),
            gex(K, std::vector<MODINT>(fftlen));
        for (int i = 0; i < N; i++) {
            fex[chi[i]][i] = f[i];
            gex[chi[i]][i] = g[i];
        }
        for (auto &vec : fex) ntt(vec, false);
        for (auto &vec : gex) ntt(vec, false);

        // Layer indices add modulo K under pointwise multiplication.
        std::vector<std::vector<MODINT>> hex(K, std::vector<MODINT>(fftlen));
        for (int df = 0; df < K; df++) {
            for (int dg = 0; dg < K; dg++) {
                const int dh = (df + dg < K) ? df + dg : df + dg - K;
                for (int i = 0; i < fftlen; i++) hex[dh][i] += fex[df][i] * gex[dg][i];
            }
        }
        for (auto &vec : hex) ntt(vec, true);

        // Only the entry whose layer matches chi[i] is read back for index i.
        std::vector<MODINT> ret(N);
        for (int i = 0; i < N; i++) ret[i] = hex[chi[i]][i];
        return ret;
    }

public:
    // dim_: size of each variable, in storage order.
    multivar_ntt(const std::vector<int> &dim_) { _initialize(dim_); }

    // Returns the truncated product of f and g; both must have exactly N elements.
    std::vector<MODINT>
    operator()(const std::vector<MODINT> &f, const std::vector<MODINT> &g) const {
        return _convolve(f, g);
    }
};