OI XXXI - zyr (Matrix)

// https://szkopul.edu.pl/problemset/problem/AWwhm-sG6VDE2LwvWMCNS-fB/site/?key=statement

// author: pb0207

#include <bits/stdc++.h>
using namespace std;

// #define int long long

const int N = 1e7 + 1e3 + 7, P = 1e9 + 7;

int k, q;

int son[N];

struct Mat {
    int a[3][3];

    Mat(int I = 0) {
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
                a[i][j] = (i == j && I);
    }
};

Mat operator*(const Mat& a, const Mat& b) {
    Mat c;

    for (int i = 0; i < 3; i++)
        for (int k = 0; k < 3; k++)
            for (int j = 0; j < 3; j++)
                c.a[i][j] = (c.a[i][j] + 1ll * a.a[i][k] * b.a[k][j]) % P;

    return c;
}

Mat qpow(Mat a, int b) {
    Mat ret(1);

    while (b) {
        if (b & 1) ret = ret * a;

        b >>= 1;
        a = a * a;
    }

    return ret;
}

struct Vec {
    int a[3];
    Vec() { memset(a, 0, sizeof(a)); }
};

Vec operator*(const Vec& a, const Mat& b) {
    Vec ret;

    for (int i = 0; i < 3; i++)
        for (int j = 0; j < 3; j++)
            ret.a[j] = (ret.a[j] + 1ll * a.a[i] * b.a[i][j]) % P;

    return ret;
}

signed main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cin >> k >> q;

    if (k == 1) {
        while (q--) {
            int d, p;
            cin >> d >> p;

            for (int i = 1, tmp; i <= d; i++)
                cin >> tmp;

            int ans = max(p - d + 1, 0);
            cout << ans * ((d > 1) + 1) % P << "\n";
        }

        return 0;
    }

    // {l, r, b, s}
    // {r + 1, to_r, b * (k + 1) / 2 + newb(l, r), s + b * (k + 1) / 2 + newb(l, r)}
    vector<int> d(k + 1), ex(k + 1);
    vector<int> l = {1}, r = {1};
    vector<Mat> trans = {Mat(1)};
    int flag = 0;

    vector<Mat> ctrans;
    vector<int> cr;
    int stdep = -1;
    vector<long long> tot(1);
    int stblk = 0;

    for (long long dep = 1, A = 1, B = 1;; dep++) {
        tot.push_back(0);
        tot[dep] = tot[dep - 1] + B - A + 1;
        // A->B contain
        int ok = 0, st = 0;

        for (int i = A; i <= B; i++) {
            if (i % k == 1) st = 1;

            if (i % k == 0) ok |= st;

            if (ok) break;
        }

        if (ok) {
            while (A % k != 1)
                A++;

            while (B % k != 0)
                B--;

            stblk = (B - A + 1) / k % P;
            stdep = dep;
            break;
        }

        long long nA = B + 1, nB = B;

        for (int i = A; i <= B; i++) {
            long long s = (i - 1) % k + 1;
            nB += s;
        }

        A = nA, B = nB;
    }

    d[1] = 1;

    for (int dep = 2;; dep++) {
        auto L = l.back(), R = r.back();
        int nL = (R == k ? 1 : R + 1), nR = (1ll * (1 + R) * R / 2) % k + 1;

        if (d[nL] != 0) ex[nL]++;

        if (d[nL] != 0 && d[nL] > stdep + 10 && ex[nL] > 1) {
            // we find a circle
            int clen = dep - d[nL];

            for (int j = 1; j <= clen; j++) {
                ctrans.push_back(trans[trans.size() - j]);
                cr.push_back(r[r.size() - j]);
            }

            reverse(cr.begin(), cr.end());
            reverse(ctrans.begin(), ctrans.end());
            break;
        }

        d[nL] = dep;
        l.push_back(nL), r.push_back(nR);

        if (!flag && dep == stdep) {
            flag = 1;
            Mat nw(1);
            nw.a[2][0] = (stblk) % P;
            nw.a[2][1] = (tot[dep] / k % P - tot[dep - 1] / k % P + P) % P;
            trans.push_back(nw);
        } else if (!flag) {
            Mat nw(1);
            nw.a[2][1] = (tot[dep] / k % P - tot[dep - 1] / k % P + P) % P;
            trans.push_back(nw);
        } else {
            Mat nw;
            nw.a[0][0] = (k + 1) / 2;
            nw.a[1][1] = 1;
            nw.a[2][2] = 1;
            nw.a[0][1] = (k + 1) / 2;
            nw.a[2][0] = ((1ll * (L + k) * (k - L + 1) / 2 - 1) / k + (1ll * (1 + R) * R / 2 + 1) / k) % P;
            nw.a[2][1] = ((1ll * (L + k) * (k - L + 1) / 2 - 1 + k - 1) / k + (1ll * (1 + R) * R / 2 + 1) / k) % P;

            if (L == 1) nw.a[2][0] = (nw.a[2][0] - (k + 1) / 2 + P) % P, nw.a[2][1] = (nw.a[2][1] - (k + 1) / 2 + P) % P;

            if (R == k) nw.a[2][0] = (nw.a[2][0] - (k + 1) / 2 + P) % P, nw.a[2][1] = (nw.a[2][1] - (k + 1) / 2 + P) % P;

            trans.push_back(nw);
        }
    }

    for (int i = 1; i < trans.size(); i++)
        trans[i] = trans[i - 1] * trans[i];

    for (int i = 1; i < ctrans.size(); i++)
        ctrans[i] = ctrans[i - 1] * ctrans[i];

    auto son = [&](auto x, auto y) {
        int L = (1ll * x * (x - 1) / 2 + 1) % k + 1;

        if (L + x - 1 <= k)
            return y >= L && y <= L + x - 1;
        else
            return y >= L || y <= L + x - 1 - k;
    };
    // how many x at dep 1.....d
    vector<Mat> pw(31);
    pw[0] = ctrans.back();

    for (int i = 1; i <= 30; i++)
        pw[i] = pw[i - 1] * pw[i - 1];

    auto calc = [&](int d, int x) {
        Vec tmp;
        tmp.a[2] = 1;

        if (d <= trans.size()) {
            int ret = (tmp * trans[d - 1]).a[1];

            if (r[d - 1] >= x && r[d - 1] != k) ret = (ret + 1) % P;

            return ret;
        }

        tmp = tmp * trans.back();
        d -= trans.size();
        int t = d / ctrans.size();

        // tmp=tmp*qpow(ctrans.back(),t);
        for (int i = 30; i >= 0; i--)
            if (t >> i & 1) tmp = tmp * pw[i];

        d -= t * ctrans.size();
        int R;

        if (d) {
            tmp = tmp * ctrans[d - 1];
            R = cr[d - 1];
        } else
            R = cr.back();

        int ret = tmp.a[1];

        if (R >= x && R != k) ret = (ret + 1) % P;

        return ret;
    };

    while (q--) {
        int d, p;
        cin >> d >> p;
        vector<int> a(d);

        for (auto& x : a)
            cin >> x;

        vector<int> okl(d), okr(d);

        for (int i = 0; i < d; i++) {
            if (!i)
                okl[i] = 1;
            else
                okl[i] = son(a[i], a[i - 1]) & okl[i - 1];
        }

        for (int i = d - 1; i >= 0; i--) {
            if (i == d - 1)
                okr[i] = 1;
            else
                okr[i] = son(a[i], a[i + 1]) & okr[i + 1];
        }

        int ans = 0;

        for (int i = 0; i < d; i++) {
            if (!okl[i] || !okr[i]) continue;

            if (i - 1 >= 0 && i + 1 < d && a[i - 1] == a[i + 1]) continue;

            int len = max(i + 1, d - i);

            if (p - len + 1 >= 1) ans = (ans + calc(p - len + 1, a[i])) % P;
        }

        cout << ans << "\n";
    }
}