Peter's codebook A blog full of codes

ZeroJudge b451 -- 圖片匹配

圖片匹配,給兩張圖片 A, B ,問 B 要對齊 A 的哪個位置,

可以使得 A, B 之間像素差平方的和最小?

根據題目敘述,可以知道

因為 在哪裡都一樣,只要算 的部分就可以了。

可以用二維 prefix sum 求到,

需要利用 FFT + 卷積。

並求出最小的 diff

傳送門

ZJ_b451

ZOJ1637

兩題一樣, FFT 程式碼就是借用 ZOJ 的。

ZeroJudge b451 程式碼

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
// ZJ, AC, 80ms, 20.3MB
#pragma GCC target ("avx2")
#pragma GCC optimize ("Os")
#pragma GCC optimize ("O3")
#include <iostream>
#include <algorithm>
#include <vector>
#include <complex>
#include <cmath>
#include <cstring>
#include <cstdint>
#ifdef M_PIl
#undef M_PIl
#endif
// PI 常數,也可以使用 acos(-1.) 啦
#define M_PIl 3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679
// 預防 for(int i; ...) 的 i 跑到 scope 外面
#define for if (0); else for

namespace FFT  // WARNING!!! do not reveal this namespace
{
    using namespace std; // be careful

    int NumberOfBitsNeeded(int PowerOfTwo) {
        for (int i = 0;; ++i) {
            if (PowerOfTwo & (1 << i)) {
                return i;
            }
        }
    }

    // 參考 morris + 掛長 的 blog 的快速翻轉 bit (MSB->LSB; LSB->MSB)
    inline uint32_t FastReverseBits(uint32_t a, int NumBits) {
        a = ( ( a & 0x55555555U ) << 1 ) | ( ( a & 0xAAAAAAAAU ) >> 1 ) ;
        a = ( ( a & 0x33333333U ) << 2 ) | ( ( a & 0xCCCCCCCCU ) >> 2 ) ;
        a = ( ( a & 0x0F0F0F0FU ) << 4 ) | ( ( a & 0xF0F0F0F0U ) >> 4 ) ;
        a = ( ( a & 0x00FF00FFU ) << 8 ) | ( ( a & 0xFF00FF00U ) >> 8 ) ;
        a = ( ( a & 0x0000FFFFU ) << 16 ) | ( ( a & 0xFFFF0000U ) >> 16 ) ;
        return a >> (32 - NumBits);
    }

    // 嘗試打開 fast-math 優化選項,如果 Judge 機不支援,請記得把這行註解掉
    void FFT(bool, const vector<complex<double> >&, vector<complex<double> >&) __attribute__((optimize("fast-math")));
    // FFT 本體, In 是輸入的向量(訊號),Out 是輸出的向量(訊號)
    // 這裏其實不太重要,主要會用得的部分是下方的卷積
    // NOTE:::::::::::: 兩個向量長度必須是 2^k,等長
    void FFT(bool InverseTransform, const vector<complex<double> >& In, vector<complex<double> >& Out) {
        // simultaneous data copy and bit-reversal ordering into outputs
        int NumSamples = In.size();
        int NumBits = NumberOfBitsNeeded(NumSamples);
        for (int i = 0; i < NumSamples; ++i) {
            Out[FastReverseBits((uint32_t)i, NumBits)] = In[i];
        }
        // the FFT process
        double angle_numerator = M_PIl * (InverseTransform ? -2 : 2);
        for (int BlockEnd = 1, BlockSize = 2; BlockSize <= NumSamples; BlockSize <<= 1) {
            double ndelta_angle = -(angle_numerator / BlockSize);
            double sin1 = sin(ndelta_angle);
            double cos1 = cos(ndelta_angle);
            double sin2 = 2*sin1*cos1;
            double cos2 = 2*cos1*cos1-1.0;
            for (int i = 0; i < NumSamples; i += BlockSize) {
                complex<double> a1(cos1, sin1), a2(cos2, sin2);
                for (int j = i, n = 0; n < BlockEnd; ++j, ++n) {
                    complex<double> a0(2 * cos1 * a1.real() - a2.real(), 2 * cos1 * a1.imag() - a2.imag());
                    a2 = a1;
                    a1 = a0;
                    complex<double> a = a0 * Out[j + BlockEnd];
                    Out[j + BlockEnd] = Out[j] - a;
                    Out[j] += a;
                }
            }
            BlockEnd = BlockSize;
        }
        // normalize if inverse transform
        if (InverseTransform) {
            for (int i = 0; i < NumSamples; ++i) {
                Out[i] /= NumSamples;
            }
        }
    }

    // 同上,如果編譯器不支援 fast-math 選項,記得註解掉下面兩行
    template<class T>
        void convolution(const vector<T> &a, const vector<T> &b, vector<double> &ret) __attribute__((optimize("fast-math")));
    // 卷積,輸入"""等長"""的 a, b 兩向量(長度必須是 2^k),會得到 a * b (a卷積b)的結果
    // 下面套用 ZOJ 上的例子:
    /**
     * Given two sequences {a1, a2, a3.. an} and {b1, b2, b3... bn},
     * their repeat convolution means:
     * r1 = a1*b1 + a2*b2 + a3*b3 + ... + an*bn
     * r2 = a1*bn + a2*b1 + a3*b2 + ... + an*bn-1
     * r3 = a1*bn-1 + a2*bn + a3*b1 + ... + an*bn-2
     * ...
     * rn = a1*b2 + a2*b3 + a3*b4 + ... + an-1*bn + an*b1
     * Notice n >= 2 and n must be power of 2.
     */
    template<class T>
        void convolution(const vector<T> &a, const vector<T> &b, vector<double> &ret) {
            int n = a.size();
            vector<complex<double> > s(n), d1(n), d2(n), y(n);
            for (int i = 0; i < n; ++i) {
                s[i] = complex<double>(a[i], 0);
            }
            FFT(false, s, d1);
            s[0] = complex<double>(b[0], 0);
            for (int i = 1; i < n; ++i) {
                s[i] = complex<double>(b[n - i], 0);
            }
            FFT(false, s, d2);
            for (int i = 0; i < n; ++i) {
                y[i] = d1[i] * d2[i];
            }
            FFT(true, y, s);
            ret.resize(n,0);
            for (int i = 0; i < n; ++i) {
                ret[i] = s[i].real();
            }
        }
}; // namespace FFT

double area[512][512];
std::vector<int> A;
std::vector<int> B;
std::vector<double> conv;
int Ah, Aw, Bh, Bw;
int W, H;

int main(void) {
    using namespace std;
    ios::sync_with_stdio(false);
    cin.tie(0);cout.tie(0);
    while (cin >> Ah >> Aw >> Bh >> Bw) {
        W = max(Aw,Bw);
        H = max(Ah,Bh);
        int n = H*W, n2base=1;
        for (n2base=1; n>n2base; n2base<<=1); // n2base==2^k , n2base>=n
        A.resize(n2base); B.resize(n2base);
        fill(A.begin(), A.end(), 0);
        fill(B.begin(), B.end(), 0);
        for (int i=0; i<Ah; ++i) {
            for (int j=0; j<Aw; ++j) {
                cin >> A[i*W+j];
            }
        }
        for (int i=0; i<Bh; ++i) {
            for (int j=0; j<Bw; ++j) {
                cin >> B[i*W+j];
            }
        }
        FFT::convolution<int>(A, B, conv);
#ifdef DEBUG
        for (int i=0; i<conv.size(); ++i) {
            int _i=i/W;
            int _j=i%W;
            if (_i+Bh>Ah||_j+Bw>Aw) continue;
            cout << "conv[" << (i/W) << "][" << (i%W) << "] = " << conv.at(i) << endl;
        }
#endif
        memset(area, 0x00, sizeof(area));
        for (int i=1; i<=Ah; ++i) {
            double sum=0.0;
            for (int j=1; j<=Aw; ++j) {
                sum += A[(i-1)*W+(j-1)]*A[(i-1)*W+(j-1)];
                area[i][j] = sum + area[i-1][j]; // 2D prefix sum
            }
        }
        double minval=1e300;
        int x=0, y=0;
        for (int i=0; i+Bh<=Ah; ++i) {
            for (int j=0; j+Bw<=Aw; ++j) {
                double diff=area[i+Bh][j+Bw] - area[i+Bh][j] - area[i][j+Bw] + area[i][j];
#ifdef DEBUG
                cout << "A2[" << i << "][" << j << "] = " << diff << endl;
#endif
                diff -= 2.0*conv[i*W+j];
                if (diff<minval) {
                    minval = diff;
                    x=j, y=i;
                }
            }
        }
        cout << y+1 << ' ' << x+1 << endl;
    }
    return 0;
}
comments powered by Disqus