# OpenMP实现并行快速傅里叶变换

## 题目

Final Project, Selected topics (not limited to):

1. Parallel Monte Carlo algorithm;
2. Parallellinearsolver;
3. Parallel QR factorization or linear least squares;
4. Parallelbenchmarks;
6. Parallel sorting or selection;
7. Parallelgraphpartitioning;

Implement with MPI or MapReduce or OpenMP or CUDA …

## 说明

### 编译

g++ ParallelFastFourierTransform.cpp -oParallelFastFourierTransform -fopenmp


### 运行

./ParallelFastFourierTransform <num-of-threads> <power-of-transform-length>


### 结果

• VAIO Z Flip 2016
• Intel(R) Core(TM) i7-6567U CPU @3.30GHZ 3.31GHz
• 8.00GB RAM
• Windows 10.0.18362.175, 64-bit
• Windows Subsystem for Linux [Ubuntu 18.04.2 LTS]：WSL是以软件的形式运行在Windows下的Linux子系统，是近些年微软推出来的新工具，可以在Windows系统上原生运行Linux。
• gcc version 7.3.0 (Ubuntu 7.3.0-27ubuntu1~18.04)：C语言程序编译器，Ubuntu自带。
$./ParallelFastFourierTransform 1 22 Serial Time: 4.29194s Parallel Time: 4.32889s$ ./ParallelFastFourierTransform 2 22
Serial Time: 4.30011s
Parallel Time: 2.9339s
$./ParallelFastFourierTransform 4 22 Serial Time: 4.26301s Parallel Time: 1.87973s$ ./ParallelFastFourierTransform 8 22
Serial Time: 4.27979s
Parallel Time: 1.89624s


### 源代码

#include <bits/stdc++.h>
#include <omp.h>
using namespace std;
typedef long long ll;
typedef double lf;
{
Rader(int n) : vector<int>(1 << int(ceil(log2(n))))
{
for (int i = at(0) = 0; i < size(); ++i)
if (at(i) = at(i >> 1) >> 1, i & 1)
at(i) += size() >> 1;
}
};
{
vector<complex<lf>> w;
FFT(int n) : Rader(n), w(size(), polar(1.0, 2 * M_PI / size()))
{
w[0] = 1;
for (int i = 1; i < size(); ++i)
w[i] *= w[i - 1];
}
vector<complex<lf>> pfft(const vector<complex<lf>> &a) const
{
vector<complex<lf>> x(size());
#pragma omp parallel for
for (int i = 0; i < a.size(); ++i)
x[at(i)] = a[i];
for (int i = 1; i < size(); i <<= 1)
#pragma omp parallel for
for (int j = 0; j < i; ++j)
for (int k = j; k < size(); k += i << 1)
{
complex<lf> t = w[size() / (i << 1) * j] * x[k + i];
x[k + i] = x[k] - t, x[k] += t;
}
return x;
}
vector<complex<lf>> fft(const vector<complex<lf>> &a) const
{
vector<complex<lf>> x(size());
for (int i = 0; i < a.size(); ++i)
x[at(i)] = a[i];
for (int i = 1; i < size(); i <<= 1)
for (int j = 0; j < i; ++j)
for (int k = j; k < size(); k += i << 1)
{
complex<lf> t = w[size() / (i << 1) * j] * x[k + i];
x[k + i] = x[k] - t, x[k] += t;
}
return x;
}
vector<ll> ask(const vector<ll> &a, const vector<ll> &b) const
{
vector<complex<lf>> xa(a.begin(), a.end()), xb(b.begin(), b.end());
xa = fft(xa), xb = fft(xb);
for (int i = 0; i < size(); ++i)
xa[i] *= xb[i];
vector<ll> ans(size());
xa = fft(xa), ans[0] = xa[0].real() / size() + 0.5;
for (int i = 1; i < size(); ++i)
ans[i] = xa[size() - i].real() / size() + 0.5;
return ans;
}
};
int main(int argc, char **argv)
{
if (argc < 3)
return cerr << "Error: No Enough parameters (" << argv[0] << " <num-of-threads> <power-of-transform-length>).\n", 0;