# 并行与分布式计算（4）

Consider a sparse matrix stored in the compressed row format (you may find a description of this format on the web or any suitable text on sparse linear algebra). Write an OpenMP program for computing the product of this matrix with a vector. Download sample matrics from the Matrix Market and test the performance of your implementation as a function of matrix size and number of threads.

``````#include <bits/stdc++.h>
#include <omp.h>
using namespace std;
typedef double lf;
typedef vector<lf> Vec;
typedef vector<vector<pair<int, lf>>> Mat;
int m, n, th;
Vec operator*(const Vec &v, const Mat &m)
{
Vec r(m.size(), 0);
#pragma omp parallel for num_threads(th)
for (int i = 0; i < m.size(); ++i)
for (const auto &p : m[i])
r[i] += v[p.first] * p.second;
return r;
}
int main()
{
ifstream fin("1138_bus.mtx");
while (fin.peek() == '%')
while (fin.get() != '\n')
;
fin >> m >> n >> th;
Mat ma(m);
for (int x, y, i = 0; i < th; ++i)
{
lf t;
fin >> x >> y >> t;
ma[x - 1].emplace_back(y - 1, t);
}
Vec ve(n);
for (int i = 0; i < n; ++i)
ve[i] = rand();
cout << "number of threads: ";
cin >> th;
auto begin = std::chrono::system_clock::now();
for (int i = 1e5; i; --i)
ve *ma;
auto end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end - begin;
std::cout << "elapsed time: " << elapsed_seconds.count() << "s\n";
}
``````
number of threads elapsed time
1 12.1646s
2 8.52163s
4 9.68788s
8 9.74822s
16 13.2045s
32 19.4011s
64 31.9306s

Implement a producer-consumer framework in OpenMP using sections to create a single producer task and a single consumer task. Ensure appropriate synchronization using locks. Test your program for a varying number of producers and consumers.

``````#include <chrono>
#include <iostream>
#include "wkMultiAccessQueue.hpp"
wk::MultiAccessQueue<int> q;
void producer(int cnt)
{
for (int i = 0; i < cnt; ++i)
q.push(i);
}
void consumer(int cnt)
{
for (int i = 0; i < cnt; ++i)
q.pop();
}
int main()
{
int num;
std::cout << "number of producer-consumers: ";
std::cin >> num;
auto begin = std::chrono::system_clock::now();
#pragma omp parallel for
for (int i = 0; i < num; ++i)
producer(1000);
#pragma omp parallel for
for (int i = 0; i < num; ++i)
consumer(1000);
auto end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end - begin;
std::cout << "elapsed time: " << elapsed_seconds.count() << "s\n";
}
``````
number of producer-consumers elapsed time
64 0.0644229s
512 0.37481s
4096 2.96336s
32768 23.2893s