高性能计算·实验（一）

实验过程

#include <stdio.h>
typedef struct pixel
{
int red, green, blue;
} pixel;
#define RIDX(i, j, dim) ((i) * (dim) + (j))
void naive_rotate(int dim, pixel *src, pixel *dst)
{
int i, j;
for (i = 0; i < dim; i++)
for (j = 0; j < dim; j++)
dst[RIDX(dim - 1 - j, i, dim)] = src[RIDX(i, j, dim)];
}
void rotate1(int dim, pixel *src, pixel *dst)
{
int i, j, ii, jj;
for (ii = 0; ii < dim; ii += 4)
for (jj = 0; jj < dim; jj += 4)
for (i = ii; i < ii + 4; i++)
for (j = jj; j < jj + 4; j++)
dst[RIDX(dim - 1 - j, i, dim)] = src[RIDX(i, j, dim)];
}
void rotate2(int dim, pixel *src, pixel *dst)
{
int i, j, ii, jj, k;
for (ii = 0; ii < dim; ii += 32)
for (jj = 0; jj < dim; jj += 32)
for (i = ii; i < ii + 32; i += 4)
for (j = jj; j < jj + 32; j += 4)
for (k = j; k < j + 4; ++k)
{
dst[RIDX(dim - 1 - k, i, dim)] = src[RIDX(i, k, dim)];
dst[RIDX(dim - 1 - k, i + 1, dim)] = src[RIDX(i + 1, k, dim)];
dst[RIDX(dim - 1 - k, i + 2, dim)] = src[RIDX(i + 2, k, dim)];
dst[RIDX(dim - 1 - k, i + 3, dim)] = src[RIDX(i + 3, k, dim)];
}
}
#define COPY(d, s) *(d) = *(s)
void rotate3(int dim, pixel *src, pixel *dst)
{
int i, j, k;
for (i = 0; i < dim; i += 32)
for (j = dim - 1; j >= 0; j -= 1)
{
pixel *dptr = dst + RIDX(dim - 1 - j, i, dim);
pixel *sptr = src + RIDX(i, j, dim);
for (k = 0; k < 32; ++k)
{
COPY(dptr + k, sptr);
sptr += dim;
}
}
}
#define N (1 << 13)
pixel s[N * N], d[N * N];
int main()
{
naive_rotate(N, s, d);
rotate1(N, s, d);
rotate2(N, s, d);
rotate3(N, s, d);
}


lnszyd@201-NF5280M3:~$gcc -g a.c lnszyd@201-NF5280M3:~$ sudo perf record -g ./a.out
[ perf record: Woken up 10 times to write data ]
[ perf record: Captured and wrote 2.468 MB perf.data (28285 samples) ]
lnszyd@201-NF5280M3:~\$ sudo perf report -g -i perf.data
-   52.13%    47.82%  a.out    a.out              [.] naive_rotate
+ 47.82% 0x2be258d4c544155
- 4.31% naive_rotate
- 2.48% page_fault
- 2.48% do_page_fault
- 2.41% __do_page_fault
- 2.23% handle_mm_fault
- 1.87% __handle_mm_fault
- 1.31% alloc_pages_vma
1.12% clear_page_erms
- 0.77% swapgs_restore_regs_and_return_to_usermode
prepare_exit_to_usermode
-   24.30%    23.90%  a.out    a.out              [.] rotate1
23.90% 0x2be258d4c544155
__libc_start_main
main
rotate1
-   11.86%    11.68%  a.out    a.out              [.] rotate3
11.68% 0x2be258d4c544155
__libc_start_main
main
rotate3
-   11.72%    11.49%  a.out    a.out              [.] rotate2
11.49% 0x2be258d4c544155
__libc_start_main
main
rotate2