The optimized tree dump is produced with gcc -fopenmp -std=c99 -O2 pi.c -fdump-tree-optimized. There is not much of interest in it: the bodies of the OpenMP blocks are outlined into separate functions, main._omp_fn.0 and main._omp_fn.1, and main calls GOMP_parallel_start with a pointer to the outlined function. The reduction is a bit more interesting. The source, pi.c:

#include <stdio.h>
#include <math.h>
#include <unistd.h>
#include <sys/times.h>
#include <omp.h>
int main(int argc, char **argv)
{
    const unsigned long numSteps = 500000000; /* default # of rectangles */
    double PI25DT = 3.141592653589793238462643;
    double step, pi = 0, sum = 0.0, x;

    #pragma omp parallel
    {
        #pragma omp master
        {
            int cntThreads = omp_get_num_threads();
            printf("OpenMP. number of threads = %d\n", cntThreads);
        }
    }

    clock_t clockStart, clockStop;
    struct tms tmsStart, tmsStop;
    step = 1. / (double)numSteps;
    clockStart = times(&tmsStart);

    #pragma omp parallel for private (x), reduction (+:sum)
    for (int i = 0; i < numSteps; i++)
    {
        x = (i + .5) * step;
        sum = sum + 4.0 / (1. + x * x);
    }
    pi = sum * step;

    clockStop = times(&tmsStop);
    printf("The value of PI is %lf Error is %lf\n", pi, fabs(pi - PI25DT));
    double secs = (clockStop - clockStart) / (double)sysconf(_SC_CLK_TCK);
    printf("The time to calculate PI was %lf seconds\n", secs);
    return 0;
}

The dump itself:

;; Function main._omp_fn.0 (main._omp_fn.0, funcdef_no=23, decl_uid=3556, cgraph_uid=23)
main._omp_fn.0 (void * .omp_data_i)
{
int cntThreads;
int D.3622;
<bb 2>:
D.3622_1 = __builtin_omp_get_thread_num ();
if (D.3622_1 == 0)
goto <bb 4>;
else
goto <bb 3>;
<bb 3>:
return;
<bb 4>:
cntThreads_2 = __builtin_omp_get_num_threads ();
__printf_chk (1, "OpenMP. number of threads = %d\n", cntThreads_2); [tail call]
goto <bb 3>;
}
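The mechanics of that first region are easy to spell out by hand. Below is a minimal sketch, assuming the older libgomp entry points GOMP_parallel_start/GOMP_parallel_end that appear in this dump (internal ABI functions that only the compiler normally calls, not part of the public OpenMP API); outlined_master_block is an illustrative name playing the role of main._omp_fn.0.

#include <cstdio>
#include <omp.h>

/* libgomp's internal entry points, as they appear in the dump
   (normally only the compiler emits calls to them) */
extern "C" void GOMP_parallel_start(void (*fn)(void *), void *data,
                                    unsigned num_threads);
extern "C" void GOMP_parallel_end(void);

/* hand-written analogue of main._omp_fn.0: the body of
   "#pragma omp parallel { #pragma omp master ... }" */
static void outlined_master_block(void *omp_data)
{
    (void)omp_data;                    /* no shared data is passed (0B in the dump) */
    if (omp_get_thread_num() == 0)     /* the "master" check from bb 2 */
        std::printf("OpenMP. number of threads = %d\n", omp_get_num_threads());
}

int main()
{
    GOMP_parallel_start(outlined_master_block, 0, 0); /* start a team, default size */
    outlined_master_block(0);                         /* the master thread runs the body too */
    GOMP_parallel_end();                              /* wait for the team and tear it down */
    return 0;
}

Built with g++ -fopenmp, this prints the same "number of threads" line: the workers started by GOMP_parallel_start run the outlined body, the master runs it directly, and only the thread whose number is 0 passes the master check.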
;; Function main._omp_fn.1 (main._omp_fn.1, funcdef_no=24, decl_uid=3560, cgraph_uid=27)
Removing basic block 10
Removing basic block 11
main._omp_fn.1 (struct .omp_data_s.1 * .omp_data_i)
{
double pretmp.15;
long unsigned int D.3620;
long unsigned int D.3618;
double D.3617;
double D.3616;
long unsigned int * {ref-all} D.3613;
double D.3611;
double D.3610;
double D.3609;
double x;
double D.3606;
double D.3605;
int D.3604;
int D.3602;
int tt.5;
int q.4;
int D.3599;
int D.3598;
int i;
double sum;
<bb 2>:
D.3598_9 = __builtin_omp_get_num_threads ();
D.3599_10 = __builtin_omp_get_thread_num ();
q.4_11 = 500000000 / D.3598_9;
tt.5_12 = 500000000 % D.3598_9;
if (D.3599_10 < tt.5_12)
goto <bb 9>;
else
goto <bb 3>;
<bb 3>:
# q.4_4 = PHI <q.4_14(9), q.4_11(2)>
# tt.5_5 = PHI <0(9), tt.5_12(2)>
D.3602_15 = q.4_4 * D.3599_10;
i_16 = D.3602_15 + tt.5_5;
D.3604_17 = i_16 + q.4_4;
if (i_16 >= D.3604_17)
goto <bb 6>;
else
goto <bb 4>;
<bb 4>:
pretmp.15_56 = .omp_data_i_21(D)->step;
<bb 5>:
# sum_1 = PHI <0.0(4), sum_27(5)>
# i_3 = PHI <i_16(4), i_28(5)>
D.3605_19 = (double) i_3;
D.3606_20 = D.3605_19 + 5.0e-1;
x_23 = D.3606_20 * pretmp.15_56;
D.3609_24 = x_23 * x_23;
D.3610_25 = D.3609_24 + 1.0e+0;
D.3611_26 = 4.0e+0 / D.3610_25;
sum_27 = D.3611_26 + sum_1;
i_28 = i_3 + 1;
if (i_28 != D.3604_17)
goto <bb 5>;
else
goto <bb 6>;
<bb 6>:
# sum_2 = PHI <0.0(3), sum_27(5)>
D.3613_30 = &.omp_data_i_21(D)->sum;
D.3620_31 = MEM[(long unsigned int * {ref-all}).omp_data_i_21(D) + 8B];
<bb 7>:
# D.3620_6 = PHI <D.3620_31(6), D.3620_36(7)>
D.3616_33 = VIEW_CONVERT_EXPR<double>(D.3620_6);
D.3617_34 = D.3616_33 + sum_2;
D.3618_35 = VIEW_CONVERT_EXPR<long unsigned int>(D.3617_34);
D.3620_36 = __sync_val_compare_and_swap_8 (D.3613_30, D.3620_6, D.3618_35);
if (D.3620_6 != D.3620_36)
goto <bb 7>;
else
goto <bb 8>;
<bb 8>:
return;
<bb 9>:
q.4_14 = q.4_11 + 1;
goto <bb 3>;
}
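This is where the reduction shows up: each thread derives its own chunk of the 500000000 iterations from its thread number (bb 2/bb 9), accumulates a private sum, and then folds it into the shared sum with a compare-and-swap loop over the 64-bit bit pattern of the double (bb 6-bb 8, __sync_val_compare_and_swap_8). A rough equivalent of those two steps, with illustrative names split_range and atomic_add_double that are not GCC's:

#include <atomic>
#include <cstdint>
#include <cstring>

/* Iteration split from bb 2/bb 9: q iterations per thread, and the first
   (n % nthreads) threads take one extra iteration each. */
static void split_range(long n, int tid, int nthreads, long &begin, long &end)
{
    long q = n / nthreads, r = n % nthreads;
    begin = tid * q + (tid < r ? tid : r);
    end   = begin + q + (tid < r ? 1 : 0);
}

/* Reduction from bb 6-bb 8: add a double into a shared cell by retrying a
   compare-and-swap on its 64-bit representation until no other thread has
   updated the cell in between. */
static void atomic_add_double(std::atomic<std::uint64_t> &cell, double value)
{
    std::uint64_t old_bits = cell.load();
    for (;;) {
        double old_val, new_val;
        std::memcpy(&old_val, &old_bits, sizeof old_val);   /* VIEW_CONVERT_EXPR<double> */
        new_val = old_val + value;
        std::uint64_t new_bits;
        std::memcpy(&new_bits, &new_val, sizeof new_bits);  /* back to the bit pattern */
        /* on failure old_bits is refreshed with the current cell value,
           so the next iteration retries with fresh data, as in bb 7 */
        if (cell.compare_exchange_weak(old_bits, new_bits))
            break;
    }
}

Each thread calls atomic_add_double exactly once with its private sum, so contention on the shared cell stays negligible no matter how many iterations the loop runs.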
;; Function main (main, funcdef_no=22, decl_uid=3527, cgraph_uid=22) (executed once)
main (int argc, char * * argv)
{
double secs;
struct tms tmsStop;
struct tms tmsStart;
clock_t clockStop;
clock_t clockStart;
double sum;
double pi;
double step;
double D.3553;
long int D.3552;
double D.3551;
long int D.3550;
double D.3549;
double D.3548;
struct .omp_data_s.1 .omp_data_o.2;
<bb 2>:
__builtin_GOMP_parallel_start (main._omp_fn.0, 0B, 0);
main._omp_fn.0 (0B);
__builtin_GOMP_parallel_end ();
clockStart_6 = times (&tmsStart);
.omp_data_o.2.sum = 0.0;
.omp_data_o.2.step = 2.00000000000000012456318291555971283779413738557195756584e-9;
__builtin_GOMP_parallel_start (main._omp_fn.1, &.omp_data_o.2, 0);
main._omp_fn.1 (&.omp_data_o.2);
__builtin_GOMP_parallel_end ();
sum_7 = .omp_data_o.2.sum;
step_8 = .omp_data_o.2.step;
pi_9 = sum_7 * step_8;
clockStop_10 = times (&tmsStop);
D.3548_11 = pi_9 - 3.141592653589793115997963468544185161590576171875e+0;
D.3549_12 = ABS_EXPR <D.3548_11>;
__printf_chk (1, "The value of PI is %lf Error is %lf\n", pi_9, D.3549_12);
D.3550_13 = clockStop_10 - clockStart_6;
D.3551_14 = (double) D.3550_13;
D.3552_15 = sysconf (2);
D.3553_16 = (double) D.3552_15;
secs_17 = D.3551_14 / D.3553_16;
__printf_chk (1, "The time to calculate PI was %lf seconds\n", secs_17);
tmsStart ={v} {CLOBBER};
tmsStop ={v} {CLOBBER};
return 0;
}

The same integral computed with C++11 threads; it builds with g++ -std=c++0x -pthread:

#include <iostream>
#include <thread>
#include <mutex>
int main()
{
    const size_t N = 5000, num_threads = std::thread::hardware_concurrency();
    const size_t chunk = N / num_threads;      // iterations per thread
    std::thread t[num_threads];
    std::mutex mutex;

    auto func = [&mutex](size_t start, size_t end, double &result) {
        auto sum = 0.0;
        for (auto i = start; i < end; i++) {
            auto x = (i + .5) / N;
            sum += 4.0 / (1.0 + x * x);
        }
        std::lock_guard<std::mutex> lock(mutex);
        result += sum;
    };

    auto sum = 0.0;
    size_t i;
    for (i = 0; i < num_threads - 1; i++)
        t[i] = std::thread(func, i * chunk, (i + 1) * chunk, std::ref(sum));
    t[i] = std::thread(func, i * chunk, N, std::ref(sum)); // last thread takes the remainder
    for (auto &thread : t)
        thread.join();
    std::cout << (sum / N) << std::endl;
}

For timing, OpenMP also provides omp_get_wtime(), which returns elapsed wall-clock time in seconds.
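A minimal sketch of timing the loop with it, assuming the same number of steps as in the OpenMP example above (build with g++ -fopenmp):

#include <cstdio>
#include <omp.h>

int main()
{
    const long numSteps = 500000000;   /* same number of rectangles as above */
    const double step = 1.0 / numSteps;
    double sum = 0.0;

    double t0 = omp_get_wtime();       /* wall-clock seconds */
    #pragma omp parallel for reduction(+:sum)
    for (long i = 0; i < numSteps; i++) {
        double x = (i + 0.5) * step;
        sum += 4.0 / (1.0 + x * x);
    }
    double t1 = omp_get_wtime();

    std::printf("pi = %.15f, time = %f s\n", sum * step, t1 - t0);
    return 0;
}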
Computing Pi in Parallel. Part 1