Below are two code snippets that use an OpenMP reduction. In the first snippet (reduction variable total_num_sp_edges) I get the correct result on every run, but in the second snippet different runs deliver different results for the reduction variable (total_num_splitters). I would like to know why the first reduction works but the second does not.
First snippet
size_t total_num_sp_edges = 0;
...
// Each thread merges its own slot of every sm_graph entry into
// combined_thread_local_sm_g[tid], sorts and de-duplicates the merged edges,
// records the resulting count, and adds that count to the reduction variable.
#pragma omp parallel reduction(+:total_num_sp_edges)
{
    const int thread_id = omp_get_thread_num();
    auto& merged_edges = combined_thread_local_sm_g[thread_id];

    for (auto& per_thread_buckets : sm_graph)
    {
        auto& bucket = per_thread_buckets[thread_id];
        merged_edges.insert(merged_edges.end(), bucket.begin(), bucket.end());
        // Swap with an empty temporary so the bucket's storage is handed to the
        // temporary and destroyed with it, rather than merely cleared.
        std::vector<std::pair<int,int>>().swap(bucket);
    }

    // Order edges by their first component, breaking ties on the second.
    const auto edge_less = [](const std::pair<int, int>& lhs, const std::pair<int, int>& rhs) {
        if (lhs.first != rhs.first)
        {
            return lhs.first < rhs.first;
        }
        return lhs.second < rhs.second;
    };
    std::sort(merged_edges.begin(), merged_edges.end(), edge_less);

    // After sorting, equal edges are adjacent, so unique + erase removes every duplicate.
    const auto duplicates_begin = std::unique(merged_edges.begin(), merged_edges.end());
    merged_edges.erase(duplicates_begin, merged_edges.end());

    // Store the per-thread count first and accumulate from the stored value.
    num_thread_local_SpEdges[thread_id] = merged_edges.size();
    total_num_sp_edges += num_thread_local_SpEdges[thread_id];
}
Second snippet
size_t total_num_splitters = 0;
...
#pragma omp parallel reduction(+:total_num_splitters)
{
std::vector<size_t> local_splitters;
size_t numThreads = omp_get_num_threads();
int tid = omp_get_thread_num();
#pragma omp single
{
num_thread_local_splitters.resize(numThreads);
displacements.resize(numThreads);
}
if(tid == 0)
{
local_splitters.push_back(0);
}
#pragma omp for schedule(static)
for(size_t i = 1; i < sortedSNData.size(); i++)
{
if(comp_afforest[sortedSNData[i]] != comp_afforest[sortedSNData[i-1]])
{
local_splitters.push_back(i);
}
}
num_thread_local_splitters[tid] = local_splitters.size();
total_num_splitters += num_thread_local_splitters[tid];
#pragma omp single
{
splitters.resize(total_num_splitters);