I am trying to optimize a fortran program and to this end I'm trying to use openMP directives of the intel fortran compiler.

I'm a newbie in openMP, so I've taken the most basic approach:

Code: Select all

```
program first_task
implicit none
integer :: ip,i,j
integer, parameter :: npd=40,nv=3,nc=4,ngkmax=64
complex(8),parameter :: one=(1.,0.),zero=(0.,0.)
complex(8),allocatable :: mat(:,:),sums(:),vaux(:),wfpw(:)
complex(8) :: aux,wfvr,aux2
allocate(mat(ngkmax,ngkmax),sums(npd),vaux(ngkmax),wfpw(ngkmax))
call omp_set_nested(.true.)
forall(i=1:ngkmax) wfpw(i)=cmplx(i)
sums=zero
!$OMP PARALLEL PRIVATE(aux,ip,i,j,wfvr) REDUCTION(+:aux2)
!$OMP DO
do ip=1,npd
aux=cmplx(1./ip)
call matrix(aux,ngkmax,mat)
aux2=zero
do i=1,nv
do j=1,nc
call zgemv('n',ngkmax,ngkmax,one,mat,ngkmax,wfpw,1,zero,vaux,1)
wfvr=dot_product(wfpw,vaux)
aux2=aux2+wfvr
end do
end do
sums(ip)=aux2
end do
!$OMP END PARALLEL
print*, sum(sums)
deallocate(mat,sums)
end program first_task
```

Code: Select all

```
subroutine matrix(fac,ndim,mat)
implicit none
integer,intent(in) ::ndim
complex(8),intent(in) :: fac
complex(8),intent(out) :: mat(:,:)
integer :: i
mat=(0.,0.)
forall(i=1:ndim) mat(i,i)=fac
end subroutine matrix
```

Using intel analysis tools, I could see the problem has to do with the fact some variables (mat and vaux, I think) are read and written by several threads, but honestly I do not how to fixed it.

Thanks in advance for any help,

Arcesio Castañeda Medina

Max-Planck Institute for Microstructure Physics

Weinberg 2

06120 Halle

Germany