パスワードを忘れた? アカウント作成
932707 journal
日記

t-nissieの日記: 【電脳】FFTWのOpenMP並列でいまいち3次元r2c, c2rの速さが出ない【その3】

日記 by t-nissie

$ for i in `jot 3`; do OMP_NUM_THREADS=8 ./r2c_3d_test_omp 32 32 160 100000 8; done ; for i in `jot 3`; do OMP_NUM_THREADS=8 ./r2c_3d_test_threads 32 32 160 100000 8; done
Lx =   32, Ly =   32, Lz =  160, N =  163840, M =  100000, NTHREADS =   8
Address of r = 0x0000000100400000
Address of c = 0x0000000100540000
FFT starts
FFT ends 256.37600
Lx =   32, Ly =   32, Lz =  160, N =  163840, M =  100000, NTHREADS =   8
Address of r = 0x0000000100400000
Address of c = 0x0000000100540000
FFT starts
FFT ends 254.20400
Lx =   32, Ly =   32, Lz =  160, N =  163840, M =  100000, NTHREADS =   8
Address of r = 0x0000000100400000
Address of c = 0x0000000100540000
FFT starts
FFT ends 263.79200
Lx =   32, Ly =   32, Lz =  160, N =  163840, M =  100000, NTHREADS =   8
Address of r = 0x0000000100400000
Address of c = 0x0000000100540000
FFT starts
FFT ends 239.95000
Lx =   32, Ly =   32, Lz =  160, N =  163840, M =  100000, NTHREADS =   8
Address of r = 0x0000000100400000
Address of c = 0x0000000100540000
FFT starts
FFT ends 237.44200
Lx =   32, Ly =   32, Lz =  160, N =  163840, M =  100000, NTHREADS =   8
Address of r = 0x0000000100400000
Address of c = 0x0000000100540000
FFT starts
FFT ends 236.12100
$

! r2c_3d_test.F -*-f90-*-
! Time-stamp: <2011-11-29 15:53:27 takeshi>
! Author: Takeshi NISHIMATSU
!!
#if defined(__PGI) || defined(SR11000) || defined(__sparc)
#  define command_argument_count iargc
#  define get_command_argument getarg
#endif
 
program r2c_3d_test
  ! Benchmark for multi-threaded FFTW 3-D real<->complex transforms.
  !
  ! Usage: r2c_3d_test Lx Ly Lz M NTHREADS
  !   Lx, Ly, Lz : extents of the real array r(0:Lx-1, 0:Ly-1, 0:Lz-1)
  !   M          : number of forward + normalise + backward round trips
  !   NTHREADS   : thread count handed to dfftw_plan_with_nthreads
  !
  ! The program prints the problem size, the addresses of both work arrays,
  ! and the wall-clock time (system_clock) spent in the M round trips.
  implicit none
  real*8,     allocatable :: r(:,:,:)
  complex*16, allocatable :: c(:,:,:)
  ! `address` is an external integer*8 function (not defined in this file)
  ! that returns the memory address of its argument.
  integer*8               :: plan_r2c, plan_c2r, address, count0, count1, count_rate
  character(len=30)       :: str
  integer                 :: Lx, Ly, Lz, M, NTHREADS, i, j, ireturn
  integer                 :: ios(5)
  real*8                  :: N_inv
# include "fftw3.f"

  ! Parse the five command-line arguments.  iostat catches both missing
  ! arguments (blank string -> end-of-record) and non-numeric text, so a
  ! bad invocation prints a usage line instead of a runtime-library abort.
  call get_command_argument(1,str); read(str,*,iostat=ios(1)) Lx
  call get_command_argument(2,str); read(str,*,iostat=ios(2)) Ly
  call get_command_argument(3,str); read(str,*,iostat=ios(3)) Lz
  call get_command_argument(4,str); read(str,*,iostat=ios(4)) M
  call get_command_argument(5,str); read(str,*,iostat=ios(5)) NTHREADS
  if (any(ios /= 0)) then
     write(6,'(a)') 'Usage: r2c_3d_test Lx Ly Lz M NTHREADS'
     stop 1
  end if
  if (min(Lx, Ly, Lz, NTHREADS) < 1 .or. M < 0) then
     write(6,'(a)') 'Lx, Ly, Lz and NTHREADS must be >= 1 and M must be >= 0'
     stop 1
  end if

  write(6,'(3(a,i4),2(a,i7),a,i3)') 'Lx = ', Lx, &
       &             ', Ly = ', Ly, &
       &             ', Lz = ', Lz, &
       &              ', N = ', Lx*Ly*Lz, &
       &              ', M = ', M, &
       &       ', NTHREADS = ', NTHREADS
  ! FFTW transforms are unnormalised: forward followed by backward scales
  ! the data by Lx*Ly*Lz, so each round trip multiplies by this inverse.
  N_inv = 1.0d0 / Lx / Ly / Lz

  ! dfftw_init_threads reports failure by returning zero.
  call dfftw_init_threads(ireturn)
  if (ireturn == 0) then
     write(6,'(a)') 'dfftw_init_threads failed'
     stop 2
  end if
  call dfftw_plan_with_nthreads(NTHREADS)

  ! The r2c output keeps only Lx/2+1 elements along the first dimension.
  allocate(r(0:Lx-1, 0:Ly-1, 0:Lz-1))
  allocate(c(0:Lx/2, 0:Ly-1, 0:Lz-1))
  write (*,'(a,z16.16)') 'Address of r = 0x', address(r) ! Check 16-byte alignment,
  write (*,'(a,z16.16)') 'Address of c = 0x', address(c) ! or SSE2 won't be used.

  ! Planning with FFTW_PATIENT overwrites the arrays, so the plans are
  ! created before r is initialised.
  call dfftw_plan_dft_r2c_3d(plan_r2c, Lx, Ly, Lz, r, c, FFTW_PATIENT)
  call dfftw_plan_dft_c2r_3d(plan_c2r, Lx, Ly, Lz, c, r, FFTW_PATIENT)

  r(:,:,:) = 0.1d0

  write(6,'(a)') 'FFT starts'
  call flush(6)
  call system_clock(count0)

  do i = 1, M
     call dfftw_execute(plan_r2c)
     ! Normalise in reciprocal space, one z-slab per loop iteration.
     !$omp parallel do
     do j = 0, Lz-1
        c(:,:,j) = c(:,:,j) * N_inv
     end do
     !$omp end parallel do
     call dfftw_execute(plan_c2r)
  end do

  call system_clock(count1, count_rate)
  write(6,'(a,f10.5)') 'FFT ends', dble(count1-count0)/count_rate

  ! Release FFTW resources before tearing down the thread support.
  call dfftw_destroy_plan(plan_r2c)
  call dfftw_destroy_plan(plan_c2r)
  deallocate(r, c)
  ! fftw_cleanup_threads takes no arguments.
  call dfftw_cleanup_threads()
end program r2c_3d_test
 
!Local variables:
!  compile-command: "make -k && ./r2c_3d_test_omp 32 32 160 100 4"
!End:

typodupeerror

皆さんもソースを読むときに、行と行の間を読むような気持ちで見てほしい -- あるハッカー

読み込み中...