I tried to use cython.parallel prange. I can only see two cores 50% being used. How can I make use of all the cores. i.e. send the loops to the cores simultaneously sharing the arrays, volume and mc_vol?
EDIT: I also edited purely sequential for-loop which is about 30 seconds faster than than cython.parallel prange version. Both of them are using one core only. Is there are way to parallelize this.
cimport cython
from cython.parallel import prange, parallel, threadid
from libc.stdio cimport sprintf
from libc.stdlib cimport malloc, free
cimport numpy as np@cython.boundscheck(False)
@cython.wraparound(False)
cpdef MC_Surface(np.ndarray[np.int_t,ndim=3] volume, np.ndarray[np.float32_t,ndim=3] mc_vol):cdef int vol_len=len(volume)-1cdef int k, j, icdef char* pattern # a string pointer - allocate laterPerm_area = {"00000000": 0.000000,..."00011101": 1.515500}try:pattern = <char*>malloc(sizeof(char)*260)for k in range(vol_len):for j in range(vol_len):for i in range(vol_len):sprintf(pattern, "%i%i%i%i%i%i%i%i",volume[i, j, k],volume[i, j + 1, k],volume[i + 1, j, k],volume[i + 1, j + 1, k],volume[i, j, k + 1],volume[i, j + 1, k + 1],volume[i + 1, j, k + 1],volume[i + 1, j + 1, k + 1]);mc_vol[i, j, k] = Perm_area[pattern]# if Perm_area[pattern] > 0:# print pattern, 'Area: ', Perm_area[pattern]#total_area += Perm_area[pattern]finally:free(pattern)
return mc_vol
EDIT following DavidW's suggestion, but prange is considerably slower:
cpdef MC_Surface(np.ndarray[np.int_t,ndim=3] volume, np.ndarray[np.float32_t,ndim=3] mc_vol):cdef int vol_len=len(volume)-1cdef int k, j, icdef char* pattern # a string pointer - allocate laterPerm_area = {"00000000": 0.000000,..."00011101": 1.515500}with nogil,parallel():try:pattern = <char*>malloc(sizeof(char)*260)for k in prange(vol_len):for j in range(vol_len):for i in range(vol_len):sprintf(pattern, "%i%i%i%i%i%i%i%i",volume[i, j, k],volume[i, j + 1, k],volume[i + 1, j, k],volume[i + 1, j + 1, k],volume[i, j, k + 1],volume[i, j + 1, k + 1],volume[i + 1, j, k + 1],volume[i + 1, j + 1, k + 1]);with gil:mc_vol[i, j, k] = Perm_area[pattern]# if Perm_area[pattern] > 0:# print pattern, 'Area: ', Perm_area[pattern]# total_area += Perm_area[pattern]finally:free(pattern)return mc_vol
My setup file looks like:
setup(name='SurfaceArea',ext_modules=[Extension('c_marchSurf', ['c_marchSurf.pyx'], include_dirs=[numpy.get_include()],extra_compile_args=['-fopenmp'], extra_link_args=['-fopenmp'], language="c++")],cmdclass={'build_ext': build_ext}, requires=['Cython', 'numpy', 'matplotlib', 'pathos', 'scipy', 'cython.parallel']
)