CMAKE_MINIMUM_REQUIRED(VERSION 3.0)

# Locate the C and C++ compilers before PROJECT() is called.
# The names taken from the CC / CXX environment variables are listed first,
# gcc / g++ second; only the directories in the PATH environment variable
# are searched (NO_DEFAULT_PATH).
FIND_PROGRAM(CMAKE_C_COMPILER
    NAMES $ENV{CC} gcc
    PATHS ENV PATH
    NO_DEFAULT_PATH)
FIND_PROGRAM(CMAKE_CXX_COMPILER
    NAMES $ENV{CXX} g++
    PATHS ENV PATH
    NO_DEFAULT_PATH)

# Optional features; every one of them is off unless requested
# (e.g. -DUSE_MKL=ON on the cmake command line).
OPTION(USE_MKL "MKL" OFF)
OPTION(USE_HPTT "HPTT" OFF)
OPTION(USE_GPU "GPU" OFF)

# GPU builds raise the minimum CMake version to 3.9, locate nvcc (name from
# the NVCC environment variable first, then plain "nvcc", PATH only) and
# enable the CUDA language.
IF (${USE_GPU})
    CMAKE_MINIMUM_REQUIRED(VERSION 3.9)
    FIND_PROGRAM(CMAKE_CUDA_COMPILER
        NAMES $ENV{NVCC} nvcc
        PATHS ENV PATH
        NO_DEFAULT_PATH)
    ENABLE_LANGUAGE(CUDA)
ENDIF()

# PROJECT() itself defines PROJECT_NAME (= block3), which the rest of this
# file uses as the name of the extension-module target.
PROJECT(block3 VERSION 3.0)

# Check Python3 and Pybind11

# Locate the Python interpreter. Python_ADDITIONAL_VERSIONS asks the find
# module to also consider "3" when looking for versioned interpreters.
SET(Python_ADDITIONAL_VERSIONS 3)
FIND_PACKAGE(PythonInterp)

IF (NOT PYTHONINTERP_FOUND)
    MESSAGE(FATAL_ERROR "Python3 not found.")
ENDIF()

# FindPythonInterp may return a Python 2 interpreter when no version is
# requested. The queries further down (importlib.machinery, pybind11) need
# Python 3, so fail here with a clear message instead of an obscure one later.
IF (PYTHON_VERSION_MAJOR LESS 3)
    MESSAGE(FATAL_ERROR "Python3 not found (found Python ${PYTHON_VERSION_STRING}: ${PYTHON_EXECUTABLE}).")
ENDIF()

# Look for the development files matching the interpreter found above.
# Not REQUIRED: the result is checked later via PYTHONLIBS_FOUND.
FIND_PACKAGE(PythonLibs ${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR})

# Ask the interpreter for its library and include directories. These are used
# further down as extra search locations for third-party libraries/headers.
#
# The stdlib `sysconfig` module is used instead of `distutils.sysconfig`:
# distutils was removed in Python 3.12, which made every query below print a
# traceback and leave its output variable empty.
EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import sysconfig; \
        print(sysconfig.get_config_var('LIBDIR'))"
        OUTPUT_VARIABLE PYTHON_LIB_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)
EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import sysconfig; \
        print(sysconfig.get_config_var('INCLUDEDIR'))"
        OUTPUT_VARIABLE PYTHON_INCLUDE_PATH OUTPUT_STRIP_TRAILING_WHITESPACE)

# Fallback when FIND_PACKAGE(PythonLibs) failed: derive the library file and
# the header directory directly from the interpreter's build configuration.
IF (NOT PYTHONLIBS_FOUND)
    EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import sysconfig; \
        import os; print(os.path.join(sysconfig.get_config_var('LIBDIR'), \
        sysconfig.get_config_var('LDLIBRARY')))"
        OUTPUT_VARIABLE PYTHON_LIBRARIES OUTPUT_STRIP_TRAILING_WHITESPACE)
    # sysconfig.get_path('include') is the documented replacement for
    # distutils.sysconfig.get_python_inc().
    EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import sysconfig; \
        print(sysconfig.get_path('include'))"
        OUTPUT_VARIABLE PYTHON_INCLUDE_DIRS OUTPUT_STRIP_TRAILING_WHITESPACE)
ENDIF()

# File-name suffix for the extension module, taken from the first entry of
# importlib.machinery.EXTENSION_SUFFIXES of the selected interpreter.
EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import importlib.machinery; \
        print(importlib.machinery.EXTENSION_SUFFIXES[0])"
    OUTPUT_VARIABLE PYLIB_SUFFIX OUTPUT_STRIP_TRAILING_WHITESPACE)

# Header directory of the pybind11 package installed for that interpreter.
# The exit status is checked so that a missing pybind11 is reported here,
# rather than as an empty include path and a compile error later on.
EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import pybind11;print(pybind11.get_include())"
    RESULT_VARIABLE PYBIND_IMPORT_STATUS
    OUTPUT_VARIABLE PYBIND_INCLUDE_DIRS OUTPUT_STRIP_TRAILING_WHITESPACE)
IF (NOT "${PYBIND_IMPORT_STATUS}" STREQUAL "0")
    MESSAGE(FATAL_ERROR "pybind11 not found for ${PYTHON_EXECUTABLE} (python -c \"import pybind11\" failed).")
ENDIF()

# Print "<NAME> = <value>" for every variable that describes the detected
# Python / pybind11 configuration.
FOREACH(_status_var
        PROJECT_NAME
        PYTHON_VERSION_MAJOR
        PYTHON_VERSION_MINOR
        PYTHON_LIBRARIES
        PYTHON_EXECUTABLE
        PYTHON_EXECUTABLE_HINT
        PYTHON_INCLUDE_DIRS
        PYLIB_SUFFIX
        PYBIND_INCLUDE_DIRS)
    MESSAGE(STATUS "${_status_var} = ${${_status_var}}")
ENDFOREACH()

# When a PYTHON_EXECUTABLE_HINT is supplied (the error text below attributes
# it to setup.py), the interpreter found by CMake must be the same one.
#
# The variable is tested by name: expanding it inside IF() would make CMake
# look up a variable named after the path, which is never defined. Paths are
# compared as strings (EQUAL is a numeric comparison and never matches paths)
# after normalising both to CMake-style forward slashes.
IF (PYTHON_EXECUTABLE_HINT)
    FILE(TO_CMAKE_PATH "${PYTHON_EXECUTABLE_HINT}" _py_hint_path)
    FILE(TO_CMAKE_PATH "${PYTHON_EXECUTABLE}" _py_found_path)
    IF (NOT ("${_py_hint_path}" STREQUAL "${_py_found_path}"))
        MESSAGE(FATAL_ERROR "Python3 used by cmake (${PYTHON_EXECUTABLE}) does not match Python3 \
            used by setup.py (${PYTHON_EXECUTABLE_HINT})!")
    ENDIF()
ENDIF()

# Build everything as C++11 and refuse to fall back to an older standard.
SET(CMAKE_CXX_STANDARD 11)
SET(CMAKE_CXX_STANDARD_REQUIRED True)

# CMP0054 NEW: quoted arguments of IF() are never dereferenced as variable
# names. It has to be in effect before the first quoted STREQUAL below, and
# is guarded so that CMake versions that do not know the policy still work.
IF (POLICY CMP0054)
    CMAKE_POLICY(SET CMP0054 NEW) # quoted variable for STREQUAL
ENDIF()

IF (NOT(APPLE) AND NOT(WIN32))
    # Neither macOS nor Windows: linker flag that is placed in front of the
    # MKL libraries when USE_MKL is on.
    SET(NO_AS_NEEDED -Wl,--no-as-needed)
ELSEIF (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
    # AppleClang: XPREP is put in front of the OpenMP flag in OPT_FLAG;
    # /usr/local/include is added as an include directory.
    SET(XPREP -Xpreprocessor)
    INCLUDE_DIRECTORIES(/usr/local/include)
ENDIF()

# Default threading library; cleared again below for configurations that do
# not link it.
SET(PTHREAD pthread)

# Compiler-specific spelling of the OpenMP switch.
IF (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
    SET(OMP_FLAG -qopenmp)
ELSEIF (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    SET(OMP_FLAG -openmp)
ELSE()
    SET(OMP_FLAG -fopenmp)
ENDIF()

# Choose the OpenMP runtime library to link (OMP_LIB_NAME) and the name of the
# MKL threading library that is searched for when USE_MKL is on
# (MKL_OMP_LIB_NAME). An explicit OMP_LIB value (SEQ, INTEL or TBB) is
# honoured first; otherwise the choice falls back to a per-compiler default.
IF (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    # MSVC: start with no OpenMP runtime library and no pthread.
    SET(OMP_LIB_NAME "")
    SET(PTHREAD "")
    IF("${OMP_LIB}" STREQUAL "SEQ")
        SET(MKL_OMP_LIB_NAME mkl_sequential)
    ELSEIF ("${OMP_LIB}" STREQUAL "INTEL")
        SET(MKL_OMP_LIB_NAME mkl_intel_thread)
    ELSEIF ("${OMP_LIB}" STREQUAL "TBB")
        SET(MKL_OMP_LIB_NAME mkl_tbb_thread)
    ELSE()
        # No (or unrecognised) OMP_LIB: default to INTEL and look for
        # libiomp5md under the directory given by the OMPROOT environment variable.
        SET(OMP_LIB INTEL)
        SET(MKL_OMP_LIB_NAME mkl_intel_thread)
        FIND_LIBRARY(OMP_LIB_NAME NAMES libiomp5md PATHS $ENV{OMPROOT} $ENV{OMPROOT}/lib $ENV{OMPROOT}/lib/intel64_win)
    ENDIF()
ELSEIF("${OMP_LIB}" STREQUAL "SEQ")
    FIND_LIBRARY(OMP_LIB_NAME NAMES gomp PATHS /usr/local/lib /usr/lib64)
    # gomp not found: link neither an OpenMP runtime nor pthread.
    IF(NOT OMP_LIB_NAME)
        SET(OMP_LIB_NAME "")
        SET(PTHREAD "")
    ENDIF()
    SET(MKL_OMP_LIB_NAME mkl_sequential)
ELSEIF ("${OMP_LIB}" STREQUAL "INTEL")
    FIND_LIBRARY(OMP_LIB_NAME NAMES iomp5 PATHS /usr/local/lib /usr/lib64)
    SET(MKL_OMP_LIB_NAME mkl_intel_thread)
ELSEIF ("${OMP_LIB}" STREQUAL "TBB")
    FIND_LIBRARY(OMP_LIB_NAME NAMES gomp PATHS /usr/local/lib /usr/lib64)
    # gomp not found: link neither an OpenMP runtime nor pthread.
    IF(NOT OMP_LIB_NAME)
        SET(OMP_LIB_NAME "")
        SET(PTHREAD "")
    ENDIF()
    SET(MKL_OMP_LIB_NAME mkl_tbb_thread)
# From here on OMP_LIB was not one of SEQ / INTEL / TBB: decide by compiler.
# These branches additionally search the Python library directory.
ELSEIF (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
    FIND_LIBRARY(OMP_LIB_NAME NAMES iomp5 PATHS /usr/local/lib /usr/lib64 ${PYTHON_LIB_PATH})
    SET(MKL_OMP_LIB_NAME mkl_intel_thread)
ELSEIF (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
    FIND_LIBRARY(OMP_LIB_NAME NAMES iomp5 PATHS /usr/local/lib /usr/lib64 ${PYTHON_LIB_PATH})
    SET(MKL_OMP_LIB_NAME mkl_intel_thread)
ELSE()
    # Any other compiler: record OMP_LIB as GNU and look for gomp.
    SET(OMP_LIB GNU)
    FIND_LIBRARY(OMP_LIB_NAME NAMES gomp PATHS /usr/local/lib /usr/lib64 ${PYTHON_LIB_PATH})
    IF (NOT OMP_LIB_NAME)
        # Fallback: take the "lib..." line of `<compiler> -print-search-dirs`,
        # keep the part after "=", turn the ":"-separated directories into a
        # CMake list and retry the search there.
        EXECUTE_PROCESS(
            COMMAND ${CMAKE_CXX_COMPILER} -print-search-dirs
            COMMAND grep "^lib" COMMAND awk -F "=" "{print $2}" COMMAND tr ":" ";"    
            OUTPUT_VARIABLE OMP_LIB_HINT OUTPUT_STRIP_TRAILING_WHITESPACE)
        FIND_LIBRARY(OMP_LIB_NAME NAMES gomp PATHS ${OMP_LIB_HINT})
    ENDIF()
    SET(MKL_OMP_LIB_NAME mkl_gnu_thread)
ENDIF()

# Linear-algebra backend: Intel MKL when USE_MKL is on, otherwise whatever
# BLAS and LAPACK CMake can find.
IF (${USE_MKL})
    # Temporarily accept versioned shared-object names (.so.1 ... .so.4) so
    # that MKL installations shipping only libmkl_*.so.N are found as well.
    SET(CMAKE_FIND_LIBRARY_SUFFIXES_BKP ${CMAKE_FIND_LIBRARY_SUFFIXES})
    SET(CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_FIND_LIBRARY_SUFFIXES_BKP};.so.1;.so.2;.so.3;.so.4")
    FIND_PATH(MKL_INCLUDE_DIR NAMES mkl.h HINTS $ENV{MKLROOT}/include /usr/local/include ${PYTHON_INCLUDE_PATH})
    # All MKL libraries are searched only in the listed directories
    # (NO_DEFAULT_PATH): MKLROOT, /usr/local/lib and the Python library dir.
    FIND_LIBRARY(MKL_LIB_LP NAMES mkl_intel_lp64
        PATHS $ENV{MKLROOT}/lib $ENV{MKLROOT}/lib/intel64 /usr/local/lib ${PYTHON_LIB_PATH} NO_DEFAULT_PATH)
    FIND_LIBRARY(MKL_LIB_CORE NAMES mkl_core
        PATHS $ENV{MKLROOT}/lib $ENV{MKLROOT}/lib/intel64 /usr/local/lib ${PYTHON_LIB_PATH} NO_DEFAULT_PATH)
    # Threading library chosen earlier together with the OpenMP runtime.
    FIND_LIBRARY(MKL_LIB_GT NAMES ${MKL_OMP_LIB_NAME}
        PATHS $ENV{MKLROOT}/lib $ENV{MKLROOT}/lib/intel64 /usr/local/lib ${PYTHON_LIB_PATH} NO_DEFAULT_PATH)
    FIND_LIBRARY(MKL_LIB_AVX NAMES mkl_avx2
        PATHS $ENV{MKLROOT}/lib $ENV{MKLROOT}/lib/intel64 /usr/local/lib ${PYTHON_LIB_PATH} NO_DEFAULT_PATH)
    FIND_LIBRARY(MKL_LIB_AVX512 NAMES mkl_avx512
        PATHS $ENV{MKLROOT}/lib $ENV{MKLROOT}/lib/intel64 /usr/local/lib ${PYTHON_LIB_PATH} NO_DEFAULT_PATH)
    SET(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES_BKP})
    # ${PTHREAD} is the variable defined above; the former ${PTHREADS} was
    # never set and silently expanded to nothing.
    SET(MKL_LIBS ${NO_AS_NEEDED} ${PTHREAD} ${OMP_LIB_NAME} ${MKL_LIB_LP} ${MKL_LIB_CORE} ${MKL_LIB_GT} ${MKL_LIB_AVX} ${MKL_LIB_AVX512})
    MESSAGE(STATUS "MKL_INCLUDE_DIR = ${MKL_INCLUDE_DIR}")
    MESSAGE(STATUS "MKL_LIBS = ${MKL_LIBS}")
    SET(MKL_FLAG "-D_HAS_INTEL_MKL")
ELSE()
    # Check LAPACK and BLAS
    FIND_PACKAGE(BLAS REQUIRED)
    FIND_PACKAGE(LAPACK REQUIRED)

    SET(MKL_INCLUDE_DIR "")
    SET(MKL_LIBS "")
    SET(MKL_FLAG "")
ENDIF()

# HPTT support: locate hptt.h and the hptt library (HPTTHOME environment
# variable first, then /usr/local) and define _HAS_HPTT for the sources.
# Without USE_HPTT all three variables are left empty.
IF (${USE_HPTT})
    FIND_PATH(HPTT_INCLUDE_DIR
        NAMES hptt.h
        HINTS $ENV{HPTTHOME}/include /usr/local/include)
    FIND_LIBRARY(HPTT_LIB
        NAMES hptt
        PATHS $ENV{HPTTHOME}/lib /usr/local/lib)
    MESSAGE(STATUS "HPTT_INCLUDE_DIR = ${HPTT_INCLUDE_DIR}")
    MESSAGE(STATUS "HPTT_LIB = ${HPTT_LIB}")
    SET(HPTT_FLAG "-D_HAS_HPTT")
ELSE()
    SET(HPTT_INCLUDE_DIR "")
    SET(HPTT_LIB "")
    SET(HPTT_FLAG "")
ENDIF()

# GPU dependencies: the cuTENSOR header and library plus the moderngpu
# headers. When USE_GPU is off the three variables are set to empty strings.
IF (${USE_GPU})
    FIND_PATH(CUT_INCLUDE_DIR
        NAMES cutensor.h
        HINTS $ENV{CUTENSOR_ROOT}/include /usr/local/include ${PYTHON_INCLUDE_PATH})
    # Versioned sub-directories of CUTENSOR_ROOT/lib are listed in the order
    # 11.0, 11, 10.1, followed by /usr/local/lib and the Python library dir.
    FIND_LIBRARY(CUT_LIB
        NAMES cutensor
        PATHS
            $ENV{CUTENSOR_ROOT}/lib/11.0
            $ENV{CUTENSOR_ROOT}/lib/11
            $ENV{CUTENSOR_ROOT}/lib/10.1
            /usr/local/lib
            ${PYTHON_LIB_PATH})
    FIND_PATH(MGPU_INCLUDE_DIR
        NAMES moderngpu/kernel_load_balance.hxx
        HINTS $ENV{MGPUROOT} /usr/local/include ${PYTHON_INCLUDE_PATH})
    MESSAGE(STATUS "MGPU_INCLUDE_DIR = ${MGPU_INCLUDE_DIR}")
    MESSAGE(STATUS "CUTENSOR_INCLUDE_DIR = ${CUT_INCLUDE_DIR}")
    MESSAGE(STATUS "CUTENSOR_LIB = ${CUT_LIB}")
ELSE()
    SET(MGPU_INCLUDE_DIR "")
    SET(CUT_INCLUDE_DIR "")
    SET(CUT_LIB "")
ENDIF()

# Assemble OPT_FLAG, the list of compile options applied to the target.
# Debug builds: no optimisation plus debugging aids; every other build type:
# -O3 with loop unrolling. Both variants carry the OpenMP switch.
IF("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    SET(OPT_FLAG
        -O0 -g ${XPREP} ${OMP_FLAG}
        -fstack-check -fno-omit-frame-pointer -fno-optimize-sibling-calls)
ELSE()
    SET(OPT_FLAG -O3 -funroll-loops ${XPREP} ${OMP_FLAG})
ENDIF()

# Warnings are errors, but only when compiling C++ sources.
LIST(APPEND OPT_FLAG "$<$<COMPILE_LANGUAGE:CXX>:-Werror;-Werror=return-type>")

# CUDA sources get their own flag set and the sources see _USE_GPU.
IF (${USE_GPU})
    SET(GPU_FLAG "-D_USE_GPU")
    LIST(APPEND OPT_FLAG
        "$<$<COMPILE_LANGUAGE:CUDA>:-Werror;all-warnings;-arch=compute_80;--expt-extended-lambda;--expt-relaxed-constexpr>")
ELSE()
    SET(GPU_FLAG "")
ENDIF()

# Clang and AppleClang: do not warn (or error) on
# instantiation-after-specialization.
IF (CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$")
    LIST(APPEND OPT_FLAG
        -Wno-error=instantiation-after-specialization
        -Wno-instantiation-after-specialization)
ENDIF()

# Collect the sources: every .cpp directly under src/, preceded by every .cu
# under src/gpu/ when USE_GPU is on.
FILE(GLOB SRCS src/*.cpp)
IF (${USE_GPU})
    FILE(GLOB CU_SRCS src/gpu/*.cu)
    SET(SRCS
        ${CU_SRCS}
        ${SRCS})
ENDIF()

MESSAGE(STATUS "SRCS = ${SRCS}")

# The Python extension module: a MODULE library without the "lib" prefix and
# with the suffix reported by the interpreter.
ADD_LIBRARY(${PROJECT_NAME} MODULE ${SRCS})
SET_TARGET_PROPERTIES(${PROJECT_NAME} PROPERTIES
    PREFIX ""
    SUFFIX "${PYLIB_SUFFIX}")

# Hide symbols by default.
LIST(APPEND OPT_FLAG -fvisibility=hidden)

# Platform-specific link items: macOS resolves undefined symbols at load
# time; platforms that are neither macOS nor Windows link librt.
IF (APPLE)
    TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC -Wl,-undefined,dynamic_lookup)
ELSEIF (NOT WIN32)
    TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC rt)
ENDIF()

# OpenMP runtime and pthread first, then the numerical libraries.
TARGET_LINK_LIBRARIES(${PROJECT_NAME} PUBLIC
    ${OMP_LIB_NAME} ${PTHREAD}
    ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} ${MKL_LIBS} ${HPTT_LIB} ${CUT_LIB})

MESSAGE(STATUS "OPT_FLAG = ${OPT_FLAG}")
MESSAGE(STATUS "OMP_LIB = ${OMP_LIB_NAME}")

TARGET_INCLUDE_DIRECTORIES(${PROJECT_NAME} PUBLIC
    ${PYTHON_INCLUDE_DIRS}
    ${PYBIND_INCLUDE_DIRS}
    ${MKL_INCLUDE_DIR}
    ${HPTT_INCLUDE_DIR}
    ${CUT_INCLUDE_DIR}
    ${MGPU_INCLUDE_DIR})
TARGET_COMPILE_OPTIONS(${PROJECT_NAME} BEFORE PRIVATE
    ${OPT_FLAG} ${MKL_FLAG} ${HPTT_FLAG} ${GPU_FLAG})
