Commit 1b651fb9 authored by Artem Shinkarov's avatar Artem Shinkarov
Browse files

Merge tag 'v1.3.3-MijasCosta'

Fixing broken ubuntu dependencies
parents 1954b48d 09335809
......@@ -76,6 +76,7 @@ stages:
- cp -r build/.sac2crc $HOME
- cd build
- make fulltest
- ctest --output-on-failure
artifacts:
expire_in: 12 hrs
untracked: true
......
CMAKE_MINIMUM_REQUIRED (VERSION 3.4)
# Name of the project
PROJECT (sac2c C)
PROJECT (sac2c)
# Handle Policies
#FIXME hans: currently we set the policy to the DEFAULT
......@@ -40,6 +40,8 @@ ADD_FEATURE_INFO(Dot DOT "used to generate a visual of the AST")
ADD_FEATURE_INFO(C++ CPLUSPLUS "to compile sac2c with")
ADD_FEATURE_INFO(ISL ISL "use Integer Set Library in sac2c")
ADD_FEATURE_INFO(BARVINOK BARVINOK "use Barvinok Library in sac2c")
ADD_FEATURE_INFO(FUNCTESTS FUNCTESTS
"enable functional tests (gtest is required)")
FEATURE_SUMMARY(WHAT ALL)
# Build executables for the tools.
......@@ -317,6 +319,9 @@ INCLUDE ("${PROJECT_SOURCE_DIR}/cmake/cpack/config.cmake")
# Here we describe what happens when we test stuff.
ENABLE_TESTING ()
ADD_SUBDIRECTORY (tests)
IF (FUNCTESTS)
ADD_SUBDIRECTORY ("src/tests")
ENDIF ()
# vim:ts=2:sw=2:et:
......@@ -6,12 +6,12 @@ SAC - Single Assignment C
SAC COPYRIGHT NOTICE, LICENSE, AND DISCLAIMER
(c) Copyright 1994 - 2016 by
(c) Copyright 1994 - 2018 by
SAC Development Team
http://www.sac-home.org
email:info@sac-home.org
web: http://www.sac-home.org
email: info@sac-home.org
---------------------------------------------------------------------------
......@@ -42,4 +42,3 @@ performance of this software is with you. Should this software prove
defective, you assume the cost of all servicing, repair, or correction.
---------------------------------------------------------------------------
......@@ -77,7 +77,7 @@ MACRO (ADD_SAC2C_BUILD _build_type)
COMMAND $(MAKE) DESTDIR=${PROJECT_BINARY_DIR}/toplevel/headers headers-install
COMMAND $(MAKE) DESTDIR=${PROJECT_BINARY_DIR}/toplevel/symlinks symlinks-install
TEST_EXCLUDE_FROM_MAIN 1
TEST_COMMAND ctest
TEST_COMMAND ctest --output-on-failure
BUILD_ALWAYS 1
CMAKE_ARGS
-DCMAKE_BUILD_TYPE=${_build_type}
......
......@@ -44,7 +44,6 @@ ENDIF ()
SET (CPACK_PACKAGE_NAME "sac2c-compiler")
SET (CPACK_PACKAGE_VENDOR "SaC Development Team")
SET (CPACK_PACKAGE_CONTACT "info@sac-home.org")
SET (CPACK_PACKAGE_VERSION "${SAC2C_VERSION}")
SET (CPACK_PACKAGE_VERSION_MAJOR "${SAC2C_VERSION_MAJOR}")
SET (CPACK_PACKAGE_VERSION_MINOR "${SAC2C_VERSION_MINOR}")
SET (CPACK_PACKAGE_VERSION_PATCH "${SAC2C_VERSION_PATCH}")
......@@ -53,7 +52,7 @@ SET (CPACK_PACKAGE_INSTALL_DIRECTORY "sac2c-${SAC2C_VERSION}") # XXX is this rea
SET (CPACK_PACKAGE_ICON "${SAC2C_SOURCE_DIR}/cmake/cpack/sac_logo.png")
# SET (CPACK_PACKAGE_DESCRIPTION_FILE ...)
SET (CPACK_PACKAGE_DESCRIPTION_SUMMARY "The sac2c compiler for a data-parallel array-based functional language SAC")
SET (CPACK_PACKAGE_DESCRIPTION_SUMMARY "The sac2c compiler for a data-parallel array-based functional language SaC")
# FIXME(artem) We need to decide where to put the stuff on the target system...
#SET (CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
SET (CPACK_RESOURCE_FILE_LICENSE "${SAC2C_SOURCE_DIR}/LICENSE.txt")
......@@ -74,9 +73,8 @@ SET (CPACK_COMPONENT_SYMLINKS_DISPLAY_NAME "SaC Symlinks")
# Debian-specific variables
SET (CPACK_DEBIAN_PACKAGE_MAINTAINER "${CPACK_PACKAGE_VENDOR} <${CPACK_PACKAGE_CONTACT}>")
SET (CPACK_DEBIAN_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
#SET (CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) # non-functional
# FIXME Can we auto-generate these dependencies?
SET (CPACK_DEBIAN_PACKAGE_DEPENDS "gcc, libc6 (>= 2.13), uuid-runtime (>= 2.20)")
SET (CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
SET (CPACK_DEBIAN_PACKAGE_DEPENDS "gcc, libc6 (>= 2.13), uuid-runtime (>= 2.20), libhwloc-dev")
# RPM-specific variables
# XXX (hans): this may not be exhaustive - does not take into account if the user
......
......@@ -14,3 +14,4 @@ OPTION (CUDA "Build sac2c with CUDA backend support"
OPTION (HWLOC "Build sac2c with hwloc support" ON)
OPTION (ISL "Build sac2c with Integer Set Library" ON)
OPTION (BARVINOK "Build sac2c with Barvinok Library" ON)
OPTION (FUNCTESTS "Enable functional tests (requires GTest library)" ON)
......@@ -167,24 +167,45 @@ LIB_NEEDED ("m" "pow" "
# Check libraries for optional isl support
SET (ISL_LIB_PATH "")
SET (ENABLE_ISL OFF)
IF (ISL)
MESSAGE(STATUS "ISL setting is ${ISL}")
IF (${ISL} MATCHES "ON")
MESSAGE(STATUS "ISL was set to ON")
FIND_LIBRARY (LIB_ISL NAMES "isl")
CHECK_INCLUDE_FILES ("isl/ctx.h" HAVE_ISL_H)
IF (LIB_ISL AND HAVE_ISL_H)
FIND_PATH (ISL_INC_PATH NAMES "isl/ctx.h")
SET (ENABLE_ISL ON)
MESSAGE(STATUS "ISL include library found")
ELSE ()
MESSAGE(STATUS "ISL include library NOT found")
ENDIF ()
ELSE ()
IF (${ISL} MATCHES "OFF")
MESSAGE(STATUS "ISL was set to OFF")
ELSE ()
MESSAGE(FATAL_ERROR "ISL setting not valid; was: " ${ISL})
ENDIF ()
ENDIF ()
# Check libraries for optional barvinok support
SET (BARVINOK_LIB_PATH "")
SET (ENABLE_BARVINOK OFF)
IF (BARVINOK)
MESSAGE(STATUS "BARVINOK setting is ${BARVINOK}")
IF (${BARVINOK} MATCHES "ON")
FIND_LIBRARY (LIB_BARVINOK NAMES "barvinok")
CHECK_INCLUDE_FILES ("barvinok/barvinok.h" HAVE_BARVINOK_H)
IF (LIB_BARVINOK AND HAVE_BARVINOK_H)
FIND_PATH (BARVINOK_INC_PATH NAMES "barvinok/barvinok.h")
SET (ENABLE_BARVINOK ON)
MESSAGE(STATUS "BARVINOK include library found")
ELSE ()
MESSAGE(STATUS "BARVINOK include library NOT found")
ENDIF ()
ELSE ()
IF (${BARVINOK} MATCHES "OFF")
MESSAGE(STATUS "BARVINOK was set to OFF")
ELSE ()
MESSAGE(FATAL_ERROR "BARVINOK setting not valid; was: " ${BARVINOK})
ENDIF ()
ENDIF ()
......@@ -848,14 +869,18 @@ SET (BUILD_STATUS "
*
* Run-time specialization: ${ENABLE_RTSPEC}
* Private heap manager: ${PHM}
* Back-ends:
* Polyhedral optional packages:
* - ISL: ${ENABLE_ISL}
* - BARVINOK: ${ENABLE_BARVINOK}
* Back ends:
* - MT/pthread: ${ENABLE_MT}
* - MT/LPEL: ${ENABLE_MT_LPEL}
* - CUDA: ${ENABLE_CUDA}
* - OpenMP: ${ENABLE_OMP}
* - SL: ${ENABLE_SL}
* - HWLOC: ${ENABLE_HWLOC}
* - Distributed memory: ${ENABLE_DISTMEM}$distmem_details_print
* - Distributed memory: ${ENABLE_DISTMEM}
* ${distmem_details_print}
* ====== distmen is non-functional ======
* - CC: ${CMAKE_C_COMPILER} (${CMAKE_C_COMPILER_ID})
* - CCFLAGS: ${BUILD_TYPE_C_FLAGS}
......@@ -864,7 +889,7 @@ SET (BUILD_STATUS "
* - SaC Linksetsize: ${LINKSETSIZE}
*
* Status:
* - sac2c is in dirty state: ${SAC2C_IS_DIRTY}
* - sac2c dirty state is: ${SAC2C_IS_DIRTY}
*")
STRING (REPLACE "\n" "\\n\\\n" CPP_BUILD_STATUS ${BUILD_STATUS})
......
......@@ -241,10 +241,9 @@ def glob_versions (prefix, glob_expr, build_type=None):
# version. If the `build_type' argument is None then we consider
# all the build_types.
if build_type is None:
binary_to_build_types = {
"sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]: x
for x in SAC2C_BUILD_TYPE_POSTFIXES
}
binary_to_build_types = {}
for x in SAC2C_BUILD_TYPE_POSTFIXES:
binary_to_build_types["sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]] = x
else:
binary_to_build_types = {"sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[build_type] : build_type}
......@@ -284,10 +283,9 @@ def delete_versions (prefix, glob_expr, dryrun, build_type):
print "Nothing removed"
def binary_to_version (sac2c_binary):
binary_to_build_types = {
"sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]: x
for x in SAC2C_BUILD_TYPE_POSTFIXES
}
binary_to_build_types = {}
for x in SAC2C_BUILD_TYPE_POSTFIXES:
binary_to_build_types["sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]] = x
if not sac2c_binary in binary_to_build_types:
error ("undefined build type `%s' of sac2c found" % sac2c_binary)
......
......@@ -65,6 +65,7 @@ FOREACH(name ${XSL_FILES})
SET (dst "${CMAKE_CURRENT_BINARY_DIR}/${dst}")
ADD_CUSTOM_COMMAND(
OUTPUT "${dst}"
# FIXME(artem) it also depends on a bunch of xsl files...
DEPENDS "${CB}" "${src}" "${CMAKE_CURRENT_SOURCE_DIR}/xml/ast.xml"
COMMAND
${XSLT_EXEC} ${src}
......
......@@ -221,7 +221,8 @@ ISLUgetLoopCount (char *str, lut_t *varlut)
dim = isl_union_pw_qpolynomial_get_space (pwcard);
zro = isl_point_zero (isl_space_copy (dim));
val = isl_union_pw_qpolynomial_eval (pwcard, zro);
z = (NULL != val) ? isl_val_get_num_si (val) : z;
z = ((NULL != val) && (isl_val_is_rat (val)))
? isl_val_get_num_si (val) : z;
z = (0 == z) ? UNR_NONE : z;
isl_val_free (val);
isl_space_free (dim);
......
......@@ -2851,6 +2851,7 @@ PHUTanalyzeLoopDependentVariable (node *vid, node *rcv, node *fundef, lut_t *var
prf prfiv;
prf prfz;
int stridesignum = 0; // -1 for negative, 1 for positive, 0 for unknown or 0.
int lpcount = UNR_NONE;
DBUG_ENTER ();
......@@ -2897,8 +2898,9 @@ PHUTanalyzeLoopDependentVariable (node *vid, node *rcv, node *fundef, lut_t *var
TYmakeAKS (TYmakeSimpleType (T_int), SHcreateShape (0)));
PHUTinsertVarIntoLut (lpavis, varlut, fundef, AVIS_ISLCLASSEXISTENTIAL);
// If this is a loopfun with known loopcount, use it.
if ((FUNDEF_ISLOOPFUN (fundef)) && (UNR_NONE != loopcount)) {
resel = BuildIslSimpleConstraint (lpavis, F_lt_SxS, TBmakeNum (loopcount),
lpcount = FUNDEF_LOOPCOUNT (fundef);
if ((FUNDEF_ISLOOPFUN (fundef)) && (UNR_NONE != lpcount)) {
resel = BuildIslSimpleConstraint (lpavis, F_lt_SxS, TBmakeNum (lpcount),
NOPRFOP, NULL);
res = TCappendExprs (res, resel);
}
......
......@@ -1654,6 +1654,8 @@ PWLFprf (node *arg_node, info *arg_info)
AVIS_NAME (ID_AVIS (pwlid)), cwlnm, plresult);
DBUG_PRINT ("Building inverse projection for cwl=%s", cwlnm);
arg_node = BuildInverseProjections (arg_node, arg_info);
DBUG_ASSERT (POLY_RET_CCONTAINSB == plresult,
"Coding time, Bobbo. We need to slice cwl");
z = PWLFperformFold (arg_node, foldpwlpart, arg_info);
FREEdoFreeNode (arg_node);
arg_node = z;
......
......@@ -425,6 +425,12 @@ SWLDisDefinedInThisBlock (node *avis, int wldepth)
* @result: TRUE if iv is defined in this block.
*
*****************************************************************************/
#if 0
// I am commenting out this function for the time being, as it is not used
// anywhere in the compiler. It has a weird bug in it that makes it evaluate
// to true all the time. I'll leave the resolution to the person who will
// actually want to use this.
bool
SWLDisDefinedInNextOuterBlock (node *avis, int wldepth)
{
......@@ -432,6 +438,9 @@ SWLDisDefinedInNextOuterBlock (node *avis, int wldepth)
DBUG_ENTER ();
// FIXME: I guess this expression should be
// z = (1 + wldepth) == AVIS_DEPTH (avis), otherwise it evaluates to
// true all the time, which is weird.
z = 1 + (wldepth == AVIS_DEFDEPTH (avis));
if (z) {
......@@ -442,5 +451,6 @@ SWLDisDefinedInNextOuterBlock (node *avis, int wldepth)
DBUG_RETURN (z);
}
#endif
#undef DBUG_PREFIX
......@@ -21,7 +21,11 @@ extern node *SWLDids (node *arg_node, info *arg_info);
extern node *SWLDvardec (node *arg_node, info *arg_info);
extern node *SWLDarg (node *arg_node, info *arg_info);
#if 0
// Commented out due to a bug in it and lack of use in the compiler.
extern bool SWLDisDefinedInNextOuterBlock (node *avis, int wldepth);
#endif
extern bool SWLDisDefinedInThisBlock (node *avis, int wldepth);
#endif // _SAC_SET_WITHLOOP_DEPTH_H_
......@@ -149,6 +149,8 @@ PrintGlobalSwitches (void)
(global.trace.aa) ? 1 : 0);
fprintf (global.outfile, "#define SAC_DO_TRACE_MT %d\n",
(global.trace.mt) ? 1 : 0);
fprintf (global.outfile, "#define SAC_DO_TRACE_GPU %d\n",
(global.trace.gpu) ? 1 : 0);
fprintf (global.outfile, "#define SAC_DO_TRACE_RTSPEC %d\n",
(global.trace.rtspec) ? 1 : 0);
fprintf (global.outfile, "#define SAC_DO_TRACE_DISTMEM %d\n",
......
......@@ -192,6 +192,9 @@ ICMCompileCUDA_GLOBALFUN_AP (char *funname, int vararg_cnt, char **vararg)
INDENT;
INDENT;
fprintf (global.outfile,
"SAC_TR_GPU_PRINT (\" kernel name \\\"%s\\\"\\n\");",
funname);
if (global.backend == BE_cudahybrid) {
// on cudahybrid, we make use of streams, which have a fixed name
fprintf (global.outfile, "%s<<<grid, block, 0, *stream>>>(", funname);
......@@ -268,16 +271,32 @@ ICMCompileCUDA_GRID_BLOCK (int bounds_count, char **var_ANY)
#define CUDA_SET_GRID(fmt, ...) \
fprintf (global.outfile, "dim3 grid(" fmt ");\n", __VA_ARGS__); \
INDENT; \
fprintf (global.outfile, \
"SAC_TR_GPU_PRINT (\" CUDA XYZ grid dimension of " \
"%%u x %%u x %%u\", grid.x , grid.y , grid.z );\n"); \
INDENT; \
fprintf (global.outfile, "if (grid.x <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA X grid dimension must be bigger than zero. Current"\
" value is %%u\", grid.x);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (grid.y <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Y grid dimension must be bigger than zero. Current"\
" value is %%u\", grid.y);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (grid.z <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Z grid dimension must be bigger than zero. Current"\
" value is %%u\", grid.z);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (grid.x > %u || grid.y > %u || grid.z > %u) {\n", \
global.cuda_max_x_dim, global.cuda_max_yz_dim, global.cuda_max_yz_dim); \
global.cuda_max_x_grid, global.cuda_max_yz_grid, global.cuda_max_yz_grid); \
INDENT; \
INDENT; \
INDENT; \
fprintf (global.outfile, \
"SAC_RuntimeError(\"CUDA XYZ grid dimension exceeds compute " \
"compatibilities max value: %u x %u x %u\");\n", \
global.cuda_max_x_dim, global.cuda_max_yz_dim, global.cuda_max_yz_dim); \
"SAC_RuntimeError(\"CUDA XYZ grid dimension of %%u x %%u x %%u exceeds " \
"the compute capability's max value: %u x %u x %u\"," \
" grid.x, grid.y, grid.z );\n", \
global.cuda_max_x_grid, global.cuda_max_yz_grid, global.cuda_max_yz_grid); \
INDENT; \
INDENT; \
fprintf (global.outfile, "}\n");
......@@ -285,16 +304,49 @@ ICMCompileCUDA_GRID_BLOCK (int bounds_count, char **var_ANY)
#define CUDA_SET_BLOCK(fmt, ...) \
fprintf (global.outfile, "dim3 block(" fmt ");", __VA_ARGS__); \
INDENT; \
fprintf (global.outfile, \
"SAC_TR_GPU_PRINT (\" CUDA XYZ block dimension of " \
"%%u x %%u x %%u\", block.x , block.y , block.z );\n"); \
INDENT; \
fprintf (global.outfile, "if (block.x <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA X block dimension must be bigger than zero. " \
"Current value is %%u\", block.x);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (block.y <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Y block dimension must be bigger than zero. " \
"Current value is %%u\", block.y);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (block.z <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Z block dimension must be bigger than zero. " \
"Current value is %%u\", block.z);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (block.x > %u || block.y > %u || block.z > %u) {\n", \
global.cuda_max_x_dim, global.cuda_max_yz_dim, global.cuda_max_yz_dim); \
global.cuda_max_xy_block, global.cuda_max_xy_block, \
global.cuda_max_z_block); \
INDENT; \
INDENT; \
INDENT; \
fprintf (global.outfile, \
"SAC_RuntimeError(\"CUDA XYZ block dimension of %%u x %%u x %%u exceeds " \
"the compute capability's max value: %u x %u x %u\", " \
"block.x, block.y, block.z);\n", \
global.cuda_max_xy_block, global.cuda_max_xy_block, \
global.cuda_max_z_block); \
INDENT; \
INDENT; \
fprintf (global.outfile, "}\n"); \
INDENT; \
INDENT; \
fprintf (global.outfile, "if (block.x * block.y *block.z > %u ) {\n", \
global.cuda_max_threads_block); \
INDENT; \
INDENT; \
INDENT; \
fprintf (global.outfile, \
"SAC_RuntimeError(\"CUDA XYZ block dimension exceeds compute " \
"compatibilities max value: %u x %u x %u\");\n", \
global.cuda_max_x_dim, global.cuda_max_yz_dim, global.cuda_max_yz_dim); \
"SAC_RuntimeError(\"CUDA XYZ block dimension of %%u x %%u x %%u = %%u " \
"exceeds compute capability's max number of threads per block: %u\", " \
"block.x, block.y, block.z, block.x * block.y * block.z);\n", \
global.cuda_max_threads_block); \
INDENT; \
INDENT; \
fprintf (global.outfile, "}\n");
......@@ -334,6 +386,10 @@ ICMCompileCUDA_GRID_BLOCK (int bounds_count, char **var_ANY)
INDENT;
fprintf (global.outfile, "{\n");
fprintf (global.outfile,
"SAC_TR_GPU_PRINT (\"launching kernel for %dD With-Loop\");",
bounds_count/3);
INDENT;
if (bounds_count == 3) { /* 1D CUDA withloop */
INDENT;
INDENT;
......@@ -402,6 +458,9 @@ ICMCompileCUDA_ST_GLOBALFUN_AP (char *funname, int vararg_cnt, char **vararg)
*/
INDENT;
INDENT;
fprintf (global.outfile,
"SAC_TR_GPU_PRINT (\" kernel name \\\"%s\\\"\\n\");",
funname);
fprintf (global.outfile, "%s<<<1, 1>>>(", funname);
for (i = 0; i < 4 * vararg_cnt; i += 4) {
if (STReq (vararg[i + 1], "float_dev")) {
......@@ -782,6 +841,10 @@ ICMCompileCUDA_MEM_TRANSFER (char *to_NT, char *from_NT, char *basetype, char *d
"should have equal sizes!"));
INDENT;
fprintf (global.outfile,
"SAC_TR_GPU_PRINT (\"%s size %%d\\n\", SAC_ND_A_SIZE( %s));",
direction, from_NT);
fprintf (global.outfile, "SAC_CUDA_MEM_TRANSFER(%s, %s, %s, %s)", to_NT, from_NT,
basetype, direction);
......
......@@ -116,8 +116,11 @@ InitCudaBlockSizes (void)
global.cuda_blocking_factor = 16;
global.cuda_2d_block_x = 16;
global.cuda_2d_block_y = 16;
global.cuda_max_x_dim = 65535;
global.cuda_max_yz_dim = 65535;
global.cuda_max_x_grid = 65535;
global.cuda_max_yz_grid = 65535;
global.cuda_max_xy_block = 512;
global.cuda_max_z_block = 64;
global.cuda_max_threads_block = 512;
} else if (STReq (global.config.cuda_arch, "-arch=sm_12")
|| STReq (global.config.cuda_arch, "-arch=sm_13")) {
global.optimal_threads = 256;
......@@ -127,8 +130,11 @@ InitCudaBlockSizes (void)
global.cuda_blocking_factor = 16;
global.cuda_2d_block_x = 16;
global.cuda_2d_block_y = 16;
global.cuda_max_x_dim = 65535;
global.cuda_max_yz_dim = 65535;
global.cuda_max_x_grid = 65535;
global.cuda_max_yz_grid = 65535;
global.cuda_max_xy_block = 512;
global.cuda_max_z_block = 64;
global.cuda_max_threads_block = 512;
} else if (STReq (global.config.cuda_arch, "-arch=sm_20")) {
/*
global.optimal_threads = 512;
......@@ -148,8 +154,11 @@ InitCudaBlockSizes (void)
global.cuda_blocking_factor = 32;
global.cuda_2d_block_x = 16;
global.cuda_2d_block_y = 16;
global.cuda_max_x_dim = 65535;
global.cuda_max_yz_dim = 65535;
global.cuda_max_x_grid = 65535;
global.cuda_max_yz_grid = 65535;
global.cuda_max_xy_block = 1024;
global.cuda_max_z_block = 64;
global.cuda_max_threads_block = 1024;
} else if (STReq (global.config.cuda_arch, "-arch=sm_35")) {
global.optimal_threads = 512;
global.optimal_blocks = 3;
......@@ -163,8 +172,11 @@ InitCudaBlockSizes (void)
global.cuda_blocking_factor = 32;
global.cuda_2d_block_x = 16;
global.cuda_2d_block_y = 16;
global.cuda_max_x_dim = 2147483647;
global.cuda_max_yz_dim = 65535;
global.cuda_max_x_grid = 2147483647;
global.cuda_max_yz_grid = 65535;
global.cuda_max_xy_block = 1024;
global.cuda_max_z_block = 64;
global.cuda_max_threads_block = 1024;
} else if (STReq (global.config.cuda_arch, "-arch=sm_50")) {
global.optimal_threads = 512;
global.optimal_blocks = 3;
......@@ -178,8 +190,11 @@ InitCudaBlockSizes (void)
global.cuda_blocking_factor = 32;
global.cuda_2d_block_x = 32;
global.cuda_2d_block_y = 32;
global.cuda_max_x_dim = 2147483647;
global.cuda_max_yz_dim = 65535;
global.cuda_max_x_grid = 2147483647;
global.cuda_max_yz_grid = 65535;
global.cuda_max_xy_block = 1024;
global.cuda_max_z_block = 64;
global.cuda_max_threads_block = 1024;
} else {
if (STReq (global.config.cuda_arch, "no")) {
CTIwarn ("CUDA architecture was not detected during install, setting to "
......
......@@ -27,6 +27,7 @@ TRACE (mem, 'm', FALSE)
TRACE (wl, 'w', FALSE)
TRACE (aa, 's', FALSE)
TRACE (mt, 't', FALSE)
TRACE (gpu, 'g', FALSE) /* CUDA backend */
TRACE (cenv, 'c', FALSE)
TRACE (distmem, 'd', FALSE) /* Distributed memory backend */
......
......@@ -629,7 +629,7 @@ get_terminal_size (void)
*/
void
GLOBinitializeGlobal (int argc, char *argv[], tool_t tool, char *toolname)
GLOBinitializeGlobal (int argc, char *argv[], tool_t tool, const char *toolname)
{
DBUG_ENTER ();
......
......@@ -33,7 +33,7 @@
extern FILE *yyin;
extern global_t global;
extern void GLOBinitializeGlobal (int argc, char *argv[], tool_t tool, char *toolname);
extern void GLOBinitializeGlobal (int argc, char *argv[], tool_t tool, const char *toolname);
extern void GLOBsetupBackend (void);
extern void GLOBsetupDistMemCommLib (void);
extern void GLOBfinalizeGlobal (void);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment