Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
sac-group
sac2c
Commits
1b651fb9
Commit
1b651fb9
authored
Oct 23, 2018
by
Artem Shinkarov
Browse files
Merge tag 'v1.3.3-MijasCosta'
Fixing broken ubuntu dependencies
parents
1954b48d
09335809
Changes
57
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
176 additions
and
48 deletions
+176
-48
.gitlab-ci.yml
.gitlab-ci.yml
+1
-0
CMakeLists.txt
CMakeLists.txt
+6
-1
LICENSE.txt
LICENSE.txt
+3
-4
cmake/cpack/CMakeLists.txt
cmake/cpack/CMakeLists.txt
+1
-1
cmake/cpack/config.cmake
cmake/cpack/config.cmake
+3
-5
cmake/options.cmake
cmake/options.cmake
+1
-0
cmake/sac2c/config.cmake
cmake/sac2c/config.cmake
+30
-5
scripts/sac2c-version-manager.in
scripts/sac2c-version-manager.in
+6
-8
src/libsac2c/CMakeLists.txt
src/libsac2c/CMakeLists.txt
+1
-0
src/libsac2c/arrayopt/isl_utilities.c
src/libsac2c/arrayopt/isl_utilities.c
+2
-1
src/libsac2c/arrayopt/polyhedral_utilities.c
src/libsac2c/arrayopt/polyhedral_utilities.c
+4
-2
src/libsac2c/arrayopt/polyhedral_wlf.c
src/libsac2c/arrayopt/polyhedral_wlf.c
+2
-0
src/libsac2c/arrayopt/set_withloop_depth.c
src/libsac2c/arrayopt/set_withloop_depth.c
+10
-0
src/libsac2c/arrayopt/set_withloop_depth.h
src/libsac2c/arrayopt/set_withloop_depth.h
+4
-0
src/libsac2c/codegen/gen_startup_code.c
src/libsac2c/codegen/gen_startup_code.c
+2
-0
src/libsac2c/codegen/icm2c_cuda.c
src/libsac2c/codegen/icm2c_cuda.c
+72
-9
src/libsac2c/cuda/annotate_cuda_withloop2.c
src/libsac2c/cuda/annotate_cuda_withloop2.c
+25
-10
src/libsac2c/global/flags.mac
src/libsac2c/global/flags.mac
+1
-0
src/libsac2c/global/globals.c
src/libsac2c/global/globals.c
+1
-1
src/libsac2c/global/globals.h
src/libsac2c/global/globals.h
+1
-1
No files found.
.gitlab-ci.yml
View file @
1b651fb9
...
...
@@ -76,6 +76,7 @@ stages:
-
cp -r build/.sac2crc $HOME
-
cd build
-
make fulltest
-
ctest --output-on-failure
artifacts
:
expire_in
:
12 hrs
untracked
:
true
...
...
CMakeLists.txt
View file @
1b651fb9
CMAKE_MINIMUM_REQUIRED
(
VERSION 3.4
)
# Name of the project
PROJECT
(
sac2c
C
)
PROJECT
(
sac2c
)
# Handle Policies
#FIXME hans: currently we set the policy to the DEFAULT
...
...
@@ -40,6 +40,8 @@ ADD_FEATURE_INFO(Dot DOT "used to generate a visual of the AST")
ADD_FEATURE_INFO
(
C++ CPLUSPLUS
"to compile sac2c with"
)
ADD_FEATURE_INFO
(
ISL ISL
"use Integer Set Library in sac2c"
)
ADD_FEATURE_INFO
(
BARVINOK BARVINOK
"use Barvinok Library in sac2c"
)
ADD_FEATURE_INFO
(
FUNCTESTS FUNCTESTS
"enable functional tests (gtest is required)"
)
FEATURE_SUMMARY
(
WHAT ALL
)
# Build executables for the tools.
...
...
@@ -317,6 +319,9 @@ INCLUDE ("${PROJECT_SOURCE_DIR}/cmake/cpack/config.cmake")
# Here we describe what happens when we test stuff.
ENABLE_TESTING
()
ADD_SUBDIRECTORY
(
tests
)
IF
(
FUNCTESTS
)
ADD_SUBDIRECTORY
(
"src/tests"
)
ENDIF
()
# vim:ts=2:sw=2:et:
LICENSE.txt
View file @
1b651fb9
...
...
@@ -6,12 +6,12 @@ SAC - Single Assignment C
SAC COPYRIGHT NOTICE, LICENSE, AND DISCLAIMER
(c) Copyright 1994 - 201
6
by
(c) Copyright 1994 - 201
8
by
SAC Development Team
http://www.sac-home.org
email:info@sac-home.org
web:
http://www.sac-home.org
email:
info@sac-home.org
---------------------------------------------------------------------------
...
...
@@ -42,4 +42,3 @@ performance of this software is with you. Should this software prove
defective, you assume the cost of all servicing, repair, or correction.
---------------------------------------------------------------------------
cmake/cpack/CMakeLists.txt
View file @
1b651fb9
...
...
@@ -77,7 +77,7 @@ MACRO (ADD_SAC2C_BUILD _build_type)
COMMAND $
(
MAKE
)
DESTDIR=
${
PROJECT_BINARY_DIR
}
/toplevel/headers headers-install
COMMAND $
(
MAKE
)
DESTDIR=
${
PROJECT_BINARY_DIR
}
/toplevel/symlinks symlinks-install
TEST_EXCLUDE_FROM_MAIN 1
TEST_COMMAND ctest
TEST_COMMAND ctest
--output-on-failure
BUILD_ALWAYS 1
CMAKE_ARGS
-DCMAKE_BUILD_TYPE=
${
_build_type
}
...
...
cmake/cpack/config.cmake
View file @
1b651fb9
...
...
@@ -44,7 +44,6 @@ ENDIF ()
SET
(
CPACK_PACKAGE_NAME
"sac2c-compiler"
)
SET
(
CPACK_PACKAGE_VENDOR
"SaC Development Team"
)
SET
(
CPACK_PACKAGE_CONTACT
"info@sac-home.org"
)
SET
(
CPACK_PACKAGE_VERSION
"
${
SAC2C_VERSION
}
"
)
SET
(
CPACK_PACKAGE_VERSION_MAJOR
"
${
SAC2C_VERSION_MAJOR
}
"
)
SET
(
CPACK_PACKAGE_VERSION_MINOR
"
${
SAC2C_VERSION_MINOR
}
"
)
SET
(
CPACK_PACKAGE_VERSION_PATCH
"
${
SAC2C_VERSION_PATCH
}
"
)
...
...
@@ -53,7 +52,7 @@ SET (CPACK_PACKAGE_INSTALL_DIRECTORY "sac2c-${SAC2C_VERSION}") # XXX is this rea
SET
(
CPACK_PACKAGE_ICON
"
${
SAC2C_SOURCE_DIR
}
/cmake/cpack/sac_logo.png"
)
# SET (CPACK_PACKAGE_DESCRIPTION_FILE ...)
SET
(
CPACK_PACKAGE_DESCRIPTION_SUMMARY
"The sac2c compiler for a data-parallel array-based functional language S
A
C"
)
SET
(
CPACK_PACKAGE_DESCRIPTION_SUMMARY
"The sac2c compiler for a data-parallel array-based functional language S
a
C"
)
# FIXME(artem) We need to decide where to put the stuff on the target system...
#SET (CPACK_PACKAGING_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
SET
(
CPACK_RESOURCE_FILE_LICENSE
"
${
SAC2C_SOURCE_DIR
}
/LICENSE.txt"
)
...
...
@@ -74,9 +73,8 @@ SET (CPACK_COMPONENT_SYMLINKS_DISPLAY_NAME "SaC Symlinks")
# Debian-specific variables
SET
(
CPACK_DEBIAN_PACKAGE_MAINTAINER
"
${
CPACK_PACKAGE_VENDOR
}
<
${
CPACK_PACKAGE_CONTACT
}
>"
)
SET
(
CPACK_DEBIAN_ARCHITECTURE
${
CMAKE_SYSTEM_PROCESSOR
}
)
#SET (CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) # non-functional
# FIXME Can we auto-generate these dependencies?
SET
(
CPACK_DEBIAN_PACKAGE_DEPENDS
"gcc, libc6 (>= 2.13), uuid-runtime (>= 2.20)"
)
SET
(
CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON
)
SET
(
CPACK_DEBIAN_PACKAGE_DEPENDS
"gcc, libc6 (>= 2.13), uuid-runtime (>= 2.20), libhwloc-dev"
)
# RPM-specific variables
# XXX (hans): this may not be exhaustive - does not take into account if the user
...
...
cmake/options.cmake
View file @
1b651fb9
...
...
@@ -14,3 +14,4 @@ OPTION (CUDA "Build sac2c with CUDA backend support"
OPTION
(
HWLOC
"Build sac2c with hwloc support"
ON
)
OPTION
(
ISL
"Build sac2c with Integer Set Library"
ON
)
OPTION
(
BARVINOK
"Build sac2c with Barvinok Library"
ON
)
OPTION
(
FUNCTESTS
"Enable functional tests (requires GTest library)"
ON
)
cmake/sac2c/config.cmake
View file @
1b651fb9
...
...
@@ -167,24 +167,45 @@ LIB_NEEDED ("m" "pow" "
# Check libraries for optional isl support
SET
(
ISL_LIB_PATH
""
)
SET
(
ENABLE_ISL OFF
)
IF
(
ISL
)
MESSAGE
(
STATUS
"ISL setting is
${
ISL
}
"
)
IF
(
${
ISL
}
MATCHES
"ON"
)
MESSAGE
(
STATUS
"ISL was set to ON"
)
FIND_LIBRARY
(
LIB_ISL NAMES
"isl"
)
CHECK_INCLUDE_FILES
(
"isl/ctx.h"
HAVE_ISL_H
)
IF
(
LIB_ISL AND HAVE_ISL_H
)
FIND_PATH
(
ISL_INC_PATH NAMES
"isl/ctx.h"
)
SET
(
ENABLE_ISL ON
)
MESSAGE
(
STATUS
"ISL include library found"
)
ELSE
()
MESSAGE
(
STATUS
"ISL include library NOT found"
)
ENDIF
()
ELSE
()
IF
(
${
ISL
}
MATCHES
"OFF"
)
MESSAGE
(
STATUS
"ISL was set to OFF"
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"ISL setting not valid; was: "
${
ISL
}
)
ENDIF
()
ENDIF
()
# Check libraries for optional barvinok support
SET
(
BARVINOK_LIB_PATH
""
)
SET
(
ENABLE_BARVINOK OFF
)
IF
(
BARVINOK
)
MESSAGE
(
STATUS
"BARVINOK setting is
${
BARVINOK
}
"
)
IF
(
${
BARVINOK
}
MATCHES
"ON"
)
FIND_LIBRARY
(
LIB_BARVINOK NAMES
"barvinok"
)
CHECK_INCLUDE_FILES
(
"barvinok/barvinok.h"
HAVE_BARVINOK_H
)
IF
(
LIB_BARVINOK AND HAVE_BARVINOK_H
)
FIND_PATH
(
BARVINOK_INC_PATH NAMES
"barvinok/barvinok.h"
)
SET
(
ENABLE_BARVINOK ON
)
MESSAGE
(
STATUS
"BARVINOK include library found"
)
ELSE
()
MESSAGE
(
STATUS
"BARVINOK include library NOT found"
)
ENDIF
()
ELSE
()
IF
(
${
BARVINOK
}
MATCHES
"OFF"
)
MESSAGE
(
STATUS
"BARVINOK was set to OFF"
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"BARVINOK setting not valid; was: "
${
BARVINOK
}
)
ENDIF
()
ENDIF
()
...
...
@@ -848,14 +869,18 @@ SET (BUILD_STATUS "
*
* Run-time specialization:
${
ENABLE_RTSPEC
}
* Private heap manager:
${
PHM
}
* Back-ends:
* Polyhedral optional packages:
* - ISL:
${
ENABLE_ISL
}
* - BARVINOK:
${
ENABLE_BARVINOK
}
* Back ends:
* - MT/pthread:
${
ENABLE_MT
}
* - MT/LPEL:
${
ENABLE_MT_LPEL
}
* - CUDA:
${
ENABLE_CUDA
}
* - OpenMP:
${
ENABLE_OMP
}
* - SL:
${
ENABLE_SL
}
* - HWLOC:
${
ENABLE_HWLOC
}
* - Distributed memory:
${
ENABLE_DISTMEM
}
$distmem_details_print
* - Distributed memory:
${
ENABLE_DISTMEM
}
*
${
distmem_details_print
}
* ====== distmen is non-functional ======
* - CC:
${
CMAKE_C_COMPILER
}
(
${
CMAKE_C_COMPILER_ID
}
)
* - CCFLAGS:
${
BUILD_TYPE_C_FLAGS
}
...
...
@@ -864,7 +889,7 @@ SET (BUILD_STATUS "
* - SaC Linksetsize:
${
LINKSETSIZE
}
*
* Status:
* - sac2c
is in
dirty state:
${
SAC2C_IS_DIRTY
}
* - sac2c dirty state
is
:
${
SAC2C_IS_DIRTY
}
*"
)
STRING
(
REPLACE
"
\n
"
"
\\
n
\\\n
"
CPP_BUILD_STATUS
${
BUILD_STATUS
}
)
...
...
scripts/sac2c-version-manager.in
View file @
1b651fb9
...
...
@@ -241,10 +241,9 @@ def glob_versions (prefix, glob_expr, build_type=None):
# version. If the `build_type' argument is None then we consider
# all the build_types.
if build_type is None:
binary_to_build_types = {
"sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]: x
for x in SAC2C_BUILD_TYPE_POSTFIXES
}
binary_to_build_types = {}
for x in SAC2C_BUILD_TYPE_POSTFIXES:
binary_to_build_types["sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]] = x
else:
binary_to_build_types = {"sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[build_type] : build_type}
...
...
@@ -284,10 +283,9 @@ def delete_versions (prefix, glob_expr, dryrun, build_type):
print "Nothing removed"
def binary_to_version (sac2c_binary):
binary_to_build_types = {
"sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]: x
for x in SAC2C_BUILD_TYPE_POSTFIXES
}
binary_to_build_types = {}
for x in SAC2C_BUILD_TYPE_POSTFIXES:
binary_to_build_types["sac2c" + SAC2C_BUILD_TYPE_POSTFIXES[x]] = x
if not sac2c_binary in binary_to_build_types:
error ("undefined build type `%s' of sac2c found" % sac2c_binary)
...
...
src/libsac2c/CMakeLists.txt
View file @
1b651fb9
...
...
@@ -65,6 +65,7 @@ FOREACH(name ${XSL_FILES})
SET
(
dst
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
dst
}
"
)
ADD_CUSTOM_COMMAND
(
OUTPUT
"
${
dst
}
"
# FIXME(artem) it also depends on the bunch of xsl files...
DEPENDS
"
${
CB
}
"
"
${
src
}
"
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/xml/ast.xml"
COMMAND
${
XSLT_EXEC
}
${
src
}
...
...
src/libsac2c/arrayopt/isl_utilities.c
View file @
1b651fb9
...
...
@@ -221,7 +221,8 @@ ISLUgetLoopCount (char *str, lut_t *varlut)
dim
=
isl_union_pw_qpolynomial_get_space
(
pwcard
);
zro
=
isl_point_zero
(
isl_space_copy
(
dim
));
val
=
isl_union_pw_qpolynomial_eval
(
pwcard
,
zro
);
z
=
(
NULL
!=
val
)
?
isl_val_get_num_si
(
val
)
:
z
;
z
=
((
NULL
!=
val
)
&&
(
isl_val_is_rat
(
val
)))
?
isl_val_get_num_si
(
val
)
:
z
;
z
=
(
0
==
z
)
?
UNR_NONE
:
z
;
isl_val_free
(
val
);
isl_space_free
(
dim
);
...
...
src/libsac2c/arrayopt/polyhedral_utilities.c
View file @
1b651fb9
...
...
@@ -2851,6 +2851,7 @@ PHUTanalyzeLoopDependentVariable (node *vid, node *rcv, node *fundef, lut_t *var
prf
prfiv
;
prf
prfz
;
int
stridesignum
=
0
;
// -1 for negative, 1 for positive, 0 for unknown or 0.
int
lpcount
=
UNR_NONE
;
DBUG_ENTER
();
...
...
@@ -2897,8 +2898,9 @@ PHUTanalyzeLoopDependentVariable (node *vid, node *rcv, node *fundef, lut_t *var
TYmakeAKS
(
TYmakeSimpleType
(
T_int
),
SHcreateShape
(
0
)));
PHUTinsertVarIntoLut
(
lpavis
,
varlut
,
fundef
,
AVIS_ISLCLASSEXISTENTIAL
);
// If this is a loopfun with known loopcount, use it.
if
((
FUNDEF_ISLOOPFUN
(
fundef
))
&&
(
UNR_NONE
!=
loopcount
))
{
resel
=
BuildIslSimpleConstraint
(
lpavis
,
F_lt_SxS
,
TBmakeNum
(
loopcount
),
lpcount
=
FUNDEF_LOOPCOUNT
(
fundef
);
if
((
FUNDEF_ISLOOPFUN
(
fundef
))
&&
(
UNR_NONE
!=
lpcount
))
{
resel
=
BuildIslSimpleConstraint
(
lpavis
,
F_lt_SxS
,
TBmakeNum
(
lpcount
),
NOPRFOP
,
NULL
);
res
=
TCappendExprs
(
res
,
resel
);
}
...
...
src/libsac2c/arrayopt/polyhedral_wlf.c
View file @
1b651fb9
...
...
@@ -1654,6 +1654,8 @@ PWLFprf (node *arg_node, info *arg_info)
AVIS_NAME
(
ID_AVIS
(
pwlid
)),
cwlnm
,
plresult
);
DBUG_PRINT
(
"Building inverse projection for cwl=%s"
,
cwlnm
);
arg_node
=
BuildInverseProjections
(
arg_node
,
arg_info
);
DBUG_ASSERT
(
POLY_RET_CCONTAINSB
==
plresult
,
"Coding time, Bobbo. We need to slice cwl"
);
z
=
PWLFperformFold
(
arg_node
,
foldpwlpart
,
arg_info
);
FREEdoFreeNode
(
arg_node
);
arg_node
=
z
;
...
...
src/libsac2c/arrayopt/set_withloop_depth.c
View file @
1b651fb9
...
...
@@ -425,6 +425,12 @@ SWLDisDefinedInThisBlock (node *avis, int wldepth)
* @result: TRUE if iv is defined in this block.
*
*****************************************************************************/
#if 0
// I am commenting out this function for the time being, as it is not used
// anywhere in the compiler. It has a weird bug in it that makes it evaluate
// to true all the time. I'll leave the resolution to the person who will
// actually want to use this.
bool
SWLDisDefinedInNextOuterBlock (node *avis, int wldepth)
{
...
...
@@ -432,6 +438,9 @@ SWLDisDefinedInNextOuterBlock (node *avis, int wldepth)
DBUG_ENTER ();
// FIXME: I guess this expression should be
// z = (1 + wldepth) == AVIS_DEPTH (avis), otherwise it evaluates to
// true all the time, which is weird.
z = 1 + (wldepth == AVIS_DEFDEPTH (avis));
if (z) {
...
...
@@ -442,5 +451,6 @@ SWLDisDefinedInNextOuterBlock (node *avis, int wldepth)
DBUG_RETURN (z);
}
#endif
#undef DBUG_PREFIX
src/libsac2c/arrayopt/set_withloop_depth.h
View file @
1b651fb9
...
...
@@ -21,7 +21,11 @@ extern node *SWLDids (node *arg_node, info *arg_info);
extern
node
*
SWLDvardec
(
node
*
arg_node
,
info
*
arg_info
);
extern
node
*
SWLDarg
(
node
*
arg_node
,
info
*
arg_info
);
#if 0
// Commented out due to a bug in it and lack of use in the compiler.
extern bool SWLDisDefinedInNextOuterBlock (node *avis, int wldepth);
#endif
extern
bool
SWLDisDefinedInThisBlock
(
node
*
avis
,
int
wldepth
);
#endif // _SAC_SET_WITHLOOP_DEPTH_H_
src/libsac2c/codegen/gen_startup_code.c
View file @
1b651fb9
...
...
@@ -149,6 +149,8 @@ PrintGlobalSwitches (void)
(
global
.
trace
.
aa
)
?
1
:
0
);
fprintf
(
global
.
outfile
,
"#define SAC_DO_TRACE_MT %d
\n
"
,
(
global
.
trace
.
mt
)
?
1
:
0
);
fprintf
(
global
.
outfile
,
"#define SAC_DO_TRACE_GPU %d
\n
"
,
(
global
.
trace
.
gpu
)
?
1
:
0
);
fprintf
(
global
.
outfile
,
"#define SAC_DO_TRACE_RTSPEC %d
\n
"
,
(
global
.
trace
.
rtspec
)
?
1
:
0
);
fprintf
(
global
.
outfile
,
"#define SAC_DO_TRACE_DISTMEM %d
\n
"
,
...
...
src/libsac2c/codegen/icm2c_cuda.c
View file @
1b651fb9
...
...
@@ -192,6 +192,9 @@ ICMCompileCUDA_GLOBALFUN_AP (char *funname, int vararg_cnt, char **vararg)
INDENT
;
INDENT
;
fprintf
(
global
.
outfile
,
"SAC_TR_GPU_PRINT (
\"
kernel name
\\\"
%s
\\\"\\
n
\"
);"
,
funname
);
if
(
global
.
backend
==
BE_cudahybrid
)
{
// on cudahybrid, we make use of streams, which have a fixed name
fprintf
(
global
.
outfile
,
"%s<<<grid, block, 0, *stream>>>("
,
funname
);
...
...
@@ -268,16 +271,32 @@ ICMCompileCUDA_GRID_BLOCK (int bounds_count, char **var_ANY)
#define CUDA_SET_GRID(fmt, ...) \
fprintf (global.outfile, "dim3 grid(" fmt ");\n", __VA_ARGS__); \
INDENT; \
fprintf (global.outfile, \
"SAC_TR_GPU_PRINT (\" CUDA XYZ grid dimension of " \
"%%u x %%u x %%u\", grid.x , grid.y , grid.z );\n"); \
INDENT; \
fprintf (global.outfile, "if (grid.x <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA X grid dimension must be bigger than zero. Current"\
" value is %%u\", grid.x);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (grid.y <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Y grid dimension must be bigger than zero. Current"\
" value is %%u\", grid.y);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (grid.z <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Z grid dimension must be bigger than zero. Current"\
" value is %%u\", grid.z);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (grid.x > %u || grid.y > %u || grid.z > %u) {\n", \
global.cuda_max_x_
dim
, global.cuda_max_yz_
dim
, global.cuda_max_yz_
dim
);
\
global.cuda_max_x_
grid
, global.cuda_max_yz_
grid
, global.cuda_max_yz_
grid
); \
INDENT; \
INDENT; \
INDENT; \
fprintf (global.outfile, \
"SAC_RuntimeError(\"CUDA XYZ grid dimension exceeds compute " \
"compatibilities max value: %u x %u x %u\");\n", \
global.cuda_max_x_dim, global.cuda_max_yz_dim, global.cuda_max_yz_dim); \
"SAC_RuntimeError(\"CUDA XYZ grid dimension of %%u x %%u x %%u exceeds " \
"the compute capability's max value: %u x %u x %u\"," \
" grid.x, grid.y, grid.z );\n", \
global.cuda_max_x_grid, global.cuda_max_yz_grid, global.cuda_max_yz_grid); \
INDENT; \
INDENT; \
fprintf (global.outfile, "}\n");
...
...
@@ -285,16 +304,49 @@ ICMCompileCUDA_GRID_BLOCK (int bounds_count, char **var_ANY)
#define CUDA_SET_BLOCK(fmt, ...) \
fprintf (global.outfile, "dim3 block(" fmt ");", __VA_ARGS__); \
INDENT; \
fprintf (global.outfile, \
"SAC_TR_GPU_PRINT (\" CUDA XYZ block dimension of " \
"%%u x %%u x %%u\", block.x , block.y , block.z );\n"); \
INDENT; \
fprintf (global.outfile, "if (block.x <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA X block dimension must be bigger than zero. " \
"Current value is %%u\", block.x);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (block.y <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Y block dimension must be bigger than zero. " \
"Current value is %%u\", block.y);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (block.z <= 0 ) {\n" \
"SAC_RuntimeError(\"CUDA Z block dimension must be bigger than zero. " \
"Current value is %%u\", block.z);"); \
fprintf (global.outfile, "}\n"); \
fprintf (global.outfile, "if (block.x > %u || block.y > %u || block.z > %u) {\n", \
global.cuda_max_x_dim, global.cuda_max_yz_dim, global.cuda_max_yz_dim); \
global.cuda_max_xy_block, global.cuda_max_xy_block, \
global.cuda_max_z_block); \
INDENT; \
INDENT; \
INDENT; \
fprintf (global.outfile, \
"SAC_RuntimeError(\"CUDA XYZ block dimension exceeds compute " \
"compatibilities max value: %u x %u x %u\");\n", \
global.cuda_max_x_dim, global.cuda_max_yz_dim, global.cuda_max_yz_dim); \
"SAC_RuntimeError(\"CUDA XYZ block dimension of %%u x %%u x %%u exceeds " \
"the compute capability's max value: %u x %u x %u\", " \
"block.x, block.y, block.z);\n", \
global.cuda_max_xy_block, global.cuda_max_xy_block, \
global.cuda_max_z_block); \
INDENT; \
INDENT; \
fprintf (global.outfile, "}\n"); \
INDENT; \
INDENT; \
fprintf (global.outfile, "if (block.x * block.y *block.z > %u ) {\n", \
global.cuda_max_threads_block); \
INDENT; \
INDENT; \
INDENT; \
fprintf (global.outfile, \
"SAC_RuntimeError(\"CUDA XYZ block dimension of %%u x %%u x %%u = %%u " \
"exceeds compute capability's max number of threads per block: %u\", " \
"block.x, block.y, block.z, block.x * block.y * block.z);\n", \
global.cuda_max_threads_block); \
INDENT; \
INDENT; \
fprintf (global.outfile, "}\n");
...
...
@@ -334,6 +386,10 @@ ICMCompileCUDA_GRID_BLOCK (int bounds_count, char **var_ANY)
INDENT
;
fprintf
(
global
.
outfile
,
"{
\n
"
);
fprintf
(
global
.
outfile
,
"SAC_TR_GPU_PRINT (
\"
launching kernel for %dD With-Loop
\"
);"
,
bounds_count
/
3
);
INDENT
;
if
(
bounds_count
==
3
)
{
/* 1D CUDA withloop */
INDENT
;
INDENT
;
...
...
@@ -402,6 +458,9 @@ ICMCompileCUDA_ST_GLOBALFUN_AP (char *funname, int vararg_cnt, char **vararg)
*/
INDENT
;
INDENT
;
fprintf
(
global
.
outfile
,
"SAC_TR_GPU_PRINT (
\"
kernel name
\\\"
%s
\\\"\\
n
\"
);"
,
funname
);
fprintf
(
global
.
outfile
,
"%s<<<1, 1>>>("
,
funname
);
for
(
i
=
0
;
i
<
4
*
vararg_cnt
;
i
+=
4
)
{
if
(
STReq
(
vararg
[
i
+
1
],
"float_dev"
))
{
...
...
@@ -781,7 +840,11 @@ ICMCompileCUDA_MEM_TRANSFER (char *to_NT, char *from_NT, char *basetype, char *d
ASSURE_TEXT
(
"cudaMemcpy: Destionation and source arrays "
"should have equal sizes!"
));
INDENT
;
INDENT
;
fprintf
(
global
.
outfile
,
"SAC_TR_GPU_PRINT (
\"
%s size %%d
\\
n
\"
, SAC_ND_A_SIZE( %s));"
,
direction
,
from_NT
);
fprintf
(
global
.
outfile
,
"SAC_CUDA_MEM_TRANSFER(%s, %s, %s, %s)"
,
to_NT
,
from_NT
,
basetype
,
direction
);
...
...
src/libsac2c/cuda/annotate_cuda_withloop2.c
View file @
1b651fb9
...
...
@@ -116,8 +116,11 @@ InitCudaBlockSizes (void)
global
.
cuda_blocking_factor
=
16
;
global
.
cuda_2d_block_x
=
16
;
global
.
cuda_2d_block_y
=
16
;
global
.
cuda_max_x_dim
=
65535
;
global
.
cuda_max_yz_dim
=
65535
;
global
.
cuda_max_x_grid
=
65535
;
global
.
cuda_max_yz_grid
=
65535
;
global
.
cuda_max_xy_block
=
512
;
global
.
cuda_max_z_block
=
64
;
global
.
cuda_max_threads_block
=
512
;
}
else
if
(
STReq
(
global
.
config
.
cuda_arch
,
"-arch=sm_12"
)
||
STReq
(
global
.
config
.
cuda_arch
,
"-arch=sm_13"
))
{
global
.
optimal_threads
=
256
;
...
...
@@ -127,8 +130,11 @@ InitCudaBlockSizes (void)
global
.
cuda_blocking_factor
=
16
;
global
.
cuda_2d_block_x
=
16
;
global
.
cuda_2d_block_y
=
16
;
global
.
cuda_max_x_dim
=
65535
;
global
.
cuda_max_yz_dim
=
65535
;
global
.
cuda_max_x_grid
=
65535
;
global
.
cuda_max_yz_grid
=
65535
;
global
.
cuda_max_xy_block
=
512
;
global
.
cuda_max_z_block
=
64
;
global
.
cuda_max_threads_block
=
512
;
}
else
if
(
STReq
(
global
.
config
.
cuda_arch
,
"-arch=sm_20"
))
{
/*
global.optimal_threads = 512;
...
...
@@ -148,8 +154,11 @@ InitCudaBlockSizes (void)
global
.
cuda_blocking_factor
=
32
;
global
.
cuda_2d_block_x
=
16
;
global
.
cuda_2d_block_y
=
16
;
global
.
cuda_max_x_dim
=
65535
;
global
.
cuda_max_yz_dim
=
65535
;
global
.
cuda_max_x_grid
=
65535
;
global
.
cuda_max_yz_grid
=
65535
;
global
.
cuda_max_xy_block
=
1024
;
global
.
cuda_max_z_block
=
64
;
global
.
cuda_max_threads_block
=
1024
;
}
else
if
(
STReq
(
global
.
config
.
cuda_arch
,
"-arch=sm_35"
))
{
global
.
optimal_threads
=
512
;
global
.
optimal_blocks
=
3
;
...
...
@@ -163,8 +172,11 @@ InitCudaBlockSizes (void)
global
.
cuda_blocking_factor
=
32
;
global
.
cuda_2d_block_x
=
16
;
global
.
cuda_2d_block_y
=
16
;
global
.
cuda_max_x_dim
=
2147483647
;
global
.
cuda_max_yz_dim
=
65535
;
global
.
cuda_max_x_grid
=
2147483647
;
global
.
cuda_max_yz_grid
=
65535
;
global
.
cuda_max_xy_block
=
1024
;
global
.
cuda_max_z_block
=
64
;
global
.
cuda_max_threads_block
=
1024
;
}
else
if
(
STReq
(
global
.
config
.
cuda_arch
,
"-arch=sm_50"
))
{
global
.
optimal_threads
=
512
;
global
.
optimal_blocks
=
3
;
...
...
@@ -178,8 +190,11 @@ InitCudaBlockSizes (void)
global
.
cuda_blocking_factor
=
32
;
global
.
cuda_2d_block_x
=
32
;
global
.
cuda_2d_block_y
=
32
;
global
.
cuda_max_x_dim
=
2147483647
;
global
.
cuda_max_yz_dim
=
65535
;
global
.
cuda_max_x_grid
=
2147483647
;
global
.
cuda_max_yz_grid
=
65535
;
global
.
cuda_max_xy_block
=
1024
;
global
.
cuda_max_z_block
=
64
;
global
.
cuda_max_threads_block
=
1024
;
}
else
{
if
(
STReq
(
global
.
config
.
cuda_arch
,
"no"
))
{
CTIwarn
(
"CUDA architecture was not detected during install, setting to "
...
...
src/libsac2c/global/flags.mac
View file @
1b651fb9
...
...
@@ -27,6 +27,7 @@ TRACE (mem, 'm', FALSE)
TRACE (wl, 'w', FALSE)
TRACE (aa, 's', FALSE)
TRACE (mt, 't', FALSE)
TRACE (gpu, 'g', FALSE) /* CUDA backend */
TRACE (cenv, 'c', FALSE)
TRACE (distmem, 'd', FALSE) /* Distributed memory backend */
...
...
src/libsac2c/global/globals.c
View file @
1b651fb9
...
...
@@ -629,7 +629,7 @@ get_terminal_size (void)
*/
void
GLOBinitializeGlobal
(
int
argc
,
char
*
argv
[],
tool_t
tool
,
char
*
toolname
)
GLOBinitializeGlobal
(
int
argc
,
char
*
argv
[],
tool_t
tool
,
const
char
*
toolname
)
{
DBUG_ENTER
();
...
...
src/libsac2c/global/globals.h
View file @
1b651fb9
...
...
@@ -33,7 +33,7 @@
extern
FILE
*
yyin
;
extern
global_t
global
;
extern
void
GLOBinitializeGlobal
(
int
argc
,
char
*
argv
[],
tool_t
tool
,
char
*
toolname
);
extern
void
GLOBinitializeGlobal
(
int
argc
,
char
*
argv
[],
tool_t
tool
,
const
char
*
toolname
);
extern
void
GLOBsetupBackend
(
void
);
extern
void
GLOBsetupDistMemCommLib
(
void
);
extern
void
GLOBfinalizeGlobal
(
void
);
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment