diff --git a/.gitignore b/.gitignore index 98b65133..a49ad728 100644 --- a/.gitignore +++ b/.gitignore @@ -20,12 +20,11 @@ Bin/Release/x64/App64.exe *.db *.sln *.d - *.o - *.a - *.so +*.stamp +*.depend App/Makefile diff --git a/.gitmodules b/.gitmodules index 43834ac7..e69de29b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "Anvil"] - path = Anvil - url = https://github.com/GPUOpen-LibrariesAndSDKs/Anvil.git diff --git a/3rdparty/Perl/lib/File/Copy/Recursive.pm b/3rdparty/Perl/lib/File/Copy/Recursive.pm new file mode 100644 index 00000000..f0e7d581 --- /dev/null +++ b/3rdparty/Perl/lib/File/Copy/Recursive.pm @@ -0,0 +1,696 @@ +package File::Copy::Recursive; + +use strict; +BEGIN { + # Keep older versions of Perl from trying to use lexical warnings + $INC{'warnings.pm'} = "fake warnings entry for < 5.6 perl ($])" if $] < 5.006; +} +use warnings; + +use Carp; +use File::Copy; +use File::Spec; #not really needed because File::Copy already gets it, but for good measure :) + +use vars qw( + @ISA @EXPORT_OK $VERSION $MaxDepth $KeepMode $CPRFComp $CopyLink + $PFSCheck $RemvBase $NoFtlPth $ForcePth $CopyLoop $RMTrgFil $RMTrgDir + $CondCopy $BdTrgWrn $SkipFlop $DirPerms +); + +require Exporter; +@ISA = qw(Exporter); +@EXPORT_OK = qw(fcopy rcopy dircopy fmove rmove dirmove pathmk pathrm pathempty pathrmdir); +$VERSION = '0.38'; + +$MaxDepth = 0; +$KeepMode = 1; +$CPRFComp = 0; +$CopyLink = eval { local $SIG{'__DIE__'};symlink '',''; 1 } || 0; +$PFSCheck = 1; +$RemvBase = 0; +$NoFtlPth = 0; +$ForcePth = 0; +$CopyLoop = 0; +$RMTrgFil = 0; +$RMTrgDir = 0; +$CondCopy = {}; +$BdTrgWrn = 0; +$SkipFlop = 0; +$DirPerms = 0777; + +my $samecheck = sub { + return 1 if $^O eq 'MSWin32'; # need better way to check for this on winders... + return if @_ != 2 || !defined $_[0] || !defined $_[1]; + return if $_[0] eq $_[1]; + + my $one = ''; + if($PFSCheck) { + $one = join( '-', ( stat $_[0] )[0,1] ) || ''; + my $two = join( '-', ( stat $_[1] )[0,1] ) || ''; + if ( $one eq $two && $one ) { + carp "$_[0] and $_[1] are identical"; + return; + } + } + + if(-d $_[0] && !$CopyLoop) { + $one = join( '-', ( stat $_[0] )[0,1] ) if !$one; + my $abs = File::Spec->rel2abs($_[1]); + my @pth = File::Spec->splitdir( $abs ); + while(@pth) { + my $cur = File::Spec->catdir(@pth); + last if !$cur; # probably not necessary, but nice to have just in case :) + my $two = join( '-', ( stat $cur )[0,1] ) || ''; + if ( $one eq $two && $one ) { + # $! = 62; # Too many levels of symbolic links + carp "Caught Deep Recursion Condition: $_[0] contains $_[1]"; + return; + } + + pop @pth; + } + } + + return 1; +}; + +my $glob = sub { + my ($do, $src_glob, @args) = @_; + + local $CPRFComp = 1; + + my @rt; + for my $path ( glob($src_glob) ) { + my @call = [$do->($path, @args)] or return; + push @rt, \@call; + } + + return @rt; +}; + +my $move = sub { + my $fl = shift; + my @x; + if($fl) { + @x = fcopy(@_) or return; + } else { + @x = dircopy(@_) or return; + } + if(@x) { + if($fl) { + unlink $_[0] or return; + } else { + pathrmdir($_[0]) or return; + } + if($RemvBase) { + my ($volm, $path) = File::Spec->splitpath($_[0]); + pathrm(File::Spec->catpath($volm,$path,''), $ForcePth, $NoFtlPth) or return; + } + } + return wantarray ? @x : $x[0]; +}; + +my $ok_todo_asper_condcopy = sub { + my $org = shift; + my $copy = 1; + if(exists $CondCopy->{$org}) { + if($CondCopy->{$org}{'md5'}) { + + } + if($copy) { + + } + } + return $copy; +}; + +sub fcopy { + $samecheck->(@_) or return; + if($RMTrgFil && (-d $_[1] || -e $_[1]) ) { + my $trg = $_[1]; + if( -d $trg ) { + my @trgx = File::Spec->splitpath( $_[0] ); + $trg = File::Spec->catfile( $_[1], $trgx[ $#trgx ] ); + } + $samecheck->($_[0], $trg) or return; + if(-e $trg) { + if($RMTrgFil == 1) { + unlink $trg or carp "\$RMTrgFil failed: $!"; + } else { + unlink $trg or return; + } + } + } + my ($volm, $path) = File::Spec->splitpath($_[1]); + if($path && !-d $path) { + pathmk(File::Spec->catpath($volm,$path,''), $NoFtlPth); + } + if( -l $_[0] && $CopyLink ) { + carp "Copying a symlink ($_[0]) whose target does not exist" + if !-e readlink($_[0]) && $BdTrgWrn; + symlink readlink(shift()), shift() or return; + } else { + copy(@_) or return; + + my @base_file = File::Spec->splitpath($_[0]); + my $mode_trg = -d $_[1] ? File::Spec->catfile($_[1], $base_file[ $#base_file ]) : $_[1]; + + chmod scalar((stat($_[0]))[2]), $mode_trg if $KeepMode; + } + return wantarray ? (1,0,0) : 1; # use 0's incase they do math on them and in case rcopy() is called in list context = no uninit val warnings +} + +sub rcopy { + if (-l $_[0] && $CopyLink) { + goto &fcopy; + } + + goto &dircopy if -d $_[0] || substr( $_[0], ( 1 * -1), 1) eq '*'; + goto &fcopy; +} + +sub rcopy_glob { + $glob->(\&rcopy, @_); +} + +sub dircopy { + if($RMTrgDir && -d $_[1]) { + if($RMTrgDir == 1) { + pathrmdir($_[1]) or carp "\$RMTrgDir failed: $!"; + } else { + pathrmdir($_[1]) or return; + } + } + my $globstar = 0; + my $_zero = $_[0]; + my $_one = $_[1]; + if ( substr( $_zero, ( 1 * -1 ), 1 ) eq '*') { + $globstar = 1; + $_zero = substr( $_zero, 0, ( length( $_zero ) - 1 ) ); + } + + $samecheck->( $_zero, $_[1] ) or return; + if ( !-d $_zero || ( -e $_[1] && !-d $_[1] ) ) { + $! = 20; + return; + } + + if(!-d $_[1]) { + pathmk($_[1], $NoFtlPth) or return; + } else { + if($CPRFComp && !$globstar) { + my @parts = File::Spec->splitdir($_zero); + while($parts[ $#parts ] eq '') { pop @parts; } + $_one = File::Spec->catdir($_[1], $parts[$#parts]); + } + } + my $baseend = $_one; + my $level = 0; + my $filen = 0; + my $dirn = 0; + + my $recurs; #must be my()ed before sub {} since it calls itself + $recurs = sub { + my ($str,$end,$buf) = @_; + $filen++ if $end eq $baseend; + $dirn++ if $end eq $baseend; + + $DirPerms = oct($DirPerms) if substr($DirPerms,0,1) eq '0'; + mkdir($end,$DirPerms) or return if !-d $end; + chmod scalar((stat($str))[2]), $end if $KeepMode; + if($MaxDepth && $MaxDepth =~ m/^\d+$/ && $level >= $MaxDepth) { + return ($filen,$dirn,$level) if wantarray; + return $filen; + } + $level++; + + + my @files; + if ( $] < 5.006 ) { + opendir(STR_DH, $str) or return; + @files = grep( $_ ne '.' && $_ ne '..', readdir(STR_DH)); + closedir STR_DH; + } + else { + opendir(my $str_dh, $str) or return; + @files = grep( $_ ne '.' && $_ ne '..', readdir($str_dh)); + closedir $str_dh; + } + + for my $file (@files) { + my ($file_ut) = $file =~ m{ (.*) }xms; + my $org = File::Spec->catfile($str, $file_ut); + my $new = File::Spec->catfile($end, $file_ut); + if( -l $org && $CopyLink ) { + carp "Copying a symlink ($org) whose target does not exist" + if !-e readlink($org) && $BdTrgWrn; + symlink readlink($org), $new or return; + } + elsif(-d $org) { + $recurs->($org,$new,$buf) if defined $buf; + $recurs->($org,$new) if !defined $buf; + $filen++; + $dirn++; + } + else { + if($ok_todo_asper_condcopy->($org)) { + if($SkipFlop) { + fcopy($org,$new,$buf) or next if defined $buf; + fcopy($org,$new) or next if !defined $buf; + } + else { + fcopy($org,$new,$buf) or return if defined $buf; + fcopy($org,$new) or return if !defined $buf; + } + chmod scalar((stat($org))[2]), $new if $KeepMode; + $filen++; + } + } + } + 1; + }; + + $recurs->($_zero, $_one, $_[2]) or return; + return wantarray ? ($filen,$dirn,$level) : $filen; +} + +sub fmove { $move->(1, @_) } + +sub rmove { + if (-l $_[0] && $CopyLink) { + goto &fmove; + } + + goto &dirmove if -d $_[0] || substr( $_[0], ( 1 * -1), 1) eq '*'; + goto &fmove; +} + +sub rmove_glob { + $glob->(\&rmove, @_); +} + +sub dirmove { $move->(0, @_) } + +sub pathmk { + my @parts = File::Spec->splitdir( shift() ); + my $nofatal = shift; + my $pth = $parts[0]; + my $zer = 0; + if(!$pth) { + $pth = File::Spec->catdir($parts[0],$parts[1]); + $zer = 1; + } + for($zer..$#parts) { + $DirPerms = oct($DirPerms) if substr($DirPerms,0,1) eq '0'; + mkdir($pth,$DirPerms) or return if !-d $pth && !$nofatal; + mkdir($pth,$DirPerms) if !-d $pth && $nofatal; + $pth = File::Spec->catdir($pth, $parts[$_ + 1]) unless $_ == $#parts; + } + 1; +} + +sub pathempty { + my $pth = shift; + + return 2 if !-d $pth; + + my @names; + my $pth_dh; + if ( $] < 5.006 ) { + opendir(PTH_DH, $pth) or return; + @names = grep !/^\.+$/, readdir(PTH_DH); + } + else { + opendir($pth_dh, $pth) or return; + @names = grep !/^\.+$/, readdir($pth_dh); + } + + for my $name (@names) { + my ($name_ut) = $name =~ m{ (.*) }xms; + my $flpth = File::Spec->catdir($pth, $name_ut); + + if( -l $flpth ) { + unlink $flpth or return; + } + elsif(-d $flpth) { + pathrmdir($flpth) or return; + } + else { + unlink $flpth or return; + } + } + + if ( $] < 5.006 ) { + closedir PTH_DH; + } + else { + closedir $pth_dh; + } + + 1; +} + +sub pathrm { + my $path = shift; + return 2 if !-d $path; + my @pth = File::Spec->splitdir( $path ); + my $force = shift; + + while(@pth) { + my $cur = File::Spec->catdir(@pth); + last if !$cur; # necessary ??? + if(!shift()) { + pathempty($cur) or return if $force; + rmdir $cur or return; + } + else { + pathempty($cur) if $force; + rmdir $cur; + } + pop @pth; + } + 1; +} + +sub pathrmdir { + my $dir = shift; + if( -e $dir ) { + return if !-d $dir; + } + else { + return 2; + } + + pathempty($dir) or return; + + rmdir $dir or return; +} + +1; + +__END__ + +=head1 NAME + +File::Copy::Recursive - Perl extension for recursively copying files and directories + +=head1 SYNOPSIS + + use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove); + + fcopy($orig,$new[,$buf]) or die $!; + rcopy($orig,$new[,$buf]) or die $!; + dircopy($orig,$new[,$buf]) or die $!; + + fmove($orig,$new[,$buf]) or die $!; + rmove($orig,$new[,$buf]) or die $!; + dirmove($orig,$new[,$buf]) or die $!; + + rcopy_glob("orig/stuff-*", $trg [, $buf]) or die $!; + rmove_glob("orig/stuff-*", $trg [,$buf]) or die $!; + +=head1 DESCRIPTION + +This module copies and moves directories recursively (or single files, well... singley) to an optional depth and attempts to preserve each file or directory's mode. + +=head1 EXPORT + +None by default. But you can export all the functions as in the example above and the path* functions if you wish. + +=head2 fcopy() + +This function uses File::Copy's copy() function to copy a file but not a directory. Any directories are recursively created if need be. +One difference to File::Copy::copy() is that fcopy attempts to preserve the mode (see Preserving Mode below) +The optional $buf in the synopsis if the same as File::Copy::copy()'s 3rd argument +returns the same as File::Copy::copy() in scalar context and 1,0,0 in list context to accomidate rcopy()'s list context on regular files. (See below for more info) + +=head2 dircopy() + +This function recursively traverses the $orig directory's structure and recursively copies it to the $new directory. +$new is created if necessary (multiple non existant directories is ok (IE foo/bar/baz). The script logically and portably creates all of them if necessary). +It attempts to preserve the mode (see Preserving Mode below) and +by default it copies all the way down into the directory, (see Managing Depth) below. +If a directory is not specified it croaks just like fcopy croaks if its not a file that is specified. + +returns true or false, for true in scalar context it returns the number of files and directories copied, +In list context it returns the number of files and directories, number of directories only, depth level traversed. + + my $num_of_files_and_dirs = dircopy($orig,$new); + my($num_of_files_and_dirs,$num_of_dirs,$depth_traversed) = dircopy($orig,$new); + +Normally it stops and return's if a copy fails, to continue on regardless set $File::Copy::Recursive::SkipFlop to true. + + local $File::Copy::Recursive::SkipFlop = 1; + +That way it will copy everythgingit can ina directory and won't stop because of permissions, etc... + +=head2 rcopy() + +This function will allow you to specify a file *or* directory. It calls fcopy() if its a file and dircopy() if its a directory. +If you call rcopy() (or fcopy() for that matter) on a file in list context, the values will be 1,0,0 since no directories and no depth are used. +This is important becasue if its a directory in list context and there is only the initial directory the return value is 1,1,1. + +=head2 rcopy_glob() + +This function lets you specify a pattern suitable for perl's glob() as the first argument. Subsequently each path returned by perl's glob() gets rcopy()ied. + +It returns and array whose items are array refs that contain the return value of each rcopy() call. + +It forces behavior as if $File::Copy::Recursive::CPRFComp is true. + +=head2 fmove() + +Copies the file then removes the original. You can manage the path the original file is in according to $RemvBase. + +=head2 dirmove() + +Uses dircopy() to copy the directory then removes the original. You can manage the path the original directory is in according to $RemvBase. + +=head2 rmove() + +Like rcopy() but calls fmove() or dirmove() instead. + +=head2 rmove_glob() + +Like rcopy_glob() but calls rmove() instead of rcopy() + +=head3 $RemvBase + +Default is false. When set to true the *move() functions will not only attempt to remove the original file or directory but will remove the given path it is in. + +So if you: + + rmove('foo/bar/baz', '/etc/'); + # "baz" is removed from foo/bar after it is successfully copied to /etc/ + + local $File::Copy::Recursive::Remvbase = 1; + rmove('foo/bar/baz','/etc/'); + # if baz is successfully copied to /etc/ : + # first "baz" is removed from foo/bar + # then "foo/bar is removed via pathrm() + +=head4 $ForcePth + +Default is false. When set to true it calls pathempty() before any directories are removed to empty the directory so it can be rmdir()'ed when $RemvBase is in effect. + +=head2 Creating and Removing Paths + +=head3 $NoFtlPth + +Default is false. If set to true rmdir(), mkdir(), and pathempty() calls in pathrm() and pathmk() do not return() on failure. + +If its set to true they just silently go about their business regardless. This isn't a good idea but its there if you want it. + +=head3 $DirPerms + +Mode to pass to any mkdir() calls. Defaults to 0777 as per umask()'s POD. Explicitly having this allows older perls to be able to use FCR and might add a bit of flexibility for you. + +Any value you set it to should be suitable for oct() + +=head3 Path functions + +These functions exist soley because they were necessary for the move and copy functions to have the features they do and not because they are of themselves the purpose of this module. That being said, here is how they work so you can understand how the copy and move funtions work and use them by themselves if you wish. + +=head4 pathrm() + +Removes a given path recursively. It removes the *entire* path so be carefull!!! + +Returns 2 if the given path is not a directory. + + File::Copy::Recursive::pathrm('foo/bar/baz') or die $!; + # foo no longer exists + +Same as: + + rmdir 'foo/bar/baz' or die $!; + rmdir 'foo/bar' or die $!; + rmdir 'foo' or die $!; + +An optional second argument makes it call pathempty() before any rmdir()'s when set to true. + + File::Copy::Recursive::pathrm('foo/bar/baz', 1) or die $!; + # foo no longer exists + +Same as:PFSCheck + + File::Copy::Recursive::pathempty('foo/bar/baz') or die $!; + rmdir 'foo/bar/baz' or die $!; + File::Copy::Recursive::pathempty('foo/bar/') or die $!; + rmdir 'foo/bar' or die $!; + File::Copy::Recursive::pathempty('foo/') or die $!; + rmdir 'foo' or die $!; + +An optional third argument acts like $File::Copy::Recursive::NoFtlPth, again probably not a good idea. + +=head4 pathempty() + +Recursively removes the given directory's contents so it is empty. returns 2 if argument is not a directory, 1 on successfully emptying the directory. + + File::Copy::Recursive::pathempty($pth) or die $!; + # $pth is now an empty directory + +=head4 pathmk() + +Creates a given path recursively. Creates foo/bar/baz even if foo does not exist. + + File::Copy::Recursive::pathmk('foo/bar/baz') or die $!; + +An optional second argument if true acts just like $File::Copy::Recursive::NoFtlPth, which means you'd never get your die() if something went wrong. Again, probably a *bad* idea. + +=head4 pathrmdir() + +Same as rmdir() but it calls pathempty() first to recursively empty it first since rmdir can not remove a directory with contents. +Just removes the top directory the path given instead of the entire path like pathrm(). Return 2 if given argument does not exist (IE its already gone). Return false if it exists but is not a directory. + +=head2 Preserving Mode + +By default a quiet attempt is made to change the new file or directory to the mode of the old one. +To turn this behavior off set + $File::Copy::Recursive::KeepMode +to false; + +=head2 Managing Depth + +You can set the maximum depth a directory structure is recursed by setting: + $File::Copy::Recursive::MaxDepth +to a whole number greater than 0. + +=head2 SymLinks + +If your system supports symlinks then symlinks will be copied as symlinks instead of as the target file. +Perl's symlink() is used instead of File::Copy's copy() +You can customize this behavior by setting $File::Copy::Recursive::CopyLink to a true or false value. +It is already set to true or false dending on your system's support of symlinks so you can check it with an if statement to see how it will behave: + + if($File::Copy::Recursive::CopyLink) { + print "Symlinks will be preserved\n"; + } else { + print "Symlinks will not be preserved because your system does not support it\n"; + } + +If symlinks are being copied you can set $File::Copy::Recursive::BdTrgWrn to true to make it carp when it copies a link whose target does not exist. Its false by default. + + local $File::Copy::Recursive::BdTrgWrn = 1; + +=head2 Removing existing target file or directory before copying. + +This can be done by setting $File::Copy::Recursive::RMTrgFil or $File::Copy::Recursive::RMTrgDir for file or directory behavior respectively. + +0 = off (This is the default) + +1 = carp() $! if removal fails + +2 = return if removal fails + + local $File::Copy::Recursive::RMTrgFil = 1; + fcopy($orig, $target) or die $!; + # if it fails it does warn() and keeps going + + local $File::Copy::Recursive::RMTrgDir = 2; + dircopy($orig, $target) or die $!; + # if it fails it does your "or die" + +This should be unnecessary most of the time but its there if you need it :) + +=head2 Turning off stat() check + +By default the files or directories are checked to see if they are the same (IE linked, or two paths (absolute/relative or different relative paths) to the same file) by comparing the file's stat() info. +It's a very efficient check that croaks if they are and shouldn't be turned off but if you must for some weird reason just set $File::Copy::Recursive::PFSCheck to a false value. ("PFS" stands for "Physical File System") + +=head2 Emulating cp -rf dir1/ dir2/ + +By default dircopy($dir1,$dir2) will put $dir1's contents right into $dir2 whether $dir2 exists or not. + +You can make dircopy() emulate cp -rf by setting $File::Copy::Recursive::CPRFComp to true. + +NOTE: This only emulates -f in the sense that it does not prompt. It does not remove the target file or directory if it exists. +If you need to do that then use the variables $RMTrgFil and $RMTrgDir described in "Removing existing target file or directory before copying" above. + +That means that if $dir2 exists it puts the contents into $dir2/$dir1 instead of $dir2 just like cp -rf. +If $dir2 does not exist then the contents go into $dir2 like normal (also like cp -rf) + +So assuming 'foo/file': + + dircopy('foo', 'bar') or die $!; + # if bar does not exist the result is bar/file + # if bar does exist the result is bar/file + + $File::Copy::Recursive::CPRFComp = 1; + dircopy('foo', 'bar') or die $!; + # if bar does not exist the result is bar/file + # if bar does exist the result is bar/foo/file + +You can also specify a star for cp -rf glob type behavior: + + dircopy('foo/*', 'bar') or die $!; + # if bar does not exist the result is bar/file + # if bar does exist the result is bar/file + + $File::Copy::Recursive::CPRFComp = 1; + dircopy('foo/*', 'bar') or die $!; + # if bar does not exist the result is bar/file + # if bar does exist the result is bar/file + +NOTE: The '*' is only like cp -rf foo/* and *DOES NOT EXPAND PARTIAL DIRECTORY NAMES LIKE YOUR SHELL DOES* (IE not like cp -rf fo* to copy foo/*) + +=head2 Allowing Copy Loops + +If you want to allow: + + cp -rf . foo/ + +type behavior set $File::Copy::Recursive::CopyLoop to true. + +This is false by default so that a check is done to see if the source directory will contain the target directory and croaks to avoid this problem. + +If you ever find a situation where $CopyLoop = 1 is desirable let me know (IE its a bad bad idea but is there if you want it) + +(Note: On Windows this was necessary since it uses stat() to detemine samedness and stat() is essencially useless for this on Windows. +The test is now simply skipped on Windows but I'd rather have an actual reliable check if anyone in Microsoft land would care to share) + +=head1 SEE ALSO + +L L + +=head1 TO DO + +I am currently working on and reviewing some other modules to use in the new interface so we can lose the horrid globals as well as some other undesirable traits and also more easily make available some long standing requests. + +Tests will be easier to do with the new interface and hence the testing focus will shift to the new interface and aim to be comprehensive. + +The old interface will work, it just won't be brought in until it is used, so it will add no overhead for users of the new interface. + +I'll add this after the latest verision has been out for a while with no new features or issues found :) + +=head1 AUTHOR + +Daniel Muey, L + +=head1 COPYRIGHT AND LICENSE + +Copyright 2004 by Daniel Muey + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut diff --git a/3rdparty/Perl/lib/SDKDownloader.pm b/3rdparty/Perl/lib/SDKDownloader.pm new file mode 100644 index 00000000..f0b4e0e9 --- /dev/null +++ b/3rdparty/Perl/lib/SDKDownloader.pm @@ -0,0 +1,449 @@ +#!/usr/bin/perl + +package SDKDownloader; + +use strict; +use warnings; + +use Carp qw (croak); +use Cwd; +use File::Basename qw(dirname basename); +use File::Path qw(mkpath rmtree); +use File::Spec::Functions; +use File::Copy; +use File::Basename; +use Getopt::Long; +use lib File::Spec->rel2abs(dirname(__FILE__)) . '/.'; + +# Override this value to point to a local directory for local testing +my $base_url = "https://ono.unity3d.com/unity-extra/"; +my $base_url_mirror = "http://mercurial-mirror.hq.unity3d.com/unity-extra/"; + +# SDKDownloader fetches the default branch of a remote repository located at $base_url/$repo_name. +# The default branch contains an SDK.pm which specifies the version of the SDK along with branch information. +# It also has methods to install the sdk and check if it is installed. +# Named branches of the repository contain different versions of the sdk for different platforms. +# If necessary, this script clones the appropriate branch for the given version and installs the SDK. +# Version number is set in SDK.pm but can be overriden via $sdk_override passed to PepareSDK. +# By default the install location is set to HOME but each SDK can override this, and it can be overriden +# by setting the environment variable UNITY_SDK_LOCATION. + +# https://ono.unity3d.com/unity-extra/stv-sdk +# - branch: default +# - SDK.pm contains the following: +# +# our %default_values = ( +# # Current version of the sdk that everything will build with unless sdk-override is specified. +# # Change this to update SDK globally. +# version => "r03", +# +# # Format of the branches for updating +# # \$version is replaced with the version number +# # \$platform is replaced with the platform name +# branch => "stv-ndk-\$version-\$platform", +# +# # Format of the archive that exists after updating to the above branch. +# # The same substition applies as above. +# archive => "stv-ndk-\$version-\$platform.tar.bz2", +# +# # The format of the above archive. +# # Valid values: zip, gzip, bz2 +# compress_format => "bz2", +# +# # The following are filled out by the SDKDownloader.pm +# install_path => "", +# ); +# +# sub IsCorrectVersionInstalled +# - Determine if the SDK for $config->{version} is installed. +# +# sub InstallSDK +# - Install the SDK for $config->{version}. +# - SDK has already been cloned to $sdk_download->{clone_path} +# - SDK has already been extracted to $sdk_download->{unpack_path} +# - This should install it to $config->{install_path} if necessary, or put it wherever it goes. +# +# sub SetupSDK +# - This is called on each build after the SDK is confirmed to be installed. +# - This can do whatever per build setup you need to do here such as setting ENV vars. +# +# - branch: stv-ndk-r03-linux +# - stv-ndk-r03-linux.tar.bz2 +# +# The named branches have one platform / revision archive each. They are discovered via the patterns +# in SDK.pm's default_values' branch / archive strings. + +# Main entry point to SDKDownloader system: +# repo_name: name of the repo at $base_url +# sdk_override: SDK revision to install. Leave blank to install the "default" (from SDK.pm) +# artifacts_folder: Place to pull and update the inividual SDK repos. +sub PrepareSDK +{ + my ($repo_name, $sdk_override, $artifacts_folder) = @_; + + # let the user decide to use the SDK that's locally installed. + # Handy if the connection to the repo is really slow or unavailable. + if ($sdk_override eq "local") + { + print "[SDKDownloader] using local SDK.\n"; + return; + } + + $repo_name || croak("ERROR: repo_name not set"); + + my $cwd = getcwd; + + my $dir = File::Spec->rel2abs(dirname($0)); + if ($artifacts_folder) + { + $dir = catfile($artifacts_folder, "SDKDownloader"); + mkpath($dir); + } + chdir($dir); + + # Obtain platform specific information + my $host_config = GetHostDetails(); + + print("[SDKDownloader] Begin SDK check: $repo_name ...\n"); + my $vcs_url = GetBaseURL() . $repo_name; + + # Clones / Updates the specific SDK repo which contains the sdk perl module + UpdateSDKRepo($repo_name, $vcs_url, $dir); + + # Load in the SDK specific perl module + my $module = "SDK.pm"; + delete($INC{$module}); + push(@INC, catdir(($cwd, $dir, $repo_name))); + require $module; + + # Obtain default SDK configuration so we can fill out values + \%SDK::default_values || croak("ERROR: SDK.pm does not contain default values."); + my $sdk_config = \%SDK::default_values; + + # apply sdk override if necessary + $sdk_config->{version} = $sdk_override if $sdk_override; + + # set the branch/archive name given our current version and platform + $sdk_config->{branch} =~ s/\$version/$sdk_config->{version}/; + $sdk_config->{branch} =~ s/\$platform/$host_config->{platform}/; + + $sdk_config->{archive} =~ s/\$version/$sdk_config->{version}/; + $sdk_config->{archive} =~ s/\$platform/$host_config->{platform}/; + + $sdk_config->{install_path} = $host_config->{install_path}; + $sdk_config->{host_config} = $host_config; + $sdk_config->{vcs_url} = $vcs_url; + + # Determine if the SDK for $version is installed. + if (SDK::IsCorrectVersionInstalled($sdk_config)) + { + print("[SDKDownloader] SDK $sdk_config->{version} is already installed ...\n"); + } + else + { + my $sdk_download = DownloadAndExtractSDK($sdk_config, $repo_name); + + # Install the SDK for $version. + # SDK has already been cloned to $sdk_download->{clone_path} + # SDK has already been extracted to $sdk_download->{unpack_path} + # This should install it to $sdk_config->{install_path} if necessary, or put it wherever it goes. + SDK::InstallSDK($sdk_config, $sdk_download); + + CleanupSDKDownload($sdk_download); + + if (!SDK::IsCorrectVersionInstalled($sdk_config)) + { + croak("Failed to install SDK. Something is wrong with $vcs_url SDK.pm?"); + } + } + + # This is called on each build after the SDK is confirmed to be installed. + # This can do whatever per build setup you need to do here such as setting ENV vars. + print("[SDKDownloader] Setup SDK ...\n"); + SDK::SetupSDK($sdk_config); + + print("[SDKDownloader] Setup complete for $repo_name at version $sdk_config->{version}.\n"); + chdir($cwd); +} + +# Pulls / Updates the SDK repo $vcs_url to $dir (artifacts) if necessary. +# We avoid calling hg update every build by caching the parent revision and only updating when it changes. +sub UpdateSDKRepo +{ + my ($repo_name, $vcs_url, $dir) = @_; + + print("[SDKDownloader] Checking if we need to update $vcs_url ...\n"); + + # We want to avoid calling hg update every time we build, so cache the parent revision + # and only update if it changes + + my $parent_rev = "parent"; + + # check if we're in a mercurial repo + if (system("hg parent --template \"{node}\"") == 0) + { + $parent_rev = `hg parent --template "{node}"`; + } + + # check if we're in a git repo + elsif (system("git rev-parse HEAD") == 0) + { + $parent_rev = `git rev-parse HEAD`; + } + + # else we'll just pull every time + + print("\n"); + + my $old_rev = ""; + + if (open(REV, "<$repo_name-rev.txt")) + { + $old_rev = ; + close(REV); + } + + # If the sdk repo already exists just update it + my $updated = 0; + if (-d $repo_name) + { + print("[SDKDownloader] \tOLD REV: $old_rev\n"); + print("[SDKDownloader] \tPARENT_REV: $parent_rev\n"); + chdir($repo_name); + if ($old_rev ne $parent_rev) + { + print("[SDKDownloader] Updating $vcs_url (branch default) at $dir ...\n\n"); + system("hg pull $vcs_url") && croak("ERROR: can't hg pull $vcs_url"); + system("hg update") && croak("ERROR: can't hg update $vcs_url"); + $updated = 1; + } + } + else + { + print("[SDKDownloader] Cloning $vcs_url (branch default) to $dir ...\n"); + system("hg clone $vcs_url") && croak("ERROR: can't hg clone $vcs_url"); + chdir($repo_name); + $updated = 1; + } + + if ($updated) + { + open(REV, ">../$repo_name-rev.txt") or croak("ERROR: Couldn't open $repo_name-rev.txt for writing."); + print(REV $parent_rev); + close(REV); + } + else + { + print("[SDKDownloader] Repo has not changed parent since last build, no need to get latest $repo_name.\n"); + } +} + +# Pulls and extracts a specific revision of the SDK. +# sdk_config: +# version: version of sdk to install +# branch: branch name of repo to clone +# archive: archive name in repo to extract +# compress_format: format of archive +# temp_folder_name: basename of folder to use +# returns: +# sdk_download: +# clone_path: location the sdk repo is cloned to +# unpack_path: location the archive is extracted to +# +# NOTE: must call CleanupSDKDownload to remove clone_path and unpack_path when done. +sub DownloadAndExtractSDK +{ + my ($sdk_config, $temp_folder_name) = @_; + my $host_config = $sdk_config->{host_config}; + my $vcs_url = $sdk_config->{vcs_url}; + + # Download and extract SDK + my $temp_clone_path = catfile($host_config->{tmp}, $temp_folder_name); + my $temp_archive_file = catfile($temp_clone_path, $sdk_config->{archive}); + my $temp_unpack_path = catfile($host_config->{tmp}, $temp_folder_name . "_unpack"); + + print("[SDKDownloader]\tInstalling SDK $sdk_config->{version} ...\n"); + print("[SDKDownloader]\t\tTmp DL: " . $temp_clone_path . "\n"); + print("[SDKDownloader]\t\tTmp DL File: " . $temp_archive_file . "\n"); + print("[SDKDownloader]\t\tTmp unpack: " . $temp_unpack_path . "\n"); + + rmtree($temp_clone_path); + rmtree($temp_unpack_path); + mkpath($temp_clone_path); + mkpath($temp_unpack_path); + + # obtain sdk + print("\n[SDKDownloader]\t\tCloning SDK $vcs_url (branch $sdk_config->{branch}) ...\n"); + system("hg clone -b $sdk_config->{branch} $vcs_url $temp_clone_path") && croak("ERROR: can't hg clone -b $sdk_config->{branch} $vcs_url $temp_clone_path"); + + my $branchRev = qx(hg id -ib -r $sdk_config->{branch} -R $temp_clone_path); + print("\n[SDKDownloader]\t\tCloned to revision $branchRev ...\n"); + + # extract sdk + my $sdk_compressed_format = $sdk_config->{compress_format}; + print("[SDKDownloader]\t\tCompress format: $sdk_compressed_format\n"); + my $uncompress = $host_config->{uncompressors}{$sdk_compressed_format}; + $uncompress =~ s/\$ARCHIVE/$temp_archive_file/; + $uncompress =~ s/\$OUT_DIR/$temp_unpack_path/; + + print("[SDKDownloader]\t\tExtracting $temp_archive_file => $temp_unpack_path ...\n"); + print("[SDKDownloader]\t\t\t$uncompress\n"); + system($uncompress); + + unlink($temp_archive_file); + + my %sdk_download = ( + clone_path => $temp_clone_path, + unpack_path => $temp_unpack_path, + ); + + return \%sdk_download; +} + +# Removes temporary files from DownloadAndExtractSDK. +sub CleanupSDKDownload +{ + my ($sdk_download) = @_; + rmtree($sdk_download->{unpack_path}); + rmtree($sdk_download->{clone_path}); +} + +# Returns information about the host environment. +sub GetHostDetails +{ + my $HOST_ENV; + my $TMP; + my $HOME; + my %UNCOMPRESSORS = ( + "zip" => "unzip -d \$OUT_DIR \$ARCHIVE", + "gzip" => "tar -C \$OUT_DIR -xf \$ARCHIVE", + "bz2" => "tar -C \$OUT_DIR -xf \$ARCHIVE", + "7z" => "7za x \$ARCHIVE -o\$OUT_DIR", + "exe" => "cp \$ARCHIVE \$OUT_DIR", + ); + + if (lc $^O eq 'darwin') + { + $HOST_ENV = "macosx"; + $TMP = $ENV{"TMPDIR"}; + $HOME = $ENV{"HOME"}; + } + elsif (lc $^O eq 'linux') + { + $HOST_ENV = "linux"; + $TMP = "/tmp"; + $HOME = $ENV{"HOME"}; + } + elsif (lc $^O eq 'mswin32') + { + $HOST_ENV = "windows"; + $TMP = $ENV{"TMP"}; + $HOME = $ENV{"USERPROFILE"}; + my $WINZIP = "7z.exe"; + if (-e "External/7z/win32/7za.exe") + { + $WINZIP = "External/7z/win32/7za.exe"; + } + %UNCOMPRESSORS = ( + "zip" => "$WINZIP x \$ARCHIVE -o\$OUT_DIR", + "gzip" => "$WINZIP x -so -tgzip \$ARCHIVE | $WINZIP x -si -ttar -o\$OUT_DIR", + "bz2" => "$WINZIP x -so -tbzip2 \$ARCHIVE | $WINZIP x -si -ttar -o\$OUT_DIR", + "7z" => "$WINZIP x \$ARCHIVE -o\$OUT_DIR", + "exe" => "copy \$ARCHIVE \$OUT_DIR", + ); + } + elsif (lc $^O eq 'cygwin') + { + $HOST_ENV = "windows"; + $TMP = $ENV{"TMP"}; + $HOME = $ENV{"HOME"}; + } + elsif (lc $^O eq 'msys') + { + $HOST_ENV = "windows"; + $TMP = $ENV{"TMP"}; + + # since MSYS runs on top of windows, using $USERPROFILE here allows use + # to share cached sdks with windows-based invocations of SDKDownloader. + $HOME = CygPath($ENV{"USERPROFILE"}); + my $WINZIP = "7z.exe"; + if (-e "Tools/WinUtils/7z/7z.exe") + { + $WINZIP = "Tools/WinUtils/7z/7z.exe"; + } + %UNCOMPRESSORS = ( + "zip" => "$WINZIP x \$ARCHIVE -o\$OUT_DIR", + "gzip" => "$WINZIP x -so -tgzip \$ARCHIVE | $WINZIP x -si -ttar -o\$OUT_DIR", + "bz2" => "$WINZIP x -so -tbzip2 \$ARCHIVE | $WINZIP x -si -ttar -o\$OUT_DIR", + "7z" => "$WINZIP x \$ARCHIVE -o\$OUT_DIR", + "exe" => "cp \$ARCHIVE \$OUT_DIR", + ); + + # Try to see if TortoiseHg is installed, so that the user doesn't need to install + # the MSYS mercurial package in order for things to work. + # TODO (CH) :: Do this with Git as well. + my $hg_dir = CygPath("C:/Program Files/TortoiseHg/"); + my $hg_exe = $hg_dir . "hg.exe"; + if (-e $hg_exe) + { + $ENV{PATH} = $hg_dir . ":$ENV{PATH}"; + } + } + else + { + croak("UNKNOWN " . $^O); + } + + # override home location + if ($ENV{UNITY_SDK_LOCATION}) + { + $HOME = $ENV{UNITY_SDK_LOCATION}; + } + + my %host_config = ( + platform => $HOST_ENV, + tmp => $TMP, + install_path => $HOME, + uncompressors => \%UNCOMPRESSORS + ); + + return \%host_config; +} + +# On cygwin and msys systems, this function converts windows-style paths +# (ie, C:\blah) to paths usable in a cygwin/msys shell ((/cygdrive)?/c/blah) +sub CygPath +{ + my ($win_path) = @_; + my $nix_path = `cygpath '$win_path'`; + chomp($nix_path); + return $nix_path; +} + +# Returns HG url +sub GetBaseURL +{ + if ($ENV{UNITY_THISISABUILDMACHINE}) + { + return $base_url_mirror; + } + return $base_url; +} + +sub ParseCmdline +{ + my (@ARGV) = @_; + my ($repo_name, $sdk_override, $artifacts_folder); + + GetOptions( + "repo_name=s" => \$repo_name, + "sdk_override=s" => \$sdk_override, + "artifacts_folder=s" => \$artifacts_folder, + ) or croak("could not parse commandline"); + + PrepareSDK($repo_name, $sdk_override, $artifacts_folder); +} + +__PACKAGE__->ParseCmdline(@ARGV) unless caller; + +1; diff --git a/Anvil b/Anvil deleted file mode 160000 index 84d22865..00000000 --- a/Anvil +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 84d22865842d45fe7762a3d68ef2837e60a8e9f5 diff --git a/CLW/CL/cl.h b/CLW/CL/cl.h new file mode 100644 index 00000000..32ae73fc --- /dev/null +++ b/CLW/CL/cl.h @@ -0,0 +1,1804 @@ +/******************************************************************************* + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __OPENCL_CL_H +#define __OPENCL_CL_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/******************************************************************************/ + +typedef struct _cl_platform_id * cl_platform_id; +typedef struct _cl_device_id * cl_device_id; +typedef struct _cl_context * cl_context; +typedef struct _cl_command_queue * cl_command_queue; +typedef struct _cl_mem * cl_mem; +typedef struct _cl_program * cl_program; +typedef struct _cl_kernel * cl_kernel; +typedef struct _cl_event * cl_event; +typedef struct _cl_sampler * cl_sampler; + +typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ +typedef cl_ulong cl_bitfield; +typedef cl_bitfield cl_device_type; +typedef cl_uint cl_platform_info; +typedef cl_uint cl_device_info; +typedef cl_bitfield cl_device_fp_config; +typedef cl_uint cl_device_mem_cache_type; +typedef cl_uint cl_device_local_mem_type; +typedef cl_bitfield cl_device_exec_capabilities; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_device_svm_capabilities; +#endif +typedef cl_bitfield cl_command_queue_properties; +#ifdef CL_VERSION_1_2 +typedef intptr_t cl_device_partition_property; +typedef cl_bitfield cl_device_affinity_domain; +#endif + +typedef intptr_t cl_context_properties; +typedef cl_uint cl_context_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_queue_properties; +#endif +typedef cl_uint cl_command_queue_info; +typedef cl_uint cl_channel_order; +typedef cl_uint cl_channel_type; +typedef cl_bitfield cl_mem_flags; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_svm_mem_flags; +#endif +typedef cl_uint cl_mem_object_type; +typedef cl_uint cl_mem_info; +#ifdef CL_VERSION_1_2 +typedef cl_bitfield cl_mem_migration_flags; +#endif +typedef cl_uint cl_image_info; +#ifdef CL_VERSION_1_1 +typedef cl_uint cl_buffer_create_type; +#endif +typedef cl_uint cl_addressing_mode; +typedef cl_uint cl_filter_mode; +typedef cl_uint cl_sampler_info; +typedef cl_bitfield cl_map_flags; +#ifdef CL_VERSION_2_0 +typedef intptr_t cl_pipe_properties; +typedef cl_uint cl_pipe_info; +#endif +typedef cl_uint cl_program_info; +typedef cl_uint cl_program_build_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_program_binary_type; +#endif +typedef cl_int cl_build_status; +typedef cl_uint cl_kernel_info; +#ifdef CL_VERSION_1_2 +typedef cl_uint cl_kernel_arg_info; +typedef cl_uint cl_kernel_arg_address_qualifier; +typedef cl_uint cl_kernel_arg_access_qualifier; +typedef cl_bitfield cl_kernel_arg_type_qualifier; +#endif +typedef cl_uint cl_kernel_work_group_info; +#ifdef CL_VERSION_2_1 +typedef cl_uint cl_kernel_sub_group_info; +#endif +typedef cl_uint cl_event_info; +typedef cl_uint cl_command_type; +typedef cl_uint cl_profiling_info; +#ifdef CL_VERSION_2_0 +typedef cl_bitfield cl_sampler_properties; +typedef cl_uint cl_kernel_exec_info; +#endif + +typedef struct _cl_image_format { + cl_channel_order image_channel_order; + cl_channel_type image_channel_data_type; +} cl_image_format; + +#ifdef CL_VERSION_1_2 + +typedef struct _cl_image_desc { + cl_mem_object_type image_type; + size_t image_width; + size_t image_height; + size_t image_depth; + size_t image_array_size; + size_t image_row_pitch; + size_t image_slice_pitch; + cl_uint num_mip_levels; + cl_uint num_samples; +#ifdef CL_VERSION_2_0 +#ifdef __GNUC__ + __extension__ /* Prevents warnings about anonymous union in -pedantic builds */ +#endif +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 /Za builds */ +#endif + union { +#endif + cl_mem buffer; +#ifdef CL_VERSION_2_0 + cl_mem mem_object; + }; +#ifdef _MSC_VER +#pragma warning( pop ) +#endif +#endif +} cl_image_desc; + +#endif + +#ifdef CL_VERSION_1_1 + +typedef struct _cl_buffer_region { + size_t origin; + size_t size; +} cl_buffer_region; + +#endif + +/******************************************************************************/ + +/* Error Codes */ +#define CL_SUCCESS 0 +#define CL_DEVICE_NOT_FOUND -1 +#define CL_DEVICE_NOT_AVAILABLE -2 +#define CL_COMPILER_NOT_AVAILABLE -3 +#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 +#define CL_OUT_OF_RESOURCES -5 +#define CL_OUT_OF_HOST_MEMORY -6 +#define CL_PROFILING_INFO_NOT_AVAILABLE -7 +#define CL_MEM_COPY_OVERLAP -8 +#define CL_IMAGE_FORMAT_MISMATCH -9 +#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 +#define CL_BUILD_PROGRAM_FAILURE -11 +#define CL_MAP_FAILURE -12 +#ifdef CL_VERSION_1_1 +#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 +#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMPILE_PROGRAM_FAILURE -15 +#define CL_LINKER_NOT_AVAILABLE -16 +#define CL_LINK_PROGRAM_FAILURE -17 +#define CL_DEVICE_PARTITION_FAILED -18 +#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 +#endif + +#define CL_INVALID_VALUE -30 +#define CL_INVALID_DEVICE_TYPE -31 +#define CL_INVALID_PLATFORM -32 +#define CL_INVALID_DEVICE -33 +#define CL_INVALID_CONTEXT -34 +#define CL_INVALID_QUEUE_PROPERTIES -35 +#define CL_INVALID_COMMAND_QUEUE -36 +#define CL_INVALID_HOST_PTR -37 +#define CL_INVALID_MEM_OBJECT -38 +#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 +#define CL_INVALID_IMAGE_SIZE -40 +#define CL_INVALID_SAMPLER -41 +#define CL_INVALID_BINARY -42 +#define CL_INVALID_BUILD_OPTIONS -43 +#define CL_INVALID_PROGRAM -44 +#define CL_INVALID_PROGRAM_EXECUTABLE -45 +#define CL_INVALID_KERNEL_NAME -46 +#define CL_INVALID_KERNEL_DEFINITION -47 +#define CL_INVALID_KERNEL -48 +#define CL_INVALID_ARG_INDEX -49 +#define CL_INVALID_ARG_VALUE -50 +#define CL_INVALID_ARG_SIZE -51 +#define CL_INVALID_KERNEL_ARGS -52 +#define CL_INVALID_WORK_DIMENSION -53 +#define CL_INVALID_WORK_GROUP_SIZE -54 +#define CL_INVALID_WORK_ITEM_SIZE -55 +#define CL_INVALID_GLOBAL_OFFSET -56 +#define CL_INVALID_EVENT_WAIT_LIST -57 +#define CL_INVALID_EVENT -58 +#define CL_INVALID_OPERATION -59 +#define CL_INVALID_GL_OBJECT -60 +#define CL_INVALID_BUFFER_SIZE -61 +#define CL_INVALID_MIP_LEVEL -62 +#define CL_INVALID_GLOBAL_WORK_SIZE -63 +#ifdef CL_VERSION_1_1 +#define CL_INVALID_PROPERTY -64 +#endif +#ifdef CL_VERSION_1_2 +#define CL_INVALID_IMAGE_DESCRIPTOR -65 +#define CL_INVALID_COMPILER_OPTIONS -66 +#define CL_INVALID_LINKER_OPTIONS -67 +#define CL_INVALID_DEVICE_PARTITION_COUNT -68 +#endif +#ifdef CL_VERSION_2_0 +#define CL_INVALID_PIPE_SIZE -69 +#define CL_INVALID_DEVICE_QUEUE -70 +#endif +#ifdef CL_VERSION_2_2 +#define CL_INVALID_SPEC_ID -71 +#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72 +#endif + + +/* cl_bool */ +#define CL_FALSE 0 +#define CL_TRUE 1 +#ifdef CL_VERSION_1_2 +#define CL_BLOCKING CL_TRUE +#define CL_NON_BLOCKING CL_FALSE +#endif + +/* cl_platform_info */ +#define CL_PLATFORM_PROFILE 0x0900 +#define CL_PLATFORM_VERSION 0x0901 +#define CL_PLATFORM_NAME 0x0902 +#define CL_PLATFORM_VENDOR 0x0903 +#define CL_PLATFORM_EXTENSIONS 0x0904 +#ifdef CL_VERSION_2_1 +#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905 +#endif + +/* cl_device_type - bitfield */ +#define CL_DEVICE_TYPE_DEFAULT (1 << 0) +#define CL_DEVICE_TYPE_CPU (1 << 1) +#define CL_DEVICE_TYPE_GPU (1 << 2) +#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_TYPE_CUSTOM (1 << 4) +#endif +#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF + +/* cl_device_info */ +#define CL_DEVICE_TYPE 0x1000 +#define CL_DEVICE_VENDOR_ID 0x1001 +#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 +#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 +#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B +#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C +#define CL_DEVICE_ADDRESS_BITS 0x100D +#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E +#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F +#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 +#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 +#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 +#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 +#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 +#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 +#define CL_DEVICE_IMAGE_SUPPORT 0x1016 +#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 +#define CL_DEVICE_MAX_SAMPLERS 0x1018 +#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 +#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A +#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B +#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C +#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D +#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E +#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F +#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 +#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 +#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 +#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 +#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 +#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 +#define CL_DEVICE_ENDIAN_LITTLE 0x1026 +#define CL_DEVICE_AVAILABLE 0x1027 +#define CL_DEVICE_COMPILER_AVAILABLE 0x1028 +#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 +#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */ +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A +#endif +#define CL_DEVICE_NAME 0x102B +#define CL_DEVICE_VENDOR 0x102C +#define CL_DRIVER_VERSION 0x102D +#define CL_DEVICE_PROFILE 0x102E +#define CL_DEVICE_VERSION 0x102F +#define CL_DEVICE_EXTENSIONS 0x1030 +#define CL_DEVICE_PLATFORM 0x1031 +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 +#endif +/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */ +#ifdef CL_VERSION_1_1 +#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 +#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */ +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B +#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C +#define CL_DEVICE_OPENCL_C_VERSION 0x103D +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEVICE_LINKER_AVAILABLE 0x103E +#define CL_DEVICE_BUILT_IN_KERNELS 0x103F +#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 +#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 +#define CL_DEVICE_PARENT_DEVICE 0x1042 +#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 +#define CL_DEVICE_PARTITION_PROPERTIES 0x1044 +#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 +#define CL_DEVICE_PARTITION_TYPE 0x1046 +#define CL_DEVICE_REFERENCE_COUNT 0x1047 +#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 +#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 +#endif +#ifdef CL_VERSION_2_0 +#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A +#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B +#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C +#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D +#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E +#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F +#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050 +#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051 +#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052 +#define CL_DEVICE_SVM_CAPABILITIES 0x1053 +#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054 +#define CL_DEVICE_MAX_PIPE_ARGS 0x1055 +#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056 +#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057 +#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058 +#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059 +#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A +#endif +#ifdef CL_VERSION_2_1 +#define CL_DEVICE_IL_VERSION 0x105B +#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C +#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D +#endif + +/* cl_device_fp_config - bitfield */ +#define CL_FP_DENORM (1 << 0) +#define CL_FP_INF_NAN (1 << 1) +#define CL_FP_ROUND_TO_NEAREST (1 << 2) +#define CL_FP_ROUND_TO_ZERO (1 << 3) +#define CL_FP_ROUND_TO_INF (1 << 4) +#define CL_FP_FMA (1 << 5) +#ifdef CL_VERSION_1_1 +#define CL_FP_SOFT_FLOAT (1 << 6) +#endif +#ifdef CL_VERSION_1_2 +#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) +#endif + +/* cl_device_mem_cache_type */ +#define CL_NONE 0x0 +#define CL_READ_ONLY_CACHE 0x1 +#define CL_READ_WRITE_CACHE 0x2 + +/* cl_device_local_mem_type */ +#define CL_LOCAL 0x1 +#define CL_GLOBAL 0x2 + +/* cl_device_exec_capabilities - bitfield */ +#define CL_EXEC_KERNEL (1 << 0) +#define CL_EXEC_NATIVE_KERNEL (1 << 1) + +/* cl_command_queue_properties - bitfield */ +#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) +#define CL_QUEUE_PROFILING_ENABLE (1 << 1) +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_ON_DEVICE (1 << 2) +#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3) +#endif + +/* cl_context_info */ +#define CL_CONTEXT_REFERENCE_COUNT 0x1080 +#define CL_CONTEXT_DEVICES 0x1081 +#define CL_CONTEXT_PROPERTIES 0x1082 +#ifdef CL_VERSION_1_1 +#define CL_CONTEXT_NUM_DEVICES 0x1083 +#endif + +/* cl_context_properties */ +#define CL_CONTEXT_PLATFORM 0x1084 +#ifdef CL_VERSION_1_2 +#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_partition_property */ +#define CL_DEVICE_PARTITION_EQUALLY 0x1086 +#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 +#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 +#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_device_affinity_domain */ +#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) +#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) +#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) +#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) +#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) +#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_device_svm_capabilities */ +#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0) +#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1) +#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2) +#define CL_DEVICE_SVM_ATOMICS (1 << 3) + +#endif + +/* cl_command_queue_info */ +#define CL_QUEUE_CONTEXT 0x1090 +#define CL_QUEUE_DEVICE 0x1091 +#define CL_QUEUE_REFERENCE_COUNT 0x1092 +#define CL_QUEUE_PROPERTIES 0x1093 +#ifdef CL_VERSION_2_0 +#define CL_QUEUE_SIZE 0x1094 +#endif +#ifdef CL_VERSION_2_1 +#define CL_QUEUE_DEVICE_DEFAULT 0x1095 +#endif + +/* cl_mem_flags and cl_svm_mem_flags - bitfield */ +#define CL_MEM_READ_WRITE (1 << 0) +#define CL_MEM_WRITE_ONLY (1 << 1) +#define CL_MEM_READ_ONLY (1 << 2) +#define CL_MEM_USE_HOST_PTR (1 << 3) +#define CL_MEM_ALLOC_HOST_PTR (1 << 4) +#define CL_MEM_COPY_HOST_PTR (1 << 5) +/* reserved (1 << 6) */ +#ifdef CL_VERSION_1_2 +#define CL_MEM_HOST_WRITE_ONLY (1 << 7) +#define CL_MEM_HOST_READ_ONLY (1 << 8) +#define CL_MEM_HOST_NO_ACCESS (1 << 9) +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */ +#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */ +#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12) +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_mem_migration_flags - bitfield */ +#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) +#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) + +#endif + +/* cl_channel_order */ +#define CL_R 0x10B0 +#define CL_A 0x10B1 +#define CL_RG 0x10B2 +#define CL_RA 0x10B3 +#define CL_RGB 0x10B4 +#define CL_RGBA 0x10B5 +#define CL_BGRA 0x10B6 +#define CL_ARGB 0x10B7 +#define CL_INTENSITY 0x10B8 +#define CL_LUMINANCE 0x10B9 +#ifdef CL_VERSION_1_1 +#define CL_Rx 0x10BA +#define CL_RGx 0x10BB +#define CL_RGBx 0x10BC +#endif +#ifdef CL_VERSION_1_2 +#define CL_DEPTH 0x10BD +#define CL_DEPTH_STENCIL 0x10BE +#endif +#ifdef CL_VERSION_2_0 +#define CL_sRGB 0x10BF +#define CL_sRGBx 0x10C0 +#define CL_sRGBA 0x10C1 +#define CL_sBGRA 0x10C2 +#define CL_ABGR 0x10C3 +#endif + +/* cl_channel_type */ +#define CL_SNORM_INT8 0x10D0 +#define CL_SNORM_INT16 0x10D1 +#define CL_UNORM_INT8 0x10D2 +#define CL_UNORM_INT16 0x10D3 +#define CL_UNORM_SHORT_565 0x10D4 +#define CL_UNORM_SHORT_555 0x10D5 +#define CL_UNORM_INT_101010 0x10D6 +#define CL_SIGNED_INT8 0x10D7 +#define CL_SIGNED_INT16 0x10D8 +#define CL_SIGNED_INT32 0x10D9 +#define CL_UNSIGNED_INT8 0x10DA +#define CL_UNSIGNED_INT16 0x10DB +#define CL_UNSIGNED_INT32 0x10DC +#define CL_HALF_FLOAT 0x10DD +#define CL_FLOAT 0x10DE +#ifdef CL_VERSION_1_2 +#define CL_UNORM_INT24 0x10DF +#endif +#ifdef CL_VERSION_2_1 +#define CL_UNORM_INT_101010_2 0x10E0 +#endif + +/* cl_mem_object_type */ +#define CL_MEM_OBJECT_BUFFER 0x10F0 +#define CL_MEM_OBJECT_IMAGE2D 0x10F1 +#define CL_MEM_OBJECT_IMAGE3D 0x10F2 +#ifdef CL_VERSION_1_2 +#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 +#define CL_MEM_OBJECT_IMAGE1D 0x10F4 +#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 +#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_OBJECT_PIPE 0x10F7 +#endif + +/* cl_mem_info */ +#define CL_MEM_TYPE 0x1100 +#define CL_MEM_FLAGS 0x1101 +#define CL_MEM_SIZE 0x1102 +#define CL_MEM_HOST_PTR 0x1103 +#define CL_MEM_MAP_COUNT 0x1104 +#define CL_MEM_REFERENCE_COUNT 0x1105 +#define CL_MEM_CONTEXT 0x1106 +#ifdef CL_VERSION_1_1 +#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 +#define CL_MEM_OFFSET 0x1108 +#endif +#ifdef CL_VERSION_2_0 +#define CL_MEM_USES_SVM_POINTER 0x1109 +#endif + +/* cl_image_info */ +#define CL_IMAGE_FORMAT 0x1110 +#define CL_IMAGE_ELEMENT_SIZE 0x1111 +#define CL_IMAGE_ROW_PITCH 0x1112 +#define CL_IMAGE_SLICE_PITCH 0x1113 +#define CL_IMAGE_WIDTH 0x1114 +#define CL_IMAGE_HEIGHT 0x1115 +#define CL_IMAGE_DEPTH 0x1116 +#ifdef CL_VERSION_1_2 +#define CL_IMAGE_ARRAY_SIZE 0x1117 +#define CL_IMAGE_BUFFER 0x1118 +#define CL_IMAGE_NUM_MIP_LEVELS 0x1119 +#define CL_IMAGE_NUM_SAMPLES 0x111A +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_pipe_info */ +#define CL_PIPE_PACKET_SIZE 0x1120 +#define CL_PIPE_MAX_PACKETS 0x1121 + +#endif + +/* cl_addressing_mode */ +#define CL_ADDRESS_NONE 0x1130 +#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 +#define CL_ADDRESS_CLAMP 0x1132 +#define CL_ADDRESS_REPEAT 0x1133 +#ifdef CL_VERSION_1_1 +#define CL_ADDRESS_MIRRORED_REPEAT 0x1134 +#endif + +/* cl_filter_mode */ +#define CL_FILTER_NEAREST 0x1140 +#define CL_FILTER_LINEAR 0x1141 + +/* cl_sampler_info */ +#define CL_SAMPLER_REFERENCE_COUNT 0x1150 +#define CL_SAMPLER_CONTEXT 0x1151 +#define CL_SAMPLER_NORMALIZED_COORDS 0x1152 +#define CL_SAMPLER_ADDRESSING_MODE 0x1153 +#define CL_SAMPLER_FILTER_MODE 0x1154 +#ifdef CL_VERSION_2_0 +/* These enumerants are for the cl_khr_mipmap_image extension. + They have since been added to cl_ext.h with an appropriate + KHR suffix, but are left here for backwards compatibility. */ +#define CL_SAMPLER_MIP_FILTER_MODE 0x1155 +#define CL_SAMPLER_LOD_MIN 0x1156 +#define CL_SAMPLER_LOD_MAX 0x1157 +#endif + +/* cl_map_flags - bitfield */ +#define CL_MAP_READ (1 << 0) +#define CL_MAP_WRITE (1 << 1) +#ifdef CL_VERSION_1_2 +#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) +#endif + +/* cl_program_info */ +#define CL_PROGRAM_REFERENCE_COUNT 0x1160 +#define CL_PROGRAM_CONTEXT 0x1161 +#define CL_PROGRAM_NUM_DEVICES 0x1162 +#define CL_PROGRAM_DEVICES 0x1163 +#define CL_PROGRAM_SOURCE 0x1164 +#define CL_PROGRAM_BINARY_SIZES 0x1165 +#define CL_PROGRAM_BINARIES 0x1166 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_NUM_KERNELS 0x1167 +#define CL_PROGRAM_KERNEL_NAMES 0x1168 +#endif +#ifdef CL_VERSION_2_1 +#define CL_PROGRAM_IL 0x1169 +#endif +#ifdef CL_VERSION_2_2 +#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A +#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B +#endif + +/* cl_program_build_info */ +#define CL_PROGRAM_BUILD_STATUS 0x1181 +#define CL_PROGRAM_BUILD_OPTIONS 0x1182 +#define CL_PROGRAM_BUILD_LOG 0x1183 +#ifdef CL_VERSION_1_2 +#define CL_PROGRAM_BINARY_TYPE 0x1184 +#endif +#ifdef CL_VERSION_2_0 +#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185 +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_program_binary_type */ +#define CL_PROGRAM_BINARY_TYPE_NONE 0x0 +#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 +#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 +#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 + +#endif + +/* cl_build_status */ +#define CL_BUILD_SUCCESS 0 +#define CL_BUILD_NONE -1 +#define CL_BUILD_ERROR -2 +#define CL_BUILD_IN_PROGRESS -3 + +/* cl_kernel_info */ +#define CL_KERNEL_FUNCTION_NAME 0x1190 +#define CL_KERNEL_NUM_ARGS 0x1191 +#define CL_KERNEL_REFERENCE_COUNT 0x1192 +#define CL_KERNEL_CONTEXT 0x1193 +#define CL_KERNEL_PROGRAM 0x1194 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_ATTRIBUTES 0x1195 +#endif +#ifdef CL_VERSION_2_1 +#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9 +#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_info */ +#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 +#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 +#define CL_KERNEL_ARG_TYPE_NAME 0x1198 +#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 +#define CL_KERNEL_ARG_NAME 0x119A + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_address_qualifier */ +#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B +#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C +#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D +#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_access_qualifier */ +#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 +#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 +#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 +#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 + +#endif + +#ifdef CL_VERSION_1_2 + +/* cl_kernel_arg_type_qualifier */ +#define CL_KERNEL_ARG_TYPE_NONE 0 +#define CL_KERNEL_ARG_TYPE_CONST (1 << 0) +#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) +#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) +#ifdef CL_VERSION_2_0 +#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3) +#endif + +#endif + +/* cl_kernel_work_group_info */ +#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 +#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 +#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 +#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 +#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 +#ifdef CL_VERSION_1_2 +#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 +#endif + +#ifdef CL_VERSION_2_1 + +/* cl_kernel_sub_group_info */ +#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033 +#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034 +#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8 + +#endif + +#ifdef CL_VERSION_2_0 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6 +#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7 + +#endif + +/* cl_event_info */ +#define CL_EVENT_COMMAND_QUEUE 0x11D0 +#define CL_EVENT_COMMAND_TYPE 0x11D1 +#define CL_EVENT_REFERENCE_COUNT 0x11D2 +#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 +#ifdef CL_VERSION_1_1 +#define CL_EVENT_CONTEXT 0x11D4 +#endif + +/* cl_command_type */ +#define CL_COMMAND_NDRANGE_KERNEL 0x11F0 +#define CL_COMMAND_TASK 0x11F1 +#define CL_COMMAND_NATIVE_KERNEL 0x11F2 +#define CL_COMMAND_READ_BUFFER 0x11F3 +#define CL_COMMAND_WRITE_BUFFER 0x11F4 +#define CL_COMMAND_COPY_BUFFER 0x11F5 +#define CL_COMMAND_READ_IMAGE 0x11F6 +#define CL_COMMAND_WRITE_IMAGE 0x11F7 +#define CL_COMMAND_COPY_IMAGE 0x11F8 +#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 +#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA +#define CL_COMMAND_MAP_BUFFER 0x11FB +#define CL_COMMAND_MAP_IMAGE 0x11FC +#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD +#define CL_COMMAND_MARKER 0x11FE +#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF +#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 +#ifdef CL_VERSION_1_1 +#define CL_COMMAND_READ_BUFFER_RECT 0x1201 +#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 +#define CL_COMMAND_COPY_BUFFER_RECT 0x1203 +#define CL_COMMAND_USER 0x1204 +#endif +#ifdef CL_VERSION_1_2 +#define CL_COMMAND_BARRIER 0x1205 +#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 +#define CL_COMMAND_FILL_BUFFER 0x1207 +#define CL_COMMAND_FILL_IMAGE 0x1208 +#endif +#ifdef CL_VERSION_2_0 +#define CL_COMMAND_SVM_FREE 0x1209 +#define CL_COMMAND_SVM_MEMCPY 0x120A +#define CL_COMMAND_SVM_MEMFILL 0x120B +#define CL_COMMAND_SVM_MAP 0x120C +#define CL_COMMAND_SVM_UNMAP 0x120D +#endif + +/* command execution status */ +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +#ifdef CL_VERSION_1_1 + +/* cl_buffer_create_type */ +#define CL_BUFFER_CREATE_TYPE_REGION 0x1220 + +#endif + +/* cl_profiling_info */ +#define CL_PROFILING_COMMAND_QUEUED 0x1280 +#define CL_PROFILING_COMMAND_SUBMIT 0x1281 +#define CL_PROFILING_COMMAND_START 0x1282 +#define CL_PROFILING_COMMAND_END 0x1283 +#ifdef CL_VERSION_2_0 +#define CL_PROFILING_COMMAND_COMPLETE 0x1284 +#endif + +/********************************************************************************************************/ + +/* Platform API */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformIDs(cl_uint num_entries, + cl_platform_id * platforms, + cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPlatformInfo(cl_platform_id platform, + cl_platform_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Device APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDs(cl_platform_id platform, + cl_device_type device_type, + cl_uint num_entries, + cl_device_id * devices, + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceInfo(cl_device_id device, + cl_device_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateSubDevices(cl_device_id in_device, + const cl_device_partition_property * properties, + cl_uint num_devices, + cl_device_id * out_devices, + cl_uint * num_devices_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseDevice(cl_device_id device) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetDefaultDeviceCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceAndHostTimer(cl_device_id device, + cl_ulong* device_timestamp, + cl_ulong* host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetHostTimer(cl_device_id device, + cl_ulong * host_timestamp) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Context APIs */ +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContext(const cl_context_properties * properties, + cl_uint num_devices, + const cl_device_id * devices, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_context CL_API_CALL +clCreateContextFromType(const cl_context_properties * properties, + cl_device_type device_type, + void (CL_CALLBACK * pfn_notify)(const char * errinfo, + const void * private_info, + size_t cb, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseContext(cl_context context) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetContextInfo(cl_context context, + cl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Command Queue APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_command_queue CL_API_CALL +clCreateCommandQueueWithProperties(cl_context context, + cl_device_id device, + const cl_queue_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandQueue(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandQueueInfo(cl_command_queue command_queue, + cl_command_queue_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Memory Object APIs */ +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBuffer(cl_context context, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateSubBuffer(cl_mem buffer, + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateImage(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + const cl_image_desc * image_desc, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreatePipe(cl_context context, + cl_mem_flags flags, + cl_uint pipe_packet_size, + cl_uint pipe_max_packets, + const cl_pipe_properties * properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseMemObject(cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedImageFormats(cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_image_format * image_formats, + cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemObjectInfo(cl_mem memobj, + cl_mem_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageInfo(cl_mem image, + cl_image_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetPipeInfo(cl_mem pipe, + cl_pipe_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetMemObjectDestructorCallback(cl_mem memobj, + void (CL_CALLBACK * pfn_notify)(cl_mem memobj, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* SVM Allocation APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY void * CL_API_CALL +clSVMAlloc(cl_context context, + cl_svm_mem_flags flags, + size_t size, + cl_uint alignment) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY void CL_API_CALL +clSVMFree(cl_context context, + void * svm_pointer) CL_API_SUFFIX__VERSION_2_0; + +#endif + +/* Sampler APIs */ + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_sampler CL_API_CALL +clCreateSamplerWithProperties(cl_context context, + const cl_sampler_properties * sampler_properties, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSamplerInfo(cl_sampler sampler, + cl_sampler_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Program Object APIs */ +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithSource(cl_context context, + cl_uint count, + const char ** strings, + const size_t * lengths, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBinary(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const size_t * lengths, + const unsigned char ** binaries, + cl_int * binary_status, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithBuiltInKernels(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * kernel_names, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_program CL_API_CALL +clCreateProgramWithIL(cl_context context, + const void* il, + size_t length, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseProgram(cl_program program) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clBuildProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clCompileProgram(cl_program program, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_headers, + const cl_program * input_headers, + const char ** header_include_names, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_program CL_API_CALL +clLinkProgram(cl_context context, + cl_uint num_devices, + const cl_device_id * device_list, + const char * options, + cl_uint num_input_programs, + const cl_program * input_programs, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramReleaseCallback(cl_program program, + void (CL_CALLBACK * pfn_notify)(cl_program program, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_2_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetProgramSpecializationConstant(cl_program program, + cl_uint spec_id, + size_t spec_size, + const void* spec_value) CL_API_SUFFIX__VERSION_2_2; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramInfo(cl_program program, + cl_program_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetProgramBuildInfo(cl_program program, + cl_device_id device, + cl_program_build_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Kernel Object APIs */ +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCreateKernel(cl_program program, + const char * kernel_name, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCreateKernelsInProgram(cl_program program, + cl_uint num_kernels, + cl_kernel * kernels, + cl_uint * num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_kernel CL_API_CALL +clCloneKernel(cl_kernel source_kernel, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseKernel(cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArg(cl_kernel kernel, + cl_uint arg_index, + size_t arg_size, + const void * arg_value) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgSVMPointer(cl_kernel kernel, + cl_uint arg_index, + const void * arg_value) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void * param_value) CL_API_SUFFIX__VERSION_2_0; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelInfo(cl_kernel kernel, + cl_kernel_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelArgInfo(cl_kernel kernel, + cl_uint arg_indx, + cl_kernel_arg_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelWorkGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_work_group_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSubGroupInfo(cl_kernel kernel, + cl_device_id device, + cl_kernel_sub_group_info param_name, + size_t input_value_size, + const void* input_value, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; + +#endif + +/* Event Object APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clWaitForEvents(cl_uint num_events, + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventInfo(cl_event event, + cl_event_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateUserEvent(cl_context context, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetUserEventStatus(cl_event event, + cl_int execution_status) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetEventCallback(cl_event event, + cl_int command_exec_callback_type, + void (CL_CALLBACK * pfn_notify)(cl_event event, + cl_int event_command_status, + void * user_data), + void * user_data) CL_API_SUFFIX__VERSION_1_1; + +#endif + +/* Profiling APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetEventProfilingInfo(cl_event event, + cl_profiling_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +/* Flush and Finish APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clFlush(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinish(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; + +/* Enqueued Commands APIs */ +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + size_t offset, + size_t size, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_read, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + size_t offset, + size_t size, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteBufferRect(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_write, + const size_t * buffer_offset, + const size_t * host_offset, + const size_t * region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillBuffer(cl_command_queue command_queue, + cl_mem buffer, + const void * pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBuffer(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferRect(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_1; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReadImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_read, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWriteImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_write, + const size_t * origin, + const size_t * region, + size_t input_row_pitch, + size_t input_slice_pitch, + const void * ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueFillImage(cl_command_queue command_queue, + cl_mem image, + const void * fill_color, + const size_t * origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImage(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t * src_origin, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyImageToBuffer(cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t * src_origin, + const size_t * region, + size_t dst_offset, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCopyBufferToImage(cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t * dst_origin, + const size_t * region, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapBuffer(cl_command_queue command_queue, + cl_mem buffer, + cl_bool blocking_map, + cl_map_flags map_flags, + size_t offset, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY void * CL_API_CALL +clEnqueueMapImage(cl_command_queue command_queue, + cl_mem image, + cl_bool blocking_map, + cl_map_flags map_flags, + const size_t * origin, + const size_t * region, + size_t * image_row_pitch, + size_t * image_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueUnmapMemObject(cl_command_queue command_queue, + cl_mem memobj, + void * mapped_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemObjects(cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem * mem_objects, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNDRangeKernel(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t * global_work_offset, + const size_t * global_work_size, + const size_t * local_work_size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueNativeKernel(cl_command_queue command_queue, + void (CL_CALLBACK * user_func)(void *), + void * args, + size_t cb_args, + cl_uint num_mem_objects, + const cl_mem * mem_list, + const void ** args_mem_loc, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMarkerWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueBarrierWithWaitList(cl_command_queue command_queue, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_VERSION_2_0 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMFree(cl_command_queue command_queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue, + cl_uint num_svm_pointers, + void * svm_pointers[], + void * user_data), + void * user_data, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpy(cl_command_queue command_queue, + cl_bool blocking_copy, + void * dst_ptr, + const void * src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFill(cl_command_queue command_queue, + void * svm_ptr, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMap(cl_command_queue command_queue, + cl_bool blocking_map, + cl_map_flags flags, + void * svm_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMUnmap(cl_command_queue command_queue, + void * svm_ptr, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_0; + +#endif + +#ifdef CL_VERSION_2_1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMigrateMem(cl_command_queue command_queue, + cl_uint num_svm_pointers, + const void ** svm_pointers, + const size_t * sizes, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_2_1; + +#endif + +#ifdef CL_VERSION_1_2 + +/* Extension function access + * + * Returns the extension function address for the given function name, + * or NULL if a valid function can not be found. The client must + * check to make sure the address is not NULL, before using or + * calling the returned function address. + */ +extern CL_API_ENTRY void * CL_API_CALL +clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, + const char * func_name) CL_API_SUFFIX__VERSION_1_2; + +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + /* + * WARNING: + * This API introduces mutable state into the OpenCL implementation. It has been REMOVED + * to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the + * OpenCL 1.1 conformance test, and consequently may not work or may not work dependably. + * It is likely to be non-performant. Use of this API is not advised. Use at your own risk. + * + * Software developers previously relying on this API are instructed to set the command queue + * properties when creating the queue, instead. + */ + extern CL_API_ENTRY cl_int CL_API_CALL + clSetCommandQueueProperty(cl_command_queue command_queue, + cl_command_queue_properties properties, + cl_bool enable, + cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; +#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage2D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateImage3D(cl_context context, + cl_mem_flags flags, + const cl_image_format * image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void * host_ptr, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueMarker(cl_command_queue command_queue, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueWaitForEvents(cl_command_queue command_queue, + cl_uint num_events, + const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* Deprecated OpenCL 2.0 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +clCreateCommandQueue(cl_context context, + cl_device_id device, + cl_command_queue_properties properties, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +clCreateSampler(cl_context context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +clEnqueueTask(cl_command_queue command_queue, + cl_kernel kernel, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_H */ diff --git a/CLW/CL/cl_gl.h b/CLW/CL/cl_gl.h new file mode 100644 index 00000000..fbdaf629 --- /dev/null +++ b/CLW/CL/cl_gl.h @@ -0,0 +1,171 @@ +/********************************************************************************** + * Copyright (c) 2008-2019 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __OPENCL_CL_GL_H +#define __OPENCL_CL_GL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_gl_object_type; +typedef cl_uint cl_gl_texture_info; +typedef cl_uint cl_gl_platform_info; +typedef struct __GLsync *cl_GLsync; + +/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ +#define CL_GL_OBJECT_BUFFER 0x2000 +#define CL_GL_OBJECT_TEXTURE2D 0x2001 +#define CL_GL_OBJECT_TEXTURE3D 0x2002 +#define CL_GL_OBJECT_RENDERBUFFER 0x2003 +#ifdef CL_VERSION_1_2 +#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E +#define CL_GL_OBJECT_TEXTURE1D 0x200F +#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 +#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 +#endif + +/* cl_gl_texture_info */ +#define CL_GL_TEXTURE_TARGET 0x2004 +#define CL_GL_MIPMAP_LEVEL 0x2005 +#ifdef CL_VERSION_1_2 +#define CL_GL_NUM_SAMPLES 0x2012 +#endif + + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLBuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +#ifdef CL_VERSION_1_2 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLTexture(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +#endif + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromGLRenderbuffer(cl_context context, + cl_mem_flags flags, + cl_GLuint renderbuffer, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLObjectInfo(cl_mem memobj, + cl_gl_object_type * gl_object_type, + cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLTextureInfo(cl_mem memobj, + cl_gl_texture_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseGLObjects(cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem * mem_objects, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event) CL_API_SUFFIX__VERSION_1_0; + + +/* Deprecated OpenCL 1.1 APIs */ +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture2D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +clCreateFromGLTexture3D(cl_context context, + cl_mem_flags flags, + cl_GLenum target, + cl_GLint miplevel, + cl_GLuint texture, + cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + +/* cl_khr_gl_sharing extension */ + +#define cl_khr_gl_sharing 1 + +typedef cl_uint cl_gl_context_info; + +/* Additional Error Codes */ +#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 + +/* cl_gl_context_info */ +#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 +#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 + +/* Additional cl_context_properties */ +#define CL_GL_CONTEXT_KHR 0x2008 +#define CL_EGL_DISPLAY_KHR 0x2009 +#define CL_GLX_DISPLAY_KHR 0x200A +#define CL_WGL_HDC_KHR 0x200B +#define CL_CGL_SHAREGROUP_KHR 0x200C + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetGLContextInfoKHR(const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties * properties, + cl_gl_context_info param_name, + size_t param_value_size, + void * param_value, + size_t * param_value_size_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* __OPENCL_CL_GL_H */ diff --git a/CLW/CL/cl_platform.h b/CLW/CL/cl_platform.h new file mode 100644 index 00000000..7f4ddea5 --- /dev/null +++ b/CLW/CL/cl_platform.h @@ -0,0 +1,1384 @@ +/********************************************************************************** + * Copyright (c) 2008-2018 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + **********************************************************************************/ + +#ifndef __CL_PLATFORM_H +#define __CL_PLATFORM_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(_WIN32) + #define CL_API_ENTRY + #define CL_API_CALL __stdcall + #define CL_CALLBACK __stdcall +#else + #define CL_API_ENTRY + #define CL_API_CALL + #define CL_CALLBACK +#endif + +/* + * Deprecation flags refer to the last version of the header in which the + * feature was not deprecated. + * + * E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without + * deprecation but is deprecated in versions later than 1.1. + */ + +#define CL_EXTENSION_WEAK_LINK +#define CL_API_SUFFIX__VERSION_1_0 +#define CL_EXT_SUFFIX__VERSION_1_0 +#define CL_API_SUFFIX__VERSION_1_1 +#define CL_EXT_SUFFIX__VERSION_1_1 +#define CL_API_SUFFIX__VERSION_1_2 +#define CL_EXT_SUFFIX__VERSION_1_2 +#define CL_API_SUFFIX__VERSION_2_0 +#define CL_EXT_SUFFIX__VERSION_2_0 +#define CL_API_SUFFIX__VERSION_2_1 +#define CL_EXT_SUFFIX__VERSION_2_1 +#define CL_API_SUFFIX__VERSION_2_2 +#define CL_EXT_SUFFIX__VERSION_2_2 + + +#ifdef __GNUC__ + #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_EXT_PREFIX_DEPRECATED +#elif defined(_WIN32) + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) +#else + #define CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED +#else + #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED + #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED +#endif + +#if (defined (_WIN32) && defined(_MSC_VER)) + +/* scalar types */ +typedef signed __int8 cl_char; +typedef unsigned __int8 cl_uchar; +typedef signed __int16 cl_short; +typedef unsigned __int16 cl_ushort; +typedef signed __int32 cl_int; +typedef unsigned __int32 cl_uint; +typedef signed __int64 cl_long; +typedef unsigned __int64 cl_ulong; + +typedef unsigned __int16 cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 1.7976931348623158e+308 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#define CL_NAN (CL_INFINITY - CL_INFINITY) +#define CL_HUGE_VALF ((cl_float) 1e50) +#define CL_HUGE_VAL ((cl_double) 1e500) +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#else + +#include + +/* scalar types */ +typedef int8_t cl_char; +typedef uint8_t cl_uchar; +typedef int16_t cl_short; +typedef uint16_t cl_ushort; +typedef int32_t cl_int; +typedef uint32_t cl_uint; +typedef int64_t cl_long; +typedef uint64_t cl_ulong; + +typedef uint16_t cl_half; +typedef float cl_float; +typedef double cl_double; + +/* Macro names and corresponding values defined by OpenCL */ +#define CL_CHAR_BIT 8 +#define CL_SCHAR_MAX 127 +#define CL_SCHAR_MIN (-127-1) +#define CL_CHAR_MAX CL_SCHAR_MAX +#define CL_CHAR_MIN CL_SCHAR_MIN +#define CL_UCHAR_MAX 255 +#define CL_SHRT_MAX 32767 +#define CL_SHRT_MIN (-32767-1) +#define CL_USHRT_MAX 65535 +#define CL_INT_MAX 2147483647 +#define CL_INT_MIN (-2147483647-1) +#define CL_UINT_MAX 0xffffffffU +#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL) +#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL) +#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL) + +#define CL_FLT_DIG 6 +#define CL_FLT_MANT_DIG 24 +#define CL_FLT_MAX_10_EXP +38 +#define CL_FLT_MAX_EXP +128 +#define CL_FLT_MIN_10_EXP -37 +#define CL_FLT_MIN_EXP -125 +#define CL_FLT_RADIX 2 +#define CL_FLT_MAX 340282346638528859811704183484516925440.0f +#define CL_FLT_MIN 1.175494350822287507969e-38f +#define CL_FLT_EPSILON 1.1920928955078125e-7f + +#define CL_HALF_DIG 3 +#define CL_HALF_MANT_DIG 11 +#define CL_HALF_MAX_10_EXP +4 +#define CL_HALF_MAX_EXP +16 +#define CL_HALF_MIN_10_EXP -4 +#define CL_HALF_MIN_EXP -13 +#define CL_HALF_RADIX 2 +#define CL_HALF_MAX 65504.0f +#define CL_HALF_MIN 6.103515625e-05f +#define CL_HALF_EPSILON 9.765625e-04f + +#define CL_DBL_DIG 15 +#define CL_DBL_MANT_DIG 53 +#define CL_DBL_MAX_10_EXP +308 +#define CL_DBL_MAX_EXP +1024 +#define CL_DBL_MIN_10_EXP -307 +#define CL_DBL_MIN_EXP -1021 +#define CL_DBL_RADIX 2 +#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0 +#define CL_DBL_MIN 2.225073858507201383090e-308 +#define CL_DBL_EPSILON 2.220446049250313080847e-16 + +#define CL_M_E 2.7182818284590452354 +#define CL_M_LOG2E 1.4426950408889634074 +#define CL_M_LOG10E 0.43429448190325182765 +#define CL_M_LN2 0.69314718055994530942 +#define CL_M_LN10 2.30258509299404568402 +#define CL_M_PI 3.14159265358979323846 +#define CL_M_PI_2 1.57079632679489661923 +#define CL_M_PI_4 0.78539816339744830962 +#define CL_M_1_PI 0.31830988618379067154 +#define CL_M_2_PI 0.63661977236758134308 +#define CL_M_2_SQRTPI 1.12837916709551257390 +#define CL_M_SQRT2 1.41421356237309504880 +#define CL_M_SQRT1_2 0.70710678118654752440 + +#define CL_M_E_F 2.718281828f +#define CL_M_LOG2E_F 1.442695041f +#define CL_M_LOG10E_F 0.434294482f +#define CL_M_LN2_F 0.693147181f +#define CL_M_LN10_F 2.302585093f +#define CL_M_PI_F 3.141592654f +#define CL_M_PI_2_F 1.570796327f +#define CL_M_PI_4_F 0.785398163f +#define CL_M_1_PI_F 0.318309886f +#define CL_M_2_PI_F 0.636619772f +#define CL_M_2_SQRTPI_F 1.128379167f +#define CL_M_SQRT2_F 1.414213562f +#define CL_M_SQRT1_2_F 0.707106781f + +#if defined( __GNUC__ ) + #define CL_HUGE_VALF __builtin_huge_valf() + #define CL_HUGE_VAL __builtin_huge_val() + #define CL_NAN __builtin_nanf( "" ) +#else + #define CL_HUGE_VALF ((cl_float) 1e50) + #define CL_HUGE_VAL ((cl_double) 1e500) + float nanf( const char * ); + #define CL_NAN nanf( "" ) +#endif +#define CL_MAXFLOAT CL_FLT_MAX +#define CL_INFINITY CL_HUGE_VALF + +#endif + +#include + +/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */ +typedef unsigned int cl_GLuint; +typedef int cl_GLint; +typedef unsigned int cl_GLenum; + +/* + * Vector types + * + * Note: OpenCL requires that all types be naturally aligned. + * This means that vector types must be naturally aligned. + * For example, a vector of four floats must be aligned to + * a 16 byte boundary (calculated as 4 * the natural 4-byte + * alignment of the float). The alignment qualifiers here + * will only function properly if your compiler supports them + * and if you don't actively work to defeat them. For example, + * in order for a cl_float4 to be 16 byte aligned in a struct, + * the start of the struct must itself be 16-byte aligned. + * + * Maintaining proper alignment is the user's responsibility. + */ + +/* Define basic vector types */ +#if defined( __VEC__ ) + #include /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */ + typedef __vector unsigned char __cl_uchar16; + typedef __vector signed char __cl_char16; + typedef __vector unsigned short __cl_ushort8; + typedef __vector signed short __cl_short8; + typedef __vector unsigned int __cl_uint4; + typedef __vector signed int __cl_int4; + typedef __vector float __cl_float4; + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_UINT4__ 1 + #define __CL_INT4__ 1 + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef float __cl_float4 __attribute__((vector_size(16))); + #else + typedef __m128 __cl_float4; + #endif + #define __CL_FLOAT4__ 1 +#endif + +#if defined( __SSE2__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16))); + typedef cl_char __cl_char16 __attribute__((vector_size(16))); + typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16))); + typedef cl_short __cl_short8 __attribute__((vector_size(16))); + typedef cl_uint __cl_uint4 __attribute__((vector_size(16))); + typedef cl_int __cl_int4 __attribute__((vector_size(16))); + typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16))); + typedef cl_long __cl_long2 __attribute__((vector_size(16))); + typedef cl_double __cl_double2 __attribute__((vector_size(16))); + #else + typedef __m128i __cl_uchar16; + typedef __m128i __cl_char16; + typedef __m128i __cl_ushort8; + typedef __m128i __cl_short8; + typedef __m128i __cl_uint4; + typedef __m128i __cl_int4; + typedef __m128i __cl_ulong2; + typedef __m128i __cl_long2; + typedef __m128d __cl_double2; + #endif + #define __CL_UCHAR16__ 1 + #define __CL_CHAR16__ 1 + #define __CL_USHORT8__ 1 + #define __CL_SHORT8__ 1 + #define __CL_INT4__ 1 + #define __CL_UINT4__ 1 + #define __CL_ULONG2__ 1 + #define __CL_LONG2__ 1 + #define __CL_DOUBLE2__ 1 +#endif + +#if defined( __MMX__ ) + #include + #if defined( __GNUC__ ) + typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8))); + typedef cl_char __cl_char8 __attribute__((vector_size(8))); + typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8))); + typedef cl_short __cl_short4 __attribute__((vector_size(8))); + typedef cl_uint __cl_uint2 __attribute__((vector_size(8))); + typedef cl_int __cl_int2 __attribute__((vector_size(8))); + typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8))); + typedef cl_long __cl_long1 __attribute__((vector_size(8))); + typedef cl_float __cl_float2 __attribute__((vector_size(8))); + #else + typedef __m64 __cl_uchar8; + typedef __m64 __cl_char8; + typedef __m64 __cl_ushort4; + typedef __m64 __cl_short4; + typedef __m64 __cl_uint2; + typedef __m64 __cl_int2; + typedef __m64 __cl_ulong1; + typedef __m64 __cl_long1; + typedef __m64 __cl_float2; + #endif + #define __CL_UCHAR8__ 1 + #define __CL_CHAR8__ 1 + #define __CL_USHORT4__ 1 + #define __CL_SHORT4__ 1 + #define __CL_INT2__ 1 + #define __CL_UINT2__ 1 + #define __CL_ULONG1__ 1 + #define __CL_LONG1__ 1 + #define __CL_FLOAT2__ 1 +#endif + +#if defined( __AVX__ ) + #if defined( __MINGW64__ ) + #include + #else + #include + #endif + #if defined( __GNUC__ ) + typedef cl_float __cl_float8 __attribute__((vector_size(32))); + typedef cl_double __cl_double4 __attribute__((vector_size(32))); + #else + typedef __m256 __cl_float8; + typedef __m256d __cl_double4; + #endif + #define __CL_FLOAT8__ 1 + #define __CL_DOUBLE4__ 1 +#endif + +/* Define capabilities for anonymous struct members. */ +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >= 1500 + /* Microsoft Developer Studio 2008 supports anonymous structs, but + * complains by default. */ + #define __CL_HAS_ANON_STRUCT__ 1 + #define __CL_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless + * struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) + #endif +#else +#define __CL_HAS_ANON_STRUCT__ 0 +#define __CL_ANON_STRUCT__ +#endif + +/* Define alignment keys */ +#if defined( __GNUC__ ) + #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) +#elif defined( _WIN32) && (_MSC_VER) + /* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */ + /* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */ + /* #include */ + /* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */ + #define CL_ALIGNED(_x) +#else + #warning Need to implement some method to align data here + #define CL_ALIGNED(_x) +#endif + +/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */ +#if __CL_HAS_ANON_STRUCT__ + /* .xyzw and .s0123...{f|F} are supported */ + #define CL_HAS_NAMED_VECTOR_FIELDS 1 + /* .hi and .lo are supported */ + #define CL_HAS_HI_LO_VECTOR_FIELDS 1 +#endif + +/* Define cl_vector types */ + +/* ---- cl_charn ---- */ +typedef union +{ + cl_char CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_char lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2; +#endif +}cl_char2; + +typedef union +{ + cl_char CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[2]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4; +#endif +}cl_char4; + +/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */ +typedef cl_char4 cl_char3; + +typedef union +{ + cl_char CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[4]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[2]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8; +#endif +}cl_char8; + +typedef union +{ + cl_char CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; }; +#endif +#if defined( __CL_CHAR2__) + __cl_char2 v2[8]; +#endif +#if defined( __CL_CHAR4__) + __cl_char4 v4[4]; +#endif +#if defined( __CL_CHAR8__ ) + __cl_char8 v8[2]; +#endif +#if defined( __CL_CHAR16__ ) + __cl_char16 v16; +#endif +}cl_char16; + + +/* ---- cl_ucharn ---- */ +typedef union +{ + cl_uchar CL_ALIGNED(2) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; }; +#endif +#if defined( __cl_uchar2__) + __cl_uchar2 v2; +#endif +}cl_uchar2; + +typedef union +{ + cl_uchar CL_ALIGNED(4) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[2]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4; +#endif +}cl_uchar4; + +/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */ +typedef cl_uchar4 cl_uchar3; + +typedef union +{ + cl_uchar CL_ALIGNED(8) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[4]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[2]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8; +#endif +}cl_uchar8; + +typedef union +{ + cl_uchar CL_ALIGNED(16) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; }; +#endif +#if defined( __CL_UCHAR2__) + __cl_uchar2 v2[8]; +#endif +#if defined( __CL_UCHAR4__) + __cl_uchar4 v4[4]; +#endif +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8[2]; +#endif +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16; +#endif +}cl_uchar16; + + +/* ---- cl_shortn ---- */ +typedef union +{ + cl_short CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_short lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2; +#endif +}cl_short2; + +typedef union +{ + cl_short CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[2]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4; +#endif +}cl_short4; + +/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */ +typedef cl_short4 cl_short3; + +typedef union +{ + cl_short CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[4]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[2]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8; +#endif +}cl_short8; + +typedef union +{ + cl_short CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; }; +#endif +#if defined( __CL_SHORT2__) + __cl_short2 v2[8]; +#endif +#if defined( __CL_SHORT4__) + __cl_short4 v4[4]; +#endif +#if defined( __CL_SHORT8__ ) + __cl_short8 v8[2]; +#endif +#if defined( __CL_SHORT16__ ) + __cl_short16 v16; +#endif +}cl_short16; + + +/* ---- cl_ushortn ---- */ +typedef union +{ + cl_ushort CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2; +#endif +}cl_ushort2; + +typedef union +{ + cl_ushort CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[2]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4; +#endif +}cl_ushort4; + +/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */ +typedef cl_ushort4 cl_ushort3; + +typedef union +{ + cl_ushort CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[4]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[2]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8; +#endif +}cl_ushort8; + +typedef union +{ + cl_ushort CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; }; +#endif +#if defined( __CL_USHORT2__) + __cl_ushort2 v2[8]; +#endif +#if defined( __CL_USHORT4__) + __cl_ushort4 v4[4]; +#endif +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8[2]; +#endif +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16; +#endif +}cl_ushort16; + + +/* ---- cl_halfn ---- */ +typedef union +{ + cl_half CL_ALIGNED(4) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_half lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2; +#endif +}cl_half2; + +typedef union +{ + cl_half CL_ALIGNED(8) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[2]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4; +#endif +}cl_half4; + +/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */ +typedef cl_half4 cl_half3; + +typedef union +{ + cl_half CL_ALIGNED(16) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[4]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[2]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8; +#endif +}cl_half8; + +typedef union +{ + cl_half CL_ALIGNED(32) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; }; +#endif +#if defined( __CL_HALF2__) + __cl_half2 v2[8]; +#endif +#if defined( __CL_HALF4__) + __cl_half4 v4[4]; +#endif +#if defined( __CL_HALF8__ ) + __cl_half8 v8[2]; +#endif +#if defined( __CL_HALF16__ ) + __cl_half16 v16; +#endif +}cl_half16; + +/* ---- cl_intn ---- */ +typedef union +{ + cl_int CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_int lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2; +#endif +}cl_int2; + +typedef union +{ + cl_int CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[2]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4; +#endif +}cl_int4; + +/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */ +typedef cl_int4 cl_int3; + +typedef union +{ + cl_int CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[4]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[2]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8; +#endif +}cl_int8; + +typedef union +{ + cl_int CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; }; +#endif +#if defined( __CL_INT2__) + __cl_int2 v2[8]; +#endif +#if defined( __CL_INT4__) + __cl_int4 v4[4]; +#endif +#if defined( __CL_INT8__ ) + __cl_int8 v8[2]; +#endif +#if defined( __CL_INT16__ ) + __cl_int16 v16; +#endif +}cl_int16; + + +/* ---- cl_uintn ---- */ +typedef union +{ + cl_uint CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_uint lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2; +#endif +}cl_uint2; + +typedef union +{ + cl_uint CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[2]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4; +#endif +}cl_uint4; + +/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */ +typedef cl_uint4 cl_uint3; + +typedef union +{ + cl_uint CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[4]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[2]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8; +#endif +}cl_uint8; + +typedef union +{ + cl_uint CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; }; +#endif +#if defined( __CL_UINT2__) + __cl_uint2 v2[8]; +#endif +#if defined( __CL_UINT4__) + __cl_uint4 v4[4]; +#endif +#if defined( __CL_UINT8__ ) + __cl_uint8 v8[2]; +#endif +#if defined( __CL_UINT16__ ) + __cl_uint16 v16; +#endif +}cl_uint16; + +/* ---- cl_longn ---- */ +typedef union +{ + cl_long CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_long lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2; +#endif +}cl_long2; + +typedef union +{ + cl_long CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[2]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4; +#endif +}cl_long4; + +/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */ +typedef cl_long4 cl_long3; + +typedef union +{ + cl_long CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[4]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[2]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8; +#endif +}cl_long8; + +typedef union +{ + cl_long CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; }; +#endif +#if defined( __CL_LONG2__) + __cl_long2 v2[8]; +#endif +#if defined( __CL_LONG4__) + __cl_long4 v4[4]; +#endif +#if defined( __CL_LONG8__ ) + __cl_long8 v8[2]; +#endif +#if defined( __CL_LONG16__ ) + __cl_long16 v16; +#endif +}cl_long16; + + +/* ---- cl_ulongn ---- */ +typedef union +{ + cl_ulong CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2; +#endif +}cl_ulong2; + +typedef union +{ + cl_ulong CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[2]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4; +#endif +}cl_ulong4; + +/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */ +typedef cl_ulong4 cl_ulong3; + +typedef union +{ + cl_ulong CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[4]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[2]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8; +#endif +}cl_ulong8; + +typedef union +{ + cl_ulong CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; }; +#endif +#if defined( __CL_ULONG2__) + __cl_ulong2 v2[8]; +#endif +#if defined( __CL_ULONG4__) + __cl_ulong4 v4[4]; +#endif +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8[2]; +#endif +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16; +#endif +}cl_ulong16; + + +/* --- cl_floatn ---- */ + +typedef union +{ + cl_float CL_ALIGNED(8) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_float lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2; +#endif +}cl_float2; + +typedef union +{ + cl_float CL_ALIGNED(16) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[2]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4; +#endif +}cl_float4; + +/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */ +typedef cl_float4 cl_float3; + +typedef union +{ + cl_float CL_ALIGNED(32) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[4]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[2]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8; +#endif +}cl_float8; + +typedef union +{ + cl_float CL_ALIGNED(64) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; }; +#endif +#if defined( __CL_FLOAT2__) + __cl_float2 v2[8]; +#endif +#if defined( __CL_FLOAT4__) + __cl_float4 v4[4]; +#endif +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8[2]; +#endif +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16; +#endif +}cl_float16; + +/* --- cl_doublen ---- */ + +typedef union +{ + cl_double CL_ALIGNED(16) s[2]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1; }; + __CL_ANON_STRUCT__ struct{ cl_double lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2; +#endif +}cl_double2; + +typedef union +{ + cl_double CL_ALIGNED(32) s[4]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; }; + __CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[2]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4; +#endif +}cl_double4; + +/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */ +typedef cl_double4 cl_double3; + +typedef union +{ + cl_double CL_ALIGNED(64) s[8]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; }; + __CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[4]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[2]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8; +#endif +}cl_double8; + +typedef union +{ + cl_double CL_ALIGNED(128) s[16]; +#if __CL_HAS_ANON_STRUCT__ + __CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; }; + __CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; }; + __CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; }; +#endif +#if defined( __CL_DOUBLE2__) + __cl_double2 v2[8]; +#endif +#if defined( __CL_DOUBLE4__) + __cl_double4 v4[4]; +#endif +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8[2]; +#endif +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16; +#endif +}cl_double16; + +/* Macro to facilitate debugging + * Usage: + * Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source. + * The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \" + * Each line thereafter of OpenCL C source must end with: \n\ + * The last line ends in "; + * + * Example: + * + * const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\ + * kernel void foo( int a, float * b ) \n\ + * { \n\ + * // my comment \n\ + * *b[ get_global_id(0)] = a; \n\ + * } \n\ + * "; + * + * This should correctly set up the line, (column) and file information for your source + * string so you can do source level debugging. + */ +#define __CL_STRINGIFY( _x ) # _x +#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x ) +#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n" + +#ifdef __cplusplus +} +#endif + +#undef __CL_HAS_ANON_STRUCT__ +#undef __CL_ANON_STRUCT__ +#if defined( _WIN32) && defined(_MSC_VER) + #if _MSC_VER >=1500 + #pragma warning( pop ) + #endif +#endif + +#endif /* __CL_PLATFORM_H */ diff --git a/CLW/CL/cl_version.h b/CLW/CL/cl_version.h new file mode 100644 index 00000000..bb766cb9 --- /dev/null +++ b/CLW/CL/cl_version.h @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2018 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __CL_VERSION_H +#define __CL_VERSION_H + +/* Detect which version to target */ +#if !defined(CL_TARGET_OPENCL_VERSION) +#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)") +#define CL_TARGET_OPENCL_VERSION 220 +#endif +#if CL_TARGET_OPENCL_VERSION != 100 && \ + CL_TARGET_OPENCL_VERSION != 110 && \ + CL_TARGET_OPENCL_VERSION != 120 && \ + CL_TARGET_OPENCL_VERSION != 200 && \ + CL_TARGET_OPENCL_VERSION != 210 && \ + CL_TARGET_OPENCL_VERSION != 220 +#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)") +#undef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 220 +#endif + + +/* OpenCL Version */ +#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2) +#define CL_VERSION_2_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1) +#define CL_VERSION_2_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0) +#define CL_VERSION_2_0 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2) +#define CL_VERSION_1_2 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1) +#define CL_VERSION_1_1 1 +#endif +#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0) +#define CL_VERSION_1_0 1 +#endif + +/* Allow deprecated APIs for older OpenCL versions. */ +#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +#define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif + +#endif /* __CL_VERSION_H */ diff --git a/CLW/CL/lib/OpenCL.lib b/CLW/CL/lib/OpenCL.lib new file mode 100644 index 00000000..cacfadb6 Binary files /dev/null and b/CLW/CL/lib/OpenCL.lib differ diff --git a/CLW/CL/lib/libOpenCL.a b/CLW/CL/lib/libOpenCL.a new file mode 100644 index 00000000..ef645779 Binary files /dev/null and b/CLW/CL/lib/libOpenCL.a differ diff --git a/CMakeLists.txt b/CMakeLists.txt index f7d96316..ced8258d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(RR_USE_VULKAN "Use vulkan for GPU hit testing" OFF) option(RR_NO_TESTS "Don't add any unit tests and remove any test functionality from the library" OFF) option(RR_ENABLE_STATIC "Create static libraries rather than dynamic" OFF) option(RR_SHARED_CALC "Link Calc(compute abstraction layer) dynamically" OFF) +option(RR_ENABLE_RAYACTIVEFLAG "Enable ray masking in intersection kernels" OFF) option(RR_ENABLE_RAYMASK "Enable ray masking in intersection kernels" OFF) option(RR_ENABLE_BACKFACE_CULL "Enable backface culling in intersection kernels" OFF) #option(RR_TUTORIALS "Add tutorials projects" OFF) @@ -23,6 +24,7 @@ elseif (UNIX) add_definitions(-fvisibility=hidden) endif (WIN32) +set(CMAKE_DEBUG_POSTFIX D) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) @@ -35,17 +37,44 @@ find_package(Threads) find_package(PythonInterp 2.7 REQUIRED) set (STRINGIFY_SCRIPT ${RadeonRaysSDK_SOURCE_DIR}/Tools/scripts/stringify.py) -set (EMBREE_INCLUDE_PATH ${RadeonRaysSDK_SOURCE_DIR}/3rdParty/embree/include) +set (EMBREE_INCLUDE_PATH ${RadeonRaysSDK_SOURCE_DIR}/3rdparty/embree/include) if (WIN32) - set (EMBREE_LIB ${RadeonRaysSDK_SOURCE_DIR}/3rdParty/embree/lib/x64/embree.lib) + set (EMBREE_LIB ${RadeonRaysSDK_SOURCE_DIR}/3rdparty/embree/lib/x64/embree.lib) elseif (APPLE) - set (EMBREE_LIB ${RadeonRaysSDK_SOURCE_DIR}/3rdParty/embree/lib/x64/libembree.2.dylib) + set (EMBREE_LIB ${RadeonRaysSDK_SOURCE_DIR}/3rdparty/embree/lib/x64/libembree.2.dylib) elseif (UNIX) set (EMBREE_LIB embree2) endif (WIN32) if (RR_USE_OPENCL) - find_package(OpenCL REQUIRED) + set(OpenCL_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/CLW/CL") + find_library(OpenCL_LIBRARY + NAMES OpenCL + PATHS "${CMAKE_CURRENT_SOURCE_DIR}/CLW/CL/lib" + ) + + if(NOT IS_ABSOLUTE ${OpenCL_LIBRARY}) + set(p "${CMAKE_CURRENT_SOURCE_DIR}/${OpenCL_LIBRARY}") + endif() + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args( + OpenCL + FOUND_VAR OpenCL_FOUND + REQUIRED_VARS OpenCL_LIBRARY OpenCL_INCLUDE_DIRS + ) + + mark_as_advanced( + OpenCL_INCLUDE_DIRS + OpenCL_LIBRARY + ) + + add_library(OpenCL::OpenCL UNKNOWN IMPORTED) + set_target_properties(OpenCL::OpenCL PROPERTIES + IMPORTED_LOCATION "${OpenCL_LIBRARY}") + set_target_properties(OpenCL::OpenCL PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OpenCL_INCLUDE_DIRS}") + add_definitions(-DUSE_OPENCL=1) add_subdirectory(CLW) @@ -66,5 +95,3 @@ if (NOT RR_NO_TESTS) add_subdirectory(Gtest) add_subdirectory(UnitTest) endif (NOT RR_NO_TESTS) - - diff --git a/Calc/inc/device.h b/Calc/inc/device.h index 67204b29..eed94b3a 100644 --- a/Calc/inc/device.h +++ b/Calc/inc/device.h @@ -77,6 +77,9 @@ namespace Calc virtual Buffer* CreateBuffer(std::size_t size, std::uint32_t flags, void* initdata) = 0; virtual void DeleteBuffer(Buffer* buffer) = 0; + // Unity hack for accessing internal BVH + virtual void* GetNativeHandle(Buffer const* buffer) const = 0; + // Data movement // Calls are blocking if passed nullptr for an event, otherwise use Event to sync virtual void ReadBuffer(Buffer const* buffer, std::uint32_t queue, std::size_t offset, std::size_t size, void* dst, Event** e) const = 0; diff --git a/Calc/src/device_clw.cpp b/Calc/src/device_clw.cpp index d1f3f532..7e4c218e 100644 --- a/Calc/src/device_clw.cpp +++ b/Calc/src/device_clw.cpp @@ -282,6 +282,14 @@ namespace Calc delete buffer; } + void* DeviceClw::GetNativeHandle(Buffer const* buffer) const + { + BufferClw const* bufferClw = static_cast(buffer); + if (!bufferClw) + return nullptr; + return bufferClw->GetData(); + } + void DeviceClw::ReadBuffer(Buffer const* buffer, std::uint32_t queue, std::size_t offset, std::size_t size, void* dst, Event** e) const { auto buffer_clw = static_cast(buffer); @@ -415,7 +423,7 @@ namespace Calc int numheaders, char const* options ) - { + { try { std::string buildopts = options ? options : ""; @@ -561,7 +569,7 @@ namespace Calc { m_event_pool.push(e); } - + Buffer* DeviceClw::CreateBuffer(cl_mem buffer) { try @@ -574,7 +582,6 @@ namespace Calc } } - class PrimitivesClw : public Primitives { public: diff --git a/Calc/src/device_clw.h b/Calc/src/device_clw.h index ef04f4d5..f5762977 100644 --- a/Calc/src/device_clw.h +++ b/Calc/src/device_clw.h @@ -23,7 +23,7 @@ THE SOFTWARE. #include "device.h" #include "device_cl.h" -#include "CLW.h" +#include "../../CLW/CLW.h" #include @@ -48,6 +48,9 @@ namespace Calc Buffer* CreateBuffer(std::size_t size, std::uint32_t flags, void* initdata) override; void DeleteBuffer(Buffer* buffer) override; + // Unity hack for accessing internal BVH + void* GetNativeHandle(Buffer const* buffer) const override; + // Data movement void ReadBuffer(Buffer const* buffer, std::uint32_t queue, std::size_t offset, std::size_t size, void* dst, Event** e) const override; void WriteBuffer(Buffer const* buffer, std::uint32_t queue, std::size_t offset, std::size_t size, void* src, Event** e) override; diff --git a/Calc/src/device_vkw.cpp b/Calc/src/device_vkw.cpp index 2c86f986..10e8eab6 100644 --- a/Calc/src/device_vkw.cpp +++ b/Calc/src/device_vkw.cpp @@ -173,6 +173,11 @@ namespace Calc } } + void* DeviceVulkanw::GetNativeHandle(Buffer const* buffer) const + { + return nullptr; + } + // Data movement void DeviceVulkanw::ReadBuffer( Buffer const* buffer, std::uint32_t queue, std::size_t offset, std::size_t size, void* dst, Event** e ) const { diff --git a/Calc/src/device_vkw.h b/Calc/src/device_vkw.h index a6cac3e2..be9f51aa 100644 --- a/Calc/src/device_vkw.h +++ b/Calc/src/device_vkw.h @@ -54,6 +54,9 @@ namespace Calc Buffer* CreateBuffer( std::size_t size, std::uint32_t flags, void* initdata ) override; void DeleteBuffer( Buffer* buffer ) override; + // Unity hack for accessing internal BVH + void* GetNativeHandle(Buffer const* buffer) const override; + // Data movement void ReadBuffer( Buffer const* buffer, std::uint32_t queue, std::size_t offset, std::size_t size, void* dst, Event** e ) const override; void WriteBuffer( Buffer const* buffer, std::uint32_t queue, std::size_t offset, std::size_t size, void* src, Event** e ) override; diff --git a/RadeonRays/CMakeLists.txt b/RadeonRays/CMakeLists.txt index 321a3177..d0bd46ca 100644 --- a/RadeonRays/CMakeLists.txt +++ b/RadeonRays/CMakeLists.txt @@ -191,6 +191,9 @@ if (RR_USE_EMBREE) target_link_libraries(RadeonRays PUBLIC ${EMBREE_LIB}) endif (RR_USE_EMBREE) +if (RR_ENABLE_RAYACTIVEFLAG) + target_compile_definitions(RadeonRays PRIVATE RR_RAY_ACTIVE_FLAG) +endif (RR_ENABLE_RAYACTIVEFLAG) if (RR_ENABLE_RAYMASK) target_compile_definitions(RadeonRays PRIVATE RR_RAY_MASK) endif (RR_ENABLE_RAYMASK) @@ -217,7 +220,9 @@ endif (RR_USE_VULKAN) target_compile_features(RadeonRays PRIVATE cxx_std_14) if (UNIX AND NOT APPLE) + # TODO minimum SSE level is 4.1 for Windows and macOS? target_compile_options(RadeonRays PUBLIC -msse4.2 -fPIC) + target_link_libraries(RadeonRays PUBLIC dl) target_link_libraries(RadeonRays INTERFACE "-Wl,--no-undefined") #read version from header @@ -226,7 +231,7 @@ if (UNIX AND NOT APPLE) set_target_properties(RadeonRays PROPERTIES SOVERSION ${RR_API_VERSION}) elseif (APPLE) - target_compile_options(RadeonRays PUBLIC -stdlib=libc++) + target_compile_options(RadeonRays PUBLIC -stdlib=libc++ -msse4.1 -mmacosx-version-min=10.10) endif (UNIX AND NOT APPLE) diff --git a/RadeonRays/include/radeon_rays.h b/RadeonRays/include/radeon_rays.h index 17d26b16..daf16420 100644 --- a/RadeonRays/include/radeon_rays.h +++ b/RadeonRays/include/radeon_rays.h @@ -29,10 +29,10 @@ THE SOFTWARE. #include "math/ray.h" #include "math/mathutils.h" #include - + #define RADEONRAYS_API_VERSION 2.0 -#if !RR_STATIC_LIBRARY +#ifndef RR_STATIC_LIBRARY #ifdef WIN32 #ifdef EXPORT_API #define RRAPI __declspec(dllexport) @@ -143,12 +143,8 @@ namespace RadeonRays Id shapeid; // Primitve ID Id primid; - - int padding0; - int padding1; - // UV parametrization - float4 uvwt; + float2 uv; Intersection(); }; @@ -169,6 +165,15 @@ namespace RadeonRays // class RRAPI IntersectionApi { + public: + + enum GpuDataType + { + kGpuData_BvhBuffer = 0, + kGpuData_VerticesBuffer, + kGpuData_FacesBuffer + }; + public: /****************************************** @@ -276,6 +281,11 @@ namespace RadeonRays // The call is asynchronous. Event pointer mights be nullptrs. virtual void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitresults, Event const* waitevent, Event** event) const = 0; + /****************************************** + Unity hack for accessing internal BVH + ******************************************/ + // Gets the gpu data for current intersector/device + virtual void *GetGpuData( GpuDataType type ) const = 0; /****************************************** Utility ******************************************/ diff --git a/RadeonRays/include/radeon_rays_cl.h b/RadeonRays/include/radeon_rays_cl.h index 751760b1..a0602372 100644 --- a/RadeonRays/include/radeon_rays_cl.h +++ b/RadeonRays/include/radeon_rays_cl.h @@ -26,7 +26,7 @@ THE SOFTWARE. #include "radeon_rays.h" -#if USE_OPENCL +#ifdef USE_OPENCL #ifdef __APPLE__ #include #else diff --git a/RadeonRays/src/accelerator/bvh2.cpp b/RadeonRays/src/accelerator/bvh2.cpp index c52d7a4d..bf470922 100644 --- a/RadeonRays/src/accelerator/bvh2.cpp +++ b/RadeonRays/src/accelerator/bvh2.cpp @@ -35,7 +35,21 @@ namespace RadeonRays inline void *Align(std::size_t alignment, std::size_t size, std::size_t space, void *ptr) { +#if !defined(__GNUC__) || __GNUC__ >= 5 return std::align(alignment, size, ptr, space); +#else + // gcc4 doesn't have std::align + const auto intptr = reinterpret_cast(ptr); + const auto aligned = (intptr - 1u + alignment) & -alignment; + const auto diff = aligned - intptr; + if ((size + diff) > space) + return nullptr; + else + { + space -= diff; + return ptr = reinterpret_cast(aligned); + } +#endif } #ifdef __GNUC__ @@ -225,13 +239,12 @@ namespace RadeonRays // Mutex to guard cv std::mutex mutex; // Indicates if we need to shutdown all the threads - std::atomic shutdown; + bool shutdown = false; // Number of primitives processed so far std::atomic num_refs_processed; + num_refs_processed.store(0); - num_refs_processed.store(0); - shutdown.store(false); - + // Push root request requests.push(SplitRequest{ scene_min, scene_max, @@ -243,33 +256,42 @@ namespace RadeonRays 0u }); + // Worker build function auto worker_thread = [&]() { + // Local stack for requests thread_local std::stack local_requests; + // Thread loop for (;;) { // Wait for signal { + // Wait on the global stack to receive a request std::unique_lock lock(mutex); cv.wait(lock, [&]() { return !requests.empty() || shutdown; }); + // If we have been awaken by shutdown, we need to leave asap if (shutdown) return; - + // Otherwise take a request from global stack and put it + // into our local stack local_requests.push(requests.top()); requests.pop(); } + // Allocated space for requests _MM_ALIGN16 SplitRequest request; _MM_ALIGN16 SplitRequest request_left; _MM_ALIGN16 SplitRequest request_right; - // Process local requests + // Start handling local stack of requests while (!local_requests.empty()) { + // Pop next request request = local_requests.top(); local_requests.pop(); + // Handle it auto node_type = HandleRequest( request, aabb_min, @@ -281,28 +303,40 @@ namespace RadeonRays request_left, request_right); + // If it is a leaf, update number of processed primitives + // and continue if (node_type == kLeaf) { num_refs_processed += static_cast(request.num_refs); continue; } - if (request_right.num_refs > 4096u) + // Here we know we have just built and internal node, + // so we are going to handle its left child on this thread and + // its right child on: + // - this thread if it is small + // - another thread if it is huge (since this one is going to handle left child) + if (request_right.num_refs > 2048u) { + // Put request into the global queue std::unique_lock lock(mutex); requests.push(request_right); + // Wake up one of the workers cv.notify_one(); } else { + // Put small request into the local queue local_requests.push(request_right); } + // Put left request to local stack (always handled on this thread) local_requests.push(request_left); } } }; + // Launch several threads auto num_threads = std::thread::hardware_concurrency(); std::vector threads(num_threads); @@ -311,14 +345,18 @@ namespace RadeonRays threads[i] = std::thread(worker_thread); } + // Wait until all primitives are handled while (num_refs_processed != num_aabbs) { std::this_thread::sleep_for(std::chrono::milliseconds(20)); } // Signal shutdown and wake up all the threads - shutdown.store(true); + { + std::unique_lock lock(mutex); + shutdown = true; cv.notify_all(); + } // Wait for all the threads to finish for (auto i = 0u; i < num_threads; ++i) diff --git a/RadeonRays/src/accelerator/bvh2.h b/RadeonRays/src/accelerator/bvh2.h index a80b580a..acd10298 100644 --- a/RadeonRays/src/accelerator/bvh2.h +++ b/RadeonRays/src/accelerator/bvh2.h @@ -56,6 +56,11 @@ namespace RadeonRays , m_nodecount(0) { } + + ~Bvh2() + { + this->Clear(); + } // Build function template diff --git a/RadeonRays/src/api/radeon_rays.cpp b/RadeonRays/src/api/radeon_rays.cpp index 199a2e6d..c1664fa6 100644 --- a/RadeonRays/src/api/radeon_rays.cpp +++ b/RadeonRays/src/api/radeon_rays.cpp @@ -52,11 +52,11 @@ THE SOFTWARE. #define HANDLE_TYPE HMODULE #ifndef _DEBUG -#define LIBNAME "Calc64.dll" -#define LONGNAME "../Bin/Release/x64/##LIBNAME" +#define LIBNAME "Calc.dll" +#define LONGNAME "../bin/Release/##LIBNAME" #else -#define LIBNAME "Calc64D.dll" -#define LONGNAME "../Bin/Debug/x64/##LIBNAME" +#define LIBNAME "CalcD.dll" +#define LONGNAME "../bin/Debug/##LIBNAME" #endif #elif __linux__ // Linux @@ -66,11 +66,11 @@ THE SOFTWARE. #define HANDLE_TYPE void* #ifndef _DEBUG -#define LIBNAME "libCalc64.so" -#define LONGNAME "../Bin/Release/x64/##LIBNAME" +#define LIBNAME "libCalc.so" +#define LONGNAME "../bin/Release/##LIBNAME" #else -#define LIBNAME "libCalc64D.so" -#define LONGNAME "../Bin/Debug/x64/##LIBNAME" +#define LIBNAME "libCalcD.so" +#define LONGNAME "../bin/Debug/##LIBNAME" #endif #else // MacOS @@ -80,11 +80,11 @@ THE SOFTWARE. #define HANDLE_TYPE void* #ifndef _DEBUG -#define LIBNAME "libCalc64.dylib" -#define LONGNAME "../Bin/Release/x64/##LIBNAME" +#define LIBNAME "libCalc.dylib" +#define LONGNAME "../bin/Release/##LIBNAME" #else -#define LIBNAME "libCalc64D.dylib" -#define LONGNAME "../Bin/Debug/x64/##LIBNAME" +#define LIBNAME "libCalcD.dylib" +#define LONGNAME "../bin/Debug/##LIBNAME" #endif #endif #endif @@ -287,6 +287,19 @@ namespace RadeonRays #ifdef USE_OPENCL RRAPI IntersectionApi* CreateFromOpenClContext(cl_context context, cl_device_id device, cl_command_queue queue) { + if (s_calc_platform == DeviceInfo::kEmbree) + { + IntersectionApi* api = nullptr; +#ifdef USE_EMBREE + api = IntersectionApi::Create(0); + if (!api) + return nullptr; + EmbreeIntersectionDevice* device = static_cast(static_cast(api)->GetDevice()); + device->SetCommandQueue(queue); +#endif + return api; + } + auto calc = dynamic_cast(GetCalcOpenCL()); if (calc) diff --git a/RadeonRays/src/api/radeon_rays_impl.cpp b/RadeonRays/src/api/radeon_rays_impl.cpp index 10b2e787..6c7f50d8 100644 --- a/RadeonRays/src/api/radeon_rays_impl.cpp +++ b/RadeonRays/src/api/radeon_rays_impl.cpp @@ -141,6 +141,11 @@ namespace RadeonRays m_device->QueryOcclusion(rays, numrays, maxrays, hitresults, waitevent, event); } + void* IntersectionApiImpl::GetGpuData( GpuDataType type ) const + { + return m_device->GetGpuData(type); + } + void IntersectionApiImpl::DeleteEvent(Event* event) const { m_device->DeleteEvent(event); diff --git a/RadeonRays/src/api/radeon_rays_impl.h b/RadeonRays/src/api/radeon_rays_impl.h index bde5fa04..ec155acd 100644 --- a/RadeonRays/src/api/radeon_rays_impl.h +++ b/RadeonRays/src/api/radeon_rays_impl.h @@ -121,6 +121,11 @@ namespace RadeonRays // The call is asynchronous. Event pointer mights be nullptrs. void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitresults, Event const* waitevent, Event** event) const override; + /****************************************** + Unity hack + ******************************************/ + void *GetGpuData( GpuDataType type ) const override; + /****************************************** Utility ******************************************/ diff --git a/RadeonRays/src/device/calc_intersection_device.cpp b/RadeonRays/src/device/calc_intersection_device.cpp index d13d5a14..9d4c20f2 100644 --- a/RadeonRays/src/device/calc_intersection_device.cpp +++ b/RadeonRays/src/device/calc_intersection_device.cpp @@ -91,7 +91,8 @@ namespace RadeonRays // Get implementation auto shapeimpl = static_cast(shape); // Check if it is an instance and update flag - use2level = use2level | shapeimpl->is_instance(); + if (shapeimpl->is_instance()) + use2level = true; } } } @@ -335,7 +336,11 @@ namespace RadeonRays { m_intersector->QueryOcclusion(0, ray_buffer, numrays_buffer, maxrays, hit_buffer, e, nullptr); } + } + void* CalcIntersectionDevice::GetGpuData( IntersectionApi::GpuDataType type ) const + { + return m_intersector->GetGpuData(type); } CalcEventHolder* CalcIntersectionDevice::CreateEventHolder() const diff --git a/RadeonRays/src/device/calc_intersection_device.h b/RadeonRays/src/device/calc_intersection_device.h index bca52bce..2652b519 100644 --- a/RadeonRays/src/device/calc_intersection_device.h +++ b/RadeonRays/src/device/calc_intersection_device.h @@ -66,6 +66,9 @@ namespace RadeonRays void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitresults, Event const* waitevent, Event** event) const override; + void *GetGpuData( IntersectionApi::GpuDataType type ) const override; + + Calc::Platform GetPlatform() const { return m_device->GetPlatform(); } protected: CalcEventHolder* CreateEventHolder() const; diff --git a/RadeonRays/src/device/embree_intersection_device.cpp b/RadeonRays/src/device/embree_intersection_device.cpp index 61b80ec9..14746b01 100644 --- a/RadeonRays/src/device/embree_intersection_device.cpp +++ b/RadeonRays/src/device/embree_intersection_device.cpp @@ -109,7 +109,8 @@ namespace RadeonRays }; EmbreeIntersectionDevice::EmbreeIntersectionDevice() - : m_pool(1) + : m_command_queue(nullptr) + , m_pool(1) { m_device = rtcNewDevice(nullptr); RTCError result = rtcDeviceGetError(m_device); @@ -488,12 +489,23 @@ namespace RadeonRays void EmbreeIntersectionDevice::QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const { - Throw("Not implemented for embree device."); + if (m_command_queue) + clFinish(m_command_queue); // wait for kernels to complete so numrays is available + const EmbreeBuffer* fireNumrays = dynamic_cast(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer."); + QueryIntersection(rays, std::min(*static_cast(fireNumrays->GetData()), maxrays), hits, waitevent, event); } void EmbreeIntersectionDevice::QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const { - Throw("Not implemented for embree device."); + if (m_command_queue) + clFinish(m_command_queue); // wait for kernels to complete so numrays is available + const EmbreeBuffer* fireNumrays = dynamic_cast(numrays); ThrowIf(!fireNumrays, "Invalid embree buffer."); + QueryOcclusion(rays, std::min(*static_cast(fireNumrays->GetData()), maxrays), hits, waitevent, event); + } + + void* EmbreeIntersectionDevice::GetGpuData( IntersectionApi::GpuDataType type ) const + { + return nullptr; } RTCScene EmbreeIntersectionDevice::GetEmbreeMesh(const RadeonRays::Mesh* mesh) diff --git a/RadeonRays/src/device/embree_intersection_device.h b/RadeonRays/src/device/embree_intersection_device.h index 14cf218d..15ff6232 100644 --- a/RadeonRays/src/device/embree_intersection_device.h +++ b/RadeonRays/src/device/embree_intersection_device.h @@ -27,6 +27,12 @@ THE SOFTWARE. #include #include "../async/thread_pool.h" +#ifdef __APPLE__ +#include +#else +#include +#endif + namespace RadeonRays { class Mesh; @@ -53,7 +59,12 @@ namespace RadeonRays void QueryOcclusion(Buffer const* rays, int numrays, Buffer* hitresults, Event const* waitevent, Event** event) const override; void QueryIntersection(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitinfos, Event const* waitevent, Event** event) const override; void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hitresults, Event const* waitevent, Event** event) const override; - + + void *GetGpuData( IntersectionApi::GpuDataType type ) const override; + + inline cl_command_queue GetCommandQueue() const { return m_command_queue; } + inline void SetCommandQueue(cl_command_queue command_queue) { m_command_queue = command_queue; } + protected: RTCScene GetEmbreeMesh(const Mesh*); void UpdateShape(const ShapeImpl*); @@ -65,9 +76,10 @@ namespace RadeonRays // embree device RTCDevice m_device; - + // Unity hack: OpenCL command queue + cl_command_queue m_command_queue; // scene for intersection - RTCScene m_scene; + RTCScene m_scene; //thread pool for parallelizing work with buffers mutable thread_pool m_pool; diff --git a/RadeonRays/src/device/intersection_device.h b/RadeonRays/src/device/intersection_device.h index 76b7c97e..b717bcd4 100644 --- a/RadeonRays/src/device/intersection_device.h +++ b/RadeonRays/src/device/intersection_device.h @@ -88,7 +88,10 @@ namespace RadeonRays // The call waits until waitevent is resolved (on a target device) if waitevent != nullptr. // The call is non-blocking if event is passed it, otherwise (event == nullptr) it is blocking. virtual void QueryOcclusion(Buffer const* rays, Buffer const* numrays, int maxrays, Buffer* hits, Event const* waitevent, Event** event) const = 0; - + + // Gets the BVH + virtual void* GetGpuData( IntersectionApi::GpuDataType type ) const = 0; + IntersectionDevice(IntersectionDevice const&) = delete; IntersectionDevice& operator = (IntersectionDevice const&) = delete; }; diff --git a/RadeonRays/src/intersector/intersector.cpp b/RadeonRays/src/intersector/intersector.cpp index f8f8a726..3ae658f0 100644 --- a/RadeonRays/src/intersector/intersector.cpp +++ b/RadeonRays/src/intersector/intersector.cpp @@ -24,7 +24,7 @@ namespace RadeonRays { return true; } - + void Intersector::QueryIntersection(std::uint32_t queue_idx, Calc::Buffer const *rays, std::uint32_t num_rays, Calc::Buffer *hits, Calc::Event const *wait_event, Calc::Event **event) const { @@ -52,4 +52,10 @@ namespace RadeonRays { Occluded(queue_idx, rays, num_rays, max_rays, hits, wait_event, event); } + + void* Intersector::GetGpuData( IntersectionApi::GpuDataType type ) const + { + return nullptr; + } + } diff --git a/RadeonRays/src/intersector/intersector.h b/RadeonRays/src/intersector/intersector.h index 1e613197..4105e7af 100644 --- a/RadeonRays/src/intersector/intersector.h +++ b/RadeonRays/src/intersector/intersector.h @@ -135,6 +135,12 @@ namespace RadeonRays void QueryOcclusion(std::uint32_t queue_idx, Calc::Buffer const* rays, Calc::Buffer const* num_rays, std::uint32_t max_rays, Calc::Buffer* hits, Calc::Event const* wait_event, Calc::Event** event) const; + /* + \brief Gets the gpu data + \returns The requested gpu data + */ + virtual void *GetGpuData( IntersectionApi::GpuDataType type ) const; + // Disallow intersector copies Intersector(Intersector const&) = delete; Intersector& operator = (Intersector const&) = delete; @@ -144,6 +150,7 @@ namespace RadeonRays virtual void Process(World const& world) = 0; // Compatibility check implemetation virtual bool IsCompatibleImpl(World const& world) const; + // Intersection implementation virtual void Intersect(std::uint32_t queue_idx, Calc::Buffer const *rays, Calc::Buffer const *num_rays, std::uint32_t max_rays, Calc::Buffer *hits, diff --git a/RadeonRays/src/intersector/intersector_2level.cpp b/RadeonRays/src/intersector/intersector_2level.cpp index 1745b8dd..7846b9a0 100644 --- a/RadeonRays/src/intersector/intersector_2level.cpp +++ b/RadeonRays/src/intersector/intersector_2level.cpp @@ -261,7 +261,7 @@ namespace RadeonRays // Create actual BVH objects for (int i = 0; i < nummeshes + 1; ++i) { - m_bvhs[i] = std::make_unique(traversal_cost, num_bins, use_sah); + m_bvhs[i] = std::unique_ptr(new Bvh(traversal_cost, num_bins, use_sah)); m_cpudata->bvhptrs[i] = m_bvhs[i].get(); } @@ -589,7 +589,7 @@ namespace RadeonRays use_sah = true; } - m_bvhs[nummeshes] = std::make_unique(traversal_cost, num_bins, use_sah); + m_bvhs[nummeshes] = std::unique_ptr(new Bvh(traversal_cost, num_bins, use_sah)); m_bvhs[nummeshes]->Build(&object_bounds[0], nummeshes + numinstances); m_cpudata->bvhptrs[nummeshes] = m_bvhs[nummeshes].get(); diff --git a/RadeonRays/src/intersector/intersector_bittrail.cpp b/RadeonRays/src/intersector/intersector_bittrail.cpp index 72215479..b338f5ca 100644 --- a/RadeonRays/src/intersector/intersector_bittrail.cpp +++ b/RadeonRays/src/intersector/intersector_bittrail.cpp @@ -89,6 +89,9 @@ namespace RadeonRays , m_bvh(nullptr) { std::string buildopts; +#ifdef RR_RAY_ACTIVE_FLAG + buildopts.append("-D RR_RAY_ACTIVE_FLAG "); +#endif #ifdef RR_RAY_MASK buildopts.append("-D RR_RAY_MASK "); #endif diff --git a/RadeonRays/src/intersector/intersector_hlbvh.cpp b/RadeonRays/src/intersector/intersector_hlbvh.cpp index 65186020..714f5462 100644 --- a/RadeonRays/src/intersector/intersector_hlbvh.cpp +++ b/RadeonRays/src/intersector/intersector_hlbvh.cpp @@ -80,6 +80,9 @@ namespace RadeonRays , m_bvh(nullptr) { std::string buildopts; +#ifdef RR_RAY_ACTIVE_FLAG + buildopts.append("-D RR_RAY_ACTIVE_FLAG "); +#endif #ifdef RR_RAY_MASK buildopts.append("-D RR_RAY_MASK "); #endif @@ -148,7 +151,7 @@ namespace RadeonRays std::vector mesh_faces_start_idx(numshapes); // - m_bvh = std::make_unique(m_device); + m_bvh = std::unique_ptr(new Hlbvh(m_device)); // Here we now that only Meshes are present, otherwise 2level strategy would have been used for (int i = 0; i < numshapes; ++i) diff --git a/RadeonRays/src/intersector/intersector_lds.cpp b/RadeonRays/src/intersector/intersector_lds.cpp index d3f7bb0d..eaa30b0d 100644 --- a/RadeonRays/src/intersector/intersector_lds.cpp +++ b/RadeonRays/src/intersector/intersector_lds.cpp @@ -24,9 +24,11 @@ THE SOFTWARE. #include "calc.h" #include "executable.h" #include "../accelerator/bvh2.h" +#include "../device/calc_holder.h" #include "../primitive/mesh.h" #include "../primitive/instance.h" #include "../translator/q_bvh_translator.h" +#include "../../Calc/src/device_clw.h" #include "../world/world.h" namespace RadeonRays @@ -97,6 +99,9 @@ namespace RadeonRays , m_gpudata(new GpuData(device)) { std::string buildopts; +#ifdef RR_RAY_ACTIVE_FLAG + buildopts.append("-D RR_RAY_ACTIVE_FLAG "); +#endif #ifdef RR_RAY_MASK buildopts.append("-D RR_RAY_MASK "); #endif @@ -150,9 +155,11 @@ namespace RadeonRays #endif #endif - m_gpudata->bvh_prog.isect_func = m_gpudata->bvh_prog.executable->CreateFunction("intersect_main"); - m_gpudata->bvh_prog.occlude_func = m_gpudata->bvh_prog.executable->CreateFunction("occluded_main"); - + if (m_gpudata->bvh_prog.executable) + { + m_gpudata->bvh_prog.isect_func = m_gpudata->bvh_prog.executable->CreateFunction("intersect_main"); + m_gpudata->bvh_prog.occlude_func = m_gpudata->bvh_prog.executable->CreateFunction("occluded_main"); + } if (m_gpudata->qbvh_prog.executable) { m_gpudata->qbvh_prog.isect_func = m_gpudata->qbvh_prog.executable->CreateFunction("intersect_main"); @@ -263,6 +270,14 @@ namespace RadeonRays } } + void *IntersectorLDS::GetGpuData( IntersectionApi::GpuDataType type ) const + { + if (m_device->GetPlatform() == Calc::Platform::kOpenCL && type == IntersectionApi::kGpuData_BvhBuffer ) + return static_cast(m_device)->GetNativeHandle(m_gpudata->bvh); + + return nullptr; + } + void IntersectorLDS::Intersect(std::uint32_t queue_idx, const Calc::Buffer *rays, const Calc::Buffer *num_rays, std::uint32_t max_rays, Calc::Buffer *hits, const Calc::Event *wait_event, Calc::Event **event) const diff --git a/RadeonRays/src/intersector/intersector_lds.h b/RadeonRays/src/intersector/intersector_lds.h index b81894ed..8f3bd0fa 100644 --- a/RadeonRays/src/intersector/intersector_lds.h +++ b/RadeonRays/src/intersector/intersector_lds.h @@ -33,9 +33,12 @@ namespace RadeonRays // Constructor IntersectorLDS(Calc::Device *device); + void *GetGpuData( IntersectionApi::GpuDataType type ) const override; + private: // World preprocessing implementation void Process(const World &world) override; + // Intersection implementation void Intersect(std::uint32_t queue_idx, const Calc::Buffer *rays, const Calc::Buffer *num_rays, std::uint32_t max_rays, Calc::Buffer *hits, diff --git a/RadeonRays/src/intersector/intersector_short_stack.cpp b/RadeonRays/src/intersector/intersector_short_stack.cpp index 074239ca..abfee7bd 100644 --- a/RadeonRays/src/intersector/intersector_short_stack.cpp +++ b/RadeonRays/src/intersector/intersector_short_stack.cpp @@ -84,6 +84,9 @@ namespace RadeonRays , m_bvh(nullptr) { std::string buildopts; +#ifdef RR_RAY_ACTIVE_FLAG + buildopts.append("-D RR_RAY_ACTIVE_FLAG "); +#endif #ifdef RR_RAY_MASK buildopts.append("-D RR_RAY_MASK "); #endif diff --git a/RadeonRays/src/intersector/intersector_skip_links.cpp b/RadeonRays/src/intersector/intersector_skip_links.cpp index 1021f500..533c68f7 100644 --- a/RadeonRays/src/intersector/intersector_skip_links.cpp +++ b/RadeonRays/src/intersector/intersector_skip_links.cpp @@ -33,6 +33,8 @@ THE SOFTWARE. #include "executable.h" #include +#include "../../Calc/src/device_clw.h" + // Preferred work group size for Radeon devices static int const kWorkGroupSize = 64; @@ -59,6 +61,8 @@ namespace RadeonRays , vertices(nullptr) , faces(nullptr) , executable(nullptr) + , isect_func(nullptr) + , occlude_func(nullptr) { } @@ -123,11 +127,6 @@ namespace RadeonRays } #endif #endif - - assert(m_gpudata->executable); - - m_gpudata->isect_func = m_gpudata->executable->CreateFunction("intersect_main"); - m_gpudata->occlude_func = m_gpudata->executable->CreateFunction("occluded_main"); } void IntersectorSkipLinks::Process(World const& world) @@ -447,4 +446,20 @@ namespace RadeonRays m_device->Execute(func, queueidx, globalsize, localsize, event); } + void* IntersectorSkipLinks::GetGpuData( IntersectionApi::GpuDataType type ) const + { + if (m_device->GetPlatform() == Calc::Platform::kOpenCL) + { + Calc::DeviceClw *clw = static_cast(m_device); + switch(type) + { + case IntersectionApi::kGpuData_BvhBuffer : return clw->GetNativeHandle(m_gpudata->bvh); + case IntersectionApi::kGpuData_VerticesBuffer: return clw->GetNativeHandle(m_gpudata->vertices); + case IntersectionApi::kGpuData_FacesBuffer : return clw->GetNativeHandle(m_gpudata->faces); + } + } + + return nullptr; + } + } diff --git a/RadeonRays/src/intersector/intersector_skip_links.h b/RadeonRays/src/intersector/intersector_skip_links.h index 3d5b1bcb..f8c7673b 100644 --- a/RadeonRays/src/intersector/intersector_skip_links.h +++ b/RadeonRays/src/intersector/intersector_skip_links.h @@ -77,6 +77,8 @@ namespace RadeonRays // Constructor IntersectorSkipLinks(Calc::Device* device); + void *GetGpuData( IntersectionApi::GpuDataType type ) const override; + private: // Preprocess implementation void Process(World const& world) override; diff --git a/RadeonRays/src/kernels/CL/common.cl b/RadeonRays/src/kernels/CL/common.cl index 753aef7b..aada9437 100644 --- a/RadeonRays/src/kernels/CL/common.cl +++ b/RadeonRays/src/kernels/CL/common.cl @@ -64,9 +64,7 @@ typedef struct { int shape_id; int prim_id; - int2 padding; - - float4 uvwt; + float2 uv; } Intersection; @@ -82,7 +80,11 @@ int ray_get_mask(ray const* r) INLINE int ray_is_active(ray const* r) { +#ifdef RR_RAY_ACTIVE_FLAG return r->extra.y; +#else + return true; +#endif } INLINE diff --git a/RadeonRays/src/kernels/CL/intersect_bvh2_bittrail.cl b/RadeonRays/src/kernels/CL/intersect_bvh2_bittrail.cl index 4b51565a..82a1737b 100644 --- a/RadeonRays/src/kernels/CL/intersect_bvh2_bittrail.cl +++ b/RadeonRays/src/kernels/CL/intersect_bvh2_bittrail.cl @@ -437,7 +437,7 @@ KERNEL void intersect_main( // Update hit information hits[global_id].shape_id = node.shape_id; hits[global_id].prim_id = node.prim_id; - hits[global_id].uvwt = make_float4(uv.x, uv.y, 0.f, t_max); + hits[global_id].uv = make_float2(uv.x, uv.y); } else { diff --git a/RadeonRays/src/kernels/CL/intersect_bvh2_lds.cl b/RadeonRays/src/kernels/CL/intersect_bvh2_lds.cl index f5aee6cc..0ce0ee06 100644 --- a/RadeonRays/src/kernels/CL/intersect_bvh2_lds.cl +++ b/RadeonRays/src/kernels/CL/intersect_bvh2_lds.cl @@ -62,7 +62,7 @@ INLINE float2 fast_intersect_bbox2(float3 pmin, float3 pmax, float3 invdir, floa return (float2)(t0, t1); } -__attribute__((reqd_work_group_size(64, 1, 1))) +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) KERNEL void intersect_main( // Bvh nodes GLOBAL const bvh_node *restrict nodes, @@ -213,7 +213,7 @@ KERNEL void intersect_main( // Update hit information hits[index].prim_id = GetPrimId(node); hits[index].shape_id = GetMeshId(node); - hits[index].uvwt = (float4)(uv.x, uv.y, 0.0f, closest_t); + hits[index].uv = (float2)(uv.x, uv.y); } else { @@ -225,7 +225,7 @@ KERNEL void intersect_main( } } -__attribute__((reqd_work_group_size(64, 1, 1))) +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) KERNEL void occluded_main( // Bvh nodes GLOBAL const bvh_node *restrict nodes, diff --git a/RadeonRays/src/kernels/CL/intersect_bvh2_lds_fp16.cl b/RadeonRays/src/kernels/CL/intersect_bvh2_lds_fp16.cl index 3acda2a1..f9e0ed26 100644 --- a/RadeonRays/src/kernels/CL/intersect_bvh2_lds_fp16.cl +++ b/RadeonRays/src/kernels/CL/intersect_bvh2_lds_fp16.cl @@ -127,7 +127,7 @@ INLINE void stack_push( *lds_sptr = *lds_sptr + 1; } -__attribute__((reqd_work_group_size(64, 1, 1))) +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) KERNEL void intersect_main( // Bvh nodes GLOBAL const bvh_node *restrict nodes, @@ -301,7 +301,7 @@ KERNEL void intersect_main( // Update hit information hits[index].prim_id = node.aabb23_min_or_v2_and_addr2_or_prim_id.w; hits[index].shape_id = node.aabb01_max_or_v1_and_addr1_or_mesh_id.w; - hits[index].uvwt = (float4)(uv.x, uv.y, 0.0f, closest_t); + hits[index].uv = (float2)(uv.x, uv.y); } else { @@ -313,7 +313,7 @@ KERNEL void intersect_main( } } -__attribute__((reqd_work_group_size(64, 1, 1))) +__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) KERNEL void occluded_main( // Bvh nodes GLOBAL const bvh_node *restrict nodes, diff --git a/RadeonRays/src/kernels/CL/intersect_bvh2_short_stack.cl b/RadeonRays/src/kernels/CL/intersect_bvh2_short_stack.cl index 72c91ec8..2ee1b482 100644 --- a/RadeonRays/src/kernels/CL/intersect_bvh2_short_stack.cl +++ b/RadeonRays/src/kernels/CL/intersect_bvh2_short_stack.cl @@ -453,7 +453,7 @@ KERNEL void intersect_main( // Update hit information hits[global_id].shape_id = node.shape_id; hits[global_id].prim_id = node.prim_id; - hits[global_id].uvwt = make_float4(uv.x, uv.y, 0.f, t_max); + hits[global_id].uv = make_float2(uv.x, uv.y); } else { diff --git a/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl b/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl index e00c94f1..a8c8bc80 100644 --- a/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl +++ b/RadeonRays/src/kernels/CL/intersect_bvh2_skiplinks.cl @@ -190,7 +190,7 @@ void intersect_main( // Update hit information hits[global_id].shape_id = face.shape_id; hits[global_id].prim_id = face.prim_id; - hits[global_id].uvwt = make_float4(uv.x, uv.y, 0.f, t_max); + hits[global_id].uv = make_float2(uv.x, uv.y); } else { diff --git a/RadeonRays/src/kernels/CL/intersect_bvh2level_skiplinks.cl b/RadeonRays/src/kernels/CL/intersect_bvh2level_skiplinks.cl index bced56c2..997c6978 100644 --- a/RadeonRays/src/kernels/CL/intersect_bvh2level_skiplinks.cl +++ b/RadeonRays/src/kernels/CL/intersect_bvh2level_skiplinks.cl @@ -294,7 +294,7 @@ KERNEL void intersect_main( // Update hit information hits[global_id].shape_id = closest_shape_id; hits[global_id].prim_id = closest_prim_id; - hits[global_id].uvwt = make_float4(closest_barycentrics.x, closest_barycentrics.y, 0.f, t_max); + hits[global_id].uv = make_float2(closest_barycentrics.x, closest_barycentrics.y); } else { diff --git a/RadeonRays/src/kernels/CL/intersect_hlbvh_stack.cl b/RadeonRays/src/kernels/CL/intersect_hlbvh_stack.cl index cb229edc..1f6913f9 100644 --- a/RadeonRays/src/kernels/CL/intersect_hlbvh_stack.cl +++ b/RadeonRays/src/kernels/CL/intersect_hlbvh_stack.cl @@ -411,7 +411,7 @@ KERNEL void intersect_main( // Update hit information hits[global_id].shape_id = face.shape_id; hits[global_id].prim_id = face.prim_id; - hits[global_id].uvwt = make_float4(uv.x, uv.y, 0.f, t_max); + hits[global_id].uv = make_float2(uv.x,uv.y); } else { diff --git a/UnitTest/CMakeLists.txt b/UnitTest/CMakeLists.txt index eeb17872..076a9292 100644 --- a/UnitTest/CMakeLists.txt +++ b/UnitTest/CMakeLists.txt @@ -44,6 +44,9 @@ endif (RR_USE_EMBREE) add_executable(UnitTest ${SOURCES}) target_link_libraries(UnitTest PRIVATE GTest RadeonRays Calc) +set_target_properties(UnitTest PROPERTIES + VS_DEBUGGER_WORKING_DIRECTORY ${RadeonRaysSDK_SOURCE_DIR}/RadeonRays) + #Add root for unittests. They use private headers target_include_directories(UnitTest PRIVATE "${RadeonRaysSDK_SOURCE_DIR}") @@ -53,7 +56,6 @@ else (NOT RR_SHARED_CALC) target_compile_definitions(UnitTest PRIVATE CALC_STATIC_LIBRARY) endif (RR_SHARED_CALC) - target_compile_features(UnitTest PRIVATE cxx_std_11) if (APPLE) target_compile_options(UnitTest PRIVATE -stdlib=libc++) @@ -71,7 +73,9 @@ if (RR_USE_VULKAN) target_compile_definitions(UnitTest PRIVATE USE_VULKAN=1) endif (RR_USE_VULKAN) - +if (RR_ENABLE_RAYACTIVEFLAG) + target_compile_definitions(UnitTest PRIVATE RR_RAY_ACTIVE_FLAG) +endif (RR_ENABLE_RAYACTIVEFLAG) if (RR_ENABLE_RAYMASK) target_compile_definitions(UnitTest PRIVATE RR_RAY_MASK) endif (RR_ENABLE_RAYMASK) diff --git a/UnitTest/radeon_rays_apitest_embree.h b/UnitTest/radeon_rays_apitest_embree.h index f335e097..505d767a 100644 --- a/UnitTest/radeon_rays_apitest_embree.h +++ b/UnitTest/radeon_rays_apitest_embree.h @@ -268,8 +268,13 @@ TEST_F(ApiBackendEmbree, Intersection_1Ray) } +#ifdef RR_RAY_MASK // The test creates a single triangle mesh and tests attach/detach functionality TEST_F(ApiBackendEmbree, Intersection_1Ray_Masked) +#else +// The test creates a single triangle mesh and tests attach/detach functionality +TEST_F(ApiBackendEmbree, DISABLED_Intersection_1Ray_Masked) +#endif { Shape* mesh = nullptr; diff --git a/build.cmd b/build.cmd new file mode 100644 index 00000000..3b96d5f0 --- /dev/null +++ b/build.cmd @@ -0,0 +1,2 @@ +@echo off +perl build.pl \ No newline at end of file diff --git a/build.pl b/build.pl new file mode 100644 index 00000000..2692004e --- /dev/null +++ b/build.pl @@ -0,0 +1,136 @@ +#!/usr/bin/env perl -w +use Cwd qw(getcwd); +use File::Path; + +my $pathToLib; +BEGIN { $pathToLib = getcwd . '/3rdparty/Perl/lib' } +use lib $pathToLib; +use File::Copy::Recursive qw(fcopy dircopy); +use Config; +use Archive::Zip; +use SDKDownloader; + +my $buildCommandPrefix = ''; +sub CheckInstallSDK +{ + print 'Setting up the Linux SDK'; + SDKDownloader::PrepareSDK('linux-sdk', '20180928', "artifacts"); + $buildCommandPrefix = "schroot -c $ENV{LINUX_BUILD_ENVIRONMENT} --"; +} + +my $err; # used by CheckFileError + +my $mac = "cmake -DCMAKE_BUILD_TYPE=Release -DRR_USE_EMBREE=OFF -DRR_USE_OPENCL=ON -DRR_EMBED_KERNELS=OFF -DRR_SAFE_MATH=ON -DRR_SHARED_CALC=OFF"; +my $linuxD = "cmake -DCMAKE_BUILD_TYPE=Debug -DRR_USE_EMBREE=OFF -DRR_USE_OPENCL=ON -DRR_EMBED_KERNELS=OFF -DRR_SAFE_MATH=ON -DRR_SHARED_CALC=OFF -DRR_ENABLE_STATIC=ON -DRR_USE_VULKAN=OFF"; +my $linuxR = "cmake -DCMAKE_BUILD_TYPE=Release -DRR_USE_EMBREE=OFF -DRR_USE_OPENCL=ON -DRR_EMBED_KERNELS=OFF -DRR_SAFE_MATH=ON -DRR_SHARED_CALC=OFF -DRR_ENABLE_STATIC=ON -DRR_USE_VULKAN=OFF"; +my $windows = "cmake -G \"Visual Studio 14 2015 Win64\" -DRR_USE_EMBREE=OFF -DRR_USE_OPENCL=ON -DRR_EMBED_KERNELS=ON -DRR_SAFE_MATH=ON -DRR_SHARED_CALC=ON -DCMAKE_PREFIX_PATH=3rdparty/opencl"; + +sub BuildRadeonRays +{ + my $cmakeString = shift; + system("$buildCommandPrefix $cmakeString") && die("cmake failed"); + if ($Config{osname} eq "MSWin32") + { + system("\"C:/Program Files (x86)/Microsoft Visual Studio 14.0/Common7/IDE/devenv.exe\" RadeonRaysSDK.sln /Build Debug"); + system("\"C:/Program Files (x86)/Microsoft Visual Studio 14.0/Common7/IDE/devenv.exe\" RadeonRaysSDK.sln /Build RelWithDebInfo"); + } + else + { + system("$buildCommandPrefix make") && die("Failed make"); + } +} + +sub CopyHeaders +{ + mkpath('artifacts/include', {error => \ $err} ); + CheckFileError(); + dircopy("RadeonRays/include", "artifacts/include") or die("Failed to copy RadeonRays headers."); + dircopy("Calc/inc", "artifacts/include") or die("Failed to copy Calc headers."); +} + +sub CheckFileError +{ + if (@$err) + { + for my $diag (@$err) + { + my ($file, $message) = %$diag; + if ($file eq '') + { + die("general error: $message\n"); + } + else + { + die("problem unlinking $file: $message\n"); + } + } + } +} + + +mkpath('artifacts', {error => \ $err} ); +CheckFileError(); +mkpath('artifacts/lib', {error => \ $err} ); +CheckFileError(); +mkpath('builds', {error => \ $err} ); +CheckFileError(); + +if ($Config{osname} eq "darwin") +{ + BuildRadeonRays($mac); + mkpath('artifacts/lib/macOS', {error => \ $err} ); + CheckFileError(); + fcopy("bin/libRadeonRays.dylib", "artifacts/lib/macOS/libRadeonRays.dylib") or die "Copy of libRadeonRays.dylib failed: $!"; +} + +if ($Config{osname} eq "linux") +{ + CheckInstallSDK(); + mkpath('artifacts/lib/linux', {error => \ $err} ); + CheckFileError(); + + BuildRadeonRays($linuxD); + fcopy("RadeonRays/libRadeonRaysD.a", "artifacts/lib/linux/libRadeonRaysD.a") or die "Copy of libRadeonRaysD.a failed: $!"; + fcopy("Calc/libCalcD.a", "artifacts/lib/linux/libCalcD.a") or die "Copy of libCalcD.a failed: $!"; + fcopy("CLW/libCLWD.a", "artifacts/lib/linux/libCLWD.a") or die "Copy of libCLWD.a failed: $!"; + + BuildRadeonRays($linuxR); + fcopy("RadeonRays/libRadeonRays.a", "artifacts/lib/linux/libRadeonRays.a") or die "Copy of libRadeonRays.a failed: $!"; + fcopy("Calc/libCalc.a", "artifacts/lib/linux/libCalc.a") or die "Copy of libCalc.a failed: $!"; + fcopy("CLW/libCLW.a", "artifacts/lib/linux/libCLW.a") or die "Copy of libCLW.a failed: $!"; + + system("rm -r artifacts/SDKDownloader") && die("Unable to clean up SDKDownloader directory."); +} + +if ($Config{osname} eq "MSWin32") +{ + BuildRadeonRays($windows); + + # copy dll files + mkpath('artifacts/bin', {error => \ $err} ); + CheckFileError(); + mkpath('artifacts/bin/Windows', {error => \ $err} ); + CheckFileError(); + + # Release + fcopy("bin/RelWithDebInfo/Calc.dll", "artifacts/bin/Windows/Calc.dll") or die "Copy of Calc.dll failed: $!"; + fcopy("bin/RelWithDebInfo/Calc.pdb", "artifacts/bin/Windows/Calc.pdb") or die "Copy of Calc.pdb failed: $!"; + fcopy("bin/RelWithDebInfo/RadeonRays.dll", "artifacts/bin/Windows/RadeonRays.dll") or die "Copy of RadeonRays.dll failed: $!"; + fcopy("bin/RelWithDebInfo/RadeonRays.pdb", "artifacts/bin/Windows/RadeonRays.pdb") or die "Copy of RadeonRays.pdb failed: $!"; + + # Debug + fcopy("bin/Debug/CalcD.dll", "artifacts/bin/Windows/CalcD.dll") or die "Copy of CalcD.dll failed: $!"; + fcopy("bin/Debug/CalcD.pdb", "artifacts/bin/Windows/CalcD.pdb") or die "Copy of CalcD.pdb failed: $!"; + fcopy("bin/Debug/RadeonRaysD.dll", "artifacts/bin/Windows/RadeonRaysD.dll") or die "Copy of RadeonRaysD.dll failed: $!"; + fcopy("bin/Debug/RadeonRaysD.pdb", "artifacts/bin/Windows/RadeonRaysD.pdb") or die "Copy of RadeonRaysD.pdb failed: $!"; + + # write build version.txt, only needed once as ACompleteBuild will combine all artifacts. + my $branch = qx("git symbolic-ref -q HEAD"); + my $revision = qx("git rev-parse HEAD"); + open(BUILD_INFO_FILE, '>', "artifacts/version.txt") or die("Unable to write build information to version.txt"); + print BUILD_INFO_FILE "$branch"; + print BUILD_INFO_FILE "$revision"; + close(BUILD_INFO_FILE); +} + +CopyHeaders(); \ No newline at end of file diff --git a/completebuild.pl b/completebuild.pl new file mode 100644 index 00000000..385a25af --- /dev/null +++ b/completebuild.pl @@ -0,0 +1 @@ +system("zip -r builds.zip *") eq 0 or die("failed creating builds.zip"); \ No newline at end of file