forked from openshmem-org/specification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathshmem_broadcast.tex
133 lines (111 loc) · 6.43 KB
/
shmem_broadcast.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
\apisummary{
Broadcasts a block of data from one \ac{PE} to one or more destination
\acp{PE}.
}
\begin{apidefinition}
\begin{Csynopsis}
void shmem_broadcast32(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync);
void shmem_broadcast64(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync);
\end{Csynopsis}
\begin{Fsynopsis}
INTEGER nelems, PE_root, PE_start, logPE_stride, PE_size
INTEGER pSync(SHMEM_BCAST_SYNC_SIZE)
CALL SHMEM_BROADCAST4(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync)
CALL SHMEM_BROADCAST8(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync)
CALL SHMEM_BROADCAST32(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync)
CALL SHMEM_BROADCAST64(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync)
\end{Fsynopsis}
\begin{apiarguments}
\apiargument{OUT}{dest}{A symmetric data object.}
\apiargument{IN}{source}{A symmetric data object that can be of any data type
that is permissible for the \dest{} argument.}
\apiargument{IN}{nelems}{The number of elements in \source. For
\FUNC{shmem\_broadcast32} and \FUNC{shmem\_broadcast4}, this is the number of
32-bit halfwords. \VAR{nelems} must be of type \VAR{size\_t} in \Cstd. When
using \Fortran, it must be a default integer value.}
\apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to
the \activeset, from which the data is copied. Must be greater than or equal to
0 and less than \VAR{PE\_size}. \VAR{PE\_root} must be of type integer. When using \Fortran, it must be a default integer value.}
\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of
\acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran,
it must be a default integer value.}
\apiargument{IN}{logPE\_stride}{ The log (base 2) of the stride between
consecutive \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of
type integer. When using \Fortran, it must be a default integer value.}
\apiargument{IN}{PE\_size}{ The number of \acp{PE} in the \activeset.
\VAR{PE\_size} must be of type integer. When using \Fortran, it must be a
default integer value.}
\apiargument{IN}{pSync}{ A symmetric work array. In \CorCpp, \VAR{pSync} must
be of type long and size \CONST{SHMEM\_BCAST\_SYNC\_SIZE}. In \Fortran,
\VAR{pSync} must be of type integer and size \CONST{SHMEM\_BCAST\_SYNC\_SIZE}.
Every element of this array must be initialized with the value
\CONST{SHMEM\_SYNC\_VALUE} (in both \CorCpp{} and \Fortran) before any of the
\acp{PE} in the \activeset{} enter the broadcast routine.}
\end{apiarguments}
\apidescription{
\openshmem broadcast routines are collective routines. They copy data object
\source{} on the \ac{PE} specified by \VAR{PE\_root} and store the values at
\dest{} on the other \acp{PE} specified by the triplet \VAR{PE\_start},
\VAR{logPE\_stride}, \VAR{PE\_size}. The data is not copied to the \dest{} area
on the root \ac{PE}.
As with all \openshmem collective routines, each of these routines assumes that
only \acp{PE} in the \activeset{} call the routine. If a \ac{PE} not in the
\activeset{} calls an \openshmem collective routine, undefined behavior results.
The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride},
and \VAR{PE\_size} must be equal on all \acp{PE} in the \activeset. The same
\dest{} and \source{} data objects and the same \VAR{pSync} work array must be
passed by all \acp{PE} in the \activeset.
Before any \ac{PE} calls a broadcast routine, the following
conditions must be met (synchronization via a barrier or some other method is often
needed to ensure this): The \VAR{pSync} array on all \acp{PE} in the
\activeset{} is not still in use from a prior call to a broadcast routine. The
\dest{} array on all \acp{PE} in the \activeset{} is ready to accept the
broadcast data.
Upon return from a broadcast routine, the following are true for the local
\ac{PE}: If the current \ac{PE} is not the root \ac{PE}, the \dest{} data object
is updated. The \source{} data object may be safely reused.
The values in the \VAR{pSync} array are restored to the original values.
}
\apidesctable{
The \dest{} and \source{} data objects must conform to certain typing
constraints, which are as follows:
}{Routine}{Data type of \VAR{dest} and \VAR{source}}
\apitablerow{shmem\_broadcast8, shmem\_broadcast64}{Any noncharacter
type that has an element size of \CONST{64} bits. No \Fortran derived types or
\CorCpp{} structures are allowed.}
\apitablerow{shmem\_broadcast4, shmem\_broadcast32}{Any noncharacter
type that has an element size of \CONST{32} bits. No \Fortran
derived types or \CorCpp{} structures are allowed.}
\apireturnvalues{
None.
}
\apinotes{
All \openshmem broadcast routines restore \VAR{pSync} to its original contents.
Multiple calls to \openshmem routines that use the same \VAR{pSync} array do not
require that \VAR{pSync} be reinitialized after the first call.
The user must ensure that the \VAR{pSync} array is not being updated by any
\ac{PE} in the \activeset{} while any of the \acp{PE} is participating in the
processing of an \openshmem broadcast routine. In particular, note the following: If the
\VAR{pSync} array is initialized at run time, some type of synchronization is
needed to ensure that all \acp{PE} in the \activeset{} have initialized
\VAR{pSync} before any of them enter an \openshmem routine called with the
\VAR{pSync} synchronization array. A \VAR{pSync} array may be reused on a
subsequent \openshmem broadcast routine only if none of the \acp{PE} in the
\activeset{} are still processing a prior \openshmem broadcast routine call that
used the same \VAR{pSync} array. In general, this can be ensured only by doing
some type of synchronization.
}
\begin{apiexamples}
\apicexample
{In the following examples, the call to \FUNC{shmem\_broadcast64} copies \source{}
on \ac{PE} 4 to \dest{} on \acp{PE} 5, 6, and 7.
\CorCpp{} example:}
{./example_code/shmem_broadcast_example.c}
{}
\apifexample
{\Fortran example:}
{./example_code/shmem_broadcast_example.f90}
{}
\end{apiexamples}
\end{apidefinition}