21 #ifndef MCA_COLL_TUNED_EXPORT_H
22 #define MCA_COLL_TUNED_EXPORT_H
24 #include "ompi_config.h"
32 #include "coll_tuned_topo.h"
35 #include "coll_tuned_dynamic_rules.h"
38 typedef enum COLLTYPE {
/*
 * Formal parameter lists, one macro per collective operation.
 *
 * Each macro expands to the complete parameter list used by the prototypes
 * of that collective further down in this header, so that all functions
 * of one collective are declared with the same signature.  Every list ends
 * with the communicator (comm) and the base collective module (module).
 * The BCAST, GATHER, GATHERV, REDUCE, SCATTER and SCATTERV lists also carry
 * an "int root" parameter; the V/W variants pass counts and displacements
 * as int pointers instead of single ints.
 */
59 #define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
60 #define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
61 #define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
62 #define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
63 #define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
64 #define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
65 #define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module
66 #define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
67 #define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
68 #define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
69 #define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
70 #define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
71 #define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
72 #define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
73 #define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
74 #define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
/*
 * Component-wide variables.  They are only declared here (extern); the
 * definitions live in another translation unit.
 * NOTE(review): by name these look like the values of the component's
 * run-time parameters (output stream, priority, dynamic-rules switch and
 * file name, initial fan-out / request limits) -- confirm against the
 * component source before relying on that.
 */
80 extern int ompi_coll_tuned_stream;
81 extern int ompi_coll_tuned_priority;
82 extern int ompi_coll_tuned_preallocate_memory_comm_size_limit;
83 extern int ompi_coll_tuned_use_dynamic_rules;
84 extern char* ompi_coll_tuned_dynamic_rules_filename;
85 extern int ompi_coll_tuned_init_tree_fanout;
86 extern int ompi_coll_tuned_init_chain_fanout;
87 extern int ompi_coll_tuned_init_max_requests;
93 int algorithm_param_index;
94 int segsize_param_index;
95 int tree_fanout_param_index;
96 int chain_fanout_param_index;
97 int max_requests_param_index;
/*
 * Table with one int per collective type (it is sized by COLLCOUNT).
 * NOTE(review): presumably the number of selectable algorithms for each
 * collective -- confirm where the array is filled in.
 */
117 extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
/*
 * Component query entry point.
 *
 * @param enable_progress_threads  flag passed in by the caller
 * @param enable_mpi_threads       flag passed in by the caller
 * @return int status; the meaning of the value is not visible in this
 *         header.
 */
125 int ompi_coll_tuned_init_query(
bool enable_progress_threads,
126 bool enable_mpi_threads);
/*
 * Allgather declarations.  Every function takes ALLGATHER_ARGS and returns
 * an int; *_do_this additionally takes an algorithm number, a fan-in/out
 * value and a segment size.
 * NOTE(review): by naming, *_dec_fixed / *_dec_dynamic look like the
 * fixed and dynamic-rule decision entry points, *_do_forced / *_do_this the
 * forced-algorithm dispatchers, and the remaining names the individual
 * algorithms -- confirm against the implementation files.
 */
141 int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS);
142 int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS);
143 int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS);
144 int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS,
int algorithm,
int faninout,
int segsize);
146 int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS);
147 int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS);
148 int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS);
149 int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS);
150 int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS);
151 int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS);
152 int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS);
153 int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS);
/*
 * Allgatherv declarations.  Every function takes ALLGATHERV_ARGS (receive
 * counts and displacements are int pointers) and returns an int;
 * *_do_this additionally takes an algorithm number, a fan-in/out value and
 * a segment size.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
156 int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS);
157 int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
158 int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS);
159 int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS,
int algorithm,
int faninout,
int segsize);
161 int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS);
162 int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS);
163 int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS);
164 int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS);
165 int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS);
166 int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS);
167 int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS);
/*
 * Allreduce declarations.  Every function takes ALLREDUCE_ARGS and returns
 * an int.  *_do_this additionally takes an algorithm number, a fan-in/out
 * value and a segment size; *_ring_segmented takes an extra unsigned
 * 32-bit segsize.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
170 int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS);
171 int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
172 int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS);
173 int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS,
int algorithm,
int faninout,
int segsize);
175 int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS);
176 int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS);
177 int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS);
178 int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize);
179 int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS);
180 int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS);
181 int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS);
/*
 * Alltoall declarations.  Every function takes ALLTOALL_ARGS and returns an
 * int.  *_do_this additionally takes an algorithm number, a fan-in/out
 * value, a segment size and max_requests; *_linear_sync takes an extra
 * max_requests as well.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
184 int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS);
185 int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
186 int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS);
187 int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS,
int algorithm,
int faninout,
int segsize,
int max_requests);
189 int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS);
190 int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS);
191 int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS);
192 int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS,
int max_requests);
193 int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS);
194 int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS);
195 int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS);
/*
 * Alltoallv declarations.  Every function takes ALLTOALLV_ARGS and returns
 * an int.  Unlike the other collectives' *_do_this, this one takes only an
 * algorithm number in addition (no fan-in/out or segment size).
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
198 int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS);
199 int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS);
200 int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS);
201 int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS,
int algorithm);
203 int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS);
204 int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS);
205 int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS);
206 int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS);
/*
 * Alltoallw declarations.  Only the four intra/inter dec_fixed/dec_dynamic
 * functions are declared for this collective; all take ALLTOALLW_ARGS and
 * return an int.
 */
209 int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS);
210 int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS);
211 int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS);
212 int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS);
/*
 * Barrier declarations.  Every function takes BARRIER_ARGS (communicator
 * and module only) and returns an int; *_do_this additionally takes an
 * algorithm number, a fan-in/out value and a segment size.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
215 int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS);
216 int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS);
217 int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS);
218 int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS,
int algorithm,
int faninout,
int segsize);
220 int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS);
221 int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS);
222 int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS);
223 int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS);
224 int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS);
225 int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS);
226 int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS);
227 int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS);
/*
 * Bcast declarations.  Every function takes BCAST_ARGS and returns an int.
 *  - *_intra_generic additionally takes a per-segment element count and a
 *    pointer to the tree (ompi_coll_tree_t) to use.
 *  - *_do_this additionally takes an algorithm number, a fan-in/out value
 *    and a segment size.
 *  - chain / pipeline / binomial / bintree / split_bintree each take an
 *    unsigned 32-bit segsize; chain also takes the number of chains.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
230 int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t count_by_segment,
ompi_coll_tree_t* tree );
231 int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
232 int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
233 int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS);
234 int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS,
int algorithm,
int faninout,
int segsize);
236 int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS);
237 int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
238 int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);
239 int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize);
240 int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize);
241 int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize);
242 int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS);
243 int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS);
/*
 * Exscan declarations.  Only the four intra/inter dec_fixed/dec_dynamic
 * functions are declared for this collective; all take EXSCAN_ARGS and
 * return an int.
 */
246 int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS);
247 int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS);
248 int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS);
249 int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS);
/*
 * Gather declarations.  Every function takes GATHER_ARGS and returns an
 * int.  *_do_this additionally takes an algorithm number, a fan-in/out
 * value and a segment size; *_linear_sync takes an extra
 * first_segment_size.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
252 int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS);
253 int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS);
254 int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS);
255 int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS,
int algorithm,
int faninout,
int segsize);
257 int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS);
258 int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS);
259 int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS,
int first_segment_size);
260 int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS);
261 int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS);
/*
 * Gatherv declarations.  Only the four intra/inter dec_fixed/dec_dynamic
 * functions are declared for this collective.  All of them take
 * GATHERV_ARGS, i.e. the receive counts and displacements are passed as
 * int pointers (rcounts, disps); the GATHER_ARGS list, with its single
 * "int rcount" and no displacements, does not describe a gatherv call.
 */
264 int ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS);
265 int ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHERV_ARGS);
266 int ompi_coll_tuned_gatherv_inter_dec_fixed(GATHERV_ARGS);
267 int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHERV_ARGS);
/*
 * Reduce declarations.  Every function takes REDUCE_ARGS and returns an
 * int.
 *  - *_reduce_generic additionally takes the tree (ompi_coll_tree_t) to
 *    use, a per-segment element count and max_outstanding_reqs.
 *  - *_do_this additionally takes an algorithm number, a fan-in/out value,
 *    a segment size and max_outstanding_reqs.
 *  - chain / pipeline / binary / binomial / in_order_binary each take an
 *    unsigned 32-bit segsize and max_outstanding_reqs; chain also takes a
 *    fanout.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
270 int ompi_coll_tuned_reduce_generic( REDUCE_ARGS,
ompi_coll_tree_t* tree,
int count_by_segment,
int max_outstanding_reqs );
271 int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS);
272 int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
273 int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS);
274 int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS,
int algorithm,
int faninout,
int segsize,
int max_outstanding_reqs);
276 int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS);
277 int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize,
int fanout,
int max_outstanding_reqs );
278 int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize,
int max_outstanding_reqs );
279 int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize,
int max_outstanding_reqs );
280 int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize,
int max_outstanding_reqs );
281 int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize,
int max_outstanding_reqs );
282 int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS);
283 int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS);
/*
 * Reduce_scatter declarations.  Every function takes REDUCESCATTER_ARGS
 * (receive counts are an int pointer) and returns an int; *_do_this
 * additionally takes an algorithm number, a fan-in/out value and a segment
 * size.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
286 int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS);
287 int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS);
288 int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS);
289 int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS,
int algorithm,
int faninout,
int segsize);
291 int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS);
292 int ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS);
293 int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS);
295 int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS);
296 int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS);
/*
 * Scan declarations.  Only the four intra/inter dec_fixed/dec_dynamic
 * functions are declared for this collective; all take SCAN_ARGS and
 * return an int.
 */
299 int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS);
300 int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS);
301 int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS);
302 int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS);
/*
 * Scatter declarations.  Every function takes SCATTER_ARGS and returns an
 * int; *_do_this additionally takes an algorithm number, a fan-in/out
 * value and a segment size.
 * NOTE(review): the dec_* / do_* / algorithm-name roles are inferred from
 * the identifiers only -- confirm against the implementation files.
 */
305 int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS);
306 int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS);
307 int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS);
308 int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS,
int algorithm,
int faninout,
int segsize);
310 int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS);
311 int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS);
312 int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS);
313 int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS);
/*
 * Scatterv declarations.  Only the four intra/inter dec_fixed/dec_dynamic
 * functions are declared for this collective; all take SCATTERV_ARGS (send
 * counts and displacements are int pointers) and return an int.
 */
316 int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS);
317 int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS);
318 int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS);
319 int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS);
/*
 * Event hook taking a single integer state and returning an int.
 * NOTE(review): "ft" presumably stands for fault tolerance; the set of
 * valid state values and the return convention are not visible in this
 * header -- confirm against the implementation.
 */
321 int mca_coll_tuned_ft_event(
int state);
326 static inline void ompi_coll_tuned_free_reqs(
ompi_request_t **reqs,
int count)
329 for (i = 0; i < count; ++i)
383 int cached_ntree_root;
384 int cached_ntree_fanout;
388 int cached_bintree_root;
392 int cached_bmtree_root;
396 int cached_in_order_bmtree_root;
400 int cached_chain_root;
401 int cached_chain_fanout;
405 int cached_pipeline_root;
429 #define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
431 mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
432 if( !( (coll_comm->cached_bintree) \
433 && (coll_comm->cached_bintree_root == (ROOT)) ) ) { \
434 if( coll_comm->cached_bintree ) { \
435 ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bintree) ); \
437 coll_comm->cached_bintree = ompi_coll_tuned_topo_build_tree(2,(OMPI_COMM),(ROOT)); \
438 coll_comm->cached_bintree_root = (ROOT); \
442 #define COLL_TUNED_UPDATE_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
444 mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
445 if( !( (coll_comm->cached_bmtree) \
446 && (coll_comm->cached_bmtree_root == (ROOT)) ) ) { \
447 if( coll_comm->cached_bmtree ) { \
448 ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bmtree) ); \
450 coll_comm->cached_bmtree = ompi_coll_tuned_topo_build_bmtree( (OMPI_COMM), (ROOT) ); \
451 coll_comm->cached_bmtree_root = (ROOT); \
455 #define COLL_TUNED_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
457 mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
458 if( !( (coll_comm->cached_in_order_bmtree) \
459 && (coll_comm->cached_in_order_bmtree_root == (ROOT)) ) ) { \
460 if( coll_comm->cached_in_order_bmtree ) { \
461 ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_in_order_bmtree) ); \
463 coll_comm->cached_in_order_bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \
464 coll_comm->cached_in_order_bmtree_root = (ROOT); \
468 #define COLL_TUNED_UPDATE_PIPELINE( OMPI_COMM, TUNED_MODULE, ROOT ) \
470 mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
471 if( !( (coll_comm->cached_pipeline) \
472 && (coll_comm->cached_pipeline_root == (ROOT)) ) ) { \
473 if (coll_comm->cached_pipeline) { \
474 ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_pipeline) ); \
476 coll_comm->cached_pipeline = ompi_coll_tuned_topo_build_chain( 1, (OMPI_COMM), (ROOT) ); \
477 coll_comm->cached_pipeline_root = (ROOT); \
481 #define COLL_TUNED_UPDATE_CHAIN( OMPI_COMM, TUNED_MODULE, ROOT, FANOUT ) \
483 mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
484 if( !( (coll_comm->cached_chain) \
485 && (coll_comm->cached_chain_root == (ROOT)) \
486 && (coll_comm->cached_chain_fanout == (FANOUT)) ) ) { \
487 if( coll_comm->cached_chain) { \
488 ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_chain) ); \
490 coll_comm->cached_chain = ompi_coll_tuned_topo_build_chain((FANOUT), (OMPI_COMM), (ROOT)); \
491 coll_comm->cached_chain_root = (ROOT); \
492 coll_comm->cached_chain_fanout = (FANOUT); \
496 #define COLL_TUNED_UPDATE_IN_ORDER_BINTREE( OMPI_COMM, TUNED_MODULE ) \
498 mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
499 if( !(coll_comm->cached_in_order_bintree) ) { \
502 coll_comm->cached_in_order_bintree = \
503 ompi_coll_tuned_topo_build_in_order_bintree((OMPI_COMM)); \
514 #define COLL_TUNED_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \
515 if( ((SEGSIZE) >= (TYPELNG)) && \
516 ((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \
518 (SEGCOUNT) = (int)((SEGSIZE) / (TYPELNG)); \
519 residual = (SEGSIZE) - (SEGCOUNT) * (TYPELNG); \
520 if( residual > ((TYPELNG) >> 1) ) \
535 #define COLL_TUNED_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \
536 EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \
537 EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \
538 SPLIT_INDEX = COUNT % NUM_BLOCKS; \
539 if (0 != SPLIT_INDEX) { \
540 EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \
Definition: coll_tuned_dynamic_rules.h:64
Definition: coll_tuned_dynamic_rules.h:49
ompi_coll_alg_rule_t * all_base_rules
global stuff that I need the component to store
Definition: coll_tuned.h:345
Collective module interface.
Definition: coll.h:316
Definition: coll_tuned_topo.h:28
Collective component interface.
Definition: coll.h:283
Definition: coll_tuned.h:419
Top-level interface for all MCA components.
Collective Communication Interface.
int tuned_priority
MCA parameter: Priority of this component.
Definition: coll_tuned.h:338
Top-level description of requests.
Definition: coll_tuned.h:92
mca_coll_base_component_2_0_0_t super
Base coll component.
Definition: coll_tuned.h:335
Definition: coll_tuned.h:333
static int ompi_request_free(ompi_request_t **request)
Free a request.
Definition: request.h:371
Definition: coll_tuned.h:105
Definition: communicator.h:118
Main top-level request struct definition.
Definition: request.h:100
Definition: coll_tuned.h:361
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236