OpenMPI  0.1.1
orte_globals.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2011 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  * All rights reserved.
12  * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved.
13  * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
14  * Copyright (c) 2011 Los Alamos National Security, LLC. All rights
15  * reserved.
16  * $COPYRIGHT$
17  *
18  * Additional copyrights may follow
19  *
20  * $HEADER$
21  */
22 
23 /**
24  * @file
25  *
26  * Global params for OpenRTE
27  */
28 #ifndef ORTE_RUNTIME_ORTE_GLOBALS_H
29 #define ORTE_RUNTIME_ORTE_GLOBALS_H
30 
31 #include "orte_config.h"
32 #include "orte/types.h"
33 
34 #include <sys/types.h>
35 #ifdef HAVE_SYS_TIME_H
36 #include <sys/time.h>
37 #endif
38 
40 #include "opal/class/opal_value_array.h"
42 #include "opal/threads/threads.h"
43 #include "opal/mca/hwloc/hwloc.h"
45 
46 #include "orte/mca/plm/plm_types.h"
47 #include "orte/mca/rml/rml_types.h"
48 #include "orte/util/proc_info.h"
49 #include "orte/util/name_fns.h"
50 #include "orte/runtime/runtime.h"
51 #include "orte/runtime/orte_wait.h"
52 
53 
54 BEGIN_C_DECLS
55 
56 ORTE_DECLSPEC extern int orte_debug_verbosity; /* instantiated in orte/runtime/orte_init.c */
57 ORTE_DECLSPEC extern char *orte_prohibited_session_dirs; /* instantiated in orte/runtime/orte_init.c */
58 ORTE_DECLSPEC extern bool orte_xml_output; /* instantiated in orte/runtime/orte_globals.c */
59 ORTE_DECLSPEC extern FILE *orte_xml_fp; /* instantiated in orte/runtime/orte_globals.c */
60 ORTE_DECLSPEC extern bool orte_help_want_aggregate; /* instantiated in orte/util/show_help.c */
61 ORTE_DECLSPEC extern char *orte_job_ident; /* instantiated in orte/runtime/orte_globals.c */
62 ORTE_DECLSPEC extern bool orte_create_session_dirs; /* instantiated in orte/runtime/orte_init.c */
63 ORTE_DECLSPEC extern bool orte_execute_quiet; /* instantiated in orte/runtime/orte_globals.c */
64 ORTE_DECLSPEC extern bool orte_report_silent_errors; /* instantiated in orte/runtime/orte_globals.c */
65 
66 /* Shortcut for some commonly used names */
67 #define ORTE_NAME_WILDCARD (&orte_name_wildcard)
68 ORTE_DECLSPEC extern orte_process_name_t orte_name_wildcard; /** instantiated in orte/runtime/orte_init.c */
69 #define ORTE_NAME_INVALID (&orte_name_invalid)
70 ORTE_DECLSPEC extern orte_process_name_t orte_name_invalid; /** instantiated in orte/runtime/orte_init.c */
71 
72 #define ORTE_PROC_MY_NAME (&orte_process_info.my_name)
73 
74 /* define a special name that point to my parent (aka the process that spawned me) */
75 #define ORTE_PROC_MY_PARENT (&orte_process_info.my_parent)
76 
77 /* define a special name that belongs to orterun */
78 #define ORTE_PROC_MY_HNP (&orte_process_info.my_hnp)
79 
80 /* define the name of my daemon */
81 #define ORTE_PROC_MY_DAEMON (&orte_process_info.my_daemon)
82 
83 ORTE_DECLSPEC extern bool orte_in_parallel_debugger;
84 
85 /* error manager callback function */
86 typedef void (*orte_err_cb_fn_t)(orte_process_name_t *proc, orte_proc_state_t state, void *cbdata);
87 
88 ORTE_DECLSPEC extern int orte_exit_status;
89 
90 #if ORTE_DISABLE_FULL_SUPPORT
91 
/* These types are used in interface functions that should never be
   used or implemented in the non-full interface, but need to be
   declared for various reasons.  So have a dummy (incomplete) type to
   keep things simple (and throw an error if someone does try to use
   them) */
struct orte_job_t;
struct orte_proc_t;
struct orte_node_t;
struct orte_app_context_t;

typedef struct orte_job_t orte_job_t;
typedef struct orte_proc_t orte_proc_t;
typedef struct orte_node_t orte_node_t;
/* every forward-declared struct above gets a matching typedef */
typedef struct orte_app_context_t orte_app_context_t;
105 
106 #else
107 
/* block size and maximum size for the global arrays (INT_MAX comes from <limits.h>) */
#define ORTE_GLOBAL_ARRAY_BLOCK_SIZE    64
#define ORTE_GLOBAL_ARRAY_MAX_SIZE      INT_MAX

/* define a default error return code for ORTE */
#define ORTE_ERROR_DEFAULT_EXIT_CODE    1
113 
/**
 * Define a macro for updating the orte_exit_status
 * The macro provides a convenient way of doing this
 * so that we can add thread locking at some point
 * since the orte_exit_status is a global variable.
 *
 * Ensure that we do not overwrite the exit status if it has
 * already been set to some non-zero value. If we don't make
 * this check, then different parts of the code could overwrite
 * each other's exit status in the case of abnormal termination.
 *
 * For example, if a process aborts, we would record the initial
 * exit code from the aborted process. However, subsequent processes
 * will have been aborted by signal as we kill the job. We don't want
 * the subsequent processes to overwrite the original exit code so
 * we can tell the user the exit code from the process that caused
 * the whole thing to happen.
 *
 * @param newstatus  candidate exit status (converted to int). It is
 *                   evaluated at most once, and not at all when a
 *                   non-zero status has already been recorded, so
 *                   compound expressions such as (a & b) are safe.
 *
 * NOTE(review): the definition ends in ';' (kept so that existing call
 * sites are unaffected), which means the macro cannot be used as the
 * unbraced body of an if that has an else branch.
 */
#define ORTE_UPDATE_EXIT_STATUS(newstatus)                                      \
    do {                                                                        \
        if (0 == orte_exit_status) {                                            \
            /* capture once: no precedence surprises, no repeated side effects */ \
            int orte_ues_status_ = (newstatus);                                 \
            if (0 != orte_ues_status_) {                                        \
                OPAL_OUTPUT_VERBOSE((1, orte_debug_output,                      \
                                     "%s:%s(%d) updating exit status to %d",    \
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),        \
                                     __FILE__, __LINE__, orte_ues_status_));    \
                orte_exit_status = orte_ues_status_;                            \
            }                                                                   \
        }                                                                       \
    } while(0);
142 
/* sometimes we need to reset the exit status - for example, when we
 * are restarting a failed process
 *
 * Emits a verbose message on orte_debug_output and then sets the
 * global orte_exit_status back to 0.
 *
 * NOTE(review): the definition ends in ';' (kept for existing call
 * sites), so it cannot be the unbraced body of an if/else.
 */
#define ORTE_RESET_EXIT_STATUS()                                    \
    do {                                                            \
        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,                  \
                             "%s:%s(%d) reseting exit status",      \
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),    \
                             __FILE__, __LINE__));                  \
        orte_exit_status = 0;                                       \
    } while(0);
154 
155 
/* define a macro for computing time differences - used for timing tests
 * across the code base
 *
 * Computes (s2, us2) - (s1, us1), where each pair is a seconds value and
 * a microseconds value, and stores the result in r (seconds) and ur
 * (microseconds). When us2 < us1 one second is borrowed so that ur
 * stays non-negative.
 *
 * r and ur must be assignable lvalues. us1 and us2 are evaluated twice,
 * so do not pass expressions with side effects.
 *
 * NOTE(review): the definition ends in ';' (kept for existing call
 * sites), so it cannot be the unbraced body of an if/else.
 */
#define ORTE_COMPUTE_TIME_DIFF(r, ur, s1, us1, s2, us2)     \
    do {                                                    \
        (r) = (s2) - (s1);                                  \
        if ((us2) >= (us1)) {                               \
            (ur) = (us2) - (us1);                           \
        } else {                                            \
            /* borrow one second (1000000 usec) */          \
            (r)--;                                          \
            (ur) = 1000000 - (us1) + (us2);                 \
        }                                                   \
    } while(0);
169 
/* define a set of flags to control the launch of a job */
typedef uint16_t orte_job_controls_t;
#define ORTE_JOB_CONTROL    OPAL_UINT16

#define ORTE_JOB_CONTROL_NON_ORTE_JOB       0x0002
/* NOTE(review): 0x0014 sets two bits - 0x0004 plus the 0x0010 bit that
 * is ORTE_JOB_CONTROL_DO_NOT_MONITOR - so testing a debugger-daemon job
 * against DO_NOT_MONITOR succeeds. Presumably intentional; confirm
 * before changing the value. */
#define ORTE_JOB_CONTROL_DEBUGGER_DAEMON    0x0014
#define ORTE_JOB_CONTROL_FORWARD_OUTPUT     0x0008
#define ORTE_JOB_CONTROL_DO_NOT_MONITOR     0x0010
#define ORTE_JOB_CONTROL_FORWARD_COMM       0x0020
#define ORTE_JOB_CONTROL_CONTINUOUS_OP      0x0040
#define ORTE_JOB_CONTROL_RECOVERABLE        0x0080
#define ORTE_JOB_CONTROL_SPIN_FOR_DEBUG     0x0100
182 
/* global type definitions used by RTE - instanced in orte_globals.c */

/************
 * Declare these to allow us to use them (through pointers) before
 * fully defining them - resolves potential circular definition
 */
struct orte_proc_t;
struct orte_job_map_t;
/************/
192 
193 /**
194 * Information about a specific application to be launched in the RTE.
195  */
196 typedef struct {
197  /** Parent object */
199  /** Unique index when multiple apps per job */
200  orte_app_idx_t idx;
201  /** Absolute pathname of argv[0] */
202  char *app;
203  /** Number of copies of this process that are to be launched */
205  /** Standard argv-style array, including a final NULL pointer */
206  char **argv;
207  /** Standard environ-style array, including a final NULL pointer */
208  char **env;
209  /** Current working directory for this app */
210  char *cwd;
211  /** Whether the cwd was set by the user or by the system */
213  /* Any hostfile that was specified */
214  char *hostfile;
215  /* Hostfile for adding hosts to an existing allocation */
216  char *add_hostfile;
217  /* Hosts to be added to an existing allocation - analagous to -host */
218  char **add_host;
219  /** argv of hosts passed in to -host */
220  char ** dash_host;
221  /** list of resource constraints to be applied
222  * when selecting hosts for this app
223  */
225  /** Prefix directory for this app (or NULL if no override necessary) */
226  char *prefix_dir;
227  /** Preload the binary on the remote machine (in PLM via FileM) */
229  /** Preload the libraries on the remote machine (in PLM via FileM) */
231  /** Preload the comma separated list of files to the remote machines cwd */
233  /** Destination directory for the preloaded files
234  * If NULL then the absolute and relative paths are obeyed */
236  /** Source directory for the preloaded files
237  * If NULL then the absolute and relative paths are obeyed */
239  /* is being used on the local node */
240  bool used_on_node;
241 #if OPAL_ENABLE_FT_CR == 1
242  /** What files SStore should load before local launch, if any */
243  char *sstore_load;
244 #endif
245  /* recovery policy has been defined */
246  bool recovery_defined;
247  /* max number of times a process can be restarted */
248  int32_t max_restarts;
250 
252 
253 
254 typedef struct {
255  /** Base object so this can be put on a list */
257  /* index of this node object in global array */
258  orte_std_cntr_t index;
259  /** String node name */
260  char *name;
261  /* argv-like array of aliases for this node */
262  char **alias;
263  /* daemon on this node */
264  struct orte_proc_t *daemon;
265  /* whether or not this daemon has been launched */
266  bool daemon_launched;
267  /** Launch id - needed by some systems to launch a proc on this node */
268  int32_t launch_id;
269  /** number of procs on this node */
270  orte_vpid_t num_procs;
271  /* array of pointers to procs on this node */
272  opal_pointer_array_t *procs;
273  /* next node rank on this node */
274  orte_node_rank_t next_node_rank;
275  /* whether or not we are oversubscribed */
276  bool oversubscribed;
277  /* whether we have been added to the current map */
278  bool mapped;
279  /** State of this node */
280  orte_node_state_t state;
281  /** A "soft" limit on the number of slots available on the node.
282  This will typically correspond to the number of physical CPUs
283  that we have been allocated on this note and would be the
284  "ideal" number of processes for us to launch. */
286  /** How many processes have already been launched, used by one or
287  more jobs on this node. */
289  /** This represents the number of slots we (the allocator) are
290  attempting to allocate to the current job - or the number of
291  slots allocated to a specific job on a query for the jobs
292  allocations */
294  /** A "hard" limit (if set -- a value of 0 implies no hard limit)
295  on the number of slots that can be allocated on a given
296  node. This is for some environments (e.g. grid) there may be
297  fixed limits on the number of slots that can be used.
298 
299  This value also could have been a boolean - but we may want to
300  allow the hard limit be different than the soft limit - in
301  other words allow the node to be oversubscribed up to a
302  specified limit. For example, if we have two processors, we
303  may want to allow up to four processes but no more. */
305  /** Username on this node, if specified */
306  char *username;
307 #if OPAL_HAVE_HWLOC
308  /* system topology for this node */
309  hwloc_topology_t topology;
310 #endif
311  /* history of resource usage - sized by sensor framework */
312  opal_ring_buffer_t stats;
313 } orte_node_t;
314 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_node_t);
315 
316 typedef struct {
317  /** Base object so this can be put on a list */
319  /* a name for this job */
320  char *name;
321  /* a name for this instance of the job */
322  char *instance;
323  /* jobid for this job */
324  orte_jobid_t jobid;
325  /* app_context array for this job */
326  opal_pointer_array_t *apps;
327  /* number of app_contexts in the array */
328  orte_app_idx_t num_apps;
329  /* flags to control the launch of this job - see above
330  * for description of supported flags
331  */
332  orte_job_controls_t controls;
333  /* rank desiring stdin - for now, either one rank, all ranks
334  * (wildcard), or none (invalid)
335  */
336  orte_vpid_t stdin_target;
337  /* total slots allocated to this job */
338  orte_std_cntr_t total_slots_alloc;
339  /* number of procs in this job */
340  orte_vpid_t num_procs;
341  /* array of pointers to procs in this job */
342  opal_pointer_array_t *procs;
343  /* map of the job */
344  struct orte_job_map_t *map;
345  /* bookmark for where we are in mapping - this
346  * indicates the node where we stopped
347  */
348  orte_node_t *bookmark;
349  /* state of the overall job */
350  orte_job_state_t state;
351  /* number of procs launched */
352  orte_vpid_t num_launched;
353  /* number of procs reporting contact info */
354  orte_vpid_t num_reported;
355  /* number of procs terminated */
356  orte_vpid_t num_terminated;
357  /* number of daemons reported launched so we can track progress */
358  orte_vpid_t num_daemons_reported;
359  /* lock/cond/flag for tracking when all procs reported on dynamic spawn */
360  opal_mutex_t dyn_spawn_lock;
361  opal_condition_t dyn_spawn_cond;
362  bool dyn_spawn_active;
363  /* did this job abort? */
364  bool abort;
365  /* proc that caused that to happen */
366  struct orte_proc_t *aborted_proc;
367  /* recovery policy has been defined */
368  bool recovery_defined;
369  /* enable recovery of these processes */
370  bool enable_recovery;
371  /* time launch message was sent */
372  struct timeval launch_msg_sent;
373  /* max time for launch msg to be received */
374  struct timeval max_launch_msg_recvd;
375 #if OPAL_ENABLE_FT_CR == 1
376  /* ckpt state */
377  size_t ckpt_state;
378  /* snapshot reference */
379  char *ckpt_snapshot_ref;
380  /* snapshot location */
381  char *ckpt_snapshot_loc;
382 #endif
383 } orte_job_t;
384 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_t);
385 
386 struct orte_proc_t {
387  /** Base object so this can be put on a list */
389  /* process name */
390  orte_process_name_t name;
391  /* pid */
392  pid_t pid;
393  /* local rank amongst my peers on the node
394  * where this is running - this value is
395  * needed by MPI procs so that the lowest
396  * rank on a node can perform certain fns -
397  * e.g., open an sm backing file
398  */
399  orte_local_rank_t local_rank;
400  /* local rank on the node across all procs
401  * and jobs known to this HNP - this is
402  * needed so that procs can do things like
403  * know which static IP port to use
404  */
405  orte_node_rank_t node_rank;
406  /* rank of this proc within its app context - this
407  * will just equal its vpid for single app_context
408  * applications
409  */
410  int32_t app_rank;
411  /* Last state used to trigger the errmgr for this proc */
412  orte_proc_state_t last_errmgr_state;
413  /* process state */
414  orte_proc_state_t state;
415  /* exit code */
416  orte_exit_code_t exit_code;
417  /* the app_context that generated this proc */
418  orte_app_idx_t app_idx;
419 #if OPAL_HAVE_HWLOC
420  /* hwloc object to which this process was mapped */
421  hwloc_obj_t locale;
422  /* where the proc was bound */
423  unsigned int bind_idx;
424  /* string representation of cpu bindings */
425  char *cpu_bitmap;
426 #endif
427  /* pointer to the node where this proc is executing */
428  orte_node_t *node;
429  /* pointer to the node where this proc last executed */
430  orte_node_t *prior_node;
431  /* name of the node where this proc is executing - this
432  * is used simply to pass that info to a calling
433  * tool since it may not have a node array available
434  */
435  char *nodename;
436  /* RML contact info */
437  char *rml_uri;
438  /* number of times this process has been restarted */
439  int32_t restarts;
440  /* time of last restart */
441  struct timeval last_failure;
442  /* number of failures in "fast" window */
443  int32_t fast_failures;
444  /* flag to indicate proc has reported in */
445  bool reported;
446  /* if heartbeat recvd during last time period */
447  int beat;
448  /* history of resource usage - sized by sensor framework */
449  opal_ring_buffer_t stats;
450 #if OPAL_ENABLE_FT_CR == 1
451  /* ckpt state */
452  size_t ckpt_state;
453  /* snapshot reference */
454  char *ckpt_snapshot_ref;
455  /* snapshot location */
456  char *ckpt_snapshot_loc;
457 #endif
458 };
459 typedef struct orte_proc_t orte_proc_t;
460 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_proc_t);
461 
462 typedef struct {
463  /* base object */
465  /* index in the array */
466  int index;
467  /* nodename */
468  char *name;
469  /* vpid of this job family's daemon on this node */
470  orte_vpid_t daemon;
471  /* whether or not this node is oversubscribed */
472  bool oversubscribed;
473 } orte_nid_t;
474 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_nid_t);
475 
476 typedef struct {
477  /* base object */
478  opal_object_t super;
479  /* index to node */
480  int32_t node;
481  /* local rank */
482  orte_local_rank_t local_rank;
483  /* node rank */
484  orte_node_rank_t node_rank;
485  /* locality */
486  opal_paffinity_locality_t locality;
487 } orte_pmap_t;
488 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_pmap_t);
489 
490 typedef struct {
491  /* base object */
492  opal_object_t super;
493  /* jobid */
494  orte_jobid_t job;
495  /* number of procs in this job */
496  orte_vpid_t num_procs;
497 #if OPAL_HAVE_HWLOC
498  /* binding level of the job */
499  opal_hwloc_level_t bind_level;
500 #endif
501  /* array of data for procs */
503 } orte_jmap_t;
504 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_jmap_t);
505 
506 /**
507 * Get a job data object
508  * We cannot just reference a job data object with its jobid as
509  * the jobid is no longer an index into the array. This change
510  * was necessitated by modification of the jobid to include
511  * an mpirun-unique qualifer to eliminate any global name
512  * service
513  */
515 
516 /* Find the lowest vpid alive in a given job */
517 ORTE_DECLSPEC orte_vpid_t orte_get_lowest_vpid_alive(orte_jobid_t job);
518 
519 /* global variables used by RTE - instanced in orte_globals.c */
520 ORTE_DECLSPEC extern bool orte_timing;
521 ORTE_DECLSPEC extern FILE *orte_timing_output;
522 ORTE_DECLSPEC extern bool orte_timing_details;
523 ORTE_DECLSPEC extern bool orte_debug_daemons_flag;
524 ORTE_DECLSPEC extern bool orte_debug_daemons_file_flag;
525 ORTE_DECLSPEC extern bool orte_leave_session_attached;
526 ORTE_DECLSPEC extern bool orte_do_not_launch;
527 ORTE_DECLSPEC extern bool orted_spin_flag;
528 ORTE_DECLSPEC extern char *orte_local_cpu_type;
529 ORTE_DECLSPEC extern char *orte_local_cpu_model;
530 ORTE_DECLSPEC extern char *orte_basename;
531 
532 /* ORTE OOB port flags */
533 ORTE_DECLSPEC extern bool orte_static_ports;
534 ORTE_DECLSPEC extern char *orte_oob_static_ports;
535 ORTE_DECLSPEC extern bool orte_standalone_operation;
536 
537 ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames;
538 ORTE_DECLSPEC extern bool orte_have_fqdn_allocation;
539 ORTE_DECLSPEC extern bool orte_show_resolved_nodenames;
540 ORTE_DECLSPEC extern int orted_debug_failure;
541 ORTE_DECLSPEC extern int orted_debug_failure_delay;
542 ORTE_DECLSPEC extern bool orte_homogeneous_nodes;
543 ORTE_DECLSPEC extern bool orte_hetero_apps;
544 ORTE_DECLSPEC extern bool orte_hetero_nodes;
545 ORTE_DECLSPEC extern bool orte_never_launched;
546 ORTE_DECLSPEC extern bool orte_devel_level_output;
547 ORTE_DECLSPEC extern bool orte_display_topo_with_map;
548 ORTE_DECLSPEC extern bool orte_display_diffable_output;
549 
550 ORTE_DECLSPEC extern char **orte_launch_environ;
551 
552 ORTE_DECLSPEC extern bool orte_hnp_is_allocated;
553 ORTE_DECLSPEC extern bool orte_allocation_required;
554 
555 /* launch agents */
556 ORTE_DECLSPEC extern char *orte_launch_agent;
557 ORTE_DECLSPEC extern char **orted_cmd_line;
558 ORTE_DECLSPEC extern char **orte_fork_agent;
559 
560 /* debugger job */
561 ORTE_DECLSPEC extern orte_job_t *orte_debugger_daemon;
562 ORTE_DECLSPEC extern bool orte_debugger_dump_proctable;
563 ORTE_DECLSPEC extern char *orte_debugger_test_daemon;
564 ORTE_DECLSPEC extern bool orte_debugger_test_attach;
565 ORTE_DECLSPEC extern int orte_debugger_check_rate;
566 
567 /* exit flags */
568 ORTE_DECLSPEC extern bool orte_abnormal_term_ordered;
569 ORTE_DECLSPEC extern bool orte_routing_is_enabled;
570 ORTE_DECLSPEC extern bool orte_job_term_ordered;
571 ORTE_DECLSPEC extern bool orte_orteds_term_ordered;
572 
573 ORTE_DECLSPEC extern int orte_startup_timeout;
574 
575 ORTE_DECLSPEC extern int orte_timeout_usec_per_proc;
576 ORTE_DECLSPEC extern float orte_max_timeout;
577 
578 ORTE_DECLSPEC extern opal_buffer_t *orte_tree_launch_cmd;
579 
580 /* global arrays for data storage */
581 ORTE_DECLSPEC extern opal_pointer_array_t *orte_job_data;
582 ORTE_DECLSPEC extern opal_pointer_array_t *orte_node_pool;
583 ORTE_DECLSPEC extern opal_pointer_array_t *orte_node_topologies;
584 
585 /* a clean output channel without prefix */
586 ORTE_DECLSPEC extern int orte_clean_output;
587 
588 /* Nidmap and job maps */
589 ORTE_DECLSPEC extern opal_pointer_array_t orte_nidmap;
590 ORTE_DECLSPEC extern opal_pointer_array_t orte_jobmap;
591 ORTE_DECLSPEC extern char *orted_launch_cmd;
592 
593 /* list of local children on a daemon */
594 ORTE_DECLSPEC extern opal_list_t orte_local_children;
595 ORTE_DECLSPEC extern opal_mutex_t orte_local_children_lock;
596 ORTE_DECLSPEC extern opal_condition_t orte_local_children_cond;
597 
598 /* list of job data for local children on a daemon */
599 ORTE_DECLSPEC extern opal_list_t orte_local_jobdata;
600 ORTE_DECLSPEC extern opal_mutex_t orte_local_jobdata_lock;
601 ORTE_DECLSPEC extern opal_condition_t orte_local_jobdata_cond;
602 
603 /* whether or not to forward SIGTSTP and SIGCONT signals */
604 ORTE_DECLSPEC extern bool orte_forward_job_control;
605 
606 /* IOF controls */
607 ORTE_DECLSPEC extern bool orte_tag_output;
608 ORTE_DECLSPEC extern bool orte_timestamp_output;
609 ORTE_DECLSPEC extern char *orte_output_filename;
610 /* generate new xterm windows to display output from specified ranks */
611 ORTE_DECLSPEC extern char *orte_xterm;
612 
613 /* whether or not to report launch progress */
614 ORTE_DECLSPEC extern bool orte_report_launch_progress;
615 
616 /* allocation specification */
617 ORTE_DECLSPEC extern char *orte_default_hostfile;
618 ORTE_DECLSPEC extern bool orte_default_hostfile_given;
619 ORTE_DECLSPEC extern char *orte_rankfile;
620 #ifdef __WINDOWS__
621 ORTE_DECLSPEC extern char *orte_ccp_headnode;
622 #endif
623 ORTE_DECLSPEC extern int orte_num_allocated_nodes;
624 ORTE_DECLSPEC extern char *orte_node_regex;
625 
626 /* tool communication controls */
627 ORTE_DECLSPEC extern bool orte_report_events;
628 ORTE_DECLSPEC extern char *orte_report_events_uri;
629 
630 /* barrier control */
631 ORTE_DECLSPEC extern bool orte_do_not_barrier;
632 
633 /* process recovery */
634 ORTE_DECLSPEC extern bool orte_enable_recovery;
635 ORTE_DECLSPEC extern int32_t orte_max_restarts;
636 
637 /* comm interface */
638 typedef void (*orte_default_cbfunc_t)(int fd, short event, void *data);
639 
640 typedef int (*orte_default_comm_fn_t)(orte_process_name_t *recipient,
641  opal_buffer_t *buf,
642  orte_rml_tag_t tag,
643  orte_default_cbfunc_t cbfunc);
644 /* comm fn for updating state */
645 ORTE_DECLSPEC extern orte_default_comm_fn_t orte_comm;
646 ORTE_DECLSPEC int orte_global_comm(orte_process_name_t *recipient,
647  opal_buffer_t *buf, orte_rml_tag_t tag,
648  orte_default_cbfunc_t cbfunc);
649 
650 /* exit status reporting */
651 ORTE_DECLSPEC extern bool orte_report_child_jobs_separately;
652 ORTE_DECLSPEC extern struct timeval orte_child_time_to_exit;
653 ORTE_DECLSPEC extern bool orte_abort_non_zero_exit;
654 
655 /* length of stat history to keep */
656 ORTE_DECLSPEC extern int orte_stat_history_size;
657 
658 /* envars to forward */
659 ORTE_DECLSPEC extern char *orte_forward_envars;
660 
661 /* preload binaries */
662 ORTE_DECLSPEC extern bool orte_preload_binaries;
663 
664 #endif /* ORTE_DISABLE_FULL_SUPPORT */
665 
666 END_C_DECLS
667 
668 #endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */
char * preload_files
Preload the comma separated list of files to the remote machines cwd.
Definition: orte_globals.h:232
Information about a specific application to be launched in the RTE.
Definition: orte_globals.h:196
Populates global structure with process-specific information.
orte_vpid_t num_procs
number of procs on this node
Definition: orte_globals.h:270
orte_std_cntr_t slots
A "soft" limit on the number of slots available on the node.
Definition: orte_globals.h:285
dynamic pointer array
Definition: opal_pointer_array.h:45
char ** dash_host
argv of hosts passed in to -host
Definition: orte_globals.h:220
orte_app_idx_t idx
Unique index when multiple apps per job.
Definition: orte_globals.h:200
bool preload_libs
Preload the libraries on the remote machine (in PLM via FileM)
Definition: orte_globals.h:230
Definition: condition.h:49
int32_t launch_id
Launch id - needed by some systems to launch a proc on this node.
Definition: orte_globals.h:268
char * prefix_dir
Prefix directory for this app (or NULL if no override necessary)
Definition: orte_globals.h:226
Interface into the Open MPI Run Time Environment.
char * preload_files_src_dir
Source directory for the preloaded files If NULL then the absolute and relative paths are obeyed...
Definition: orte_globals.h:238
Populates global structure with system-specific information.
orte_std_cntr_t slots_alloc
This represents the number of slots we (the allocator) are attempting to allocate to the current job ...
Definition: orte_globals.h:293
dynamic pointer ring
Definition: opal_ring_buffer.h:38
uint32_t orte_jobid_t
Set the allowed range for ids in each space.
Definition: types.h:76
orte_std_cntr_t slots_inuse
How many processes have already been launched, used by one or more jobs on this node.
Definition: orte_globals.h:288
Definition: types.h:146
Structure to represent a single event.
Definition: event_struct.h:87
int32_t orte_std_cntr_t
Supported datatypes for messaging and storage operations.
Definition: types.h:34
char * preload_files_dest_dir
Destination directory for the preloaded files If NULL then the absolute and relative paths are obeyed...
Definition: orte_globals.h:235
opal_object_t super
Parent object.
Definition: orte_globals.h:198
Definition: mutex_unix.h:53
opal_list_item_t super
Base object so this can be put on a list.
Definition: orte_globals.h:256
Definition: orte_globals.h:490
See opal_bitmap.h for an explanation of why there is a split between OPAL and ORTE for this generic c...
char * cwd
Current working directory for this app.
Definition: orte_globals.h:210
orte_std_cntr_t slots_max
A "hard" limit (if set – a value of 0 implies no hard limit) on the number of slots that can be allo...
Definition: orte_globals.h:304
Structure of a topology object.
Definition: hwloc.h:329
Definition: opal_list.h:98
Definition: orte_globals.h:386
opal_list_item_t super
Base object so this can be put on a list.
Definition: orte_globals.h:388
Interface for waitpid / async notification of child death with the libevent runtime system...
bool preload_binary
Preload the binary on the remote machine (in PLM via FileM)
Definition: orte_globals.h:228
Definition: orte_globals.h:462
ORTE_DECLSPEC orte_job_t * orte_get_job_data_object(orte_jobid_t job)
Get a job data object We cannot just reference a job data object with its jobid as the jobid is no lo...
Definition: orte_globals.c:486
paffinity (processor affinity) framework component interface definitions.
bool user_specified_cwd
Whether the cwd was set by the user or by the system.
Definition: orte_globals.h:212
char ** argv
Standard argv-style array, including a final NULL pointer.
Definition: orte_globals.h:206
Base object.
Definition: opal_object.h:182
char * username
Username on this node, if specified.
Definition: orte_globals.h:306
uint32_t orte_rml_tag_t
Message matching tag.
Definition: rml_types.h:220
orte_std_cntr_t num_procs
Number of copies of this process that are to be launched.
Definition: orte_globals.h:204
Definition: orte_globals.h:316
Definition: opal_list.h:147
Definition: rmaps_types.h:47
Structure for holding a buffer to be used with the RML or OOB subsystems.
Definition: dss_types.h:159
orte_node_state_t state
State of this node.
Definition: orte_globals.h:280
char ** env
Standard environ-style array, including a final NULL pointer.
Definition: orte_globals.h:208
uint16_t orte_local_rank_t
rank on node, used for both local and node rank.
Definition: types.h:46
opal_list_item_t super
Base object so this can be put on a list.
Definition: orte_globals.h:318
Definition: private.h:56
char * app
Absolute pathname of argv[0].
Definition: orte_globals.h:202
opal_list_t resource_constraints
list of resource constraints to be applied when selecting hosts for this app
Definition: orte_globals.h:224
char * name
String node name.
Definition: orte_globals.h:260
Definition: orte_globals.h:254
Contains the typedefs for the use of the rml.
Definition: orte_globals.h:476
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236