28 #ifndef ORTE_RUNTIME_ORTE_GLOBALS_H
29 #define ORTE_RUNTIME_ORTE_GLOBALS_H
31 #include "orte_config.h"
34 #include <sys/types.h>
35 #ifdef HAVE_SYS_TIME_H
40 #include "opal/class/opal_value_array.h"
42 #include "opal/threads/threads.h"
43 #include "opal/mca/hwloc/hwloc.h"
46 #include "orte/mca/plm/plm_types.h"
56 ORTE_DECLSPEC
extern int orte_debug_verbosity;
57 ORTE_DECLSPEC
extern char *orte_prohibited_session_dirs;
58 ORTE_DECLSPEC
extern bool orte_xml_output;
59 ORTE_DECLSPEC
extern FILE *orte_xml_fp;
60 ORTE_DECLSPEC
extern bool orte_help_want_aggregate;
61 ORTE_DECLSPEC
extern char *orte_job_ident;
62 ORTE_DECLSPEC
extern bool orte_create_session_dirs;
63 ORTE_DECLSPEC
extern bool orte_execute_quiet;
64 ORTE_DECLSPEC
extern bool orte_report_silent_errors;
/* Expands to the address of the orte_name_wildcard object (declared elsewhere). */
67 #define ORTE_NAME_WILDCARD (&orte_name_wildcard)
/* Expands to the address of the orte_name_invalid object (declared elsewhere). */
69 #define ORTE_NAME_INVALID (&orte_name_invalid)
/* Expands to the address of the my_name member of orte_process_info. */
72 #define ORTE_PROC_MY_NAME (&orte_process_info.my_name)
/* Expands to the address of the my_parent member of orte_process_info. */
75 #define ORTE_PROC_MY_PARENT (&orte_process_info.my_parent)
/* Expands to the address of the my_hnp member of orte_process_info. */
78 #define ORTE_PROC_MY_HNP (&orte_process_info.my_hnp)
/* Expands to the address of the my_daemon member of orte_process_info. */
81 #define ORTE_PROC_MY_DAEMON (&orte_process_info.my_daemon)
83 ORTE_DECLSPEC
extern bool orte_in_parallel_debugger;
/*
 * Callback function pointer type. The callee receives a pointer to a process
 * name, a process state value, and an opaque cbdata pointer; it returns
 * nothing.
 * NOTE(review): the name suggests an error-notification callback; where and
 * when it is invoked is not visible in this file - confirm against callers.
 */
86 typedef void (*orte_err_cb_fn_t)(
orte_process_name_t *proc, orte_proc_state_t state,
void *cbdata);
88 ORTE_DECLSPEC
extern int orte_exit_status;
90 #if ORTE_DISABLE_FULL_SUPPORT
/* Block size constant: 64.
 * NOTE(review): presumably the growth increment of the global arrays - confirm at the use sites. */
108 #define ORTE_GLOBAL_ARRAY_BLOCK_SIZE 64
/* Maximum size constant: expands to INT_MAX, so <limits.h> must be in scope wherever this macro is used. */
109 #define ORTE_GLOBAL_ARRAY_MAX_SIZE INT_MAX
/* Default exit code constant: 1. */
112 #define ORTE_ERROR_DEFAULT_EXIT_CODE 1
132 #define ORTE_UPDATE_EXIT_STATUS(newstatus) \
134 if (0 == orte_exit_status && 0 != newstatus) { \
135 OPAL_OUTPUT_VERBOSE((1, orte_debug_output, \
136 "%s:%s(%d) updating exit status to %d", \
137 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
138 __FILE__, __LINE__, newstatus)); \
139 orte_exit_status = newstatus; \
146 #define ORTE_RESET_EXIT_STATUS() \
148 OPAL_OUTPUT_VERBOSE((1, orte_debug_output, \
149 "%s:%s(%d) reseting exit status", \
150 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
151 __FILE__, __LINE__)); \
152 orte_exit_status = 0; \
159 #define ORTE_COMPUTE_TIME_DIFF(r, ur, s1, us1, s2, us2) \
162 if ((us2) >= (us1)) { \
163 (ur) = (us2) - (us1); \
166 (ur) = 1000000 - (us1) + (us2); \
/*
 * Job control flags.
 *
 * orte_job_controls_t is a 16-bit mask. Every ORTE_JOB_CONTROL_* value below
 * occupies exactly one bit, so flags can be OR'ed together and tested
 * independently of one another.
 */
typedef uint16_t orte_job_controls_t;
/* Expands to OPAL_UINT16, which is defined outside the visible part of this file. */
#define ORTE_JOB_CONTROL OPAL_UINT16

#define ORTE_JOB_CONTROL_NON_ORTE_JOB 0x0002
/* Must be the single bit 0x0004. The previous value 0x0014 (0x0010 | 0x0004)
 * also carried the DO_NOT_MONITOR bit, so setting or testing this flag
 * silently set or matched ORTE_JOB_CONTROL_DO_NOT_MONITOR as well. */
#define ORTE_JOB_CONTROL_DEBUGGER_DAEMON 0x0004
#define ORTE_JOB_CONTROL_FORWARD_OUTPUT 0x0008
#define ORTE_JOB_CONTROL_DO_NOT_MONITOR 0x0010
#define ORTE_JOB_CONTROL_FORWARD_COMM 0x0020
#define ORTE_JOB_CONTROL_CONTINUOUS_OP 0x0040
#define ORTE_JOB_CONTROL_RECOVERABLE 0x0080
#define ORTE_JOB_CONTROL_SPIN_FOR_DEBUG 0x0100
241 #if OPAL_ENABLE_FT_CR == 1
246 bool recovery_defined;
248 int32_t max_restarts;
266 bool daemon_launched;
274 orte_node_rank_t next_node_rank;
328 orte_app_idx_t num_apps;
332 orte_job_controls_t controls;
336 orte_vpid_t stdin_target;
340 orte_vpid_t num_procs;
350 orte_job_state_t state;
352 orte_vpid_t num_launched;
354 orte_vpid_t num_reported;
356 orte_vpid_t num_terminated;
358 orte_vpid_t num_daemons_reported;
362 bool dyn_spawn_active;
368 bool recovery_defined;
370 bool enable_recovery;
372 struct timeval launch_msg_sent;
374 struct timeval max_launch_msg_recvd;
375 #if OPAL_ENABLE_FT_CR == 1
379 char *ckpt_snapshot_ref;
381 char *ckpt_snapshot_loc;
405 orte_node_rank_t node_rank;
412 orte_proc_state_t last_errmgr_state;
414 orte_proc_state_t state;
416 orte_exit_code_t exit_code;
418 orte_app_idx_t app_idx;
423 unsigned int bind_idx;
441 struct timeval last_failure;
443 int32_t fast_failures;
450 #if OPAL_ENABLE_FT_CR == 1
454 char *ckpt_snapshot_ref;
456 char *ckpt_snapshot_loc;
484 orte_node_rank_t node_rank;
486 opal_paffinity_locality_t locality;
496 orte_vpid_t num_procs;
499 opal_hwloc_level_t bind_level;
/*
 * Prototype only: takes a jobid and returns an orte_vpid_t. The definition is
 * not in this file.
 * NOTE(review): presumably returns the lowest vpid still alive in the given
 * job - confirm against the implementation, including what is returned when
 * no process is alive.
 */
517 ORTE_DECLSPEC orte_vpid_t orte_get_lowest_vpid_alive(
orte_jobid_t job);
520 ORTE_DECLSPEC
extern bool orte_timing;
521 ORTE_DECLSPEC
extern FILE *orte_timing_output;
522 ORTE_DECLSPEC
extern bool orte_timing_details;
523 ORTE_DECLSPEC
extern bool orte_debug_daemons_flag;
524 ORTE_DECLSPEC
extern bool orte_debug_daemons_file_flag;
525 ORTE_DECLSPEC
extern bool orte_leave_session_attached;
526 ORTE_DECLSPEC
extern bool orte_do_not_launch;
527 ORTE_DECLSPEC
extern bool orted_spin_flag;
528 ORTE_DECLSPEC
extern char *orte_local_cpu_type;
529 ORTE_DECLSPEC
extern char *orte_local_cpu_model;
530 ORTE_DECLSPEC
extern char *orte_basename;
533 ORTE_DECLSPEC
extern bool orte_static_ports;
534 ORTE_DECLSPEC
extern char *orte_oob_static_ports;
535 ORTE_DECLSPEC
extern bool orte_standalone_operation;
537 ORTE_DECLSPEC
extern bool orte_keep_fqdn_hostnames;
538 ORTE_DECLSPEC
extern bool orte_have_fqdn_allocation;
539 ORTE_DECLSPEC
extern bool orte_show_resolved_nodenames;
540 ORTE_DECLSPEC
extern int orted_debug_failure;
541 ORTE_DECLSPEC
extern int orted_debug_failure_delay;
542 ORTE_DECLSPEC
extern bool orte_homogeneous_nodes;
543 ORTE_DECLSPEC
extern bool orte_hetero_apps;
544 ORTE_DECLSPEC
extern bool orte_hetero_nodes;
545 ORTE_DECLSPEC
extern bool orte_never_launched;
546 ORTE_DECLSPEC
extern bool orte_devel_level_output;
547 ORTE_DECLSPEC
extern bool orte_display_topo_with_map;
548 ORTE_DECLSPEC
extern bool orte_display_diffable_output;
550 ORTE_DECLSPEC
extern char **orte_launch_environ;
552 ORTE_DECLSPEC
extern bool orte_hnp_is_allocated;
553 ORTE_DECLSPEC
extern bool orte_allocation_required;
556 ORTE_DECLSPEC
extern char *orte_launch_agent;
557 ORTE_DECLSPEC
extern char **orted_cmd_line;
558 ORTE_DECLSPEC
extern char **orte_fork_agent;
561 ORTE_DECLSPEC
extern orte_job_t *orte_debugger_daemon;
562 ORTE_DECLSPEC
extern bool orte_debugger_dump_proctable;
563 ORTE_DECLSPEC
extern char *orte_debugger_test_daemon;
564 ORTE_DECLSPEC
extern bool orte_debugger_test_attach;
565 ORTE_DECLSPEC
extern int orte_debugger_check_rate;
568 ORTE_DECLSPEC
extern bool orte_abnormal_term_ordered;
569 ORTE_DECLSPEC
extern bool orte_routing_is_enabled;
570 ORTE_DECLSPEC
extern bool orte_job_term_ordered;
571 ORTE_DECLSPEC
extern bool orte_orteds_term_ordered;
573 ORTE_DECLSPEC
extern int orte_startup_timeout;
575 ORTE_DECLSPEC
extern int orte_timeout_usec_per_proc;
576 ORTE_DECLSPEC
extern float orte_max_timeout;
586 ORTE_DECLSPEC
extern int orte_clean_output;
591 ORTE_DECLSPEC
extern char *orted_launch_cmd;
594 ORTE_DECLSPEC
extern opal_list_t orte_local_children;
595 ORTE_DECLSPEC
extern opal_mutex_t orte_local_children_lock;
599 ORTE_DECLSPEC
extern opal_list_t orte_local_jobdata;
600 ORTE_DECLSPEC
extern opal_mutex_t orte_local_jobdata_lock;
604 ORTE_DECLSPEC
extern bool orte_forward_job_control;
607 ORTE_DECLSPEC
extern bool orte_tag_output;
608 ORTE_DECLSPEC
extern bool orte_timestamp_output;
609 ORTE_DECLSPEC
extern char *orte_output_filename;
611 ORTE_DECLSPEC
extern char *orte_xterm;
614 ORTE_DECLSPEC
extern bool orte_report_launch_progress;
617 ORTE_DECLSPEC
extern char *orte_default_hostfile;
618 ORTE_DECLSPEC
extern bool orte_default_hostfile_given;
619 ORTE_DECLSPEC
extern char *orte_rankfile;
621 ORTE_DECLSPEC
extern char *orte_ccp_headnode;
623 ORTE_DECLSPEC
extern int orte_num_allocated_nodes;
624 ORTE_DECLSPEC
extern char *orte_node_regex;
627 ORTE_DECLSPEC
extern bool orte_report_events;
628 ORTE_DECLSPEC
extern char *orte_report_events_uri;
631 ORTE_DECLSPEC
extern bool orte_do_not_barrier;
634 ORTE_DECLSPEC
extern bool orte_enable_recovery;
635 ORTE_DECLSPEC
extern int32_t orte_max_restarts;
/*
 * Callback function pointer type taking an int fd, a short event value and
 * an opaque data pointer; returns nothing.
 * NOTE(review): the (int, short, void *) parameter list resembles a
 * libevent-style event callback - confirm against the code that invokes it.
 */
638 typedef void (*orte_default_cbfunc_t)(
int fd,
short event,
void *data);
643 orte_default_cbfunc_t cbfunc);
645 ORTE_DECLSPEC
extern orte_default_comm_fn_t orte_comm;
648 orte_default_cbfunc_t cbfunc);
651 ORTE_DECLSPEC
extern bool orte_report_child_jobs_separately;
652 ORTE_DECLSPEC
extern struct timeval orte_child_time_to_exit;
653 ORTE_DECLSPEC
extern bool orte_abort_non_zero_exit;
656 ORTE_DECLSPEC
extern int orte_stat_history_size;
659 ORTE_DECLSPEC
extern char *orte_forward_envars;
662 ORTE_DECLSPEC
extern bool orte_preload_binaries;
char * preload_files
Preload the comma-separated list of files to the remote machine's cwd.
Definition: orte_globals.h:232
Information about a specific application to be launched in the RTE.
Definition: orte_globals.h:196
Populates global structure with process-specific information.
orte_vpid_t num_procs
number of procs on this node
Definition: orte_globals.h:270
orte_std_cntr_t slots
A "soft" limit on the number of slots available on the node.
Definition: orte_globals.h:285
dynamic pointer array
Definition: opal_pointer_array.h:45
char ** dash_host
argv of hosts passed in to -host
Definition: orte_globals.h:220
orte_app_idx_t idx
Unique index when multiple apps per job.
Definition: orte_globals.h:200
bool preload_libs
Preload the libraries on the remote machine (in PLM via FileM)
Definition: orte_globals.h:230
Definition: condition.h:49
int32_t launch_id
Launch id - needed by some systems to launch a proc on this node.
Definition: orte_globals.h:268
char * prefix_dir
Prefix directory for this app (or NULL if no override necessary)
Definition: orte_globals.h:226
Interface into the Open MPI Run Time Environment.
char * preload_files_src_dir
Source directory for the preloaded files. If NULL then the absolute and relative paths are obeyed...
Definition: orte_globals.h:238
Populates global structure with system-specific information.
orte_std_cntr_t slots_alloc
This represents the number of slots we (the allocator) are attempting to allocate to the current job ...
Definition: orte_globals.h:293
dynamic pointer ring
Definition: opal_ring_buffer.h:38
uint32_t orte_jobid_t
Set the allowed range for ids in each space.
Definition: types.h:76
orte_std_cntr_t slots_inuse
How many processes have already been launched, used by one or more jobs on this node.
Definition: orte_globals.h:288
Structure to represent a single event.
Definition: event_struct.h:87
int32_t orte_std_cntr_t
Supported datatypes for messaging and storage operations.
Definition: types.h:34
char * preload_files_dest_dir
Destination directory for the preloaded files. If NULL then the absolute and relative paths are obeyed...
Definition: orte_globals.h:235
opal_object_t super
Parent object.
Definition: orte_globals.h:198
Definition: mutex_unix.h:53
opal_list_item_t super
Base object so this can be put on a list.
Definition: orte_globals.h:256
Definition: orte_globals.h:490
See opal_bitmap.h for an explanation of why there is a split between OPAL and ORTE for this generic c...
char * cwd
Current working directory for this app.
Definition: orte_globals.h:210
orte_std_cntr_t slots_max
A "hard" limit (if set – a value of 0 implies no hard limit) on the number of slots that can be allo...
Definition: orte_globals.h:304
Structure of a topology object.
Definition: hwloc.h:329
Definition: opal_list.h:98
Definition: orte_globals.h:386
opal_list_item_t super
Base object so this can be put on a list.
Definition: orte_globals.h:388
Interface for waitpid / async notification of child death with the libevent runtime system...
bool preload_binary
Preload the binary on the remote machine (in PLM via FileM)
Definition: orte_globals.h:228
Definition: orte_globals.h:462
ORTE_DECLSPEC orte_job_t * orte_get_job_data_object(orte_jobid_t job)
Get a job data object. We cannot just reference a job data object with its jobid as the jobid is no lo...
Definition: orte_globals.c:486
paffinity (processor affinity) framework component interface definitions.
bool user_specified_cwd
Whether the cwd was set by the user or by the system.
Definition: orte_globals.h:212
char ** argv
Standard argv-style array, including a final NULL pointer.
Definition: orte_globals.h:206
Base object.
Definition: opal_object.h:182
char * username
Username on this node, if specified.
Definition: orte_globals.h:306
uint32_t orte_rml_tag_t
Message matching tag.
Definition: rml_types.h:220
orte_std_cntr_t num_procs
Number of copies of this process that are to be launched.
Definition: orte_globals.h:204
Definition: orte_globals.h:316
Definition: opal_list.h:147
Definition: rmaps_types.h:47
Structure for holding a buffer to be used with the RML or OOB subsystems.
Definition: dss_types.h:159
orte_node_state_t state
State of this node.
Definition: orte_globals.h:280
char ** env
Standard environ-style array, including a final NULL pointer.
Definition: orte_globals.h:208
uint16_t orte_local_rank_t
rank on node, used for both local and node rank.
Definition: types.h:46
opal_list_item_t super
Base object so this can be put on a list.
Definition: orte_globals.h:318
char * app
Absolute pathname of argv[0].
Definition: orte_globals.h:202
opal_list_t resource_constraints
list of resource constraints to be applied when selecting hosts for this app
Definition: orte_globals.h:224
char * name
String node name.
Definition: orte_globals.h:260
Definition: orte_globals.h:254
Contains the typedefs for the use of the rml.
Definition: orte_globals.h:476
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236