OpenMPI  0.1.1
odls_types.h
Go to the documentation of this file.
1 /* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
2  * University Research and Technology
3  * Corporation. All rights reserved.
4  * Copyright (c) 2004-2011 The University of Tennessee and The University
5  * of Tennessee Research Foundation. All rights
6  * reserved.
7  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
8  * University of Stuttgart. All rights reserved.
9  * Copyright (c) 2004-2005 The Regents of the University of California.
10  * All rights reserved.
11  * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
12  * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
13  * $COPYRIGHT$
14  *
15  * Additional copyrights may follow
16  *
17  * $HEADER$
18  */
19 /** @file:
20  */
21 
22 #ifndef ORTE_MCA_ODLS_TYPES_H
23 #define ORTE_MCA_ODLS_TYPES_H
24 
25 #include "orte_config.h"
26 #include "orte/types.h"
27 
28 #ifdef HAVE_SYS_TIME_H
29 #include <sys/time.h>
30 #endif
31 
32 #include "opal/class/opal_list.h"
34 #include "opal/dss/dss_types.h"
35 #include "opal/threads/mutex.h"
36 #include "opal/threads/condition.h"
37 #include "opal/mca/hwloc/hwloc.h"
38 
39 #include "orte/mca/plm/plm_types.h"
43 
44 BEGIN_C_DECLS
45 
46 /* define the orted command flag type: a command is an 8-bit unsigned value.
    * ORTE_DAEMON_CMD_T maps the flag type to OPAL_UINT8; presumably this is the
    * data-type tag to use when packing/unpacking a command -- TODO confirm. */
47 typedef uint8_t orte_daemon_cmd_flag_t;
48 #define ORTE_DAEMON_CMD_T OPAL_UINT8
49 
50 
51 /*
52  * Definitions needed for communication
    *
    * Each command below is an integer constant cast to orte_daemon_cmd_flag_t.
    * The value 8 is not assigned to any command in this list.
    *
    * NOTE(review): the expansions are bare cast expressions with no enclosing
    * parentheses; consider wrapping them -- confirm no user depends on the
    * current form.
53  */
54 #define ORTE_DAEMON_CONTACT_QUERY_CMD (orte_daemon_cmd_flag_t) 1
55 #define ORTE_DAEMON_KILL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 2
56 #define ORTE_DAEMON_SIGNAL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 3
57 #define ORTE_DAEMON_ADD_LOCAL_PROCS (orte_daemon_cmd_flag_t) 4
58 #define ORTE_DAEMON_TREE_SPAWN (orte_daemon_cmd_flag_t) 5
59 #define ORTE_DAEMON_HEARTBEAT_CMD (orte_daemon_cmd_flag_t) 6
60 #define ORTE_DAEMON_EXIT_CMD (orte_daemon_cmd_flag_t) 7
61 #define ORTE_DAEMON_PROCESS_AND_RELAY_CMD (orte_daemon_cmd_flag_t) 9
62 #define ORTE_DAEMON_MESSAGE_LOCAL_PROCS (orte_daemon_cmd_flag_t) 10
63 #define ORTE_DAEMON_NULL_CMD (orte_daemon_cmd_flag_t) 11
64 #define ORTE_DAEMON_SYNC_BY_PROC (orte_daemon_cmd_flag_t) 12
65 #define ORTE_DAEMON_SYNC_WANT_NIDMAP (orte_daemon_cmd_flag_t) 13
66 
67 /* commands for use by tools (values 14-19) */
68 #define ORTE_DAEMON_REPORT_JOB_INFO_CMD (orte_daemon_cmd_flag_t) 14
69 #define ORTE_DAEMON_REPORT_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 15
70 #define ORTE_DAEMON_REPORT_PROC_INFO_CMD (orte_daemon_cmd_flag_t) 16
71 #define ORTE_DAEMON_SPAWN_JOB_CMD (orte_daemon_cmd_flag_t) 17
72 #define ORTE_DAEMON_TERMINATE_JOB_CMD (orte_daemon_cmd_flag_t) 18
73 #define ORTE_DAEMON_HALT_VM_CMD (orte_daemon_cmd_flag_t) 19
74 
75 /* proc termination sync commands */
76 #define ORTE_DAEMON_WAITPID_FIRED (orte_daemon_cmd_flag_t) 20
77 #define ORTE_DAEMON_IOF_COMPLETE (orte_daemon_cmd_flag_t) 21
78 
79 /* request proc resource usage */
80 #define ORTE_DAEMON_TOP_CMD (orte_daemon_cmd_flag_t) 22
81 
82 /* bootstrap commands */
83 #define ORTE_DAEMON_NAME_REQ_CMD (orte_daemon_cmd_flag_t) 23
84 #define ORTE_DAEMON_CHECKIN_CMD (orte_daemon_cmd_flag_t) 24
85 #define ORTE_TOOL_CHECKIN_CMD (orte_daemon_cmd_flag_t) 25
86 
87 /* process msg command */
88 #define ORTE_DAEMON_PROCESS_CMD (orte_daemon_cmd_flag_t) 26
89 
90 /* a process called "errmgr.abort" */
91 #define ORTE_DAEMON_ABORT_CALLED (orte_daemon_cmd_flag_t) 27
92 /* a process called "errmgr.abort_procs" */
93 #define ORTE_DAEMON_ABORT_PROCS_CALLED (orte_daemon_cmd_flag_t) 28
94 
95 /*
96  * List object to locally store the process names and pids of
97  * our children. This can subsequently be used to order termination
98  * or pass signals without looking the info up again.
    *
    * One instance describes one child process: its name and pid, its
    * state and exit code, and a set of boolean flags recording which
    * events have been seen for it.
99  */
100 typedef struct {
101  opal_list_item_t super; /* required to place this on a list */
102  orte_process_name_t *name; /* the OmpiRTE name of the proc */
103  int32_t restarts; /* number of times this proc has been restarted */
104  pid_t pid; /* local pid of the proc */
105  orte_app_idx_t app_idx; /* index of the app_context for this proc */
106  bool alive; /* is this proc alive? */
107  bool coll_recvd; /* collective operation received */
108  orte_proc_state_t state; /* the state of the process */
109  orte_exit_code_t exit_code; /* process exit code */
110  bool init_recvd; /* process called orte_init */
111  bool fini_recvd; /* process called orte_finalize */
112  char *rml_uri; /* contact info for this child */
113 #if OPAL_HAVE_HWLOC
114  char *cpu_bitmap; /* binding pattern for this child (only present when built with hwloc) */
115 #endif
116  bool waitpid_recvd; /* waitpid has detected proc termination */
117  bool iof_complete; /* IOF has noted proc terminating all channels */
118  struct timeval starttime; /* when the proc was started - for timing purposes only */
119  bool do_not_barrier; /* the proc should not barrier in orte_init */
120  bool notified; /* notification of termination has been sent */
121  opal_ring_buffer_t stats; /* ring buffer of stats for this proc; presumably resource-usage samples -- TODO confirm */
124 
125 #if !ORTE_DISABLE_FULL_SUPPORT
126 
127 /*
128  * List object to locally store job related info
     *
     * One instance describes one job, identified by its jobid. Fields whose
     * comments are marked "TODO confirm" are described from their names and
     * types only; verify against the code that uses them.
129  */
130 typedef struct orte_odls_job_t {
131  opal_list_item_t super; /* required to place this on a list */
132  opal_mutex_t lock; /* mutex; presumably guards this job object -- TODO confirm */
133  opal_condition_t cond; /* condition variable; presumably used together with lock -- TODO confirm */
134  orte_job_state_t state; /* state of the job */
135  orte_jobid_t jobid; /* jobid for this data */
136  char *instance; /* keep handy for scheduler restart */
137  char *name; /* keep handy for scheduler restart */
138  bool launch_msg_processed; /* launch msg has been fully processed */
139  opal_pointer_array_t apps; /* app_contexts for this job */
140  orte_app_idx_t num_apps; /* number of app_contexts */
141 #if OPAL_HAVE_HWLOC
142  opal_binding_policy_t binding; /* binding policy (only present when built with hwloc) */
143 #endif
144  int16_t cpus_per_rank; /* number of cpus/rank */
145  int16_t stride; /* step size between cores of multi-core/rank procs */
146  orte_job_controls_t controls; /* control flags for job */
147  orte_vpid_t stdin_target; /* where stdin is to go */
148  orte_std_cntr_t total_slots_alloc; /* presumably total slots allocated to the job -- TODO confirm */
149  orte_std_cntr_t num_nodes; /* number of nodes involved in the job */
150  orte_vpid_t num_procs; /* presumably total number of procs in the job -- TODO confirm */
151  int32_t num_local_procs; /* presumably number of this job's procs on the local node -- TODO confirm */
152  opal_byte_object_t *pmap; /* local copy of pidmap byte object */
153  opal_buffer_t collection_bucket; /* buffer; presumably accumulates collective data -- TODO confirm */
154  opal_buffer_t local_collection; /* buffer; presumably holds locally contributed collective data -- TODO confirm */
155  orte_grpcomm_coll_t collective_type; /* type of the collective operation */
156  int32_t num_contributors; /* presumably count of contributors to the collective -- TODO confirm */
157  int num_participating; /* presumably count of participants in the collective -- TODO confirm */
158  int num_collected; /* presumably count of contributions collected so far -- TODO confirm */
159  struct timeval launch_msg_recvd; /* when the launch msg for this job was recvd - for timing purposes only */
160  bool enable_recovery; /* enable recovery of failed processes */
163 
164 #endif
165 
166 END_C_DECLS
167 
168 #endif
dynamic pointer array
Definition: opal_pointer_array.h:45
Definition: condition.h:49
Definition: odls_types.h:130
dynamic pointer ring
Definition: opal_ring_buffer.h:38
uint32_t orte_jobid_t
Set the allowed range for ids in each space.
Definition: types.h:76
Definition: types.h:146
int32_t orte_std_cntr_t
Supported datatypes for messaging and storage operations.
Definition: types.h:34
Definition: mutex_unix.h:53
The opal_list_t interface is used to provide a generic doubly-linked list container for Open MPI...
Definition: opal_list.h:98
Definition: odls_types.h:100
Buffer management types.
Global params for OpenRTE.
The OpenRTE Group Communications.
Structure for holding a buffer to be used with the RML or OOB subsystems.
Definition: dss_types.h:159
Definition: dss_types.h:47
Mutual exclusion functions.
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236