OpenMPI  0.1.1
plm_types.h
1 /*
2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2008 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  * All rights reserved.
12  * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
13  * $COPYRIGHT$
14  *
15  * Additional copyrights may follow
16  *
17  * $HEADER$
18  */
19 
20 #ifndef ORTE_PLM_TYPES_H
21 #define ORTE_PLM_TYPES_H
22 
23 #include "orte_config.h"
24 #include "orte/types.h"
25 
26 
27 
28 BEGIN_C_DECLS
29 
/*
 * Process exit codes
 *
 * An exit code is carried as a signed 32-bit integer.
 * ORTE_EXIT_CODE_T expands to OPAL_INT32 (defined elsewhere; presumably
 * the OPAL data-type tag that matches this typedef - keep the two in sync).
 */
typedef int32_t orte_exit_code_t;
#define ORTE_EXIT_CODE_T    OPAL_INT32
36 
/*
 * Process state codes
 *
 * A process state is stored in an unsigned 32-bit integer. Apart from
 * ORTE_PROC_STATE_UNDEF (zero), each state below is a distinct single bit.
 * ORTE_PROC_STATE_T expands to OPAL_UINT32 (defined elsewhere; presumably
 * the OPAL data-type tag that matches this typedef).
 */
typedef uint32_t orte_proc_state_t;
#define ORTE_PROC_STATE_T   OPAL_UINT32

/* ---- states of a process that has not terminated ---- */
#define ORTE_PROC_STATE_UNDEF                   0x00000000  /* undefined process state */
#define ORTE_PROC_STATE_INIT                    0x00000001  /* process entry has been created by rmaps */
#define ORTE_PROC_STATE_RESTART                 0x00000002  /* the proc is ready for restart */
#define ORTE_PROC_STATE_LAUNCHED                0x00000004  /* process has been launched */
#define ORTE_PROC_STATE_TERMINATE               0x00000008  /* process is marked for termination */
#define ORTE_PROC_STATE_RUNNING                 0x00000010  /* daemon has locally fork'd process */
#define ORTE_PROC_STATE_REGISTERED              0x00000020  /* process has registered for sync */
#define ORTE_PROC_STATE_DEREGISTERED            0x00000040  /* process has deregistered after sync */

/*
 * Boundary marker: lets callers decide easily and quickly whether a proc
 * is still running. Any state value less than this one means the proc
 * has not terminated.
 */
#define ORTE_PROC_STATE_UNTERMINATED            0x00000080

/* ---- states at or beyond the boundary ---- */
#define ORTE_PROC_STATE_TERMINATED              0x00000100  /* process has terminated and is no longer running */
#define ORTE_PROC_STATE_KILLED_BY_CMD           0x00000200  /* process was killed by ORTE cmd */
#define ORTE_PROC_STATE_ABORTED                 0x00000400  /* process aborted */
#define ORTE_PROC_STATE_FAILED_TO_START         0x00000800  /* process failed to start */
#define ORTE_PROC_STATE_ABORTED_BY_SIG          0x00001000  /* process aborted by signal */
#define ORTE_PROC_STATE_TERM_WO_SYNC            0x00002000  /* process exit'd w/o required sync */
#define ORTE_PROC_STATE_COMM_FAILED             0x00004000  /* process communication has failed */
#define ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED   0x00008000  /* process exceeded a sensor limit */
#define ORTE_PROC_STATE_CALLED_ABORT            0x00010000  /* process called "errmgr.abort" */
#define ORTE_PROC_STATE_HEARTBEAT_FAILED        0x00020000  /* heartbeat failed to arrive */
#define ORTE_PROC_STATE_MIGRATING               0x00040000  /* process is migrating */
#define ORTE_PROC_STATE_CANNOT_RESTART          0x00080000  /* process failed and cannot be restarted */
#define ORTE_PROC_STATE_TERM_NON_ZERO           0x00100000  /* process exited with a non-zero status, indicating abnormal */
#define ORTE_PROC_STATE_RESTARTED               0x00200000  /* process restarted */
/*
 * Job state codes
 *
 * A job state is stored in an unsigned 32-bit integer. Apart from
 * ORTE_JOB_STATE_UNDEF (zero), each state in this group is a distinct
 * single bit. ORTE_JOB_STATE_T expands to OPAL_UINT32 (defined elsewhere;
 * presumably the OPAL data-type tag that matches this typedef).
 */
typedef uint32_t orte_job_state_t;
#define ORTE_JOB_STATE_T    OPAL_UINT32

/* ---- states of a job that has not terminated ---- */
#define ORTE_JOB_STATE_UNDEF                    0x00000000
#define ORTE_JOB_STATE_INIT                     0x00000001  /* job entry has been created by rmaps */
#define ORTE_JOB_STATE_RESTART                  0x00000002  /* the job is ready for restart after one or more procs failed */
#define ORTE_JOB_STATE_LAUNCHED                 0x00000004  /* job has been launched by plm */
#define ORTE_JOB_STATE_RUNNING                  0x00000008  /* all processes have been fork'd */
#define ORTE_JOB_STATE_SUSPENDED                0x00000010  /* job has been suspended */
#define ORTE_JOB_STATE_REGISTERED               0x00000020  /* all procs registered for sync */

/*
 * Boundary marker: lets callers decide easily and quickly whether a job
 * is still running. Any state value less than this one means the job
 * has not terminated.
 */
#define ORTE_JOB_STATE_UNTERMINATED             0x00000040

/* ---- states at or beyond the boundary ---- */
#define ORTE_JOB_STATE_TERMINATED               0x00000080  /* all processes have terminated; the job is no longer running */
#define ORTE_JOB_STATE_ABORTED                  0x00000100  /* at least one process aborted, causing job to abort */
#define ORTE_JOB_STATE_FAILED_TO_START          0x00000200  /* at least one process failed to start */
#define ORTE_JOB_STATE_ABORTED_BY_SIG           0x00000400  /* job was killed by a signal */
#define ORTE_JOB_STATE_ABORTED_WO_SYNC          0x00000800  /* job was aborted because proc exit'd w/o required sync */
#define ORTE_JOB_STATE_KILLED_BY_CMD            0x00001000  /* job was killed by ORTE cmd */
#define ORTE_JOB_STATE_COMM_FAILED              0x00002000  /* communication has failed */
#define ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED    0x00004000  /* job had a process that exceeded a sensor limit */
#define ORTE_JOB_STATE_CALLED_ABORT             0x00008000  /* at least one process called "errmgr.abort" */
#define ORTE_JOB_STATE_HEARTBEAT_FAILED         0x00010000  /* heartbeat failed to arrive */
#define ORTE_JOB_STATE_PROCS_MIGRATING          0x00020000  /* procs waiting to migrate */
#define ORTE_JOB_STATE_NON_ZERO_TERM            0x00040000  /* at least one process exited with non-zero status */
#define ORTE_JOB_STATE_SILENT_ABORT             0x00080000  /* an error occurred and was reported elsewhere, so error out quietly */
107 
/*
 * The job never even attempted to launch due to an error earlier in the
 * launch procedure.
 */
#define ORTE_JOB_STATE_NEVER_LAUNCHED           0x10000000

/*
 * The processes in this job have been ordered to "die", but may not have
 * completed it yet. Don't order it again.
 *
 * This must be a single bit of its own, like every other job state code:
 * it may not share bit 0x00010000 with ORTE_JOB_STATE_HEARTBEAT_FAILED,
 * otherwise a bitwise test for one state would also match the other.
 */
#define ORTE_JOB_STATE_ABORT_ORDERED            0x20000000
115 
116 
/**
 * Node state; its values are the ORTE_NODE_STATE_* #defines below.
 *
 * #defines are used rather than an enum because there may be lots and
 * lots of these entries in the registry: an int8_t takes only 1 byte,
 * whereas an enum defaults to an int (probably 4 bytes), so this saves
 * a bit of space.
 */
typedef int8_t orte_node_state_t;
#define ORTE_NODE_STATE_T   OPAL_INT8

/** Node is in an unknown state (see orte_node_state_t) */
#define ORTE_NODE_STATE_UNKNOWN         0

/** Node is down (see orte_node_state_t) */
#define ORTE_NODE_STATE_DOWN            1

/** Node is up / available for use (see orte_node_state_t) */
#define ORTE_NODE_STATE_UP              2

/** Node is rebooting (only some systems will support this; see
    orte_node_state_t) */
#define ORTE_NODE_STATE_REBOOT          3

/** Node is up, but not available for use for the next mapping */
#define ORTE_NODE_STATE_DO_NOT_USE      4

/** Node is up, but not part of the node pool for jobs */
#define ORTE_NODE_STATE_NOT_INCLUDED    5
141 
/*
 * PLM commands
 *
 * A command flag is an unsigned 8-bit value. ORTE_PLM_CMD expands to
 * OPAL_UINT8 (defined elsewhere; presumably the OPAL data-type tag that
 * matches this typedef).
 */
typedef uint8_t orte_plm_cmd_flag_t;
#define ORTE_PLM_CMD                    OPAL_UINT8

/* command values */
#define ORTE_PLM_LAUNCH_JOB_CMD         1
#define ORTE_PLM_UPDATE_PROC_STATE      2
#define ORTE_PLM_INIT_ROUTES_CMD        3
150 
151 END_C_DECLS
152 
153 #endif