OpenMPI  0.1.1
base.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2011 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  * All rights reserved.
12  * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
13  * $COPYRIGHT$
14  *
15  * Additional copyrights may follow
16  *
17  * $HEADER$
18  */
19 /** @file:
 *
 * Base-level declarations for the Open RTE Error and Recovery Manager
 * (ErrMgr) framework.
20  */
21 
22 #ifndef ORTE_MCA_ERRMGR_BASE_H
23 #define ORTE_MCA_ERRMGR_BASE_H
24 
25 /*
26  * includes
27  */
28 #include "orte_config.h"
29 #include "orte/constants.h"
30 
31 #include "opal/class/opal_list.h"
32 
33 #include "opal/mca/mca.h"
34 #include "orte/mca/snapc/base/base.h"
35 #include "orte/mca/errmgr/errmgr.h"
36 
37 
38 BEGIN_C_DECLS
39 
40 /*
41  * MCA Framework functions
42  */
/* Finds and opens either all ErrMgr MCA components or the one that was
 * specifically requested (per the generated documentation for this symbol;
 * implemented in errmgr_base_open.c). Returns an int status
 * (NOTE(review): presumably one of the codes from orte/constants.h -- confirm). */
43 ORTE_DECLSPEC int orte_errmgr_base_open(void);
/* NOTE(review): presumably chooses which opened ErrMgr component/module is
 * used -- the implementation is not visible from this header; confirm. */
44 ORTE_DECLSPEC int orte_errmgr_base_select(void);
/* NOTE(review): presumably the shutdown counterpart of
 * orte_errmgr_base_open() -- confirm against the implementation. */
45 ORTE_DECLSPEC int orte_errmgr_base_close(void);
46 
47 /**
48  * Output and component variables
49  */
51 
52 /**
53  * Internal module reference
54  */
56 
57 /*
58  * Interfaces for orte-migrate tool
59  */
60 #if OPAL_ENABLE_FT_CR
61 /**
62  * Migrating States
63  */
/* Every migrate state is written as an offset above ORTE_SNAPC_CKPT_MAX
 * (expected to come from the snapc base header included above).
 * NOTE(review): presumably this keeps the values distinct from the snapc
 * checkpoint state codes so both can share one integer space -- confirm. */
64 #define ORTE_ERRMGR_MIGRATE_STATE_ERROR (ORTE_SNAPC_CKPT_MAX + 1)
65 #define ORTE_ERRMGR_MIGRATE_STATE_ERR_INPROGRESS (ORTE_SNAPC_CKPT_MAX + 2)
66 #define ORTE_ERRMGR_MIGRATE_STATE_NONE (ORTE_SNAPC_CKPT_MAX + 3)
67 #define ORTE_ERRMGR_MIGRATE_STATE_REQUEST (ORTE_SNAPC_CKPT_MAX + 4)
68 #define ORTE_ERRMGR_MIGRATE_STATE_RUNNING (ORTE_SNAPC_CKPT_MAX + 5)
69 #define ORTE_ERRMGR_MIGRATE_STATE_RUN_CKPT (ORTE_SNAPC_CKPT_MAX + 6)
70 #define ORTE_ERRMGR_MIGRATE_STATE_STARTUP (ORTE_SNAPC_CKPT_MAX + 7)
71 #define ORTE_ERRMGR_MIGRATE_STATE_FINISH (ORTE_SNAPC_CKPT_MAX + 8)
/* One greater than the highest migrate state value defined above. */
72 #define ORTE_ERRMGR_MIGRATE_MAX (ORTE_SNAPC_CKPT_MAX + 9)
73 
74 /*
75  * Commands for command line tool and ErrMgr interaction
76  */
/* Command flag type for the tool <-> ErrMgr commands below; one byte wide. */
77 typedef uint8_t orte_errmgr_tool_cmd_flag_t;
/* Type identifier paired with the uint8_t flag type above.
 * NOTE(review): presumably the OPAL data-type tag used when the flag is
 * packed into / unpacked from a message buffer -- confirm at the call sites. */
78 #define ORTE_ERRMGR_MIGRATE_TOOL_CMD OPAL_UINT8
/* Command values; both fit in an orte_errmgr_tool_cmd_flag_t. */
79 #define ORTE_ERRMGR_MIGRATE_TOOL_INIT_CMD 1
80 #define ORTE_ERRMGR_MIGRATE_TOOL_UPDATE_CMD 2
81 
82 /* Initialize/Finalize the orte-migrate communication functionality */
83 ORTE_DECLSPEC int orte_errmgr_base_tool_init(void);
84 ORTE_DECLSPEC int orte_errmgr_base_tool_finalize(void);
85 
/* NOTE(review): `state` is presumably one of the
 * ORTE_ERRMGR_MIGRATE_STATE_* values defined in this header -- confirm. */
86 ORTE_DECLSPEC void orte_errmgr_base_migrate_state_notify(int state);
/* Takes a state code and an out-parameter `state_str`; returns an int status.
 * NOTE(review): presumably stores a printable name for `state` in *state_str;
 * who owns/frees that string is not visible from this header -- confirm. */
87 ORTE_DECLSPEC int orte_errmgr_base_migrate_state_str(char ** state_str, int state);
88 
/* NOTE(review): the meaning of `status` (migrate state vs. error code) is
 * not visible from this header -- confirm against the implementation. */
89 ORTE_DECLSPEC int orte_errmgr_base_migrate_update(int status);
90 
91 /*
92  * Interfaces for C/R related recovery
93  */
/* Takes the job data, one process of that job, and an opal_list_t of local
 * snapshots; returns an int status.
 * NOTE(review): presumably rewrites the process's app context so it restarts
 * from one of the listed snapshots; element type of `local_snapshots` is not
 * visible here -- confirm. */
94 ORTE_DECLSPEC int orte_errmgr_base_update_app_context_for_cr_recovery(orte_job_t *jobdata,
95  orte_proc_t *proc,
96  opal_list_t *local_snapshots);
97 
/* NOTE(review): presumably restarts job `jobid` from the global snapshot
 * named by `global_handle` at sequence number `seq_num` -- confirm.
 * `global_handle` is a non-const char *; whether it is modified is not
 * visible from this header. */
98 ORTE_DECLSPEC int orte_errmgr_base_restart_job(orte_jobid_t jobid, char * global_handle, int seq_num);
/* NOTE(review): `datum` is a snapc request descriptor; presumably it carries
 * the migration request details for job `jobid` -- confirm. */
99 ORTE_DECLSPEC int orte_errmgr_base_migrate_job(orte_jobid_t jobid, orte_snapc_base_request_op_t *datum);
100 
101 /* Interface to report process state to the notifier */
102 ORTE_DECLSPEC void orte_errmgr_base_proc_state_notify(orte_proc_state_t state, orte_process_name_t *proc);
103 
104 #endif /* OPAL_ENABLE_FT_CR */
105 
106 #if OPAL_ENABLE_FT_MPI
/* NOTE(review): presumably set up / tear down the listener used for
 * fault notifications in OPAL_ENABLE_FT_MPI builds -- confirm against the
 * implementation. Both return an int status. */
107 ORTE_DECLSPEC int orte_errmgr_base_setup_listener(void);
108 ORTE_DECLSPEC int orte_errmgr_base_shutdown_listener(void);
109 
110 /**
111  * A callback for the upper layer to register if it wants to be notified
112  * of process errors as they occur.
 *
 * The callback receives an orte_process_name_t and an orte_proc_state_t,
 * both by value, and returns an int. The parameters are unnamed in this
 * typedef; NOTE(review): presumably they are the affected process and its
 * reported state -- confirm.
113  */
114 typedef int (*orte_errmgr_base_app_notify_callback_fn_t) (orte_process_name_t, orte_proc_state_t);
/* Callback variable of the type above; declared here, defined elsewhere. */
115 ORTE_DECLSPEC extern orte_errmgr_base_app_notify_callback_fn_t orte_errmgr_base_app_callback;
116 
/* Takes a new callback and an out-parameter for a callback pointer.
 * NOTE(review): presumably installs `new_func` and hands the previously
 * registered callback back through `*prev_func` -- confirm, including
 * whether `prev_func` may be NULL. */
117 ORTE_DECLSPEC int orte_errmgr_base_app_reg_notify_callback
118 (orte_errmgr_base_app_notify_callback_fn_t new_func,
119  orte_errmgr_base_app_notify_callback_fn_t *prev_func);
120 #endif /* OPAL_ENABLE_FT_MPI */
121 
122 /*
123  * Additional External API function declared in errmgr.h
124  */
/* Takes a pointer to a fault callback and returns a pointer of the same type
 * (orte_errmgr_fault_callback_t is declared in errmgr.h).
 * NOTE(review): presumably registers `cbfunc` and returns the callback that
 * was registered before it -- confirm against errmgr_base_fns.c. */
125 ORTE_DECLSPEC orte_errmgr_fault_callback_t *orte_errmgr_base_set_fault_callback(orte_errmgr_fault_callback_t *cbfunc);
126 
127 END_C_DECLS
128 
129 #endif /* ORTE_MCA_ERRMGR_BASE_H */
BEGIN_C_DECLS ORTE_DECLSPEC int orte_errmgr_base_open(void)
Function for finding and opening either all MCA components, or the one that was specifically requested via a MCA parameter.
Definition: errmgr_base_open.c:95
ORTE_DECLSPEC orte_errmgr_fault_callback_t * orte_errmgr_base_set_fault_callback(orte_errmgr_fault_callback_t *cbfunc)
Interfaces for orte-migrate tool.
Definition: errmgr_base_fns.c:739
Definition: errmgr.h:320
uint32_t orte_jobid_t
Set the allowed range for ids in each space.
Definition: types.h:76
Definition: types.h:146
ORTE_DECLSPEC orte_errmgr_base_component_t orte_errmgr_base_selected_component
Internal module reference.
Definition: errmgr_base_open.c:56
ORTE_DECLSPEC opal_list_t orte_errmgr_base_components_available
Output and component variables.
Definition: errmgr_base_open.c:52
The opal_list_t interface is used to provide a generic doubly-linked list container for Open MPI...
Definition: snapc.h:241
Top-level interface for all MCA components.
Definition: orte_globals.h:386
The Open RTE Error and Recovery Manager (ErrMgr)
Definition: orte_globals.h:316
Definition: opal_list.h:147