OpenMPI  0.1.1
snapc_full.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004-2010 The Trustees of Indiana University.
3  * All rights reserved.
4  * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
5  * All rights reserved.
6  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
7  * University of Stuttgart. All rights reserved.
8  * Copyright (c) 2004-2005 The Regents of the University of California.
9  * All rights reserved.
10  * $COPYRIGHT$
11  *
12  * Additional copyrights may follow
13  *
14  * $HEADER$
15  */
16 
17 /**
18  * @file
19  *
20  * FULL SNAPC component
21  *
22  * Simple, braindead implementation.
23  */
24 
25 #ifndef MCA_SNAPC_FULL_EXPORT_H
26 #define MCA_SNAPC_FULL_EXPORT_H
27 
28 #include "orte_config.h"
29 
30 #include "opal/mca/mca.h"
31 #include "opal/mca/event/event.h"
32 
33 #include "orte/mca/sstore/sstore.h"
34 #include "orte/mca/snapc/snapc.h"
35 
36 BEGIN_C_DECLS
37 
38 /*
39  * Command flags for the base receive
40  */
41 typedef uint8_t orte_snapc_full_cmd_flag_t;
42 #define ORTE_SNAPC_FULL_CMD OPAL_UINT8
43 #define ORTE_SNAPC_FULL_UPDATE_JOB_STATE_CMD 1
44 #define ORTE_SNAPC_FULL_UPDATE_JOB_STATE_QUICK_CMD 2
45 #define ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_CMD 3
46 #define ORTE_SNAPC_FULL_UPDATE_ORTED_STATE_QUICK_CMD 4
47 #define ORTE_SNAPC_FULL_VPID_ASSOC_CMD 5
48 #define ORTE_SNAPC_FULL_ESTABLISH_DIR_CMD 6
49 #define ORTE_SNAPC_FULL_RESTART_PROC_INFO 7
50 #define ORTE_SNAPC_FULL_REQUEST_OP_CMD 8
51 #define ORTE_SNAPC_FULL_MAX 9
52 
53  /*
54  * Local Component structures
55  */
57  orte_snapc_base_component_t super; /** Base SNAPC component */
58  };
60  OPAL_MODULE_DECLSPEC extern orte_snapc_full_component_t mca_snapc_full_component;
61 
62  /*
63  * Global Coordinator per orted metadata
64  */
66  /** Base SNAPC Global snapshot type */
68 
69  /** ORTE Process name */
71 
72  /** State of the checkpoint */
73  int state;
74  };
77 
78  /*
79  * Local Coordinator per app metadata
80  */
82  /** Base SNAPC Local snapshot type */
84 
85  /** Named Pipe Read and Write */
86  char * comm_pipe_r;
87  char * comm_pipe_w;
88  int comm_pipe_r_fd;
89  int comm_pipe_w_fd;
90  int unique_pipe_id;
91 
92  /* An OPAL event handle for the read pipe */
93  opal_event_t comm_pipe_r_eh;
94  bool is_eh_active;
95 
96  /** Process pid */
97  pid_t process_pid;
98 
99  /** Is this process a migration target */
100  bool migrating;
101 
102  /** Finished flag */
103  bool finished;
104  };
107 
108  extern bool orte_snapc_full_skip_app;
109  extern bool orte_snapc_full_timing_enabled;
110  extern int orte_snapc_full_progress_meter;
111  extern int orte_snapc_full_max_wait_time;
112 
113  int orte_snapc_full_component_query(mca_base_module_t **module, int *priority);
114 
115  /*
116  * Module functions
117  */
118  int orte_snapc_full_module_init(bool seed, bool app);
119  int orte_snapc_full_module_finalize(void);
120 
121  int orte_snapc_full_setup_job(orte_jobid_t jobid);
122  int orte_snapc_full_release_job(orte_jobid_t jobid);
123 
124  int orte_snapc_full_ft_event(int state);
125 
126  int orte_snapc_full_start_ckpt(orte_snapc_base_quiesce_t *datum);
127  int orte_snapc_full_end_ckpt(orte_snapc_base_quiesce_t *datum);
128  int orte_snapc_full_request_op(orte_snapc_base_request_op_t *datum);
129 
130  /*
131  * Global Coordinator Functionality
132  */
133  int global_coord_init(void);
134  int global_coord_finalize(void);
135  int global_coord_setup_job(orte_jobid_t jobid);
136  int global_coord_release_job(orte_jobid_t jobid);
137  int global_coord_orted_state_update(orte_process_name_t proc_name,
138  int proc_ckpt_state,
139  char **proc_ckpt_ref,
140  char **proc_ckpt_loc,
141  char **agent_ckpt);
142  int global_coord_start_ckpt(orte_snapc_base_quiesce_t *datum);
143  int global_coord_end_ckpt(orte_snapc_base_quiesce_t *datum);
144  int global_coord_restart_proc_info(pid_t local_pid,
145  char * local_hostname);
146 
147  /*
148  * Local Coordinator Functionality
149  */
150  int local_coord_init(void);
151  int local_coord_finalize(void);
152  int local_coord_setup_job(orte_jobid_t jobid);
153  int local_coord_release_job(orte_jobid_t jobid);
154  int local_coord_job_state_update(orte_jobid_t jobid,
155  int job_ckpt_state,
156  orte_sstore_base_handle_t ss_handle,
158 
159  /*
160  * Application Coordinator Functionality
161  */
162  int app_coord_init(void);
163  int app_coord_finalize(void);
164  int app_coord_ft_event(int state);
165  int app_coord_request_op(orte_snapc_base_request_op_t *datum);
166 
167 END_C_DECLS
168 
169 #endif /* MCA_SNAPC_FULL_EXPORT_H */
Definition: snapc_full.h:81
Common type for all MCA modules.
Definition: mca.h:100
Definition: snapc_full.h:65
uint32_t orte_jobid_t
Set the allowed range for ids in each space.
Definition: types.h:76
Definition: types.h:146
Structure for SNAPC components.
Definition: snapc.h:351
pid_t process_pid
Process pid.
Definition: snapc_full.h:97
Structure to represent a single event.
Definition: event_struct.h:87
Definition of the global snapshot.
Definition: snapc.h:167
Distributed Stable Storage (SStore) Interface.
bool finished
Finished flag.
Definition: snapc_full.h:103
Definition: snapc.h:241
Top-level interface for all MCA components.
char * comm_pipe_r
Named Pipe Read and Write.
Definition: snapc_full.h:86
Definition: snapc_full.h:56
orte_snapc_base_local_snapshot_t super
Base SNAPC Local snapshot type.
Definition: snapc_full.h:83
Definition: snapc.h:185
bool migrating
Is this process a migration target.
Definition: snapc_full.h:100
Definition of an ORTE local snapshot.
Definition: snapc.h:144
int state
State of the checkpoint.
Definition: snapc_full.h:73
Snapshot Coordination (SNAPC) Interface.
orte_snapc_base_global_snapshot_t super
Base SNAPC Global snapshot type.
Definition: snapc_full.h:67
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236
orte_process_name_t process_name
ORTE Process name.
Definition: snapc_full.h:70