75 #include "orte_config.h"
76 #include "orte/constants.h"
80 #include "opal/mca/base/base.h"
82 #include "opal/mca/crs/base/base.h"
/*
 * Checkpoint state codes.
 *
 * The values are consecutive integers starting at 0.  ORTE_SNAPC_CKPT_MAX
 * is one greater than the largest state value (12), i.e. it equals the
 * number of state codes defined here.
 *
 * NOTE(review): presumably these are the values held in the `state`
 * ("State of the checkpoint") member of a snapshot - confirm against the
 * structure definitions.
 */
96 #define ORTE_SNAPC_CKPT_STATE_ERROR 0
99 #define ORTE_SNAPC_CKPT_STATE_NONE 1
101 #define ORTE_SNAPC_CKPT_STATE_REQUEST 2
103 #define ORTE_SNAPC_CKPT_STATE_PENDING 3
105 #define ORTE_SNAPC_CKPT_STATE_RUNNING 4
107 #define ORTE_SNAPC_CKPT_STATE_INC_PREPED 5
109 #define ORTE_SNAPC_CKPT_STATE_STOPPED 6
111 #define ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL 7
113 #define ORTE_SNAPC_CKPT_STATE_MIGRATING 8
115 #define ORTE_SNAPC_CKPT_STATE_ESTABLISHED 9
117 #define ORTE_SNAPC_CKPT_STATE_RECOVERED 10
119 #define ORTE_SNAPC_CKPT_STATE_NO_CKPT 11
121 #define ORTE_SNAPC_CKPT_STATE_NO_RESTART 12
/* One past the last state code above. */
122 #define ORTE_SNAPC_CKPT_MAX 13
/*
 * Offset added to a checkpoint state code to form its shifted
 * ("notify") encoding.  The offset is larger than every
 * ORTE_SNAPC_CKPT_STATE_* value, so a shifted value can be told apart
 * from a plain state code by comparing against it.
 */
#define ORTE_SNAPC_CKPT_SHIFT 131072

/*
 * Encode a state code as its shifted form.
 * The argument is parenthesized so that expressions containing
 * lower-precedence operators (?:, &, |, comparisons) expand correctly.
 */
#define ORTE_SNAPC_CKPT_NOTIFY(state) (ORTE_SNAPC_CKPT_SHIFT + (state))

/*
 * Decode a shifted value back into the plain state code
 * (inverse of ORTE_SNAPC_CKPT_NOTIFY).
 */
#define ORTE_SNAPC_CKPT_STATE(state) ((state) - ORTE_SNAPC_CKPT_SHIFT)

/*
 * Evaluates to 1 when the value is in shifted form
 * (i.e. is at least ORTE_SNAPC_CKPT_SHIFT), and to 0 otherwise.
 */
#define CHECK_ORTE_SNAPC_CKPT_STATE(state) ((state) >= ORTE_SNAPC_CKPT_SHIFT)
229 ORTE_SNAPC_OP_NONE = 0,
232 ORTE_SNAPC_OP_FIN_ACK,
233 ORTE_SNAPC_OP_CHECKPOINT,
234 ORTE_SNAPC_OP_RESTART,
235 ORTE_SNAPC_OP_MIGRATE,
236 ORTE_SNAPC_OP_QUIESCE_START,
237 ORTE_SNAPC_OP_QUIESCE_CHECKPOINT,
238 ORTE_SNAPC_OP_QUIESCE_END
286 (
bool seed,
bool app);
396 #define ORTE_SNAPC_BASE_VERSION_2_0_0 \
397 MCA_BASE_VERSION_2_0_0, \
bool restarting
Restarting?
Definition: snapc.h:212
Common type for all MCA components.
Definition: mca.h:250
orte_snapc_base_start_checkpoint_fn_t start_ckpt
Handle internal request for checkpoint.
Definition: snapc.h:382
opal_list_t local_snapshots
A list of orte_snapc_base_snapshot_t's.
Definition: snapc.h:172
OPAL output stream facility.
int * mig_vpid_pref
Migrating vpid preference list.
Definition: snapc.h:271
mca_base_component_t base_version
MCA base component.
Definition: snapc.h:353
opal_crs_state_type_t cr_state
State of operation if checkpointing.
Definition: snapc.h:208
opal_object_t super
Parent is an object type.
Definition: snapc.h:187
char * crs_name
Requested CRS.
Definition: snapc.h:192
int(* orte_snapc_base_start_checkpoint_fn_t)(orte_snapc_base_quiesce_t *datum)
Start a checkpoint originating from an internal source.
Definition: snapc.h:331
dynamic pointer array
Definition: opal_pointer_array.h:45
int(* orte_snapc_base_module_finalize_fn_t)(void)
Module finalization function.
Definition: snapc.h:293
char * global_handle
Global Handle.
Definition: snapc.h:258
mca_base_component_data_t base_data
MCA base data.
Definition: snapc.h:355
int(* orte_snapc_base_end_checkpoint_fn_t)(orte_snapc_base_quiesce_t *datum)
Signal end of checkpoint epoch originating from an internal source.
Definition: snapc.h:340
int epoch
Current epoch.
Definition: snapc.h:190
uint32_t orte_jobid_t
Set the allowed range for ids in each space.
Definition: types.h:76
char * cmdline
Command Line.
Definition: snapc.h:206
Structure for SNAPC components.
Definition: snapc.h:351
bool migrating
Migrating?
Definition: snapc.h:215
int(* orte_snapc_base_request_op_fn_t)(orte_snapc_base_request_op_t *datum)
Request a checkpoint related operation to take place.
Definition: snapc.h:346
int(* orte_snapc_base_ft_event_fn_t)(int state)
Handle fault tolerance updates.
Definition: snapc.h:318
Definition of the global snapshot.
Definition: snapc.h:167
See opal_bitmap.h for an explanation of why there is a split between OPAL and ORTE for this generic c...
opal_crs_base_ckpt_options_t * options
Checkpoint Options.
Definition: snapc.h:175
Distributed Stable Storage (SStore) Interface.
orte_sstore_base_handle_t ss_handle
Stable Storage Handle.
Definition: snapc.h:178
Structure for SNAPC modules.
Definition: snapc.h:370
orte_sstore_base_handle_t ss_handle
Stable Storage Handle (must equal the global version)
Definition: snapc.h:155
int verbose
Verbosity Level.
Definition: snapc.h:358
int priority
Default Priority.
Definition: snapc.h:362
orte_snapc_base_module_init_fn_t snapc_init
Initialization Function.
Definition: snapc.h:372
opal_object_t super
Parent is an object type.
Definition: snapc.h:243
orte_snapc_base_ft_event_fn_t ft_event
Handle any FT Notifications.
Definition: snapc.h:380
int seq_num
Sequence Number.
Definition: snapc.h:255
int leader
Leader of the operation.
Definition: snapc.h:252
Top-level interface for all MCA components.
Definition: opal_list.h:98
orte_snapc_base_global_snapshot_t * snapshot
snapshot list
Definition: snapc.h:196
int(* orte_snapc_base_setup_job_fn_t)(orte_jobid_t jobid)
Set up the necessary structures for this job. Returns ORTE_SUCCESS.
Definition: snapc.h:300
int(* orte_snapc_base_module_init_fn_t)(bool seed, bool app)
Module initialization function.
Definition: snapc.h:286
int mig_num
Migrating vpid list of participants.
Definition: snapc.h:264
orte_process_name_t process_name
ORTE Process name.
Definition: snapc.h:149
orte_snapc_base_module_finalize_fn_t snapc_finalize
Finalization Function.
Definition: snapc.h:374
int num_migrating
List of migrating processes.
Definition: snapc.h:217
opal_list_item_t super
List super object.
Definition: snapc.h:146
orte_snapc_base_request_op_event_t event
Event to request.
Definition: snapc.h:246
char(* mig_host_pref)[OPAL_MAX_PROCESSOR_NAME]
Migrating hostname preference list.
Definition: snapc.h:268
orte_sstore_base_handle_t ss_handle
Stable Storage Handle.
Definition: snapc.h:199
Definition of an ORTE local snapshot.
Definition: snapc.h:144
orte_sstore_base_global_snapshot_info_t * ss_snapshot
Stable Storage Snapshot list.
Definition: snapc.h:201
Base object.
Definition: opal_object.h:182
orte_snapc_base_request_op_fn_t request_op
Handle a checkpoint related request.
Definition: snapc.h:385
opal_list_item_t super
This is an object, so must have super.
Definition: snapc.h:169
Meta data for MCA v2.0.0 components.
Definition: mca.h:309
Definition: opal_list.h:147
orte_snapc_base_request_op_event_t
Application request for a global checkpoint related operation.
Definition: snapc.h:228
orte_snapc_base_setup_job_fn_t setup_job
Setup structures for a job.
Definition: snapc.h:376
char * target_dir
Target Directory.
Definition: snapc.h:204
bool checkpointing
Checkpointing?
Definition: snapc.h:210
int output_handle
Output Handle for opal_output.
Definition: snapc.h:360
orte_sstore_base_handle_t ss_handle
Stable Storage Handle.
Definition: snapc.h:261
orte_snapc_base_release_job_fn_t release_job
Release job.
Definition: snapc.h:378
bool is_active
Is this request still active.
Definition: snapc.h:249
int(* orte_snapc_base_release_job_fn_t)(orte_jobid_t jobid)
Release the structures that were set up for this job. Returns ORTE_SUCCESS.
Definition: snapc.h:307
Checkpoint and Restart Service (CRS) Interface.
int state
State of the checkpoint.
Definition: snapc.h:152
int * mig_off_node
Info key.
Definition: snapc.h:274
A simple C-language object-oriented system with single inheritance and ownership-based memory managem...
char * handle
Handle for reference.
Definition: snapc.h:194
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236
opal_crs_state_type_t
States of the module.
Definition: crs.h:60