OpenMPI
0.1.1
|
Snapshot Coordination (SNAPC) Interface. More...
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#include "opal/class/opal_object.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/util/output.h"
#include "orte/mca/sstore/sstore.h"
Go to the source code of this file.
Data Structures | |
struct | orte_snapc_base_local_snapshot_1_0_0_t |
Definition of an ORTE local snapshot. More... | |
struct | orte_snapc_base_global_snapshot_1_0_0_t |
Definition of the global snapshot. More... | |
struct | orte_snapc_base_quiesce_1_0_0_t |
struct | orte_snapc_base_request_op_1_0_0_t |
struct | orte_snapc_base_component_2_0_0_t |
Structure for SNAPC components. More... | |
struct | orte_snapc_base_module_1_0_0_t |
Structure for SNAPC modules. More... | |
Macros | |
#define | ORTE_SNAPC_CKPT_STATE_ERROR 0 |
States that a process can be in while checkpointing. | |
#define | ORTE_SNAPC_CKPT_STATE_NONE 1 |
#define | ORTE_SNAPC_CKPT_STATE_REQUEST 2 |
#define | ORTE_SNAPC_CKPT_STATE_PENDING 3 |
#define | ORTE_SNAPC_CKPT_STATE_RUNNING 4 |
#define | ORTE_SNAPC_CKPT_STATE_INC_PREPED 5 |
#define | ORTE_SNAPC_CKPT_STATE_STOPPED 6 |
#define | ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL 7 |
#define | ORTE_SNAPC_CKPT_STATE_MIGRATING 8 |
#define | ORTE_SNAPC_CKPT_STATE_ESTABLISHED 9 |
#define | ORTE_SNAPC_CKPT_STATE_RECOVERED 10 |
#define | ORTE_SNAPC_CKPT_STATE_NO_CKPT 11 |
#define | ORTE_SNAPC_CKPT_STATE_NO_RESTART 12 |
#define | ORTE_SNAPC_CKPT_MAX 13 |
#define | ORTE_SNAPC_CKPT_SHIFT 131072 |
Sufficiently high shift value to avoid colliding the process checkpointing states above with the ORTE process states. | |
#define | ORTE_SNAPC_CKPT_NOTIFY(state) (ORTE_SNAPC_CKPT_SHIFT + state) |
#define | ORTE_SNAPC_CKPT_STATE(state) (state - ORTE_SNAPC_CKPT_SHIFT) |
#define | CHECK_ORTE_SNAPC_CKPT_STATE(state) (state >= ORTE_SNAPC_CKPT_SHIFT) |
#define | ORTE_SNAPC_BASE_VERSION_2_0_0 |
Macro for use in components that are of type SNAPC. More... | |
Typedefs | |
typedef struct orte_snapc_base_local_snapshot_1_0_0_t | orte_snapc_base_local_snapshot_1_0_0_t |
typedef struct orte_snapc_base_local_snapshot_1_0_0_t | orte_snapc_base_local_snapshot_t |
typedef struct orte_snapc_base_global_snapshot_1_0_0_t | orte_snapc_base_global_snapshot_1_0_0_t |
typedef struct orte_snapc_base_global_snapshot_1_0_0_t | orte_snapc_base_global_snapshot_t |
typedef struct orte_snapc_base_quiesce_1_0_0_t | orte_snapc_base_quiesce_1_0_0_t |
typedef struct orte_snapc_base_quiesce_1_0_0_t | orte_snapc_base_quiesce_t |
typedef struct orte_snapc_base_request_op_1_0_0_t | orte_snapc_base_request_op_1_0_0_t |
typedef struct orte_snapc_base_request_op_1_0_0_t | orte_snapc_base_request_op_t |
typedef int(* | orte_snapc_base_module_init_fn_t )(bool seed, bool app) |
Module initialization function. More... | |
typedef int(* | orte_snapc_base_module_finalize_fn_t )(void) |
Module finalization function. More... | |
typedef int(* | orte_snapc_base_setup_job_fn_t )(orte_jobid_t jobid) |
Set up the necessary structures for this job. Returns ORTE_SUCCESS. | |
typedef int(* | orte_snapc_base_release_job_fn_t )(orte_jobid_t jobid) |
Release the structures previously set up for this job. Returns ORTE_SUCCESS. | |
typedef int(* | orte_snapc_base_ft_event_fn_t )(int state) |
Handle fault tolerance updates. More... | |
typedef int(* | orte_snapc_base_start_checkpoint_fn_t )(orte_snapc_base_quiesce_t *datum) |
Start a checkpoint originating from an internal source. More... | |
typedef int(* | orte_snapc_base_end_checkpoint_fn_t )(orte_snapc_base_quiesce_t *datum) |
Signal end of checkpoint epoch originating from an internal source. More... | |
typedef int(* | orte_snapc_base_request_op_fn_t )(orte_snapc_base_request_op_t *datum) |
Request a checkpoint related operation to take place. | |
typedef struct orte_snapc_base_component_2_0_0_t | orte_snapc_base_component_2_0_0_t |
typedef struct orte_snapc_base_component_2_0_0_t | orte_snapc_base_component_t |
typedef struct orte_snapc_base_module_1_0_0_t | orte_snapc_base_module_1_0_0_t |
typedef struct orte_snapc_base_module_1_0_0_t | orte_snapc_base_module_t |
Enumerations | |
enum | orte_snapc_base_request_op_event_t { ORTE_SNAPC_OP_NONE = 0, ORTE_SNAPC_OP_INIT, ORTE_SNAPC_OP_FIN, ORTE_SNAPC_OP_FIN_ACK, ORTE_SNAPC_OP_CHECKPOINT, ORTE_SNAPC_OP_RESTART, ORTE_SNAPC_OP_MIGRATE, ORTE_SNAPC_OP_QUIESCE_START, ORTE_SNAPC_OP_QUIESCE_CHECKPOINT, ORTE_SNAPC_OP_QUIESCE_END } |
Application request for a global checkpoint related operation. | |
Functions | |
ORTE_DECLSPEC | OBJ_CLASS_DECLARATION (orte_snapc_base_local_snapshot_t) |
ORTE_DECLSPEC | OBJ_CLASS_DECLARATION (orte_snapc_base_global_snapshot_t) |
ORTE_DECLSPEC | OBJ_CLASS_DECLARATION (orte_snapc_base_quiesce_t) |
ORTE_DECLSPEC | OBJ_CLASS_DECLARATION (orte_snapc_base_request_op_t) |
Variables | |
ORTE_DECLSPEC orte_snapc_base_module_t | orte_snapc |
ORTE_DECLSPEC orte_snapc_base_component_t | orte_snapc_base_selected_component |
Snapshot Coordination (SNAPC) Interface.
Global Snapshot Coordinator:
This framework is tasked with:
Each component will have three tiers of behavior that must work in concert:
#define ORTE_SNAPC_BASE_VERSION_2_0_0 |
Macro for use in components that are of type SNAPC.
typedef int(* orte_snapc_base_end_checkpoint_fn_t)(orte_snapc_base_quiesce_t *datum) |
Signal end of checkpoint epoch originating from an internal source.
[in] | epoch | Epoch number to associate with this checkpoint operation. Returns ORTE_SUCCESS. |
typedef int(* orte_snapc_base_ft_event_fn_t)(int state) |
Handle fault tolerance updates.
[in] | state | Fault tolerance state update |
ORTE_SUCCESS | The operation completed successfully |
ORTE_ERROR | An unspecified error occurred |
typedef int(* orte_snapc_base_module_finalize_fn_t)(void) |
Module finalization function.
Returns ORTE_SUCCESS
typedef int(* orte_snapc_base_module_init_fn_t)(bool seed, bool app) |
Module initialization function.
Returns ORTE_SUCCESS
typedef int(* orte_snapc_base_start_checkpoint_fn_t)(orte_snapc_base_quiesce_t *datum) |
Start a checkpoint originating from an internal source.
This really only makes sense to call from an application, but in the future we may allow the checkpoint operation to use this function from the local coordinator.
[out] | epoch | Epoch number to associate with this checkpoint operation. Returns ORTE_SUCCESS. |