OpenMPI
0.1.1
|
Distributed Stable Storage (SStore) Interface. More...
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "orte/runtime/orte_globals.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/class/opal_object.h"
Go to the source code of this file.
Data Structures | |
struct | orte_sstore_base_local_snapshot_info_1_0_0_t |
Local and Global snapshot information structure. Primarily used by orte-restart as an abstract way to handle metadata. More... | |
struct | orte_sstore_base_global_snapshot_info_1_0_0_t |
struct | orte_sstore_base_component_2_0_0_t |
Structure for SSTORE components. More... | |
struct | orte_sstore_base_module_1_0_0_t |
Structure for SSTORE modules. More... | |
Macros | |
#define | SSTORE_METADATA_LOCAL_CRS_COMP 0 |
CRS Component. | |
#define | SSTORE_METADATA_LOCAL_COMPRESS_COMP 1 |
Compress Component. | |
#define | SSTORE_METADATA_LOCAL_COMPRESS_POSTFIX 2 |
Compress Component Postfix. | |
#define | SSTORE_METADATA_LOCAL_PID 3 |
Process PID. | |
#define | SSTORE_METADATA_LOCAL_CONTEXT 4 |
Checkpoint Context File. | |
#define | SSTORE_METADATA_LOCAL_MKDIR 5 |
Directory to make on restart. | |
#define | SSTORE_METADATA_LOCAL_TOUCH 6 |
File to touch on restart. | |
#define | SSTORE_METADATA_LOCAL_SNAP_REF 7 |
Local snapshot reference (e.g., opal_snapshot_0.ckpt) | |
#define | SSTORE_METADATA_LOCAL_SNAP_REF_FMT 8 |
Local snapshot reference format string (e.g., opal_snapshot_%d.ckpt) passed vpid. | |
#define | SSTORE_METADATA_LOCAL_SNAP_LOC 9 |
Local snapshot directory (Full Path excluding reference) | |
#define | SSTORE_METADATA_LOCAL_SNAP_REF_LOC_FMT 10 |
Local snapshot reference directory (Full Path) | |
#define | SSTORE_METADATA_LOCAL_SNAP_META 11 |
Local snapshot metadata file (Full Path) | |
#define | SSTORE_METADATA_GLOBAL_SNAP_REF 12 |
Global snapshot reference (e.g., ompi_global_snapshot_1234.ckpt) | |
#define | SSTORE_METADATA_GLOBAL_SNAP_LOC 13 |
Global snapshot location (Relative Path from base) | |
#define | SSTORE_METADATA_GLOBAL_SNAP_LOC_ABS 14 |
Global snapshot location (Full path) | |
#define | SSTORE_METADATA_GLOBAL_SNAP_META 15 |
Global snapshot metadata file (Full path) | |
#define | SSTORE_METADATA_GLOBAL_SNAP_SEQ 16 |
Global snapshot sequence number. | |
#define | SSTORE_METADATA_GLOBAL_AMCA_PARAM 17 |
AMCA Parameter to be preserved for ompi-restart. | |
#define | SSTORE_METADATA_GLOBAL_SNAP_NUM_SEQ 18 |
Total number of sequence numbers for this snapshot. | |
#define | SSTORE_METADATA_GLOBAL_SNAP_ALL_SEQ 19 |
Comma separated list of all sequence numbers for this snapshot. | |
#define | SSTORE_METADATA_BASE_LOC 20 |
Access the current default base directory (Full Path) | |
#define | SSTORE_METADATA_LOCAL_SKIP_CKPT 21 |
The local process is skipping the checkpoint. Usually this is because there is a migration, and it is not participating. | |
#define | SSTORE_METADATA_GLOBAL_MIGRATING 22 |
A Migration checkpoint does not necessarily contain all of the processes in the job, so it is not a checkpoint that can be restarted from normally. More... | |
#define | SSTORE_METADATA_MAX 23 |
#define | ORTE_SSTORE_HANDLE OPAL_UINT32 |
Storage handle. | |
#define | ORTE_SSTORE_HANDLE_INVALID 0 |
#define | ORTE_SSTORE_BASE_VERSION_2_0_0 |
Macro for use in components that are of type SSTORE. More... | |
Typedefs | |
typedef uint32_t | orte_sstore_base_handle_t |
typedef struct orte_sstore_base_local_snapshot_info_1_0_0_t | orte_sstore_base_local_snapshot_info_1_0_0_t |
typedef struct orte_sstore_base_local_snapshot_info_1_0_0_t | orte_sstore_base_local_snapshot_info_t |
typedef struct orte_sstore_base_global_snapshot_info_1_0_0_t | orte_sstore_base_global_snapshot_info_1_0_0_t |
typedef struct orte_sstore_base_global_snapshot_info_1_0_0_t | orte_sstore_base_global_snapshot_info_t |
typedef int(* | orte_sstore_base_module_init_fn_t )(void) |
Module initialization function. More... | |
typedef int(* | orte_sstore_base_module_finalize_fn_t )(void) |
Module finalization function. More... | |
typedef int(* | orte_sstore_base_request_checkpoint_handle_fn_t )(orte_sstore_base_handle_t *handle, int seq, orte_jobid_t jobid) |
Request a checkpoint storage handle from stable storage. More... | |
typedef int(* | orte_sstore_base_request_restart_handle_fn_t )(orte_sstore_base_handle_t *handle, char *basedir, char *ref, int seq, orte_sstore_base_global_snapshot_info_t *snapshot) |
Request a restart storage handle from stable storage. This function will fail if the key cannot be matched. More... | |
typedef int(* | orte_sstore_base_request_global_snapshot_data_fn_t )(orte_sstore_base_handle_t *handle, orte_sstore_base_global_snapshot_info_t *snapshot) |
Request snapshot info from a given handle. More... | |
typedef int(* | orte_sstore_base_register_handle_fn_t )(orte_sstore_base_handle_t handle) |
Register access to a handle. More... | |
typedef int(* | orte_sstore_base_get_attribute_fn_t )(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char **value) |
Get attribute on the storage handle. More... | |
typedef int(* | orte_sstore_base_set_attribute_fn_t )(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char *value) |
Set attribute on the storage handle. More... | |
typedef int(* | orte_sstore_base_sync_fn_t )(orte_sstore_base_handle_t handle) |
Synchronize the handle. More... | |
typedef int(* | orte_sstore_base_remove_fn_t )(orte_sstore_base_handle_t handle) |
Remove data associated with the handle. More... | |
typedef int(* | orte_sstore_base_pack_fn_t )(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t handle) |
Pack a handle into a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators) More... | |
typedef int(* | orte_sstore_base_unpack_fn_t )(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t *handle) |
Unpack a handle from a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators) More... | |
typedef int(* | orte_sstore_base_fetch_app_deps_fn_t )(orte_app_context_t *app) |
Fetch application context dependencies before local launch. More... | |
typedef int(* | orte_sstore_base_wait_all_deps_fn_t )(void) |
Wait for all application context dependencies to be fetched. More... | |
typedef struct orte_sstore_base_component_2_0_0_t | orte_sstore_base_component_2_0_0_t |
typedef struct orte_sstore_base_component_2_0_0_t | orte_sstore_base_component_t |
typedef struct orte_sstore_base_module_1_0_0_t | orte_sstore_base_module_1_0_0_t |
typedef struct orte_sstore_base_module_1_0_0_t | orte_sstore_base_module_t |
Functions | |
ORTE_DECLSPEC | OBJ_CLASS_DECLARATION (orte_sstore_base_local_snapshot_info_t) |
ORTE_DECLSPEC | OBJ_CLASS_DECLARATION (orte_sstore_base_global_snapshot_info_t) |
Variables | |
BEGIN_C_DECLS typedef uint32_t | orte_sstore_base_key_t |
Keys accepted as metadata. | |
ORTE_DECLSPEC orte_sstore_base_handle_t | orte_sstore_handle_current |
ORTE_DECLSPEC orte_sstore_base_handle_t | orte_sstore_handle_last_stable |
ORTE_DECLSPEC orte_sstore_base_module_t | orte_sstore |
Distributed Stable Storage (SStore) Interface.
#define ORTE_SSTORE_BASE_VERSION_2_0_0 |
Macro for use in components that are of type SSTORE.
#define SSTORE_METADATA_GLOBAL_MIGRATING 22 |
A Migration checkpoint does not necessarily contain all of the processes in the job, so it is not a checkpoint that can be restarted from normally.
Therefore, it needs to be marked specially.
typedef int(* orte_sstore_base_fetch_app_deps_fn_t)(orte_app_context_t *app) |
Fetch application context dependencies before local launch.
app | Application context |
typedef int(* orte_sstore_base_get_attribute_fn_t)(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char **value) |
Get attribute on the storage handle.
handle | Storage handle |
key | Key to access |
value | Value of the key. NULL if not available |
typedef int(* orte_sstore_base_module_finalize_fn_t)(void) |
Module finalization function.
Returns ORTE_SUCCESS
typedef int(* orte_sstore_base_module_init_fn_t)(void) |
Module initialization function.
Returns ORTE_SUCCESS
typedef int(* orte_sstore_base_pack_fn_t)(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t handle) |
Pack a handle into a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators)
peer | Peer to which this is being sent (or NULL if to all peers) |
buffer | Buffer to pack the data into |
handle | Storage handle |
typedef int(* orte_sstore_base_register_handle_fn_t)(orte_sstore_base_handle_t handle) |
Register access to a handle.
handle | Storage handle |
typedef int(* orte_sstore_base_remove_fn_t)(orte_sstore_base_handle_t handle) |
Remove data associated with the handle.
handle | Storage handle |
typedef int(* orte_sstore_base_request_checkpoint_handle_fn_t)(orte_sstore_base_handle_t *handle, int seq, orte_jobid_t jobid) |
Request a checkpoint storage handle from stable storage.
handle | Checkpoint storage handle |
key | Key to use as an identifier |
value | Value of the key specified |
typedef int(* orte_sstore_base_request_global_snapshot_data_fn_t)(orte_sstore_base_handle_t *handle, orte_sstore_base_global_snapshot_info_t *snapshot) |
Request snapshot info from a given handle.
If the key is NULL, then the latest entry will be used.
handle | Restart storage handle |
typedef int(* orte_sstore_base_request_restart_handle_fn_t)(orte_sstore_base_handle_t *handle, char *basedir, char *ref, int seq, orte_sstore_base_global_snapshot_info_t *snapshot) |
Request a restart storage handle from stable storage. This function will fail if the key cannot be matched.
If multiple matches exist, it will return the latest one. If the key is NULL, then the latest entry will be used.
handle | Restart storage handle |
typedef int(* orte_sstore_base_set_attribute_fn_t)(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char *value) |
Set attribute on the storage handle.
handle | Storage handle |
key | Key to set |
value | Value of the key. |
typedef int(* orte_sstore_base_sync_fn_t)(orte_sstore_base_handle_t handle) |
Synchronize the handle.
handle | Storage handle |
typedef int(* orte_sstore_base_unpack_fn_t)(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t *handle) |
Unpack a handle from a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators)
peer | Peer from which this was received |
buffer | Buffer to unpack the data |
handle | Storage handle |
typedef int(* orte_sstore_base_wait_all_deps_fn_t)(void) |
Wait for all application context dependencies to be fetched.