OpenMPI  0.1.1
sstore.h File Reference

Distributed Stable Storage (SStore) Interface. More...

#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "orte/runtime/orte_globals.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/class/opal_object.h"

Go to the source code of this file.

Data Structures

struct  orte_sstore_base_local_snapshot_info_1_0_0_t
 Local and Global snapshot information structure. Primarily used by orte-restart as an abstract way to handle metadata. More...
 
struct  orte_sstore_base_global_snapshot_info_1_0_0_t
 
struct  orte_sstore_base_component_2_0_0_t
 Structure for SSTORE components. More...
 
struct  orte_sstore_base_module_1_0_0_t
 Structure for SSTORE modules. More...
 

Macros

#define SSTORE_METADATA_LOCAL_CRS_COMP   0
 CRS Component.
 
#define SSTORE_METADATA_LOCAL_COMPRESS_COMP   1
 Compress Component.
 
#define SSTORE_METADATA_LOCAL_COMPRESS_POSTFIX   2
 Compress Component Postfix.
 
#define SSTORE_METADATA_LOCAL_PID   3
 Process PID.
 
#define SSTORE_METADATA_LOCAL_CONTEXT   4
 Checkpoint Context File.
 
#define SSTORE_METADATA_LOCAL_MKDIR   5
 Directory to make on restart.
 
#define SSTORE_METADATA_LOCAL_TOUCH   6
 File to touch on restart.
 
#define SSTORE_METADATA_LOCAL_SNAP_REF   7
 Local snapshot reference (e.g., opal_snapshot_0.ckpt)
 
#define SSTORE_METADATA_LOCAL_SNAP_REF_FMT   8
 Local snapshot reference format string (e.g., opal_snapshot_%d.ckpt), which is passed the vpid.
 
#define SSTORE_METADATA_LOCAL_SNAP_LOC   9
 Local snapshot directory (Full Path excluding reference)
 
#define SSTORE_METADATA_LOCAL_SNAP_REF_LOC_FMT   10
 Local snapshot reference directory (Full Path)
 
#define SSTORE_METADATA_LOCAL_SNAP_META   11
 Local snapshot metadata file (Full Path)
 
#define SSTORE_METADATA_GLOBAL_SNAP_REF   12
 Global snapshot reference (e.g., ompi_global_snapshot_1234.ckpt)
 
#define SSTORE_METADATA_GLOBAL_SNAP_LOC   13
 Global snapshot location (Relative Path from base)
 
#define SSTORE_METADATA_GLOBAL_SNAP_LOC_ABS   14
 Global snapshot location (Full path)
 
#define SSTORE_METADATA_GLOBAL_SNAP_META   15
 Global snapshot metadata file (Full path)
 
#define SSTORE_METADATA_GLOBAL_SNAP_SEQ   16
 Global snapshot sequence number.
 
#define SSTORE_METADATA_GLOBAL_AMCA_PARAM   17
 AMCA Parameter to be preserved for ompi-restart.
 
#define SSTORE_METADATA_GLOBAL_SNAP_NUM_SEQ   18
 Total number of sequence numbers for this snapshot.
 
#define SSTORE_METADATA_GLOBAL_SNAP_ALL_SEQ   19
 Comma separated list of all sequence numbers for this snapshot.
 
#define SSTORE_METADATA_BASE_LOC   20
 Access the current default base directory (Full Path)
 
#define SSTORE_METADATA_LOCAL_SKIP_CKPT   21
 The local process is skipping the checkpoint. Usually this is because there is a migration, and it is not participating.
 
#define SSTORE_METADATA_GLOBAL_MIGRATING   22
 A Migration checkpoint does not necessarily contain all of the processes in the job, so it is not a checkpoint that can be restarted from normally. More...
 
#define SSTORE_METADATA_MAX   23
 
#define ORTE_SSTORE_HANDLE   OPAL_UINT32
 Storage handle.
 
#define ORTE_SSTORE_HANDLE_INVALID   0
 
#define ORTE_SSTORE_BASE_VERSION_2_0_0
 Macro for use in components that are of type SSTORE. More...
 

Typedefs

typedef uint32_t orte_sstore_base_handle_t
 
typedef struct
orte_sstore_base_local_snapshot_info_1_0_0_t 
orte_sstore_base_local_snapshot_info_1_0_0_t
 
typedef struct
orte_sstore_base_local_snapshot_info_1_0_0_t 
orte_sstore_base_local_snapshot_info_t
 
typedef struct
orte_sstore_base_global_snapshot_info_1_0_0_t 
orte_sstore_base_global_snapshot_info_1_0_0_t
 
typedef struct
orte_sstore_base_global_snapshot_info_1_0_0_t 
orte_sstore_base_global_snapshot_info_t
 
typedef int(* orte_sstore_base_module_init_fn_t )(void)
 Module initialization function. More...
 
typedef int(* orte_sstore_base_module_finalize_fn_t )(void)
 Module finalization function. More...
 
typedef int(* orte_sstore_base_request_checkpoint_handle_fn_t )(orte_sstore_base_handle_t *handle, int seq, orte_jobid_t jobid)
 Request a checkpoint storage handle from stable storage. More...
 
typedef int(* orte_sstore_base_request_restart_handle_fn_t )(orte_sstore_base_handle_t *handle, char *basedir, char *ref, int seq, orte_sstore_base_global_snapshot_info_t *snapshot)
 Request a restart storage handle from stable storage. This function will fail if the key cannot be matched. More...
 
typedef int(* orte_sstore_base_request_global_snapshot_data_fn_t )(orte_sstore_base_handle_t *handle, orte_sstore_base_global_snapshot_info_t *snapshot)
 Request snapshot info from a given handle. More...
 
typedef int(* orte_sstore_base_register_handle_fn_t )(orte_sstore_base_handle_t handle)
 Register access to a handle. More...
 
typedef int(* orte_sstore_base_get_attribute_fn_t )(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char **value)
 Get attribute on the storage handle. More...
 
typedef int(* orte_sstore_base_set_attribute_fn_t )(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char *value)
 Set attribute on the storage handle. More...
 
typedef int(* orte_sstore_base_sync_fn_t )(orte_sstore_base_handle_t handle)
 Synchronize the handle. More...
 
typedef int(* orte_sstore_base_remove_fn_t )(orte_sstore_base_handle_t handle)
 Remove data associated with the handle. More...
 
typedef int(* orte_sstore_base_pack_fn_t )(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t handle)
 Pack a handle into a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators). More...
 
typedef int(* orte_sstore_base_unpack_fn_t )(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t *handle)
 Unpack a handle from a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators). More...
 
typedef int(* orte_sstore_base_fetch_app_deps_fn_t )(orte_app_context_t *app)
 Fetch application context dependencies before local launch. More...
 
typedef int(* orte_sstore_base_wait_all_deps_fn_t )(void)
 Wait for all application context dependencies to be fetched. More...
 
typedef struct
orte_sstore_base_component_2_0_0_t 
orte_sstore_base_component_2_0_0_t
 
typedef struct
orte_sstore_base_component_2_0_0_t 
orte_sstore_base_component_t
 
typedef struct
orte_sstore_base_module_1_0_0_t 
orte_sstore_base_module_1_0_0_t
 
typedef struct
orte_sstore_base_module_1_0_0_t 
orte_sstore_base_module_t
 

Functions

ORTE_DECLSPEC OBJ_CLASS_DECLARATION (orte_sstore_base_local_snapshot_info_t)
 
ORTE_DECLSPEC OBJ_CLASS_DECLARATION (orte_sstore_base_global_snapshot_info_t)
 

Variables

BEGIN_C_DECLS typedef uint32_t orte_sstore_base_key_t
 Keys accepted as metadata.
 
ORTE_DECLSPEC
orte_sstore_base_handle_t 
orte_sstore_handle_current
 
ORTE_DECLSPEC
orte_sstore_base_handle_t 
orte_sstore_handle_last_stable
 
ORTE_DECLSPEC
orte_sstore_base_module_t 
orte_sstore
 

Detailed Description

Distributed Stable Storage (SStore) Interface.

Macro Definition Documentation

#define ORTE_SSTORE_BASE_VERSION_2_0_0
Value:
MCA_BASE_VERSION_2_0_0, \
"sstore", 2, 0, 0

Macro for use in components that are of type SSTORE.

#define SSTORE_METADATA_GLOBAL_MIGRATING   22

A Migration checkpoint does not necessarily contain all of the processes in the job, so it is not a checkpoint that can be restarted from normally.

Therefore, it needs to be marked specially.

Typedef Documentation

typedef int(* orte_sstore_base_fetch_app_deps_fn_t)(orte_app_context_t *app)

Fetch application context dependencies before local launch.

Parameters
app: Application context
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_get_attribute_fn_t)(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char **value)

Get attribute on the storage handle.

Parameters
handle: Storage handle
key: Key to access
value: Value of the key. NULL if not available
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_module_finalize_fn_t)(void)

Module finalization function.

Returns ORTE_SUCCESS

typedef int(* orte_sstore_base_module_init_fn_t)(void)

Module initialization function.

Returns ORTE_SUCCESS

typedef int(* orte_sstore_base_pack_fn_t)(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t handle)

Pack a handle into a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators).

Parameters
peer: Peer to which this is being sent (or NULL if to all peers)
buffer: Buffer to pack the data into
handle: Storage handle
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_register_handle_fn_t)(orte_sstore_base_handle_t handle)

Register access to a handle.

Parameters
handle: Storage handle
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_remove_fn_t)(orte_sstore_base_handle_t handle)

Remove data associated with the handle.

Parameters
handle: Storage handle
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_request_checkpoint_handle_fn_t)(orte_sstore_base_handle_t *handle, int seq, orte_jobid_t jobid)

Request a checkpoint storage handle from stable storage.

Parameters
handle: Checkpoint storage handle
key: Key to use as an identifier
value: Value of the key specified
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_request_global_snapshot_data_fn_t)(orte_sstore_base_handle_t *handle, orte_sstore_base_global_snapshot_info_t *snapshot)

Request snapshot info from a given handle.

If the key is NULL, then the latest entry will be used.

Parameters
handle: Restart storage handle
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_request_restart_handle_fn_t)(orte_sstore_base_handle_t *handle, char *basedir, char *ref, int seq, orte_sstore_base_global_snapshot_info_t *snapshot)

Request a restart storage handle from stable storage. This function will fail if the key cannot be matched.

If multiple matches exist, it will return the latest one. If the key is NULL, then the latest entry will be used.

Parameters
handle: Restart storage handle
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_set_attribute_fn_t)(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char *value)

Set attribute on the storage handle.

Parameters
handle: Storage handle
key: Key to set
value: Value of the key.
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_sync_fn_t)(orte_sstore_base_handle_t handle)

Synchronize the handle.

Parameters
handle: Storage handle
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_unpack_fn_t)(orte_process_name_t *peer, opal_buffer_t *buffer, orte_sstore_base_handle_t *handle)

Unpack a handle from a buffer. Only called between the HNP and ORTED (or Global and Local SnapC coordinators).

Parameters
peer: Peer from which this was received
buffer: Buffer to unpack the data
handle: Storage handle
Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure
typedef int(* orte_sstore_base_wait_all_deps_fn_t)(void)

Wait for all application context dependencies to be fetched.

Returns
ORTE_SUCCESS on success
ORTE_ERROR on failure