OpenMPI  0.1.1
opal_cr.h File Reference

Checkpoint functionality for Open MPI. More...

#include "opal_config.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/event/event.h"
#include "opal/util/output.h"
#include "opal/prefetch.h"

Go to the source code of this file.

Macros

#define OPAL_CR_DONE   ((char) 0)
 
#define OPAL_CR_ACK   ((char) 1)
 
#define OPAL_CR_CHECKPOINT   ((char) 2)
 
#define OPAL_CR_NAMED_PROG_R   ("opal_cr_prog_read")
 
#define OPAL_CR_NAMED_PROG_W   ("opal_cr_prog_write")
 
#define OPAL_CR_BASE_ENV_NAME   ("opal_cr_restart-env")
 
#define OPAL_CR_TEST_CHECKPOINT_READY()   ;
 
#define OPAL_CR_TEST_CHECKPOINT_READY_STALL()   ;
 
#define OPAL_CR_INIT_LIBRARY()   ;
 
#define OPAL_CR_FINALIZE_LIBRARY()   ;
 
#define OPAL_CR_ABORT_LIBRARY()   ;
 
#define OPAL_CR_ENTER_LIBRARY()   ;
 
#define OPAL_CR_EXIT_LIBRARY()   ;
 
#define OPAL_CR_NOOP_PROGRESS()   ;
 
#define OPAL_CR_TIMER_ENTRY0   0
 
#define OPAL_CR_TIMER_ENTRY1   1
 
#define OPAL_CR_TIMER_ENTRY2   2
 
#define OPAL_CR_TIMER_CRCPBR0   3
 
#define OPAL_CR_TIMER_CRCP0   4
 
#define OPAL_CR_TIMER_CRCPBR1   5
 
#define OPAL_CR_TIMER_P2P0   6
 
#define OPAL_CR_TIMER_P2P1   7
 
#define OPAL_CR_TIMER_P2PBR0   8
 
#define OPAL_CR_TIMER_CORE0   9
 
#define OPAL_CR_TIMER_CORE1   10
 
#define OPAL_CR_TIMER_COREBR0   11
 
#define OPAL_CR_TIMER_P2P2   12
 
#define OPAL_CR_TIMER_P2PBR1   13
 
#define OPAL_CR_TIMER_P2P3   14
 
#define OPAL_CR_TIMER_P2PBR2   15
 
#define OPAL_CR_TIMER_CRCP1   16
 
#define OPAL_CR_TIMER_COREBR1   17
 
#define OPAL_CR_TIMER_CORE2   18
 
#define OPAL_CR_TIMER_ENTRY3   19
 
#define OPAL_CR_TIMER_ENTRY4   20
 
#define OPAL_CR_TIMER_MAX   21
 
#define OPAL_CR_CLEAR_TIMERS()
 
#define OPAL_CR_SET_TIMER(idx)
 
#define OPAL_CR_DISPLAY_ALL_TIMERS()
 

Typedefs

typedef enum opal_cr_ckpt_cmd_state_t opal_cr_ckpt_cmd_state_t
 
typedef int(* opal_cr_notify_callback_fn_t )(opal_cr_ckpt_cmd_state_t)
 A function to respond to the async checkpoint request. This is useful when figuring out who should respond when stalling.
 
typedef int(* opal_cr_user_inc_callback_fn_t )(opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_state_t state)
 User coordination callback routine.
 
typedef int(* opal_cr_coord_callback_fn_t )(int)
 Coordination callback routine signature.
 

Enumerations

enum  opal_cr_ckpt_cmd_state_t {
  OPAL_CHECKPOINT_CMD_START, OPAL_CHECKPOINT_CMD_IN_PROGRESS, OPAL_CHECKPOINT_CMD_NULL, OPAL_CHECKPOINT_CMD_ERROR,
  OPAL_CR_STATUS_NONE, OPAL_CR_STATUS_REQUESTED, OPAL_CR_STATUS_RUNNING, OPAL_CR_STATUS_TERM,
  OPAL_CR_STATUS_CONTINUE, OPAL_CR_STATUS_RESTART_PRE, OPAL_CR_STATUS_RESTART_POST
}
 
enum  opal_cr_user_inc_callback_event_t {
  OMPI_CR_INC_PRE_CRS_PRE_MPI = 0, OMPI_CR_INC_PRE_CRS_POST_MPI = 1, OMPI_CR_INC_CRS_PRE_CKPT = 2, OMPI_CR_INC_CRS_POST_CKPT = 3,
  OMPI_CR_INC_POST_CRS_PRE_MPI = 4, OMPI_CR_INC_POST_CRS_POST_MPI = 5, OMPI_CR_INC_MAX = 6
}
 
enum  opal_cr_user_inc_callback_state_t { OMPI_CR_INC_STATE_PREPARE = 0, OMPI_CR_INC_STATE_CONTINUE = 1, OMPI_CR_INC_STATE_RESTART = 2, OMPI_CR_INC_STATE_ERROR = 3 }
 

Functions

OPAL_DECLSPEC int opal_cr_refresh_environ (int prev_pid)
 
OPAL_DECLSPEC int opal_cr_set_enabled (bool)
 
OPAL_DECLSPEC int opal_cr_init (void)
 Initialize the notification and coordination elements.
 
OPAL_DECLSPEC int opal_cr_finalize (void)
 Finalize the notification and coordination elements.
 
OPAL_DECLSPEC void opal_cr_test_if_checkpoint_ready (void)
 
OPAL_DECLSPEC int opal_cr_reg_notify_callback (opal_cr_notify_callback_fn_t new_func, opal_cr_notify_callback_fn_t *prev_func)
 
OPAL_DECLSPEC int opal_cr_inc_core (pid_t pid, opal_crs_base_snapshot_t *snapshot, opal_crs_base_ckpt_options_t *options, int *state)
 Function to go through the INC. More...
 
OPAL_DECLSPEC int opal_cr_inc_core_prep (void)
 
OPAL_DECLSPEC int opal_cr_inc_core_ckpt (pid_t pid, opal_crs_base_snapshot_t *snapshot, opal_crs_base_ckpt_options_t *options, int *state)
 
OPAL_DECLSPEC int opal_cr_inc_core_recover (int state)
 
OPAL_DECLSPEC int opal_cr_user_inc_register_callback (opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_fn_t function, opal_cr_user_inc_callback_fn_t *prev_function)
 
OPAL_DECLSPEC int trigger_user_inc_callback (opal_cr_user_inc_callback_event_t event, opal_cr_user_inc_callback_state_t state)
 
OPAL_DECLSPEC int opal_cr_reg_coord_callback (opal_cr_coord_callback_fn_t new_func, opal_cr_coord_callback_fn_t *prev_func)
 Register a checkpoint coordination routine for a higher level.
 
OPAL_DECLSPEC int opal_cr_coord (int state)
 OPAL Checkpoint Coordination Routine. More...
 
OPAL_DECLSPEC void opal_cr_set_time (int idx)
 Checkpoint life-cycle timing.
 
OPAL_DECLSPEC void opal_cr_display_all_timers (void)
 
OPAL_DECLSPEC void opal_cr_clear_timers (void)
 

Variables

OPAL_DECLSPEC int opal_cr_output
 
OPAL_DECLSPEC char * opal_cr_pipe_dir
 
OPAL_DECLSPEC int opal_cr_entry_point_signal
 
OPAL_DECLSPEC bool opal_cr_is_enabled
 
OPAL_DECLSPEC bool opal_cr_is_tool
 
OPAL_DECLSPEC int opal_cr_checkpoint_request
 
OPAL_DECLSPEC int opal_cr_checkpointing_state
 
OPAL_DECLSPEC bool opal_cr_stall_check
 
OPAL_DECLSPEC bool opal_cr_currently_stalled
 
OPAL_DECLSPEC bool opal_cr_timing_enabled
 
OPAL_DECLSPEC bool opal_cr_timing_barrier_enabled
 
OPAL_DECLSPEC int opal_cr_timing_my_rank
 
OPAL_DECLSPEC int opal_cr_timing_target_rank
 

Detailed Description

Checkpoint functionality for Open MPI.

Macro Definition Documentation

#define OPAL_CR_CLEAR_TIMERS ( )
Value:
{ \
if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
opal_cr_clear_timers(); \
} \
}
#define OPAL_CR_DISPLAY_ALL_TIMERS ( )
Value:
{ \
if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
opal_cr_display_all_timers(); \
} \
}
#define OPAL_CR_SET_TIMER (   idx)
Value:
{ \
if(OPAL_UNLIKELY(opal_cr_timing_enabled > 0)) { \
opal_cr_set_time(idx); \
} \
}
OPAL_DECLSPEC void opal_cr_set_time(int idx)
Checkpoint life-cycle timing.
Definition: opal_cr.c:1195

Function Documentation

OPAL_DECLSPEC int opal_cr_coord ( int  state)

OPAL Checkpoint Coordination Routine.

OPAL Checkpoint Coordination Routine.

Referenced by opal_cr_init().

OPAL_DECLSPEC int opal_cr_inc_core ( pid_t  pid,
opal_crs_base_snapshot_t *  snapshot,
opal_crs_base_ckpt_options_t *  options,
int *  state 
)

Function to go through the INC.

  • Call Registered INC_Coord(CHECKPOINT)
  • Call the CRS.checkpoint()
  • Call Registered INC_Coord(state)