OpenMPI  0.1.1
routed.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2007 Los Alamos National Security, LLC.
3  * All rights reserved.
4  * Copyright (c) 2004-2008 The Trustees of Indiana University.
5  * All rights reserved.
6  * Copyright (c) 2004-2011 The University of Tennessee and The University
7  * of Tennessee Research Foundation. All rights
8  * reserved.
9  * $COPYRIGHT$
10  *
11  * Additional copyrights may follow
12  *
13  * $HEADER$
14  */
15 
16 /**
17  * @file
18  *
19  * Routing table for the RML
20  *
21  * A flexible routing infrastructure for the RML. Provides "next hop"
22  * service. Only deals with orte_process_name_ts.
23  */
24 
25 
26 #ifndef ORTE_MCA_ROUTED_ROUTED_H_
27 #define ORTE_MCA_ROUTED_ROUTED_H_
28 
29 #include "orte_config.h"
30 
31 #ifdef HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34 
35 #include "orte/types.h"
36 #include "opal/mca/mca.h"
37 
38 #include "opal/dss/dss_types.h"
39 
40 #include "opal/mca/crs/crs.h"
41 #include "opal/mca/crs/base/base.h"
42 
44 
45 BEGIN_C_DECLS
46 
47 
48 /* ******************************************************************** */
49 
50 
51 struct opal_buffer_t;
52 struct orte_process_name_t;
53 struct orte_rml_module_t;
54 
55 
56 /* ******************************************************************** */
57 
58 /**
59  * routed component interface
60  *
61  * Component interface for the routed framework. A public instance of
62  * this structure, called mca_routed_[component name]_component, must
63  * exist in any routed component.
64  */
66  /* Base component description */
67  mca_base_component_t base_version;
68  /* Base component data block */
69  mca_base_component_data_t base_data;
70 };
71 /** Convenience typedef */
73 
74 
75 /* ******************************************************************** */
76 /**
77  * Initialize the routed module
78  *
79  * Do whatever needs to be done to initialize the selected module
80  *
81  * @retval ORTE_SUCCESS Success
82  * @retval ORTE_ERROR Error code from whatever was encountered
83  */
84 typedef int (*orte_routed_module_init_fn_t)(void);
85 
86 /**
87  * Finalize the routed module
88  *
88  * Finalize the routed module, cleaning up all resources
90  * associated with the module. After the finalize function is called,
91  * all interface functions (and the module structure itself) are not
92  * available for use.
93  *
94  * @note Whether or not the finalize function returns successfully,
95  * the module should not be used once this function is called.
96  *
97  * @retval ORTE_SUCCESS Success
98  * @retval ORTE_ERROR An unspecified error occurred
99  */
100 typedef int (*orte_routed_module_finalize_fn_t)(void);
101 
102 
103 /*
104  * Delete route
105  *
106  * Delete the route to the specified proc from the routing table. Note
107  * that wildcards are supported to remove routes from, for example, all
108  * procs in a given job
109  */
110 typedef int (*orte_routed_module_delete_route_fn_t)(orte_process_name_t *proc);
111 
112 /**
113  * Update route table with new information
114  *
115  * Update routing table with a new entry. If an existing exact match
116  * for the entry exists, it will be replaced with the current
117  * information. If the entry is new, it will be inserted behind all
118  * entries of similar "mask". So a wildcard cellid entry will be
119  * inserted after any fully-specified entries and any other wildcard
120  * cellid entries, but before any wildcard cellid and jobid entries.
121  *
122  * @retval ORTE_SUCCESS Success
122  * @retval ORTE_ERR_NOT_SUPPORTED The update is not supported. This
124  * is likely due to using partially-specified
125  * names with a component that does not support
126  * such functionality
127  * @retval ORTE_ERROR An unspecified error occurred
128  */
130  orte_process_name_t *route);
131 
132 /**
133  * Get the next hop towards the target
134  *
135  * Obtain the next process on the route to the target. ORTE's routing system
136  * works one hop at-a-time, so this function doesn't return the entire path
137  * to the target - it only returns the next hop. This could be the target itself,
138  * or it could be an intermediate relay. By design, we -never- use application
139  * procs as relays, so any relay will be an orted.
140  */
142 
143 /**
144  * Initialize the routing table
145  *
146  * Initialize the routing table for the specified job. This can be rather complex
147  * and depends entirely upon both the selected module AND whether the function
148  * is being called by the HNP, an orted, a tool, or an application proc. To
149  * understand what is happening, you really need to look at the specific module.
150  *
151  * Regardless, at the end of the function, the routes to any other process in the
152  * specified job -must- be defined (even if it is direct)
153  */
155 
156 /**
157  * Report a route as "lost"
158  *
159  * Report that an existing connection has been lost, therefore potentially
160  * "breaking" a route in the routing table. It is critical that broken
161  * connections be reported so that the selected routing module has the
162  * option of dealing with it. This could consist of nothing more than
163  * removing that route from the routing table, or could - in the case
164  * of a "lifeline" connection - result in abort of the process.
165  */
167 
168 /*
169  * Is this route defined?
170  *
171  * Check to see if a route to the specified target has been defined. The
172  * function returns "true" if it has, and "false" if no route to the
173  * target was previously defined.
174  *
175  * This is needed because routed modules will return their "wildcard"
176  * route if we request a route to a target that they don't know about.
177  * In some cases, though, we truly -do- need to know if a route was
178  * specifically defined.
179  */
180 typedef bool (*orte_routed_module_route_is_defined_fn_t)(const orte_process_name_t *target);
181 
182 /**
183  * Get wireup data for daemons
184  *
185  * Add whatever routing data
186  * this module requires to allow inter-process messaging.
187  */
189 
190 /*
191  * Update the module's routing tree for this process
192  *
193  * Called only by a daemon and the HNP, this function creates a list
194  * of "leaves" for this process and identifies the vpid of the parent
195  * sitting above this process in the tree.
196  *
197  * @param [in] jobid The jobid of the routing tree that needs to be updated.
198  *
199  * @retval ORTE_SUCCESS The operation completed successfully
200  * @retval ORTE_ERROR_xxx The specified error occurred
201  */
202 typedef int (*orte_routed_module_update_routing_tree_fn_t)(orte_jobid_t jobid);
203 
204 /*
205  * Get the routing tree for this process
206  *
207  * Fills the provided list with the direct children of this process
208  * in the routing tree, and returns the vpid of the parent. Only valid
209  * when called by a daemon or the HNP. Passing a NULL pointer will result
210  * in only the parent vpid being returned. The returned list will be filled
211  * with orte_routed_tree_t items.
212  */
213 typedef orte_vpid_t (*orte_routed_module_get_routing_tree_fn_t)(opal_list_t *children);
214 
215 /*
216  * Set lifeline process
217  *
218  * Defines the lifeline to be the specified process. Should contact with
219  * that process be lost, the errmgr will be called, possibly resulting
220  * in termination of the process and job.
221  */
222 typedef int (*orte_routed_module_set_lifeline_fn_t)(orte_process_name_t *proc);
223 
224 /*
225  * Get the number of routes supported by this process
226  *
227  * Returns the size of the routing tree using an O(1) function
228  */
229 typedef size_t (*orte_routed_module_num_routes_fn_t)(void);
230 
231 /**
232  * Handle fault tolerance updates
233  *
234  * @param[in] state Fault tolerance state update
235  *
236  * @retval ORTE_SUCCESS The operation completed successfully
237  * @retval ORTE_ERROR An unspecified error occurred
238  */
239 typedef int (*orte_routed_module_ft_event_fn_t)(int state);
240 
241 /* ******************************************************************** */
242 
243 
244 /**
245  * routed module interface
246  *
247  * Module interface to the routed communication system. A global
248  * instance of this module, orte_routed, provides an interface into the
249  * active routed interface.
250  */
252  /** Startup/shutdown the communication system and clean up resources */
255  /* API functions */
256  orte_routed_module_delete_route_fn_t delete_route;
261  orte_routed_module_route_is_defined_fn_t route_is_defined;
262  orte_routed_module_set_lifeline_fn_t set_lifeline;
263  /* fns for daemons */
264  orte_routed_module_update_routing_tree_fn_t update_routing_tree;
265  orte_routed_module_get_routing_tree_fn_t get_routing_tree;
267  orte_routed_module_num_routes_fn_t num_routes;
268  /* FT Notification */
270 };
271 /** Convenience typedef */
273 
274 /** Interface for routed communication */
275 ORTE_DECLSPEC extern orte_routed_module_t orte_routed;
276 
277 
278 /* ******************************************************************** */
279 
280 
281 /** Macro for use in components that are of type routed */
282 #define ORTE_ROUTED_BASE_VERSION_2_0_0 \
283  MCA_BASE_VERSION_2_0_0, \
284  "routed", 2, 0, 0
285 
286 
287 /* ******************************************************************** */
288 
289 
290 END_C_DECLS
291 
292 #endif
RML module interface.
Definition: rml.h:577
routed module interface
Definition: routed.h:251
int(* orte_routed_module_init_routes_fn_t)(orte_jobid_t job, opal_buffer_t *ndat)
Initialize the routing table.
Definition: routed.h:154
int(* orte_routed_module_get_wireup_info_fn_t)(opal_buffer_t *buf)
Get wireup data for daemons.
Definition: routed.h:188
Common type for all MCA components.
Definition: mca.h:250
int(* orte_routed_module_finalize_fn_t)(void)
Finalize the routed module.
Definition: routed.h:100
uint32_t orte_jobid_t
Set the allowed range for ids in each space.
Definition: types.h:76
Definition: types.h:146
ORTE_DECLSPEC orte_routed_module_t orte_routed
Interface for routed communication.
Definition: routed_base_components.c:81
Type definitions to support routed framework.
int(* orte_routed_module_route_lost_fn_t)(const orte_process_name_t *route)
Report a route as "lost".
Definition: routed.h:166
Top-level interface for all MCA components.
int(* orte_routed_module_init_fn_t)(void)
Initialize the routed module.
Definition: routed.h:84
orte_process_name_t(* orte_routed_module_get_route_fn_t)(orte_process_name_t *target)
Get the next hop towards the target.
Definition: routed.h:141
orte_routed_module_init_fn_t initialize
Startup/shutdown the communication system and clean up resources.
Definition: routed.h:253
int(* orte_routed_module_ft_event_fn_t)(int state)
Handle fault tolerance updates.
Definition: routed.h:239
Buffer management types.
Meta data for MCA v2.0.0 components.
Definition: mca.h:309
int(* orte_routed_module_update_route_fn_t)(orte_process_name_t *target, orte_process_name_t *route)
Update route table with new information.
Definition: routed.h:129
Definition: opal_list.h:147
routed component interface
Definition: routed.h:65
Structure for holding a buffer to be used with the RML or OOB subsystems.
Definition: dss_types.h:159
Checkpoint and Restart Service (CRS) Interface.