OpenMPI  0.1.1
opal_sos.h
1 /*
2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3  * University Research and Technology
4  * Corporation. All rights reserved.
5  * Copyright (c) 2004-2006 The University of Tennessee and The University
6  * of Tennessee Research Foundation. All rights
7  * reserved.
8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9  * University of Stuttgart. All rights reserved.
10  * Copyright (c) 2004-2005 The Regents of the University of California.
11  * All rights reserved.
12  * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
13  * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
14  * $COPYRIGHT$
15  *
16  * Additional copyrights may follow
17  *
18  * $HEADER$
19  */
20 
21 #ifndef OPAL_SOS_H
22 #define OPAL_SOS_H
23 
24 #ifdef HAVE_LIMITS_H
25 #include <limits.h>
26 #endif
27 #ifdef HAVE_SYSLOG_H
28 #include <syslog.h>
29 #endif
30 
31 #include "opal/class/opal_object.h"
33 #include "opal/threads/mutex.h"
34 #include "opal/util/output.h"
35 
36 #ifdef __STDC_VERSION__
37 # if __STDC_VERSION__ < 199901L
38 # if defined(__GNUC__) && __GNUC__ >= 2
39 # define OPAL_SOS_FUNCTION __FUNCTION__
40 # else
41 # define OPAL_SOS_FUNCTION "<unknown>"
42 # endif
43 # else
44 # define OPAL_SOS_FUNCTION __func__
45 # endif
46 #else
47 # define OPAL_SOS_FUNCTION __func__
48 #endif
49 
50 /* Internal use only */
51 #define OPAL_SOS_ERR_BASE OPAL_SUCCESS
52 
53 /**
54  * Size of the OPAL SOS error table.
55  *
56  * Since the index into the error table that is encoded in the error
57  * code is 9-bit long, setting a higher value than (1 << 9) would make
58  * no difference at all.
59  */
60 #define OPAL_SOS_ERR_TABLE_SIZE 512
61 
62 /**
63  * Maximum length for the error string stored per error code in the
64  * OPAL SOS error table.
65  */
66 #define OPAL_SOS_MAX_ERR_LEN 1024
67 
68 /**
69  * Reports an error to OPAL SOS reporter.
70  *
71  * Encodes an informational message with severity \c severity and
72  * other passed arguments like errnum, errmsg etc. It also remembers
73  * the line number, file name and the function name where the error
74  * has occurred.
75  * If the MCA parameter \c opal_sos_print_low is set, the error message
76  * is displayed on stderr using the "show help" subsystem. By default,
77  * informational messages are not printed out on stderr.
78  * If \c show_stack is set, the stacktrace is saved and/or printed
79  * along with the corresponding \c errmsg.
80  */
81 #define OPAL_SOS_REPORT(severity, arg) opal_sos_reporter(__FILE__, __LINE__, \
82  OPAL_SOS_FUNCTION, \
83  severity, \
84  opal_sos_build_error arg)
85 
86 /**
87  * Print or store an event with the maximum severity (EMERG).
88  */
89 #define OPAL_SOS_EMERG(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_EMERG, arg)
90 
91 /**
92  * Report an event of severity "ALERT".
93  */
94 #define OPAL_SOS_ALERT(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_ALERT, arg)
95 
96 /**
97  * Report events with severity marked as "CRITICAL".
98  */
99 #define OPAL_SOS_CRIT(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_CRIT, arg)
100 
101 /**
102  * Prints and/or logs an error.
103  * This function can be used to log or print error events.
104  */
105 #define OPAL_SOS_ERROR(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_ERROR, arg)
106 
107 /**
108  * Prints and/or logs a warning.
109  *
110  * This function is similar to OPAL_SOS_INFO but with a higher
111  * severity. These events are printed out on the output stream
112  * by default.
113  */
114 #define OPAL_SOS_WARN(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_WARN, arg)
115 
116 /**
117  * Report an error event with severity "NOTICE".
118  */
119 #define OPAL_SOS_NOTICE(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_NOTICE,arg)
120 
121 /**
122  * Prints or logs an informational message in the OPAL SOS framework.
123  * Events with this severity are not printed, by default. However,
124  * they are still stored in the SOS table.
125  */
126 #define OPAL_SOS_INFO(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_INFO, arg)
127 
128 /**
129  * Log debug events in the SOS framework.
130  */
131 #define OPAL_SOS_DEBUG(arg) OPAL_SOS_REPORT(OPAL_SOS_SEVERITY_DEBUG, arg)
132 
133 /**
134  * Frees all the (entire stack of) OPAL SOS error objects associated
135  * with the encoded error code obtained after dereferencing the
136  * pointer \c errnum.
137  */
138 #define OPAL_SOS_FREE(perrnum) opal_sos_free(perrnum)
139 
140 /**
141  * Print the warnings/errors/informational messages previously logged
142  * in to the SOS framework.
143  *
144  * This function prints the error details encoded by \c errnum.
145  * If \c show_history is true, the entire history for the error
146  * represented by \c errnum is printed on the output stream.
147  */
148 #define OPAL_SOS_PRINT(errnum, show_history) \
149  opal_sos_print(errnum, show_history)
150 
/**
 * Attach the history of one error code to another error code.
 *
 * Stores the error table index of \c errnum (as extracted by
 * OPAL_SOS_GET_INDEX()) in the 9-bit attached-index field of
 * \c errtgt (mask 0xFF80000 of the negated code) and evaluates to
 * the updated \c errtgt.
 *
 * \c errtgt must be a modifiable lvalue and is evaluated more than
 * once, so it must not have side effects.
 */
#define OPAL_SOS_ATTACH(errtgt, errnum)                                 \
    ((errtgt) = -((-(errtgt) & ~0xFF80000L) |                           \
                  ((OPAL_SOS_GET_INDEX(errnum) & 0x1FFL) * 0x80000L)))
159 
/**
 * Returns the index of the error attached to \c errnum using
 * OPAL_SOS_ATTACH().
 *
 * The attached index is the 9-bit field selected by the mask
 * 0xFF80000 in the negated error code. The argument is fully
 * parenthesized, so any integer expression may be passed.
 */
#define OPAL_SOS_GET_ATTACHED_INDEX(errnum) \
    ((int) ((-(errnum) & 0xFF80000L) >> 19))
164 
/**
 * Returns the native error code for the given encoded error code \c
 * errnum. \c errnum can be a native error code itself.
 *
 * Non-negative values are returned unchanged. For negative values the
 * lower 10 bits of the negated code are extracted and negated again.
 *
 * \c errnum is evaluated more than once, so it must not have side
 * effects; it is fully parenthesized, so any integer expression may
 * be passed.
 */
#define OPAL_SOS_GET_ERROR_CODE(errnum) \
    (((errnum) >= 0) ? (errnum) : (int) -(-(errnum) & 0x3FFL))
171 
/**
 * Sets the native error code for the potentially encoded error code.
 *
 * The lower 10 bits are reserved for the native error code. This
 * macro replaces the lower 10 bits of (the negated) \c errnum with
 * those of (the negated) \c nativeerr and leaves all other bits
 * untouched.
 *
 * \c errnum must be a modifiable lvalue and is evaluated more than
 * once. \c nativeerr is fully parenthesized, so any integer
 * expression may be passed.
 */
#define OPAL_SOS_SET_ERROR_CODE(errnum, nativeerr) \
    ((errnum) = -((-(errnum) & ~0x3FFL) | (-(nativeerr) & 0x3FFL)))
180 
/**
 * Macro to check if the error encoded by \c errnum is a native error
 * or an OPAL SOS encoded error.
 *
 * Evaluates to non-zero when no bit above the lower 10 bits is set in
 * the negated error code. The argument is fully parenthesized, so any
 * integer expression may be passed.
 */
#define OPAL_SOS_IS_NATIVE(errnum) ((-(errnum) & ~0x3FFL) == 0)
186 
/**
 * Returns the severity level for the potentially encoded error code.
 *
 * The severity is encoded in the last three bits of the first nibble,
 * i.e. the 3-bit field starting at bit 28 of the negated error code.
 * The argument is fully parenthesized, so any integer expression may
 * be passed.
 */
#define OPAL_SOS_GET_SEVERITY(errnum) ((int)((-(errnum) >> 28) & 0x7L))
193 
/**
 * Sets the severity level for the given error code \c errnum.
 *
 * This macro does not do strict error checking of the specified
 * severity level: only the lower three bits of \c severity are used,
 * and they replace the 3-bit severity field starting at bit 28 of the
 * negated error code.
 *
 * \c errnum must be a modifiable lvalue and is evaluated more than
 * once. \c severity is fully parenthesized, so any integer expression
 * may be passed.
 */
#define OPAL_SOS_SET_SEVERITY(errnum, severity)                 \
    ((errnum) = -((-(errnum) & ~0x70000000L) |                  \
                  (((severity) & 0x7L) * 0x10000000L)))
202 
203 /**
204  * Macro to get the encoded error severity level as a string.
205  *
206  * This macro accepts the argument \c severity and calls the corresponding
207  * function opal_sos_severity2str to convert it to a string. The result
208  * is returned in a static buffer that should not be freed with free().
209  */
210 #define OPAL_SOS_SEVERITY2STR(severity) opal_sos_severity2str(severity)
211 
212 /**
213  * Log an encoded error \c errnum.
214  *
215  * This macro prints out and consequently frees the entire stack of
216  * errors associated with the \c errnum.
217  */
218 #define OPAL_SOS_LOG(errnum) opal_sos_log(errnum)
219 
/**
 * \internal
 * Returns the index into the error table of the error encoded by \c errnum.
 *
 * The index is 9 bits long and is stored in the field selected by the
 * mask 0x7FC00 (bits 10 through 18, counting from bit 0) of the
 * negated error code. The argument is fully parenthesized, so any
 * integer expression may be passed.
 */
#define OPAL_SOS_GET_INDEX(errnum) ((int)((-(errnum) & 0x7FC00L) >> 10))
228 
/**
 * \internal
 * Sets the index into the error table for the error encoded by \c errnum.
 *
 * Only the lower 9 bits of \c index are used; they replace the field
 * selected by the mask 0x7FC00 in the negated error code.
 *
 * \c errnum must be a modifiable lvalue and is evaluated more than
 * once. \c index is fully parenthesized, so any integer expression
 * may be passed.
 */
#define OPAL_SOS_SET_INDEX(errnum, index)                       \
    ((errnum) = -((-(errnum) & ~0x7FC00L) |                     \
                  (((index) & 0x1FFL) * 0x400L)))
235 
236 BEGIN_C_DECLS
237 
238 /** This MCA parameter sos_print_low can be set to non-zero to enable
239  * the print-at-bottom preference for OPAL SOS. */
240 OPAL_DECLSPEC extern bool opal_sos_print_low;
241 
242 /* Severity levels for OPAL SOS */
243 typedef enum {
244  OPAL_SOS_SEVERITY_EMERG = LOG_EMERG,
245  OPAL_SOS_SEVERITY_ALERT = LOG_ALERT,
246  OPAL_SOS_SEVERITY_CRIT = LOG_CRIT,
247  OPAL_SOS_SEVERITY_ERROR = LOG_ERR,
248  OPAL_SOS_SEVERITY_WARN = LOG_WARNING,
249  OPAL_SOS_SEVERITY_NOTICE = LOG_NOTICE,
250  OPAL_SOS_SEVERITY_INFO = LOG_INFO,
251  OPAL_SOS_SEVERITY_DEBUG = LOG_DEBUG
252 } opal_sos_severity_t;
253 
254 typedef struct opal_sos_error_t {
255  /** Class parent */
257 
258  /**
259  * The encoded error code for a given type of error.
260  *
261  * errnum encodes a native error code (lower 10 bits) with the
262  * current severity (a 3-bit field) and an index into the error
263  * table along with the associated error, if there is one.
264  */
265  int errnum;
266 
267  /** File in which the error occurred */
268  char *file;
269 
270  /** Line number on which the error was encountered */
271  int line;
272 
273  /** This is an optional parameter that indicates the function in
274  which the error occurred */
275  char *func;
276 
277  /** The actual error message or string for the error indicated by
278  \c errnum */
279  char *msg;
280 
281  /** Encoded error numbers of the previous and the next error.
282  These are used to maintain the history of an error.
283  The complete history of an error can be printed later using
284  OPAL_SOS_PRINT() */
285  int prev;
286  int next;
288 
290 
291 /**
292  * Signature for OPAL SOS print function callback type.
293  */
294 typedef void (*opal_sos_print_callback_fn_t) (int errcode);
295 
296 /**
297  * Signature for OPAL SOS reporter function callback type.
298  */
299 typedef void (*opal_sos_reporter_callback_fn_t) (opal_sos_severity_t severity, int errcode,
300  const char *msg, ...)
301  __opal_attribute_format_funcptr__(__printf__, 3, 4);
302 
303 /**
304  * A global handle that points to the local OPAL SOS table.
305  * This is used by the notifier components to reference the local OPAL
306  * SOS table, especially for packing/unpacking and sending it over to
307  * the HNP.
308  */
309 OPAL_DECLSPEC extern opal_hash_table_t opal_sos_table;
310 
311 /**
312  * A global handle that points to the OPAL SOS table lock.
313  *
314  */
315 OPAL_DECLSPEC extern opal_mutex_t opal_sos_table_lock;
316 
317 /**
318  * \internal
319  *
320  * Initialize OPAL SOS.
321  *
322  * This function initializes and sets up the structures required to
323  * track the data handled by OPAL SOS. It is invoked by
324  * opal_util().
325  */
326 void opal_sos_init(void);
327 
328 /**
329  * \internal
330  *
331  * Shut down OPAL SOS.
332  *
333  * Invoked by opal_finalize() to deallocate the structures needed by
334  * OPAL SOS.
335  */
336 void opal_sos_finalize(void);
337 
338 /**
339  * Prints or relays the error locally or using the selected notifier
340  * components.
341  */
342 void
343 opal_sos_report_error(opal_sos_error_t *error);
344 
345 /**
346  * Builds an OPAL SOS error object given the parameters errnum,
347  * show_stack and errmsg.
348  * NOTE: This function only partially populates the SOS error object
349  * structure, setting the error message details but nothing about where
350  * the error occurred. Filling up the rest of the error object is left
351  * to OPAL SOS reporter which then handles the error appropriately.
352  *
353  * @param errnum
354  * @param show_stack
355  * @param errmsg
356  *
357  * @return
358  */
359 OPAL_DECLSPEC opal_sos_error_t *
360 opal_sos_build_error(int errnum, bool show_stack,
361  const char *errmsg, ...)
362  __opal_attribute_format_funcptr__(__printf__, 3, 4);
363 
364 /**
365  * OPAL SOS reporter logs the error in the OPAL SOS error table or
366  * prints it out depending on the associated reporter callback. It can
367  * also relay the error messages to the selected notifier components
368  * using the OPAL SOS reporter callback interface.
369  *
370  * @param file
371  * @param line
372  * @param func
373  * @param opal_error
374  *
375  * @return encoded error code
376  */
377 OPAL_DECLSPEC int opal_sos_reporter(const char *file, int line, const char *func,
378  opal_sos_severity_t severity,
379  opal_sos_error_t *opal_error);
380 
381 /**
382  * Prints the error encoded by the error number \c errnum
383  *
384  * @param errnum
385  * @param show_history
386  *
387  */
388 OPAL_DECLSPEC void opal_sos_print(int errnum, bool show_history);
389 
390 OPAL_DECLSPEC int opal_sos_prettify_error(const char *error, char **pretty_error);
391 
392 /**
393  * Prints a single error represented by the OPAL SOS error object
394  * opal_sos_error_t.
395  */
396 OPAL_DECLSPEC void opal_sos_print_error(opal_sos_severity_t severity,
397  int errnum, const char *errmsg, ...)
398  __opal_attribute_format_funcptr__(__printf__, 3, 4);
399 
400 /**
401  * Frees the error object represented by the error code \c errnum.
402  */
403 OPAL_DECLSPEC void opal_sos_free(int *errnum);
404 
405 /**
406  * Logs (prints and frees) the error object represented by \c errnum.
407  */
408 OPAL_DECLSPEC void opal_sos_log(int errnum);
409 
410 /**
411  * Returns the OPAL SOS severity level as a string.
412  *
413  */
414 const char *opal_sos_severity2str(opal_sos_severity_t severity);
415 
416 /**
417  * \internal
418  * Return a unique key into the hash table (opal_sos_table)
419  * depending on the type and location of the error.
420  *
421  */
422 int opal_sos_hash_error(opal_sos_error_t *error);
423 
424 /**
425  * Registers a print callback function for OPAL_SOS_PRINT()
426  */
427 OPAL_DECLSPEC int
428 opal_sos_reg_print_callback(opal_sos_print_callback_fn_t new_func,
429  opal_sos_print_callback_fn_t *prev_func);
430 
431 /**
432  * Registers a reporter callback function for OPAL_SOS_INFO(),
433  * OPAL_SOS_WARN() and OPAL_SOS_ERROR()
434  */
435 OPAL_DECLSPEC int
436 opal_sos_reg_reporter_callback(opal_sos_reporter_callback_fn_t new_func,
437  opal_sos_reporter_callback_fn_t *prev_func);
438 
439 END_C_DECLS
440 
441 #endif /* OPAL_SOS_H */
Definition: opal_hash_table.h:42
char * msg
The actual error message or string for the error indicated by errnum.
Definition: opal_sos.h:279
OPAL output stream facility.
char * file
File in which the error occured.
Definition: opal_sos.h:268
char * func
This is an optional parameter that indicates the function in which the error occured.
Definition: opal_sos.h:275
Definition: opal_sos.h:254
Definition: mutex_unix.h:53
int errnum
The encoded error code for a given type of error.
Definition: opal_sos.h:265
A hash table that may be indexed with either fixed length (e.g.
Base object.
Definition: opal_object.h:182
int prev
Encoded error numbers of the previous and the next error.
Definition: opal_sos.h:285
opal_object_t super
Class parent.
Definition: opal_sos.h:256
int line
Line number on which the error was encountered.
Definition: opal_sos.h:271
Mutual exclusion functions.
A simple C-language object-oriented system with single inheritance and ownership-based memory managem...
#define OBJ_CLASS_DECLARATION(NAME)
Declaration for class descriptor.
Definition: opal_object.h:236