OpenMPI  0.1.1
vt_gpu.h
1 /**
2  * VampirTrace
3  * http://www.tu-dresden.de/zih/vampirtrace
4  *
5  * Copyright (c) 2005-2012, ZIH, TU Dresden, Federal Republic of Germany
6  *
7  * Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing
8  * Centre, Federal Republic of Germany
9  *
10  * See the file COPYING in the package base directory for details
11  **/
12 
13 #ifndef _VT_GPU_H_
14 #define _VT_GPU_H_
15 
16 #ifdef __cplusplus
17 # define EXTERN extern "C" /* C++ translation units: declare with C linkage */
18 #else
19 # define EXTERN extern /* plain C: ordinary external declaration */
20 #endif /* __cplusplus */
21 
22 #include "vt_defs.h"
23 #include "vt_inttypes.h" /* VampirTrace integer types */
24 #include "vt_thrd.h" /* thread creation for GPU kernels */
25 #include "vt_trc.h" /* VampirTrace events */
26 #include "vt_error.h" /* VampirTrace warning and error messages */
27 
28 /* Maximum string length of the name of a function/kernel executed on the GPU.
   NOTE(review): whether this counts the terminating NUL is not visible in this
   header - confirm against the users of this define. */
29 #define VTGPU_KERNEL_STRING_SIZE 256
30 
31 /* default and maximum buffer size for asynchronous on-device tasks (in bytes) */
32 #define VTGPU_DEFAULT_BSIZE 8192 /* 8 KiB */
33 #define VTGPU_MAX_BSIZE 2097152 /* 2 MiB = 256 * 8192 bytes (not 8192^8) */
34 
35 /* Per-thread property flags for GPU GROUP and GPU COMM. The values have to fit into 8 bits! */
36 #define VTGPU_NO_GPU 0x00 /* thread is not a GPU thread and does no GPU communication */
37 #define VTGPU_GPU 0x01 /* thread is a GPU thread */
38 #define VTGPU_GPU_COMM 0x02 /* thread does GPU communication (CPU or GPU thread) */
39 
40 /* Is a performance counter available? */
41 #define VTGPU_NO_PC 0x04 /* no performance counter is available for this thread */
42 
43 /*
44  * Get the rank ID for a given VampirTrace thread ID: the value of VT_PROCESS_ID(vt_my_trace, thread_id) minus one.
45  * The MPI RMA functions take the rank ID instead of the VampirTrace process ID!
46  */
47 #define VT_GPU_RANK_ID(thread_id) \
48  (VT_PROCESS_ID(vt_my_trace, thread_id)-1)
49 
50 #if (defined(VT_CUDARTWRAP) || defined(VT_CUPTI))
51 /*
52  * Parse the (mangled) device function name and extract the kernel name from it:
53  * "_Z<kernel_length><kernel_name><templates>..." (no name space)
54  * "_ZN<ns_length><ns_name>...<ns_length><ns_name><kernel_length>..." (with name space)
55  *
56  * @param kname [out] receives the extracted kernel name. NOTE(review): the required buffer size is not visible here (presumably VTGPU_KERNEL_STRING_SIZE) - confirm.
57  * @param devFunc [in] the CUDA internal kernel function name
58  */
59 EXTERN void vt_cuda_symbolToKernel(char *kname, const char* devFunc);
60 #endif /* defined(VT_CUDARTWRAP) || defined(VT_CUPTI) */
61 
62 
63 #if (defined(VT_CUDA) && defined(VT_CUPTI))
64 
65 #include "vt_cuda_driver_api.h"
66 
/*
 * Check the result of a CUDA driver API call and report a failure via
 * vt_gpu_handleCuError(), together with the file and line of the macro use.
 *
 * The result expression is evaluated exactly once and kept in a local
 * variable. A bare `if(_err != CUDA_SUCCESS)` expansion would evaluate it a
 * second time on the error path, re-issuing the driver call when a call
 * expression is passed. The do/while(0) wrapper turns the macro into a single
 * statement, so it is safe inside an unbraced if/else; use it with a trailing
 * semicolon.
 *
 * @param _err expression yielding a CUresult (typically the driver API call)
 * @param _msg message passed on to vt_gpu_handleCuError()
 */
# define CHECK_CU_ERROR(_err, _msg) \
  do { \
    CUresult vt_gpu_cu_res_ = (_err); \
    if (vt_gpu_cu_res_ != CUDA_SUCCESS) { \
      vt_gpu_handleCuError(vt_gpu_cu_res_, (_msg), __FILE__, __LINE__); \
    } \
  } while (0)
71 
72 /*
73  * Handles errors returned from CUDA driver API calls.
74  *
75  * @param ecode the CUDA driver API error code
76  * @param msg a message to get more detailed information about the error
77  * @param file the corresponding source file (CHECK_CU_ERROR passes __FILE__)
78  * @param line the line where the error occurred (CHECK_CU_ERROR passes __LINE__)
79  */
80 EXTERN void vt_gpu_handleCuError(CUresult ecode, const char* msg,
81  const char *file, const int line);
82 
83 #else /* defined(VT_CUDA) && defined(VT_CUPTI) */
84 
85 # define CHECK_CU_ERROR(_err, _msg) /* without VT_CUDA and VT_CUPTI: expands to nothing, so the arguments are not evaluated */
86 
87 #endif /* defined(VT_CUDA) && defined(VT_CUPTI) */
88 
89 
90 /* Direction of a device/host communication (memory copy). */
91 typedef enum {
92  VT_GPU_DEV2HOST = 0x00, /* device to host copy */
93  VT_GPU_HOST2DEV = 0x01, /* host to device copy */
94  VT_GPU_DEV2DEV = 0x02, /* device to device copy */
95  VT_GPU_HOST2HOST = 0x04, /* host to host copy */
96  VT_GPU_COPYDIRECTION_UNKNOWN = 0x08 /* direction is unknown */
97 } vt_gpu_copy_kind_t;
98 
99 /*
100  * Global communicator ID for all GPU threads.
101  */
102 EXTERN uint32_t vt_gpu_groupCID;
103 
104 /*
105  * Communicator ID for all node-local threads communicating with a GPU.
106  */
107 EXTERN uint32_t vt_gpu_commCID;
108 
109 /*
110  * Marks the processes/threads which participate in GPU communication.
111  * The index into the list is the thread ID (VTThrd...). NOTE(review): the entries presumably hold the VTGPU_* flags defined in this header - confirm.
112  */
113 EXTERN uint8_t *vt_gpu_prop;
114 
115 /*
116  * Flag: write the GPU idle time as a region into the first GPU stream/queue?
117  */
118 EXTERN uint8_t vt_gpu_trace_idle;
119 
120 /*
121  * Flag: is debugging on? (If yes, do not call CUDA functions in finalize.)
122  */
123 EXTERN uint8_t vt_gpu_debug;
124 
125 /*
126  * Flag: if enabled, abort the program on a GPU error.
127  */
128 EXTERN uint8_t vt_gpu_error;
129 
130 /*
131  * VampirTrace region ID for the GPU idle time.
132  */
133 EXTERN uint32_t vt_gpu_rid_idle;
134 
135 /*
136  * Initialization for all GPU API wrappers.
137  * The VampirTrace IDs have to be locked before calling this function.
138  */
139 EXTERN void vt_gpu_init(void);
140 
141 /*
142  * Finalization for all GPU API wrappers.
143  * The VampirTrace IDs have to be locked before calling this function.
144  */
145 EXTERN void vt_gpu_finalize(void);
146 
147 /*
148  * Uses the VampirTrace Thread API to create a GPU thread.
149  *
150  * @param tname [in] the name of the thread to be registered
151  * @param ptid [in] the parent thread ID
152  * @param vt_tid pointer to the thread ID of the thread to be registered (NOTE(review): presumably written by this function - confirm)
153  */
154 EXTERN void vt_gpu_registerThread(const char* tname, uint32_t ptid,
155  uint32_t *vt_tid);
156 
157 /***************************** hashing of strings *****************************/
158 
159 /* The key of the hash node is a string and the value is an unsigned 32-bit integer.
160  It is used to store region names with their corresponding region IDs. */
161 typedef struct vt_gpu_hnString_st {
162  char *sname; /**< name of the symbol */
163  uint32_t rid; /**< associated region group identifier */
164  struct vt_gpu_hnString_st *next; /**< bucket for collision */
166 
167 /*
168  * Stores a key/value pair (string -> 32-bit ID) in the string hash table.
169  *
170  * @param n pointer to a char (string) - the hash node's key
171  * @param rid integer - the hash node's value
172  *
173  * @return pointer to the hash node (returned as void*; NOTE(review): presumably a struct vt_gpu_hnString_st - confirm)
174  */
175 EXTERN void* vt_gpu_stringHashPut(const char* n, uint32_t rid);
176 
177 /*
178  * Retrieves the hash node for a given key.
179  *
180  * @param n pointer to a char (string) - the hash node's key
181  *
182  * @return pointer to the hash node (returned as void*). NOTE(review): the result for an unknown key is not visible in this header (presumably NULL) - confirm.
183  */
184 EXTERN void* vt_gpu_stringHashGet(const char* n);
185 
186 /*
187  * Clears the string hash table and frees all allocated hash nodes.
188  */
189 EXTERN void vt_gpu_stringhashClear(void);
190 
191 #endif /* _VT_GPU_H_ */
Definition: vt_gpu.h:161
uint32_t rid
associated region group identifier
Definition: vt_gpu.h:163
struct vt_gpu_hnString_st * next
bucket for collision
Definition: vt_gpu.h:164
char * sname
name of the symbol
Definition: vt_gpu.h:162