/* datastructs.h — data structures of the otfprofile tool (OTF library) */
/*
 This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
 Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
*/

#ifndef DATASTRUCTS_H
#define DATASTRUCTS_H


#include <stdlib.h>
#include <cassert>
#include <sys/time.h>

#include <string>
#include <map>
#include <list>
#include <set>

#ifdef OTFPROFILE_MPI
# include "mpi.h"
#endif /* OTFPROFILE_MPI */

#include "OTF_inttypes.h"


using namespace std;

/* *** process clustering algorithms *** */

/* selects which algorithm is used to cluster processes by similar
   function call behavior; see the Params::Clustering settings below */
typedef enum {

    CLUSTER_ALG_CLINKAGE, /* hierarchical clustering (presumably complete linkage — confirm) */
    CLUSTER_ALG_KMEANS    /* k-means clustering */

} ClusterAlgorithm;


39 /* *** program parameters *** */
40 
41 struct Params {
42 
43  /* general parameters */
44 
45  static const uint32_t DEFAULT_MAX_FILE_HANDLES= 50;
46  static const uint32_t DEFAULT_BUFFER_SIZE= 1024 * 1024;
47  static const uint32_t DEFAULT_MAX_GROUPS= 16;
48  static const uint8_t DEFAULT_VERBOSE_LEVEL= 0;
49  static const bool DEFAULT_CREATE_CSV= false;
50  static const bool DEFAULT_CREATE_TEX= true;
51  static const bool DEFAULT_CREATE_PDF= true;
52  static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
53 
54  uint32_t max_file_handles;
55  uint32_t buffer_size;
56  uint32_t max_groups;
57  uint8_t verbose_level;
58  bool progress;
59  bool read_from_stats;
60 
61  bool create_csv;
62  bool create_tex;
63  bool create_pdf;
64  string input_file_prefix;
65  string output_file_prefix;
66 
67  /* process clustering related parameters */
68 
69  struct Clustering {
70 
71  static const ClusterAlgorithm DEFAULT_ALGORITHM= CLUSTER_ALG_KMEANS;
72  static double DEFAULT_QUALITY_THRESHOLD() { return 0.1; }
73  static const string DEFAULT_MAP_FILE_NAME() { return "result.map"; }
74 
75  ClusterAlgorithm alg;
76  bool enabled;
77  bool shrink;
78  bool hard_grouping;
79  double quality_threshold;
80 
81  string map_file_name;
82  string shrink_output_prefix;
83 
84  bool synth_data;
85  uint32_t synth_ranks_num;
86  uint32_t synth_funcs_num;
87 
88  Clustering()
89  : alg(DEFAULT_ALGORITHM), enabled(false), shrink(false),
90  hard_grouping(false),
91  quality_threshold(DEFAULT_QUALITY_THRESHOLD()),
92  map_file_name(DEFAULT_MAP_FILE_NAME()), synth_data(false),
93  synth_ranks_num(0), synth_funcs_num(0) {}
94 
95  } clustering;
96 
97  Params()
98  : max_file_handles(DEFAULT_MAX_FILE_HANDLES),
99  buffer_size(DEFAULT_BUFFER_SIZE), max_groups(DEFAULT_MAX_GROUPS),
100  verbose_level(DEFAULT_VERBOSE_LEVEL), progress(false),
101  read_from_stats(false), create_csv(DEFAULT_CREATE_CSV),
102  create_tex(DEFAULT_CREATE_TEX), create_pdf(DEFAULT_CREATE_PDF),
103  output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()) {}
104 
105 };


/* *** progress information *** */

struct Progress {

    /* maximum number of records to read between progress updates */
    static const uint64_t EVENTS_RECORD_LIMIT= 1000000;
    static const uint64_t STATS_RECORD_LIMIT= 100;

    uint64_t cur_bytes; /* current bytes read */
    uint64_t max_bytes; /* max. bytes readable */

#ifdef OTFPROFILE_MPI
    /* message tag to use for progress communication */
    static const int MSG_TAG= 500;

    MPI_Request send_request;   /* sender request handle */

    uint64_t* recv_buffers;     /* receive buffers */
    MPI_Request* recv_requests; /* persistent receive request handles */
    MPI_Status* recv_statuses;  /* receive statuses */
    int* recv_indices;          /* indices of completed recv. operations */

    uint64_t* rank_cur_bytes;   /* current bytes read per rank (except rank 0) */
    uint32_t ranks_left;        /* root keeps track of ranks left to query */
#endif /* OTFPROFILE_MPI */

    /* zero-initialize all POD members so the state is well-defined before
       use; the MPI request handles are set up by the progress code itself */
    Progress() : cur_bytes(0), max_bytes(0)
#ifdef OTFPROFILE_MPI
    , recv_buffers(NULL), recv_requests(NULL), recv_statuses(NULL),
      recv_indices(NULL), rank_cur_bytes(NULL), ranks_left(0)
#endif /* OTFPROFILE_MPI */
    {}
};


/* *** runtime measurement *** */

struct Measurement {

    /* one named measurement interval */
    struct Scope {

        double start_time; /* start timestamp of measurement scope; -1.0 = not set */
        double stop_time;  /* stop timestamp of measurement scope; -1.0 = not set */
        const uint8_t verbose_level; /* verbose level required to
                                        perform this measurement */

        Scope( const uint8_t _verbose_level )
        : start_time(-1.0), stop_time(-1.0),
          verbose_level(_verbose_level) {}
    };

    /* are there any completed runtime measurement results? */
    bool have_data;

    /* store per-measurement scope runtimes, keyed by scope name */
    std::map< std::string, Scope > scope_map;

    Measurement() : have_data(false) {}

    /* get global timestamp in seconds */
    static double gettime() {

#ifdef OTFPROFILE_MPI
        /* MPI build: use the globally consistent MPI wall clock */
        return MPI_Wtime();
#else /* OTFPROFILE_MPI */
        /* non-MPI build: microsecond wall clock converted to seconds */
        struct timeval tv;
        gettimeofday( &tv, NULL );
        return (double)( tv.tv_sec * 1e6 + tv.tv_usec ) / 1.0e6;
#endif /* OTFPROFILE_MPI */
    }
};


/* *** pair of values as map key *** */

struct Pair {

    uint64_t a;
    uint64_t b;

    Pair() : a(0), b(0) {}
    Pair( uint64_t aa, uint64_t bb ) : a(aa), b(bb) {}
    ~Pair() {}
};


/* strict weak ordering for Pair; comparator for std::map / std::set */
struct ltPair {

    bool operator()( const Pair& p1, const Pair& p2 ) const {

        /* a is the major number for comparison, this gives a better
           order when reducing the entries over the first argument */
        if ( p1.a != p2.a ) {

            return p1.a < p2.a;
        }
        return p1.b < p2.b;
    }
};


/* *** triplet of values as map key *** */

struct Triple {

    uint64_t a;
    uint64_t b;
    uint64_t c;

    Triple() : a(0), b(0), c(0) {}
    Triple( uint64_t aa, uint64_t bb, uint64_t cc ) : a(aa), b(bb), c(cc) {}
    ~Triple() {}
};


/* strict weak ordering for Triple; comparator for std::map / std::set */
struct ltTriple {

    bool operator()( const Triple& p1, const Triple& p2 ) const {

        /* a is the major number for comparison, this gives a better
           order when reducing the entries over the first argument */
        if ( p1.a != p2.a ) {

            return p1.a < p2.a;
        }
        if ( p1.b != p2.b ) {

            return p1.b < p2.b;
        }
        return p1.c < p2.c;
    }
};


/* a trace process together with its parent process ID */
struct Process {

    uint64_t process;
    uint64_t parent;

    Process() : process(0), parent(0) {}
    Process( uint64_t _process, uint64_t _parent )
    : process(_process), parent(_parent) {}
    ~Process() {}
};


/* strict weak ordering for Process; comparator for std::set */
struct ltProcess {

    bool operator()( const Process& p1, const Process& p2 ) const {

        /* involve parent for sorting? */
        return p1.process < p2.process;
    }
};


268 /* class that collects the minimum, the maximum, and the sum for some values.
269 the minimum will ignore the value '0' though because it should only record
270 actual occurences */
271 template <class type>
272 class min_max_avg {
273 
274 public:
275 
276  type min;
277  type max;
278  type sum;
279  uint64_t cnt;
280 
281  min_max_avg( type a= (type) OTF_UINT64_MAX, type b= (type) 0, type s= (type) 0, uint64_t c= 0 ) :
282  min( a ), max( b ), sum( s ), cnt( c ) {}
283  ~min_max_avg() {}
284 
285  /* append a single value */
286  void append( const type& value ) {
287 
288  if ( ((type) 0) != value ) {
289 
290  min= ( value < min ) ? value : min;
291  max= ( value > max ) ? value : max;
292  sum += value;
293  cnt += 1;
294  }
295  }
296 
297  /* add another min_max_avg object as if all their values were appended to on object */
298  void add( const min_max_avg<type>& other ) {
299 
300  min= ( other.min < min ) ? other.min : min;
301  max= ( other.max > max ) ? other.max : max;
302  sum += other.sum;
303  cnt += other.cnt;
304  }
305 };


/* manage grouping of processes (or ranks/threads/whatever) -- grouping
reduces the potentially unlimited number of processes to a fixed number of
groups (or bins, buckets, ...). Instead of a per-process basis, global
statistics are collected per group. The processes can be spread over the
groups either consecutively, or round-robin, or randomly, or in any special
scheme, e.g. separate groups for GPU threads and host processes. Therefore,
the Grouping structure manages explicit mappings from group IDs to process
IDs. Every process belongs to one group exclusively. */

struct Grouping {

    /* maximum number of groups
       (limited by LaTeX output; defined in create_latex.cpp) */
    static const uint32_t MAX_GROUPS;

    /* store process/group mappings */
    std::map< uint64_t, uint64_t > processesToGroups;
    std::map< uint64_t, std::set<uint64_t> > groupsToProcesses;

    /* indicates whether grouping is enabled
       (more processes than maximum number of groups) */
    bool enabled;

    Grouping() : enabled( false ) {}
    ~Grouping() {}

    /* insert process into a group, return true if succeeded */
    bool insert( uint64_t group, uint64_t process ) {

        /* insert the new entry if and only if there was no process with
           this ID before, because every process can only be in one group */
        std::pair< std::map< uint64_t, uint64_t >::const_iterator, bool > ret=
            processesToGroups.insert( std::pair< uint64_t, uint64_t >( process, group ) );

        if ( !ret.second ) {

            return false;
        }

        /* look up the group's member set only once and reuse the reference */
        std::set< uint64_t >& members= groupsToProcesses[ group ];
        members.insert( process );

        /* grouping becomes relevant as soon as any group holds more than
           one process */
        if ( !enabled && 1 < members.size() ) {

            enabled= true;
        }

        return true;
    }

    /* return the group ID for the given process ID, return 0 if not found */
    uint64_t process2group( uint64_t process ) const {

        std::map< uint64_t, uint64_t >::const_iterator it=
            processesToGroups.find( process );

        return ( processesToGroups.end() != it ) ? it->second : (uint64_t) 0;
    }

    /* return a const pointer to the set or NULL if there is no such group;
       this is better than the [] operator, which would create an empty set
       if a search goes negative */
    const std::set<uint64_t>* group2processes( uint64_t group ) const {

        std::map< uint64_t, std::set<uint64_t> >::const_iterator it=
            groupsToProcesses.find( group );

        return ( groupsToProcesses.end() != it ) ? ( & it->second ) : NULL;
    }

    /* return number of groups; cast silences the size_t -> uint32_t
       narrowing (the group count is bounded by MAX_GROUPS anyway) */
    uint32_t numGroups( ) const {

        return (uint32_t) groupsToProcesses.size();
    }
};


/* one entry of the per-process call stack tracked while reading events */
struct StackType {

    /* the function ID */
    uint64_t fid;

    /* the enter timestamp when this was pushed to the top-of-stack */
    uint64_t timestamp;

    /* the duration consumed by child calls; it is the sum of all child
       calls' inclusive durations. With this one can compute the exclusive
       duration of the current call based on the inclusive time, which comes
       from the end timestamp minus the start timestamp. */
    uint64_t childDuration;

    /* per-counter bookkeeping for this stack level */
    struct CounterData {

        /* the first counter value, relevant to the enter timestamp */
        uint64_t firstValue;

        /* the last counter value seen */
        uint64_t lastValue;

        /* the timestamp at which the last counter value occurred */
        uint64_t lastTime;

        /* similar to childDuration but for the counter values */
        uint64_t childDelta;

        /* (uint64_t)-1 marks 'no value seen yet' */
        CounterData()
        : firstValue( (uint64_t)-1 ), lastValue( (uint64_t)-1 ),
          lastTime( (uint64_t)-1 ), childDelta( 0 ) {}
    };

    /* the counter data on this stack level, keyed by counter ID */
    std::map< uint64_t, CounterData > counterIdDataMap;

    StackType( uint64_t f, uint64_t t )
    : fid( f ), timestamp( t ), childDuration( 0 ) {}
    ~StackType() {}
};


428 struct FunctionData {
429 
430  min_max_avg<uint64_t> count;
431  min_max_avg<double> excl_time;
432  min_max_avg<double> incl_time;
433 
434  FunctionData( ) {}
435  ~FunctionData( ) {}
436 
437  void add( uint64_t n= 0, double ex= 0.0, double in= 0.0 ) {
438 
439  count.append( n );
440  excl_time.append( ex );
441  incl_time.append( in );
442  }
443 
444  void add( const FunctionData& other ) {
445 
446  count.add( other.count );
447  excl_time.add( other.excl_time );
448  incl_time.add( other.incl_time );
449  }
450 };
451 
452 
453 /* counter data are similar to function data */
454 typedef FunctionData CounterData;


457 struct MessageData {
458 
459  min_max_avg<uint64_t> count_send;
460  min_max_avg<uint64_t> count_recv;
461  min_max_avg<uint64_t> bytes_send;
462  min_max_avg<uint64_t> bytes_recv;
463  min_max_avg<double> duration_send;
464  min_max_avg<double> duration_recv;
465 
466 
467  MessageData( ) {}
468  ~MessageData( ) {}
469 
470  void add_send( uint64_t n= 0, uint64_t b= 0, double d= 0.0 ) {
471 
472  count_send.append( n );
473  bytes_send.append( b );
474  duration_send.append( d );
475  }
476 
477  void add_recv( uint64_t n= 0, uint64_t b= 0, double d= 0.0 ) {
478 
479  count_recv.append( n );
480  bytes_recv.append( b );
481  duration_recv.append( d );
482  }
483 
484  void add( const MessageData& other ) {
485 
486  count_send.add( other.count_send );
487  count_recv.add( other.count_recv );
488  bytes_send.add( other.bytes_send );
489  bytes_recv.add( other.bytes_recv );
490  duration_send.add( other.duration_send );
491  duration_recv.add( other.duration_recv );
492  }
493 };


497 
498  static const uint32_t BIN_LOG_BASE= 2;
499 
500  min_max_avg<uint64_t> count;
501 
502 
503  MessageSpeedData( ) {}
504  ~MessageSpeedData( ) {}
505 
506  void add( uint64_t n= 0 ) {
507 
508  count.append( n );
509  }
510 
511  void add( const MessageSpeedData& other ) {
512 
513  count.add( other.count );
514  }
515 };


518 /* collective data are similar to message data */
520 
521 
/* temporary record of an in-flight collective operation between its begin
   and end; stored in AllData::pendingCollectives. The struct opener and the
   default-constructor line were lost in extraction and are reconstructed
   from the remaining constructor definitions. */
struct PendingCollective {

    uint32_t collop;      /* collective operation ID */
    uint64_t bytes_send;
    uint64_t bytes_recv;
    uint64_t begin_time;  /* timestamp of the collective begin record */

    PendingCollective()
    : collop(0), bytes_send(0), bytes_recv(0), begin_time(0) {}
    PendingCollective( uint32_t _collop, uint64_t _bytes_send,
        uint64_t _bytes_recv, uint64_t _begin_time )
    : collop(_collop), bytes_send(_bytes_send), bytes_recv(_bytes_recv),
      begin_time(_begin_time) {}
    ~PendingCollective() {}
};


539 /* *** management and statistics data structures, needed on all ranks *** */
540 
541 struct AllData {
542 
543  /* MPI-rank and number of analysis processes */
544  const uint32_t myRank;
545  const uint32_t numRanks;
546 
547 #ifdef OTFPROFILE_MPI
548  /* one instance of send/receive buffer to be re-used all the time */
549  uint32_t packBufferSize;
550  char* packBuffer;
551 #endif /* OTFPROFILE_MPI */
552 
553  /* number and list of processes to be handled by every worker */
554  uint32_t myProcessesNum;
555  uint32_t* myProcessesList;
556 
557  /* program parameters */
558  Params params;
559 
560  /* progress information */
561  Progress progress;
562 
563  /* runtime measurement */
564  Measurement measurement;
565 
566  /* grouping information for ranks */
567  Grouping grouping;
568 
569 
570 
571 
572  /* trace context information */
573 
574  /* trace creator */
575  string creator;
576 
577  /* OTF version */
578  string version;
579 
580  /* definition comments */
581  string comments;
582 
583  /* all defined process IDs with its parents */
584  set< Process, ltProcess > allProcesses;
585 
586  /* map function IDs to their corresponding names */
587  map< uint64_t, string > functionIdNameMap;
588 
589  /* map counter IDs to their corresponding names */
590  map< uint64_t, string > counterIdNameMap;
591 
592  /* map process IDs to their corresponding names */
593  map< uint64_t, string > processIdNameMap;
594 
595 
596 
597 
598  /* data collection helper datastructures, they hold management and temp
599  information while reading the OTF streams */
600 
601  /* this is the function stack per trace process which is tracked while
602  reading the trace processes of the current worker */
603  map< uint64_t, list<StackType> > stackPerProcess;
604 
605  /* temporary store per-rank collective begin operations
606  Pair is <matching-id,rank> */
607  map< Pair, PendingCollective, ltPair > pendingCollectives;
608 
609  /* map all defined collective operations to the four
610  classes { barrier, one-to-all, all-to-one, all-to-all }
611  use the OTF constants for the four classes */
612  map< uint64_t, uint64_t > collectiveOperationsToClasses;
613 
614  /* OTF counter IDs to consider in statistics, ignore all other counters */
615  set< uint64_t > countersOfInterest;
616 
617  /* timer resolution (ticks per second) */
618  uint64_t timerResolution;
619 
620  /* key for OTF key-value-pairs with message matching information */
621  uint64_t recvTimeKey;
622 
623 
624 
625 
626  /* data collection containers:
627  the following maps are for collecting individual data per trace rank,
628  they will be summarized to the next set of maps */
629 
630  /* store per-function statistics over the ranks, Pair is <rank,funcId>
631 
632  in case of additional clustering, collect it to the master node such that
633  process clustering according to similar function call patterns can
634  be done */
635  map< Pair, FunctionData, ltPair > functionMapPerRank;
636 
637  /* store per-counter statistics over the functions and ranks,
638  Triple is <rank,funcId,counterId> */
639  map< Triple, CounterData, ltTriple > counterMapPerFunctionRank;
640 
641  /* store send-recv statistics for P2P messages per communication pairs,
642  Pair is <rank,peer> */
643  map< Pair, MessageData, ltPair > messageMapPerRankPair;
644 
645  /* store send-recv statistics per rank without differenciating the
646  communication partners */
647  map< uint64_t, MessageData > messageMapPerRank;
648 
649  /* store per-collop.-class statistics over the ranks,
650  Pair is <rank,collective-class> */
651  map< Pair, CollectiveData, ltPair > collectiveMapPerRank;
652 
653 
654 
655 
656  /* data summarization and reduction containers:
657  the following maps are filled when summarizing the previous set of maps,
658  they will be considered in the later reduce operation over the analysis ranks. */
659 
660  /* compact function statistics summed over all ranks */
661  map< uint64_t, FunctionData > functionMapGlobal;
662 
663  /* store per-counter statistics over the functions and ranks,
664  Pair is <counterId,funcId> */
665  map< Pair, CounterData, ltPair > counterMapGlobal;
666 
667  /* compact send-recv statistics for P2P messages per communicating groups,
668  groups are groups of neigbor ranks,
669  Pair is <rank,peer> */
670  map< Pair, MessageData, ltPair > messageMapPerGroupPair;
671 
672  /* compact send-receive statistics per group without differenciating the
673  communication partners; group is a group of ranks */
674  map< uint64_t, MessageData > messageMapPerGroup;
675 
676  /* store per-speed-bin statistics over the length-bins of P2P messages,
677  Pair is <speed-bin,length-bin> where bin is log2(<speed|length>) */
678  map< Pair, MessageSpeedData, ltPair > messageSpeedMapPerLength;
679 
680  /* compact collective operation statistics per group;
681  group is a group of ranks, Pair is <collective-class,group> */
682  map< Pair, CollectiveData, ltPair > collectiveMapPerGroup;
683 
684 
685 
686 
687  AllData( uint32_t my_rank= 0, uint32_t num_ranks= 1 ) :
688  myRank(my_rank), numRanks(num_ranks), myProcessesNum(0),
689  myProcessesList(NULL), timerResolution(0), recvTimeKey(0) {
690 
691 #ifdef OTFPROFILE_MPI
692  packBufferSize= 0;
693  packBuffer= NULL;
694 #endif /* OTFPROFILE_MPI */
695 
696  }
697 
698 
699  ~AllData() {
700 
701  myProcessesNum= 0;
702  free( myProcessesList );
703  myProcessesList= NULL;
704 
705 #ifdef OTFPROFILE_MPI
706  packBufferSize= 0;
707  if ( packBuffer ) {
708 
709  free( packBuffer );
710  packBuffer= NULL;
711  }
712 #endif /* OTFPROFILE_MPI */
713  }
714 
715 #ifdef OTFPROFILE_MPI
716  char* guaranteePackBuffer( uint32_t size ) {
717 
718  if ( packBufferSize < size ) {
719 
720  packBufferSize= size;
721  packBuffer= (char*) realloc( packBuffer, packBufferSize * sizeof(char) );
722  assert( NULL != packBuffer );
723  }
724 
725  return packBuffer;
726  }
727 
728 
729  char* freePackBuffer( ) {
730 
731  free( packBuffer );
732  packBuffer= NULL;
733  packBufferSize= 0;
734 
735  return NULL;
736  }
737 
738 
739  char* getPackBuffer( ) {
740 
741  return packBuffer;
742  }
743 #endif /* OTFPROFILE_MPI */
744 };


#endif /* DATASTRUCTS_H */