//sort vertices by degrees //input line format: vid \t num_in in1 in2 ... num_out out1 out2 ... //output line format: order vid \t degree //we do not hold vertex info, like adjacency list, since it will cause load balance issue due to range-based partitioning //then use a program to extract top-k, and report the degree threshold (and adjusted k if ties are observed) #include "utils/TeraSort.h" #include "utils/type.h" double sampRate=0.001; enum key_type{INDEGREE_ONLY, OUTDEGREE_ONLY, INOUT_DEGREE}; class DegreeSortDG:public TeraWorker { char tmp[100]; key_type type; public: DegreeSortDG(key_type key):TeraWorker(sampRate, true) { type=key; } virtual TeraItem* toVertex(char* line) { TeraItem* v=new TeraItem; char * pch; pch=strtok(line, "\t"); int id=atoi(pch); v->key.v2=id; pch=strtok(NULL, " "); int in_deg=atoi(pch); for(int i = 0; i < in_deg; i++){ pch = strtok(NULL," "); } pch = strtok(NULL," "); int out_deg=atoi(pch); if(type == INDEGREE_ONLY){ v->key.v1 = -(in_deg); } else if(type == OUTDEGREE_ONLY){ v->key.v1 = -(out_deg); } else{ v->key.v1 = -(in_deg + out_deg); } //use negative degree to put large degree vertices to the head return v; } virtual void toline(TeraItem* v) { sprintf(tmp, "%d\t%d\n", v->key.v2, -v->key.v1); write(tmp); } }; int main(int argc, char* argv[]){ WorkerParams param; param.input_path="/toy_inout"; param.output_path="/sort"; param.force_write=true; param.native_dispatcher=false; DegreeSortDG worker(INDEGREE_ONLY); //DegreeSortDG worker(OUTDEGREE_ONLY); //DegreeSortDG worker(INOUT_DEGREE); worker.run(param); return 0; }