//to run in eclipse, add jars below: //- all jars in $HADOOP_HOME //- all jars in $HADOOP_HOME/lib //USAGE: //- input is the output of SubFolders2NewFiles //output line format: //# srcHVid \t dstHVid1 hop1 dstHVid2 hop2 ... (# is an indicator of file src) import java.io.*; import java.util.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.FileSystem; class FormatHighDeg { public static void main(String[] args) { Configuration conf = new Configuration(); conf.set("fs.default.name", "hdfs://master:9000"); String srcPath ="hdfs://master:9000/ol_matrix/"; String dstPath="hdfs://master:9000/ol_format/matrix"; try { FileSystem fs = FileSystem.get(conf); BufferedWriter bout=new BufferedWriter(new OutputStreamWriter(fs.create(new Path(dstPath), true))); FileStatus[] status = fs.listStatus(new Path(srcPath)); FSDataInputStream in = null; String l = null; for(int i = 0 ;i < status.length ; i ++) { long start_time=System.currentTimeMillis(); Path srcfilePath = status[i].getPath(); in = fs.open(srcfilePath); boolean first=true; while((l = in.readLine()) != null) { StringTokenizer tk=new StringTokenizer(l); String src_str=tk.nextToken(); if(first) { bout.write("# "+src_str+"\t");//srcHVid first=false; } bout.write(tk.nextToken()+" ");//dstHVid bout.write(tk.nextToken()+" ");//hop } bout.write("\n"); long end_time=System.currentTimeMillis(); System.out.println(srcfilePath+ " processed in "+((end_time-start_time)/1000.0)+" seconds"); in.close(); } bout.close(); } catch (IOException ioe) { ioe.printStackTrace(); } } }