import java.io.*;
import java.util.*;

import javax.xml.parsers.*;

import org.xml.sax.*;
import org.xml.sax.helpers.*;

public class Tree2GraphParser extends DefaultHandler {

    public static String inputfile = "uwm.xml";
    public static String outputfile = "output.txt";
    public static String indexfile = "index.txt";

    public class Vertex {
        int id;
        String text;
        long start_pos; // notice the start_pos is the start row of the subtree
        long end_pos; // the end_pos is the end row of the subtree
        int parent;
        Vector<Integer> children;

        Vertex(int id, String text) {
            this.id = id;
            this.text = text;
            children = new Vector<Integer>();
        }

        public String toString()
        {
            StringBuffer strBuf=new StringBuffer();
            text = text.replaceAll("[\n\r]", " ");
            text = text.replaceAll("<", "&lt;");
            text = text.replaceAll(">", "&gt;");
            strBuf.append(id+"\t"+"<"+text+"> "+start_pos+" "+end_pos+" "+parent+" "+children.size());
            for(Integer i:children) strBuf.append(" "+i);
            return strBuf.toString();
        }
    }

    private int nxtID = 0;
    private int pos = 0;

    Stack<Vertex> path = new Stack<Vertex>();
    StringBuffer buf = new StringBuffer();
    FileWriter writer = null;
    FileWriter index_writer = null;

    public void startDocument() {
        try {
            writer = new FileWriter(outputfile);
            index_writer = new FileWriter(indexfile);
        } catch (IOException ioe) {
            ioe.printStackTrace();
            System.exit(-1);
        }
    }

    private void vertexGene(String qName) {
        // push the vertex into the path.
        Vertex v = new Vertex(nxtID, qName);
        nxtID++;
        if (path.isEmpty()) {
            // This is the root node.
            v.parent = -1;// root's parent is denoted as -1
        } else {
            Vertex parent = path.lastElement();
            parent.children.add(v.id);
            v.parent = parent.id;
        }
        path.push(v);
    }

    private void outputVertex() {
        // pop the path and write the file.
        Vertex v = path.pop();
        try {
            index_writer.write(v + "\n");
            index_writer.flush();
        } catch (IOException ioe) {
            ioe.printStackTrace();
            System.exit(-1);
        }
    }

    public void startElement(String uri, String localName, String qName,
            Attributes attributes) {
        try {
            vertexGene(qName);
            path.lastElement().start_pos = pos;
            writer.write("<" + qName + ">");
            pos += qName.length() + 2;
            for (int i = 0; i < attributes.getLength(); i++) {
                String attrKey = attributes.getQName(i);
                String attrVal = attributes.getValue(i);

                vertexGene(attrKey);
                path.lastElement().start_pos = pos;
                writer.write("<" + attrKey + ">");
                pos = pos + attrKey.length() + 2;

                vertexGene(attrVal);
                path.lastElement().start_pos = pos;

                writer.write(attrVal);
                pos = pos + attrVal.length();
                path.lastElement().end_pos = pos;
                outputVertex();

                writer.write("</" + attrKey + ">");
                pos = pos + attrKey.length() + 3;
                path.lastElement().end_pos = pos;
                outputVertex();
            }
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    public void characters(char[] ch, int start, int length)
        throws SAXException {
        buf.append(new String(ch, start, length).trim());
    }

    public void endElement(String uri, String localName, String qName) {
        try {
            if (!buf.toString().trim().equals("")) {
                vertexGene(buf.toString());
                path.lastElement().start_pos = pos;
                writer.write(buf.toString());
                pos = pos + buf.toString().length();
                path.lastElement().end_pos = pos;
                outputVertex();
            }
            buf = new StringBuffer();
            writer.write("</" + qName + ">");
            pos = pos + qName.length() + 3;
            path.lastElement().end_pos = pos; 
            outputVertex();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public void endDocument() {
        // close the file;
        try {
            index_writer.close();
            writer.close();
        } catch (IOException ioe) {
            ioe.printStackTrace();
            System.exit(-1);
        }
    }

    public static void getXML(String input_file_name, int start_pos, int end_pos, String out_file_name){
        try{
            RandomAccessFile index = new RandomAccessFile(input_file_name, "r");
            index.seek(start_pos);
            byte [] bytes = new byte[end_pos - start_pos];
            index.read(bytes);
            FileWriter output = new FileWriter(out_file_name);
            String out = new String(bytes);
            output.write(out);
            output.close();
        }catch(Exception e){
            e.printStackTrace();
            System.exit(-1);
        }
    }

    public static void main(String args[]) throws ParserConfigurationException, SAXException, IOException {
        //inputfile = args[0];
        //outputfile = args[1];
        //indexfile = args[2];

        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser parser = factory.newSAXParser();
        Tree2GraphParser graphParser = new Tree2GraphParser();
        long start = System.currentTimeMillis();
        parser.parse(new File(inputfile), graphParser);
        long end = System.currentTimeMillis();
        System.out.println("Processing time: " + (end - start) + " ms");
        /* getXML("output.txt", 67, 270, "out.xml"); */
    }
}