package szte.io;

import java.io.*;
import java.util.*;

import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.*;
import org.xml.sax.*;

/**
 * 
 * The reader and container class for the CMC XML format.
 * To obtain a corpus please visit: http://www.computationalmedicine.org/catalog
 *
 */
public class CMCDataHolder extends Vector<Document> implements DocumentSet{

    static final long serialVersionUID = 2L;

    /**
     * Creates an empty holder. Documents are appended by
     * {@link #readDocumentSet(String)}.
     */
    public CMCDataHolder(){
        super();
    }

    /**
     * Walks the DOM subtree rooted at {@code n}.
     * <p>
     * A node named "doc" (case-insensitive) is converted into a
     * {@code CMCDocument} and appended to this vector; the walk does not look
     * for further "doc" nodes below it. Any other node is descended into
     * recursively.
     *
     * @param n the node to start from; {@code null} is ignored
     * @throws IOException declared for callers and overriding subclasses; the
     *         code in this method does not raise it itself
     */
    protected void depthFirstWalkTree (Node n) throws IOException
    {
        if (n == null) {
            return;
        }

        if (n.getNodeName().equalsIgnoreCase("doc")) {
            add(buildDocument(n));
            return;
        }

        NodeList children = n.getChildNodes();
        int len = children.getLength();
        for (int i = 0; i < len; i++) {
            depthFirstWalkTree(children.item(i));
        }
    }

    /**
     * Builds a {@code CMCDocument} from a "doc" node: the "id" attribute
     * becomes the document ID, and the element children named "codes" and
     * "texts" (case-insensitive) are read into the document.
     *
     * @param docNode the "doc" node
     * @return the populated document
     */
    private CMCDocument buildDocument(Node docNode) {
        CMCDocument md = new CMCDocument();

        String id = attributeValue(docNode, "id");
        if (id != null) {
            md.setDocID(id);
        } else {
            // Previously a NullPointerException; report and keep reading instead.
            System.err.println("doc element without id attribute");
        }

        NodeList children = docNode.getChildNodes();
        int len = children.getLength();
        for (int i = 0; i < len; i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            if (child.getNodeName().equalsIgnoreCase("codes")) {
                readCodes(child, md);
            }
            if (child.getNodeName().equalsIgnoreCase("texts")) {
                readTexts(child, md);
            }
        }
        return md;
    }

    /**
     * Reads the element children of a "codes" node. For each child whose
     * "origin" attribute equals "cmc_majority" (case-insensitive), the value
     * of its first child node is counted in
     * {@code CMCDocument.major_Annotations} and added to {@code md} as a
     * major annotation. Children without an "origin" attribute or without any
     * content are skipped.
     *
     * @param codesNode the "codes" node
     * @param md the document receiving the annotations
     */
    private void readCodes(Node codesNode, CMCDocument md) {
        NodeList codes = codesNode.getChildNodes();
        for (int j = 0; j < codes.getLength(); j++) {
            Node code = codes.item(j);
            if (code.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // Literal on the left so a missing attribute (null) is simply "no match".
            if (!"cmc_majority".equalsIgnoreCase(attributeValue(code, "origin"))) {
                continue;
            }
            Node content = code.getFirstChild();
            if (content == null) {
                // Empty element: there is no label to record.
                continue;
            }
            String label = content.getNodeValue();
            // Increment the corpus-wide occurrence count of this label.
            CMCDocument.major_Annotations.put(label,
                    (CMCDocument.major_Annotations.containsKey(label)) ?
                    CMCDocument.major_Annotations.get(label)+1 : 1);
            md.addMajorAnnotation(label);
        }
    }

    /**
     * Reads the element children of a "texts" node. The value of the first
     * child node of a child typed "clinical_history" is stored as the
     * document's history, and that of a child typed "impression" as its
     * impression (both comparisons are case-insensitive). Children without a
     * "type" attribute or without any content are skipped.
     *
     * @param textsNode the "texts" node
     * @param md the document receiving the texts
     */
    private void readTexts(Node textsNode, CMCDocument md) {
        NodeList texts = textsNode.getChildNodes();
        for (int j = 0; j < texts.getLength(); j++) {
            Node text = texts.item(j);
            if (text.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            String type = attributeValue(text, "type");
            Node content = text.getFirstChild();
            if (type == null || content == null) {
                continue;
            }
            if (type.equalsIgnoreCase("clinical_history")) {
                md.setHistory(content.getNodeValue());
            }
            if (type.equalsIgnoreCase("impression")) {
                md.setImpression(content.getNodeValue());
            }
        }
    }

    /**
     * Returns the value of the named attribute of {@code node}.
     *
     * @param node the node to inspect
     * @param name the attribute name
     * @return the attribute value, or {@code null} when the node has no
     *         attribute map or no attribute of that name
     */
    private static String attributeValue(Node node, String name) {
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return null;
        }
        Node attribute = attributes.getNamedItem(name);
        return (attribute == null) ? null : attribute.getNodeValue();
    }

    /**
     * Adds every line of the file {@code fn} to {@code set}.
     *
     * @param set the set receiving the lines
     * @param fn path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void readFileToSet(Set<String> set, String fn) throws IOException
    {
        BufferedReader r = new BufferedReader(new FileReader(fn));
        try {
            String line;
            while ((line = r.readLine()) != null) {
                set.add(line);
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            r.close();
        }
    }

    /**
     * Parses the XML file {@code file} and appends one document to this
     * vector for every "doc" element found in it.
     * <p>
     * Parse and I/O failures are reported on standard error and are not
     * propagated to the caller.
     *
     * @param file the location of the XML file, handed to the parser as is
     */
    public void readDocumentSet(String file){

        // NOTE(review): the parser is used with its default configuration; if
        // input files may come from untrusted sources, consider disabling
        // DOCTYPE/external-entity processing - confirm against corpus needs.
        DOMParser p = new DOMParser();

        try {
            p.parse(file);
            org.w3c.dom.Document doc = p.getDocument();
            if (doc == null) {
                System.err.println("NULLDOC");
                // Nothing to walk; continuing would dereference null.
                return;
            }

            Node n = doc.getDocumentElement();
            depthFirstWalkTree(n);
        }
        catch (SAXParseException e) {
            System.err.println(e.getLineNumber() + " : " + e.getMessage());
            e.printStackTrace();
        }
        catch (SAXException e) {
            System.err.println("xml parse error");
            e.printStackTrace();
        }
        catch (IOException e) {
            System.err.println("I/O error");
            e.printStackTrace();
        }
    }
}
