require "scripts/umnlp.rb"
require 'optparse'

# Command-line handling. The parsed flags live in the global $options hash,
# which the head-finding helpers below consult (e.g. for verbose logging).
$options = { :verbose => false }

cli = OptionParser.new
cli.banner = "Usage: cat genmodel/<modelfile>.model | ruby rules2lrules.rb [-v]"
cli.on("-v", "--verbose", "turns on extra stderr output") { |flag| $options[:verbose] = flag }
# parse! consumes the recognised switches from ARGV in place.
cli.parse!


# Global hash, initially empty; the original note says it is "for unks".
# NOTE(review): nothing else in this file reads or writes $hw -- presumably it
# is used by code loaded from scripts/umnlp.rb; confirm before removing.
$hw = {} # for unks


# Head-percolation table consulted by hdwdbattle.
#
# Key:   a bare parent category (hdwdbattle reduces the parent label to its
#        leading run of capital letters before looking it up).
# Value: a space-separated string. The first token is the search-direction
#        flag; the remaining tokens are child categories that may supply the
#        head, in priority order (earlier entries win).
#
# How hdwdbattle uses the flag:
#   * non-empty list, "1": for each candidate, the left child is tested
#     before the right child;
#   * non-empty list, "0": for each candidate, the right child is tested
#     before the left child;
#   * empty list (e.g. "FRAG", "INTJ"): "1" asks for the right child and
#     "0" asks for the left child.
#
# NP and NX have no entry here; hdwdbattle handles them in a separate branch.
$hdwdrules = { # "0" indicates search R-to-L.  "1" indicates search L-to-R
  "ADJP" => "0 NNS QP NN $ ADVP JJ VBN VBG ADJP JJR NP JJS DT FW RBR RBS SBAR RB",
  "ADVP" => "1 RB RBR RBS FW ADVP TO CD JJR JJ IN NP JJS NN",
  "CONJP" => "1 CC RB IN",
  "FRAG" => "1 ",
  "INTJ" => "0 ",
  "LST" => "1 LS :",
  "NAC" => "0 NN NNS NNP NNPS NP NAC EX $ CD QP PRP VBG JJ JJS JJR ADJP FW",
  # NP is dealt with separately
  "PP" => "1 IN TO VBG VBN RP RB FW PP", #added RB after RP, and PP at the end
  "PRN" => "0 ",
  "PRT" => "1 RP",
  "QP" => "0 $ IN NNS NN JJ RB DT CD NCD QP JJR JJS",
  "RRC" => "1 VP NP ADVP ADJP PP",
  "S" => "0 TO IN VP S SBAR VB VBD VBN VBG VBP VBZ ADJP UCP NP", #added all the VB* ones after SBAR
  "SBAR" => "0 WHNP WHPP WHADVP WHADJP IN DT S SQ SINV SBAR WHSBAR FRAG", #added WHSBAR
  "SBARQ" => "0 SQ S SINV SBARQ FRAG",
  "SINV" => "0 VBZ VBD VBP VB MD VP S SINV ADJP NP",
  "SQ" => "0 VBZ VBD VBP VB MD VP SQ",
  "UCP" => "1 ",
  "VP" => "0 TO VBD VP VBN MD VBZ VB VBG VBP ADJP NN NNS NP", #moved up VP, from after VBP to after VBD
  "WHADJP" => "0 CC WRB JJ ADJP",
  "WHADVP" => "1 CC WRB",
  # NOTE(review): hdwdbattle strips child labels to their leading capitals
  # before comparing, so a child "WP$" is compared as "WP"; the "WP$" entry
  # below therefore looks unreachable -- confirm whether that is intended.
  "WHNP" => "0 WDT WP WP$ WHADJP WHPP WHNP",
  "WHPP" => "1 IN TO FW",
  
  #add'l rules to count for previous binarization & processing
  # (parents here are POS tags and LIST*/WH* categories rather than plain
  # phrase labels)
  "NN"   => "0 NN NNS NNP NNPS",
  "NNS"  => "0 NN NNS NNP NNPS",
  "NNP"  => "0 NN NNS NNP NNPS",
  "NNPS" => "0 NN NNS NNP NNPS",
  "VBZ"  => "1 VBZ VBD VBP VB VBN VBG",
  "VBD"  => "1 VBZ VBD VBP VB VBN VBG",
  "VBP"  => "1 VBZ VBD VBP VB VBN VBG",
  "VB"   => "1 VBZ VBD VBP VB VBN VBG",
  "VBN"  => "1 VBZ VBD VBP VB VBN VBG",
  "VBG"  => "1 VBZ VBD VBP VB VBN VBG",
  "JJ"   => "0 JJ JJR JJS",
  "JJR"  => "0 JJ JJR JJS",
  "JJS"  => "0 JJ JJR JJS",
  "RB"   => "0 RB RBR RBS",
  "RBR"  => "0 RB RBR RBS",
  "RBS"  => "0 RB RBR RBS",
  "CD"   => "0 CD",
  "IN"   => "1 IN",
  "LISTNP" => "1 NP",
  "LISTSBAR" => "1 SBAR",
  "LISTSINV" => "1 SINV",
  "LISTS"  => "1 S",
  "LISTVP" => "1 VP",
  "WHSBAR" => "0 WHNP WHPP WHADVP WHADJP IN DT S SQ SINV SBAR FRAG VP", #taken from SBAR, added VP to the end
  "WHSBARQ" => "0 SQ S SINV SBARQ FRAG",
}

# Fallback priority list used by last_resort: the first letter in this order
# that begins either child label decides which child becomes the head.
$lastrules = %w[S s V v N n W w P p I i U u A a R r J j Q q C c L l]




# Decide which child of a binary rule `head -> head0 head1` carries the head.
#
# The parent label `uschead` is reduced to its leading run of capital letters
# ("bald" form). Three cases follow:
#   1. The bald label has an entry in $hdwdrules (and the label is not
#      exactly "PP-tmp"): walk that entry's candidate list in priority order,
#      testing the '_'-separated pieces of each child label. An empty
#      candidate list picks a side from the direction flag alone.
#   2. The bald label is "NP" or "NX", or the label is "PP-tmp": apply the
#      fixed NP priority steps (after Collins' notes, magerman-black.txt).
#   3. Otherwise, or when no candidate matched in case 1: defer to last_resort.
#
# head, hdwd, hdwd0 and hdwd1 are not inspected here; they are only forwarded
# to the helper that makes the final pick.
#
# Returns whatever return_l_child / return_r_child / last_resort returns:
# 0 when the left child is the head, 1 when the right child is, and the
# helpers' own fallback values when neither child qualifies.
def hdwdbattle(head, head0, head1, hdwd, hdwd0, hdwd1, uschead)
  forwarded = [head, head0, head1, hdwd, hdwd0, hdwd1, uschead]
  # Strip a label down to its leading capitals (plus any prefix before them).
  bare = lambda { |label| label.gsub(/([A-Z]+).*/, '\1') }
  bald = bare.call(uschead)

  if $hdwdrules.key?(bald) && uschead != "PP-tmp"
    direction, *candidates = $hdwdrules.fetch(bald).split
    left_first = (direction == "1")

    if candidates.empty?
      # No candidate list: "1" asks for the right child (the helper itself
      # falls back to the other side for punctuation), "0" for the left.
      return left_first ? return_r_child(*forwarded) : return_l_child(*forwarded)
    end

    # For each candidate the children are tested in scan order; the first
    # child with a matching piece wins.
    scan_order = [[head0, :return_l_child], [head1, :return_r_child]]
    scan_order.reverse! unless left_first

    candidates.each do |wanted|
      scan_order.each do |child, chooser|
        hit = child.split('_').any? { |piece| bare.call(piece) == wanted }
        return send(chooser, *forwarded) if hit
      end
    end
    return last_resort(*forwarded)

  elsif bald == "NP" || bald == "NX" || uschead == "PP-tmp"
    # NP special case. Collins' first step (identifying POS words) is skipped.
    # Each step names the child to inspect, the labels that qualify, and the
    # helper to call on a match; steps are tried in order.
    np_steps = [
      [head1, %w[NN NNP NNPS NNS NX POS JJR], :return_r_child],
      [head0, %w[NP],                         :return_l_child],
      [head1, %w[$ ADJP PRN],                 :return_r_child],
      [head1, %w[CD],                         :return_r_child],
      [head1, %w[JJ JJS RB QP],               :return_r_child]
    ]
    np_steps.each do |child, accepted, chooser|
      return send(chooser, *forwarded) if accepted.include?(bare.call(child))
    end
    # Nothing matched: default to the right child.
    return return_r_child(*forwarded)
  end

  last_resort(*forwarded)
end

# Fallback head choice for a binary rule `head -> head0 head1` when no
# head-percolation rule applied.
#
# Walks $lastrules in order; the first letter that begins head0 selects the
# left child (0), otherwise the first letter that begins head1 selects the
# right child (1). head0 is tested before head1 for each letter. If no letter
# begins either label, the left child is chosen.
#
# head, head0 and head1 are also used for the verbose log line; hdwd, hdwd0,
# hdwd1 and uschead are accepted for signature parity with the other helpers
# and are not used.
#
# Returns 0 (left child) or 1 (right child).
def last_resort(head, head0, head1, hdwd, hdwd0, hdwd1, uschead)
  pick = 0 # default when no letter in $lastrules begins either child label
  $lastrules.each do |letter|
    starts_with_letter = /^#{letter}/
    if head0.match(starts_with_letter)
      pick = 0
      break
    elsif head1.match(starts_with_letter)
      pick = 1
      break
    end
  end

  if $options[:verbose]
    # Interpolate rather than concatenate: pick is an Integer, and
    # String + Integer raises TypeError.
    $stderr.print "Unable to resolve hdwd for #{head} -> #{head0} #{head1}, picked #{pick}\n"
  end
  pick
end

# Prefer the right child (head1) as head, falling back to the left child.
#
# A child qualifies when its label contains at least one ASCII letter, which
# keeps punctuation-only labels from becoming heads. Both the primary and the
# fallback test also require that the second character of head1 is not '!'.
#
# Only head0 and head1 are inspected; the remaining parameters exist so all
# head-choosing helpers share one signature.
#
# Returns 1 when head1 is chosen, 0 when head0 is chosen, and the string
# "-:- -:-" when neither qualifies. When $options[:old] is set the result is
# nil instead of 0/1 (a legacy mode; no command-line switch in this file sets
# it).
def return_r_child(head, head0, head1, hdwd, hdwd0, hdwd1, uschead)
  has_letter = /[A-Za-z]/
  side =
    if head1.match(has_letter) && head1[1] != '!'
      1
    elsif head0.match(has_letter) && head1[1] != '!'
      0
    else
      return "-:- -:-"
    end

  $options[:old] ? nil : side
end

# Prefer the left child (head0) as head, falling back to the right child.
#
# A child qualifies when its label contains at least one ASCII letter, which
# keeps punctuation-only labels from becoming heads. Both the primary and the
# fallback test also require that the second character of head1 is not '!'.
#
# Only head0 and head1 are inspected; the remaining parameters exist so all
# head-choosing helpers share one signature.
#
# Returns 0 when head0 is chosen, 1 when head1 is chosen, and -1 when neither
# qualifies. When $options[:old] is set the result is nil instead of 0/1 (a
# legacy mode; no command-line switch in this file sets it).
def return_l_child(head, head0, head1, hdwd, hdwd0, hdwd1, uschead)
  has_letter = /[A-Za-z]/
  side =
    if head0.match(has_letter) && head1[1] != '!'
      0
    elsif head1.match(has_letter) && head1[1] != '!'
      1
    else
      return -1
    end

  $options[:old] ? nil : side
end


#############
# Main pass: read model lines from STDIN and annotate binary "M" rules with
# head (h:) / modifier (mCAT:) markers.
#
# Lines matching `M <model> <depth> <parent> : <left> <right> = <rest>` are
# buffered (de-duplicated per model) and emitted after all input is read,
# because the parent's annotation depends on which categories were seen as
# heads or modifiers at that depth anywhere in the input. Every other line is
# echoed to STDOUT immediately.
rule_pattern = /^M ?([^ ]*) ([^ ]*) ([^ ]*) : ([^ ]*) ([^ ]*) = (.*)$/

# depth => { category => 1 } for categories seen as head / modifier children.
# The default block makes a depth with no resolved heads read as an empty
# set; a plain {} would return nil there and the output loop would raise
# NoMethodError on nil.key?.
head_cats = Hash.new { |by_depth, depth| by_depth[depth] = {} }
mod_cats  = Hash.new { |by_depth, depth| by_depth[depth] = {} }

# model => { [ldepth, parent, children, rest] => 1 }; hash keys de-duplicate
# repeated rules while keeping first-seen order.
pending = {}

ctr = 0
while (line = STDIN.gets)
  fields = rule_pattern.match(line)

  if fields
    model, depth, parent, left, right, rest = fields.captures
    hd = hdwdbattle(parent, left, right, "unk", "unk", "unk", parent)
    # Rules of model 'r' are recorded one level deeper than their stated depth.
    ldepth = (model == 'r') ? depth.to_i + 1 : depth.to_i

    if hd == 0
      children = "h:#{left} m#{right}:#{right}"
      head_cats[ldepth][left] = 1
      mod_cats[ldepth][right] = 1
    elsif hd == 1
      children = "m#{left}:#{left} h:#{right}"
      mod_cats[ldepth][left] = 1
      head_cats[ldepth][right] = 1
    else
      # hdwdbattle could not name a head (it returned neither 0 nor 1).
      children = "-:- -:-"
    end
    # A '-' child always yields the placeholder pair, whatever head was picked.
    children = "-:- -:-" if left == '-' || right == '-'

    (pending[model] ||= {})[[ldepth, parent, children, rest]] = 1
  else
    print line
  end

  # The counter advances for every input line, not only for "M" rules.
  ctr += 1
  $stderr.print " ... found headwords for #{ctr} rules ...\n" if ctr % 10000 == 0
end

# Emit each buffered rule once per role its parent category plays at that
# depth: as a head (h:CAT) and/or as a modifier (mCAT:CAT). A rule whose
# parent was seen in neither role at that depth is not printed.
pending.each do |model, rules|
  rules.each_key do |(ldepth, parent, children, rest)|
    if head_cats[ldepth].key?(parent)
      print "M #{model} #{ldepth} h:#{parent} : #{children} = #{rest}\n"
    end
    if mod_cats[ldepth].key?(parent)
      print "M #{model} #{ldepth} m#{parent}:#{parent} : #{children} = #{rest}\n"
    end
  end
end
