###############################################################################
##                                                                           ##
## This file is part of ModelBlocks. Copyright 2009, ModelBlocks developers. ##
##                                                                           ##
##    ModelBlocks is free software: you can redistribute it and/or modify    ##
##    it under the terms of the GNU General Public License as published by   ##
##    the Free Software Foundation, either version 3 of the License, or      ##
##    (at your option) any later version.                                    ##
##                                                                           ##
##    ModelBlocks is distributed in the hope that it will be useful,         ##
##    but WITHOUT ANY WARRANTY; without even the implied warranty of         ##
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          ##
##    GNU General Public License for more details.                           ##
##                                                                           ##
##    You should have received a copy of the GNU General Public License      ##
##    along with ModelBlocks.  If not, see <http://www.gnu.org/licenses/>.   ##
##                                                                           ##
###############################################################################

# BEGIN --- requirements

require 'optparse'
require 'ostruct'
require 'open3'
require 'fileutils'
require 'tempfile'
require 'date'
require 'set'

# A symbol taken from a bracketed tree, together with the span it covers.
class Token

	# raw is true for a token that was not produced by applying a rule.
	#   The tree conversion relies on this flag to decide when to advance its start pointer.

	attr_accessor :string, :start_index, :raw
	attr_writer :end_index

	def initialize(string, start_index=nil, end_index=nil, raw=true)
		@string = string
		@start_index = start_index
		@end_index = end_index
		@raw = raw
	end

	# Explicit end index when one has been assigned; otherwise the position right after start_index.
	def end_index
		@end_index.nil? ? @start_index + 1 : @end_index
	end

end


# Converts one bracketed tree into span-annotated rule strings.
#
# The tree text is split into "(", ")" and whitespace-separated symbols. Every
# symbol other than ")" is pushed on a stack as a raw Token positioned at the
# current word count. Each ")" pops the stack back to the matching "(":
#   * if every popped token is still raw, the group counts as one word: the word
#     count advances and the last symbol of the group is appended to the sentence;
#   * if the group holds more than one token, the first one is the left-hand side.
#     It is pushed back as a derived (non-raw) token spanning the group, and a rule
#     string is recorded when the number of right-hand-side tokens lies within
#     min_rule_size..max_rule_size (inclusive).
#
# tree_id        - identifier printed at the front of every rule string
# flat_tree      - the bracketed tree as a single string
# min_rule_size  - smallest right-hand-side length to report
# max_rule_size  - largest right-hand-side length to report
#
# Returns [full_sentence, rules]: the words interleaved with their positions,
# e.g. "(0) the (1) dog (2) ", and the array of rule strings.
#
# Raises ArgumentError when a ")" has no matching "(".
def convert(tree_id, flat_tree, min_rule_size, max_rule_size)

	stack = []
	rules = []

	tokens = flat_tree.gsub("(", " ( ").gsub(")", " ) ").split

	# Number of words seen so far; also the start position given to every token pushed next.
	word_count = 0

	full_sentence = String.new("(0) ")

	# An empty stack here means a ")" had no matching "("; say so instead of failing later on nil.
	pop_token = lambda {
		stack.pop || raise(ArgumentError, "Sentence #{tree_id}: unbalanced \")\" in tree: #{flat_tree}")
	}

	tokens.each { |symbol|
		if symbol != ")"
			stack.push(Token.new(symbol, word_count))
			next
		end

		# Collect the group closed by this ")" in left-to-right order.
		group = []
		token = pop_token.call
		all_raw = token.raw
		word = token.string if token.raw
		# The group ends where its last (first popped) token ends.
		span_end = token.end_index
		until token.string == "("
			all_raw &&= token.raw
			group.unshift(token)
			token = pop_token.call
		end

		if all_raw
			word_count += 1
			full_sentence << "#{word} (#{word_count}) "
		end

		next unless group.size > 1

		# The label keeps the start index it was pushed with, which is where the group begins.
		lhs = group.shift
		lhs.raw = false
		lhs.end_index = span_end
		stack.push(lhs)

		rhs = group
		next unless rhs.size >= min_rule_size && rhs.size <= max_rule_size

		rhs_text = rhs.map { |rhs_token| " #{rhs_token.string} (#{rhs_token.end_index})" }.join
		rules.push("Sentence #{tree_id}: (#{lhs.start_index}) #{lhs.string} (#{lhs.end_index}) => (#{rhs.first.start_index})#{rhs_text}")
	}

	[full_sentence, rules]

end



# BEGIN --- Process command line options
      
# Settings filled in from the command line. With the defaults below no rule is
# filtered out by the size of its right hand side.
options = OpenStruct.new
options.min_rule_size = 0
options.max_rule_size = 1.0/0  # INFINITY

opt_parser = OptionParser.new { |opts|

	# Sizes are validated as decimal integers, so a non-numeric SIZE is reported
	# as an error instead of silently becoming 0.
	opts.on("--min_rule_size SIZE", OptionParser::DecimalInteger, "do not print rules where the minimum number of tokens on the right hand side is less than SIZE") { |value|
		options.min_rule_size = value
	}

	opts.on("--max_rule_size SIZE", OptionParser::DecimalInteger, "do not print rules where the maximum number of tokens on the right hand side is greater than SIZE") { |value|
		options.max_rule_size = value
	}

	# Print the usage summary on standard output and exit successfully.
	opts.on_tail("-h", "--help", "Show this message") { puts opts; exit }

}

begin
	# Parse the options, removing the recognised ones from ARGV.
	opt_parser.parse!
rescue OptionParser::ParseError => e
	# Bad command line: report what was wrong, show the help message and bail out.
	abort("#{e.message}\n#{opt_parser}")
end
#  END  --- Process command line options
 

# Each line of standard input is converted on its own as one bracketed tree;
# the zero-based line number serves as the sentence id. A tree that yields no
# rules prints nothing, not even its SENTENCE line.
STDIN.each_line.with_index do |line, sentence_id|
	full_sentence, rules = convert(sentence_id, line.strip, options.min_rule_size, options.max_rule_size)
	next if rules.empty?

	puts rules
	puts "SENTENCE #{sentence_id}: #{full_sentence}"
end

#full_sentence, rules = convert(sentence_id, line.strip, options.min_rule_size, options.max_rule_size)
#rules.each { |rule| puts rule }
#puts "SENTENCE #{sentence_id}: #{full_sentence}" if rules.size > 0
#sentence_id += 1



