#!/usr/bin/env perl

use strict;
use utf8;
binmode STDIN, ":encoding(utf8)";
binmode STDERR, ":encoding(utf8)";
binmode STDOUT, ":encoding(utf8)";

# Script entry point.  Exactly one argument is required: the path of the
# conjugation table that read_katuyou() loads.
if ( @ARGV != 1 ) {
    print STDERR "$0 <JUMAN.katuyou>\n";
    # A usage error is a failure, so report it with a non-zero exit status.
    exit 1;
}

# Conjugation endings shared by read_katuyou() (writer) and mrph_analysis()
# (reader): $endings_{conjugation type}{conjugation form} = [ endings ].
my %endings_;
read_katuyou( $ARGV[0] );
main();


# Read dictionary entries from STDIN (one tab-separated entry per line, parsed
# by read_entry) and print each entry with extra kana spelling variants of its
# surface forms.
#
# For every (surface form, POS) pair the following variants are produced, each
# printed as "form<TAB>pos<TAB>":
#   * the form as given;
#   * the form passed through hira2kana;
#   * when the conjugation type (third comma-separated POS field) is not "*"
#     and mrph_analysis splits the form into stem + ending with an
#     all-hiragana ending:
#       - katakana stem + hiragana ending (only if the stem is all hiragana),
#       - unchanged stem + katakana ending (only if the stem is non-empty).
# The variants are followed by the normal form, normal reading, normal POS and
# citation form of the entry, then a newline.  Duplicate variants are printed
# once, in the order they were first generated.
sub main {

    while ( my $line = <STDIN> ) {
        chomp $line;
        my ( $surface_form_ref, $surface_pos_ref, $normal_form, $normal_reading, $normal_pos, $citation_form )
            = read_entry( $line );

        # Unique "form\tpos" keys in first-seen order.  Printing the keys of
        # a plain hash would follow Perl's randomized hash order and make the
        # output differ from run to run.
        my ( @variants, %seen );
        my $add_variant = sub {
            my ( $form, $pos ) = @_;
            my $key = "$form\t$pos";
            push @variants, $key unless $seen{$key}++;
        };

        for my $i ( 0 .. $#{$surface_form_ref} ) {
            my ( $word_form, $pos ) = ( $surface_form_ref->[$i], $surface_pos_ref->[$i] );

            # original
            $add_variant->( $word_form, $pos );

            # hiragana -> katakana
            $add_variant->( hira2kana( $word_form ), $pos );

            # Only conjugating words get stem/ending variants.
            my $ctype = ( split( /,/, $pos ) )[2];
            next if !defined $ctype || $ctype eq "*";

            my ( $stem, $ending ) = mrph_analysis( $word_form, $pos );
            next if !defined $stem;                  # no known ending matched
            next if $ending !~ /^\p{Hiragana}+$/;    # both variants need a hiragana ending

            # hiragana -> katakana stem + hiragana ending
            if ( $stem =~ /^\p{Hiragana}+$/ ) {
                $add_variant->( hira2kana( $stem ) . $ending, $pos );
            }

            # unchanged stem + hiragana -> katakana ending
            if ( 0 < length( $stem ) ) {
                $add_variant->( $stem . hira2kana( $ending ), $pos );
            }
        }

        print "$_\t" for @variants;
        print "$normal_form\t$normal_reading\t$normal_pos\t$citation_form\n";
    }
}


# Load the conjugation table in $filename into the file-level %endings_.
#
# Every line of the file must have the form
#     (CTYPE((CFORM ENDING ...)(CFORM ENDING ...)...))
# i.e. a conjugation type followed by its conjugation forms, each with
# space-separated endings.  An ending written as "*" stands for the empty
# ending and is stored as "".  The result is
#     $endings_{CTYPE}{CFORM} = [ ENDING, ... ]
#
# Dies if the file cannot be opened or if a line does not match that form.
sub read_katuyou {

    my ( $filename ) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as a mode or a pipe, and the decoding layer is set at open.
    open( my $fh, '<:encoding(utf8)', $filename )
        or die "Cannot open $filename: $!\n";

    while ( my $line = <$fh> ) {
        chomp $line;

        # List-context match: yields ( CTYPE, forms ) or an empty list (false).
        my ( $ctype, $forms ) = $line =~ /^\((.+?)\(\((.+)\)\)\)$/
            or die "Error: $line\n";

        # The forms are written back to back as "A x)(B y)(C z".
        for my $form ( split( /\)\(/, $forms ) ) {
            my ( $cform, @endings ) = split( / /, $form );
            next unless @endings;    # nothing to record for this form

            # "*" is the table's notation for an empty ending.
            push @{ $endings_{$ctype}{$cform} }, map { $_ eq "*" ? "" : $_ } @endings;
        }
    }
    close( $fh );
}


# Parse one tab-separated dictionary line.
#
# Layout: zero or more (surface form, surface POS) pairs, followed by four
# trailing fields: normal form, normal reading, normal POS, citation form.
#
# Returns the list
#   ( \@surface_forms, \@surface_pos,
#     $normal_form, $normal_reading, $normal_pos, $citation_form )
# where the two array references are parallel.  Missing fields come back as
# undef.
sub read_entry {

    my ( $line ) = @_;

    # Limit -1 keeps trailing empty fields.  Without it a line whose last
    # field(s) are empty (e.g. an empty citation form) loses them, and every
    # remaining field is shifted into the wrong slot.
    my @fields = split( /\t/, $line, -1 );

    # The four fixed fields are taken from the end of the line.
    my $citation_form  = pop @fields;
    my $normal_pos     = pop @fields;
    my $normal_reading = pop @fields;
    my $normal_form    = pop @fields;

    # Whatever is left is a sequence of (form, POS) pairs.
    my ( @surface_forms, @surface_pos );
    while ( @fields ) {
        push @surface_forms, shift @fields;
        push @surface_pos,   shift @fields;
    }

    return ( \@surface_forms, \@surface_pos, $normal_form, $normal_reading, $normal_pos, $citation_form );
}


# Convert hiragana to katakana.
#
# Every character in U+3041 (small A) .. U+3096 (small KE) is replaced by the
# katakana at the same offset, U+30A1 .. U+30F6.  The range includes VU, small
# KA and small KE (U+3094 .. U+3096), which lie after N (U+3093); stopping at
# N would leave them as hiragana inside otherwise-katakana output.
# All other characters are returned unchanged.
#
# Returns a converted copy; the caller's string is not modified.
sub hira2kana {

    my ( $hira ) = @_;
    $hira =~ tr/\x{3041}-\x{3096}/\x{30A1}-\x{30F6}/;
    return $hira;
}


# Convert katakana to hiragana.
#
# Characters in U+30A1 (small A) .. U+30F3 (N) are replaced by the hiragana at
# the same offset, U+3041 .. U+3093.  Anything outside that range, including
# katakana above U+30F3, is returned unchanged.
#
# Returns a converted copy; the caller's string is not modified.
sub kana2hira {

    ( my $converted = $_[0] ) =~ tr/\x{30A1}-\x{30F3}/\x{3041}-\x{3093}/;
    return $converted;
}


# Split $surface_form into ( stem, ending ) according to its conjugation.
#
# $pos is a comma-separated POS string; its third field is the conjugation
# type and its fourth the conjugation form.  The candidate endings are looked
# up in the file-level %endings_ under that type and form.
#
# Returns:
#   ( $surface_form, "" )  when the conjugation type is "*";
#   ( stem, ending )       for the first listed ending that is a suffix of
#                          $surface_form (the empty ending always matches);
#   ( undef, undef )       when the type/form is unknown or no ending matches.
sub mrph_analysis {

    my ( $surface_form, $pos ) = @_;
    my ( undef, undef, $ctype, $cform ) = split( /,/, $pos );

    return ( undef, undef ) unless defined $ctype;
    return ( $surface_form, "" ) if $ctype eq "*";

    # Check the outer key first so an unknown type is not autovivified into
    # %endings_ just by being looked up.
    my $endings_ref = $endings_{$ctype} && $endings_{$ctype}{$cform};
    return ( undef, undef ) unless $endings_ref;

    for my $ending ( @{$endings_ref} ) {
        # \Q...\E matches the ending literally; without it any regex
        # metacharacter in a table ending would be treated as pattern syntax.
        if ( $surface_form =~ /^(.*)\Q$ending\E$/ ) {
            return ( $1, $ending );
        }
    }
    return ( undef, undef );
}
