@inproceedings{santos-etal-2012-structural,
title = "Structural alignment of plain text books",
author = "Santos, Andr{\'e} and
Almeida, Jos{\'e} Jo{\~a}o and
Carvalho, Nuno",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf",
pages = "2069--2074",
abstract = "Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections ― sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs book synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative method for splitting bitexts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="santos-etal-2012-structural">
<titleInfo>
<title>Structural alignment of plain text books</title>
</titleInfo>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">João</namePart>
<namePart type="family">Almeida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuno</namePart>
<namePart type="family">Carvalho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-may</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections ― sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs book synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative method for splitting bitexts.</abstract>
<identifier type="citekey">santos-etal-2012-structural</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf</url>
</location>
<part>
<date>2012-may</date>
<extent unit="page">
<start>2069</start>
<end>2074</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Structural alignment of plain text books
%A Santos, André
%A Almeida, José João
%A Carvalho, Nuno
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 may
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F santos-etal-2012-structural
%X Text alignment is one of the main processes for obtaining parallel corpora. When aligning two versions of a book, results are often affected by unpaired sections ― sections which only exist in one of the versions of the book. We developed Text::Perfide::BookSync, a Perl module which performs book synchronization (structural alignment based on section delimitation), provided they have been previously annotated by Text::Perfide::BookCleaner. We discuss the need for such a tool and several implementation decisions. The main functions are described, and examples of input and output are presented. Text::Perfide::PartialAlign is an extension of the partialAlign.py tool bundled with hunalign which proposes an alternative method for splitting bitexts.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf
%P 2069-2074
Markdown (Informal)
[Structural alignment of plain text books](http://www.lrec-conf.org/proceedings/lrec2012/pdf/967_Paper.pdf) (Santos et al., LREC 2012)
ACL
- André Santos, José João Almeida, and Nuno Carvalho. 2012. Structural alignment of plain text books. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 2069–2074, Istanbul, Turkey. European Language Resources Association (ELRA).