% Santini (2006), "Identifying Genres of Web Pages": long paper in the
% proceedings of the 13th TALN conference (Leuven), published by ATALA.
% The url field uses the canonical ACL Anthology address rather than a
% preview-branch build, and the proper noun "Web" is brace-protected in the
% title so sentence-casing styles do not lowercase it.
@inproceedings{santini-2006-identifying,
  title     = {Identifying Genres of {Web} Pages},
  author    = {Santini, Marina},
  editor    = {Mertens, Piet and
               Fairon, C{\'e}drick and
               Dister, Anne and
               Watrin, Patrick},
  booktitle = {Actes de la 13{\`e}me conf{\'e}rence sur le Traitement Automatique des Langues Naturelles. Articles longs},
  month     = apr,
  year      = {2006},
  address   = {Leuven, Belgique},
  publisher = {ATALA},
  url       = {https://aclanthology.org/2006.jeptalnrecital-long.28/},
  pages     = {308--317},
  abstract  = {In this paper, we present an inferential model for text type and genre identification of Web pages, where text types are inferred using a modified form of Bayes' theorem, and genres are derived using a few simple if-then rules. As the genre system on the Web is a complex phenomenon, and Web pages are usually more unpredictable and individualized than paper documents, we propose this approach as an alternative to unsupervised and supervised techniques. The inferential model allows a classification that can accommodate genres that are not entirely standardized, and is more capable of reading a Web page, which is mixed, rarely corresponding to an ideal type and often showing a mixture of genres or no genre at all. A proper evaluation of such a model remains an open issue.},
}
Markdown (Informal)
[Identifying Genres of Web Pages](https://aclanthology.org/2006.jeptalnrecital-long.28/) (Santini, JEP/TALN/RECITAL 2006)
ACL
- Marina Santini. 2006. Identifying Genres of Web Pages. In Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs, pages 308–317, Leuven, Belgique. ATALA.