@inproceedings{okabe-etal-2026-optical,
    title = "Optical Character Recognition for the {International Phonetic Alphabet}",
    author = "Okabe, Shu and
      Zelo, Dejvi and
      Fraser, Alexander",
    editor = "Demberg, Vera and
      Inui, Kentaro and
      Marquez, Llu{\'i}s",
    booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 2: Short Papers)",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://preview.aclanthology.org/ingest-eacl/2026.eacl-short.19/",
    pages = "265--273",
    isbn = "979-8-89176-381-4",
    abstract = "As grammar books are increasingly used as additional reference resources specifically for very low-resource languages, a significant portion comes from scans and relies on the quality of the Optical Character Recognition (OCR) tool. We focus here on a particular script used in linguistics to transcribe sounds: the International Phonetic Alphabet (IPA). We consider two data sources: actual grammar book PDFs for two languages under documentation, Japhug and Kagayanen, and a synthetically generated dataset based on Wiktionary. We compare two neural OCR frameworks, Tesseract and Calamari, and a recent large vision-language model, Qwen2.5-VL-7B, all three in an off-the-shelf setting and with fine-tuning. While their zero-shot performance is relatively poor for IPA characters in general due to character set mismatch, fine-tuning with the synthetic dataset leads to notable improvements."
}
@comment{Residue from the ACL Anthology export page, kept as a note:
  Markdown (Informal) citation:
  [Optical Character Recognition for the International Phonetic Alphabet](https://preview.aclanthology.org/ingest-eacl/2026.eacl-short.19/) (Okabe et al., EACL 2026)
  ACL
}