% Workshop paper from the OSACT 2024 proceedings (ACL Anthology ID 2024.osact-1.8).
% The url uses the canonical aclanthology.org host; whole words that must keep
% their capitalisation under sentence-casing styles are brace-protected.
@inproceedings{kruse-ahmed-2024-tafsirextractor,
  author    = {Kruse, Carl and Ahmed, Sajawel},
  title     = {{TafsirExtractor}: Text Preprocessing Pipeline preparing Classical {Arabic} Literature for Machine Learning Applications},
  editor    = {Al-Khalifa, Hend and Darwish, Kareem and Mubarak, Hamdy and Ali, Mona and Elsayed, Tamer},
  booktitle = {Proceedings of the 6th Workshop on Open-Source {Arabic} Corpora and Processing Tools ({OSACT}) with Shared Tasks on {Arabic} {LLMs} Hallucination and Dialect to {MSA} Machine Translation @ {LREC-COLING} 2024},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {67--73},
  url       = {https://aclanthology.org/2024.osact-1.8/},
}