% ACL Anthology record for a C3NLP 2026 workshop paper (single author: Lian Chen).
% Pinyin diacritics are written as LaTeX accent escapes so the title renders
% under classic 8-bit BibTeX; the CJK glyphs in the abstract remain UTF-8 and
% need a Unicode-aware toolchain if the abstract is ever typeset.
% NOTE(review): the url field points at a preview.aclanthology.org ingest
% branch -- confirm and switch to the permanent Anthology URL once published.
@inproceedings{chen-2026-ontology,
  title     = {Ontology-oriented lexico-semantic modeling and neural classification of {Chinese} ch{\'e}ngy{\v{u}}: A culture-aware {NLP} approach},
  author    = {Chen, Lian},
  editor    = {Prabhakaran, Vinodkumar and
               Dev, Sunipa and
               Benotti, Luciana and
               Hershcovich, Daniel and
               Cao, Yong and
               Zhou, Li and
               Ma, Bolei and
               Adebara, Ife},
  booktitle = {Proceedings of the 4th Workshop on Cross-Cultural Considerations in {NLP} ({C3NLP} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.c3nlp-1.12/},
  pages     = {150--160},
  isbn      = {979-8-89176-420-0},
  abstract  = {This paper proposes a semi-automatic lexico-semantic modeling framework for Chinese ch{\'e}ngy{\v{u}} containing body-part and animal lexemes. The framework combines manual semantic annotation, lightweight RDF/OWL formalization and semantic classification in order to investigate whether lexical mediators such as 心 x{\={i}}n ``heart/mind'', 口 k{\v{o}}u ``mouth'' or 马 m{\v{a}} ``horse'' are sufficient to predict idiomatic semantic interpretation. Based on 440 annotated ch{\'e}ngy{\v{u}} normalized into 18 semantic categories, we compare three classification approaches: a rule-based keyword baseline, character n-gram TF-IDF with logistic regression, and BERT-base-chinese. The results show that lexical mediators cannot be directly equated with semantic categories and that TF-IDF achieves the best overall performance, suggesting that lightweight character-level representations remain robust for very short idioms in low-resource settings. The study contributes an interpretable RDF/OWL-compatible resource for culture-aware modeling of Chinese idioms.},
}
Markdown (Informal)
[Ontology-oriented lexico-semantic modeling and neural classification of Chinese chéngyǔ: A culture-aware NLP approach](https://preview.aclanthology.org/ingest-acl-workshops/2026.c3nlp-1.12/) (Chen, C3NLP 2026)
ACL