% Workshop paper, ACL Anthology ID 2025.knowllm-1.5 (KnowFM workshop proceedings).
% The url field uses the canonical aclanthology.org landing page rather than a
% preview.aclanthology.org branch-build address.
@inproceedings{polat-etal-2025-stress,
  title     = {Stress-Testing Multimodal Foundation Models for Crystallographic Reasoning},
  author    = {Polat, Can and
               Kurban, Hasan and
               Serpedin, Erchin and
               Kurban, Mustafa},
  editor    = {Zhang, Yuji and
               Chen, Canyu and
               Li, Sha and
               Geva, Mor and
               Han, Chi and
               Wang, Xiaozhi and
               Feng, Shangbin and
               Gao, Silin and
               Augenstein, Isabelle and
               Bansal, Mohit and
               Li, Manling and
               Ji, Heng},
  booktitle = {Proceedings of the 3rd Workshop on Towards Knowledgeable Foundation Models ({KnowFM})},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.knowllm-1.5/},
  pages     = {49--58},
  isbn      = {979-8-89176-283-1},
  abstract  = {Evaluating foundation models for crystallographic reasoning requires benchmarks that isolate generalization behavior while enforcing physical constraints. This work introduces, xCrysAlloys, a multiscale multicrystal dataset with two physically grounded evaluation protocols to stress-test multimodal generative models. The Spatial-Exclusion benchmark withholds all supercells of a given radius from a diverse dataset, enabling controlled assessments of spatial interpolation and extrapolation. The Compositional-Exclusion benchmark omits all samples of a specific chemical composition, probing generalization across stoichiometries. Nine vision{--}language foundation models are prompted with crystallographic images and textual context to generate structural annotations. Responses are evaluated via (i) relative errors in lattice parameters and density, (ii) a physics-consistency index penalizing volumetric violations, and (iii) a hallucination score capturing geometric outliers and invalid space-group predictions. These benchmarks establish a reproducible, physically informed framework for assessing generalization, consistency, and reliability in large-scale multimodal models. Dataset and implementation are available at https://github.com/KurbanIntelligenceLab/StressTestingMMFMinCR.},
}
Markdown (Informal)
[Stress-Testing Multimodal Foundation Models for Crystallographic Reasoning](https://aclanthology.org/2025.knowllm-1.5/) (Polat et al., KnowLLM 2025)
ACL