% Kambhatla, Born and Sarkar: paper in Findings of the ACL: EACL 2023, pp. 2136--2152.
% The url field is the canonical ACL Anthology landing page for the Anthology ID
% that also appears in the doi, rather than a staging "preview" build link.
@inproceedings{kambhatla-etal-2023-decipherment,
  title     = {Decipherment as Regression: Solving Historical Substitution Ciphers by Learning Symbol Recurrence Relations},
  author    = {Kambhatla, Nishant and
               Born, Logan and
               Sarkar, Anoop},
  editor    = {Vlachos, Andreas and
               Augenstein, Isabelle},
  booktitle = {Findings of the Association for Computational Linguistics: {EACL} 2023},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.findings-eacl.160/},
  doi       = {10.18653/v1/2023.findings-eacl.160},
  pages     = {2136--2152},
  abstract  = {Solving substitution ciphers involves mapping sequences of cipher symbols to fluent text in a target language. This has conventionally been formulated as a search problem, to find the decipherment key using a character-level language model to constrain the search space. This work instead frames decipherment as a sequence prediction task, using a Transformer-based causal language model to learn recurrences between characters in a ciphertext. We introduce a novel technique for transcribing arbitrary substitution ciphers into a common recurrence encoding. By leveraging this technique, we (i) create a large synthetic dataset of homophonic ciphers using random keys, and (ii) train a decipherment model that predicts the plaintext sequence given a recurrence-encoded ciphertext. Our method achieves strong results on synthetic 1:1 and homophonic ciphers, and cracks several real historic homophonic ciphers. Our analysis shows that the model learns recurrence relations between cipher symbols and recovers decipherment keys in its self-attention.},
}
Markdown (Informal)
[Decipherment as Regression: Solving Historical Substitution Ciphers by Learning Symbol Recurrence Relations](https://aclanthology.org/2023.findings-eacl.160/) (Kambhatla et al., Findings 2023)
ACL