% Ding, Utiyama and Sumita -- short paper in the ACL 2020 main proceedings.
% `url` is the canonical ACL Anthology landing page for Anthology ID
% 2020.acl-main.44 (the same ID that forms the DOI suffix).
% `address` is kept as exported ("Online"); it appears to record the event
% location rather than a publisher city -- confirm if a style prints it.
@inproceedings{ding-etal-2020-three,
  author    = {Ding, Chenchen and
               Utiyama, Masao and
               Sumita, Eiichiro},
  title     = {A Three-Parameter Rank-Frequency Relation in Natural Languages},
  editor    = {Jurafsky, Dan and
               Chai, Joyce and
               Schluter, Natalie and
               Tetreault, Joel},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {460--464},
  doi       = {10.18653/v1/2020.acl-main.44},
  url       = {https://aclanthology.org/2020.acl-main.44/},
  abstract  = {We present that, the rank-frequency relation in textual data follows $f \propto r^{-\alpha}(r+\gamma)^{-\beta}$, where $f$ is the token frequency and $r$ is the rank by frequency, with ($\alpha$, $\beta$, $\gamma$) as parameters. The formulation is derived based on the empirical observation that $d^2 (x+y)/dx^2$ is a typical impulse function, where $(x,y)=(\log r, \log f)$. The formulation is the power law when $\beta=0$ and the Zipf{--}Mandelbrot law when $\alpha=0$. We illustrate that $\alpha$ is related to the analytic features of syntax and $\beta+\gamma$ to those of morphology in natural languages from an investigation of multilingual corpora.},
}
Markdown (Informal)
[A Three-Parameter Rank-Frequency Relation in Natural Languages](https://aclanthology.org/2020.acl-main.44/) (Ding et al., ACL 2020)
ACL