@inproceedings{senuma-aizawa-2016-learning,
    title     = {Learning Succinct Models: Pipelined Compression with {L1}-Regularization, Hashing, {Elias}-{Fano} Indices, and Quantization},
    author    = {Senuma, Hajime and
                 Aizawa, Akiko},
    editor    = {Matsumoto, Yuji and
                 Prasad, Rashmi},
    booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
    month     = dec,
    year      = {2016},
    address   = {Osaka, Japan},
    publisher = {The COLING 2016 Organizing Committee},
    url       = {https://aclanthology.org/C16-1261/},
    pages     = {2774--2784},
    abstract  = {The recent proliferation of smart devices necessitates methods to learn small-sized models. This paper demonstrates that if there are $m$ features in total but only $n = o(\sqrt{m})$ features are required to distinguish examples, with $\Omega(\log m)$ training examples and reasonable settings, it is possible to obtain a good model in a \textit{succinct} representation using $n \log_2 \frac{m}{n} + o(m)$ bits, by using a pipeline of existing compression methods: L1-regularized logistic regression, feature hashing, Elias{--}Fano indices, and randomized quantization. An experiment shows that a noun phrase chunking task for which an existing library requires 27 megabytes can be compressed to less than 13 \textit{kilo}bytes without notable loss of accuracy.},
}
Markdown (Informal)
[Learning Succinct Models: Pipelined Compression with L1-Regularization, Hashing, Elias-Fano Indices, and Quantization](https://aclanthology.org/C16-1261/) (Senuma & Aizawa, COLING 2016)
ACL