@inproceedings{bai-etal-2023-atformer,
title = "{ATF}ormer: A Learned Performance Model with Transfer Learning Across Devices for Deep Learning Tensor Programs",
author = "Bai, Yang and
Zhao, Wenqian and
Yin, Shuo and
Wang, Zixiao and
Yu, Bei",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2023.emnlp-main.250/",
doi = "10.18653/v1/2023.emnlp-main.250",
pages = "4102--4116",
abstract = "The training and inference efficiency of ever-larger deep neural networks highly rely on the performance of tensor operators on specific hardware platforms. Therefore, a compilation-based optimization flow with automatic tensor generation and parameter tuning is necessary for efficient model deployment. While compilation-based methods with performance models can provide dynamic and suitable code optimization, they suffer from a large design space exploration with rough measurement accuracy and poor transferability among different hardware platforms. This paper presents ATFormer, a simple yet efficient design with attention-inspired modules to accurately predict the performance of optimized operators by capturing global and long-range dependencies within a complete scheduling space. Compared with state-of-the-arts, ATFormer can predict the optimal implementation of tensor operators to reduce inference time with minimal effort on modern DNN benchmarks. Furthermore, ATFormer with pre-trained parameters can quickly adapt to different workloads and hardware via transfer learning."
}
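
A minimal sketch (not the authors' released code) of the kind of attention-based performance model the abstract describes: a tensor-program schedule is encoded as a sequence of per-statement feature vectors, a Transformer encoder captures global and long-range dependencies across that sequence, and a small head regresses a normalized performance score. The feature dimension, depth, head count, and loss below are illustrative assumptions, not values from the paper.

```python
import torch
import torch.nn as nn


class AttentionPerfModel(nn.Module):
    """Attention-based cost model sketch in the spirit of ATFormer."""

    def __init__(self, feat_dim: int = 164, d_model: int = 256,
                 n_heads: int = 8, n_layers: int = 3):
        super().__init__()
        self.embed = nn.Linear(feat_dim, d_model)
        layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=n_heads,
            dim_feedforward=4 * d_model, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=n_layers)
        self.head = nn.Sequential(
            nn.Linear(d_model, d_model), nn.ReLU(), nn.Linear(d_model, 1))

    def forward(self, feats: torch.Tensor) -> torch.Tensor:
        # feats: (batch, num_statements, feat_dim) schedule features
        x = self.encoder(self.embed(feats))
        x = x.mean(dim=1)                 # pool over the statement sequence
        return self.head(x).squeeze(-1)   # predicted performance score


if __name__ == "__main__":
    model = AttentionPerfModel()
    feats = torch.randn(32, 24, 164)      # a batch of candidate schedules
    scores = model(feats)                 # higher = predicted faster
    target = torch.rand(32)               # measured, normalized throughput
    loss = nn.functional.mse_loss(scores, target)
    loss.backward()
    print(scores.shape, float(loss))
```

For cross-device transfer as described in the abstract, the pre-trained encoder could be kept and only the regression head fine-tuned on a small number of measurements from the new hardware; that fine-tuning strategy is an assumption here, not a detail taken from the paper.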