% Journal article in Transactions on Machine Learning Research; the url field is the OpenReview forum page.
@article{gui2023training,
  author  = {Gui, Liangke and Chang, Yingshan and Huang, Qiuyuan and Som, Subhojit and Hauptmann, Alexander G. and Gao, Jianfeng and Bisk, Yonatan},
  title   = {Training Vision-Language Transformers from Captions},
  journal = {Transactions on Machine Learning Research},
  year    = {2023},
  issn    = {2835-8856},
  url     = {https://openreview.net/forum?id=xLnbSpozWS},
}