% VATLM preprint, exported from dblp via BibSonomy.
% The doi and eprint values below are taken from the `ee` link of the original
% export. added-at, biburl, ee, interhash, intrahash, keywords and timestamp
% are dblp/BibSonomy bookkeeping fields, kept unchanged from the export.
@article{journals/corr/abs-2211-11275,
  author        = {Zhu, Qiu-Shi and Zhou, Long and Zhang, Ziqiang and Liu, Shujie and Jiao, Binxing and Zhang, Jie and Dai, Lirong and Jiang, Daxin and Li, Jinyu and Wei, Furu},
  title         = {{VATLM}: Visual-Audio-Text Pre-Training with Unified Masked Prediction for Speech Representation Learning},
  journal       = {CoRR},
  volume        = {abs/2211.11275},
  year          = {2022},
  doi           = {10.48550/arXiv.2211.11275},
  eprint        = {2211.11275},
  archiveprefix = {arXiv},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr2211.html#abs-2211-11275},
  ee            = {https://doi.org/10.48550/arXiv.2211.11275},
  keywords      = {dblp},
  added-at      = {2024-04-23T00:00:00.000+0200},
  timestamp     = {2024-04-29T07:04:53.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/275c3c976c436e768946974383c0b2be4/dblp},
  interhash     = {65db20be7e2db9bd59fa0f1cdddc25b1},
  intrahash     = {75c3c976c436e768946974383c0b2be4},
}