% arXiv preprint 2310.05863 (listed by dblp under CoRR); record exported from dblp via BibSonomy.
% Bibliographic fields come first; the trailing fields (ee, biburl, hashes, timestamps, keywords)
% are export bookkeeping that standard styles ignore.
@article{journals/corr/abs-2310-05863,
  author        = {Sun, Guangzhi and Yu, Wenyi and Tang, Changli and Chen, Xianzhao and Tan, Tian and Li, Wei and Lu, Lu and Ma, Zejun and Zhang, Chao},
  title         = {Fine-grained Audio-Visual Joint Representations for Multimodal Large Language Models},
  journal       = {CoRR},
  volume        = {abs/2310.05863},
  year          = {2023},
  doi           = {10.48550/arXiv.2310.05863},
  eprint        = {2310.05863},
  archiveprefix = {arXiv},
  url           = {http://dblp.uni-trier.de/db/journals/corr/corr2310.html#abs-2310-05863},
  ee            = {https://doi.org/10.48550/arXiv.2310.05863},
  biburl        = {https://www.bibsonomy.org/bibtex/2314f2c8e6eb23599b7b95bb2311ae867/dblp},
  interhash     = {7d3c4bcf12d006c5807ba1fcea186d86},
  intrahash     = {314f2c8e6eb23599b7b95bb2311ae867},
  keywords      = {dblp},
  added-at      = {2024-07-22T00:00:00.000+0200},
  timestamp     = {2024-07-29T07:05:47.000+0200},
}