A significant roadblock in multilingual neural language modeling is the lack of labeled non-English data. One potential method for overcoming this issue is learning cross-lingual text representations that can be used to transfer performance from training on English tasks to non-English tasks, even with little to no task-specific non-English data. In this paper, we explore a natural setup for learning cross-lingual sentence representations: the dual-encoder. We provide a comprehensive evaluation of our cross-lingual representations on a number of monolingual, cross-lingual, and zero-shot/few-shot learning tasks, and also give an analysis of different learned cross-lingual embedding spaces.
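The dual-encoder setup named in the abstract scores paired sentences (e.g., a sentence and its translation) by the dot product of their independently computed embeddings. Below is a minimal PyTorch sketch of that idea; the layer sizes, mean pooling, and in-batch-negatives softmax ranking loss are illustrative assumptions, not the paper's exact configuration.

import torch
import torch.nn as nn
import torch.nn.functional as F

class SentenceEncoder(nn.Module):
    # One tower: embed tokens, run a small transformer, mean-pool to a unit vector.
    def __init__(self, vocab_size=32000, dim=512):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, dim)
        layer = nn.TransformerEncoderLayer(d_model=dim, nhead=8, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=2)

    def forward(self, token_ids):
        h = self.encoder(self.embed(token_ids))          # (batch, seq, dim)
        return F.normalize(h.mean(dim=1), dim=-1)        # (batch, dim), unit length

def translation_ranking_loss(src_vecs, tgt_vecs):
    # Score every source against every target in the batch; the diagonal
    # holds the true pairs, and all other targets act as negatives.
    scores = src_vecs @ tgt_vecs.t()
    labels = torch.arange(scores.size(0), device=scores.device)
    return F.cross_entropy(scores, labels)

In the multi-task setting the title refers to, one would share such an encoder across several paired tasks and sum their ranking losses; the exact task mix and sharing scheme here are assumptions based only on the title and abstract.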
%0 Journal Article
%1 chidambaram2018learning
%A Chidambaram, Muthuraman
%A Yang, Yinfei
%A Cer, Daniel
%A Yuan, Steve
%A Sung, Yun-Hsuan
%A Strope, Brian
%A Kurzweil, Ray
%D 2018
%J arXiv preprint arXiv:1810.12836
%K cross-lingual multi-lingual sentence-embeddings transformer xling
%T Learning Cross-Lingual Sentence Representations via a Multi-task Dual-Encoder Model
%U https://arxiv.org/abs/1810.12836
%V abs/1810.12836
%X A significant roadblock in multilingual neural language modeling is the lack of labeled non-English data. One potential method for overcoming this issue is learning cross-lingual text representations that can be used to transfer performance from training on English tasks to non-English tasks, even with little to no task-specific non-English data. In this paper, we explore a natural setup for learning cross-lingual sentence representations: the dual-encoder. We provide a comprehensive evaluation of our cross-lingual representations on a number of monolingual, cross-lingual, and zero-shot/few-shot learning tasks, and also give an analysis of different learned cross-lingual embedding spaces.
@article{chidambaram2018learning,
abstract = {A significant roadblock in multilingual neural language modeling is the lack of labeled non-English data. One potential method for overcoming this issue is learning cross-lingual text representations that can be used to transfer performance from training on English tasks to non-English tasks, even with little to no task-specific non-English data. In this paper, we explore a natural setup for learning cross-lingual sentence representations: the dual-encoder. We provide a comprehensive evaluation of our cross-lingual representations on a number of monolingual, cross-lingual, and zero-shot/few-shot learning tasks, and also give an analysis of different learned cross-lingual embedding spaces.},
author = {Chidambaram, Muthuraman and Yang, Yinfei and Cer, Daniel and Yuan, Steve and Sung, Yun-Hsuan and Strope, Brian and Kurzweil, Ray},
biburl = {https://www.bibsonomy.org/bibtex/23d3df9a568cbb88427b0c2fa0b64040a/ghagerer},
journal = {arXiv preprint arXiv:1810.12836},
keywords = {cross-lingual multi-lingual sentence-embeddings transformer xling},
title = {Learning Cross-Lingual Sentence Representations via a Multi-task Dual-Encoder Model},
url = {https://arxiv.org/abs/1810.12836},
volume = {abs/1810.12836},
year = 2018
}