Neural machine translation is a recently proposed approach to machine translation. Unlike the traditional statistical machine translation, the neural machine translation aims at building a single neural network that can be jointly tuned to maximize the translation performance. The models proposed recently for neural machine translation often belong to a family of encoder-decoders and consist of an encoder that encodes a source sentence into a fixed-length vector from which a decoder generates a translation. In this paper, we conjecture that the use of a fixed-length vector is a bottleneck in improving the performance of this basic encoder-decoder architecture, and propose to extend this by allowing a model to automatically (soft-)search for parts of a source sentence that are relevant to predicting a target word, without having to form these parts as a hard segment explicitly. With this new approach, we achieve a translation performance comparable to the existing state-of-the-art phrase-based system on the task of English-to-French translation. Furthermore, qualitative analysis reveals that the (soft-)alignments found by the model agree well with our intuition.
%0 Journal Article
%1 Bahdanau2016
%A Bahdanau, Dzmitry
%A Cho, Kyunghyun
%A Bengio, Yoshua
%D 2016
%K thema:transformer
%T Neural Machine Translation by Jointly Learning to Align and Translate
%U http://arxiv.org/abs/1409.0473
%X Neural machine translation is a recently proposed approach to machine translation. Unlike the traditional statistical machine translation, the neural machine translation aims at building a single neural network that can be jointly tuned to maximize the translation performance. The models proposed recently for neural machine translation often belong to a family of encoder-decoders and consist of an encoder that encodes a source sentence into a fixed-length vector from which a decoder generates a translation. In this paper, we conjecture that the use of a fixed-length vector is a bottleneck in improving the performance of this basic encoder-decoder architecture, and propose to extend this by allowing a model to automatically (soft-)search for parts of a source sentence that are relevant to predicting a target word, without having to form these parts as a hard segment explicitly. With this new approach, we achieve a translation performance comparable to the existing state-of-the-art phrase-based system on the task of English-to-French translation. Furthermore, qualitative analysis reveals that the (soft-)alignments found by the model agree well with our intuition.
@article{Bahdanau2016,
  abstract      = {Neural machine translation is a recently proposed approach to machine translation. Unlike the traditional statistical machine translation, the neural machine translation aims at building a single neural network that can be jointly tuned to maximize the translation performance. The models proposed recently for neural machine translation often belong to a family of encoder-decoders and consist of an encoder that encodes a source sentence into a fixed-length vector from which a decoder generates a translation. In this paper, we conjecture that the use of a fixed-length vector is a bottleneck in improving the performance of this basic encoder-decoder architecture, and propose to extend this by allowing a model to automatically (soft-)search for parts of a source sentence that are relevant to predicting a target word, without having to form these parts as a hard segment explicitly. With this new approach, we achieve a translation performance comparable to the existing state-of-the-art phrase-based system on the task of English-to-French translation. Furthermore, qualitative analysis reveals that the (soft-)alignments found by the model agree well with our intuition.},
  added-at      = {2020-07-14T22:36:07.000+0200},
  archiveprefix = {arXiv},
  arxivid       = {1409.0473v7},
  author        = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  biburl        = {https://www.bibsonomy.org/bibtex/23627201ea916210e80d3cc49b58e40ad/jonaskaiser},
  doi           = {10.48550/arXiv.1409.0473},
  eprint        = {1409.0473v7},
  file          = {:C$\backslash$:/Users/Usuario/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Bahdanau, Cho, Bengio - 2014 - Neural Machine Translation by Jointly Learning to Align and Translate.pdf:pdf},
  interhash     = {d15f67689deab1bcce096aa5d17bd314},
  intrahash     = {3627201ea916210e80d3cc49b58e40ad},
  journal       = {CoRR},
  keywords      = {thema:transformer},
  month         = may,
  timestamp     = {2020-07-14T22:36:07.000+0200},
  title         = {Neural Machine Translation by Jointly Learning to Align and Translate},
  url           = {http://arxiv.org/abs/1409.0473},
  volume        = {abs/1409.0473},
  year          = 2016
}