The success of long short-term memory (LSTM) neural networks in language processing is typically attributed to their ability to capture long-distance statistical regularities. Linguistic regularities are often sensitive to syntactic structure; can such dependencies be captured by LSTMs, which do not have explicit structural representations? We begin addressing this question using number agreement in English subject-verb dependencies. We probe the architecture’s grammatical competence both using training objectives with an explicit grammatical target (number prediction, grammaticality judgments) and using language models. In the strongly supervised settings, the LSTM achieved very high overall accuracy (less than 1% errors), but errors increased when sequential and structural information conflicted. The frequency of such errors rose sharply in the language-modeling setting. We conclude that LSTMs can capture a non-trivial amount of grammatical structure given targeted supervision, but stronger architectures may be required to further reduce errors; furthermore, the language modeling signal is insufficient for capturing syntax-sensitive dependencies, and should be supplemented with more direct supervision if such dependencies need to be captured.
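
The paper's main supervised probe translates directly into code. Below is a minimal sketch of the number-prediction objective: an LSTM reads the words preceding a verb and classifies the upcoming verb as singular or plural. The PyTorch model, toy vocabulary, and dimensions are invented for illustration and are not the authors' implementation, which trains on Wikipedia sentences. The hard cases are sentences such as "the keys to the cabinet are ...", where the plural subject "keys" controls agreement but the singular "attractor" noun "cabinet" sits sequentially closer to the verb.

import torch
import torch.nn as nn

class NumberPredictor(nn.Module):
    # LSTM classifier for the number-prediction task: given the tokens
    # before a verb, predict singular (0) vs. plural (1).
    def __init__(self, vocab_size, embed_dim=50, hidden_dim=50):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.out = nn.Linear(hidden_dim, 2)

    def forward(self, token_ids):
        embedded = self.embed(token_ids)      # (batch, seq, embed_dim)
        _, (h_n, _) = self.lstm(embedded)     # final hidden state
        return self.out(h_n[-1])              # (batch, 2) logits

# Toy training step on "the keys to the cabinet ___" (gold: plural).
vocab = {"the": 0, "keys": 1, "to": 2, "cabinet": 3}
prefix = torch.tensor([[0, 1, 2, 0, 3]])
model = NumberPredictor(vocab_size=len(vocab))
loss = nn.CrossEntropyLoss()(model(prefix), torch.tensor([1]))
loss.backward()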
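The language-modeling setting drops the explicit grammatical target: a plain next-word LM is trained, and agreement is then probed by checking whether the model assigns the correct verb form a higher probability than the incorrect one at the verb position. A sketch of that comparison, again with an invented toy model and vocabulary rather than the paper's setup:

import torch
import torch.nn as nn

class TinyLM(nn.Module):
    # Toy next-word LSTM language model (illustrative and untrained).
    def __init__(self, vocab_size, dim=50):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, dim)
        self.lstm = nn.LSTM(dim, dim, batch_first=True)
        self.out = nn.Linear(dim, vocab_size)

    def forward(self, ids):
        h, _ = self.lstm(self.embed(ids))
        return self.out(h)                    # (batch, seq, vocab) logits

def prefers_correct_form(lm, prefix_ids, correct_id, wrong_id):
    # Comparing logits is equivalent to comparing softmax probabilities.
    with torch.no_grad():
        next_word = lm(prefix_ids)[0, -1]     # scores after the prefix
    return bool(next_word[correct_id] > next_word[wrong_id])

# "the keys to the cabinet ___": does the LM prefer "are" over "is"?
vocab = {"the": 0, "keys": 1, "to": 2, "cabinet": 3, "are": 4, "is": 5}
prefix = torch.tensor([[0, 1, 2, 0, 3]])
print(prefers_correct_form(TinyLM(len(vocab)), prefix, correct_id=4, wrong_id=5))
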
@article{journals/tacl/LinzenDG16,
author = {Linzen, Tal and Dupoux, Emmanuel and Goldberg, Yoav},
ee = {https://transacl.org/ojs/index.php/tacl/article/view/972},
journal = {Transactions of the Association for Computational Linguistics},
keywords = {deep_learning nlp rnn},
pages = {521--535},
title = {Assessing the Ability of LSTMs to Learn Syntax-Sensitive Dependencies},
url = {http://dblp.uni-trier.de/db/journals/tacl/tacl4.html#LinzenDG16},
volume = 4,
year = 2016
}