Pre-trained text encoders have rapidly advanced the state of the art on many
NLP tasks. We focus on one such model, BERT, and aim to quantify where
linguistic information is captured within the network. We find that the model
represents the steps of the traditional NLP pipeline in an interpretable and
localizable way, and that the regions responsible for each step appear in the
expected sequence: POS tagging, parsing, NER, semantic roles, then coreference.
Qualitative analysis reveals that the model can and often does adjust this
pipeline dynamically, revising lower-level decisions on the basis of
disambiguating information from higher-level representations.
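The analysis described above works by probing each layer of the network for specific linguistic tasks and comparing where each task is best decoded. Below is a minimal sketch of that idea in Python, assuming the HuggingFace transformers library and bert-base-uncased; the paper's actual edge-probing setup with scalar mixing is more elaborate, and the probe structure and tag count here are illustrative assumptions, not the authors' implementation.

import torch
from transformers import AutoModel, AutoTokenizer

# Frozen encoder; request the hidden states of every layer.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
model.eval()

inputs = tokenizer("BERT rediscovers the classical NLP pipeline", return_tensors="pt")
with torch.no_grad():
    # Tuple of tensors: the embedding layer plus one [batch, seq, hidden] tensor per layer.
    hidden_states = model(**inputs).hidden_states

# One linear probe per layer; 17 is a hypothetical tag inventory (e.g. Universal POS).
# Training the probes on a labeled corpus, with BERT kept frozen, is omitted here;
# comparing the trained probes' accuracies across layers indicates where in the
# network a given kind of linguistic information is most accessible.
probes = [torch.nn.Linear(model.config.hidden_size, 17) for _ in hidden_states]
logits_per_layer = [probe(h) for probe, h in zip(probes, hidden_states)]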
@misc{tenney2019rediscovers,
author = {Tenney, Ian and Das, Dipanjan and Pavlick, Ellie},
keywords = {dl nlp},
note = {arXiv:1905.05950; accepted to ACL 2019},
title = {BERT Rediscovers the Classical NLP Pipeline},
url = {http://arxiv.org/abs/1905.05950},
year = 2019
}