We introduce a new language representation model called BERT, which stands
for Bidirectional Encoder Representations from Transformers. Unlike recent
language representation models, BERT is designed to pre-train deep
bidirectional representations by jointly conditioning on both left and right
context in all layers. As a result, the pre-trained BERT representations can be
fine-tuned with just one additional output layer to create state-of-the-art
models for a wide range of tasks, such as question answering and language
inference, without substantial task-specific architecture modifications.
BERT is conceptually simple and empirically powerful. It obtains new
state-of-the-art results on eleven natural language processing tasks, including
pushing the GLUE benchmark to 80.4% (7.6% absolute improvement), MultiNLI
accuracy to 86.7% (5.6% absolute improvement) and the SQuAD v1.1 question
answering Test F1 to 93.2 (1.5% absolute improvement), outperforming human
performance by 2.0%.
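To make the fine-tuning claim above concrete, the following is a minimal sketch (not taken from the paper) of adding a single task-specific output layer on top of a pre-trained BERT encoder. It assumes PyTorch and the Hugging Face transformers library together with the released bert-base-uncased checkpoint; the BertClassifier wrapper and the toy premise/hypothesis pair are illustrative assumptions only.

import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
encoder = BertModel.from_pretrained("bert-base-uncased")


class BertClassifier(nn.Module):
    """Pre-trained BERT encoder plus one task-specific linear output layer."""

    def __init__(self, encoder, num_labels=2):
        super().__init__()
        self.encoder = encoder
        # The single additional output layer the abstract refers to:
        # a linear map from the pooled [CLS] representation to label logits.
        self.classifier = nn.Linear(encoder.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        outputs = self.encoder(input_ids=input_ids,
                               attention_mask=attention_mask,
                               token_type_ids=token_type_ids)
        pooled = outputs.pooler_output      # sentence-level representation
        return self.classifier(pooled)      # logits for the downstream task


model = BertClassifier(encoder, num_labels=2)

# One illustrative fine-tuning step on a toy premise/hypothesis pair.
batch = tokenizer("A man is playing a guitar.", "A person makes music.",
                  return_tensors="pt")
labels = torch.tensor([1])                  # hypothetical "entailment" label
logits = model(**batch)
loss = nn.functional.cross_entropy(logits, labels)
loss.backward()  # gradients reach both the new layer and the pre-trained encoder

During fine-tuning, both the newly added output layer and all pre-trained encoder parameters are updated end to end, which is the recipe the abstract describes for the downstream tasks.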
Description
[1810.04805] BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
@misc{devlin2018pretraining,
added-at = {2019-02-05T23:35:51.000+0100},
author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
biburl = {https://www.bibsonomy.org/bibtex/210c860e3f390c6fbfd78a3b91ab9b0af/albinzehe},
keywords = {bert elmo embeddings kallimachos nlp proposal-knowledge wordembeddings},
note = {cite arxiv:1810.04805. Comment: 13 pages},
timestamp = {2020-07-28T14:17:24.000+0200},
title = {BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding},
url = {http://arxiv.org/abs/1810.04805},
year = 2018
}