Language models pretrained on text from a wide variety of sources form the
foundation of today's NLP. In light of the success of these broad-coverage
models, we investigate whether it is still helpful to tailor a pretrained model
to the domain of a target task. We present a study across four domains
(biomedical and computer science publications, news, and reviews) and eight
classification tasks, showing that a second phase of pretraining in-domain
(domain-adaptive pretraining) leads to performance gains, under both high- and
low-resource settings. Moreover, adapting to the task's unlabeled data
(task-adaptive pretraining) improves performance even after domain-adaptive
pretraining. Finally, we show that adapting to a task corpus augmented using
simple data selection strategies is an effective alternative, especially when
resources for domain-adaptive pretraining might be unavailable. Overall, we
consistently find that multi-phase adaptive pretraining offers large gains in
task performance.
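The second-phase pretraining described above is, at its core, continued masked-language-model training of an already-pretrained encoder on unlabeled domain or task text before fine-tuning on the labeled task. The sketch below illustrates that idea with the Hugging Face transformers and datasets libraries; it is not the authors' original training setup, and the corpus path, batch size, learning rate, and epoch count are placeholder assumptions.

# Minimal sketch of domain-/task-adaptive pretraining: continue masked-LM
# training of a pretrained RoBERTa checkpoint on unlabeled in-domain text.
# "domain_corpus.txt" and all hyperparameters below are placeholder assumptions.
from datasets import load_dataset
from transformers import (AutoModelForMaskedLM, AutoTokenizer,
                          DataCollatorForLanguageModeling, Trainer,
                          TrainingArguments)

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModelForMaskedLM.from_pretrained("roberta-base")

# One document (or sentence) per line of unlabeled domain/task text.
corpus = load_dataset("text", data_files={"train": "domain_corpus.txt"})["train"]
corpus = corpus.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=512),
    batched=True,
    remove_columns=["text"],
)

# Dynamic masking, as in RoBERTa-style MLM pretraining.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

args = TrainingArguments(
    output_dir="roberta-dapt",          # adapted checkpoint, later fine-tuned on the task
    per_device_train_batch_size=8,
    num_train_epochs=1,
    learning_rate=5e-5,
)
Trainer(model=model, args=args, train_dataset=corpus, data_collator=collator).train()

After this step, the adapted checkpoint would be fine-tuned on the labeled classification task; task-adaptive pretraining is the same loop run on the task's own unlabeled text.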
@misc{gururangan2020pretraining,
author = {Gururangan, Suchin and Marasović, Ana and Swayamdipta, Swabha and Lo, Kyle and Beltagy, Iz and Downey, Doug and Smith, Noah A.},
keywords = {language models pretraining},
eprint = {2004.10964},
archiveprefix = {arXiv},
note = {ACL 2020},
title = {Don't Stop Pretraining: Adapt Language Models to Domains and Tasks},
year = 2020
}