There is a growing consensus that significant, rapid progress can be made in both text understanding and spoken language understanding by investigating those phenomena that occur most centrally in naturally occurring unconstrained materials and by attempting to automatically extract information about language from very large corpora. Such corpora are beginning to serve as important research tools for investigators in natural language processing, speech recognition, and integrated spoken language systems, as well as in theoretical linguistics. Annotated corpora promise to be valuable for enterprises as diverse as the automatic construction of statistical models for the grammar of the written and the colloquial spoken language, the development of explicit formal theories of the differing grammars of writing and speech, the investigation of prosodic phenomena in speech, and the evaluation and comparison of the adequacy of parsing models.
%0 Journal Article
%1 marcus_building_1993
%A Marcus, Mitchell
%A Santorini, Beatrice
%A Marcinkiewicz, Mary
%D 1993
%J Computational Linguistics
%K pos_tagging
%N 2
%P 313--330
%T Building a large annotated corpus of English: the Penn Treebank
%U http://dblp.uni-trier.de/db/journals/coling/coling19.html#MarcusSM94
%V 19
%X There is a growing consensus that significant, rapid progress can be made in both text understanding and spoken language understanding by investigating those phenomena that occur most centrally in naturally occurring unconstrained materials and by attempting to automatically extract information about language from very large corpora. Such corpora are beginning to serve as important research tools for investigators in natural language processing, speech recognition, and integrated spoken language systems, as well as in theoretical linguistics. Annotated corpora promise to be valuable for enterprises as diverse as the automatic construction of statistical models for the grammar of the written and the colloquial spoken language, the development of explicit formal theories of the differing grammars of writing and speech, the investigation of prosodic phenomena in speech, and the evaluation and comparison of the adequacy of parsing models.
% Journal article: Marcus, Santorini and Marcinkiewicz (1993),
% Computational Linguistics 19(2), pp. 313--330.
% The citation key and the export metadata fields (added-at, biburl,
% interhash, intrahash, timestamp) are kept verbatim from the original record.
@article{marcus_building_1993,
  author    = {Marcus, Mitchell P. and Santorini, Beatrice and Marcinkiewicz, Mary Ann},
  title     = {Building a Large Annotated Corpus of {English}: The {Penn} {Treebank}},
  journal   = {Computational Linguistics},
  volume    = {19},
  number    = {2},
  pages     = {313--330},
  year      = {1993},
  language  = {english},
  keywords  = {pos_tagging},
  url       = {http://dblp.uni-trier.de/db/journals/coling/coling19.html#MarcusSM94},
  abstract  = {There is a growing consensus that significant, rapid progress can be made in both text understanding and spoken language understanding by investigating those phenomena that occur most centrally in naturally occurring unconstrained materials and by attempting to automatically extract information about language from very large corpora. Such corpora are beginning to serve as important research tools for investigators in natural language processing, speech recognition, and integrated spoken language systems, as well as in theoretical linguistics. Annotated corpora promise to be valuable for enterprises as diverse as the automatic construction of statistical models for the grammar of the written and the colloquial spoken language, the development of explicit formal theories of the differing grammars of writing and speech, the investigation of prosodic phenomena in speech, and the evaluation and comparison of the adequacy of parsing models.},
  added-at  = {2018-11-04T17:02:36.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2d4f4206566c60141f82342153e38717a/lepsky},
  interhash = {e15687d066c2619723bd00c8d536efdb},
  intrahash = {d4f4206566c60141f82342153e38717a},
  timestamp = {2018-11-07T09:17:50.000+0100},
}