M. Baroni, F. Chantree, A. Kilgarriff, and S. Sharoff. Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08), Marrakech, Morocco, European Language Resources Association (ELRA) (May 2008).
Abstract
Cleaneval is a shared task and competitive evaluation on the topic of cleaning arbitrary web pages, with the goal of preparing web data for use as a corpus for linguistic and language technology research and development. The first exercise took place in 2007. We describe how it was set up, results, and lessons learnt.
Description
Cleaneval: a Competition for Cleaning Web Pages - ACL Anthology
%0 Conference Paper
%1 baroni-etal-2008-cleaneval
%A Baroni, Marco
%A Chantree, Francis
%A Kilgarriff, Adam
%A Sharoff, Serge
%B Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)
%C Marrakech, Morocco
%D 2008
%I European Language Resources Association (ELRA)
%K boilerplate clean cleaneval data dataset set
%T Cleaneval: a Competition for Cleaning Web Pages
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/162_paper.pdf
%X Cleaneval is a shared task and competitive evaluation on the topic of cleaning arbitrary web pages, with the goal of preparing web data for use as a corpus for linguistic and language technology research and development. The first exercise took place in 2007. We describe how it was set up, results, and lessons learnt.
% Conference paper (LREC 2008); record exported from BibSonomy (see biburl).
@inproceedings{baroni-etal-2008-cleaneval,
  abstract    = {Cleaneval is a shared task and competitive evaluation on the topic of cleaning arbitrary web pages, with the goal of preparing web data for use as a corpus for linguistic and language technology research and development. The first exercise took place in 2007. We describe how it was set up, results, and lessons learnt.},
  added-at    = {2020-12-11T16:17:17.000+0100},
  address     = {Marrakech, Morocco},
  author      = {Baroni, Marco and Chantree, Francis and Kilgarriff, Adam and Sharoff, Serge},
  biburl      = {https://www.bibsonomy.org/bibtex/2b722933f38e54335036eedec7568a48e/parismic},
  booktitle   = {Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)},
  description = {Cleaneval: a Competition for Cleaning Web Pages - ACL Anthology},
  interhash   = {3635e9cade0cd297f1af6c624241b69c},
  intrahash   = {b722933f38e54335036eedec7568a48e},
  keywords    = {boilerplate clean cleaneval data dataset set},
  month       = may,
  publisher   = {European Language Resources Association (ELRA)},
  timestamp   = {2020-12-11T16:18:35.000+0100},
  title       = {{Cleaneval}: a Competition for Cleaning Web Pages},
  url         = {http://www.lrec-conf.org/proceedings/lrec2008/pdf/162_paper.pdf},
  year        = {2008},
}