Linked Open Data (LOD) comprises of an unprecedented volume of structured
datasets on the Web. However, these datasets are of varying quality
ranging from extensively curated datasets to crowdsourced and even
extracted data of relatively low quality. We present a methodology
for assessing the quality of linked data resources, which comprises
of a manual and a semi-automatic process. The first phase includes
the detection of common quality problems and their representation
in a quality problem taxonomy. In the manual process, the second
phase comprises of the evaluation of a large number of individual
resources, according to the quality problem taxonomy via crowdsourcing.
This process is accompanied by a tool wherein a user assesses an
individual resource and evaluates each fact for correctness. The
semi-automatic process involves the generation and verification of
schema axioms. We report the results obtained by applying this methodology
to DBpedia. We identified 17 data quality problem types and 58 users
assessed a total of 521 resources. Overall, 11.93% of the evaluated
DBpedia triples were identified to have some quality issues. Applying
the semi-automatic component yielded a total of 222,982 triples that
have a high probability to be incorrect. In particular, we found
that problems such as object values being incorrectly extracted,
irrelevant extraction of information and broken links were the most
recurring quality problems. With this study, we not only aim to assess
the quality of this sample of DBpedia resources but also adopt an
agile methodology to improve the quality in future versions by regularly
providing feedback to the DBpedia maintainers.
%0 Conference Paper
%1 zaveri2013
%A Zaveri, Amrapali
%A Kontokostas, Dimitris
%A Sherif, Mohamed A.
%A Bühmann, Lorenz
%A Morsey, Mohamed
%A Auer, Sören
%A Lehmann, Jens
%B Proceedings of the 9th International Conference on Semantic Systems (I-SEMANTICS)
%C New York, NY, USA
%D 2013
%E Sabou, Marta
%E Blomqvist, Eva
%E Di Noia, Tommaso
%E Sack, Harald
%E Pellegrini, Tassilo
%I ACM
%K 2013 auer buemann dbpediadq event_I-Semantics group_aksw kontokostas lehmann lod2page morsey sherif zaveri
%P 97--104
%T User-driven Quality Evaluation of DBpedia
%U http://svn.aksw.org/papers/2013/ISemantics_DBpediaDQ/public.pdf
%X Linked Open Data (LOD) comprises of an unprecedented volume of structured
datasets on the Web. However, these datasets are of varying quality
ranging from extensively curated datasets to crowdsourced and even
extracted data of relatively low quality. We present a methodology
for assessing the quality of linked data resources, which comprises
of a manual and a semi-automatic process. The first phase includes
the detection of common quality problems and their representation
in a quality problem taxonomy. In the manual process, the second
phase comprises of the evaluation of a large number of individual
resources, according to the quality problem taxonomy via crowdsourcing.
This process is accompanied by a tool wherein a user assesses an
individual resource and evaluates each fact for correctness. The
semi-automatic process involves the generation and verification of
schema axioms. We report the results obtained by applying this methodology
to DBpedia. We identified 17 data quality problem types and 58 users
assessed a total of 521 resources. Overall, 11.93% of the evaluated
DBpedia triples were identified to have some quality issues. Applying
the semi-automatic component yielded a total of 222,982 triples that
have a high probability to be incorrect. In particular, we found
that problems such as object values being incorrectly extracted,
irrelevant extraction of information and broken links were the most
recurring quality problems. With this study, we not only aim to assess
the quality of this sample of DBpedia resources but also adopt an
agile methodology to improve the quality in future versions by regularly
providing feedback to the DBpedia maintainers.
@inproceedings{zaveri2013,
abstract = {Linked Open Data (LOD) comprises of an unprecedented volume of structured
datasets on the Web. However, these datasets are of varying quality
ranging from extensively curated datasets to crowdsourced and even
extracted data of relatively low quality. We present a methodology
for assessing the quality of linked data resources, which comprises
of a manual and a semi-automatic process. The first phase includes
the detection of common quality problems and their representation
in a quality problem taxonomy. In the manual process, the second
phase comprises of the evaluation of a large number of individual
resources, according to the quality problem taxonomy via crowdsourcing.
This process is accompanied by a tool wherein a user assesses an
individual resource and evaluates each fact for correctness. The
semi-automatic process involves the generation and verification of
schema axioms. We report the results obtained by applying this methodology
to DBpedia. We identified 17 data quality problem types and 58 users
assessed a total of 521 resources. Overall, 11.93\% of the evaluated
DBpedia triples were identified to have some quality issues. Applying
the semi-automatic component yielded a total of 222,982 triples that
have a high probability to be incorrect. In particular, we found
that problems such as object values being incorrectly extracted,
irrelevant extraction of information and broken links were the most
recurring quality problems. With this study, we not only aim to assess
the quality of this sample of DBpedia resources but also adopt an
agile methodology to improve the quality in future versions by regularly
providing feedback to the DBpedia maintainers.},
added-at = {2017-01-27T23:28:47.000+0100},
address = {New York, NY, USA},
author = {Zaveri, Amrapali and Kontokostas, Dimitris and Sherif, Mohamed A. and B{\"u}hmann, Lorenz and Morsey, Mohamed and Auer, S{\"o}ren and Lehmann, Jens},
bdsk-url-1 = {http://svn.aksw.org/papers/2013/ISemantics_DBpediaDQ/public.pdf},
biburl = {https://www.bibsonomy.org/bibtex/262b1f872f4bd0c093d281f9d8b739c13/soeren},
booktitle = {Proceedings of the 9th International Conference on Semantic Systems ({I-SEMANTICS})},
crossref = {ISEMANTICS2013},
date-modified = {2013-07-11 19:42:39 +0000},
doi = {10.1145/2506182.2506195},
editor = {Sabou, Marta and Blomqvist, Eva and Di Noia, Tommaso and Sack, Harald and Pellegrini, Tassilo},
ee = {http://doi.acm.org/10.1145/2506182.2506195},
eventdate = {2013-09-04/2013-09-06},
interhash = {b581532f52cb71aeecff54948b06c173},
intrahash = {62b1f872f4bd0c093d281f9d8b739c13},
keywords = {2013 auer buemann dbpediadq event_I-Semantics group_aksw kontokostas lehmann lod2page morsey sherif zaveri},
owner = {soeren},
pages = {97--104},
publisher = {ACM},
timestamp = {2017-01-27T23:30:12.000+0100},
title = {User-driven Quality Evaluation of {DBpedia}},
url = {http://svn.aksw.org/papers/2013/ISemantics_DBpediaDQ/public.pdf},
venue = {Graz, Austria},
year = 2013
}