Retrieving topically relevant passages over a huge document collection is deemed to be of central importance to many information retrieval tasks, particularly to Question Answering (QA). Indeed, Passage Retrieval (PR) is a longstanding problem in QA, that has been widely studied over the last decades and still requires further efforts in order to enable a user to have a better chance to find a relevant answer to his human natural language question. This paper describes a successful attempt to improve PR and ranking for open domain QA by finding out the most relevant passage to a given question. It uses a support vector machine (SVM) model that incorporates a set of different powerful text similarity measures constituting our features. These latter include our new proposed n-gram based metric relying on the dependency degree of n-gram words of the question in the passage, as well as other lexical and semantic features which have already been proven successful in a recent Semantic Textual Similarity task (STS). We implemented a system named PRSYS to validate our approach in different languages. Our experimental evaluations have shown a comparable performance with other similar systems endowing with strong performance.
Natural Language Processing and Information Systems: 21st International Conference on Applications of Natural Language to Information Systems, NLDB 2016, Salford, UK, June 22-24, 2016, Proceedings
%0 Book Section
%1 Othman2016
%A Othman, Nouha
%A Faiz, Rim
%B Natural Language Processing and Information Systems: 21st International Conference on Applications of Natural Language to Information Systems, NLDB 2016, Salford, UK, June 22-24, 2016, Proceedings
%C Cham
%D 2016
%E Métais, Elisabeth
%E Meziane, Farid
%E Saraee, Mohamad
%E Sugumaran, Vijayan
%E Vadera, Sunil
%I Springer International Publishing
%K qa
%P 127-139
%R 10.1007/978-3-319-41754-7_11
%T A Multi-lingual Approach to Improve Passage Retrieval for Automatic Question Answering
%U https://doi.org/10.1007/978-3-319-41754-7_11
%X Retrieving topically relevant passages over a huge document collection is deemed to be of central importance to many information retrieval tasks, particularly to Question Answering (QA). Indeed, Passage Retrieval (PR) is a longstanding problem in QA, that has been widely studied over the last decades and still requires further efforts in order to enable a user to have a better chance to find a relevant answer to his human natural language question. This paper describes a successful attempt to improve PR and ranking for open domain QA by finding out the most relevant passage to a given question. It uses a support vector machine (SVM) model that incorporates a set of different powerful text similarity measures constituting our features. These latter include our new proposed n-gram based metric relying on the dependency degree of n-gram words of the question in the passage, as well as other lexical and semantic features which have already been proven successful in a recent Semantic Textual Similarity task (STS). We implemented a system named PRSYS to validate our approach in different languages. Our experimental evaluations have shown a comparable performance with other similar systems endowing with strong performance.
%@ 978-3-319-41754-7
@comment{
  Othman2016: paper by Othman and Faiz published in the NLDB 2016 proceedings
  volume (see booktitle). Typed as inproceedings because the record carries
  both an author list (the paper) and an editor list (the volume); the classic
  inbook type accepts only one of the two. The added-at, biburl, interhash,
  intrahash and timestamp fields are bookkeeping metadata that standard
  bibliography styles ignore.
}
@inproceedings{Othman2016,
  author    = {Othman, Nouha and Faiz, Rim},
  title     = {A Multi-lingual Approach to Improve Passage Retrieval for Automatic Question Answering},
  editor    = {M{\'e}tais, Elisabeth and Meziane, Farid and Saraee, Mohamad and Sugumaran, Vijayan and Vadera, Sunil},
  booktitle = {Natural Language Processing and Information Systems: 21st International Conference on Applications of Natural Language to Information Systems, {NLDB} 2016, {Salford}, {UK}, June 22-24, 2016, Proceedings},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2016},
  pages     = {127--139},
  isbn      = {978-3-319-41754-7},
  doi       = {10.1007/978-3-319-41754-7_11},
  url       = {https://doi.org/10.1007/978-3-319-41754-7_11},
  keywords  = {qa},
  abstract  = {Retrieving topically relevant passages over a huge document collection is deemed to be of central importance to many information retrieval tasks, particularly to Question Answering (QA). Indeed, Passage Retrieval (PR) is a longstanding problem in QA, that has been widely studied over the last decades and still requires further efforts in order to enable a user to have a better chance to find a relevant answer to his human natural language question. This paper describes a successful attempt to improve PR and ranking for open domain QA by finding out the most relevant passage to a given question. It uses a support vector machine (SVM) model that incorporates a set of different powerful text similarity measures constituting our features. These latter include our new proposed n-gram based metric relying on the dependency degree of n-gram words of the question in the passage, as well as other lexical and semantic features which have already been proven successful in a recent Semantic Textual Similarity task (STS). We implemented a system named PRSYS to validate our approach in different languages. Our experimental evaluations have shown a comparable performance with other similar systems endowing with strong performance.},
  added-at  = {2018-01-09T18:11:20.000+0100},
  timestamp = {2018-01-09T18:11:20.000+0100},
  biburl    = {https://www.bibsonomy.org/bibtex/2343bb59706fd91523055cb1625ef043e/defeatnelly},
  interhash = {3e0a43aacf9af501ff52a35832b15bdc},
  intrahash = {343bb59706fd91523055cb1625ef043e},
}