Answering questions asked from instructional corpora such as E-manuals, recipe books, etc., has been far less studied than open-domain factoid context-based question answering. This can be primarily attributed to the absence of standard benchmark datasets. In this paper, we meticulously create a large amount of data connected with E-manuals and develop a suitable algorithm to exploit it. We collect E-Manual Corpus, a huge corpus of 307,957 E-manuals, and pretrain RoBERTa on this large corpus. We create various benchmark QA datasets which include question answer pairs curated by experts based upon two E-manuals, real user questions from Community Question Answering Forum pertaining to E-manuals etc. We introduce EMQAP (E-Manual Question Answering Pipeline) that answers questions pertaining to electronics devices. Built upon the pretrained RoBERTa, it harbors a supervised multi-task learning framework which efficiently performs the dual tasks of identifying the section in the E-manual where the answer can be found and the exact answer span within that section. For E-Manual annotated question-answer pairs, we show an improvement of about 40% in ROUGE-L F1 scores over most competitive baseline. We perform a detailed ablation study and establish the versatility of EMQAP across different circumstances. The code and datasets are shared at https://github.com/abhi1nandy2/EMNLP-2021-Findings, and the corresponding project website is https://sites.google.com/view/emanualqa/home.
%0 Conference Proceedings
%1 nandy2021question
%A Nandy, Abhilash
%A Sharma, Soumya
%A Maddhashiya, Shubham
%A Sachdeva, Kapil
%A Goyal, Pawan
%A Ganguly, NIloy
%C Association for Computational Linguistics
%D 2021
%K leibnizailab myown
%P 4600-4609
%T Question Answering over Electronic Devices: A New Benchmark Dataset and a Multi-Task Learning based QA Framework
%U https://aclanthology.org/2021.findings-emnlp.392
%X Answering questions asked from instructional corpora such as E-manuals, recipe books, etc., has been far less studied than open-domain factoid context-based question answering. This can be primarily attributed to the absence of standard benchmark datasets. In this paper, we meticulously create a large amount of data connected with E-manuals and develop a suitable algorithm to exploit it. We collect E-Manual Corpus, a huge corpus of 307,957 E-manuals, and pretrain RoBERTa on this large corpus. We create various benchmark QA datasets which include question answer pairs curated by experts based upon two E-manuals, real user questions from Community Question Answering Forum pertaining to E-manuals etc. We introduce EMQAP (E-Manual Question Answering Pipeline) that answers questions pertaining to electronics devices. Built upon the pretrained RoBERTa, it harbors a supervised multi-task learning framework which efficiently performs the dual tasks of identifying the section in the E-manual where the answer can be found and the exact answer span within that section. For E-Manual annotated question-answer pairs, we show an improvement of about 40% in ROUGE-L F1 scores over most competitive baseline. We perform a detailed ablation study and establish the versatility of EMQAP across different circumstances. The code and datasets are shared at https://github.com/abhi1nandy2/EMNLP-2021-Findings, and the corresponding project website is https://sites.google.com/view/emanualqa/home.
@proceedings{nandy2021question,
abstract = {Answering questions asked from instructional corpora such as E-manuals, recipe books, etc., has been far less studied than open-domain factoid context-based question answering. This can be primarily attributed to the absence of standard benchmark datasets. In this paper, we meticulously create a large amount of data connected with E-manuals and develop a suitable algorithm to exploit it. We collect E-Manual Corpus, a huge corpus of 307,957 E-manuals, and pretrain RoBERTa on this large corpus. We create various benchmark QA datasets which include question answer pairs curated by experts based upon two E-manuals, real user questions from Community Question Answering Forum pertaining to E-manuals etc. We introduce EMQAP (E-Manual Question Answering Pipeline) that answers questions pertaining to electronics devices. Built upon the pretrained RoBERTa, it harbors a supervised multi-task learning framework which efficiently performs the dual tasks of identifying the section in the E-manual where the answer can be found and the exact answer span within that section. For E-Manual annotated question-answer pairs, we show an improvement of about 40% in ROUGE-L F1 scores over most competitive baseline. We perform a detailed ablation study and establish the versatility of EMQAP across different circumstances. The code and datasets are shared at https://github.com/abhi1nandy2/EMNLP-2021-Findings, and the corresponding project website is https://sites.google.com/view/emanualqa/home.
},
added-at = {2022-02-24T21:35:29.000+0100},
address = {Association for Computational Linguistics},
author = {Nandy, Abhilash and Sharma, Soumya and Maddhashiya, Shubham and Sachdeva, Kapil and Goyal, Pawan and Ganguly, NIloy},
biburl = {https://www.bibsonomy.org/bibtex/23909bcff215b44297b847506f329afd3/niloy},
howpublished = {Punta Cana, Dominican Republic},
interhash = {4b901eea0be294db78d677a3a87125cd},
intrahash = {3909bcff215b44297b847506f329afd3},
keywords = {leibnizailab myown},
pages = {4600-4609},
timestamp = {2022-02-25T09:02:24.000+0100},
title = {Question Answering over Electronic Devices: A New Benchmark Dataset and a Multi-Task Learning based QA Framework},
url = {https://aclanthology.org/2021.findings-emnlp.392},
year = 2021
}