Accurate and computationally efficient approaches in discovering relationships between biological objects from text documents are important for biologists to develop biological models. This paper presents a novel approach to extract relationships between multiple biological objects that are present in a text document. The approach involves object identification, reference resolution, ontology and synonym discovery, and extracting object-object relationships. Hidden Markov Models (HMMs), dictionaries, and N-Gram models are used to set the framework to tackle the complex task of extracting object-object relationships. Experiments were carried out using a corpus of one thousand Medline abstracts. Intermediate results were obtained for the object identification process, synonym discovery, and finally the relationship extraction. For a corpus of thousand abstracts, 53 relationships were extracted of which 43 were correct, giving a specificity of 81%. The approach is both adaptable and scalable to new problems as opposed to rule-based methods.
%0 Conference Paper
%1 palakal.bio.rel.2002
%A Palakal, Mathew
%A Stephens, Matthew
%A Mukhopadhyay, Snehasis
%A Raje, Rajeev
%A Rhodes, Simon
%B CSB '02: Proceedings of the IEEE Computer Society Conference on Bioinformatics
%C Washington, DC, USA
%D 2002
%I IEEE Computer Society
%K CAT CAT-REL-ML-HMM-directed-rel CAT-REL-STAT-n-gram
%P 97
%T A Multi-Level Text Mining Method to Extract Biological Relationships
%X Accurate and computationally efficient approaches in discovering relationships between biological objects from text documents are important for biologists to develop biological models. This paper presents a novel approach to extract relationships between multiple biological objects that are present in a text document. The approach involves object identification, reference resolution, ontology and synonym discovery, and extracting object-object relationships. Hidden Markov Models (HMMs), dictionaries, and N-Gram models are used to set the framework to tackle the complex task of extracting object-object relationships. Experiments were carried out using a corpus of one thousand Medline abstracts. Intermediate results were obtained for the object identification process, synonym discovery, and finally the relationship extraction. For a corpus of thousand abstracts, 53 relationships were extracted of which 43 were correct, giving a specificity of 81%. The approach is both adaptable and scalable to new problems as opposed to rule-based methods.
%@ 0-7695-1653-X
@comment{Conference paper record. The percent sign in the abstract is escaped
so the field is safe to typeset, and acronyms in booktitle are brace-protected.}
@inproceedings{palakal.bio.rel.2002,
  abstract  = {Accurate and computationally efficient approaches in discovering relationships between biological objects from text documents are important for biologists to develop biological models. This paper presents a novel approach to extract relationships between multiple biological objects that are present in a text document. The approach involves object identification, reference resolution, ontology and synonym discovery, and extracting object-object relationships. Hidden Markov Models (HMMs), dictionaries, and N-Gram models are used to set the framework to tackle the complex task of extracting object-object relationships. Experiments were carried out using a corpus of one thousand Medline abstracts. Intermediate results were obtained for the object identification process, synonym discovery, and finally the relationship extraction. For a corpus of thousand abstracts, 53 relationships were extracted of which 43 were correct, giving a specificity of 81\%. The approach is both adaptable and scalable to new problems as opposed to rule-based methods.},
  added-at  = {2010-11-07T23:41:18.000+0100},
  address   = {Washington, DC, USA},
  author    = {Palakal, Mathew and Stephens, Matthew and Mukhopadhyay, Snehasis and Raje, Rajeev and Rhodes, Simon},
  biburl    = {https://www.bibsonomy.org/bibtex/20c968cbd435ad9abbcae938ca4a85c3f/huiyangsfsu},
  booktitle = {{CSB} '02: Proceedings of the {IEEE} Computer Society Conference on Bioinformatics},
  interhash = {7ec8158642b66f1cd9daff1fc59271c3},
  intrahash = {0c968cbd435ad9abbcae938ca4a85c3f},
  isbn      = {0-7695-1653-X},
  keywords  = {CAT CAT-REL-ML-HMM-directed-rel CAT-REL-STAT-n-gram},
  pages     = {97},
  publisher = {IEEE Computer Society},
  timestamp = {2010-11-09T06:05:57.000+0100},
  title     = {A Multi-Level Text Mining Method to Extract Biological Relationships},
  year      = {2002}
}