Enriching existing medical terminology knowledge bases (KBs) is an important
and never-ending task for clinical research because new terminology aliases may
be continually added and standard terminologies may be newly renamed. In this
paper, we propose a novel automatic terminology enriching approach to
supplement a set of terminologies to KBs. Specifically, terminology and entity
characters are first fed into a pre-trained language model to obtain semantic
embeddings. The pre-trained model is used again to initialize the terminology
and entity representations, which are then further embedded through a graph
convolutional network to obtain structure embeddings. Afterwards, both semantic
and structure embeddings are combined to measure the relevancy between the
terminology and the entity. Finally, the optimal alignment is achieved based on
the order of relevancy between the terminology and all the entities in the KB.
Experimental results on a clinical indicator terminology KB, collected from 38
top-class hospitals of Shanghai Hospital Development Center, show that our
proposed approach outperforms baseline methods and can effectively enrich the
KB.
%0 Generic
%1 zhang2019enriching
%A Zhang, Jiaying
%A Zhang, Zhixing
%A Zhang, Huanhuan
%A Ma, Zhiyuan
%A Zhou, Yangming
%A He, Ping
%D 2019
%K embedding graph knowledge medical proposal:bmbf2019
%T Enriching Medical Terminology Knowledge Bases via Pre-trained Language
Model and Graph Convolutional Network
%U http://arxiv.org/abs/1909.00615
%X Enriching existing medical terminology knowledge bases (KBs) is an important
and never-ending work for clinical research because new terminology alias may
be continually added and standard terminologies may be newly renamed. In this
paper, we propose a novel automatic terminology enriching approach to
supplement a set of terminologies to KBs. Specifically, terminology and entity
characters are first fed into pre-trained language model to obtain semantic
embedding. The pre-trained model is used again to initialize the terminology
and entity representations, then they are further embedded through graph
convolutional network to gain structure embedding. Afterwards, both semantic
and structure embeddings are combined to measure the relevancy between the
terminology and the entity. Finally, the optimal alignment is achieved based on
the order of relevancy between the terminology and all the entities in the KB.
Experimental results on clinical indicator terminology KB, collected from 38
top-class hospitals of Shanghai Hospital Development Center, show that our
proposed approach outperforms baseline methods and can effectively enrich the
KB.
@comment{arXiv preprint 1909.00615. The biburl field points at the BibSonomy
record this entry appears to have been exported from. The title spelling was
corrected from the harvested "Medcial", and the arXiv identifier was moved out
of the note field into eprint/archiveprefix.}
@misc{zhang2019enriching,
  author        = {Zhang, Jiaying and Zhang, Zhixing and Zhang, Huanhuan and Ma, Zhiyuan and Zhou, Yangming and He, Ping},
  title         = {Enriching Medical Terminology Knowledge Bases via Pre-trained Language Model and Graph Convolutional Network},
  year          = {2019},
  eprint        = {1909.00615},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1909.00615},
  note          = {8 pages, submitted to BIBM 2019},
  keywords      = {embedding graph knowledge medical proposal:bmbf2019},
  abstract      = {Enriching existing medical terminology knowledge bases (KBs) is an important
and never-ending work for clinical research because new terminology alias may
be continually added and standard terminologies may be newly renamed. In this
paper, we propose a novel automatic terminology enriching approach to
supplement a set of terminologies to KBs. Specifically, terminology and entity
characters are first fed into pre-trained language model to obtain semantic
embedding. The pre-trained model is used again to initialize the terminology
and entity representations, then they are further embedded through graph
convolutional network to gain structure embedding. Afterwards, both semantic
and structure embeddings are combined to measure the relevancy between the
terminology and the entity. Finally, the optimal alignment is achieved based on
the order of relevancy between the terminology and all the entities in the KB.
Experimental results on clinical indicator terminology KB, collected from 38
top-class hospitals of Shanghai Hospital Development Center, show that our
proposed approach outperforms baseline methods and can effectively enrich the
KB.},
  added-at      = {2019-10-22T21:35:01.000+0200},
  timestamp     = {2019-10-22T21:35:01.000+0200},
  biburl        = {https://www.bibsonomy.org/bibtex/2220e6819ed8ead68d4be4d484aea4605/nosebrain},
  interhash     = {15b2768194a47cb544356816388183ef},
  intrahash     = {220e6819ed8ead68d4be4d484aea4605},
}