Pre-trained language models (LMs) have made significant advances in various
Natural Language Processing (NLP) domains, but it is unclear to what extent
they can infer formal semantics in ontologies, which are often used to
represent conceptual knowledge and serve as the schema of data graphs. To
investigate an LM's knowledge of ontologies, we propose OntoLAMA, a set of
inference-based probing tasks and datasets from ontology subsumption axioms
involving both atomic and complex concepts. We conduct extensive experiments on
ontologies of different domains and scales, and our results demonstrate that
LMs encode less background knowledge for Subsumption Inference (SI) than for
traditional Natural Language Inference (NLI), but improve significantly on SI
when given a small number of samples. We will open-source our
code and datasets.
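To make the probing setup more concrete, the sketch below shows one way a subsumption axiom could be verbalised as an NLI-style premise/hypothesis pair and scored with an off-the-shelf RoBERTa NLI model. The model choice (roberta-large-mnli), the templates, and the example axiom (the atomic subsumption "beer SubClassOf beverage") are illustrative assumptions for this sketch, not the paper's exact OntoLAMA construction.

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Illustrative checkpoint; the paper's keywords mention RoBERTa, but the
# exact models and templates used for OntoLAMA may differ.
MODEL_NAME = "roberta-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()

def subsumption_entailment(sub_concept: str, super_concept: str) -> float:
    """Verbalise the axiom 'sub_concept SubClassOf super_concept' as an NLI
    premise/hypothesis pair and return the model's entailment probability."""
    premise = f"X is a {sub_concept}."       # hypothetical verbalisation template
    hypothesis = f"X is a {super_concept}."  # hypothetical verbalisation template
    inputs = tokenizer(premise, hypothesis, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = logits.softmax(dim=-1).squeeze(0)
    # roberta-large-mnli label order: 0 = contradiction, 1 = neutral, 2 = entailment
    return probs[2].item()

# Atomic subsumption axiom: beer SubClassOf beverage.
# The correct direction should receive a higher entailment score than its reverse.
print(subsumption_entailment("beer", "beverage"))
print(subsumption_entailment("beverage", "beer"))

Complex concepts (e.g., those built from conjunctions or existential restrictions) would need richer verbalisation templates than the single-slot ones sketched above.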
@misc{he2023language,
author = {He, Yuan and Chen, Jiaoyan and Jiménez-Ruiz, Ernesto and Dong, Hang and Horrocks, Ian},
keywords = {biomedical description-logics gene-ontologies go knowledge language-models myown natural-language-inference ontology probing roberta subsumption},
note = {arXiv:2302.06761},
title = {Language Model Analysis for Ontology Subsumption Inference},
url = {http://arxiv.org/abs/2302.06761},
year = 2023
}