This study reports the results of using minimum description length (MDL) analysis to model
unsupervised learning of the morphological segmentation of European languages, using corpora
ranging in size from 5,000 words to 500,000 words. We develop a set of heuristics that rapidly
develop a probabilistic morphological grammar, and use MDL as our primary tool to determine
whether the modifications proposed by the heuristics will be adopted or not. The resulting grammar
matches well the analysis that would be developed by a human morphologist.
In the final section, we discuss the relationship of this style of MDL grammatical analysis to
the notion of evaluation metric in early generative grammar.
%0 Journal Article
%1 Goldsmith2001
%A Goldsmith, John
%C Cambridge, MA, USA
%D 2001
%I MIT Press
%J Computational Linguistics
%K unsupervised morphology machinelearning
%N 2
%P 153--198
%R 10.1162/089120101750300490
%T Unsupervised learning of the morphology of a natural language
%V 27
%X This study reports the results of using minimum description length (MDL) analysis to model
unsupervised learning of the morphological segmentation of European languages, using corpora
ranging in size from 5,000 words to 500,000 words. We develop a set of heuristics that rapidly
develop a probabilistic morphological grammar, and use MDL as our primary tool to determine
whether the modifications proposed by the heuristics will be adopted or not. The resulting grammar
matches well the analysis that would be developed by a human morphologist.
In the final section, we discuss the relationship of this style of MDL grammatical analysis to
the notion of evaluation metric in early generative grammar.
@article{Goldsmith2001,
abstract = {This study reports the results of using minimum description length (MDL) analysis to model
unsupervised learning of the morphological segmentation of European languages, using corpora
ranging in size from 5,000 words to 500,000 words. We develop a set of heuristics that rapidly
develop a probabilistic morphological grammar, and use MDL as our primary tool to determine
whether the modifications proposed by the heuristics will be adopted or not. The resulting grammar
matches well the analysis that would be developed by a human morphologist.
In the final section, we discuss the relationship of this style of MDL grammatical analysis to
the notion of evaluation metric in early generative grammar.},
added-at = {2006-11-19T12:33:36.000+0100},
address = {Cambridge, MA, USA},
author = {Goldsmith, John},
biburl = {https://www.bibsonomy.org/bibtex/27c208c676f63a4226986e485d0bbe03a/tmalsburg},
doi = {10.1162/089120101750300490},
interhash = {434e3a887c7b37b29955f0a4542e834c},
intrahash = {7c208c676f63a4226986e485d0bbe03a},
issn = {0891-2017},
journal = {Computational Linguistics},
keywords = {unsupervised morphology machinelearning},
number = 2,
pages = {153--198},
publisher = {MIT Press},
timestamp = {2006-11-19T12:33:36.000+0100},
title = {Unsupervised learning of the morphology of a natural language},
volume = 27,
year = 2001
}