R. Brown. Proceedings of the Ninth International Conference on Theoretical and Methodological Issues in Machine Translation (2002)
Abstract
This paper presents a method for splitting compound words into their constituents based on cognate words in the other language of a parallel corpus. A minor extension to the method allows the decompounding of words which do not have cognates in the other language. By decompounding the training corpus for an Example-Based MT system, the incidence of word alignment failure can be substantially reduced, yielding a modest improvement in performance.
%0 Conference Paper
%1 Brown:02
%A Brown, Ralf D.
%B Proceedings of the Ninth International Conference on Theoretical and Methodological Issues in Machine Translation
%D 2002
%K 2002 compounds mt
%T Corpus-Driven Splitting of Compound Words
%U http://www-2.cs.cmu.edu/~ralf/papers/tmi02.pdf
%X This paper presents a method for splitting compound words into their constituents based on cognate words in the other language of a parallel corpus. A minor extension to the method allows the decompounding of words which do not have cognates in the other language. By decompounding the training corpus for an Example-Based MT system, the incidence of word alignment failure can be substantially reduced, yielding a modest improvement in performance.
% Conference paper entry. Bibliographic fields come first; the trailing
% group (added-at .. intrahash) is export bookkeeping, not citation data.
@inproceedings{Brown:02,
  author    = {Brown, Ralf D.},
  title     = {Corpus-Driven Splitting of Compound Words},
  booktitle = {Proceedings of the Ninth International Conference on Theoretical and Methodological Issues in Machine Translation},
  year      = {2002},
  url       = {http://www-2.cs.cmu.edu/~ralf/papers/tmi02.pdf},
  abstract  = {This paper presents a method for splitting compound words into their constituents based on cognate words in the other language of a parallel corpus. A minor extension to the method allows the decompounding of words which do not have cognates in the other language. By decompounding the training corpus for an Example-Based MT system, the incidence of word alignment failure can be substantially reduced, yielding a modest improvement in performance.},
  keywords  = {2002 compounds mt},
  added-at  = {2007-09-23T15:04:53.000+0200},
  timestamp = {2007-09-23T15:04:53.000+0200},
  biburl    = {https://www.bibsonomy.org/bibtex/29c7d4f177786c4c601ea5a21008711b2/seandalai},
  interhash = {4cf34bf2c84ea4dc6d150b794c1510d7},
  intrahash = {9c7d4f177786c4c601ea5a21008711b2},
}