When data is scarce or the alphabet is large, smoothing the probability estimates becomes unavoidable when estimating n-gram models. In this paper we propose a method that implements a form of smoothing by exploiting similarity information about the alphabet elements. The idea is to view the log-conditional probability function as a smooth function defined over the similarity graph. The proposed algorithm uses the eigenvectors of the similarity graph as the basis for expanding the log-conditional probability function, whose coefficients are found by solving a regularized logistic regression problem. The experimental results demonstrate the superiority of the method when the similarity graph contains relevant information, while the method remains competitive with state-of-the-art smoothing methods even in the absence of such information.
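To make the idea concrete, here is a minimal sketch of this kind of spectral smoothing, assuming a symmetric similarity matrix over the alphabet and, for brevity, an unconditional (unigram) model rather than full n-gram contexts; the Laplacian construction, the basis size K, the penalty weight lam, and the function name spectral_smooth are illustrative choices, not the paper's exact formulation:

import numpy as np
from scipy.optimize import minimize

def spectral_smooth(counts, W, K=8, lam=1.0):
    # Combinatorial graph Laplacian of the similarity matrix W.
    L = np.diag(W.sum(axis=1)) - W
    evals, evecs = np.linalg.eigh(L)          # eigenvalues in ascending order
    Phi, mu = evecs[:, :K], evals[:K]         # the K smoothest basis vectors

    def penalized_nll(w):
        logits = Phi @ w                      # log p(y) up to normalization
        log_z = np.log(np.exp(logits).sum())
        # Rougher eigenvectors (larger mu) are penalized more heavily.
        return -(counts @ (logits - log_z)) + lam * (mu * w ** 2).sum()

    w = minimize(penalized_nll, np.zeros(K), method="L-BFGS-B").x
    logits = Phi @ w
    return np.exp(logits - np.log(np.exp(logits).sum()))

# Toy example: a chain graph where neighboring symbols are similar.
A = 10
W = np.eye(A, k=1) + np.eye(A, k=-1)
counts = np.zeros(A)
counts[3] = 5.0                               # sparse observations
print(spectral_smooth(counts, W).round(3))    # mass spreads to similar symbols

The eigenvalue-weighted penalty is what enforces smoothness: coefficients on rough eigenvectors are shrunk toward zero, so probability mass leaks from observed symbols to their neighbors in the graph.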
@inproceedings{BiSzaSze07,
abstract = {When data is scarce or the alphabet is large, smoothing the probability estimates becomes unavoidable when estimating n-gram models. In this paper we propose a method that implements a form of smoothing by exploiting similarity information about the alphabet elements. The idea is to view the log-conditional probability function as a smooth function defined over the similarity graph. The proposed algorithm uses the eigenvectors of the similarity graph as the basis for expanding the log-conditional probability function, whose coefficients are found by solving a regularized logistic regression problem. The experimental results demonstrate the superiority of the method when the similarity graph contains relevant information, while the method remains competitive with state-of-the-art smoothing methods even in the absence of such information.},
author = {B{\'\i}r{\'o}, I. and Szamonek, Z. and Szepesv{\'a}ri, {Cs}.},
biburl = {https://www.bibsonomy.org/bibtex/27c001559e70aacb8722b9ed5f0212ecf/csaba},
booktitle = {IJCAI},
keywords = {basis functions, graph, learning, natural language processing, sequence prediction, smoothing, spectral theory},
pages = {1576--1581},
pdf = {papers/sequence-ijcai07.pdf},
title = {Sequence prediction exploiting similarity information},
year = 2007
}