We study the question of closeness testing for two discrete distributions.
More precisely, given samples from two distributions $p$ and $q$ over an
$n$-element set, we wish to distinguish whether $p=q$ versus $p$ is at least
$\eps$-far from $q$, in either $\ell_1$ or $\ell_2$ distance. Batu et al. gave
the first sub-linear time algorithms for these problems, which matched the
lower bounds of Valiant up to a logarithmic factor in $n$, and a polynomial
factor of $\eps.$
In this work, we present simple (and new) testers for both the $\ell_1$ and
$\ell_2$ settings, with sample complexity that is information-theoretically
optimal, to constant fa…(more)
Please log in to take part in the discussion (add own reviews or comments).
Cite this publication
More citation styles
- please select -
%0 Journal Article
%1 chan2013optimal
%A Chan, Siu-On
%A Diakonikolas, Ilias
%A Valiant, Gregory
%A Valiant, Paul
%D 2013
%K distributed probability readings stats
%T Optimal Algorithms for Testing Closeness of Discrete Distributions
%U http://arxiv.org/abs/1308.3946
%X We study the question of closeness testing for two discrete distributions.
More precisely, given samples from two distributions $p$ and $q$ over an
$n$-element set, we wish to distinguish whether $p=q$ versus $p$ is at least
$\eps$-far from $q$, in either $\ell_1$ or $\ell_2$ distance. Batu et al. gave
the first sub-linear time algorithms for these problems, which matched the
lower bounds of Valiant up to a logarithmic factor in $n$, and a polynomial
factor of $\eps.$
In this work, we present simple (and new) testers for both the $\ell_1$ and
$\ell_2$ settings, with sample complexity that is information-theoretically
optimal, to constant factors, both in the dependence on $n$, and the dependence
on $\eps$; for the $\ell_1$ testing problem we establish that the sample
complexity is $\Theta(\max\n^2/3/\eps^4/3, n^1/2/\eps^2 \).$
@article{chan2013optimal,
abstract = {We study the question of closeness testing for two discrete distributions.
More precisely, given samples from two distributions $p$ and $q$ over an
$n$-element set, we wish to distinguish whether $p=q$ versus $p$ is at least
$\eps$-far from $q$, in either $\ell_1$ or $\ell_2$ distance. Batu et al. gave
the first sub-linear time algorithms for these problems, which matched the
lower bounds of Valiant up to a logarithmic factor in $n$, and a polynomial
factor of $\eps.$
In this work, we present simple (and new) testers for both the $\ell_1$ and
$\ell_2$ settings, with sample complexity that is information-theoretically
optimal, to constant factors, both in the dependence on $n$, and the dependence
on $\eps$; for the $\ell_1$ testing problem we establish that the sample
complexity is $\Theta(\max\{n^{2/3}/\eps^{4/3}, n^{1/2}/\eps^2 \}).$},
added-at = {2020-02-26T13:47:22.000+0100},
author = {Chan, Siu-On and Diakonikolas, Ilias and Valiant, Gregory and Valiant, Paul},
biburl = {https://www.bibsonomy.org/bibtex/20216361dc184f9d0f7dde69f290e68e4/kirk86},
description = {[1308.3946] Optimal Algorithms for Testing Closeness of Discrete Distributions},
interhash = {fb548f4773e288ba285af1bc5c49f2e3},
intrahash = {0216361dc184f9d0f7dde69f290e68e4},
keywords = {distributed probability readings stats},
note = {cite arxiv:1308.3946},
timestamp = {2020-02-26T13:47:22.000+0100},
title = {Optimal Algorithms for Testing Closeness of Discrete Distributions},
url = {http://arxiv.org/abs/1308.3946},
year = 2013
}