\documentclass[reqno]{amsart}
\usepackage{hyperref}

\AtBeginDocument{{\noindent\small
\emph{Electronic Journal of Differential Equations},
Vol. 2016 (2016), No. 237, pp. 1--6.\newline
ISSN: 1072-6691. URL: http://ejde.math.txstate.edu or http://ejde.math.unt.edu}
\thanks{\copyright 2016 Texas State University.}
\vspace{8mm}}

\begin{document}
\title[\hfilneg EJDE-2016/237\hfil Bounds for Kullback-Leibler divergence]
{Bounds for Kullback-Leibler divergence}

\author[P. G. Popescu, S. S. Dragomir, E. I. Slu\c{s}anschi,
O. N. St\u{a}n\u{a}\c{s}il\u{a} \hfil EJDE-2016/237\hfilneg]
{Pantelimon G. Popescu, Sever S. Dragomir,  \\
Emil I. Slu\c{s}anschi,
Octavian N. St\u{a}n\u{a}\c{s}il\u{a}}

\address{Pantelimon George Popescu \newline
Computer Science and Engineering Department,
Faculty of Automatic Control and Computers,
University Politehnica of Bucharest,
Splaiul Independen\c{t}ei 313, 060042,
Bucharest (6), Romania}
\email{pgpopescu@yahoo.com,  Phone +40741533097, Fax +40214029333}

\address{Sever Silvestru Dragomir \newline
College of Engineering and Science,
Victoria University, PO Box 14428,
Melbourne City, MC 8001, Australia}
\email{sever.dragomir@vu.edu.au, Phone +61 3 9919 4437, Fax +61 3 9919 4050}

\address{Emil Ioan Slu\c{s}anschi \newline
Computer Science and Engineering Department,
Faculty of Automatic Control and Computers,
University Politehnica of Bucharest,
Splaiul Independen\c{t}ei 313, 060042,
Bucharest (6), Romania}
\email{emil.slusanschi@cs.pub.ro, Phone +40741533097, Fax +40214029333}

\address{Octavian Nicolae St\u{a}n\u{a}\c{s}il\u{a} \newline
Computer Science and Engineering Department,
Faculty of Automatic Control and Computers,
University Politehnica of Bucharest,
Splaiul Independen\c{t}ei 313, 060042,
Bucharest (6), Romania}
\email{ostanasila@hotmail.com, Phone +40741533097, Fax +40214029333}


\thanks{Submitted June 19, 2016. Published August 30, 2016.}
\subjclass[2010]{26B25, 94A17}
\keywords{Entropy; bounds; refinements; generalization}

\begin{abstract}
 Entropy, conditional entropy and mutual information for discrete-valued
 random variables play important roles in information theory.
 The purpose of this paper is to present new bounds for the relative
 entropy $D(p||q)$ of two probability distributions and then to apply
 them to the entropy and to the mutual information. The upper bound
 obtained for the relative entropy is a refinement of a bound previously
 presented in the literature.
\end{abstract}

\maketitle
\numberwithin{equation}{section}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{corollary}[theorem]{Corollary}
\allowdisplaybreaks

\section{Introduction}

 The relative entropy $D(p||q)$ (see \cite{Ash,Cover})
is a measure of the distance between two distributions. It can also be
interpreted as a measure of the inefficiency of assuming that the distribution
is $q$ when the true distribution is $p$.

\begin{definition} \label{def}  \rm
 The relative entropy, or Kullback-Leibler distance, between two
probability mass functions $p(x)$ and $q(x)$ is defined as
$$
D(p||q):=\sum_{x\in X}p(x)\ln\big(\frac{p(x)}{q(x)}\big)
=E_p\ln\big(\frac pq\big),
$$
where $\ln(\cdot)$ is the natural logarithm.
\end{definition}
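
As a numerical illustration, the short Python sketch below (the distributions
are arbitrary toy values) evaluates $D(p||q)$ directly from the definition.
\begin{verbatim}
# Toy evaluation of the relative entropy D(p||q) from the definition;
# the two distributions below are arbitrary illustrative values.
import math

p = [0.5, 0.3, 0.2]
q = [0.4, 0.4, 0.2]

D = sum(px * math.log(px / qx) for px, qx in zip(p, q))
print("D(p||q) =", D)   # nonnegative; zero only when p == q
\end{verbatim}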

A fundamental property of the relative entropy is the following.

\begin{theorem}\label{t2}
Let $p(x),q(x),x\in X$ be two probability mass functions.
Then
$$
D(p||q)\geq 0,
$$
with equality if and only if $p(x)=q(x)$ for all $x\in X$.
\end{theorem}

The above fundamental inequality can be improved to the following lower
bound (see \cite{Cover}).

\begin{theorem}\label{t3}
Let $p(x),q(x)$  be two probability mass functions for $x\in X$.
Then
$$
D(p||q)\geq \frac 12\Big(\sum_{x\in X}|p(x)-q(x)|\Big)^2.
$$
\end{theorem}

As an upper bound for the relative entropy, we consider the result
presented by Dragomir et al.\ \cite[Theorem 1]{ssd3}.
Other bounds can also be found in \cite{ssd1,pec,Simic,pgp1}.

\begin{theorem}[\cite{ssd3}] \label{t4}
Let $p(x),q(x)>0,x\in X$ be two probability mass functions. Then
$$
D(p||q)\leq \sum_{x\in X}\frac{p^2(x)}{q(x)}-1
=\frac 12\sum_{x,y\in X}p(x)p(y)
\Big(\frac{p(x)}{q(x)}-\frac{p(y)}{q(y)}\Big)
\Big(\frac{q(y)}{p(y)}-\frac{q(x)}{p(x)}\Big),
$$
with equality if and only if $p(x)=q(x)$ for all $x\in X$.
\end{theorem}
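
As a numerical sanity check (the toy distributions below are arbitrary), the
following Python sketch verifies the lower bound of Theorem \ref{t3} and the
upper bound of Theorem \ref{t4}.
\begin{verbatim}
# Toy check: (1/2)(sum|p-q|)^2 <= D(p||q) <= sum p^2/q - 1.
import math

p = [0.5, 0.3, 0.2]
q = [0.4, 0.4, 0.2]

D = sum(px * math.log(px / qx) for px, qx in zip(p, q))
lower = 0.5 * sum(abs(px - qx) for px, qx in zip(p, q)) ** 2
upper = sum(px * px / qx for px, qx in zip(p, q)) - 1

assert lower <= D <= upper
\end{verbatim}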



\section{A new inequality for strictly convex functions}

 First we need a generalization of the classical Lagrange mean value theorem.

\begin{theorem} \label{t6}
Let $n$ be a natural number, and $f:[a,b]\to \mathbb{R}$ a continuous
function on $[a,b]$, differentiable on $(a,b)$, with $b>a>0$.
Then there exist distinct $c_1,c_2,\dots ,c_n\in(a,b)$ such that
$$
\frac{f(b)-f(a)}{b-a}=\frac{f'(c_1)+f'(c_2)+\dots +f'(c_n)}n.
$$
\end{theorem}

\begin{proof}
First we split the interval $[a,b]$  into $n$ equal subintervals, as
$[a,x_1]$, $[x_1,x_2]$, \dots, $[x_{n-1},b]$, with
$$
x_1-a=x_2-x_1=\dots =b-x_{n-1}=\frac{b-a}n.
$$
Now, applying Lagrange's theorem on each subinterval, we obtain that there
exist $c_1\in(a,x_1)$, $c_2\in(x_1,x_2),\dots ,c_n\in(x_{n-1},b)$ such that
$$
\frac{f(x_1)-f(a)}{\frac{b-a}{n}}=f'(c_1),\quad
\frac{f(x_2)-f(x_1)}{\frac{b-a}{n}}=f'(c_2),
\dots ,\frac{f(b)-f(x_{n-1})}{\frac{b-a}{n}}=f'(c_n).
$$
Summing the above equalities yields
$$
\frac{f(b)-f(a)}{b-a}
=\frac{f'(c_1)+f'(c_2)+\dots +f'(c_n)}n\,.
$$
Since the open subintervals are pairwise disjoint, the points
$c_1,c_2,\dots ,c_n$ are distinct, and the proof is complete.
\end{proof}

 Adding the assumption that the function $f$ is strictly convex,
we obtain the following result.

\begin{theorem}\label{t7}
Let $n$ be a natural number, and $f:[a,b]\to \mathbb{R}$ a strictly convex
function, differentiable on $[a,b]$, with $b>a>0$. Then
$$
f'(a)+\sum_{i=1}^{n-1}f'\Big(a+i\frac{b-a}n\Big)
< n\frac{f(b)-f(a)}{b-a}
< \sum_{i=1}^{n-1}f'\Big(a+i\frac{b-a}n\Big)+f'(b).
$$
\end{theorem}

\begin{proof}
Since $f$ is strictly convex and differentiable, $f'$ is strictly increasing.
By the above theorem there exist $c_1\in(a,x_1)$, $c_2\in(x_1,x_2)$, \dots,
$c_n\in(x_{n-1},b)$, with $x_1-a=x_2-x_1=\dots =b-x_{n-1}=\frac{b-a}n$,
satisfying the identity of Theorem \ref{t6}. The monotonicity of $f'$ implies
$$
f'(a)<f'(c_1)<f'(x_1),\quad f'(x_1)<f'(c_2)<f'(x_2),\quad \dots ,\quad
f'(x_{n-1})<f'(c_n)<f'(b),
$$
and summing these inequalities and using the identity of Theorem \ref{t6}
we obtain the desired result.
\end{proof}

\begin{remark}\label{r8} \rm
It is easy to see that for any positive natural number $n$
\[
nf'(a)\leq f'(a)+\sum_{i=1}^{n-1}f'\Big(a+i\frac{b-a}n\Big)
\]
and
\[
  \sum_{i=1}^{n-1}f'\Big(a+i\frac{b-a}n\Big)+f'(b)\leq nf'(b),
\]
because $f'$ is increasing (as $f$ is strictly convex) and
$a<a+i\frac{b-a}n<b$ for $i=1,2,\dots ,n-1$.
\end{remark}
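
As a numerical illustration (the values of $a$, $b$ and $n$ below are
arbitrary), the following Python sketch checks the inequality of
Theorem \ref{t7} for the strictly convex function $f(x)=-\ln x$.
\begin{verbatim}
# Toy check of  f'(a) + interior sum < n*(f(b)-f(a))/(b-a) < interior sum + f'(b)
# for the strictly convex function f(x) = -ln(x); a, b, n are arbitrary.
import math

def fprime(x):            # f'(x) for f(x) = -ln(x)
    return -1.0 / x

a, b, n = 0.5, 2.0, 10
h = (b - a) / n
interior = sum(fprime(a + i * h) for i in range(1, n))
chord = n * (math.log(a) - math.log(b)) / (b - a)   # n*(f(b)-f(a))/(b-a)

assert fprime(a) + interior < chord < interior + fprime(b)
\end{verbatim}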


\section{New bounds for relative entropy}

To present a general inequality for $-\ln x$, we start with a helpful result,
which can be deduced by elementary calculus.

\begin{lemma}\label{l1}
 Let $a,b,t,T$ be real numbers with $b\neq 0$ and $T>t$.
Then the following two inequalities are equivalent:
$$
t<\frac ab<T
$$
and
$$
b\frac{T+t-\frac b{|b|}(T-t)}2<a<b\frac{T+t+\frac b{|b|}(T-t)}2.
$$
\end{lemma}


 Now, applying Theorem \ref{t7} to the function $-\ln x$ and taking into
account the previous lemma, we obtain the following result.

\begin{corollary}\label{c9}
Let $a,b>0$, let $m=\min\{a,b\}$, $M=\max\{a,b\}$, and let
$n\geq 1$ be a natural number. Then
\begin{align*}
(a-b)\frac 1a
&\leq (a-b)\Big(\sum_{i=1}^{n-1}\frac 1{nm+i(M-m)}+\frac{m+M+b-a}{2nmM}\Big) \\
&\leq\ln a-\ln b \\
&\leq(a-b)\Big(\sum_{i=1}^{n-1}\frac 1{nm+i(M-m)}+\frac{m+M+a-b}{2nmM}\Big)\\
&\leq (a-b)\frac 1b,
\end{align*}
with equality holding for $a=b$.
\end{corollary}


\begin{proof} 
If $a=b$ then the inequalities are obvious, so assume $a\neq b$.
Applying Theorem \ref{t7} to the function $f(x)=-\ln x$ on the interval
$I=[m,M]$, and taking into account that
$\frac{f(M)-f(m)}{M-m}=\frac{f(b)-f(a)}{b-a}$, we obtain
$$
-\frac 1{nm}-\sum_{i=1}^{n-1}\frac 1{nm+i(M-m)}
<\frac{\ln a-\ln b}{b-a}
<-\sum_{i=1}^{n-1}\frac 1{nm+i(M-m)}-\frac 1{nM}\,.
$$
The equivalence from Lemma \ref{l1} yields
\begin{align*}
&(a-b)\frac{2\sum_{i=1}^{n-1}\frac1{nm+i(M-m)}
 +\frac{m+M}{nmM}-\frac{b-a}{M-m}\frac{m-M}{nmM}}{2} \\
&<\ln a -\ln b \\
&<(a-b)\frac{2\sum_{i=1}^{n-1}\frac1{nm+i(M-m)}+\frac{m+M}{nmM}
 +\frac{b-a}{M-m}\frac{m-M}{nmM}}{2},
\end{align*}
which is equivalent to
\begin{align*}
&(a-b)\Big(\sum_{i=1}^{n-1}\frac 1{nm+i(M-m)}+\frac{m+M+b-a}{2nmM}\Big) \\
&<\ln a-\ln b \\
&<(a-b)\Big(\sum_{i=1}^{n-1}\frac 1{nm+i(M-m)}+\frac{m+M+a-b}{2nmM}\Big)
\end{align*}
and by comparing with Remark \ref{r8} we obtain the desired result.
\end{proof}
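
The chain of inequalities in Corollary \ref{c9} can also be checked
numerically, as in the following Python sketch (the values of $a$, $b$ and
$n$ are arbitrary toy choices).
\begin{verbatim}
# Toy check of the chain of inequalities in the corollary above.
import math

def middle(a, b, n, sign):
    m, M = min(a, b), max(a, b)
    s = sum(1.0 / (n * m + i * (M - m)) for i in range(1, n))
    return (a - b) * (s + (m + M + sign * (b - a)) / (2 * n * m * M))

a, b, n = 2.0, 0.5, 5
low = middle(a, b, n, +1)    # term containing m + M + b - a
up = middle(a, b, n, -1)     # term containing m + M + a - b

assert (a - b) / a <= low <= math.log(a) - math.log(b) <= up <= (a - b) / b
\end{verbatim}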



 We now present the main result of this article, namely new bounds for the
relative entropy of two probability mass functions $p(x)$ and $q(x)$,
$x\in X$.

\begin{theorem}\label{t10}
Let $p(x), q(x)>0,x\in X$ be two probability mass functions,
 with $m(x)=\min\{p(x),q(x)\}$ and $M(x)=\max\{p(x),q(x)\},x\in X$. 
If $r(x)=p(x)-q(x)$, then
\begin{align*}
&\sum_{x\in X}p(x)r(x)\Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}
+\frac{m(x)+M(x)+r(x)}{2nm(x)M(x)}\Big) \\
&\geq D(p||q) \\
&\geq\sum_{x\in X}p(x)r(x)\Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}
+\frac{m(x)+M(x)-r(x)}{2nm(x)M(x)}\Big),
\end{align*}
with equality if and only if $p(x)=q(x)$ for all $x\in X$.
\end{theorem}

\begin{proof} 
Setting $a=q(x)$ and $b=p(x)$ in Corollary \ref{c9} we obtain
\begin{align*}
&-r(x)\Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}
 +\frac{m(x)+M(x)+r(x)}{2nm(x)M(x)}\Big) \\
&\leq \ln q(x)-\ln p(x) \\
&\leq-r(x)\Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}
 +\frac{m(x)+M(x)-r(x)}{2nm(x)M(x)}\Big),
\end{align*}
and multiplying by $-p(x)$, which reverses the inequalities, yields
\begin{align*}
&p(x)r(x)\Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}
 +\frac{m(x)+M(x)+r(x)}{2nm(x)M(x)}\Big)\\
&\geq p(x)\ln \frac{p(x)}{q(x)} \\
&\geq p(x)r(x)\Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}
 +\frac{m(x)+M(x)-r(x)}{2nm(x)M(x)}\Big),
\end{align*}
Summing over $x\in X$, we obtain the desired result.
\end{proof}
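
The bounds of Theorem \ref{t10} can be verified numerically, as in the
following Python sketch (the distributions and the value of $n$ are arbitrary
toy choices).
\begin{verbatim}
# Toy check of the new lower and upper bounds on D(p||q) from the theorem above.
import math

def bound(p, q, n, sign):
    total = 0.0
    for px, qx in zip(p, q):
        m, M, r = min(px, qx), max(px, qx), px - qx
        s = sum(1.0 / (n * m + i * (M - m)) for i in range(1, n))
        total += px * r * (s + (m + M + sign * r) / (2 * n * m * M))
    return total

p = [0.5, 0.3, 0.2]
q = [0.4, 0.4, 0.2]
D = sum(px * math.log(px / qx) for px, qx in zip(p, q))

assert bound(p, q, 4, -1) <= D <= bound(p, q, 4, +1)
\end{verbatim}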


\begin{remark}\label{r11} \rm
From Corollary \ref{c9} and Theorem \ref{t10} we  deduce that
\begin{align*}
& \sum_{x\in X}\frac{p^2(x)}{q(x)}-1\\
&\geq \sum_{x\in X}p(x)r(x)\Big(\sum_{i=1}^{n-1}
 \frac 1{nm(x)+i(M(x)-m(x))}+\frac{m(x)+M(x)+r(x)}{2nm(x)M(x)}\Big)\\
&\geq D(p||q),
\end{align*}
which leads us to the conclusion that the upper bound for the relative
entropy provided by Theorem \ref{t10} is tighter than the one from \cite{ssd3}.
\end{remark}
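
The refinement noted above can also be observed numerically, as in the
following Python sketch (the distributions and the value of $n$ are arbitrary
toy choices), which compares the two upper bounds.
\begin{verbatim}
# Toy comparison of the new upper bound with the classical bound sum p^2/q - 1.
import math

def new_upper(p, q, n):
    total = 0.0
    for px, qx in zip(p, q):
        m, M, r = min(px, qx), max(px, qx), px - qx
        s = sum(1.0 / (n * m + i * (M - m)) for i in range(1, n))
        total += px * r * (s + (m + M + r) / (2 * n * m * M))
    return total

p = [0.5, 0.3, 0.2]
q = [0.4, 0.4, 0.2]
D = sum(px * math.log(px / qx) for px, qx in zip(p, q))
classical = sum(px * px / qx for px, qx in zip(p, q)) - 1

assert D <= new_upper(p, q, 4) <= classical
\end{verbatim}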

 Furthermore, we present new bounds for the entropy and the mutual information.

\begin{corollary}\label{c12}
Let $X$ be a random variable whose range has $|X|$ elements and whose
probability mass function is $p(x)>0$, with $m(x)=\min\{p(x),1/|X|\}$ and
$M(x)=\max\{p(x),1/|X|\}$, $x\in X$. If $r(x)=p(x)-1/|X|$, then
\begin{align*}
&\sum_{x\in X}p(x)r(x)\Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}
+\frac{m(x)+M(x)+r(x)}{2nm(x)M(x)}\Big)\\
&\geq \ln |X|-H(X) \\
&\geq\sum_{x\in X}p(x)r(x)
 \Big(\sum_{i=1}^{n-1}\frac 1{nm(x)+i(M(x)-m(x))}+\frac{m(x)
 +M(x)-r(x)}{2nm(x)M(x)}\Big).
\end{align*}
Equality holds if and only if $p(x)=1/|X|$ for all $x\in X$.
\end{corollary}

\begin{proof} 
This follows from Theorem \ref{t10} applied to $D(p||q)$ with $q(x)=1/|X|$,
i.e.\ to $D(p(x)||1/|X|)$, since $D(p(x)||1/|X|)=\ln|X|-H(X)$.
\end{proof}


\begin{corollary}\label{c13}
Let $X,Y$ be two random variables with a joint probability mass function
$p(x,y)$ and marginal probability mass functions $p(x)$ and $p(y)$,
with $p(x,y)$, $p(x)$, $p(y)>0$ for $x\in X$, $y\in Y$, and let
$m_{x,y}=\min\{p(x,y),p(x)p(y)\}$ and
$M_{x,y}=\max\{p(x,y),p(x)p(y)\}$, $x\in X$, $y\in Y$.
If $r_{x,y}=p(x,y)-p(x)p(y)$, then
\begin{align*}
&\sum_{(x,y)\in X\times Y}p(x,y)r_{x,y}
\Big(\sum_{i=1}^{n-1}\frac 1{nm_{x,y}+i(M_{x,y}-m_{x,y})}
 +\frac{m_{x,y}+M_{x,y}+r_{x,y}}{2nm_{x,y}M_{x,y}}\Big)\\
&\geq I(X;Y) \\
&\geq\sum_{(x,y)\in X\times Y}p(x,y)r_{x,y}
\Big(\sum_{i=1}^{n-1}\frac 1{nm_{x,y}+i(M_{x,y}-m_{x,y})}
 +\frac{m_{x,y}+M_{x,y}-r_{x,y}}{2nm_{x,y}M_{x,y}}\Big).
\end{align*}
Equality holds if and only if $X$ and $Y$ are independent.
\end{corollary}

\begin{proof} 
This follows from Theorem \ref{t10} applied over the alphabet $X\times Y$,
with $p(x,y)$ in place of $p(x)$ and $p(x)p(y)$ in place of $q(x)$, i.e.\ to
$D(p(x,y)||p(x)p(y))=I(X;Y)$, and with $m_{x,y}$, $M_{x,y}$, $r_{x,y}$ in
place of $m(x)$, $M(x)$, $r(x)$.
\end{proof}

\begin{thebibliography}{00}
\bibitem{Ash} R. Ash;
\emph{Information Theory}, Interscience, New York, 1965.

\bibitem{Cover} T. M. Cover, J. A. Thomas;
\emph{Elements of Information Theory}, John Wiley and Sons, Inc., 2006.

\bibitem{ssd3} S. S. Dragomir, M. L. Scholz, J. Sunde;
\emph{Some upper bounds for relative entropy and applications}, 
Computers and Mathematics with Applications, 39 (2000), 91--100.

\bibitem{ssd1} S. S. Dragomir, C. J. Goh;
\emph{A counterpart of Jensen's discrete inequality for differentiable 
convex mappings and applications in information theory}, 
Math. Comput. Modelling, 24 (1996), 1--11.

\bibitem{pec} M. Matic, C. E. M. Pearce, J. Pecaric;
\emph{Refinements of some bounds in information theory}, 
ANZIAM J., 42 (2001), 387--398.

\bibitem{Simic} S. Simic;
\emph{Jensen's inequality and new entropy bounds}, 
Applied Mathematics Letters, 22 (2009), 1262--1265.

\bibitem{pgp1} N. Tapus, P. G. Popescu;
\emph{New entropy upper bound}, Applied Mathematics Letters, 
25, no. 11 (2012), 1887--1890.


\end{thebibliography}

\end{document}
