\documentclass[reqno]{amsart}
\usepackage{hyperref}

\AtBeginDocument{{\noindent\small
\emph{Electronic Journal of Differential Equations},
Vol. 2012 (2012), No. 03, pp. 1--8.\newline
ISSN: 1072-6691. URL: http://ejde.math.txstate.edu or http://ejde.math.unt.edu
\newline ftp ejde.math.txstate.edu}
\thanks{\copyright 2012 Texas State University - San Marcos.}
\vspace{9mm}}

\begin{document}
\title[\hfilneg EJDE-2012/03\hfil
Newton's method for stochastic differential equations]
{Newton's method for stochastic differential equations and its
probabilistic second-order error estimate}

\author[K. Amano \hfil EJDE-2012/03\hfilneg]
{Kazuo Amano}

\address{Kazuo Amano \newline
Department of Mathematics,
Faculty of Engineering,
Gunma University,
Kiryu, 376-8515, Japan}
\email{kamano@gunma-u.ac.jp}

\thanks{Submitted June 27, 2011. Published January 4, 2012.}
\subjclass[2000]{60H10, 65C30}
\keywords{Newton's method; stochastic differential equation;
\hfill\break\indent
second order error estimate}

\begin{abstract}
 Kawabata and Yamada \cite{Kawabata} proposed an implicit
 Newton's method for nonlinear stochastic differential equations
and proved its convergence. Later Amano \cite{Amano2} gave an
explicit formulation of the method and established a direct error estimate.
 In this article, we prove a probabilistic second-order error
 estimate which has been an open problem since 1991.
\end{abstract}

\maketitle
\numberwithin{equation}{section}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{remark}[theorem]{Remark}
%\allowdisplaybreaks

\section{Introduction}

Let \(a(t,x)\) and \(b(t,x)\) be
real-valued bounded \(C^2\) smooth functions
defined in the two dimensional Euclidean space \({\mathbb R}^2\).
We assume that
there exist nonnegative constants
\(A_1\), \(A_2\), \(B_1\) and \(B_2\)
satisfying
\[
\big|\frac{\partial a}{\partial x}(t,x)\big|\le A_1\,,
\quad
\big|\frac{\partial^2 a}{\partial x^2}(t,x)\big|\le A_2
\]
and
\[
\big|\frac{\partial b}{\partial x}(t,x)\big|\le B_1\,,
\quad
\big|\frac{\partial^2 b}{\partial x^2}(t,x)\big|\le B_2
\]
in \({\mathbb R}^2\).

Let \(w(t)\), \(t\ge 0\) be a standard Brownian motion
on a probability space \((\Omega,\mathcal{F}, P)\)
and let \(\mathcal{F}_t\), \(t\ge 0\) be the natural filtration
generated by \(w(t)\).
We assume that
\(\xi(t)\), \(t\ge 0\) is a solution of the initial value problem
for stochastic differential equation
\begin{equation}
\label{problem0}
d\xi(t)=a\bigl(t,\xi(t)\bigr)\,dt
+b\bigl(t,\xi(t)\bigr)\,dw(t)\,,\quad \xi(0)=\xi_0\,,
\end{equation}
where
\(\xi_0\) is a bounded random variable independent of
\(\mathcal{F}_t\), \(t\ge 0\).
Without loss of generality,
we may assume that \(\xi(t)\) is continuous with respect to \(t\ge 0\).

For \(T > 0\) and \(1 \le  p < \infty\),
\(L_w^p[0,T]\) stands for the class of
all separable non-anticipative functions \(f(t)\), \(t\ge 0\)
with respect to \(\{\mathcal{F}_t\}\) satisfying
\[
P\Bigl[\int_0^T |f(t)|^p\,dt<\infty\Bigr]=1
\]
and \(M_w^p[0,T]\) denotes the subset of
\(L_w^p[0,T]\) consisting of all functions \(f(t)\) with
\[
E\Bigl[\int_0^T |f(t)|^p\,dt\Bigr]<\infty\,.
\]
It is well-known that \(\xi(t)\in M_w^2[0,T]\)
for any \(T>0\) (see, for example, \cite{Friedman}).

The explicit Newton scheme for \eqref{problem0}
is formulated as follows (see \cite{Amano2}):
We define a sequence \(\{\xi_n(t)\}\) by
\(\xi_0(t)=\xi_0\)
and
\begin{align*}
&\xi_{n + 1}(t)\\
&= e^{\eta_n(t)}\Bigl(\xi_0
+\int_0^t\bigl(a_{0,n}(s)-b_{0,n}(s) b_{1,n}(s)\bigr)
e^{-\eta_n(s)}ds
 +\int_0^t b_{0,n}(s)e^{-\eta_n(s)}dw(s)\Bigr)
\end{align*}
for \(n = 0,1,2,\dots\), where
\begin{gather*}
\eta_n(t)
= \int_0^t\Bigl(a_{1,n}(s)-\frac{1}{2}\,(b_{1,n}(s))^2\Bigr)\,ds
+\int_0^t b_{1,n}(s)\,dw(s)\,,\\
a_{0,n}(t)
= a(t,\xi_n(t))-\frac{\partial a}{\partial x}
\bigl(t,\xi_n(t)\bigr)\,\xi_n(t)\,,\\
a_{1,n}(t)
= \frac{\partial a}{\partial x}\bigl(t,\xi_n(t)\bigr)\,,\\
b_{0,n}(t)
= b(t,\xi_n(t))-\frac{\partial b}{\partial x}
\bigl(t,\xi_n(t)\bigr)\,\xi_n(t)\,,\\
b_{1,n}(t)
= \frac{\partial b}{\partial x}\bigl(t,\xi_n(t)\bigr)\,.
\end{gather*}
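
As a simple illustration (disregarding, for the moment, the boundedness
assumptions on the coefficients), let \(a(t,x)=\mu x\) and
\(b(t,x)=\sigma x\) with real constants \(\mu\) and \(\sigma\).
Then \(a_{0,0}(t)=b_{0,0}(t)=0\), \(a_{1,0}(t)=\mu\),
\(b_{1,0}(t)=\sigma\), and hence
\[
\xi_1(t)=e^{\eta_0(t)}\,\xi_0
=\xi_0\,e^{(\mu-\sigma^2/2)\,t+\sigma w(t)}\,,
\]
which is already the exact solution of \eqref{problem0};
the scheme is exact after one step whenever
\(a\) and \(b\) are affine in \(x\).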

In this article,
we shall  estimate the approximation errors
\begin{equation}
\label{errors}
\varepsilon_n(t)=\xi_n(t)-\xi(t),\quad n=0,1,2,\dots\,.
\end{equation}

\begin{theorem}\label{theorem1}
For any \(T>0\),
there exists a nonnegative constant \(C\)
depending only on \(T\), \(A_1\), \(A_2\), \(B_1\) and \(B_2\)
such that
\[
P\Bigl[\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho
\text{ \rm  implies }
\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|\le R\rho^2\Bigr]
\ge 1-C\,R^{- 1 / 2}
\]
for all \(R \ge 1\), \(0 < \rho \le 1\)
and every \(n = 0,1,2,\dots\).
\end{theorem}

Our symbolic Newton's method may open new possibilities
for the study of computer algebra methods
in stochastic analysis (see, for example, \cite{Cyganowsky}).
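
For instance, the linearization coefficients
\(a_{0,n}\), \(a_{1,n}\), \(b_{0,n}\), \(b_{1,n}\)
can be generated by a computer algebra system.
The following minimal \texttt{sympy} sketch (an illustration only,
with a hypothetical drift and diffusion) forms, for a given \(f(t,x)\),
the pair \(f_1=\partial f/\partial x\) and \(f_0=f-f_1\,x\);
evaluated along \(x=\xi_n(t)\), these are exactly
\(f_{1,n}(t)\) and \(f_{0,n}(t)\) above.
\begin{verbatim}
# Minimal sympy sketch (illustration only): symbolic linearization
# f(t,x) = f0(t,x) + f1(t,x)*x used by the explicit Newton scheme.
import sympy as sp

t, x = sp.symbols('t x')
a = sp.sin(x) * sp.exp(-t)   # hypothetical drift a(t,x)
b = sp.cos(x)                # hypothetical diffusion b(t,x)

def linearize(f):
    f1 = sp.diff(f, x)            # f_x(t,x)
    f0 = sp.simplify(f - f1 * x)  # f(t,x) - f_x(t,x)*x
    return f0, f1

a0, a1 = linearize(a)   # substituting x = xi_n(t) gives a_{0,n}, a_{1,n}
b0, b1 = linearize(b)   # substituting x = xi_n(t) gives b_{0,n}, b_{1,n}
print(a0, a1, b0, b1)
\end{verbatim}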

If the positive constants
\(T_0\), \(A_1\), \(A_2\), \(B_1\) and \(B_2\)
are given,
then a repeated use of Theorem \ref{theorem1} gives
an approximate solution of \eqref{problem0}
in terms of multiple stochastic integrals.
For example, we first note that,
inspecting the proof of Theorem \ref{theorem1},
we can choose \(R\ge 1\) sufficiently large so that
\[
CR^{- 1 / 2}<\frac{1}{100},\quad \text{where }C=C(A_1,A_2,B_1,B_2,T)
\]
for all positive \(T\le T_0\).
Second, we take a small \(\rho>0\) so as to satisfy
\[
R\rho<\frac{1}{10}\,.
\]
Third, by using a martingale inequality (Lemma \ref{lemma6}),
we take a sufficiently small \(T>0\) such that
\[
P\Bigl[
\sup_{0\le t\le T}|\varepsilon_0(t)|\le\rho
\Bigr]\ge 1-\frac{1}{100}\,.
\]
Now, a repeated use of Theorem \ref{theorem1} and
\[
R^{- 1}\bigl(R\rho\bigr)^{2^{10}}\le\frac{1}{10^{2^{10}}}
\]
show that
\[
P\Bigl[
\sup_{0\le t\le T}|\varepsilon_{10}(t)|\le\frac{1}{10^{1024}}
\Bigr]
\ge 1-\frac{1}{10}\,.
\]
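Here the exponent \(2^{10}\) reflects the quadratic contraction of the
error bounds: writing \(\rho_0=\rho\) and \(\rho_{k+1}=R\rho_k^2\)
for the successive bounds, we have \(R\rho_{k+1}=(R\rho_k)^2\) and hence
\[
\rho_k=R^{-1}(R\rho)^{2^k}\,,\quad k=0,1,2,\dots\,.
\]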
It is clear, by the definition of \(\{\xi_n(t)\}\), that
the approximate solution \(\xi_{10}(t)\) has
a multiple stochastic integral representation.

\section{Preliminaries}

The following two lemmas follow immediately from It\^o's formula.

\begin{lemma}\label{lemma1}
If \(\alpha(t)\in L_w^1[0,T]\) and \(\beta(t)\in L_w^2[0,T]\)
for any \(T>0\) and if
\[
d\eta(t)=\alpha(t)\,dt+\beta(t)\,dw(t)\,,
\]
then
\[
de^{\eta(t)}
=e^{\eta(t)}\,d\eta(t)+\frac{1}{2}\,\beta^2 (t)e^{\eta(t)}\,dt\,.
\]
\end{lemma}

\begin{proof}
For a function \( f(x) = e^x \),
It\^o's formula gives
\[
d\,f(\eta(t))=\Bigl(f'(\eta(t))\alpha(t)
+\frac{1}{2}\,f''(\eta(t))\beta^2 (t)\Bigr)dt
+f'(\eta(t))\beta(t)\,dw(t)\,;
\]
this implies the desired formula.
\end{proof}


\begin{lemma}\label{lemma2}
If \(\alpha_1(t), \alpha_2(t)\in L_w^1[0,T]\)
and \(\beta_1(t), \beta_2(t)\in L_w^2[0,T]\)
for any \(T>0\) and if
\[
d\xi_i(t)=\alpha_i(t)\,dt+\beta_i(t)\,dw(t)\,,\quad i=1,2\,,
\]
then
\[
d \bigl(\xi_1(t)\,\xi_2(t)\bigr)
=\xi_2(t)\,d\xi_1(t)+\xi_1(t)\,d\xi_2(t)+\beta_1(t)\beta_2(t)\,dt\,.
\]
\end{lemma}

\begin{proof}
Applying It\^o's formula
for a 2-dimensional diffusion process \( (\xi_1(t), \xi_2(t)) \)
and a function \( f(x_1,x_2) = x_1x_2 \),
we have
\begin{align*}
&d f\bigl(\xi_1(t),\xi_2(t)\bigr)\\
&= \Bigl(\sum_{i=1}^2\frac{\partial f}{\partial x_i}
\bigl(\xi_1(t),\xi_2(t)\bigr)\alpha_i(t)
+\frac{1}{2}\sum_{i,j=1}^2\frac{\partial^2 f}{\partial x_i\partial x_j}
\bigl(\xi_1(t),\xi_2(t)\bigr)\beta_i(t)\beta_j(t)\Bigr)dt\\
&\quad+\sum_{i=1}^2\frac{\partial f}{\partial x_i}
 \bigl(\xi_1(t),\xi_2(t)\bigr)\beta_i(t)\,dw(t)\,;
\end{align*}
since \(\partial^2f/\partial x_1\partial x_2
=\partial^2f/\partial x_2\partial x_1=1\)
and the pure second derivatives of \(f\) vanish,
this reduces to the desired formula.
\end{proof}

Lemmas \ref{lemma1} and \ref{lemma2} yield
the following three key lemmas.


\begin{lemma} \label{lemma3}
For \(n=0,1,2,\dots\), set
\[
a_0(t)=a_{0,n}(t)\,,\quad a_1(t)=a_{1,n}(t)\,,
\quad b_0(t)=b_{0,n}(t)\,,\quad b_1(t)=b_{1,n}(t)\,.
\]
Then the initial value problem for the linear stochastic
differential equation
\[
d\xi(t)=\bigl(a_0(t)+a_1(t)\,\xi(t)\bigr)\,dt
+\bigl(b_0(t)+b_1(t)\,\xi(t)\bigr)\,dw(t)\,,
\quad \xi(0)=\xi_0
\]
has an explicit solution
\[
\zeta(t)=e^{\eta(t)}\Bigl(
\xi_0+\int_0^t\bigl(a_0(s)-b_0(s)b_1(s)\bigr)e^{-\eta(s)}\,ds
+\int_0^tb_0(s)e^{-\eta(s)}\,dw(s)\Bigr)\,,
\]
where
\[
\eta(t)=\int_0^t\Bigl(a_1(s)-\frac{1}{2} b_1^2(s)\Bigr)\,ds
+\int_0^t b_1(s)\,dw(s)\,.
\]
\end{lemma}

\begin{proof}
Since Lemma \ref{lemma1} gives
\[
de^{\eta(t)}
=e^{\eta(t)}d\eta(t)+\frac{1}{2} b_1^2(t)e^{\eta(t)}\,dt
=a_1(t)e^{\eta(t)}\,dt+b_1(t)e^{\eta(t)}\,dw(t)\,,
\]
Lemma \ref{lemma2} shows
\begin{align*}
d\zeta(t)
&=d\,\Bigl(e^{\eta(t)}\Bigl(
\xi_0+\int_0^t\bigl(a_0(s)-b_0(s)b_1(s)\bigr)e^{-\eta(s)}\,ds
+\int_0^tb_0(s)e^{-\eta(s)}\,dw(s)\Bigr)\Bigr)\\
&=\Bigl(
\xi_0+\int_0^t\bigl(a_0(s)-b_0(s)b_1(s)\bigr)e^{-\eta(s)}\,ds
+\int_0^t b_0(s)e^{-\eta(s)}\,dw(s)
\Bigr)\,de^{\eta(t)}\\
&\quad +e^{\eta(t)}\Bigl(
\bigl(a_0(t)-b_0(t)b_1(t)\bigr)e^{-\eta(t)}\,dt
+b_0(t)e^{-\eta(t)}\,dw(t)\Bigr)\\
&\quad +\bigl(b_1(t)e^{\eta(t)}\bigr)\bigl(b_0(t)e^{-\eta(t)}\bigr)\,dt\\
&= \zeta(t)\bigl(a_1(t)\,dt+b_1(t)\,dw(t)\bigr)\\
&\quad +\bigl(a_0(t)-b_0(t)b_1(t)\bigr)\,dt+b_0(t)\,dw(t)
 +b_1(t) b_0(t)\,dt\\
&=\bigl(a_0(t)+a_1(t)\,\zeta(t)\bigr)\,dt
+\bigl(b_0(t)+b_1(t)\,\zeta(t)\bigr)\,dw(t)\,.
\end{align*}
\end{proof}
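
As a sanity check of the closed form above, one may compare it with a
direct simulation (a minimal numerical sketch, not part of the argument;
constant coefficients, a single sample path and an Euler--Maruyama
discretization are assumed).
\begin{verbatim}
# Minimal numerical sketch: compare an Euler-Maruyama solution of
# d xi = (a0 + a1*xi) dt + (b0 + b1*xi) dw with the closed form
# zeta(t) of Lemma 3, on a single Brownian path.
import numpy as np

rng = np.random.default_rng(0)
T, N = 1.0, 100000
dt = T / N
a0, a1, b0, b1, xi0 = 0.5, -1.0, 0.3, 0.2, 1.0

dw = rng.normal(0.0, np.sqrt(dt), N)
xi, eta = xi0, 0.0        # EM iterate and running value of eta(t)
I_ds, I_dw = 0.0, 0.0     # the two integrals appearing in zeta(t)
for k in range(N):
    # Ito integrals use left-endpoint values, so accumulate with the
    # current eta before advancing eta itself.
    I_ds += (a0 - b0 * b1) * np.exp(-eta) * dt
    I_dw += b0 * np.exp(-eta) * dw[k]
    xi += (a0 + a1 * xi) * dt + (b0 + b1 * xi) * dw[k]
    eta += (a1 - 0.5 * b1 ** 2) * dt + b1 * dw[k]
zeta = np.exp(eta) * (xi0 + I_ds + I_dw)
print(xi, zeta)   # agree up to the strong discretization error
\end{verbatim}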

\begin{remark}\label{remark1} \rm
It follows immediately from the definition of \(\xi_{n + 1}(t)\)
and Lemma \ref{lemma3} that \(\xi_{n + 1}(0) = \xi_0\) and
\[
d\xi_{n + 1}(t)
=\bigl(a_{0,n}(t)+a_{1,n}(t)\,\xi_{n + 1}(t)\bigr)\,dt
+\bigl(b_{0,n}(t)+b_{1,n}(t)\,\xi_{n + 1}(t)\bigr)\,dw(t)
\]
for \(n=0,1,2,\dots\).
Therefore,
\(\{\xi_n(t)\}\) is exactly the same sequence
introduced by Kawabata and Yamada \cite{Kawabata};
this implies the convergence
\[
\lim_{n\to\infty} E\Bigl[
\sup_{0\le t\le T}|\,\xi_n(t)-\xi(t)\,|^2\Bigr]=0
\]
for any \(T>0\).
In view of their result,
it remains only to estimate the errors \eqref{errors}.
\end{remark}


\begin{lemma}\label{lemma4}
For \(n=0,1,2,\dots\),
we have
\[
\varepsilon_{n + 1}(t)
=e^{\eta_n(t)}\Bigl(\int_0^t\bigl(\alpha_{0,n}(s)
-\beta_{0,n}(s) b_{1,n}(s)\bigr)e^{-\eta_n(s)}ds
+\int_0^t \beta_{0,n}(s)e^{-\eta_n(s)}dw(s)\Bigr)\,,
\]
where
\begin{gather*}
\alpha_{0,n}(t)
= \varepsilon_n^2(t)\int_0^1
 (\theta-1)\,\frac{\partial^2 a}{\partial x^2}
\bigl(t,\xi_n(t)-\theta \varepsilon_n(t)\bigr)\,d\theta\,,\\
\beta_{0,n}(t)
= \varepsilon_n^2(t)\int_0^1
 (\theta-1)\,\frac{\partial^2 b}{\partial x^2}
\bigl(t,\xi_n(t)-\theta \varepsilon_n(t)\bigr)\,d\theta\,.
\end{gather*}
\end{lemma}

\begin{proof}
Since \(\xi_{n + 1}(t)\) is a solution of
the linear stochastic differential equation in Remark \ref{remark1},
 by \eqref{problem0} and \eqref{errors}, we have
\begin{align*}
&d \varepsilon_{n + 1}(t)\\
&= d\,\xi_{n + 1}(t)-d\,\xi(t)\\
&= \Bigl(a(t,\xi_n(t))
-\frac{\partial a}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t)
-a\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr)
+a_{1,n}(t) \varepsilon_{n + 1}(t)\Bigr)\,dt\\
&\quad +\Bigl(b(t,\xi_n(t))
-\frac{\partial b}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t)
-b\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr)
+b_{1,n}(t) \varepsilon_{n + 1}(t)\Bigr)\,dw(t)\,.
\end{align*}
Let us consider an auxiliary function
\[
F(\theta)=a\bigl(t,\xi_n(t)-\theta \varepsilon_n(t)\bigr)\,,
\quad 0\le\theta\le 1\,.
\]
Then Taylor's formula with integral remainder,
obtained by integration by parts, shows
\[
F(1)=F(0)+F'(0)+\int_0^1 (1-\theta)\,F''(\theta)\,d\theta\,;
\]
this gives
\[
\alpha_{0,n}(t)
=a(t,\xi_n(t))
-\frac{\partial a}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t)
-a\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr)\,.
\]
Similarly, we have
\[
\beta_{0,n}(t)
=b(t,\xi_n(t))
-\frac{\partial b}{\partial x}\bigl(t,\xi_n(t)\bigr)\varepsilon_n(t)
-b\bigl(t,\xi_n(t) - \varepsilon_n(t)\bigr)\,.
\]
Therefore, we obtain
\[
d\varepsilon_{n + 1}(t)
=\bigl(\alpha_{0,n}(t)
+a_{1,n}(t) \varepsilon_{n + 1}(t)\bigr)\,dt
+\bigl(\beta_{0,n}(t)
+b_{1,n}(t) \varepsilon_{n + 1}(t)\bigr)\,dw(t)\,;
\]
applying the argument in the proof of Lemma \ref{lemma3}
with \(a_0(t)=\alpha_{0,n}(t)\),
\(a_1(t)=a_{1,n}(t)\),
\(b_0(t)=\beta_{0,n}(t)\),
\(b_1(t)=b_{1,n}(t)\)
and the initial value \(\varepsilon_{n + 1}(0)=0\)
completes the proof.
\end{proof}

\begin{lemma}\label{lemma5}
For any \(t > 0\), we obtain
\begin{gather*}
E\bigl[|e^{\eta_n(t)}-1|^2\bigr]
\leq 4t(A_1\sqrt{t}+B_1)^2e^{4t(A_1\sqrt{t}+B_1)^2}\,,\\
E\bigl[|e^{-\eta_n(t)}-1|^2\bigr]
\leq 4t\bigl((A_1+B_1^2)\sqrt{t}+B_1\bigr)^2
 e^{4t((A_1+B_1^2)\sqrt{t}+B_1)^2}\,.
\end{gather*}
\end{lemma}

\begin{proof}
Since Lemma \ref{lemma1} implies
\[
de^{\eta_n(t)}=a_{1,n}(t)e^{\eta_n(t)}dt
+b_{1,n}(t)e^{\eta_n(t)}dw(t)\,,
\]
we easily have
\begin{gather*}
e^{\eta_n(t)}-1
= \int_0^t a_{1,n}(s)e^{\eta_n(s)}ds
+\int_0^t b_{1,n}(s)e^{\eta_n(s)}dw(s)\,,\\
\big|\,a_{1,n}(s)e^{\eta_n(s)}\big|
\leq A_1+A_1|e^{\eta_n(s)}-1|\,,\\
\big| b_{1,n}(s)e^{\eta_n(s)}\big|
\leq B_1+B_1|e^{\eta_n(s)}-1|\,.
\end{gather*}
Hence, the stochastic Gronwall inequality \cite{Amano1}
shows one of the desired estimates.

Similarly, by Lemma \ref{lemma1}, we obtain
\[
e^{-\eta_n(t)}-1
=-\int_0^t \bigl(a_{1,n}(s)-(b_{1,n}(s))^2\bigr)e^{-\eta_n(s)}ds
-\int_0^t b_{1,n}(s)e^{-\eta_n(s)}dw(s)
\]
and a simple calculation gives
\begin{gather*}
\big|\bigl(a_{1,n}(s)-(b_{1,n}(s))^2\bigr)e^{-\eta_n(s)}\big|
\leq  (A_1+B_1^2)+(A_1+B_1^2)|e^{-\eta_n(s)}-1|\,,\\
\big| b_{1,n}(s)e^{-\eta_n(s)}\big|
\leq  B_1+B_1|e^{-\eta_n(s)}-1|\,.
\end{gather*}
Therefore, by the stochastic Gronwall inequality \cite{Amano1},
we obtain the remaining inequality.
\end{proof}


\begin{remark}\label{remark2} \rm
By  Fubini's theorem and Lemma \ref{lemma5},
we can show that
\[
e^{\pm\eta_n(t)}=(e^{\pm\eta_n(t)}-1)+1\in M_w^2[0,T]
\]
and
\begin{gather*}
E\Bigl[\int_0^T  e^{2\eta_n(t)}dt\Bigr]
\leq  2\int_0^T  4t(A_1\sqrt{t}+B_1)^2
 e^{4t(A_1\sqrt{t}+B_1)^2} dt+2T\,,\\
E\Bigl[\int_0^T  e^{-2\eta_n(t)}dt\Bigr]
\leq  2\int_0^T  4t\bigl((A_1+B_1^2)\sqrt{t}+B_1\bigr)^2
 e^{4t((A_1+B_1^2)\sqrt{t}+B_1)^2}dt+2T
\end{gather*}
for any \(T > 0\).
\end{remark}

Martingale inequalities (see, for example, \cite{Friedman})
play important roles in the proof of our error estimate.

\begin{lemma}\label{lemma6}
If \(f(t) \in M_w^2[0,T]\), \(T > 0\), then
\[
P\Bigl[\sup_{0\le t\le T}\Big|\int_0^t f(s)\,dw(s)
\,\Big|>\alpha\Bigr]
\le\frac{1}{\alpha^2}\,E\Bigl[\int_0^T  f^2(s)\,ds\Bigr]
\]
for any positive number \(\alpha\).
\end{lemma}


\begin{lemma} \label{lemma7}
If \(f(t) \in L_w^2[0,T]\), \(T > 0\), then
\[
P\Bigl[\sup_{0\le t\le T}\Bigl(
\int_0^t f(s)\,dw(s)-\frac{\alpha}{2}\int_0^t f^2(s)\,ds
\Bigr)>\beta\Bigr]
\le e^{-\alpha\beta}
\]
for any positive numbers \(\alpha\) and \(\beta\).
\end{lemma}
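
We recall for completeness how Lemma \ref{lemma7} is obtained:
the exponential
\[
Z(t)=\exp\Bigl(\alpha\int_0^t f(s)\,dw(s)
-\frac{\alpha^2}{2}\int_0^t f^2(s)\,ds\Bigr)
\]
is a nonnegative supermartingale with \(Z(0)=1\),
so the maximal inequality gives
\(P\bigl[\sup_{0\le t\le T}Z(t)>e^{\alpha\beta}\bigr]
\le e^{-\alpha\beta}\),
which is a restatement of the asserted estimate.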

\begin{remark}\label{remark3} \rm
Since \(b_{1,n}(t) \in L_w^2[0,T]\),
\begin{align*}
\eta_n(t)
&= \int_0^t a_{1,n}(s)\,ds+\int_0^t b_{1,n}(s)\,dw(s)
-\frac{1}{2}\,\int_0^t \bigl(b_{1,n}(s)\bigr)^2 ds\\
&\leq A_1 t+\int_0^t b_{1,n}(s)\,dw(s)
-\frac{1}{2}\,\int_0^t \bigl(b_{1,n}(s)\bigr)^2 ds
\end{align*}
and
\begin{align*}
-\eta_n(t)
&= \int_0^t (-a_{1,n}(s))\,ds+\int_0^t \bigl(b_{1,n}(s)\bigr)^2 ds\\
&\quad +\int_0^t \bigl(-b_{1,n}(s)\bigr)\,dw(s)
-\frac{1}{2}\,\int_0^t \bigl(-b_{1,n}(s)\bigr)^2 ds\\
&\leq (A_1+B_1^2)\,t+\int_0^t \bigl(-b_{1,n}(s)\bigr)\,dw(s)
-\frac{1}{2}\,\int_0^t \bigl(-b_{1,n}(s)\bigr)^2 ds\,,
\end{align*}
it follows from Lemma \ref{lemma7} that
\begin{align*}
&P\bigl[\;\sup_{0\le s\le t}e^{\eta_n(s)}>R\,\bigr]\\
&\le P\Bigl[\sup_{0\le s\le t}
\Bigl(\int_0^s b_{1,n}(u)\,dw(u)
-\frac{1}{2}\,\int_0^s \bigl(b_{1,n}(u)\bigr)^2 du\Bigr)
>-A_1t+\log R\Bigr]\\
&\le e^{A_1 t}R^{- 1}
\end{align*}
and
\begin{align*}
&P\bigl[\;\sup_{0\le s\le t}e^{-\eta_n(s)}>R\,\bigr]\\
&\le P\Bigl[\sup_{0\le s\le t}\Bigl(\int_0^s \bigl(-b_{1,n}(u)\bigr)\,dw(u)
-\frac{1}{2}\,\int_0^s \bigl(-b_{1,n}(u)\bigr)^2 du\Bigr)
>-(A_1 + B_1^2)\,t+\log R\Bigr]\\
&\le e^{(A_1+B_1^2)\,t}\,R^{- 1}
\end{align*}
for all \(R \ge 1\) and \(0 \le  t \le  T\).
\end{remark}

\section{Proof of Theorem \ref{theorem1}}

\begin{proof}
Let us take real numbers \(R \ge 1\) and \(0 < \rho \le  1\) arbitrarily.
Then, by Lemma \ref{lemma4}, we can show that
\[
\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|>R\rho^2
\quad\text{and}\quad
\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho
\]
imply
\[
\sup_{0\le t\le T}e^{\eta_n(t)}>\sqrt{R}
\]
or
\[
\sup_{0\le t\le T}\int_0^t
\big|\alpha_{0,n}(s)-\beta_{0,n}(s) b_{1,n}(s)\big|e^{-\eta_n(s)}ds
>\frac{\sqrt{R}}{2}\rho^2
\]
or
\[
\sup_{0\le t\le T}
\,\Big|\int_0^t\beta_{0,n}(s)e^{-\eta_n(s)}dw(s)\Big|
>\frac{\sqrt{R}}{2}\rho^2
\]
for every \(n = 0,1,2,\dots\)\,.
Indeed, if none of the last three events occurs,
then Lemma \ref{lemma4} gives
\[
\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|
\le\sqrt{R}\Bigl(\frac{\sqrt{R}}{2}\rho^2
+\frac{\sqrt{R}}{2}\rho^2\Bigr)=R\rho^2\,,
\]
a contradiction.

By Remark \ref{remark3}, we easily have
\[
P\bigl[\,\sup_{0\le t\le T}e^{\eta_n(t)}>\sqrt{R}\;\bigr]
\le e^{TA_1}R^{- 1 / 2}\,.
\]
By
\[
\big|\,\alpha_{0,n}(s)-\beta_{0,n}(s) b_{1,n}(s)\,\big|
\le\frac{1}{2}(A_2+B_1B_2) \varepsilon_n^2(s)\,,
\]
Remark \ref{remark3} and direct computation, it follows that
\begin{align*}
&P\Big[\,\sup_{0\le t\le T}\int_0^t
\big|\alpha_{0,n}(s)-\beta_{0,n}(s) b_{1,n}(s)\big|e^{-\eta_n(s)}ds
>\frac{\sqrt{R}}{2}\rho^2
\text{ and }\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho\Bigr]\\
&\le T(A_2+B_1B_2)e^{T(A_1+B_1^2)}\,R^{- 1 / 2}\,.
\end{align*}
Since
\[
\beta_{0,n}(s)=\min\bigl(\varepsilon_n^2(s), \rho^2\bigr)
\int_0^1 (\theta-1)\,\frac{\partial^2 b}{\partial x^2}
\bigl(s,\xi_n(s)-\theta\varepsilon_n(s)\bigr)\,d\theta
\]
for \(0\le s\le T\)
when \(\sup_{0\le t\le T}|\varepsilon_n(t)| \le \rho\),
Lemma \ref{lemma6} and Remark \ref{remark2} show
\begin{align*}
&P\Bigl[\sup_{0\le t\le T}
\Big|\int_0^t\beta_{0,n}(s)e^{-\eta_n(s)}dw(s)\Big|
>\frac{\sqrt{R}}{2}\rho^2\text{ and }
\sup_{0\le t\le T}|\varepsilon_n(t)| \le \rho\Bigr]\\
& \le P\Bigl[\sup_{0\le t\le T}\,\Big|\;\int_0^t
\Bigl(\min\bigl(\varepsilon_n^2(s), \rho^2\bigr)
\int_0^1 (\theta-1)\,\frac{\partial^2 b}{\partial x^2}
\bigl(s,\xi_n(s)-\theta\varepsilon_n(s)\bigr)\,d\theta\Bigr)\\
&\quad \times e^{-\eta_n(s)}dw(s)\;\Big|>\frac{\sqrt{R}}{2}\rho^2\Bigr]\\
&\le 2B_2^2\Bigl(\int_0^T  4t\bigl((A_1+B_1^2)\sqrt{t}+B_1\bigr)^2
e^{4t((A_1+B_1^2)\sqrt{t}+B_1)^2}dt+T\Bigr) R^{- 1}\,.
\end{align*}
Combining the above estimates, we can show that
there exists a nonnegative constant
\(C=C(A_1,A_2,B_1,B_2,T)\)
independent of \(R\), \(\rho\) and \(n\) such that
\[
P\Bigl[\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|>R\rho^2
\text{ and }
\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho\Bigr]\le C\,R^{- 1 / 2}
\]
for  \(n = 0,1,2,\dots\).
Consequently, we have
\[
P\Bigl[\sup_{0\le t\le T}|\varepsilon_n(t)|\le\rho
\text{ implies }
\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|\le R\rho^2\Bigr]
\ge 1-C\,R^{- 1 / 2}
\]
for all \(R \ge 1\), \(0 < \rho \le  1\)
and every \(n = 0,1,2,\dots\).
\end{proof}

Finally, we give a slight improvement of Theorem \ref{theorem1}.
At the beginning of the proof of Theorem \ref{theorem1},
where we classified the events,
if we replace the lower bounds
\[
\sqrt{R}\,,\quad \frac{\sqrt{R}}{2}\rho^2\,,
\quad \frac{\sqrt{R}}{2}\rho^2
\]
with
\[
T^{-1 / 3}\sqrt{R}\,,\quad \frac{T^{1 / 3}\sqrt{R}}{2}\rho^2\,,
\quad \frac{T^{1 / 3}\sqrt{R}}{2}\rho^2
\]
respectively,
then we can show that the above constant
\(C(A_1,A_2,B_1,B_2,T)=O(T^{1 / 3})\)
as \(T \to  0\).
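Note that the modified bounds still produce the contradiction
\(\sup_{0\le t\le T}|\varepsilon_{n + 1}(t)|\le R\rho^2\)
in the proof, since
\[
T^{-1 / 3}\sqrt{R}\,\Bigl(\frac{T^{1 / 3}\sqrt{R}}{2}\rho^2
+\frac{T^{1 / 3}\sqrt{R}}{2}\rho^2\Bigr)=R\rho^2\,.
\]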
Therefore, our Newton's method may work better
when it is applied on a small time interval.

\begin{thebibliography}{0}

\bibitem{Amano1} K. Amano;
A stochastic Gronwall inequality and its applications,
\emph{J. Ineq. Pure Appl. Math.},
\textbf{6}(2005), Issue 1, Article 17, 1--5.

\bibitem{Amano2} K.~Amano;
Newton's method for stochastic differential equations
and its error estimate,
\emph{Proc. Japan Acad.}, \textbf{84}(2008), Ser. A, 1--3.

\bibitem{Cyganowsky} S.~Cyganowski, J.~Ombach and P.~E.~Kloeden;
\emph{From Elementary Probability
to Stochastic Differential Equations with MAPLE\/},
Springer, 2001.

\bibitem{Friedman} A.~Friedman;
\emph{Stochastic Differential Equations and Applications, Volume I},
Academic Press, 1975.

\bibitem{Kawabata} S.~Kawabata and T.~Yamada;
On Newton's method for stochastic differential equations,
\emph{S\'eminaire de Probabilit\'es}, XXV (1991), 121--137.

\end{thebibliography}

\end{document}
