#LyX 2.2 created this file. For more info see http://www.lyx.org/ \lyxformat 508 \begin_document \begin_header \save_transient_properties true \origin unavailable \textclass article \use_default_options true \begin_modules theorems-std \end_modules \maintain_unincluded_children false \language english \language_package default \inputencoding auto \fontencoding global \font_roman "default" "default" \font_sans "default" "default" \font_typewriter "default" "default" \font_math "auto" "auto" \font_default_family default \use_non_tex_fonts false \font_sc false \font_osf false \font_sf_scale 100 100 \font_tt_scale 100 100 \graphics default \default_output_format default \output_sync 0 \bibtex_command default \index_command default \paperfontsize default \spacing single \use_hyperref false \papersize default \use_geometry true \use_package amsmath 1 \use_package amssymb 1 \use_package cancel 1 \use_package esint 1 \use_package mathdots 1 \use_package mathtools 1 \use_package mhchem 1 \use_package stackrel 1 \use_package stmaryrd 1 \use_package undertilde 1 \cite_engine basic \cite_engine_type default \biblio_style plain \use_bibtopic false \use_indices false \paperorientation portrait \suppress_date false \justification true \use_refstyle 1 \index Index \shortcut idx \color #008000 \end_index \leftmargin 2cm \topmargin 2cm \rightmargin 2cm \bottommargin 2cm \secnumdepth 3 \tocdepth 3 \paragraph_separation indent \paragraph_indentation default \quotes_language english \papercolumns 1 \papersides 1 \paperpagestyle default \tracking_changes false \output_changes false \html_math_output 0 \html_css_as_file 0 \html_be_strict false \end_header \begin_body \begin_layout Title Know For Midterm 2018 \end_layout \begin_layout Author Mingyang Li \end_layout \begin_layout Abstract This is meant for STAT512 by Professor Ewens at the University of Pennsylvania. 
\end_layout \begin_layout Part Concepts \end_layout \begin_layout Section Basic Aims of Statistics \end_layout \begin_layout Itemize To \series bold estimate \series default the range of a \series bold parameter \series default \bar under optimally \bar default . \end_layout \begin_layout Itemize To \series bold test hypotheses \series default about the numerical value of the parameter \bar under optimally \bar default . \end_layout \begin_layout Section Statistics \end_layout \begin_layout Standard Statistics is an inferential science based on observations involving randomness. \end_layout \begin_layout Section Quantities \end_layout \begin_layout Itemize A " \series bold random variable \series default ", \begin_inset Formula $Y$ \end_inset , follows a distribution which depends on some \series bold parameter \series default \begin_inset Formula $\theta$ \end_inset . \end_layout \begin_deeper \begin_layout Itemize We want to estimate the \bar under parameter \bar default \begin_inset Formula $\theta$ \end_inset , but -- more often -- we estimate a one-to-one function of it, \begin_inset Formula $\tau(\theta)$ \end_inset . In either case, the quantity we want to estimate is called the \series bold estimand \series default . \end_layout \begin_layout Itemize A function involving an RV \begin_inset Formula $Y$ \end_inset , \begin_inset Formula $f(Y,...)$ \end_inset , is also an RV. \end_layout \end_deeper \begin_layout Itemize Any function \begin_inset Formula $f(Y)$ \end_inset of the RV \begin_inset Formula $Y$ \end_inset alone can be seen as an \series bold estimator \series default for the \bar under estimand \bar default \begin_inset Formula $\tau(\theta)$ \end_inset associated with its distribution. 
\end_layout \begin_deeper \begin_layout Itemize If the mean of this function, \begin_inset Formula $\text{E}\left[f(Y)\right]$ \end_inset , happens to be the \bar under estimand \bar default itself, then this function -- as an \bar under estimator \bar default -- is \series bold unbiased \series default . \end_layout \begin_deeper \begin_layout Itemize The \series bold MVU (“minimal variance unbiased”) estimator \series default of \begin_inset Formula $\tau(\theta)$ \end_inset : The \bar under unbiased estimator \bar default of \begin_inset Formula $\tau(\theta)$ \end_inset whose variance is ≤ any other \bar under unbiased estimator \bar default of \begin_inset Formula $\tau(\theta)$ \end_inset . \end_layout \end_deeper \begin_layout Itemize The value an estimator takes on (or "yields") is called an \series bold estimate \series default . \end_layout \end_deeper \begin_layout Itemize \series bold Sufficient Statistics \series default , \begin_inset Formula $w(Y_{1},...,Y_{n})$ \end_inset , of a parameter, \begin_inset Formula $\theta$ \end_inset , is a function of the \begin_inset Formula $n$ \end_inset iid RVs whose JDF will become independent of this parameter if \begin_inset Formula $w$ \end_inset is given. \end_layout \begin_deeper \begin_layout Itemize The \series bold Minimal Non-Trivial Sufficient Statistics (MNTSS) \series default has two constraints over the ordinary definition of SS: \end_layout \begin_deeper \begin_layout Itemize \bar under Minimality \bar default : Any other SS can be reduced (read: "transformed via a function") into this SS. 
\end_layout \begin_layout Itemize \bar under Non-triviality: \bar default The dimension of this SS should be \begin_inset Formula $<n$ \end_inset . \end_layout \end_deeper \end_deeper \begin_layout Section Gamma Function \end_layout \begin_layout Itemize Evaluating \begin_inset Formula $\Gamma\left(x\right)$ \end_inset : \begin_inset Note Note status open \begin_layout Plain Layout NOTE(review): the text between the non-triviality condition and this point was lost when the file was extracted; this section heading and this item are placeholders -- restore the original wording from the author's copy. \end_layout \end_inset \end_layout \begin_deeper \begin_layout Itemize When \begin_inset Formula $x>0$ \end_inset but is not an integer: Use the Recurrence Relation \begin_inset Formula $\Gamma\left(x+1\right)=x\cdot\Gamma\left(x\right)$ \end_inset to strip the \begin_inset Quotes eld \end_inset \begin_inset Formula $x$ \end_inset \begin_inset Quotes erd \end_inset to the lowest number \begin_inset Formula $\in(1,2)$ \end_inset , then plug in the value as given in the table. \end_layout \end_deeper \begin_layout Itemize Integrals involving Gamma Function: \end_layout \begin_deeper \begin_layout Itemize \begin_inset Formula $\int_{0}^{\infty}t^{x-1}e^{-ct}dt=c^{-x}\cdot\Gamma\left(x\right)$ \end_inset \end_layout \begin_layout Itemize \begin_inset Formula $\int_{0}^{\infty}g(t)\cdot e^{-h(t)}dt$ \end_inset : often helpful to set \begin_inset Formula $h(t)=:t'$ \end_inset . \end_layout \end_deeper \begin_layout Section The density functions of order statistics (OS) of \begin_inset Formula $n$ \end_inset iid continuous RVs \begin_inset Formula $Y_{i}\sim f(y)$ \end_inset \end_layout \begin_layout Itemize The \begin_inset Formula $i$ \end_inset -th OS alone: \begin_inset Formula $f_{Y_{\left(i\right)}}\left(y_{\left(i\right)}\right)=\frac{n!}{\left(i-1\right)!\left(n-i\right)!}\left[F_{Y}\left(y_{\left(i\right)}\right)\right]^{i-1}\cdot f_{Y}\left(y_{\left(i\right)}\right)\cdot\left[1-F_{Y}\left(y_{\left(i\right)}\right)\right]^{n-i}$ \end_inset \end_layout \begin_layout Itemize The JDF of the \begin_inset Formula $i$ \end_inset -th OS and the \begin_inset Formula $j$ \end_inset -th OS: \begin_inset Formula $f_{Y_{\left(i\right)},Y_{\left(j\right)}}\left(y_{\left(i\right)},y_{\left(j\right)}\right)=\frac{n!}{\left(i-1\right)!\left(j-i-1\right)!\left(n-j\right)!}\left[F_{Y}\left(y_{\left(i\right)}\right)\right]^{i-1}\cdot f_{Y}\left(y_{\left(i\right)}\right)\cdot\left[F_{Y}\left(y_{\left(j\right)}\right)-F_{Y}\left(y_{\left(i\right)}\right)\right]^{j-i-1}\cdot 
f_{Y}\left(y_{\left(j\right)}\right)\cdot\left[1-F_{Y}\left(y_{\left(j\right)}\right)\right]^{n-j}$ \end_inset \end_layout \begin_layout Section The Cramer-Rao Lower Bound of the Variance of an Estimator \end_layout \begin_layout Itemize This Bound is \series bold achievable \begin_inset Foot status open \begin_layout Plain Layout \begin_inset Quotes eld \end_inset There exists an estimand of \begin_inset Formula $\theta$ \end_inset , \begin_inset Formula $\tau\left(\theta\right)$ \end_inset , that has an unbiased estimator, \begin_inset Formula $\hat{\tau}_{\text{MVU}}\left(y_{1},...,y_{n}\right)$ \end_inset , whose variance is this value. \begin_inset Quotes erd \end_inset \end_layout \end_inset \series default iff the JDF \begin_inset Formula $f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)$ \end_inset can be written in the \begin_inset Quotes eld \end_inset exponential family \begin_inset Quotes erd \end_inset form: \begin_inset Formula \[ f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)=h(y_{1},...,y_{n})\cdot e^{C\left(\theta\right)+D\left(\theta\right)\cdot\hat{\tau}_{\text{MVU}}\left(y_{1},...,y_{n}\right)} \] \end_inset \begin_inset Foot status open \begin_layout Plain Layout As you convert it into this form, at the same time, the MVU estimator \begin_inset Formula $\hat{\tau}_{\text{MVU}}\left(y_{1},...,y_{n}\right)$ \end_inset is identified. \end_layout \end_inset \end_layout \begin_layout Itemize The Bound is given by: \begin_inset Foot status collapsed \begin_layout Plain Layout The MVU estimator \begin_inset Formula $\hat{\tau}_{\text{MVU}}\left(y_{1},...,y_{n}\right)$ \end_inset may not exist / be known by the time you evaluate this Bound. 
\end_layout \end_inset \begin_inset Formula $\text{Var}\left[\hat{\tau}\left(y_{1},...,y_{n}\right)\right]\ge$ \end_inset \begin_inset Formula \[ \text{Var}\left[\hat{\tau}_{\text{MVU}}\left(y_{1},...,y_{n}\right)\right]=\frac{-\left(\frac{\partial}{\partial\theta}\tau\left(\theta\right)\right)^{2}}{\text{E}\left[\frac{\partial^{2}}{\partial\theta^{2}}\ln f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)\right]}\begin{array}{c} \leftarrow\text{is }-1\text{ if }\tau\left(\theta\right)=\theta\\ \leftarrow\text{is }n\cdot\text{E}\left[\frac{\partial^{2}}{\partial\theta^{2}}\ln f_{Y}\left(y;\theta\right)\right]\text{ if iid} \end{array} \] \end_inset \end_layout \begin_layout Itemize Such an estimand \begin_inset Formula $\tau\left(\theta\right)$ \end_inset is given by \begin_inset Formula \[ \tau\left(\theta\right)=-\frac{\frac{\partial}{\partial\theta}C\left(\theta\right)}{\frac{\partial}{\partial\theta}D\left(\theta\right)}\text{, or }=-\frac{A\left(\theta\right)}{B\left(\theta\right)}\text{, where }A\left(\theta\right):=\frac{\partial}{\partial\theta}C\left(\theta\right)\text{ and }B\left(\theta\right):=\frac{\partial}{\partial\theta}D\left(\theta\right). 
 \] \end_inset \end_layout \begin_layout Itemize After this estimand is found, the variance of its estimator can be calculated by: \end_layout \begin_deeper \begin_layout Itemize CR Bound \end_layout \begin_layout Itemize Traditional statistics \end_layout \begin_layout Itemize \begin_inset Formula $\text{Var}\left[\hat{\tau}\left(y_{1},...,y_{n}\right)\right]=\frac{-1}{B\left(\theta\right)}\cdot\frac{d}{d\theta}\frac{A\left(\theta\right)}{B\left(\theta\right)}$ \end_inset \end_layout \end_deeper \begin_layout Section Sufficient Statistics (SS), \begin_inset Formula $w(Y_{1},...,Y_{n})$ \end_inset , for a parameter \begin_inset Formula $\theta$ \end_inset \end_layout \begin_layout Standard For \begin_inset Formula $n$ \end_inset RVs, \begin_inset Formula $Y_{1},...,Y_{n}$ \end_inset , whose JDF is \begin_inset Formula $f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)$ \end_inset , a function \begin_inset Formula $w:=w(Y_{1},...,Y_{n})$ \end_inset is a SS for the parameter \begin_inset Formula $\theta$ \end_inset iff the conditional distribution of those RVs – given \begin_inset Formula $w$ \end_inset – is independent of \begin_inset Formula $\theta$ \end_inset : \begin_inset Foot status open \begin_layout Plain Layout \begin_inset Formula $w$ \end_inset is like a sponge on a wet plate \begin_inset Formula $f_{Y_{1},...,Y_{n}}$ \end_inset : it \series bold sucks up \series default all the information contained in the water \begin_inset Formula $\theta$ \end_inset . \end_layout \end_inset \begin_inset Formula \begin{align*} f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n}|w;\theta\right)\text{, by definition} & \equiv\frac{f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n},w;\theta\right)}{f_{W}\left(w;\theta\right)}\\ \text{this is equivalently:} & =\frac{f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)}{f_{W}\left(w;\theta\right)}\\ \text{core of this "iff"}\rightarrow & =h\left(Y_{1},...,Y_{n}\right)\text{ (i.e., indep. 
of \ensuremath{\theta})}\\ & \Leftrightarrow w(Y_{1},...,Y_{n})\text{ is a SS for }\theta. \end{align*} \end_inset \end_layout \begin_layout Standard (Reason for the equivalence on the second line: \family roman \series medium \shape up \size normal \emph off \bar no \strikeout off \uuline off \uwave off \noun off \color none Since \begin_inset Formula $w$ \end_inset is a function of \begin_inset Formula $Y_{i}$ \end_inset 's, when \begin_inset Formula $Y_{i}$ \end_inset 's are all specified, \begin_inset Formula $w$ \end_inset is also determined. \family default \series default \shape default \size default \emph default \bar default \strikeout default \uuline default \uwave default \noun default \color inherit ) \end_layout \begin_layout Standard This expression is equivalent to: \begin_inset Formula \[ f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)=f_{W}\left(w;\theta\right)\cdot h\left(y_{1},...,y_{n}\right)\Leftrightarrow w(Y_{1},...,Y_{n})\text{ is a SS for }\theta. \] \end_inset \end_layout \begin_layout Standard If the support of \family roman \series medium \shape up \size normal \emph off \bar no \strikeout off \uuline off \uwave off \noun off \color none \begin_inset Formula $Y_{i}$ \end_inset 's is independent of the parameter \begin_inset Formula $\theta$ \end_inset , then this is also equivalent to: \begin_inset Formula \[ f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)=g\left(w;\theta\right)\cdot h\left(y_{1},...,y_{n}\right)\Leftrightarrow w(Y_{1},...,Y_{n})\text{ is a SS for }\theta \] \end_inset \end_layout \begin_layout Standard where \begin_inset Formula $g$ \end_inset is any function of \begin_inset Formula $w$ \end_inset and \begin_inset Formula $\theta$ \end_inset (it need not be the density of \begin_inset Formula $w$ \end_inset ). 
\end_layout \begin_layout Subsection Minimal, Non-Trivial Sufficient Statistics (MNTSS) – How To Find \end_layout \begin_layout Subsubsection When the support of \begin_inset Formula $Y_{i}$ \end_inset 's is independent of \begin_inset Formula $\theta$ \end_inset \end_layout \begin_layout Paragraph Method 1: Factorization \end_layout \begin_layout Standard If: \end_layout \begin_layout Itemize the JDF \begin_inset Formula $f_{Y_{1},...,Y_{n}}\left(y_{1},...,y_{n};\theta\right)$ \end_inset can be factorized into \family roman \series medium \shape up \size normal \emph off \bar no \strikeout off \uuline off \uwave off \noun off \color none \begin_inset Formula $f_{W}\left(w;\theta\right)\cdot h\left(y_{1},...,y_{n}\right)$ \end_inset , \family default \series bold \shape default \size default \emph default \bar default \strikeout default \uuline default \uwave default \noun default \color inherit and \end_layout \begin_layout Itemize \begin_inset Formula $\dim\left(w\right)<n$ \end_inset , \begin_inset Note Note status open \begin_layout Plain Layout NOTE(review): the text between this condition and the distribution table below was lost when the file was extracted (the conclusion of Method 1 and any following methods/headings); the section heading that follows is a placeholder -- restore the original wording from the author's copy. \end_layout \end_inset \end_layout \begin_layout Section Common Distributions \end_layout \begin_layout Standard \begin_inset Tabular <lyxtabular version="3" rows="7" columns="4"> <features tabularvalignment="middle"> <column alignment="center" valignment="top"> <column alignment="center" valignment="top"> <column alignment="center" valignment="top"> <column alignment="center" valignment="top">
<row> <cell alignment="center" valignment="top" topline="true" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout Name \end_layout \end_inset </cell> <cell alignment="center" valignment="top" topline="true" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout Expression \end_layout \end_inset </cell> <cell alignment="center" valignment="top" topline="true" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout Mean \end_layout \end_inset </cell> <cell alignment="center" valignment="top" topline="true" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout Variance \end_layout \end_inset </cell> </row>
<row> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout Normal( \begin_inset Formula $\mu,\sigma^{2}$ \end_inset ) \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\frac{\ensuremath{1}}{\sqrt{2\pi}\sigma}e^{-\frac{\left(y-\mu\right)^{2}}{2\sigma^{2}}}$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\mu$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\sigma^{2}$ \end_inset \end_layout \end_inset </cell> </row>
<row> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout Gamma( \begin_inset Formula $\alpha,\beta$ \end_inset ) \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\frac{1}{\Gamma\left(\alpha\right)\beta^{\alpha}}y^{\alpha-1}e^{-\frac{y}{\beta}}$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\alpha\beta$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\alpha\beta^{2}$ \end_inset \end_layout \end_inset </cell> </row>
<row> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout Cauchy( \begin_inset Formula $\theta,\sigma$ \end_inset ) \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\frac{\ensuremath{1}}{\pi\sigma}\cdot\frac{1}{1+\left(\frac{y-\theta}{\sigma}\right)^{2}}$ \end_inset , \begin_inset Formula $\sigma>0$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout D.N.E. \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout D.N.E. \end_layout \end_inset </cell> </row>
<row> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Quotes eld \end_inset Chi-2 \begin_inset Quotes erd \end_inset \begin_inset Formula $\chi^{2}(\nu)$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\frac{\ensuremath{1}}{2^{\frac{\nu}{2}}\cdot\Gamma\left(\frac{\nu}{2}\right)}\cdot y^{\frac{\nu}{2}-1}\cdot e^{-\frac{y}{2}}$ \end_inset , \begin_inset Formula $y>0$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\nu$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $2\nu$ \end_inset \end_layout \end_inset </cell> </row>
<row> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout Binomial( \begin_inset Formula $n,p$ \end_inset ) \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout Prob \begin_inset Formula $\left(Y=y\right)=\binom{n}{y}p^{y}\left(1-p\right)^{n-y}$ \end_inset , \begin_inset Formula $y=0,...,n$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $np$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $np(1-p)$ \end_inset \end_layout \end_inset </cell> </row>
<row> <cell alignment="center" valignment="top" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout Poisson( \begin_inset Formula $\lambda$ \end_inset ) \end_layout \end_inset </cell> <cell alignment="center" valignment="top" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout Prob \begin_inset Formula $\left(Y=y\right)=e^{-\lambda}\frac{\lambda^{y}}{y!}$ \end_inset , \family roman \series medium \shape up \size normal \emph off \bar no \strikeout off \uuline off \uwave off \noun off \color none \begin_inset Formula $y=0,1,...$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\lambda$ \end_inset \end_layout \end_inset </cell> <cell alignment="center" valignment="top" bottomline="true" usebox="none"> \begin_inset Text \begin_layout Plain Layout \begin_inset Formula $\lambda$ \end_inset \end_layout \end_inset </cell> </row>
</lyxtabular> \end_inset \end_layout \begin_layout Subsection Conversion Between Distributions \end_layout \begin_layout Itemize (Any) Normal Distribution \begin_inset Formula $\rightarrow$ \end_inset Standard Normal Distribution: If \begin_inset Formula $Y\sim N\left(\mu,\sigma^{2}\right)$ \end_inset , then \begin_inset Formula $\frac{Y-\mu}{\sigma}\sim N\left(0,1\right)$ \end_inset . \end_layout \begin_layout Itemize Standard Normal Distribution \begin_inset Formula $\rightarrow$ \end_inset Chi-Square Distribution: If \begin_inset Formula $Y\sim N\left(0,1\right)$ \end_inset , then \begin_inset Formula $Y^{2}\sim\chi^{2}\left(\nu=1\right)$ \end_inset . \end_layout \begin_layout Subsection Properties of Chi-Square Distribution \end_layout \begin_layout Itemize The sum of independent \begin_inset Formula $\chi^{2}$ \end_inset -distributed RVs is another \begin_inset Formula $\chi^{2}$ \end_inset -distributed RV with a degree-of-freedom of the sum of those of the summand RVs: \begin_inset Formula $Y_{i}\sim\chi^{2}(\nu_{i})$ \end_inset independently for \begin_inset Formula $i=1,...,n$ \end_inset \begin_inset Formula $\Rightarrow\sum_{i=1}^{n}Y_{i}\sim\chi^{2}\left(\sum_{i=1}^{n}\nu_{i}\right)$ \end_inset . 
\end_layout \begin_layout Subsection Properties of Poisson Distribution \end_layout \begin_layout Itemize The sum of independent Poisson-distributed RVs is another Poisson-distributed RV with a \begin_inset Formula $\lambda$ \end_inset of the sum of those of the summand RVs: \begin_inset Formula $Y_{i}\sim\text{Poisson}(\lambda_{i})$ \end_inset independently for \begin_inset Formula $i=1,...,n$ \end_inset \begin_inset Formula $\Rightarrow\sum_{i=1}^{n}Y_{i}\sim\text{Poisson}\left(\sum_{i=1}^{n}\lambda_{i}\right)$ \end_inset . \end_layout \begin_layout Itemize If the sum of some independent Poisson-distributed RVs is fixed (i.e., conditioned on), then any partial sum of these RVs is a binomially-distributed RV whose \end_layout \begin_deeper \begin_layout Itemize index \begin_inset Formula $n$ \end_inset is equal to the fixed total sum; \end_layout \begin_layout Itemize parameter \begin_inset Formula $p$ \end_inset is equal to the ratio \begin_inset Formula $\frac{\sum_{\text{partial sum}}\lambda_{j}}{\sum_{\text{total sum}}\lambda_{i}}$ \end_inset . \end_layout \end_deeper \begin_layout Itemize (Continuing from above) When the \begin_inset Formula $n$ \end_inset summand RVs are iid, the partial sum of any \begin_inset Formula $j$ \end_inset of them \begin_inset Formula $\sim\text{Binomial}\left(\text{total sum},\frac{j}{n}\right)$ \end_inset . \end_layout \end_body \end_document