;----------------------------------------------------------------------
;this subroutine needed for solving for c50 (inner quartile)
;----------------------------------------------------------------------
function newton_c50, c50

;value of p is transferred via common
  common newton_c50_common,pcom


p=pcom

;the following two are equivalent:

;using IDL cumulative function of normalized gaussian
  func=(1.-p)*(gauss_pdf(c50)-gauss_pdf(-c50))+$
            p*(gauss_pdf(c50/3.)-gauss_pdf(-c50/3.))$
             -0.5

;error-function
  func=(1-p)*erf(c50/sqrt(2.))+$
           p*erf(c50/sqrt(2.)/3.)$
           -0.5

return,func
end

;----------------------------------------------------------------------
;main
;----------------------------------------------------------------------
common newton_c50_common,pcom
program='exe2_gaussian'
ps=.1

;Illustrate the effect of outliers, in terms of a superposition of
;two Gaussians, with standard deviations sigma=1 and sigma=3 
;(variance sigma^2=1 and 9)
;Weight of the sigma=3 distribution (=outliers) given by paramater p
;  g1=1./sqrt(2*!pi)*exp(-0.5*x^2)
;  g2=1./sqrt(2*!pi)/3.*exp(-0.5*x^2/3.^2)
;  g=(1-p)*g1 + p*g2


;-----------------------------------------------------------
;create distributions with random numbers
;-----------------------------------------------------------
psdirect,program+'_a',ps,/color
!p.multi=[0,1,2]
nwin

;--------------------------
;first for p=0.05

  N=1000000l   ; number of points
  p=0.05
  N1=(1.-p)*N
  N2=p*N
  X1=[randomn(seed,N1),3*randomn(seed,N2)]


;plot results with histo_f procedure:
; x1=minimum of tabulation
; x2=maximum of tabulation
; dx=binsize of tabulation
; xbin = centers of bins
; ybin = number of points in the bin
; /noscale keyword -> 
;         histo_f returns yt =ybin
;         probability density pdf = yt/(dx*N)

  xmin=-10.
  xmax= 10.
  dx=0.05
  histo_f,X1,xmin,xmax,dx,xt1,yt1,/noscale
  pdf1=1.*yt1/(n_elements(x1)*dx)
  plot,xt1,pdf1,psym=10,xtitle='x',ytitle='pdf',$
    title='Two gaussians: effect of outliers'

;------------------------------------------------
;similarly for p=0.1 and p=0.2

  p=0.10
  N1=(1.-p)*N
  N2=p*N
  X2=[randomn(seed,N1),3*randomn(seed,N2)]
  histo_f,X2,xmin,xmax,dx,xt2,yt2,/noscale
  pdf2=1.*yt2/(n_elements(x2)*dx)
  oplot,xt2,pdf2,col=2,lines=2
  
  p=0.20
  N1=(1.-p)*N
  N2=p*N
  X3=[randomn(seed,N1),3*randomn(seed,N2)]
  histo_f,X3,xmin,xmax,dx,xt3,yt3,/noscale
  pdf3=1.*yt3/(n_elements(x3)*dx)
  oplot,xt3,pdf3,col=3,lines=1
    
  xyouts,0.15,.9,'pdf=(1-p)*Gauss(0,1)+p*Gauss(0,3^2)',/normal  
  label_data,0.67,.9,['p=0.05','p=0.1','p=0.2'],lines=[0,2,1],col=[1,2,3]


;-------------------------------------------------------------
;plot the same in logarithmic scale
;show also the theoretical functions 

  nwin
  plot,xt1,pdf1,psym=10,/ylog,yr=[1d-5,1]
  oplot,xt2,pdf2,psym=10,col=2,lines=2
  oplot,xt3,pdf3,psym=10,col=3,lines=1
  
;theoretical for p=0.05
  xarg=xt1
  p=0.05
  g1=1./sqrt(2*!pi)*exp(-0.5*xarg^2)
  g2=1./sqrt(2*!pi)/3.*exp(-0.5*xarg^2/3.^2)
  oplot,xarg,(1.-p)*g1
  oplot,xarg,p*g2
  oplot,xarg,(1.-p)*g1+p*g2,lines=0
  
  !p.multi=0

psdirect,program+'_a',ps,/stop


;------------------------------------------------
;compare RMS, mean deviation, and inner quartile width
;as a measure of the width of the distribution
;------------------------------------------------
psdirect,program+'_b',ps


;------------
;Analytically
;------------

;for values of p from 0 to 0.3

  parg=findgen(31)*.01

;root-mean-square: integral of x^2g(x)
  RMS=sqrt((1.-parg)+parg*3.^2)

;mean (absolute) deviation: integral of abs(x)g(x)
  MAD=2./sqrt(2.*!pi)*((1.-parg)*1. +parg/3.*9.)

;interquartile width c50: integral form -c50 to c50 g(x) equals 0.5
;need to solve numerically (G is the cumulative distribution of gaussian)
; (1-p)*(G1(c50)-G1(-c50))+p*(G2(c50)-G2(-c50))=0.5

;use Newton's method to find the root of
;  f(c50;p)=(1-p)*(G1(c50)-G1(-c50))+p*(G2(c50)-G2(-c50))-0.5
;this function f(c50;p) is returned by
;the function-procedure    newton_c50(c50)
;    p is passed to the function via a common block

  c50=parg*0
  for i=0,n_elements(parg)-1 do begin
      pcom=parg(i)                       ;passed via common to newton_c50
      p_ini=0.                        ;initial guess for iteration
      c50(i)=newton(p_ini,'newton_c50')   
  endfor

nwin
plot,parg,rms,lines=0,xtitle='p',ytitle='width of the distribution'
oplot,parg,mad,lines=2
oplot,parg,c50,lines=1

;--------------------------------------------
;numerically from the generated distributions
;------------;------------;------------;-----
;for p=0.05,0.1,0.2

for ip=1,3 do begin
    if(ip eq 1) then begin
        p=0.05
        pdf=pdf1
        x=x1
        xt=xt1
    endif

    if(ip eq 2) then begin
        p=0.1
        pdf=pdf2
        x=x2
        xt=xt2
    endif

    if(ip eq 3) then begin
        p=0.2
        pdf=pdf3
        x=x3
        xt=xt3
    endif

;directly from array of x

    rms=sqrt(mean(x^2))
    mad=mean(abs(x))
    x=x(sort(x))
    c50=0.5*(x(N*0.75)-x(N*0.25))    

    plots,[p,rms],psym=6,col=ip
    plots,[p,mad],psym=6,col=ip
    plots,[p,c50],psym=6,col=ip

;from the tabulated pdf's

    rms=sqrt(mean(xt^2*pdf)/mean(pdf))
    mad=mean(abs(xt)*pdf)/mean(pdf)
    sum=pdf*0.
    for ii=1,n_elements(xt)-1 do begin
        sum(ii)=sum(ii-1)+pdf(ii)*dx
    endfor

    ind=where(sum gt 0.25)
    ind25=ind(0)
;0.25 attained between xt(ind25) and xt(ind25-1)
;weigted mean
    c50m=(sum(ind25-1)*xt(ind25)+sum(ind25)*xt(ind25-1))/$
      (sum(ind25)+sum(ind25-1))+0.5*dx


    ind=where(sum gt 0.75)
    ind75=ind(0)
;0.75 attained between xt(ind75) and xt(ind75-1)
;weigted mean
    c50p=(sum(ind75-1)*xt(ind75)+sum(ind75)*xt(ind75-1))/$
      (sum(ind75)+sum(ind75-1))+0.5*dx

print,c50m,c50p
c50=(c50p-c50m)/2.
    plots,[p,rms],psym=2,col=ip
    plots,[p,mad],psym=2,col=ip
    plots,[p,c50],psym=2,col=ip


endfor

label_data,0.1,.9,['RMS','MD','C50'],lines=[0,2,1]


psdirect,program+'_b',ps,/stop


end