Geometric distribution

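The geometric distribution is based on a (possibly infinite) sequence of independent and identically distributed binary trials, each with probability of success \(p\). There are two versions: (1) \(X\) counts the number of trials up to and including the first success, so \(X\in\{1,\,2,\,3,\,...\}\); (2) \(Y=X-1\) counts the number of failures before the first success, so \(Y\in\{0,\,1,\,2,\,...\}\).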

Basic properties

For the first version of the distribution that we saw in lecture, the properties are:

Notation \(X\sim\text{Geom}(p)\)
Range \(\{1,\,2,\,3,\,4,\,...\}\)
Parameter space \(p\in(0,\,1]\)
PMF \(P(X=k)=(1-p)^{k-1}p\)
Expectation \(1/p\)
Variance \(\frac{1-p}{p^2}\)

R commands

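R implements the second version of the distribution (the number of failures before the first success, with range \(\{0,\,1,\,2,\,...\}\)). To evaluate the first version at \(k\), shift the argument by one, e.g. `dgeom(k - 1, prob)`. R provides: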

dgeom(x, prob) # PMF: P(X = x)
pgeom(q, prob) # CDF: P(X <= q)
qgeom(p, prob) # quantile function (inverse CDF)
rgeom(n, prob) # random numbers

Play around!

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 500

library(shiny)

#' Draw the probability mass function of a discrete distribution as a spike plot.
#'
#' @param x Numeric vector of support points.
#' @param p Numeric vector of probabilities, one per element of `x`.
#' @param xlim Horizontal plotting range; defaults to one unit beyond the
#'   smallest and largest support points.
#' @param label Text placed in the top-right corner of the plot.
#' @param add_mean If `TRUE`, write "E(X)" in red below the x-axis at
#'   `sum(x * p)`.
#' @param ylim Vertical plotting range. Defaults to `c(0, 0.4)`; widen it when
#'   some probability exceeds 0.4, otherwise that spike is cut off.
#' @return `NULL`, invisibly. Called for its plotting side effect.
discrete_pmf <- function(x, p, xlim = c(min(x) - 1, max(x) + 1), label = "",
                         add_mean = FALSE, ylim = c(0, 0.4)) {
  # Dots at the top of each spike; both axes are suppressed here and drawn
  # by hand below so the tick positions can be controlled.
  plot(x, p,
       pch = 19, cex = 0.5,
       xlab = "", ylab = "", main = "",
       xlim = xlim, ylim = ylim,
       xaxt = "n", yaxt = "n", yaxs = "i",
       bty = "n")

  # One vertical spike per support point, from 0 up to its probability.
  # The baseline vector must have exactly length(x) entries: a longer vector
  # makes segments() recycle the other arguments and redraw the first spike.
  segments(x, rep(0, length(x)), x1 = x, y1 = p, lwd = 3)

  # Integer ticks on the x-axis, ticks every 0.1 on the y-axis.
  axis(1, at = floor(xlim[1]):ceiling(xlim[2]), cex.axis = 1)
  axis(2, at = seq(0, 1, length.out = 11), las = 1, cex.axis = 1.5)
  legend("topright", label, bty = "n", cex = 3)

  if (add_mean) {
    mtext("E(X)", side = 1, at = sum(x * p), col = "red", line = 2)
  }
  invisible(NULL)
}

#' Draw the cumulative distribution function of a discrete distribution as a
#' right-continuous step plot.
#'
#' @param x Numeric vector of support points, in increasing order.
#' @param p Numeric vector of probabilities, one per element of `x`.
#' @param xlim Horizontal plotting range; defaults to one unit beyond the
#'   smallest and largest support points.
#' @param label Text placed in the bottom-right corner of the plot.
#' @return The value of the final `legend()` call, invisibly. Called for its
#'   plotting side effect.
discrete_cdf <- function(x, p, xlim = c(min(x) - 1, max(x) + 1), label = "") {
  # CDF value attained at each support point (filled dot) ...
  closed_dot <- cumsum(p)
  # ... and the value just to the left of it (open circle): 0 before the first
  # point, then the previous CDF value. Dropping the last element explicitly
  # avoids the `1:length(x)-1` precedence trap, which evaluates to
  # `(1:length(x)) - 1` and only works because index 0 is silently ignored.
  open_circle <- c(0, closed_dot[-length(closed_dot)])

  plot(x, closed_dot,
       pch = 19, cex = 0.5,
       xlab = "", ylab = "", main = "",
       xlim = xlim, ylim = c(0, 1),
       xaxt = "n", yaxt = "n",
       bty = "n")
  points(x, open_circle, cex = 0.5)

  # Horizontal steps: height 0 from the left edge to the first point, then
  # each CDF value from its support point to the next one (or the right edge).
  step_height <- c(0, closed_dot)
  segments(c(xlim[1], x), step_height, c(x, xlim[2]), step_height, lwd = 1)

  # Integer ticks on the x-axis, ticks every 0.1 on the y-axis.
  axis(1, at = floor(xlim[1]):ceiling(xlim[2]), cex.axis = 1)
  axis(2, at = seq(0, 1, length.out = 11), las = 1, cex.axis = 1.5)
  legend("bottomright", label, bty = "n", cex = 3)
}

# UI: a slider for the success probability p next to a plot area that shows
# the geometric distribution's CDF and PMF.
ui <- fluidPage(

  # Application title
  titlePanel("Geometric distribution CDF and PMF"),

  sidebarLayout(
    # Sidebar with a slider for the success probability.
    # The minimum is 0.01 rather than 0: at p = 0 the geometric distribution
    # is undefined (dgeom() returns NaN and E(X) = 1/p is infinite), so the
    # plot could not be drawn.
    sidebarPanel(
      sliderInput("p",
                  "Probability of success (p):",
                  min = 0.01,
                  max = 1,
                  value = 0.5,
                  step = 0.01)
    ),

    # Plot area filled by output$distPlot
    mainPanel(
      plotOutput("distPlot")
    )
  )
)

# Server logic: whenever the slider changes, redraw the CDF (top panel) and
# PMF (bottom panel) of the geometric distribution on the support 1, ..., 30.
server <- function(input, output) {

  output$distPlot <- renderPlot({

    p <- input$p
    # dgeom() returns NaN for prob = 0 and E(X) = 1/p is infinite, so show a
    # message instead of letting plot() fail on non-finite values.
    validate(need(p > 0, "Choose p > 0: the geometric distribution is undefined at p = 0."))

    n <- 30
    support <- 1:n
    # R's dgeom() counts failures before the first success (range 0, 1, ...),
    # so shift by one to get the "number of trials" version plotted here.
    pmf <- dgeom(support - 1, p)

    par(mfrow = c(2, 1), mar = c(4, 4, 2, 2))

    discrete_cdf(support, pmf)

    # Anchor the y-axis at 0: with the default range the axis would start at
    # min(pmf) and the spikes, which are drawn from 0, would be clipped.
    plot(support, pmf, type = "h",
         ylim = c(0, max(pmf)),
         yaxs = "i",
         yaxt = "n",
         ylab = "",
         xlim = c(0, n + 1),
         xlab = "",
         xaxt = "n",
         bty = "n")
    axis(1, at = 0:(n + 1), cex.axis = 1)
    axis(2, at = seq(0, 1, length.out = 11), las = 1, cex.axis = 1.5)
    # Mark the theoretical mean 1/p below the x-axis.
    mtext("E(X)", side = 1, at = 1 / p, col = "red", line = 2)
  })
}

# Run the application: build the Shiny app object from the `ui` and `server` defined above
shinyApp(ui = ui, server = server)