Simulation Study 1

library(magrittr)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0      ✔ purrr   1.0.0 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.5.0 
✔ readr   2.1.3      ✔ forcats 0.5.2 ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::extract()   masks magrittr::extract()
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::lag()       masks stats::lag()
✖ purrr::set_names() masks magrittr::set_names()
sim1 <- function(i){
  x1 <- rnorm(1000, 2)
  x2 <- rnorm(1000, -4)
  y <- 3 + 2 * x1 + 4 * x2 + rnorm(1000, sd = sqrt(2))
  df <- tibble(x1, x2, y)
  res <- df %$% lm(y ~ x1 + x2)
  return(c(coef(res),sigma(res)))
}
sim2 <- function(i){
  x1 <- rnorm(1000, 2)
  x2 <- rnorm(1000, -4)
  y <- 3 + 2 * x1 + 4 * x2 + rnorm(1000, sd = sqrt(2))
  df <- tibble(x1, x2, y)
  res <- df %$% lm(y ~ x1)
  return(c(coef(res),sigma(res)))
}

# sim1()
print("Correct Model")
[1] "Correct Model"
print("Average estimate")
[1] "Average estimate"
results1 <- sapply(1:1000, sim1)
rowMeans(results1)
(Intercept)          x1          x2             
   3.000889    1.996928    3.998753    1.413139 
print("Theoretical Standard Error Estimates")
[1] "Theoretical Standard Error Estimates"
apply(results1, 1, sd)
(Intercept)          x1          x2             
 0.20244465  0.04521681  0.04406489  0.03066468 
print("Incorrect Model")
[1] "Incorrect Model"
print("Average estimate")
[1] "Average estimate"
results2 <- sapply(1:1000, sim2)
rowMeans(results2)
(Intercept)          x1             
 -12.994301    1.998836    4.247681 
print("Theoretical Standard Error Estimates")
[1] "Theoretical Standard Error Estimates"
apply(results2, 1, sd)
(Intercept)          x1             
 0.28824676  0.13099029  0.09396826 

Simulation Study 2

sim3 <- function(i){
  x1 <- rnorm(1000, 8)
  # x2 <- rnorm(1000, -4)
  y <- 3 + 2 * log(x1)  + rnorm(1000, sd = sqrt(2))
  df <- tibble(x1, y)
  res <- df %$% lm(y ~ log(x1))
  return(c(coef(res),sigma(res)))
}
sim4 <- function(i){
  x1 <- rnorm(1000, 8)
  # x2 <- rnorm(1000, -4)
  y <- 3 + 2 * log(x1) + rnorm(1000, sd = sqrt(2))
  df <- tibble(x1, y)
  res <- df %$% lm(y ~ x1)
  return(c(coef(res),sigma(res)))
}


print("Correct Model")
[1] "Correct Model"
print("Average estimate")
[1] "Average estimate"
results1 <- sapply(1:1000, sim3)
rowMeans(results1)
(Intercept)     log(x1)             
   3.020812    1.989513    1.412930 
print("Theoretical Standard Error Estimates")
[1] "Theoretical Standard Error Estimates"
apply(results1, 1, sd)
(Intercept)     log(x1)             
 0.73301536  0.35310754  0.03167802 
print("Incorrect Model")
[1] "Incorrect Model"
print("Average estimate")
[1] "Average estimate"
results2 <- sapply(1:1000, sim4)
rowMeans(results2)
(Intercept)          x1             
  5.1104472   0.2539425   1.4129081 
print("Theoretical Standard Error Estimates")
[1] "Theoretical Standard Error Estimates"
apply(results2, 1, sd)
(Intercept)          x1             
 0.36313921  0.04505431  0.03127642 

Matrix Formulation

library(palmerpenguins)
penguins %<>% drop_na
ymat <- penguins$body_mass_g
print("flipper length and bill length")
[1] "flipper length and bill length"
xmat <- cbind(1, penguins$flipper_length_mm ,penguins$bill_length_mm)
solve(t(xmat)%*%xmat)%*%t(xmat)%*%ymat
             [,1]
[1,] -5836.298732
[2,]    48.889692
[3,]     4.958601
print("lm results")
[1] "lm results"
penguins %$% lm(body_mass_g ~ flipper_length_mm + bill_length_mm)

Call:
lm(formula = body_mass_g ~ flipper_length_mm + bill_length_mm)

Coefficients:
      (Intercept)  flipper_length_mm     bill_length_mm  
        -5836.299             48.890              4.959  
print("flipper length and island")
[1] "flipper length and island"
penguins %<>% mutate(island_torg = relevel(island, ref = "Torgersen"))
xmat <- penguins %$% model.matrix(~flipper_length_mm+island_torg)
solve(t(xmat)%*%xmat)%*%t(xmat)%*%ymat
                         [,1]
(Intercept)       -4889.32199
flipper_length_mm    44.88982
island_torgBiscoe   201.46081
island_torgDream    -63.90430
penguins %$% lm(body_mass_g ~ flipper_length_mm + island_torg)

Call:
lm(formula = body_mass_g ~ flipper_length_mm + island_torg)

Coefficients:
      (Intercept)  flipper_length_mm  island_torgBiscoe   island_torgDream  
         -4889.32              44.89             201.46             -63.90  
LS0tCnRpdGxlOiAiQ2xhc3MgMTMiCmF1dGhvcjogIklzYWFjIFF1aW50YW5pbGxhIFNhbGluYXMiCmRhdGU6ICJgciBmb3JtYXQoU3lzLnRpbWUoKSwnJW0tJWQtJVknKWAiCm91dHB1dDogCiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUKLS0tCgojIyBTaW11bGF0aW9uIFN0dWR5IDEKCmBgYHtyfQpsaWJyYXJ5KG1hZ3JpdHRyKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKc2ltMSA8LSBmdW5jdGlvbihpKXsKICB4MSA8LSBybm9ybSgxMDAwLCAyKQogIHgyIDwtIHJub3JtKDEwMDAsIC00KQogIHkgPC0gMyArIDIgKiB4MSArIDQgKiB4MiArIHJub3JtKDEwMDAsIHNkID0gc3FydCgyKSkKICBkZiA8LSB0aWJibGUoeDEsIHgyLCB5KQogIHJlcyA8LSBkZiAlJCUgbG0oeSB+IHgxICsgeDIpCiAgcmV0dXJuKGMoY29lZihyZXMpLHNpZ21hKHJlcykpKQp9CnNpbTIgPC0gZnVuY3Rpb24oaSl7CiAgeDEgPC0gcm5vcm0oMTAwMCwgMikKICB4MiA8LSBybm9ybSgxMDAwLCAtNCkKICB5IDwtIDMgKyAyICogeDEgKyA0ICogeDIgKyBybm9ybSgxMDAwLCBzZCA9IHNxcnQoMikpCiAgZGYgPC0gdGliYmxlKHgxLCB4MiwgeSkKICByZXMgPC0gZGYgJSQlIGxtKHkgfiB4MSkKICByZXR1cm4oYyhjb2VmKHJlcyksc2lnbWEocmVzKSkpCn0KCiMgc2ltMSgpCnByaW50KCJDb3JyZWN0IE1vZGVsIikKcHJpbnQoIkF2ZXJhZ2UgZXN0aW1hdGUiKQpyZXN1bHRzMSA8LSBzYXBwbHkoMToxMDAwLCBzaW0xKQpyb3dNZWFucyhyZXN1bHRzMSkKcHJpbnQoIlRoZW9yZXRpY2FsIFN0YW5kYXJkIEVycm9yIEVzdGltYXRlcyIpCmFwcGx5KHJlc3VsdHMxLCAxLCBzZCkKcHJpbnQoIkluY29ycmVjdCBNb2RlbCIpCnByaW50KCJBdmVyYWdlIGVzdGltYXRlIikKcmVzdWx0czIgPC0gc2FwcGx5KDE6MTAwMCwgc2ltMikKcm93TWVhbnMocmVzdWx0czIpCnByaW50KCJUaGVvcmV0aWNhbCBTdGFuZGFyZCBFcnJvciBFc3RpbWF0ZXMiKQphcHBseShyZXN1bHRzMiwgMSwgc2QpCmBgYAoKIyMgU2ltdWxhdGlvbiBTdHVkeSAyCgpgYGB7cn0Kc2ltMyA8LSBmdW5jdGlvbihpKXsKICB4MSA8LSBybm9ybSgxMDAwLCA4KQogICMgeDIgPC0gcm5vcm0oMTAwMCwgLTQpCiAgeSA8LSAzICsgMiAqIGxvZyh4MSkgICsgcm5vcm0oMTAwMCwgc2QgPSBzcXJ0KDIpKQogIGRmIDwtIHRpYmJsZSh4MSwgeSkKICByZXMgPC0gZGYgJSQlIGxtKHkgfiBsb2coeDEpKQogIHJldHVybihjKGNvZWYocmVzKSxzaWdtYShyZXMpKSkKfQpzaW00IDwtIGZ1bmN0aW9uKGkpewogIHgxIDwtIHJub3JtKDEwMDAsIDgpCiAgIyB4MiA8LSBybm9ybSgxMDAwLCAtNCkKICB5IDwtIDMgKyAyICogbG9nKHgxKSArIHJub3JtKDEwMDAsIHNkID0gc3FydCgyKSkKICBkZiA8LSB0aWJibGUoeDEsIHkpCiAgcmVzIDwtIGRmICUkJSBsbSh5IH4geDEpCiAgcmV0dXJuKGMoY29lZihyZXMpLHNpZ21hKHJlcykpKQp9CgoKcHJpbnQoIkNvcnJlY3QgTW9kZWwiKQpwcmludCgiQXZlcmFnZSBlc3RpbWF0ZSIpCnJlc3VsdHMxIDwtIHNhcHBseSgxOjEwMDAsIHNpbTMpCnJvd01lYW5zKHJlc3VsdHMxKQpwcmludCgiVGhlb3JldGljYWwgU3RhbmRhcmQgRXJyb3IgRXN0aW1hdGVzIikKYXBwbHkocmVzdWx0czEsIDEsIHNkKQpwcmludCgiSW5jb3JyZWN0IE1vZGVsIikKcHJpbnQoIkF2ZXJhZ2UgZXN0aW1hdGUiKQpyZXN1bHRzMiA8LSBzYXBwbHkoMToxMDAwLCBzaW00KQpyb3dNZWFucyhyZXN1bHRzMikKcHJpbnQoIlRoZW9yZXRpY2FsIFN0YW5kYXJkIEVycm9yIEVzdGltYXRlcyIpCmFwcGx5KHJlc3VsdHMyLCAxLCBzZCkKYGBgCgojIyBNYXRyaXggRm9ybXVsYXRpb24KCmBgYHtyfQpsaWJyYXJ5KHBhbG1lcnBlbmd1aW5zKQpwZW5ndWlucyAlPD4lIGRyb3BfbmEKeW1hdCA8LSBwZW5ndWlucyRib2R5X21hc3NfZwpwcmludCgiZmxpcHBlciBsZW5ndGggYW5kIGJpbGwgbGVuZ3RoIikKeG1hdCA8LSBjYmluZCgxLCBwZW5ndWlucyRmbGlwcGVyX2xlbmd0aF9tbSAscGVuZ3VpbnMkYmlsbF9sZW5ndGhfbW0pCnNvbHZlKHQoeG1hdCklKiV4bWF0KSUqJXQoeG1hdCklKiV5bWF0CnByaW50KCJsbSByZXN1bHRzIikKcGVuZ3VpbnMgJSQlIGxtKGJvZHlfbWFzc19nIH4gZmxpcHBlcl9sZW5ndGhfbW0gKyBiaWxsX2xlbmd0aF9tbSkKCnByaW50KCJmbGlwcGVyIGxlbmd0aCBhbmQgaXNsYW5kIikKcGVuZ3VpbnMgJTw+JSBtdXRhdGUoaXNsYW5kX3RvcmcgPSByZWxldmVsKGlzbGFuZCwgcmVmID0gIlRvcmdlcnNlbiIpKQp4bWF0IDwtIHBlbmd1aW5zICUkJSBtb2RlbC5tYXRyaXgofmZsaXBwZXJfbGVuZ3RoX21tK2lzbGFuZF90b3JnKQpzb2x2ZSh0KHhtYXQpJSoleG1hdCklKiV0KHhtYXQpJSoleW1hdApwZW5ndWlucyAlJCUgbG0oYm9keV9tYXNzX2cgfiBmbGlwcGVyX2xlbmd0aF9tbSArIGlzbGFuZF90b3JnKQoKYGBgCg==