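Sampling data: comparing how the sampling distribution of the proportion of red balls changes across samples of size 28, 53, and 118 drawn from the virtual bowl.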
library(tidyverse)
library(moderndive) #install before loading
Question: Modify the code for comparing different sample sizes from the virtual bowl.
Segment 1: sample size = 28
1a.) Take 1,150 virtual samples of size 28 from the bowl
# Draw 1,150 replicate samples of 28 balls each from the virtual bowl and
# print the result.
# NOTE(review): this object is named `virtual_sample_28` (singular); the later
# steps visible in this post use `virtual_samples_28` -- confirm whether this
# first draw is still needed.
virtual_sample_28 <- rep_sample_n(bowl, size = 28, reps = 1150)
virtual_sample_28
# A tibble: 32,200 x 3
# Groups: replicate [1,150]
replicate ball_ID color
<int> <int> <chr>
1 1 2372 white
2 1 991 white
3 1 1475 white
4 1 2341 white
5 1 256 white
6 1 1460 red
7 1 1026 white
8 1 763 white
9 1 1336 white
10 1 1630 white
# ... with 32,190 more rows
# 1,150 replicate samples of size 28 from the bowl; this is the object the
# proportion-red calculation for n = 28 reads from.
virtual_samples_28 <- rep_sample_n(bowl, size = 28, reps = 1150)
virtual_samples_28
# A tibble: 32,200 x 3
# Groups: replicate [1,150]
replicate ball_ID color
<int> <int> <chr>
1 1 2229 white
2 1 1146 white
3 1 2266 red
4 1 2350 white
5 1 1504 red
6 1 1377 white
7 1 1851 red
8 1 576 red
9 1 829 red
10 1 2206 red
# ... with 32,190 more rows
# For each replicate, count the red balls and convert the count to a
# proportion. Dividing by n() (the number of balls in the replicate) rather
# than a literal 28 keeps the proportion correct if the sample size used in
# rep_sample_n() is ever changed.
virtual_prop_red_28 <- virtual_samples_28 %>%
  group_by(replicate) %>%
  summarize(
    red = sum(color == "red"),
    prop_red = red / n()
  )
virtual_prop_red_28
# A tibble: 1,150 x 3
replicate red prop_red
* <int> <int> <dbl>
1 1 11 0.393
2 2 17 0.607
3 3 7 0.25
4 4 8 0.286
5 5 9 0.321
6 6 15 0.536
7 7 12 0.429
8 8 9 0.321
9 9 8 0.286
10 10 11 0.393
# ... with 1,140 more rows
# Histogram of the 1,150 proportions of red balls for samples of size 28.
# Bins are 0.05 wide with a bin edge at 0.4; white outlines separate the bars.
# The x-axis label now ends in "red", matching the labels on the plots for
# sample sizes 53 and 118.
ggplot(virtual_prop_red_28, aes(x = prop_red)) +
  geom_histogram(binwidth = 0.05, boundary = 0.4, color = "white") +
  labs(x = "Proportion of 28 balls that were red", title = "28")
Segment 2: sample size = 53
2a.) Take 1,150 virtual samples of size 53 from the bowl
# 1,150 replicate samples of 53 balls each from the virtual bowl.
virtual_samples_53 <- rep_sample_n(bowl, size = 53, reps = 1150)
2b.) Compute the resulting 1,150 replicates of the proportion red
# For each replicate, count the red balls and convert the count to a
# proportion. Dividing by n() (the number of balls in the replicate) rather
# than a literal 53 keeps the proportion correct if the sample size used in
# rep_sample_n() is ever changed.
virtual_prop_red_53 <- virtual_samples_53 %>%
  group_by(replicate) %>%
  summarize(
    red = sum(color == "red"),
    prop_red = red / n()
  )
virtual_prop_red_53
# A tibble: 1,150 x 3
replicate red prop_red
* <int> <int> <dbl>
1 1 21 0.396
2 2 23 0.434
3 3 23 0.434
4 4 12 0.226
5 5 23 0.434
6 6 23 0.434
7 7 28 0.528
8 8 25 0.472
9 9 18 0.340
10 10 14 0.264
# ... with 1,140 more rows
FALSE
[1] FALSE
2c.) Plot the distribution of virtual_prop_red_53 via a histogram
# Histogram of the 1,150 proportions of red balls for samples of size 53,
# using the same 0.05-wide bins anchored at 0.4 as the other sample sizes.
ggplot(data = virtual_prop_red_53, mapping = aes(x = prop_red)) +
  geom_histogram(
    color = "white",
    binwidth = 0.05,
    boundary = 0.4
  ) +
  labs(title = "53", x = "Proportion of 53 balls that were red")
Segment 3: sample size = 118
3a.) Take 1,150 virtual samples of size 118 from the bowl
# 1,150 replicate samples of 118 balls each from the virtual bowl.
virtual_samples_118 <- rep_sample_n(bowl, size = 118, reps = 1150)
3b.) Compute the resulting 1,150 replicates of the proportion red
# For each replicate, count the red balls and convert the count to a
# proportion. Dividing by n() (the number of balls in the replicate) rather
# than a literal 118 keeps the proportion correct if the sample size used in
# rep_sample_n() is ever changed.
virtual_prop_red_118 <- virtual_samples_118 %>%
  group_by(replicate) %>%
  summarize(
    red = sum(color == "red"),
    prop_red = red / n()
  )
3c.) Plot the distribution of virtual_prop_red_118 via a histogram
# Histogram of the 1,150 proportions of red balls for samples of size 118,
# using the same 0.05-wide bins anchored at 0.4 as the other sample sizes.
ggplot(data = virtual_prop_red_118, mapping = aes(x = prop_red)) +
  geom_histogram(
    color = "white",
    binwidth = 0.05,
    boundary = 0.4
  ) +
  labs(title = "118", x = "Proportion of 118 balls that were red")
# Standard deviation of the 1,150 sample proportions for sample size 28.
summarize(virtual_prop_red_28, sd = sd(prop_red))
# A tibble: 1 x 1
sd
<dbl>
1 0.0912
# Standard deviation of the 1,150 sample proportions for sample size 53.
summarize(virtual_prop_red_53, sd = sd(prop_red))
# A tibble: 1 x 1
sd
<dbl>
1 0.0632
# Standard deviation of the 1,150 sample proportions for sample size 118.
summarize(virtual_prop_red_118, sd = sd(prop_red))
# A tibble: 1 x 1
sd
<dbl>
1 0.0422