In-Class_Ex05

Author

Siddharth Singh

Published

February 11, 2023

Modified

February 16, 2023

Correlation

Load necessary packages

pacman::p_load(ggtern, plotly, tidyverse, corrplot, ggstatsplot, seriation, dendextend, heatmaply)

Wine Data

wine <- read_csv("../data/wine_quality.csv", show_col_types = FALSE)
pairs(wine[,2:12])

panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y, use="complete.obs"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex.cor * (1 + r) / 2)
}

pairs(wine[,2:12], 
      upper.panel = panel.cor)

grouped_ggcorrmat(
  data = wine,
  cor.vars = 1:11,
  grouping.var = type,
  type = "robust",
  p.adjust.method = "holm",
  plotgrid.args = list(ncol = 2),
  ggcorrplot.args = list(outline.color = "black", 
                         hc.order = TRUE,
                         tl.cex = 10),
  annotation.args = list(
    tag_levels = "a",
    title = "Correlogram for wine dataset",
    subtitle = "The measures are: alcohol, sulphates, fixed acidity, citric acid, chlorides, residual sugar, density, free sulfur dioxide and volatile acidity",
    caption = "Dataset: UCI Machine Learning Repository"
  )
)

wine.cor <- cor(wine[, 1:11])
corrplot(wine.cor, 
         method = "ellipse", 
         type="lower")

pop_data <- read_csv("../data/respopagsex2000to2018_tidy.csv") 
agpop_mutated <- pop_data %>%
  mutate(`Year` = as.character(Year))%>%
  pivot_wider(names_from = AG, 
              values_from = Population) %>%
  mutate(YOUNG = rowSums(.[4:8]))%>%
  mutate(ACTIVE = rowSums(.[9:16]))  %>%
  mutate(OLD = rowSums(.[17:21])) %>%
  mutate(TOTAL = rowSums(.[22:24])) %>%
  filter(Year == 2018)%>%
  filter(TOTAL > 0)
ggtern(data=agpop_mutated,aes(x=YOUNG,y=ACTIVE, z=OLD)) +
  geom_point()

ggtern(data=agpop_mutated, aes(x=YOUNG,y=ACTIVE, z=OLD)) +
  geom_point() +
  labs(title="Population structure, 2015") +
  theme_rgbw()

label <- function(txt) {
  list(
    text = txt, 
    x = 0.1, y = 1,
    ax = 0, ay = 0,
    xref = "paper", yref = "paper", 
    align = "center",
    font = list(family = "serif", size = 15, color = "white"),
    bgcolor = "#b3b3b3", bordercolor = "black", borderwidth = 2
  )
}

axis <- function(txt) {
  list(
    title = txt, tickformat = ".0%", tickfont = list(size = 10)
  )
}

ternaryAxes <- list(
  aaxis = axis("Young"), 
  baxis = axis("Active"), 
  caxis = axis("Old")
)

plot_ly(
  agpop_mutated, 
  a = ~YOUNG, 
  b = ~ACTIVE, 
  c = ~OLD, 
  color = I("black"), 
  type = "scatterternary"
) %>%
  layout(
    annotations = label("Ternary Markers"), 
    ternary = ternaryAxes
  )

WH Data - Heatmap

wh <- read_csv("../data/WHData-2018.csv")
row.names(wh) <- wh$Country
wh1 <- dplyr::select(wh, c(3, 7:12))
wh_matrix <- data.matrix(wh)
wh_heatmap <- heatmap(wh_matrix,
                      Rowv=NA, Colv=NA)

wh_heatmap <- heatmap(wh_matrix)