###
### ggplot2 with Diamonds data
###
### ggplot grammar:
###
###   ggplot(data, aes( )) +     (required)
###   geom_ and/or stat_ +       (one is required)
###   Facets                     (optional)
###   Coordinate system          (optional)
###   Themes                     (optional)
###   annotation                 (optional)
###
### See CheatSheet-ggplot2 for list of available functions
###
### Each section below reloads ggplot2 so it can be run on its own.
###########################################################################

###--- Dataset
library(ggplot2)

ggplot2::diamonds
diamonds      # it is tibble (data.frame)
              # 53940 rows and 10 variables:

?diamonds     # shows description
#
# price   : price in US dollars ($326–$18,823)
# carat   : weight of the diamond (0.2–5.01)
# cut     : quality of the cut (Fair, Good, Very Good, Premium, Ideal)
# color   : diamond colour, from D (best) to J (worst)
# clarity : a measurement of how clear the diamond is
#           (I1 (worst), SI2, SI1, VS2, VS1, VVS2, VVS1, IF (best))
# x       : length in mm (0–10.74)
# y       : width in mm (0–58.9)
# z       : depth in mm (0–31.8)
# depth   : total depth percentage = z / mean(x, y) = 2 * z / (x + y) (43–79)
# table   : width of top of diamond relative to widest point (43–95)


###--------------------------------------------------------------------------
###--- Scatter Plot (Continuous X vs Continuous Y) (geom_point)
library(ggplot2)
ggplot2::diamonds

# Plot carat vs price
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()

# Zoom in
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 1500))

# Add CUT using color
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point()

# Add smoothed line for each CUT
ggplot(diamonds, aes(x = carat, y = price, color = cut)) +
  geom_point() +
  stat_smooth()
# Alternative layers to try in the plot above:
#   geom_point(shape = 25, color = "orangered", fill = "limegreen")
#     (shape is same as pch)
#   stat_smooth(method = "lm", se = FALSE)
#     (lm, glm, gam, loess, rlm, etc.)

# Add CLARITY using shape
# NOTE: clarity has 8 levels but the default shape palette provides only 6,
# so expect a warning and no points drawn for the remaining levels.
ggplot(diamonds, aes(x = carat, y = price, color = cut, shape = clarity)) +
  geom_point()

# Add DEPTH by using size, and set alpha (opacity)
# alpha is a fixed setting, not a data mapping, so it belongs in the geom
# rather than in aes(); inside aes() it would be treated as a variable and
# produce a spurious legend.
ggplot(
  diamonds,
  aes(x = carat, y = price, color = cut, shape = clarity, size = depth)
) +
  geom_point(alpha = 0.1)


###--- Facetting (facet_grid and facet_wrap)

# Columns by CUT
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  facet_grid(. ~ cut)

# Rows by CUT
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  facet_grid(cut ~ .)

# Rows by CLARITY but wrapped
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  facet_wrap(~clarity)

# 2-way Matrix by CUT vs CLARITY
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  facet_grid(cut ~ clarity)


###--- Cutting and Zooming

# Cuts off data (points outside the limits are removed before plotting)
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  xlim(c(3, 5))

# Uses all data, just zooms in (slower)
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  coord_cartesian(xlim = c(3, 5))


###--------------------------------------------------------------------------
###--- Box Plot (Categorical X vs Continuous Y) (geom_boxplot)
library(ggplot2)
ggplot2::diamonds

# Boxplot of PRICE
ggplot(diamonds, aes(y = price)) +
  geom_boxplot()

# Use binned CARAT as X
# (cut(...) in call position is the base R binning function, not the
#  `cut` column of diamonds)
ggplot(
  diamonds,
  aes(x = cut(carat, breaks = 12), y = price, color = cut(carat, breaks = 12))
) +
  geom_boxplot()

ggplot(
  diamonds,
  aes(x = cut(carat, breaks = 0:6), y = price, fill = cut(carat, breaks = 0:6))
) +
  geom_boxplot()

# Add in CUT as 2nd X
ggplot(diamonds, aes(x = cut(carat, breaks = 0:6), y = price, color = cut)) +
  geom_boxplot()

ggplot(diamonds, aes(x = cut(carat, breaks = 0:6), y = price, fill = cut)) +
  geom_boxplot()

# Zooming in (limit the X on dataset)
diamonds01 <- diamonds[diamonds$carat >= 0.2 & diamonds$carat < 0.3, ]
ggplot(diamonds01, aes(x = cut, y = price, fill = cut)) +
  geom_boxplot()

# Zooming in on Y
# coord_cartesian() only changes the visible window; ylim() would drop every
# diamond priced above 1500 and the boxes would be computed from what is left.
ggplot(diamonds, aes(x = cut, y = price, fill = cut)) +
  geom_boxplot() +
  coord_cartesian(ylim = c(0, 1500))


###-----------------------------------
### Histogram (geom_histogram)
library(ggplot2)
ggplot2::diamonds

# Univariate Histogram
ggplot(data = diamonds, aes(x = price)) +
  geom_histogram()

# Change number of bins
ggplot(data = diamonds, aes(x = price)) +
  geom_histogram(bins = 10)

# Change width of bins
ggplot(data = diamonds, aes(x = price)) +
  geom_histogram(binwidth = 100)

# Add CUT as X2
ggplot(data = diamonds, aes(x = price, fill = cut)) +
  geom_histogram()

# Each bar is 100%
ggplot(data = diamonds, aes(x = price, fill = cut)) +
  geom_histogram(position = "fill")

# Make it side-by-side
ggplot(data = diamonds, aes(x = price, fill = cut)) +
  geom_histogram(position = "dodge")


###-----------------------------------
### Bar Plot (Frequency as Y on categorical X1 and categorical X2) (geom_bar)
library(ggplot2)
ggplot2::diamonds

# Limit the dataset
diamonds01 <- diamonds[diamonds$carat >= 0.2 & diamonds$carat < 0.3, ]

# Count as Y. CLARITY and CUT as X1 and X2
ggplot(diamonds01, aes(x = clarity, fill = cut)) +
  geom_bar()

# No gap b/w each X
ggplot(diamonds01, aes(x = clarity, fill = cut)) +
  geom_bar(width = 1)

# Each bar is 100%
ggplot(diamonds01, aes(x = clarity, fill = cut)) +
  geom_bar(position = "fill")

# Make it side by side
ggplot(diamonds01, aes(x = clarity, fill = cut)) +
  geom_bar(position = "dodge")


###-----------------------------------
### Pie Chart (geom_bar + coord_polar)
library(ggplot2)
ggplot2::diamonds

# Simple Bar Plot
ggplot(data = diamonds, aes(x = clarity, fill = clarity)) +
  geom_bar()

# Make it Pie Chart
ggplot(data = diamonds, aes(x = clarity, fill = clarity)) +
  geom_bar() +
  coord_polar()

# Make it Pie Chart with whole pie as 100%
ggplot(data = diamonds, aes(x = factor(1), fill = clarity)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")


###-----------------------------------
### Transformation
library(ggplot2)
ggplot2::diamonds

# Take log10 of X and Y
ggplot(diamonds, aes(x = log10(carat), y = log10(price))) +
  geom_point()

# Same shape, but the coordinate system is transformed instead of the data
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  coord_trans(x = "log10", y = "log10")

# Take sqrt of X and Y
ggplot(diamonds, aes(x = sqrt(carat), y = sqrt(price))) +
  geom_point()

# Same shape, but the coordinate system is transformed instead of the data
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point() +
  coord_trans(x = "sqrt", y = "sqrt")

# Transformation names accepted by coord_trans()
# NOTE(review): the names below follow the original table; the set actually
# available depends on the installed 'scales' version -- confirm before use.
#
#   Name        Function          Inverse
#   "exp"       exp(x)            log(y)
#   "log"       log(x)            exp(y)
#   "log10"     log10(x)          10^y
#   "log2"      log(x, 2)         2^y
#   "logit"     log(x / (1 - x))  1 / (1 + exp(-y))
#   "pow10"     10^x              log10(y)
#   "probit"    qnorm(x)          pnorm(y)
#   "recip"     x^(-1)            y^(-1)
#   "reverse"   -x                -y
#   "sqrt"      x^(1/2)           y^2
#   "asn"       asin(sqrt(x))     sin(y)^2


###-----------------------------------
### Layout ggplots
library(ggplot2)
ggplot2::diamonds

p1 <- ggplot(diamonds, aes(x = carat, y = price)) + geom_point()
p2 <- ggplot(diamonds, aes(x = carat, y = price)) + geom_point()
p3 <- ggplot(diamonds, aes(x = carat, y = price)) + geom_point()
p4 <- ggplot(diamonds, aes(x = carat, y = price)) + geom_point()

# ggarrange() is not part of ggplot2, so it is called through its namespace.
# NOTE(review): assumes the ggpubr implementation was intended (the egg
# package exports a function of the same name) -- confirm.
ggpubr::ggarrange(p1, p2, p3, p4, nrow = 2, ncol = 2)