# Churn data: load, inspect, and draw two bar charts ----

# Location of the comma-separated churn data (hard-coded local path).
churn_path <- "C:/2018/churn.txt"

# Read the same file twice to compare how text columns are imported:
#   churn  - text columns converted to factors
#   churn2 - text columns kept as character vectors
# stringsAsFactors is spelled out for `churn` because the read.csv() default
# changed from TRUE to FALSE in R 4.0.0. summary(churn$Churn) below only
# yields per-level counts (which barplot() can draw) when Churn is a factor.
churn <- read.csv(file = churn_path, stringsAsFactors = TRUE)
churn2 <- read.csv(file = churn_path, stringsAsFactors = FALSE)

# First rows and column structure of both imports.
# head() is used instead of churn[1:10, ] so short data is not padded with
# all-NA rows.
head(churn, n = 10L)
str(churn)
str(churn2)

# Open the help page for read.csv (interactive use).
?read.csv

head(churn, n = 5L)
head(churn2, n = 5L)

# Counts per level of the Churn column.
sum.churn <- summary(churn$Churn)
sum.churn

# Bar chart of the Churn counts, with a solid black frame around the plot.
barplot(
  sum.churn,
  ylim = c(0, 3000),
  main = "Bar Graph of Churners and Non-Churners",
  col = "lightblue"
)
box(which = "plot", lty = "solid", col = "black")

# Cross-tabulation: Churn (rows) by International Plan (columns).
counts <- table(
  churn$Churn,
  churn$Int.l.Plan,
  dnn = c("Churn", "International Plan")
)
names(churn)
counts

# Stacked bar chart of the cross-tabulation; one colour per Churn level.
# `legend.text` is written out in full rather than relying on partial
# matching of `legend`.
barplot(
  counts,
  legend.text = rownames(counts),
  col = c("blue", "red"),
  ylim = c(0, 3300),
  ylab = "Count",
  xlab = "International Plan",
  main = "January 16th Bar Chart from page 83 R Zone"
)
# ggplot2 qplot() walkthrough on the mpg data ----

# Install ggplot2 only when it is missing, so re-running the script does not
# reinstall the package (and does not need network access) every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# NOTE(review): qplot() is deprecated in recent ggplot2 releases (3.4.0+);
# the calls below still work but emit a deprecation warning. Consider
# ggplot() + geom_*() if this script is modernised.

# Look at the data from the ggplot2 library that we're going to use -
# miles per gallon
?mpg
head(mpg)
str(mpg)
names(mpg)

# Basic scatterplot
qplot(displ, hwy, data = mpg)

# Add an additional variable with aesthetics: colour, shape, size
qplot(displ, hwy, data = mpg, colour = class)
qplot(displ, hwy, data = mpg, colour = cyl)
qplot(displ, hwy, data = mpg, shape = factor(cyl))
qplot(displ, hwy, data = mpg, shape = factor(cyl), colour = factor(cyl))

# Add an additional variable with faceting
qplot(displ, hwy, data = mpg)
qplot(displ, hwy, data = mpg) + facet_grid(. ~ cyl)
qplot(displ, hwy, data = mpg) + facet_grid(drv ~ .)
qplot(displ, hwy, data = mpg) + facet_grid(drv ~ cyl)
qplot(displ, hwy, data = mpg) + facet_wrap(~ class)

# Deal with overplotting by using JITTER
qplot(cty, hwy, data = mpg)
qplot(cty, hwy, data = mpg, geom = "jitter")
qplot(cty, hwy, data = mpg, geom = "jitter", colour = year)
qplot(cty, hwy, data = mpg, geom = "jitter", colour = class)

# Note: On 09/11/Thursday
# We did NOT do the following two R qplots
# with the added very smooth GEOM method lm (linear model)
qplot(cty, hwy, data = mpg, geom = "jitter") +
  geom_smooth(method = "lm")
qplot(cty, hwy, data = mpg, geom = "jitter", colour = class) +
  geom_smooth(method = "lm")

# Reordering + boxplots
qplot(class, hwy, data = mpg)
qplot(reorder(class, hwy), hwy, data = mpg)
qplot(reorder(class, hwy), hwy, data = mpg, geom = "jitter")
qplot(reorder(class, hwy), hwy, data = mpg, geom = "boxplot")
qplot(reorder(class, hwy), hwy, data = mpg, geom = c("jitter", "boxplot"))
# Summary of the juul data before and after recoding sex as a factor ----
#
# This section was a pasted console transcript (prompts plus printed output).
# The commands are kept as runnable statements; the output captured in the
# transcript is preserved below each command as comments.
#
# NOTE(review): `juul` is not created anywhere in the visible script; it is
# presumably a data frame loaded beforehand (e.g. from a package) - confirm
# and load it before running this section.

summary(juul)
# Captured output:
#       age            menarche            sex             igf1
#  Min.   : 0.170   Min.   :  1.000   Min.   :1.000   Min.   : 25.0
#  1st Qu.: 9.053   1st Qu.:  1.000   1st Qu.:1.000   1st Qu.:202.2
#  Median :12.560   Median :  1.000   Median :2.000   Median :313.5
#  Mean   :15.095   Mean   :  1.476   Mean   :1.534   Mean   :340.2
#  3rd Qu.:16.855   3rd Qu.:  2.000   3rd Qu.:2.000   3rd Qu.:462.8
#  Max.   :83.000   Max.   :  2.000   Max.   :2.000   Max.   :915.0
#  NA's   : 5.000   NA's   :635.000   NA's   :5.000   NA's   :321.0
#      tanner           testvol
#  Min.   :  1.000   Min.   :  1.000
#  1st Qu.:  1.000   1st Qu.:  1.000
#  Median :  2.000   Median :  3.000
#  Mean   :  2.640   Mean   :  7.896
#  3rd Qu.:  5.000   3rd Qu.: 15.000
#  Max.   :  5.000   Max.   : 30.000
#  NA's   :240.000   NA's   :859.000

# Recode the numeric sex codes as a labelled factor so summary() reports
# counts per category instead of quantiles of the codes.
juul$sex <- factor(juul$sex, labels = c("Male", "Female"))

summary(juul)
# Captured output (the transcript shows only these four columns):
#       age            menarche            sex           igf1
#  Min.   : 0.170   Min.   :  1.000   Male  :621   Min.   : 25.0
#  1st Qu.: 9.053   1st Qu.:  1.000   Female:713   1st Qu.:202.2
#  Median :12.560   Median :  1.000   NA's  :  5   Median :313.5
#  Mean   :15.095   Mean   :  1.476                Mean   :340.2
#  3rd Qu.:16.855   3rd Qu.:  2.000                3rd Qu.:462.8
#  Max.   :83.000   Max.   :  2.000                Max.   :915.0
#  NA's   : 5.000   NA's   :635.000                NA's   :321.0