15 Introduction to plotly

Moya Zhu and Yunshu Cai

15.1 Introduction

This is a cheat sheet to help you find the proper visualization for your demonstration purpose using plotly. The plots are interactive and documented with variable types. The packages we will use are ‘plotly’ and ‘dplyr’.

15.1.1 Prepare: Install & Load

# After install
library(plotly)

15.1.2 Comparison

15.1.2.1 Comparing over items – Bar Charts

Bar charts aim to show the distribution of data points and how a specific group of values behaves compared with other groups.

Vertical bar charts

# Vertical bar chart: one bar per animal, drawn as a single trace
# named "SF Zoo".
fig <- plot_ly(
  type = "bar",
  name = "SF Zoo",
  x = c("giraffes", "orangutans", "monkeys"),
  y = c(20, 14, 23)
)

fig

Horizontal bar charts

# Horizontal bar chart: the numeric values are passed as x, the category
# labels as y, and the orientation is set to 'h'.
fig <- plot_ly(
  type = 'bar',
  orientation = 'h',
  x = c(20, 14, 23),
  y = c('giraffes', 'orangutans', 'monkeys')
)

fig

Customizing Individual Bar Widths for pictorial representation of grouped data

# One entry per bar: its x value, its y value and its own width.
x <- c(1, 2, 3, 5.5, 10)
y <- c(10, 8, 6, 4, 2)
width <- c(0.8, 0.8, 0.8, 3.5, 4)
data <- data.frame(x = x, y = y, width = width)

# Mapping `width` to a data column gives every bar an individual width.
fig <- plot_ly(data) %>%
  add_bars(x = ~x, y = ~y, width = ~width)

fig

Customizing Individual Bar Colors for focusing at a particular item and comparing to others

x <- c('item A', 'item B', 'item C', 'item D', 'item E')
y <- c(5, 17, 20, 12, 15)
data <- data.frame(x = x, y = y)

# One color per bar, in bar order: the fourth bar ('item D') gets its own
# color, all the others share the same grey.
bar_colors <- c(
  rep('rgba(204,204,204,1)', 3),
  'rgba(199,77,120,0.8)',
  'rgba(204,204,204,1)'
)

fig <- plot_ly(
  data,
  x = ~x,
  y = ~y,
  type = 'bar',
  marker = list(color = bar_colors)
) %>%
  layout(
    title = "Least Used Features",
    xaxis = list(title = ""),
    yaxis = list(title = "")
  )

fig

Customizing Individual Bar Base for comparing groups having relatively opposite values (ex. income and expense, restocking and sales)

# Both traces share the same x categories.
years <- c("2016", "2017", "2018")

# Expense bars are given a negative base equal to their height;
# revenue bars use a base of 0.
fig <- plot_ly() %>%
  add_bars(
    name = 'expenses',
    x = years,
    y = c(500, 600, 700),
    base = c(-500, -600, -700),
    marker = list(color = 'lightpink')
  ) %>%
  add_bars(
    name = 'revenue',
    x = years,
    y = c(300, 400, 700),
    base = 0,
    marker = list(color = 'lightblue')
  )

fig

15.1.2.2 Comparing over time – Line Charts, Area Plots

Line charts aim to show how data changes over either short or long periods of time, and to compare the trends of different groups of data.

Area charts additionally aim to indicate the total value or to compare differences among variables.

Line chart with single variable

# 100 points: x is the index 1..100, y is a random normal draw.
# No seed is set, so the y values differ on every run.
x <- 1:100
random_y <- rnorm(100, mean = 0)
data <- data.frame(x = x, random_y = random_y)

fig <- plot_ly(
  data,
  x = ~x,
  y = ~random_y,
  type = 'scatter',
  mode = 'lines'
)

fig

(Styled) Line chart with more than one variable

month <- c(
  'January', 'February', 'March', 'April', 'May', 'June',
  'July', 'August', 'September', 'October', 'November', 'December'
)
high_2000 <- c(32.5, 37.6, 49.9, 53.0, 69.1, 75.4, 76.5, 76.6, 70.7, 60.6, 45.1, 29.3)
low_2000 <- c(13.8, 22.3, 32.5, 37.2, 49.9, 56.1, 57.7, 58.3, 51.2, 42.8, 31.6, 15.9)
high_2007 <- c(36.5, 26.6, 43.6, 52.3, 71.5, 81.4, 80.5, 82.2, 76.0, 67.3, 46.1, 35.0)
low_2007 <- c(23.6, 14.0, 27.0, 36.8, 47.6, 57.7, 58.9, 61.2, 53.3, 48.5, 31.0, 23.6)
high_2014 <- c(28.8, 28.5, 37.0, 56.8, 69.7, 79.7, 78.5, 77.8, 74.1, 62.6, 45.3, 39.9)
low_2014 <- c(12.7, 14.3, 18.6, 35.5, 49.9, 58.0, 60.0, 58.6, 51.7, 45.2, 32.2, 29.1)

data <- data.frame(
  month, high_2000, low_2000, high_2007, low_2007, high_2014, low_2014
)

# Use the months in their given order as factor levels; without this the
# default order would be alphabetical.
data$month <- factor(data$month, levels = data$month)

# "High" series share one color and "low" series another; the year is
# distinguished by the dash pattern (solid, dash, dot).
high_color <- 'rgb(205, 12, 24)'
low_color <- 'rgb(22, 96, 167)'

fig <- plot_ly(
  data,
  x = ~month,
  y = ~high_2014,
  name = 'High 2014',
  type = 'scatter',
  mode = 'lines',
  line = list(color = high_color, width = 4)
) %>%
  add_trace(
    y = ~low_2014, name = 'Low 2014',
    line = list(color = low_color, width = 4)
  ) %>%
  add_trace(
    y = ~high_2007, name = 'High 2007',
    line = list(color = high_color, width = 4, dash = 'dash')
  ) %>%
  add_trace(
    y = ~low_2007, name = 'Low 2007',
    line = list(color = low_color, width = 4, dash = 'dash')
  ) %>%
  add_trace(
    y = ~high_2000, name = 'High 2000',
    line = list(color = high_color, width = 4, dash = 'dot')
  ) %>%
  add_trace(
    y = ~low_2000, name = 'Low 2000',
    line = list(color = low_color, width = 4, dash = 'dot')
  ) %>%
  layout(
    title = "Average High and Low Temperatures in New York",
    xaxis = list(title = "Months"),
    yaxis = list(title = "Temperature (degrees F)")
  )

fig

Area chart with single variable

# Kernel density estimate of the carat column; its x/y grid is what gets
# plotted. The result is stored under a name that does not reuse `density`.
carat_density <- density(diamonds$carat)

# fill = 'tozeroy' turns the line into a filled area.
fig <- plot_ly(
  x = ~carat_density$x,
  y = ~carat_density$y,
  type = 'scatter',
  mode = 'lines',
  fill = 'tozeroy'
) %>%
  layout(
    xaxis = list(title = 'Carat'),
    yaxis = list(title = 'Density')
  )

fig

(Custom colored) Area chart with more than one variable

# Density of carat computed separately for the "Fair" and "Ideal" cuts.
fair_density <- density(diamonds$carat[which(diamonds$cut == "Fair")])
ideal_density <- density(diamonds$carat[which(diamonds$cut == "Ideal")])

# The second trace overrides x, y, name and fillcolor only.
fig <- plot_ly(
  x = ~fair_density$x,
  y = ~fair_density$y,
  type = 'scatter',
  mode = 'lines',
  name = 'Fair cut',
  fill = 'tozeroy',
  fillcolor = 'rgba(168, 216, 234, 0.5)',
  line = list(width = 0.5)
) %>%
  add_trace(
    x = ~ideal_density$x,
    y = ~ideal_density$y,
    name = 'Ideal cut',
    fill = 'tozeroy',
    fillcolor = 'rgba(255, 212, 96, 0.5)'
  ) %>%
  layout(
    xaxis = list(title = 'Carat'),
    yaxis = list(title = 'Density')
  )

fig

(Stacked) Area chart with more than one variable is suitable for displaying part-to-whole relations by showing the constituent parts of a whole stacked one over the other.

# Transpose the expenditure matrix and keep the resulting row names in a
# `year` column next to the expenditure columns.
expenditure <- t(USPersonalExpenditure)
data <- data.frame("year" = rownames(expenditure), expenditure)

# All traces belong to stack group 'one', so they are stacked on top of
# each other; mode = 'none' leaves only the filled areas visible.
fig <- plot_ly(
  data,
  x = ~year,
  y = ~Food.and.Tobacco,
  name = 'Food and Tobacco',
  type = 'scatter',
  mode = 'none',
  stackgroup = 'one',
  fillcolor = '#F5FF8D'
) %>%
  add_trace(y = ~Household.Operation, name = 'Household Operation', fillcolor = '#50CB86') %>%
  add_trace(y = ~Medical.and.Health, name = 'Medical and Health', fillcolor = '#4C74C9') %>%
  add_trace(y = ~Personal.Care, name = 'Personal Care', fillcolor = '#700961') %>%
  add_trace(y = ~Private.Education, name = 'Private Education', fillcolor = '#312F44') %>%
  layout(
    title = 'United States Personal Expenditures by Categories',
    xaxis = list(title = "", showgrid = FALSE),
    yaxis = list(
      title = "Expenditures (in billions of dollars)",
      showgrid = FALSE
    )
  )

fig

15.1.3 Distribution

One continuous variable histogram:

# Histogram of a single continuous column of iris.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  type = "histogram"
)
fig

One continuous variable histogram with density line:

# Density estimate of the same column that is shown in the histogram.
dens <- density(iris$Sepal.Length)

# The density curve is assigned to a second y axis ("y2"), which the layout
# overlays on the first one and places on the right-hand side.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  type = "histogram",
  name = "Histogram"
)
fig <- fig %>% add_trace(
  x = dens$x,
  y = dens$y,
  mode = "lines",
  type = 'scatter',
  fill = "tozeroy",
  yaxis = "y2",
  name = "Density"
)
fig <- fig %>% layout(
  yaxis2 = list(overlaying = "y", side = "right")
)
fig

Multiple continuous variables, overlaid histograms:

# Overlaid histograms of three iris columns. alpha = 0.6 makes the bars
# semi-transparent and barmode = "overlay" draws the histograms on top of
# each other.
fig <- plot_ly(data = iris, alpha = 0.6) %>%
  add_histogram(x = ~Sepal.Length, name = 'sepal length') %>%
  # Legend label fixed: it previously read 'petalvlength'.
  add_histogram(x = ~Petal.Length, name = 'petal length') %>%
  add_histogram(x = ~Petal.Width, name = 'petal width') %>%
  layout(barmode = "overlay")

fig

15.1.4 Relationship

15.1.4.1 Between two continuous variables:

Scatter plot

# Scatter plot of two continuous iris columns. The trace type and mode are
# given explicitly so that plotly does not have to infer them.
fig <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  type = 'scatter',
  mode = 'markers'
)
fig

# with color: the same plot, with the points colored by Species.
fig_color <- plot_ly(
  data = iris,
  x = ~Sepal.Length,
  y = ~Petal.Length,
  color = ~Species,
  type = 'scatter',
  mode = 'markers'
)
fig_color

15.1.4.2 Multiple variables:

Bubble size scatter plot indicating relationship of 3 variables with text hover

# The data set is downloaded from the plotly datasets repository, so this
# snippet needs network access.
earnings_url <- "https://raw.githubusercontent.com/plotly/datasets/master/school_earnings.csv"
data <- read.csv(earnings_url)

# Women and Men give the x/y position, Gap sets the marker size and School
# is supplied as the text of each point.
bubble_marker <- list(size = ~Gap, opacity = 0.5, color = 'rgb(255, 65, 54)')

fig <- plot_ly(
  data,
  x = ~Women,
  y = ~Men,
  text = ~School,
  type = 'scatter',
  mode = 'markers',
  marker = bubble_marker
) %>%
  layout(
    title = 'Gender Gap in Earnings per University',
    xaxis = list(showgrid = FALSE),
    yaxis = list(showgrid = FALSE)
  )
fig

15.1.5 Composition

15.1.5.1 Changing over time

Cumulative values: Stacked area chart

# Transpose the expenditure matrix and keep the resulting row names in a
# `year` column next to the expenditure columns.
expenditure <- t(USPersonalExpenditure)
data <- data.frame("year" = rownames(expenditure), expenditure)

# Same stacked area chart as an absolute-value version would be, but with
# groupnorm = 'percent' on the stack group and a '%' suffix on the y ticks.
fig <- plot_ly(
  data,
  x = ~year,
  y = ~Food.and.Tobacco,
  name = 'Food and Tobacco',
  type = 'scatter',
  mode = 'none',
  stackgroup = 'one',
  groupnorm = 'percent',
  fillcolor = '#F5FF8D'
) %>%
  add_trace(y = ~Household.Operation, name = 'Household Operation', fillcolor = '#50CB86') %>%
  add_trace(y = ~Medical.and.Health, name = 'Medical and Health', fillcolor = '#4C74C9') %>%
  add_trace(y = ~Personal.Care, name = 'Personal Care', fillcolor = '#700961') %>%
  add_trace(y = ~Private.Education, name = 'Private Education', fillcolor = '#312F44') %>%
  layout(
    title = 'United States Personal Expenditures by Categories',
    xaxis = list(title = "", showgrid = FALSE),
    yaxis = list(
      title = "Proportion from the Total Expenditures",
      showgrid = FALSE,
      ticksuffix = '%'
    )
  )

fig

15.1.5.2 Static

Single categorical variable: pie chart:

# Number of rows per species; the counts end up in column `n`.
# count() is called through the dplyr namespace because only plotly is
# attached in this document.
species_counts <- dplyr::count(iris, Species)

# Pie chart with one slice per species, sized by its count.
fig <- plot_ly() %>%
  add_pie(
    data = species_counts,
    labels = ~Species,
    values = ~n,
    name = "Species",
    domain = list(row = 0, column = 0)
  )
fig

Multi-categorical variables: Stacked bar plot:

# Load the Arthritis data set directly from the vcd package, so it is found
# even when vcd has not been attached with library().
data(Arthritis, package = "vcd")

# Tabulate the rows by treatment and improvement level; the counts end up in
# column `n`. Plotting these counts (rather than the Improved factor itself)
# makes the bar heights numeric, so stacking them shows the composition of
# each treatment group.
improvement_counts <- dplyr::count(Arthritis, Treatment, Improved)

improvement_counts %>%
  plot_ly(x = ~Treatment, y = ~n, color = ~Improved, type = 'bar') %>%
  layout(yaxis = list(title = 'Number of patients'), barmode = 'stack')

Hierarchical data: Multi-categorical: A tree map displays hierarchical data as sets of nested rectangles. It represents the branches of the dimensions of the data.

# The hierarchy is downloaded from the plotly datasets repository, so this
# snippet needs network access.
df1 <- read.csv(
  'https://raw.githubusercontent.com/plotly/datasets/718417069ead87650b90472464c7565dc8c2cb1c/sunburst-coffee-flavors-complete.csv'
)

# The ids, labels and parents columns of the file are passed to the treemap
# trace attributes of the same names.
fig <- plot_ly(
  type = 'treemap',
  ids = df1$ids,
  labels = df1$labels,
  parents = df1$parents,
  domain = list(column = 0)
)
fig

Sequential data: A waterfall chart helps in understanding the cumulative effect of sequential values. It represents the accumulation or subtraction of a total. It’s a great representation for a lot of financial data.

# One entry per step of the statement: its label (x), how it is measured
# ("relative" or "total"), the text shown for it and its value (y).
x <- list("Sales", "Consulting", "Net revenue", "Purchases", "Other expenses", "Profit before tax")
measure <- c("relative", "relative", "total", "relative", "relative", "total")
text <- c("+60", "+80", "", "-40", "-20", "Total")
y <- c(60, 80, 0, -40, -20, 0)

# The labels are turned into a factor whose levels follow the given order.
data <- data.frame(
  x = factor(x, levels = x),
  measure,
  text,
  y
)

fig <- plot_ly(
  data,
  name = "20",
  type = "waterfall",
  measure = ~measure,
  x = ~x,
  y = ~y,
  text = ~text,
  textposition = "outside",
  connector = list(line = list(color = "rgb(63, 63, 63)"))
) %>%
  layout(
    title = "Profit and loss statement 2018",
    xaxis = list(title = ""),
    yaxis = list(title = ""),
    autosize = TRUE,
    showlegend = TRUE
  )

fig