一台云服务器做多个网站,自己如何做网站优化,xp系统没有lls组件可以做网站吗,专业的网站服务公司R语言基础图形合集
欢迎大家关注全网生信学习者系列：
WX公zhong号：生信学习者；Xiao hong书：生信学习者；知hu：生信学习者；CDSN：生信学习者2
基础图形可视化
数据分析的图形可视化是了解数据分布、波动和相关性等属性必…R语言基础图形合集
欢迎大家关注全网生信学习者系列
WX公zhong号生信学习者Xiao hong书生信学习者知hu生信学习者CDSN生信学习者2
基础图形可视化
数据分析的图形可视化是了解数据分布、波动和相关性等属性必不可少的手段。不同的图形类型对数据属性的表征各不相同，通常具体问题使用具体的可视化图形。R语言在可视化方面具有极大的优势，因其本身就是统计学家为了研究统计问题开发的编程语言，因此极力推荐使用R语言可视化数据。
散点图
散点图是由x值和y值确定的点散乱分布在坐标轴上，一是可以用来展示数据的分布和聚合情况，二是可通过分布情况得到x和y之间的趋势结论。多用于回归分析，发现自变量和因变量的变化趋势，进而选择合适的函数对数据点进行拟合。
library(ggplot2)
library(dplyr)

# Scatter plot of weight vs. mileage, grouped by cylinder count, with one
# linear fit per group and row-name labels on every point.
# NOTE(review): the data source was lost in extraction ("dat - %% mutate");
# mtcars is assumed because the plot uses wt, mpg, cyl and row names. Confirm.
dat <- mtcars %>%
  mutate(cyl = factor(cyl))

ggplot(dat, aes(x = wt, y = mpg, shape = cyl, color = cyl)) +
  geom_point(size = 3, alpha = 0.4) +
  geom_smooth(method = lm, linetype = "dashed", color = "darkred", fill = "blue") +
  geom_text(aes(label = rownames(dat)), size = 4) +
  theme_bw(base_size = 12) +
  theme(
    plot.title = element_text(size = 10, color = "black", face = "bold", hjust = 0.5),
    axis.title = element_text(size = 10, color = "black", face = "bold"),
    axis.text = element_text(size = 9, color = "black"),
    # Negative length draws the tick marks inside the panel.
    axis.ticks.length = unit(-0.05, "in"),
    axis.text.y = element_text(margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm"), size = 9),
    axis.text.x = element_blank(),
    text = element_text(size = 8, color = "black"),
    strip.text = element_text(size = 9, color = "black", face = "bold"),
    panel.grid = element_blank()
  )
直方图
直方图是一种对数据分布情况进行可视化的图形，它是二维统计图表，对应两个坐标，分别是统计样本以及该样本对应的某个属性（如频率等）的度量。
library(ggplot2)

# Yearly sales per company in wide format (one column per year).
data <- data.frame(
  Conpany = c("Apple", "Google", "Facebook", "Amozon", "Tencent"),
  Sale2013 = c(5000, 3500, 2300, 2100, 3100),
  Sale2014 = c(5050, 3800, 2900, 2500, 3300),
  Sale2015 = c(5050, 3800, 2900, 2500, 3300),
  Sale2016 = c(5050, 3800, 2900, 2500, 3300)
)
# Reshape to long format so Year can be mapped to the fill aesthetic.
mydata <- tidyr::gather(data, Year, Sale, -Conpany)

# scale_fill_wsj()/theme_wsj() live in ggthemes, which this snippet never
# attached, so they are called with an explicit namespace.
ggplot(mydata, aes(Conpany, Sale, fill = Year)) +
  geom_bar(stat = "identity", position = "dodge") +
  guides(fill = guide_legend(title = NULL)) +
  ggtitle("The Financial Performance of Five Giant") +
  ggthemes::scale_fill_wsj("rgby", "") +
  ggthemes::theme_wsj() +
  theme(axis.ticks.length = unit(0.5, "cm"), axis.title = element_blank())

library(patternplot)

# Black-and-white patterned bar chart from the example data shipped with
# patternplot, restricted to a single city.
data <- read.csv(system.file("extdata", "monthlyexp.csv", package = "patternplot"))
data <- data[which(data$City == "City 1"), ]
x <- factor(data$Type, c("Housing", "Food", "Childcare"))
y <- data$Monthly_Expenses
pattern.type <- c("hdashes", "blank", "crosshatch")
pattern.color <- c("black", "black", "black")
background.color <- c("white", "white", "white")
density <- c(20, 20, 10)

patternplot::patternbar(
  data, x, y,
  group = NULL,
  ylab = "Monthly Expenses, Dollar",
  pattern.type = pattern.type,
  pattern.color = pattern.color,
  background.color = background.color,
  pattern.line.size = 0.5,
  frame.color = c("black", "black", "black"),
  density = density
) +
  ggtitle("(A) Black and White with Patterns")
箱线图
箱线图是一种显示一组数据分布情况的统计图，它形状像箱子，因此也被称为箱形图。它通过六个数据节点，将一组数据从大到小排列（上极限到下极限），反映原始数据分布特征。意义在于发现关键数据，如平均值、任何异常值、数据分布紧密度和偏分布等。
library(ggplot2)
library(dplyr)

# Box plot of Weight per Fruit with whiskers, group means, per-group counts
# and pairwise Wilcoxon significance marks.
# NOTE(review): `dat` (with columns Fruit and Weight) is not defined anywhere
# in the visible text; it must be supplied before running this snippet.
pr <- unique(dat$Fruit)
grp.col <- c("#999999", "#E69F00", "#56B4E9")

dat %>%
  mutate(Fruit = factor(Fruit)) %>%
  ggplot(aes(x = Fruit, y = Weight, color = Fruit)) +
  stat_boxplot(geom = "errorbar", width = 0.15) +
  geom_boxplot(
    aes(fill = Fruit),
    width = 0.4, outlier.colour = "black", outlier.shape = 21, outlier.size = 1
  ) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(fun = mean, geom = "point", shape = 16, size = 2, color = "black") +
  # Show the number of observations of each group near the top of the panel.
  stat_summary(
    fun.data = function(x) {
      data.frame(y = 0.98 * 120, label = length(x))
    },
    geom = "text", hjust = 0.5, color = "red", size = 6
  ) +
  # stat_compare_means() comes from ggpubr, which the snippet never attached.
  ggpubr::stat_compare_means(
    comparisons = list(c(pr[1], pr[2]), c(pr[1], pr[3]), c(pr[2], pr[3])),
    label = "p.signif", method = "wilcox.test"
  ) +
  labs(title = "Weight of Fruit", x = "Fruit", y = "Weight (kg)") +
  scale_color_manual(values = grp.col, labels = pr) +
  scale_fill_manual(values = grp.col, labels = pr) +
  # The original read `fil F`; the misspelled name left the fill legend on.
  guides(color = "none", fill = "none") +
  scale_y_continuous(
    sec.axis = dup_axis(labels = NULL, name = NULL),
    breaks = seq(90, 108, 2), limits = c(90, 120)
  ) +
  theme_bw(base_size = 12) +
  theme(
    plot.title = element_text(size = 10, color = "black", face = "bold", hjust = 0.5),
    axis.title = element_text(size = 10, color = "black", face = "bold"),
    axis.text = element_text(size = 9, color = "black"),
    axis.ticks.length = unit(-0.05, "in"),
    axis.text.y = element_text(margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm"), size = 9),
    axis.text.x = element_text(margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm")),
    text = element_text(size = 8, color = "black"),
    strip.text = element_text(size = 9, color = "black", face = "bold"),
    panel.grid = element_blank()
  )
面积图
面积图是一种展示个体与整体的关系的统计图，更多用于时间序列变化的研究。
library(ggplot2)
library(dplyr)

# Stacked area chart of the mean Weight per Fruit across Stores, with a dashed
# horizontal line at the overall mean of the group means.
# NOTE(review): `dat` (columns Fruit, Store, Weight) is not defined in the
# visible text and must exist before this snippet runs.
dat %>%
  group_by(Fruit, Store) %>%
  summarize(mean_Weight = mean(Weight)) %>%
  ggplot(aes(x = Store, group = Fruit)) +
  geom_area(
    aes(y = mean_Weight, fill = as.factor(Fruit)),
    position = "stack", linetype = "dashed"
  ) +
  geom_hline(
    aes(yintercept = mean(mean_Weight)),
    color = "blue", linetype = "dashed", size = 1
  ) +
  guides(fill = guide_legend(title = NULL)) +
  theme_bw(base_size = 12) +
  theme(
    plot.title = element_text(size = 10, color = "black", face = "bold", hjust = 0.5),
    axis.title = element_text(size = 10, color = "black", face = "bold"),
    axis.text = element_text(size = 9, color = "black"),
    axis.ticks.length = unit(-0.05, "in"),
    axis.text.y = element_text(margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm"), size = 9),
    axis.text.x = element_text(margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm")),
    text = element_text(size = 8, color = "black"),
    strip.text = element_text(size = 9, color = "black", face = "bold"),
    panel.grid = element_blank()
  )
热图
热图也是一种对数据分布情况可视化的统计图形，如下图，表现的是数据差异性的具象化实例。一般用于样本聚类等可视化过程。在基因表达或者丰度表达差异研究中，热图既可以展现数据质量间的差异性，也可以用于聚类等。
library(ggplot2)

# Simulate a 9 gene x 10 sample expression matrix and draw it as a heat map.
data <- as.data.frame(matrix(rnorm(9 * 10), 9, 10))
rownames(data) <- paste("Gene", 1:9, sep = "_")
colnames(data) <- paste("sample", 1:10, sep = "_")
data$ID <- rownames(data)
# Long format: one row per (gene, sample) pair.
data_m <- tidyr::gather(data, sampleID, value, -ID)

ggplot(data_m, aes(x = sampleID, y = ID)) +
  geom_tile(aes(fill = value)) +
  scale_fill_gradient2("Expression", low = "green", high = "red", mid = "black") +
  xlab("samples") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    panel.grid.major = element_blank(),
    legend.key = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    legend.position = "top"
  )
相关图
相关图是热图的一种特殊形式，展示的是样本间相关系数大小的热图。
library(corrplot)

# Mixed correlogram: glyphs in the upper triangle, coefficients in the lower.
# NOTE(review): `dat` is not defined in the visible text; its first seven
# columns must be numeric for cor() to work.
corrplot(corr = cor(dat[1:7]), order = "AOE", type = "upper", tl.pos = "d")
# add = TRUE overlays the lower triangle on the plot drawn above.
corrplot(
  corr = cor(dat[1:7]), add = TRUE, type = "lower", method = "number",
  order = "AOE", diag = FALSE, tl.pos = "n", cl.pos = "n"
)
折线图
折线图是反映数据分布趋势的可视化图形，其本质和堆积图或者说面积图有些相似。
library(ggplot2)
library(dplyr)

# Line chart of Weight per Year for a stratified random sample of each Fruit.
# NOTE(review): `dat` and `pr` are not defined in this snippet; `pr` is created
# in the box plot section as unique(dat$Fruit).
grp.col <- c("#999999", "#E69F00", "#56B4E9")
# Equal-sized simple random sample without replacement from each stratum.
dat.cln <- sampling::strata(
  dat,
  stratanames = "Fruit",
  size = rep(round(nrow(dat) * 0.1 / 3, -1), 3),
  method = "srswor"
)

dat %>%
  slice(dat.cln$ID_unit) %>%
  # Assumes 20 sampled rows per fruit (1996-2015 repeated three times).
  mutate(Year = as.character(rep(1996:2015, times = 3))) %>%
  mutate(Year = factor(as.character(Year))) %>%
  ggplot(aes(
    x = Year, y = Weight,
    linetype = Fruit, colour = Fruit, shape = Fruit, fill = Fruit
  )) +
  geom_line(aes(group = Fruit)) +
  geom_point() +
  scale_linetype_manual(values = c(1:3)) +
  scale_shape_manual(values = c(19, 21, 23)) +
  scale_color_manual(values = grp.col, labels = pr) +
  scale_fill_manual(values = grp.col, labels = pr) +
  theme_bw() +
  theme(
    plot.title = element_text(size = 10, color = "black", face = "bold", hjust = 0.5),
    axis.title = element_text(size = 10, color = "black", face = "bold"),
    axis.text = element_text(size = 9, color = "black"),
    axis.ticks.length = unit(-0.05, "in"),
    axis.text.y = element_text(margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm"), size = 9),
    axis.text.x = element_text(margin = unit(c(0.3, 0.3, 0.3, 0.3), "cm")),
    text = element_text(size = 8, color = "black"),
    strip.text = element_text(size = 9, color = "black", face = "bold"),
    panel.grid = element_blank()
  )
韦恩图
韦恩图是一种展示不同分组之间集合重叠区域的可视化图。
library(VennDiagram)

# Four random 18-letter sets; the Venn diagram is written to Group4.png.
A <- sample(LETTERS, 18, replace = FALSE)
B <- sample(LETTERS, 18, replace = FALSE)
C <- sample(LETTERS, 18, replace = FALSE)
D <- sample(LETTERS, 18, replace = FALSE)

venn.diagram(
  x = list(A = A, D = D, B = B, C = C),
  filename = "Group4.png",
  height = 450, width = 450, resolution = 300, imagetype = "png",
  col = "transparent",
  fill = c("cornflowerblue", "green", "yellow", "darkorchid1"),
  alpha = 0.5, cex = 0.45, cat.cex = 0.45
)

library(ggplot2)
library(UpSetR)

movies <- read.csv(
  system.file("extdata", "movies.csv", package = "UpSetR"),
  header = TRUE, sep = ";"
)
mutations <- read.csv(
  system.file("extdata", "mutations.csv", package = "UpSetR"),
  header = TRUE, sep = ","
)

# Custom attribute plot for upset(): density of column `x`, filled by the
# decade derived from column `y`.
#   data: data frame passed in by upset()
#   x, y: column names given as strings
# The plot object is the value of the final assignment (returned invisibly).
another.plot <- function(data, x, y) {
  # Round x to a multiple of `accuracy` using rounding function `f`.
  round_any_new <- function(x, accuracy, f = round) {
    f(x / accuracy) * accuracy
  }
  data$decades <- round_any_new(as.integer(unlist(data[y])), 10, ceiling)
  # NOTE(review): the comparison operator was lost in extraction; `>=` follows
  # the UpSetR vignette this example appears to come from.
  data <- data[which(data$decades >= 1970), ]
  myplot <- (ggplot(data, aes_string(x = x)) +
    geom_density(aes(fill = factor(decades)), alpha = 0.4) +
    theme_bw() +
    theme(
      plot.margin = unit(c(0, 0, 0, 0), "cm"),
      legend.key.size = unit(0.4, "cm")
    ))
}

upset(
  movies,
  main.bar.color = "black",
  mb.ratio = c(0.5, 0.5),
  queries = list(
    list(query = intersects, params = list("Drama"), color = "red", active = FALSE),
    list(query = intersects, params = list("Action", "Drama"), active = TRUE),
    list(
      query = intersects, params = list("Drama", "Comedy", "Action"),
      color = "orange", active = TRUE
    )
  ),
  attribute.plots = list(
    gridrows = 50,
    plots = list(
      list(plot = histogram, x = "ReleaseDate", queries = FALSE),
      list(plot = scatter_plot, x = "ReleaseDate", y = "AvgRating", queries = TRUE),
      list(plot = another.plot, x = "AvgRating", y = "ReleaseDate", queries = FALSE)
    ),
    ncols = 3
  )
)
火山图
火山图通过两个属性（Fold change 和 P value）反映两组数据的差异性。
library(ggplot2)

# Volcano plot: log2 fold change against -log10 adjusted p-value.
# choose.files() opens a file picker and is only available on Windows.
data <- read.table(choose.files(), header = TRUE)

# Colour significant points red (up) or blue (down); everything else is gray.
# NOTE(review): the comparison operators were lost in extraction; the
# thresholds below (padj < 0.05, |log2FC| >= 1) are the usual form of this
# snippet and should be confirmed.
data$color <- ifelse(
  data$padj < 0.05 & abs(data$log2FoldChange) >= 1,
  ifelse(data$log2FoldChange > 1, "red", "blue"),
  "gray"
)
# Named vector so each level of `color` is drawn in the colour it names.
color <- c(red = "red", gray = "gray", blue = "blue")

ggplot(data, aes(log2FoldChange, -log10(padj), col = color)) +
  geom_point() +
  theme_bw() +
  scale_color_manual(values = color) +
  labs(x = "log2 (fold change)", y = "-log10 (q-value)") +
  geom_hline(yintercept = -log10(0.05), lty = 4, col = "grey", lwd = 0.6) +
  geom_vline(xintercept = c(-1, 1), lty = 4, col = "grey", lwd = 0.6) +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 14)
  )
饼图
饼图是用于刻画分组间（如频率等）属性相对关系的图。
library(patternplot)

# Patterned pie chart from the vegetables example data shipped with patternplot.
data <- read.csv(system.file("extdata", "vegetables.csv", package = "patternplot"))
pattern.type <- c("hdashes", "vdashes", "bricks")
pattern.color <- c("red3", "green3", "white")
background.color <- c("dodgerblue", "lightpink", "orange")

# ggplot2 is not attached in this snippet, so ggtitle() is namespaced.
patternpie(
  group = data$group, pct = data$pct, label = data$label,
  pattern.type = pattern.type,
  pattern.color = pattern.color,
  background.color = background.color,
  frame.color = "grey40", pixel = 0.3, pattern.line.size = 0.3,
  frame.size = 1.5, label.size = 5, label.distance = 1.35
) +
  ggplot2::ggtitle("(B) Colors with Patterns")
密度曲线图
密度曲线图反映的是数据在不同区间的密度分布情况，和概率密度函数（PDF）曲线类似。
library(ggplot2)
library(plyr)

# Simulated weights for two sexes: overlaid density-scaled histograms, density
# curves and a dashed vertical line at each group mean.
set.seed(1234)
df <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = round(c(rnorm(200, mean = 55, sd = 5), rnorm(200, mean = 65, sd = 5)))
)
mu <- ddply(df, "sex", summarise, grp.mean = mean(weight))

ggplot(df, aes(x = weight, fill = sex)) +
  # after_stat(density) is the current spelling of the older ..density..
  geom_histogram(aes(y = after_stat(density)), alpha = 0.5, position = "identity") +
  geom_density(alpha = 0.4) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  scale_color_grey() +
  theme_classic() +
  theme(legend.position = "top")
边界散点图(Scatterplot With Encircling)
library(ggplot2)
library(ggalt)

# Subset of counties to be encircled on the plot.
# NOTE(review): the comparison operators were lost in extraction; the bounds
# below follow the commonly published version of this example.
midwest_select <- midwest[
  midwest$poptotal > 350000 &
    midwest$poptotal <= 500000 &
    midwest$area > 0.01 &
    midwest$area < 0.1,
]

ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) + # draw points
  geom_smooth(method = "loess", se = FALSE) + # draw smoothing line
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  geom_encircle(
    aes(x = area, y = poptotal),
    data = midwest_select, color = "red", size = 2, expand = 0.08
  ) + # encircle
  labs(
    subtitle = "Area Vs Population",
    y = "Population",
    x = "Area",
    title = "Scatterplot + Encircle",
    caption = "Source: midwest"
  )
边缘箱图/直方图(Marginal Histogram / Boxplot)
2、边缘箱图/直方图(Marginal Histogram / Boxplot)
library(ggplot2)
library(ggExtra)
data(mpg, package = "ggplot2")

# Count scatter plot of city vs. highway mileage with marginal histograms.
theme_set(theme_bw())
# NOTE(review): operators reconstructed (lost in extraction); mpg_select is not
# used by the plot below.
mpg_select <- mpg[mpg$hwy >= 35 & mpg$cty > 27, ]
g <- ggplot(mpg, aes(cty, hwy)) +
  geom_count() +
  geom_smooth(method = "lm", se = FALSE)

ggMarginal(g, type = "histogram", fill = "transparent")
# ggMarginal(g, type = "boxplot", fill = "transparent")
拟合散点图
library(ggplot2)
theme_set(theme_bw())
data(midwest)

# Scatter plot of county area vs. population with a loess trend line.
ggplot(midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "loess", se = FALSE) +
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "Area Vs Population",
    y = "Population",
    x = "Area",
    title = "Scatterplot",
    caption = "Source: midwest"
  )
相关系数图(Correlogram)
library(ggplot2)
library(ggcorrplot)
data(mtcars)

# Correlation matrix of mtcars, rounded to one decimal for the cell labels.
corr <- round(cor(mtcars), 1)

ggcorrplot(
  corr,
  hc.order = TRUE, # order variables by hierarchical clustering
  type = "lower",
  lab = TRUE,
  lab_size = 3,
  method = "circle",
  colors = c("tomato2", "white", "springgreen3"),
  title = "Correlogram of mtcars",
  ggtheme = theme_bw
)
水平发散型文本(Diverging Texts)
library(ggplot2)
library(dplyr)
library(tibble)
theme_set(theme_bw())

# Data prep: z-score of mpg per car, flagged as above/below average and sorted.
data(mtcars)
plotdata <- mtcars %>%
  rownames_to_column("car_name") %>%
  mutate(
    mpg_z = round((mpg - mean(mpg)) / sd(mpg), 2),
    # Operator reconstructed: negative z-scores are "below" average.
    mpg_type = ifelse(mpg_z < 0, "below", "above")
  ) %>%
  arrange(mpg_z)
# Freeze the sorted order as factor levels so the plot keeps it.
plotdata$car_name <- factor(plotdata$car_name, levels = as.character(plotdata$car_name))

ggplot(plotdata, aes(x = car_name, y = mpg_z, label = mpg_z)) +
  geom_bar(stat = "identity", aes(fill = mpg_type), width = .5) +
  scale_fill_manual(
    name = "Mileage",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  labs(subtitle = "Normalised mileage from mtcars", title = "Diverging Bars") +
  coord_flip()
水平棒棒糖图(Diverging Lollipop Chart)
# Lollipop chart of the mpg z-scores; uses `plotdata` built for the diverging
# bars example (car_name as an ordered factor, mpg_z).
ggplot(plotdata, aes(x = car_name, y = mpg_z, label = mpg_z)) +
  geom_point(stat = "identity", fill = "black", size = 6) +
  # Stem from the zero line to each point.
  geom_segment(
    aes(y = 0, x = car_name, yend = mpg_z, xend = car_name),
    color = "black"
  ) +
  geom_text(color = "white", size = 2) +
  labs(
    title = "Diverging Lollipop Chart",
    subtitle = "Normalized mileage from mtcars: Lollipop"
  ) +
  ylim(-2.5, 2.5) +
  coord_flip()
去棒棒糖图(Diverging Dot Plot)
# Dot plot of the mpg z-scores, coloured by above/below average; uses
# `plotdata` built for the diverging bars example.
ggplot(plotdata, aes(x = car_name, y = mpg_z, label = mpg_z)) +
  geom_point(stat = "identity", aes(col = mpg_type), size = 6) +
  scale_color_manual(
    name = "Mileage",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  geom_text(color = "white", size = 2) +
  labs(
    title = "Diverging Dot Plot",
    subtitle = "Normalized mileage from mtcars: Dotplot"
  ) +
  ylim(-2.5, 2.5) +
  coord_flip()
面积图(Area Chart)
library(ggplot2)
library(quantmod)
data("economics", package = "ggplot2")

# Period-over-period relative change of the personal savings rate; the first
# observation has no predecessor and is set to 0.
economics$returns_perc <- c(
  0,
  diff(economics$psavert) / economics$psavert[-length(economics$psavert)]
)

# One axis break per 12 observations, labelled with the year.
brks <- economics$date[seq(1, length(economics$date), 12)]
lbls <- lubridate::year(economics$date[seq(1, length(economics$date), 12)])

ggplot(economics[1:100, ], aes(date, returns_perc)) +
  geom_area() +
  scale_x_date(breaks = brks, labels = lbls) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Area Chart",
    subtitle = "Perc Returns for Personal Savings",
    y = "% Returns for Personal savings",
    caption = "Source: economics"
  )
排序条形图(Ordered Bar Chart)
# ggplot2 is attached first because the `mpg` dataset used below ships with it
# (the original loaded the package only after the data preparation).
library(ggplot2)
theme_set(theme_bw())

# Mean city mileage per manufacturer, sorted ascending.
cty_mpg <- aggregate(mpg$cty, by = list(mpg$manufacturer), FUN = mean)
colnames(cty_mpg) <- c("make", "mileage")
cty_mpg <- cty_mpg[order(cty_mpg$mileage), ]
# Keep the sorted order in the plot by fixing the factor levels.
cty_mpg$make <- factor(cty_mpg$make, levels = cty_mpg$make)

ggplot(cty_mpg, aes(x = make, y = mileage)) +
  geom_bar(stat = "identity", width = .5, fill = "tomato3") +
  labs(
    title = "Ordered Bar Chart",
    subtitle = "Make Vs Avg. Mileage",
    caption = "source: mpg"
  ) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
直方图(Histogram)
library(ggplot2)
theme_set(theme_classic())

# Histogram on a continuous variable (engine displacement), filled by class.
g <- ggplot(mpg, aes(displ)) +
  scale_fill_brewer(palette = "Spectral")

g + geom_histogram(aes(fill = class), binwidth = .1, col = "black", size = .1) + # change binwidth
  labs(
    title = "Histogram with Auto Binning",
    subtitle = "Engine Displacement across Vehicle Classes"
  )

g + geom_histogram(aes(fill = class), bins = 5, col = "black", size = .1) + # change number of bins
  labs(
    title = "Histogram with Fixed Bins",
    subtitle = "Engine Displacement across Vehicle Classes"
  )

library(ggplot2)
theme_set(theme_classic())

# Bar chart of counts on a categorical variable (manufacturer).
g <- ggplot(mpg, aes(manufacturer))
g + geom_bar(aes(fill = class), width = 0.5) +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(
    title = "Histogram on Categorical Variable",
    subtitle = "Manufacturer across Vehicle Classes"
  )
核密度图(Density plot)
library(ggplot2)
theme_set(theme_classic())

# Kernel density of city mileage, one curve per cylinder count.
g <- ggplot(mpg, aes(cty))
g + geom_density(aes(fill = factor(cyl)), alpha = 0.8) +
  labs(
    title = "Density plot",
    subtitle = "City Mileage Grouped by Number of cylinders",
    caption = "Source: mpg",
    x = "City Mileage",
    fill = "# Cylinders"
  )
点图结合箱图(Dot Box Plot)
library(ggplot2)
theme_set(theme_bw())

# Box plot of city mileage per manufacturer with every observation overlaid
# as a centred dot.
g <- ggplot(mpg, aes(manufacturer, cty))
g + geom_boxplot() +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = .5, fill = "red") +
  theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
  labs(
    title = "Box plot + Dot plot",
    subtitle = "City Mileage vs Class: Each dot represents 1 row in source data",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
小提琴图(Violin Plot)
library(ggplot2)
theme_set(theme_bw())

# Violin plot of city mileage per vehicle class.
g <- ggplot(mpg, aes(class, cty))
g + geom_violin() +
  labs(
    title = "Violin plot",
    subtitle = "City Mileage vs Class of vehicle",
    caption = "Source: mpg",
    x = "Class of Vehicle",
    y = "City Mileage"
  )
饼图
library(ggplot2)
theme_set(theme_classic())

# Source: frequency table of vehicle classes.
df <- as.data.frame(table(mpg$class))
colnames(df) <- c("class", "freq")

# A single stacked bar (x = "") becomes a pie once mapped to polar coordinates.
pie <- ggplot(df, aes(x = "", y = freq, fill = factor(class))) +
  geom_bar(width = 1, stat = "identity") +
  theme(
    axis.line = element_blank(),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(
    fill = "class",
    x = NULL,
    y = NULL,
    title = "Pie Chart of class",
    caption = "Source: mpg"
  )

pie + coord_polar(theta = "y", start = 0)
时间序列图(Time Series多图)
## From Timeseries object (ts)
library(ggplot2)
library(ggfortify)
theme_set(theme_classic())

# Plot
autoplot(AirPassengers) +
  labs(title = "AirPassengers") +
  theme(plot.title = element_text(hjust = 0.5))

library(ggplot2)
theme_set(theme_classic())

# Allow default x axis labels.
# NOTE(review): `returns_perc` is not a column of the stock economics data; it
# is added in the Area Chart example and must exist before this runs.
ggplot(economics, aes(x = date)) +
  geom_line(aes(y = returns_perc)) +
  labs(
    title = "Time Series Chart",
    subtitle = "Returns Percentage from Economics Dataset",
    caption = "Source: Economics",
    y = "Returns %"
  )

data(economics_long, package = "ggplot2")
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

# Two series from the long-format data, restricted to 1967-1981.
df <- economics_long[economics_long$variable %in% c("psavert", "uempmed"), ]
df <- df[lubridate::year(df$date) %in% c(1967:1981), ]

# Labels and breaks for the x axis text.
brks <- df$date[seq(1, length(df$date), 12)]
lbls <- lubridate::year(brks)

# Plot
ggplot(df, aes(x = date)) +
  geom_line(aes(y = value, col = variable)) +
  labs(
    title = "Time Series of Returns Percentage",
    subtitle = "Drawn from Long Data format",
    caption = "Source: Economics",
    y = "Returns %",
    color = NULL
  ) + # title and caption
  scale_x_date(labels = lbls, breaks = brks) + # change to monthly ticks and labels
  scale_color_manual(
    labels = c("psavert", "uempmed"),
    values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")
  ) + # line color
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8), # rotate x axis text
    panel.grid.minor = element_blank() # turn off minor grid
  )
堆叠面积图(Stacked Area Chart)
library(ggplot2)
library(lubridate)
theme_set(theme_bw())

# Wide-format data: one column per series, restricted to 1967-1981.
df <- economics[, c("date", "psavert", "uempmed")]
df <- df[lubridate::year(df$date) %in% c(1967:1981), ]

# Labels and breaks for the x axis text.
brks <- df$date[seq(1, length(df$date), 12)]
lbls <- lubridate::year(brks)

# Plot: the first area is the sum of both series, the second is drawn on top
# of it, which produces the stacked look.
ggplot(df, aes(x = date)) +
  geom_area(aes(y = psavert + uempmed, fill = "psavert")) +
  geom_area(aes(y = uempmed, fill = "uempmed")) +
  labs(
    title = "Area Chart of Returns Percentage",
    subtitle = "From Wide Data format",
    caption = "Source: Economics",
    y = "Returns %"
  ) + # title and caption
  scale_x_date(labels = lbls, breaks = brks) + # change to monthly ticks and labels
  scale_fill_manual(
    name = "",
    values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")
  ) + # fill color
  theme(panel.grid.minor = element_blank()) # turn off minor grid
分层树形图(Hierarchical Dendrogram)
library(ggplot2)
library(ggdendro)
theme_set(theme_bw())

# Hierarchical clustering of USArrests ("ave" is matched to average linkage).
hc <- hclust(dist(USArrests), "ave")

# Plot
ggdendrogram(hc, rotate = TRUE, size = 2)
聚类图(Clusters)
library(ggplot2)
library(ggalt)
library(ggfortify)
theme_set(theme_classic())

# Compute data with principal components ------------------
df <- iris[c(1, 2, 3, 4)]
pca_mod <- prcomp(df) # compute principal components

# Data frame of principal components ----------------------
df_pc <- data.frame(pca_mod$x, Species = iris$Species) # dataframe of principal components
df_pc_vir <- df_pc[df_pc$Species == "virginica", ] # df for virginica
df_pc_set <- df_pc[df_pc$Species == "setosa", ] # df for setosa
df_pc_ver <- df_pc[df_pc$Species == "versicolor", ] # df for versicolor

# Plot ----------------------------------------------------
ggplot(df_pc, aes(PC1, PC2, col = Species)) +
  geom_point(aes(shape = Species), size = 2) + # draw points
  labs(
    title = "Iris Clustering",
    subtitle = "With principal components PC1 and PC2 as X and Y axis",
    caption = "Source: Iris"
  ) +
  coord_cartesian(
    xlim = 1.2 * c(min(df_pc$PC1), max(df_pc$PC1)),
    ylim = 1.2 * c(min(df_pc$PC2), max(df_pc$PC2))
  ) + # change axis limits
  geom_encircle(data = df_pc_vir, aes(x = PC1, y = PC2)) + # draw circles
  geom_encircle(data = df_pc_set, aes(x = PC1, y = PC2)) +
  geom_encircle(data = df_pc_ver, aes(x = PC1, y = PC2))
气泡图
# Libraries
library(ggplot2)
library(dplyr)
library(plotly)
library(viridis)
library(hrbrthemes)

# The dataset is provided in the gapminder library
library(gapminder)
data <- gapminder %>%
  filter(year == "2007") %>%
  dplyr::select(-year)

# Interactive version
p <- data %>%
  mutate(gdpPercap = round(gdpPercap, 0)) %>%
  mutate(pop = round(pop / 1000000, 2)) %>%
  mutate(lifeExp = round(lifeExp, 1)) %>%
  # Reorder countries so that big bubbles are drawn first and small ones on top
  arrange(desc(pop)) %>%
  mutate(country = factor(country, country)) %>%
  # Prepare text for the tooltip
  mutate(text = paste(
    "Country: ", country,
    "\nPopulation (M): ", pop,
    "\nLife Expectancy: ", lifeExp,
    "\nGdp per capita: ", gdpPercap,
    sep = ""
  )) %>%
  # Classic ggplot
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = continent, text = text)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1.4, 19), name = "Population (M)") +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_color_viridis(discrete = TRUE, guide = "none") +
  theme_ipsum() +
  theme(legend.position = "none")

# Turn the ggplot interactive with plotly
pp <- ggplotly(p, tooltip = "text")
pp
小提琴图Violin
# Libraries
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(viridis)

# Create a dataset
data <- data.frame(
  name = c(rep("A", 500), rep("B", 500), rep("B", 500), rep("C", 20), rep("D", 100)),
  value = c(
    rnorm(500, 10, 5), rnorm(500, 13, 1), rnorm(500, 18, 1),
    rnorm(20, 25, 4), rnorm(100, 12, 1)
  )
)

# Sample size per group
sample_size <- data %>%
  group_by(name) %>%
  summarize(num = n())

# Plot: violin with a narrow box plot inside; the axis label shows the group
# name and its sample size on two lines.
data %>%
  left_join(sample_size) %>%
  mutate(myaxis = paste0(name, "\n", "n=", num)) %>%
  ggplot(aes(x = myaxis, y = value, fill = name)) +
  geom_violin(width = 1.4) +
  geom_boxplot(width = 0.1, color = "grey", alpha = 0.2) +
  scale_fill_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("A Violin wrapping a boxplot") +
  xlab("")

# Libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
library(hrbrthemes)
library(viridis)

# Load dataset (relative path; the file must exist locally)
data <- read.table("dataset/viz/probly.csv", header = TRUE, sep = ",")

# Data is in wide format, we need to make it tidy (long)
data <- data %>%
  gather(key = "text", value = "value") %>%
  # Column names had spaces replaced by dots on import; restore the spaces.
  mutate(text = gsub("\\.", " ", text)) %>%
  mutate(value = round(as.numeric(value), 0)) %>%
  filter(text %in% c(
    "Almost Certainly", "Very Good Chance", "We Believe", "Likely",
    "About Even", "Little Chance", "Chances Are Slight", "Almost No Chance"
  ))

# Plot
p <- data %>%
  mutate(text = fct_reorder(text, value)) %>% # reorder data
  ggplot(aes(x = text, y = value, fill = text, color = text)) +
  geom_violin(width = 2.1, size = 0.2) +
  scale_fill_viridis(discrete = TRUE) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(legend.position = "none") +
  coord_flip() + # switch X and Y axis to get the horizontal version
  xlab("") +
  ylab("Assigned Probability (%)")

p
核密度图 density chart
library(ggplot2)
library(hrbrthemes)
library(dplyr)
library(tidyr)
library(viridis)

# Load the wide-format dataset (relative path) and reshape it to long format.
data <- read.table("dataset/viz/probly.csv", header = TRUE, sep = ",")
data <- data %>%
  gather(key = "text", value = "value") %>%
  mutate(text = gsub("\\.", " ", text)) %>%
  mutate(value = round(as.numeric(value), 0))

# A dataframe for annotations: label text and where to draw it
annot <- data.frame(
  text = c("Almost No Chance", "About Even", "Probable", "Almost Certainly"),
  x = c(5, 53, 65, 79),
  y = c(0.15, 0.4, 0.06, 0.1)
)

# Plot
data %>%
  filter(text %in% c("Almost No Chance", "About Even", "Probable", "Almost Certainly")) %>%
  ggplot(aes(x = value, color = text, fill = text)) +
  geom_density(alpha = 0.6) +
  scale_fill_viridis(discrete = TRUE) +
  scale_color_viridis(discrete = TRUE) +
  geom_text(
    data = annot,
    aes(x = x, y = y, label = text, color = text),
    hjust = 0, size = 4.5
  ) +
  theme_ipsum() +
  theme(legend.position = "none") +
  ylab("") +
  xlab("Assigned Probability (%)")

# library
library(ggplot2)
library(ggExtra)

# Classic plot
p <- ggplot(mtcars, aes(x = wt, y = mpg, color = cyl, size = cyl)) +
  geom_point() +
  theme(legend.position = "none")

# Set relative size of marginal plots (main plot 10x bigger than marginals)
p1 <- ggMarginal(p, type = "histogram", size = 10)

# Custom marginal plots
p2 <- ggMarginal(p, type = "histogram", fill = "slateblue", xparams = list(bins = 10))

# Show only the marginal plot for the x axis
p3 <- ggMarginal(p, margins = "x", color = "purple", size = 4)

cowplot::plot_grid(p, p1, p2, p3, ncol = 2, align = "hv", labels = LETTERS[1:4])
柱状图 histogram
# library
library(ggplot2)
library(dplyr)
library(hrbrthemes)

# Build dataset with different distributions
data <- data.frame(
  type = c(rep("variable 1", 1000), rep("variable 2", 1000)),
  value = c(rnorm(1000), rnorm(1000, mean = 4))
)

# Represent it: two overlaid, semi-transparent histograms
p <- data %>%
  ggplot(aes(x = value, fill = type)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  scale_fill_manual(values = c("#69b3a2", "#404080")) +
  theme_ipsum() +
  labs(fill = "")
p

# Libraries
library(ggplot2)
library(hrbrthemes)

# Dummy data
data <- data.frame(
  var1 = rnorm(1000),
  var2 = rnorm(1000, mean = 2)
)

# Chart: mirrored densities. Every layer overrides the x aesthetic, so the
# placeholder x = x in the base mapping is never evaluated.
p <- ggplot(data, aes(x = x)) +
  # Top
  geom_density(aes(x = var1, y = after_stat(density)), fill = "#69b3a2") +
  geom_label(aes(x = 4.5, y = 0.25, label = "variable1"), color = "#69b3a2") +
  # Bottom (negated density mirrors the curve below the axis)
  geom_density(aes(x = var2, y = -after_stat(density)), fill = "#404080") +
  geom_label(aes(x = 4.5, y = -0.25, label = "variable2"), color = "#404080") +
  theme_ipsum() +
  xlab("value of x")

# Same layout with histograms instead of density curves.
p1 <- ggplot(data, aes(x = x)) +
  geom_histogram(aes(x = var1, y = after_stat(density)), fill = "#69b3a2") +
  geom_label(aes(x = 4.5, y = 0.25, label = "variable1"), color = "#69b3a2") +
  geom_histogram(aes(x = var2, y = -after_stat(density)), fill = "#404080") +
  geom_label(aes(x = 4.5, y = -0.25, label = "variable2"), color = "#404080") +
  theme_ipsum() +
  xlab("value of x")

cowplot::plot_grid(p, p1, ncol = 2, align = "hv", labels = LETTERS[1:2])
箱线图 boxplot
# Library
library(ggplot2)
library(dplyr)
library(forcats)

# Dataset 1: one value per group
data <- data.frame(
  name = c(
    "north", "south", "south-east", "north-west",
    "south-west", "north-east", "west", "east"
  ),
  val = sample(seq(1, 10), 8)
)

# Reorder following the value of another column
p1 <- data %>%
  mutate(name = fct_reorder(name, val)) %>%
  ggplot(aes(x = name, y = val)) +
  geom_bar(stat = "identity", fill = "#f68060", alpha = .6, width = .4) +
  coord_flip() +
  xlab("") +
  theme_bw()

# Reverse side
p2 <- data %>%
  mutate(name = fct_reorder(name, desc(val))) %>%
  ggplot(aes(x = name, y = val)) +
  geom_bar(stat = "identity", fill = "#f68060", alpha = .6, width = .4) +
  coord_flip() +
  xlab("") +
  theme_bw()

# Using median. The original set xlab("class") and then overrode it with
# xlab(""); only the effective call is kept.
p3 <- mpg %>%
  mutate(class = fct_reorder(class, hwy, .fun = median)) %>%
  ggplot(aes(x = reorder(class, hwy), y = hwy, fill = class)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  theme(legend.position = "none") +
  xlab("")

# Using number of observations per group
p4 <- mpg %>%
  mutate(class = fct_reorder(class, hwy, .fun = length)) %>%
  ggplot(aes(x = class, y = hwy, fill = class)) +
  # `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 6, color = "red", fill = "red"
  ) +
  geom_boxplot() +
  theme(legend.position = "none") +
  xlab("")

p5 <- data %>%
  arrange(val) %>% # sorts the data frame rows but NOT the factor levels
  mutate(name = factor(name, levels = name)) %>% # this updates the factor levels
  ggplot(aes(x = name, y = val)) +
  geom_segment(aes(xend = name, yend = 0)) +
  geom_point(size = 4, color = "orange") +
  coord_flip() +
  theme_bw() +
  xlab("")

# Explicit level order given by hand
p6 <- data %>%
  arrange(val) %>%
  mutate(name = factor(name, levels = c(
    "north", "north-east", "east", "south-east",
    "south", "south-west", "west", "north-west"
  ))) %>%
  ggplot(aes(x = name, y = val)) +
  geom_segment(aes(xend = name, yend = 0)) +
  geom_point(size = 4, color = "orange") +
  theme_bw() +
  xlab("")

cowplot::plot_grid(p1, p2, p3, p4, p5, p6, ncol = 2, align = "hv", labels = LETTERS[1:6])

library(dplyr)
# Dummy data: four groups of different sizes
names <- c(rep("A", 20), rep("B", 8), rep("C", 30), rep("D", 80))
value <- c(
  sample(2:5, 20, replace = TRUE),
  sample(4:10, 8, replace = TRUE),
  sample(1:7, 30, replace = TRUE),
  sample(3:8, 80, replace = TRUE)
)
# The grouping column is built as a factor directly (the original piped the
# data frame through dplyr::mutate() for the same result).
data <- data.frame(names = factor(names), value = value)

# Draw the boxplot. The result is also stored in an object called `boundaries`;
# boundaries$stats holds the box and whisker positions, one column per group.
boundaries <- boxplot(data$value ~ data$names, col = "#69b3a2", ylim = c(1, 11))

# Add the sample size just above the upper whisker of every box
nbGroup <- nlevels(data$names)
text(
  x = c(1:nbGroup),
  y = boundaries$stats[nrow(boundaries$stats), ] + 0.5,
  paste("n = ", table(data$names), sep = "")
)
山脊图 ridgeline
# library
library(ggridges)
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)

# Load dataset (relative path; the file must exist locally)
data <- read.table("dataset/viz/probly.csv", header = TRUE, sep = ",")
data <- data %>%
  gather(key = "text", value = "value") %>%
  mutate(text = gsub("\\.", " ", text)) %>%
  mutate(value = round(as.numeric(value), 0)) %>%
  filter(text %in% c(
    "Almost Certainly", "Very Good Chance", "We Believe", "Likely",
    "About Even", "Little Chance", "Chances Are Slight", "Almost No Chance"
  ))

# Plot. The probability values are mapped to x, so the probability label goes
# on the x axis (the original attached it to the y axis, which shows phrases).
p1 <- data %>%
  mutate(text = fct_reorder(text, value)) %>%
  ggplot(aes(y = text, x = value, fill = text)) +
  geom_density_ridges(alpha = 0.6, stat = "binline", bins = 20) +
  theme_ridges() +
  theme(
    legend.position = "none",
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8)
  ) +
  xlab("Assigned Probability (%)") +
  ylab("")

p2 <- data %>%
  mutate(text = fct_reorder(text, value)) %>%
  ggplot(aes(y = text, x = value, fill = text)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  theme_ridges() +
  theme(
    legend.position = "none",
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8)
  ) +
  xlab("Assigned Probability (%)") +
  ylab("")

cowplot::plot_grid(p1, p2, ncol = 2, align = "hv", labels = LETTERS[1:2])
散点图 Scatterplot
library(ggplot2)
library(dplyr)

# Scatter plot of mpg vs. disp with marginal rugs, a linear fit and car labels.
ggplot(data = mtcars %>% mutate(cyl = factor(cyl)), aes(x = mpg, disp)) +
  geom_point(aes(color = cyl), size = 3) +
  geom_rug(col = "black", alpha = 0.5, size = 1) +
  geom_smooth(method = lm, color = "red", fill = "#69b3a2", se = TRUE) +
  # `label.size` is a geom_label() parameter that geom_text() ignores with a
  # warning, so it is not passed here.
  geom_text(
    label = rownames(mtcars),
    nudge_x = 0.25, nudge_y = 0.25,
    check_overlap = TRUE,
    color = "black", family = "serif"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )
热图 heatmap
library(ComplexHeatmap)
library(circlize)

# Random 10 x 10 matrix with named rows and columns.
set.seed(123)
mat <- matrix(rnorm(100), 10)
rownames(mat) <- paste0("R", 1:10)
colnames(mat) <- paste0("C", 1:10)

# Column (top) and row (right) annotations: a numeric track plus a bar plot.
column_ha <- HeatmapAnnotation(foo1 = runif(10), bar1 = anno_barplot(runif(10)))
row_ha <- rowAnnotation(foo2 = runif(10), bar2 = anno_barplot(runif(10)))

# Colour mapping: -2 -> green, 0 -> white, 2 -> red.
col_fun <- colorRamp2(c(-2, 0, 2), c("green", "white", "red"))

Heatmap(
  mat,
  name = "mat",
  column_title = "pre-defined distance method (1 - pearson)",
  column_title_side = "bottom",
  column_title_gp = gpar(fontsize = 10, fontface = "bold"),
  col = col_fun,
  clustering_distance_rows = "pearson",
  cluster_rows = TRUE,
  show_column_dend = FALSE,
  row_km = 2,
  column_km = 3,
  width = unit(6, "cm"),
  height = unit(6, "cm"),
  top_annotation = column_ha,
  right_annotation = row_ha
)
相关图 correlogram
library(GGally)
library(ggplot2)

# Pairwise plot matrix of columns 2-4 of the flea data, coloured by species.
data(flea)
ggpairs(flea, columns = 2:4, aes(colour = species)) +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )
气泡图 Bubble
library(ggplot2)
library(dplyr)
library(gapminder)

# Gapminder data for 2007 only.
data <- gapminder %>%
  filter(year == "2007") %>%
  dplyr::select(-year)

data %>%
  # Draw large bubbles first so that small ones stay visible on top.
  arrange(desc(pop)) %>%
  mutate(country = factor(country, country)) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = continent)) +
  geom_point(alpha = 0.5) +
  scale_size(range = c(.1, 24), name = "Population (M)") +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )
连线点图 Connected Scatterplot
library(ggplot2)
library(dplyr)
library(babynames)
library(ggrepel)
library(tidyr)

# Yearly counts of two female names, one column per name.
# NOTE(review): the comparison operators were lost in extraction; `==` and `>`
# follow the commonly published version of this example.
data <- babynames %>%
  filter(name %in% c("Ashley", "Amanda")) %>%
  filter(sex == "F") %>%
  filter(year > 1970) %>%
  select(year, name, n) %>%
  # The original passed -1 positionally, which binds to `fill`.
  spread(key = name, value = n, fill = -1)

# Label only a random 30% of the years to limit clutter.
tmp_date <- data %>% sample_frac(0.3)

data %>%
  ggplot(aes(x = Amanda, y = Ashley, label = year)) +
  geom_point(color = "#69b3a2") +
  geom_text_repel(data = tmp_date) +
  # Arrow from each year to the next; the last point has no successor (NA).
  geom_segment(
    color = "#69b3a2",
    aes(
      xend = c(tail(Amanda, n = -1), NA),
      yend = c(tail(Ashley, n = -1), NA)
    ),
    arrow = arrow(length = unit(0.3, "cm"))
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )
二维密度图 Density 2d
library(tidyverse)

# Three Gaussian point clouds stacked into one data frame. The clouds are not
# stored under the names a/b/c as in the original, because a variable called
# `c` shadows base::c.
cloud_a <- data.frame(x = rnorm(20000, 10, 1.9), y = rnorm(20000, 10, 1.2))
cloud_b <- data.frame(x = rnorm(20000, 14.5, 1.9), y = rnorm(20000, 14.5, 1.9))
cloud_c <- data.frame(x = rnorm(20000, 9.5, 1.9), y = rnorm(20000, 15.5, 1.9))
data <- rbind(cloud_a, cloud_b, cloud_c)

# Raster of the 2D kernel density estimate.
pl1 <- ggplot(data, aes(x = x, y = y)) +
  stat_density_2d(aes(fill = after_stat(density)), geom = "raster", contour = FALSE) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_distiller(palette = 4, direction = -1) +
  theme(legend.position = "none")

# Hexagonal binning of the same points.
pl2 <- ggplot(data, aes(x = x, y = y)) +
  geom_hex(bins = 70) +
  scale_fill_continuous(type = "viridis") +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )

cowplot::plot_grid(pl1, pl2, ncol = 2, align = "h", labels = LETTERS[1:2])
条形图 Barplot
library(ggplot2)
library(dplyr)

# Barplot with error bars: mean sepal length per iris species, +/- one
# standard deviation.
data <- iris %>%
  select(Species, Sepal.Length) %>%
  group_by(Species) %>%
  summarise(
    n = n(),
    mean = mean(Sepal.Length),
    sd = sd(Sepal.Length)
  ) %>%
  # Standard error and half-width of the 95% confidence interval.
  mutate(se = sd / sqrt(n)) %>%
  mutate(ic = se * qt((1 - 0.05) / 2 + .5, n - 1))

ggplot(data) +
  geom_bar(aes(x = Species, y = mean),
    stat = "identity", fill = "skyblue", alpha = 0.7
  ) +
  geom_errorbar(aes(x = Species, ymin = mean - sd, ymax = mean + sd),
    width = 0.4, colour = "orange", alpha = 0.9, size = 1.3
  ) +
  # Alternatives kept for reference (confidence interval / crossbar):
  # geom_errorbar(aes(x = Species, ymin = mean - ic, ymax = mean + ic),
  #   width = 0.4, colour = "orange", alpha = 0.9, size = 1.5) +
  # geom_crossbar(aes(x = Species, y = mean, ymin = mean - sd, ymax = mean + sd),
  #   width = 0.4, colour = "orange", alpha = 0.9, size = 1.3) +
  geom_pointrange(aes(x = Species, y = mean, ymin = mean - sd, ymax = mean + sd),
    colour = "orange", alpha = 0.9, size = 1.3
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 8)) +
  labs(x = "", y = "") +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )

根据大小控制条形图宽度
library(ggplot2)

# Variable-width barplot: bar height is `value`, bar width is the number of
# observations in the group.
data <- data.frame(
  group = c("A ", "B ", "C ", "D "),
  value = c(33, 62, 56, 67),
  number_of_obs = c(100, 500, 459, 342)
)

# Right edge of each bar: cumulative width plus a 30-unit gap per earlier bar.
data$right <- cumsum(data$number_of_obs) + 30 * c(0:(nrow(data) - 1))
data$left <- data$right - data$number_of_obs

ggplot(data, aes(ymin = 0)) +
  geom_rect(aes(
    xmin = left, xmax = right, ymax = value,
    color = group, fill = group
  )) +
  xlab("number of obs") +
  ylab("value") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 81)) +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )

雷达图 radar chart
library(fmsb)

# Radar chart: scores of three individuals over five subjects.
set.seed(99)
data <- as.data.frame(matrix(sample(0:20, 15, replace = FALSE), ncol = 5))
colnames(data) <- c("math", "english", "biology", "music", "R-coding")
rownames(data) <- paste("mister", letters[1:3], sep = "-")
# radarchart() reads the first two rows as the max and min of each axis.
data <- rbind(rep(20, 5), rep(0, 5), data)

colors_border <- c(
  rgb(0.2, 0.5, 0.5, 0.9),
  rgb(0.8, 0.2, 0.5, 0.9),
  rgb(0.7, 0.5, 0.1, 0.9)
)
colors_in <- c(
  rgb(0.2, 0.5, 0.5, 0.4),
  rgb(0.8, 0.2, 0.5, 0.4),
  rgb(0.7, 0.5, 0.1, 0.4)
)

radarchart(data,
  axistype = 1,
  # polygon appearance
  pcol = colors_border, pfcol = colors_in, plwd = 4, plty = 1,
  # grid appearance
  cglcol = "grey", cglty = 1, axislabcol = "grey",
  caxislabels = seq(0, 20, 5), cglwd = 0.8,
  # label size
  vlcex = 0.8
)
# Legend lists the individuals only, so the max/min helper rows are dropped.
legend(
  x = 1.2, y = 1.2, legend = rownames(data[-c(1, 2), ]), bty = "n",
  pch = 20, col = colors_in, text.col = "grey", cex = 1.2, pt.cex = 3
)

词云 wordcloud
library(wordcloud2)

# Word cloud of the `demoFreq` dataset; every word is rotated by the same
# angle (-pi/6) because min and max rotation are equal and rotateRatio is 1.
wordcloud2(demoFreq,
  size = 2.3,
  minRotation = -pi / 6,
  maxRotation = -pi / 6,
  rotateRatio = 1
)

平行坐标系统 Parallel Coordinates chart
library(hrbrthemes)
library(GGally)
library(viridis)

# Parallel coordinates: the four iris measurements drawn under four different
# scaling methods, one panel per method.
data <- iris

# Builds one panel; the panels differ only in scaling method, title and
# colour scale.
parcoord_panel <- function(scale_method, panel_title, colour_scale) {
  ggparcoord(data,
    columns = 1:4, groupColumn = 5, order = "anyClass",
    scale = scale_method,
    showPoints = TRUE,
    title = panel_title,
    alphaLines = 0.3
  ) +
    colour_scale +
    theme_ipsum() +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 13)
    ) +
    xlab("")
}

p1 <- parcoord_panel(
  "globalminmax", "No scaling",
  scale_color_viridis(discrete = TRUE)
)
p2 <- parcoord_panel(
  "uniminmax", "Standardize to Min = 0 and Max = 1",
  scale_color_viridis(discrete = TRUE)
)
p3 <- parcoord_panel(
  "std", "Normalize univariately (substract mean & divide by sd)",
  scale_color_viridis(discrete = TRUE)
)
# The last panel highlights one species and greys out the other two.
p4 <- parcoord_panel(
  "center", "Standardize and center variables",
  scale_color_manual(values = c("#69b3a2", "#E8E8E8", "#E8E8E8"))
)

cowplot::plot_grid(p1, p2, p3, p4, ncol = 2, align = "hv", labels = LETTERS[1:4])

棒棒糖图 Lollipop plot
library(ggplot2)

# Lollipop plots: (A) stems drawn from a baseline of 1, (B) horizontal
# version with groups A and D highlighted and annotated.
# NOTE(review): also relies on dplyr (`%>%`, arrange, mutate) and hrbrthemes
# (theme_ipsum) having been attached by earlier sections of the article.
data <- data.frame(
  x = LETTERS[1:26],
  y = abs(rnorm(26))
) %>%
  arrange(y) %>%
  # Freeze the sorted order as factor levels so the axis follows y.
  mutate(x = factor(x, x))

p1 <- ggplot(data, aes(x = x, y = y)) +
  geom_segment(aes(x = x, xend = x, y = 1, yend = y), color = "grey") +
  geom_point(color = "orange", size = 4) +
  xlab("") +
  ylab("Value of Y") +
  theme_light() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )

# Rows belonging to the two highlighted groups.
is_highlight <- data$x %in% c("A", "D")

p2 <- ggplot(data, aes(x = x, y = y)) +
  geom_segment(aes(x = x, xend = x, y = 0, yend = y),
    color = ifelse(is_highlight, "blue", "red"),
    size = ifelse(is_highlight, 1.3, 0.7)
  ) +
  geom_point(
    color = ifelse(is_highlight, "blue", "red"),
    size = ifelse(is_highlight, 5, 2)
  ) +
  annotate("text",
    x = grep("D", data$x),
    y = data$y[which(data$x == "D")] * 1.2,
    label = "Group D is very impressive",
    color = "orange", size = 4, angle = 0, fontface = "bold", hjust = 0
  ) +
  annotate("text",
    x = grep("A", data$x),
    y = data$y[which(data$x == "A")] * 1.2,
    label = paste("Group A is not too bad\n (val=",
      data$y[which(data$x == "A")] %>% round(2), ")",
      sep = ""
    ),
    color = "orange", size = 4, angle = 0, fontface = "bold", hjust = 0
  ) +
  theme_ipsum() +
  coord_flip() +
  theme(legend.position = "none") +
  xlab("") +
  ylab("Value of Y") +
  ggtitle("How did groups A and D perform?")

# Two panels, so only two labels are needed.
cowplot::plot_grid(p1, p2, ncol = 2, align = "h", labels = LETTERS[1:2])

循环条形图 circular barplot
library(tidyverse)

# Circular barplot: 60 individuals in four groups, drawn in polar
# coordinates with gaps between groups, grid arcs and rotated labels.
data <- data.frame(
  individual = paste("Mister ", seq(1, 60), sep = ""),
  group = c(rep("A", 10), rep("B", 30), rep("C", 14), rep("D", 6)),
  value = sample(seq(10, 100), 60, replace = TRUE)
) %>%
  mutate(group = factor(group))

# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 3
to_add <- data.frame(matrix(NA, empty_bar * nlevels(data$group), ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each = empty_bar)
data <- rbind(data, to_add)
data <- data %>% arrange(group)
data$id <- seq(1, nrow(data))

# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)
# Subtract 0.5 so the label sits on the centre of its bar.
angle <- 90 - 360 * (label_data$id - 0.5) / number_of_bar
# Labels on the left half are flipped by 180 degrees and right-aligned so
# they stay readable.
label_data$hjust <- ifelse(angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle + 180, angle)

# prepare a data frame for base lines
base_data <- data %>%
  group_by(group) %>%
  summarize(start = min(id), end = max(id) - empty_bar) %>%
  rowwise() %>%
  mutate(title = mean(c(start, end)))

# prepare a data frame for grid (scales)
grid_data <- base_data
# Rotate `end` by one position so each grid arc spans the gap that precedes
# its group (from the previous group's end to this group's start).
grid_data$end <- grid_data$end[
  c(nrow(grid_data), seq_len(nrow(grid_data) - 1))
] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]

# Make the plot
p <- ggplot(data, aes(x = as.factor(id), y = value, fill = group)) +
  geom_bar(aes(x = as.factor(id), y = value, fill = group),
    stat = "identity", alpha = 0.5
  ) +
  # Add the 80/60/40/20 grid arcs first so the bars are drawn OVER them.
  geom_segment(
    data = grid_data, aes(x = end, y = 80, xend = start, yend = 80),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 60, xend = start, yend = 60),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 40, xend = start, yend = 40),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  geom_segment(
    data = grid_data, aes(x = end, y = 20, xend = start, yend = 20),
    colour = "grey", alpha = 1, size = 0.3, inherit.aes = FALSE
  ) +
  # Add text showing the value of each grid arc
  annotate("text",
    x = rep(max(data$id), 4), y = c(20, 40, 60, 80),
    label = c("20", "40", "60", "80"),
    color = "grey", size = 3, angle = 0, fontface = "bold", hjust = 1
  ) +
  geom_bar(aes(x = as.factor(id), y = value, fill = group),
    stat = "identity", alpha = 0.5
  ) +
  # The negative lower limit creates the hole in the middle of the circle.
  ylim(-100, 120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1, 4), "cm")
  ) +
  coord_polar() +
  geom_text(
    data = label_data,
    aes(x = id, y = value + 10, label = individual, hjust = hjust),
    color = "black", fontface = "bold", alpha = 0.6, size = 2.5,
    angle = label_data$angle, inherit.aes = FALSE
  ) +
  # Add base line information
  geom_segment(
    data = base_data, aes(x = start, y = -5, xend = end, yend = -5),
    colour = "black", alpha = 0.8, size = 0.6, inherit.aes = FALSE
  ) +
  geom_text(
    data = base_data, aes(x = title, y = -18, label = group),
    hjust = c(1, 1, 0, 0), colour = "black", alpha = 0.8, size = 4,
    fontface = "bold", inherit.aes = FALSE
  )

p

分组堆积图 grouped stacked barplot
library(ggplot2)
library(viridis)
library(hrbrthemes)

# Stacked barplot: one bar per species, stacked by growing condition.
specie <- c(
  rep("sorgho", 3), rep("poacee", 3), rep("banana", 3), rep("triticum", 3)
)
condition <- rep(c("normal", "stress", "Nitrogen"), 4)
value <- abs(rnorm(12, 0, 15))
data <- data.frame(specie, condition, value)

ggplot(data, aes(fill = condition, y = value, x = specie)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_fill_viridis(discrete = TRUE) +
  ggtitle("Studying 4 species..") +
  theme_ipsum() +
  xlab("")

矩形树图 Treemap
library(treemap)

# Treemap with two levels: groups subdivided into subgroups, sized by value.
group <- c(rep("group-1", 4), rep("group-2", 2), rep("group-3", 3))
subgroup <- paste("subgroup", c(1, 2, 3, 4, 1, 2, 1, 2, 3), sep = "-")
value <- c(13, 5, 22, 12, 11, 7, 3, 1, 23)
data <- data.frame(group, subgroup, value)

treemap(data,
  index = c("group", "subgroup"),
  vSize = "value",
  type = "index"
)

圆圈图 doughnut
library(ggplot2)

# Doughnut chart: stacked rectangles wrapped into a ring with coord_polar().
data <- data.frame(
  category = c("A", "B", "C"),
  count = c(10, 60, 30)
)

# Each slice spans [ymin, ymax] on the cumulative-fraction axis.
data$fraction <- data$count / sum(data$count)
data$ymax <- cumsum(data$fraction)
data$ymin <- c(0, head(data$ymax, n = -1))
# Labels sit at the midpoint of each slice.
data$labelPosition <- (data$ymax + data$ymin) / 2
data$label <- paste0(data$category, "\n value: ", data$count)

ggplot(data, aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3, fill = category)) +
  geom_rect() +
  geom_label(x = 3.5, aes(y = labelPosition, label = label), size = 6) +
  scale_fill_brewer(palette = 4) +
  coord_polar(theta = "y") +
  # The x range starts below xmin, which leaves the hole in the middle.
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none")

饼图 pie
library(ggplot2)
library(dplyr)

# Pie chart: a single stacked bar wrapped with coord_polar(), labelled at the
# centre of each slice.
data <- data.frame(
  group = LETTERS[1:5],
  value = c(13, 7, 9, 21, 2)
)

data <- data %>%
  # Reverse order so the cumulative label positions match the stacking order.
  arrange(desc(group)) %>%
  mutate(prop = value / sum(value) * 100) %>%
  mutate(ypos = cumsum(prop) - 0.5 * prop)

ggplot(data, aes(x = "", y = prop, fill = group)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y", start = 0) +
  theme_void() +
  theme(legend.position = "none") +
  geom_text(aes(y = ypos, label = group), color = "white", size = 6) +
  scale_fill_brewer(palette = "Set1")

系统树图 dendrogram
library(ggraph)
library(igraph)
library(tidyverse)

# Dendrogram with ggraph: origin -> 7 groups -> 49 subgroups, with leaves
# coloured by group and sized by a random value.
theme_set(theme_void())

d1 <- data.frame(from = "origin", to = paste("group", seq(1, 7), sep = ""))
d2 <- data.frame(
  from = rep(d1$to, each = 7),
  to = paste("subgroup", seq(1, 49), sep = "_")
)
edges <- rbind(d1, d2)

name <- unique(c(as.character(edges$from), as.character(edges$to)))
vertices <- data.frame(
  name = name,
  # The first 8 vertices (origin + 7 groups) have no parent group.
  group = c(rep(NA, 8), rep(paste("group", seq(1, 7), sep = ""), each = 7)),
  cluster = sample(letters[1:4], length(name), replace = TRUE),
  value = sample(seq(10, 30), length(name), replace = TRUE)
)

mygraph <- graph_from_data_frame(edges, vertices = vertices)

ggraph(mygraph, layout = "dendrogram") +
  geom_edge_diagonal() +
  geom_node_text(aes(label = name, filter = leaf, color = group),
    angle = 90, hjust = 1, nudge_y = -0.1
  ) +
  geom_node_point(aes(filter = leaf, size = value, color = group),
    alpha = 0.6
  ) +
  # Extra room below the leaves for the rotated labels.
  ylim(-.6, NA) +
  theme(legend.position = "none")

# Sample names for the next example. The vector is called `sample`, but calls
# to sample() still resolve to the base function.
sample <- paste(rep("sample_", 24), seq(1, 24), sep = "")
# Base-R dendrogram with leaf points coloured by treatment and leaf labels
# coloured by species. Uses the `sample` name vector defined just above.
specie <- c(rep("dicoccoides", 8), rep("dicoccum", 8), rep("durum", 8))
treatment <- rep(c(rep("High", 4), rep("Low", 4)), 3)
data <- data.frame(sample, specie, treatment)
# Add five random gene-expression columns (columns 4 to 8).
for (i in seq_len(5)) {
  data[[paste0("gene_", i)]] <- sample(c(1:40), 24)
}
# Shift the expression values so treatment and species form visible clusters.
data[data$treatment == "High", c(4:8)] <-
  data[data$treatment == "High", c(4:8)] + 100
data[data$specie == "durum", c(4:8)] <-
  data[data$specie == "durum", c(4:8)] - 30
rownames(data) <- data[, 1]

dist <- dist(data[, c(4:8)], diag = TRUE)
hc <- hclust(dist)
dhc <- as.dendrogram(hc)
specific_leaf <- dhc[[1]][[1]][[1]]

i <- 0
# Node callback for dendrapply(): leaves get plotting parameters attached,
# inner nodes are returned unchanged. Looks up the leaf label in `data`.
colLab <- function(n) {
  if (is.leaf(n)) {
    a <- attributes(n)
    ligne <- match(attributes(n)$label, data[, 1])
    # Point colour encodes the treatment, label colour the species.
    col_treatment <- switch(as.character(data[ligne, 3]),
      Low = "blue",
      High = "red"
    )
    col_specie <- switch(as.character(data[ligne, 2]),
      dicoccoides = "red",
      dicoccum = "Darkgreen",
      durum = "blue"
    )
    attr(n, "nodePar") <- c(
      a$nodePar,
      list(
        cex = 1.5, lab.cex = 1, pch = 20, col = col_treatment,
        lab.col = col_specie, lab.font = 1, lab.cex = 1
      )
    )
  }
  return(n)
}

dL <- dendrapply(dhc, colLab)
plot(dL, main = "structure of the population")
legend("topright",
  legend = c("High Nitrogen", "Low Nitrogen", "Durum", "Dicoccoides", "Dicoccum"),
  col = c("red", "blue", "blue", "red", "Darkgreen"),
  pch = c(20, 20, 4, 4, 4), bty = "n", pt.cex = 1.5, cex = 0.8,
  text.col = "black", horiz = FALSE, inset = c(0, 0.1)
)

library(dendextend)
# Tanglegram: compare average-linkage and complete-linkage clusterings of
# USArrests side by side.
d1 <- USArrests %>%
  dist() %>%
  hclust(method = "average") %>%
  as.dendrogram()
d2 <- USArrests %>%
  dist() %>%
  hclust(method = "complete") %>%
  as.dendrogram()

# Both trees get the same 3-cluster colouring so they can be compared.
dl <- dendlist(
  d1 %>%
    set("labels_col", value = c("skyblue", "orange", "grey"), k = 3) %>%
    set("branches_lty", 1) %>%
    set("branches_k_color", value = c("skyblue", "orange", "grey"), k = 3),
  d2 %>%
    set("labels_col", value = c("skyblue", "orange", "grey"), k = 3) %>%
    set("branches_lty", 1) %>%
    set("branches_k_color", value = c("skyblue", "orange", "grey"), k = 3)
)

tanglegram(dl,
  common_subtrees_color_lines = FALSE,
  highlight_distinct_edges = TRUE,
  highlight_branches_lwd = FALSE,
  margin_inner = 7,
  lwd = 2
)

library(dendextend)
library(tidyverse)

# Dendrogram of mtcars (mpg, cyl, disp) with three coloured clusters and a
# colour bar under the leaves showing the `am` variable.
dend <- mtcars %>%
  select(mpg, cyl, disp) %>%
  dist() %>%
  hclust() %>%
  as.dendrogram()
my_colors <- ifelse(mtcars$am == 0, "forestgreen", "green")

# Large bottom margin leaves room for the labels and the colour bar.
par(mar = c(9, 1, 1, 1))
dend %>%
  set("labels_col", value = c("skyblue", "orange", "grey"), k = 3) %>%
  set("branches_k_color", value = c("skyblue", "orange", "grey"), k = 3) %>%
  set("leaves_pch", 19) %>%
  set("nodes_cex", 0.7) %>%
  plot(axes = FALSE)
rect.dendrogram(dend,
  k = 3, lty = 5, lwd = 0, x = 1,
  col = rgb(0.1, 0.2, 0.4, 0.1)
)
colored_bars(colors = my_colors, dend = dend, rowLabels = "am")

library(ggraph)
library(igraph)
library(tidyverse)
library(RColorBrewer)

# Circular dendrogram: origin -> 10 groups -> 100 subgroups, with leaf labels
# rotated to follow the circle.
d1 <- data.frame(from = "origin", to = paste("group", seq(1, 10), sep = ""))
d2 <- data.frame(
  from = rep(d1$to, each = 10),
  to = paste("subgroup", seq(1, 100), sep = "_")
)
edges <- rbind(d1, d2)

# One random value per vertex; the count is derived from the names rather
# than hard-coded.
vertex_names <- unique(c(as.character(edges$from), as.character(edges$to)))
vertices <- data.frame(
  name = vertex_names,
  value = runif(length(vertex_names))
)
# Group of a vertex = its parent in the edge list.
vertices$group <- edges$from[match(vertices$name, edges$to)]

# Leaves are the vertices that never appear as a `from`; number them so a
# label angle can be computed for each.
vertices$id <- NA
myleaves <- which(is.na(match(vertices$name, edges$from)))
nleaves <- length(myleaves)
vertices$id[myleaves] <- seq_len(nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves

# Labels on the left half are flipped by 180 degrees and right-aligned so
# they stay readable.
vertices$hjust <- ifelse(vertices$angle < -90, 1, 0)
vertices$angle <- ifelse(vertices$angle < -90, vertices$angle + 180, vertices$angle)
mygraph <- graph_from_data_frame(edges, vertices = vertices)

# Make the plot
ggraph(mygraph, layout = "dendrogram", circular = TRUE) +
  geom_edge_diagonal(colour = "grey") +
  scale_edge_colour_distiller(palette = "RdPu") +
  geom_node_text(
    aes(
      x = x * 1.15, y = y * 1.15, filter = leaf, label = name,
      angle = angle, hjust = hjust, colour = group
    ),
    size = 2.7, alpha = 1
  ) +
  # alpha is a constant, so it is set outside aes() instead of being mapped.
  geom_node_point(
    aes(
      filter = leaf, x = x * 1.07, y = y * 1.07,
      colour = group, size = value
    ),
    alpha = 0.2
  ) +
  scale_colour_manual(values = rep(brewer.pal(9, "Paired"), 30)) +
  scale_size_continuous(range = c(0.1, 10)) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  ) +
  # Leave room around the circle for the outward-facing labels.
  expand_limits(x = c(-1.3, 1.3), y = c(-1.3, 1.3))

圆形图 Circular packing
library(ggraph)
library(igraph)
library(tidyverse)
library(viridis)

# Circular packing of a subset of the `flare` hierarchy shipped with ggraph.
# Keep only edges whose target is itself a parent somewhere in the tree.
edges <- flare$edges %>%
  filter(to %in% from) %>%
  droplevels()
vertices <- flare$vertices %>%
  filter(name %in% c(edges$from, edges$to)) %>%
  droplevels()
vertices$size <- runif(nrow(vertices))

# Rebuild the graph object
mygraph <- graph_from_data_frame(edges, vertices = vertices)

ggraph(mygraph, layout = "circlepack") +
  geom_node_circle(aes(fill = depth)) +
  geom_node_label(aes(label = shortName, filter = leaf, size = size)) +
  theme_void() +
  # "FALSE" is not a valid legend position; "none" hides the legend.
  theme(legend.position = "none") +
  scale_fill_viridis()

分组线条图 grouped line chart
library(ggplot2)
library(babynames)
library(dplyr)
library(hrbrthemes)
library(viridis)

# Grouped line chart: yearly counts for three female baby names.
# Keep only 3 names
don <- babynames %>%
  filter(name %in% c("Ashley", "Patricia", "Helen")) %>%
  filter(sex == "F")

# Plot
don %>%
  ggplot(aes(x = year, y = n, group = name, color = name)) +
  geom_line() +
  scale_color_viridis(discrete = TRUE) +
  ggtitle("Popularity of American names in the previous 30 years") +
  theme_ipsum() +
  ylab("Number of babies born")

面积图 Area
library(ggplot2)
library(hrbrthemes)

# Area chart of a random walk, with the line and points drawn over the fill.
xValue <- 1:10
yValue <- abs(cumsum(rnorm(10)))
data <- data.frame(xValue, yValue)

ggplot(data, aes(x = xValue, y = yValue)) +
  geom_area(fill = "#69b3a2", alpha = 0.4) +
  geom_line(color = "#69b3a2", size = 2) +
  geom_point(size = 3, color = "#69b3a2") +
  theme_ipsum() +
  ggtitle("Evolution of something")

面积堆积图 Stacked area chart
library(ggplot2)
library(dplyr)

# Proportional stacked area chart: each group's share of the total at every
# time point.
# NOTE(review): scale_fill_viridis() and theme_ipsum() rely on viridis and
# hrbrthemes having been attached by earlier sections of the article.
time <- as.numeric(rep(seq(1, 7), each = 7))
value <- runif(49, 10, 100)
group <- rep(LETTERS[1:7], times = 7)
data <- data.frame(time, value, group)

plotdata <- data %>%
  group_by(time, group) %>%
  # Stay grouped by `time` so the share below is computed per time point.
  summarise(n = sum(value), .groups = "drop_last") %>%
  mutate(percentage = n / sum(n)) %>%
  ungroup()

ggplot(plotdata, aes(x = time, y = percentage, fill = group)) +
  geom_area(alpha = 0.6, size = 1, colour = "white") +
  scale_fill_viridis(discrete = TRUE) +
  theme_ipsum()

Streamgraph
# devtools::install_github("hrbrmstr/streamgraph")
library(streamgraph)
library(dplyr)
library(babynames)

# Streamgraph of yearly counts for baby names starting with "Kr".
babynames %>%
  filter(grepl("^Kr", name)) %>%
  group_by(year, name) %>%
  tally(wt = n) %>%
  streamgraph("name", "n", "year")

# Same for names starting with "I": zero baseline, linear interpolation and
# a name selector.
babynames %>%
  filter(grepl("^I", name)) %>%
  group_by(year, name) %>%
  tally(wt = n) %>%
  streamgraph("name", "n", "year", offset = "zero", interpolate = "linear") %>%
  sg_legend(show = TRUE, label = "I- names: ")

Time Series
library(ggplot2)
library(dplyr)
library(hrbrthemes)

# Time series: one simulated value per day for a year, zoomed to early 2017.
data <- data.frame(
  day = as.Date("2017-06-14") - 0:364,
  value = runif(365) + seq(-140, 224)^2 / 10000
)

ggplot(data, aes(x = day, y = value)) +
  geom_line(color = "steelblue") +
  geom_point() +
  xlab("") +
  theme_ipsum() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  # `limits` is spelled out in full rather than relying on partial matching.
  scale_x_date(limits = c(as.Date("2017-01-01"), as.Date("2017-02-11"))) +
  ylim(0, 1.5)

library(dygraphs)
library(xts)
library(tidyverse)
library(lubridate)

# Interactive time series with dygraphs: first 300 rows of a bike-count CSV
# downloaded from the URL below (needs network access).
data <- read.table(
  "https://python-graph-gallery.com/wp-content/uploads/bike.csv",
  header = TRUE, sep = ","
) %>%
  head(300)
data$datetime <- ymd_hms(data$datetime)

# dygraph() takes a time-indexed series, so wrap the counts in an xts object.
don <- xts(x = data$count, order.by = data$datetime)

dygraph(don) %>%
  dyOptions(
    labelsUTC = TRUE, fillGraph = TRUE, fillAlpha = 0.1,
    drawGrid = FALSE, colors = "#D8AE5A"
  ) %>%
  dyRangeSelector() %>%
  dyCrosshair(direction = "vertical") %>%
  dyHighlight(
    highlightCircleSize = 5,
    highlightSeriesBackgroundAlpha = 0.2,
    hideOnMouseOut = FALSE
  ) %>%
  dyRoller(rollPeriod = 1)