Click on any of the cells to zoom in to the different groups. Some levels could not be classified by Classyfire and these are colored grey and marked as “NA”. One compound could not be classified.
Scroll in the figure to zoom in and out.
# R version: 4.1.1
# RStudio version: 2021.09.0
# Loading required packages
library(tidyverse) #v1.3.2
library(tidyr) #v1.2.1
library(plotly) #v4.10.0
library(readxl) #v1.4.1
library(htmlwidgets) #v1.5.4
library(htmltools) #v0.5.3
library(visNetwork) #2.1.0
library(circlize) #v0.4.15
library(sunburstR) #v2.1.6
library(wesanderson) #v0.3.6
library(heatmaply) #v1.3.0
set.seed(12345)
# requires input data table named GC_LC from the hitlist of suspects from both LC and GC analyses.
# The structure can be seen below
## Rows: 346
## Columns: 39
## $ ID <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14~
## $ Name <chr> "Perfluorohexane sulfonic acid", "Divaricatic~
## $ Common_name <chr> "PFHxS", "Divaricatic acid", "Dihydrokaempfer~
## $ Formula <chr> "C6HF13O3S", "C21H24O7", "C15H12O6", "C21H20O~
## $ Formula_MonoisotopicMass <dbl> 399.9439, 388.1522, 288.0634, 464.0955, 363.9~
## $ CAS <chr> "355-46-4", "491-62-3", "5150-32-3", "20229-5~
## $ SMILES <chr> "C(C(C(C(F)(F)S(=O)(=O)O)(F)F)(F)F)(C(C(F)(F)~
## $ InChI <chr> "InChI=1S/C6HF13O3S/c7-1(8,3(11,12)5(15,16)17~
## $ InChIKey <chr> "QZHDEAJFRJCDMF-UHFFFAOYSA-N", "FSRDIJIAQPSMM~
## $ ID_method <chr> "SSA (ESI-)", "SSA (ESI-)", "SSA (ESI-)", "SS~
## $ ICL <chr> "2a", "2a", "3", "3", "2b", "2a", "2a", "2a",~
## $ Chrom_instr <chr> "LC", "LC", "LC", "LC", "LC", "LC", "LC", "LC~
## $ Ionisation_mode <chr> "ESI neg", "ESI neg", "ESI neg", "ESI neg", "~
## $ FeatureFinder <chr> "MSDIAL", "MSDIAL", "MSDIAL", "MSDIAL", "MSDI~
## $ RT_min <dbl> 9.317, 10.237, 5.293, 5.162, 8.240, 10.360, 8~
## $ RT_sec <dbl> 559, 614, 318, 310, 494, 622, 539, 653, 596, ~
## $ Retention_index <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ Precursor_ion <chr> "M-H", "M-H", "M-H", "M-H", "M-H", "M-H", "M-~
## $ Precursor_measured_mass <dbl> 398.9351, 387.1450, 287.0570, 463.0888, 362.9~
## $ Precursor_exact_mass <dbl> 398.9361, 387.1444, 287.0556, 463.0877, 362.9~
## $ Mass_error_ppm <dbl> -2.3474564, 1.5026694, 5.1446486, 2.5669870, ~
## $ FragIon1_Mass <dbl> 79.9568, 177.0552, 125.0239, 301.0348, 318.97~
## $ FragIon2_Mass <dbl> 98.9552, 195.0657, 177.0552, 178.9981, 168.98~
## $ FragIon3_Mass <dbl> NA, 151.0759, 259.0606, 151.0031, 130.9920, 1~
## $ FragIon1_Formula <chr> "[HO3S-H]-", "[C10H10O3-H]-", "[C6H6O3-H]-", ~
## $ FragIon2_Formula <chr> "[HFO3S-H]-", "[C10H12O4-H]-", "[C10H10O3-H]-~
## $ FragIon3_Formula <chr> NA, "[C19H12O2-H]-", "[C14H12O5-H]-", "[C7H4O~
## $ Use_category <chr> "PFAS", "Plant natural product", "Plant natur~
## $ Precursor_Intermediate <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ Transformation_Product <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ ClassyFy_Status <chr> "Completed", "Completed", "Completed", "Compl~
## $ Kingdom <chr> "Organic compounds", "Organic compounds", "Or~
## $ Superclass <chr> "Organohalogen compounds", "Phenylpropanoids ~
## $ Class <chr> "Alkyl halides", "Depsides and depsidones", "~
## $ Subclass <chr> "Alkyl fluorides", NA, "Flavans", "Flavonoid ~
## $ Parent_Level_1 <chr> "Perfluoroalkyl sulfonic acid and derivatives~
## $ Parent_Level_2 <chr> NA, NA, "Flavanonols", NA, NA, NA, NA, NA, NA~
## $ Parent_Level_3 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "~
## $ Parent_Level_4 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
# The intensities of the quantifier ions were extracted for each hit in LC and GC mode and combined into one data table (named ALL_hits_hm).
# columns represent the samples and rows are for the compounds.
## Rows: 2
## Columns: 37
## $ H1 <dbl> 3222909, 278155
## $ H2 <dbl> 712554, 31744
## $ H3a_rep1 <dbl> 3233942, 64710
## $ H3a_rep2 <dbl> 3964053, 80484
## $ H3a_rep3 <dbl> 4105269, 84695
## $ H3b <dbl> 1250739, 113780
## $ H4_1 <dbl> 5262658, 1358628
## $ H4_2 <dbl> 4146854, 373944
## $ H4_3 <dbl> 4090129, 788817
## $ H5_rep1 <dbl> 560994, 144994
## $ H5_rep2 <dbl> 58131, 42220
## $ N_rep1 <dbl> 0, 12353
## $ N_rep2 <dbl> 27854, 23557
## $ N_rep3 <dbl> 24611, 36991
## $ O1a <dbl> 720793, 53604
## $ O1b <dbl> 788145, 62763
## $ O2 <dbl> 4254457, 1198326
## $ O3_rep1 <dbl> 852455, 188509
## $ O3_rep2 <dbl> 1012372, 217649
## $ O3_rep3 <dbl> 1039905, 229112
## $ O4 <dbl> 2589977, 72419
## $ P1a <dbl> 356950, 35966
## $ P1b <dbl> 254071, 34963
## $ P2a <dbl> 217243, 44255
## $ P2b_rep1 <dbl> 1629956, 51745
## $ P2b_rep2 <dbl> 1249783, 34375
## $ P2b_rep3 <dbl> 1584438, 46440
## $ P2c <dbl> 110981, 34299
## $ P3 <dbl> 2137605, 1530537
## $ P4a <dbl> 2792926, 96857
## $ P4b <dbl> 1156299, 15951
## $ P6 <dbl> 28736, 37726
## $ PWa <dbl> 188975, 58212
## $ PWb <dbl> 839402, 25277
## $ IT <dbl> 868053, 33419
## $ FS <dbl> 3776762, 126070
## $ ES <dbl> 417475, 935234
#-------------------------- generate heatmap
heatmaply(ALL_hits_hm,
distfun = dist,
hclust_method = "average",
k_row = NA, # if NA then optimum number of clusters are calculated
#k_col = NA,
scale_fill_gradient_fun = ggplot2::scale_fill_gradient2(
low = "white",
mid = "white",
#mid = "snow",
high = "red",
midpoint = 0,
limits = NULL),
scale = "row",
fontsize_col = 14,
grid_gap = 0.1,
column_text_angle = 90,
showticklabels = c(TRUE, FALSE)) # removes y-axis tick label
#-----------------
# Generate sunburst chart
# Manually specify the color scale based on previous auto-generated color scales with satisfactory results
colors <- c("#76F670","#C2B1AF","#C170D9","#C1C0CD","#D558D1","#AAD584","#9BCED5","#879DC3","#8778C9","#F3EFF7","#D7C957","#F68ED5","#ED3FC2","#F2E663","#7FEEDE","#68C5F5","#276459","#D576CE","#F089AC","#A8C3F4","#FABFEE","#A5F447","#9BEE90","#46C5BD","#9CFE67","#7DD268","#A4B9C5","#D26395","#EBADA2","#E95DBC","#8DD075","#B776F4","#F568AB","#8F4C87","#DBA7F9","#61F982","#F6F3B6","#7DF0CD","#F0A7D1","#BBF059","#E22E75","#BAC659","#F33AE7","#FD721C","#A6E880","#D0BDA4","#41BAD2","#DFF4B0","#2EF068","#9958F0","#52AAF8","#BAC1B7","#F69737","#76BAF1","#75DC3C","#8367EA","#B8DCD6","#F34D9B","#BDF97F","#ED707B","#2146A8","#BFDAF1","#F9F7C9","#9CD9F0","#E5F551","#B25E8B","#4746D0","#1A6E46","#F19986","#7B7D49","#8CBBB3","#72CBAC","#5F8FBE","#F6DFB7","#5B8ECF","#DAEFEF","#6BAEBB","#B69FEF","#C992B7","#EEEAA0","#35A3D8","#3EFDBD","#A5AD95","#82838F","#CEF9DB","#F4E8D6","#F3E2A4","#B36CBB","#A44771","#BB7993","#96DCB5","#7835BF","#A8BDDB","#F8C231","#D01283","#C5EEBB","#F2AFB7","#CDE294","#A32FBF","#D1B42E","#E3DC87","#643E9A","#39E19C","#EE4D71","#BEADCF","#AE9B64","#C2F1A1","#D4D6F3","#45F956","#C2DD9E","#9BC51D","#F7F697","#DCE5B3","#BC6C57","#F86F97","#C58016","#E975BF","#E65626","#F3CF68","#83A8E2","#DCC3DC","#8AF3A3","#E3BBF8","#A9809F","#F70B53","#CEBEF1","#3974EB","#C675B2","#D1D7C2","#4DBE82","#F7D0B3","#AA92B3","#BF0DA8","#7997E7","#59D4A5","#76D086","#A65914","#8E68C4","#D1F1E0","#3CE520","#ECE4E2","#4C908E","#F5CFC2","#D9ADE2","#B7D945","#574CED","#E5D6CC","#53F1EE","#C6C7F0","#9283F3","#AB9CAC","#CAF893","#71BC1D","#2AA48D","#F35ADC","#D23EB7","#909674","#3FD5E7","#D923C0","#68D4F3","#E3D02D","#B1F2D4","#96E8F1","#7EE064","#498CF0","#1FAA5D","#95C9F1","#9DFA81","#609438","#E988F3","#A2F5C5","#EEE739","#B69E39","#86F17F","#8FD1C0","#EBF6E3","#9A833B","#D9C095","#6AC36A","#B59CCD","#B6D270","#DEB574","#B2F8A6","#B7C49B","#DADAE2","#BFC986","#C68AE8","#ECC4CA","#DCABC6","#E6EBCC","#D0F329","#3579CB","#EFF62C","#50D65C","#36E98D","#B58ACB","#90F1B3","#E69AD6","#DDE570","#EC695B","#ABBC32","#4FF2C4","#F5DBF8","#E6C5F2","#253C80","#CE4891","#3FE8D3","#C38585","#C75BEC","#D881AB","#5B69E5","#E775E7","#85C445","#FDE49E","#C1EDF8","#E0D598","#AFEFE2","#E689C2","#F7AEEF","#EA9BAF","#94C39D","#BD656F","#DEB8AC","#52B1B5","#D7B0F8","#8418F9","#675B76","#C38BC5","#E2F88B","#A0ACDD","#903F54","#B1E16B","#E87F45","#8BF3F5","#E7C672","#82BA4D","#BC8C6B","#33527F","#DA8B54","#67F497","#B53F65","#E6D5B5","#351CB2","#D4F66D","#C8E132","#F184DA","#F1C7E5","#E899F0","#6DF559","#A8918A","#B1D1AA","#5EDBDD","#E9AC4D","#B7E3C5","#E0F59A","#6FE4A0","#92C68D","#8EA1B8","#77B49C","#CC38EF","#50AC25","#6B2DE3","#659F62","#798AEA","#76514C","#C3BB82","#C9A0A4","#C3579D","#9EE09A","#F6A8F4","#9B6F83","#47BD22","#F43CAF","#9A4BB4","#51BEE3","#83D5D2","#BAF2F4","#8871AE","#2AE974","#9E9CE2","#F1DDEC","#95F22D","#B649C4","#9B63D6","#E2F6D2","#C931CE","#AF48A3","#F8D0A0","#AC7BAA","#B8AAE5","#428193","#86A233","#F1B189","#F28B92","#8D8EC5","#F25FF2","#73B8D8")
Group_sun <- GC_LC %>%
mutate(across(where(is.character), str_squish)) %>%
filter(ClassyFy_Status == "Completed") %>%
distinct(InChIKey, .keep_all = TRUE) %>%
mutate(Common_name = str_replace_all(Common_name, "-", "_")) %>%
mutate(path = paste(Superclass, Class, Subclass, Parent_Level_1, Parent_Level_2, Common_name, sep = "-")) %>%
select(path) %>%
group_by(path) %>%
tally()
labels <- Group_sun$path
sund2b(Group_sun, rootLabel = "Total", colors = list(range = colors, domain = labels))
#----------------------------------
# Histogram of use categories / functional uses
Use_category <- GC_LC %>%
unite(., col = "Use", Use_category, Precursor_Intermediate, Transformation_Product, na.rm = TRUE, sep = ", ") %>%
separate_rows(Use, sep = ",") %>%
mutate(across(where(is.character), str_squish)) %>%
group_by(Use) %>%
tally(sort = TRUE) %>%
ungroup()
Use_category %>%
plot_ly(x = ~Use, y = ~n, type = "bar") %>%
layout(xaxis = list(categoryorder = "total descending"))
#------------------------------------------
# Network visualization of chemical groups and use groups / functional use
# Common name - Parent1 - Use
# Format data to use in visnetwork
Common_name <- GC_LC %>%
select(Common_name) %>%
distinct(Common_name) %>% # this will keep only one copy of compound if it appear in both GC and LC
group_by(Common_name) %>%
tally(sort = TRUE) %>%
ungroup()
Parent_Level_1 <- GC_LC %>%
select(Parent_Level_1) %>%
group_by(Parent_Level_1) %>%
tally(sort = TRUE) %>%
ungroup() %>%
na.omit()
Use_category <- GC_LC %>%
unite(., col = "Use", Use_category, Precursor_Intermediate, Transformation_Product, na.rm = TRUE, sep = ", ") %>%
separate_rows(Use, sep = ",") %>%
mutate(across(where(is.character), str_squish)) %>%
group_by(Use) %>%
tally(sort = TRUE) %>%
ungroup()
Group <- GC_LC %>%
filter(ClassyFy_Status == "Completed") %>%
unite(., col = "Use", Use_category, Precursor_Intermediate, Transformation_Product, na.rm = TRUE, sep = ", ") %>%
separate_rows(Use, sep = ",") %>%
mutate(across(where(is.character), str_squish))
Group1 <- Group %>%
select(Common_name, Parent_Level_1, Use) %>%
distinct(Common_name, .keep_all = TRUE) %>% #keep only one unique common name since LC, GC can give same compound
group_by(Common_name, Parent_Level_1) %>%
tally() %>%
ungroup() %>%
rename(from = Common_name, to = Parent_Level_1, value = n)
Group2 <- Group %>%
select(Common_name, Parent_Level_1, Use) %>%
group_by(Parent_Level_1, Use) %>%
tally() %>%
ungroup() %>%
rename(from = Parent_Level_1, to = Use, value = n)
Group_vis <- rbind(Group1, Group2)
# visnetwork start from index 1.
Nodes <- data.frame(id = seq_along(unique(c(Group_vis$from, Group_vis$to))), label = unique(c(Group_vis$from, Group_vis$to))) %>%
mutate(group = case_when(label %in% GC_LC$Common_name ~ "Compound",
label %in% GC_LC$Parent_Level_1 ~ "PL1",
label %in% Use_category$Use ~ "Use")) %>%
mutate(title = label) %>%
mutate(value = 1) %>%
left_join(Common_name, by = c("label" = "Common_name")) %>%
left_join(Parent_Level_1, by = c("label" = "Parent_Level_1")) %>%
left_join(Use_category, by = c("label" = "Use")) %>%
replace_na(list(n.x = 0, n.y = 0, n = 0)) %>%
mutate(value = (n.x + n.y + n)) %>%
select(-c(n.x, n.y, n), id, label, group, value, title) %>%
rename(size = value) %>%
as.data.frame()
Links <- Group_vis %>%
left_join(Nodes, b = c("from" = "label")) %>%
mutate(from = id) %>%
select(-id) %>%
left_join(Nodes, by = c("to" = "label")) %>%
mutate(to = id) %>%
select(from, to, value) %>%
as.data.frame()
# renaming label. NOTE: above Links code will not work after executing this.
Nodes <- Nodes %>% mutate(label = as.character(id)) %>%
mutate(size = size*3) #making the nodes larger
visNetwork(nodes = Nodes, edges = Links, height = "1200px", width = "1200px") %>%
visPhysics(solver = "forceAtlas2Based", stabilization = FALSE) %>%
visIgraphLayout()
#-----------------
# Network visualization of individual compounds and use groups / functional use
# Format data to use in visnetwork
# Use distinct(from, to) here to remove duplicates
Group_vis2 <- Group %>%
select(Common_name, Use) %>%
ungroup() %>%
rename(from = Common_name, to = Use) %>%
distinct(from, to)
# visnetwork start from index 1
Nodes2 <- data.frame(id = seq_along(unique(c(Group_vis2$from, Group_vis2$to))), label = unique(c(Group_vis2$from, Group_vis2$to))) %>%
mutate(group = case_when(label %in% GC_LC$Common_name ~ "Compound",
label %in% Use_category$Use ~ "Use")) %>%
mutate(title = label) %>%
mutate(value = 1) %>%
left_join(Common_name, by = c("label" = "Common_name")) %>%
left_join(Use_category, by = c("label" = "Use")) %>%
replace_na(list(n.x = 0, n.y = 0, n = 0)) %>%
mutate(value = (n.x + n.y)) %>%
select(-c(n.x, n.y), id, label, group, value, title) %>%
rename(size = value) %>%
as.data.frame()
Links2 <- Group_vis2 %>%
left_join(Nodes2, b = c("from" = "label")) %>%
mutate(from = id) %>%
select(-id) %>%
left_join(Nodes2, by = c("to" = "label")) %>%
mutate(to = id) %>%
select(from, to) %>%
as.data.frame()
visNetwork(nodes = Nodes2, edges = Links2, height = "1200px", width = "1200px") %>%
visPhysics(solver = "forceAtlas2Based", stabilization = FALSE) %>%
visIgraphLayout()