This html document is part of supplementary material to the paper, which include several interactive visualization charts of the results, as well as source codes for these.





Figure S1. Heatmap of identified compounds and their relative intensities.



Figure S2. Sunburst chart of identified compounds by chemical groups.

Click on any of the cells to zoom in to the different groups. Some levels could not be classified by Classyfire and these are colored grey and marked as “NA”. One compound could not be classified.

Figure S3. Histogram of use categories / functional uses.




Figure S4. Network visualization of chemical groups and use groups / functional use.

Scroll in the figure to zoom in and out.

Figure S5. Network visualization of individual compounds and use groups / functional use.



Source codes for the visualizations


# R version: 4.1.1
# RStudio version: 2021.09.0

# Loading required packages
library(tidyverse) #v1.3.2
library(tidyr) #v1.2.1
library(plotly) #v4.10.0
library(readxl) #v1.4.1
library(htmlwidgets) #v1.5.4
library(htmltools) #v0.5.3
library(visNetwork) #2.1.0
library(circlize) #v0.4.15
library(sunburstR) #v2.1.6
library(wesanderson) #v0.3.6
library(heatmaply) #v1.3.0

set.seed(12345)


# requires input data table named GC_LC from the hitlist of suspects from both LC and GC analyses. 
# The structure can be seen below
## Rows: 346
## Columns: 39
## $ ID                       <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14~
## $ Name                     <chr> "Perfluorohexane sulfonic acid", "Divaricatic~
## $ Common_name              <chr> "PFHxS", "Divaricatic acid", "Dihydrokaempfer~
## $ Formula                  <chr> "C6HF13O3S", "C21H24O7", "C15H12O6", "C21H20O~
## $ Formula_MonoisotopicMass <dbl> 399.9439, 388.1522, 288.0634, 464.0955, 363.9~
## $ CAS                      <chr> "355-46-4", "491-62-3", "5150-32-3", "20229-5~
## $ SMILES                   <chr> "C(C(C(C(F)(F)S(=O)(=O)O)(F)F)(F)F)(C(C(F)(F)~
## $ InChI                    <chr> "InChI=1S/C6HF13O3S/c7-1(8,3(11,12)5(15,16)17~
## $ InChIKey                 <chr> "QZHDEAJFRJCDMF-UHFFFAOYSA-N", "FSRDIJIAQPSMM~
## $ ID_method                <chr> "SSA (ESI-)", "SSA (ESI-)", "SSA (ESI-)", "SS~
## $ ICL                      <chr> "2a", "2a", "3", "3", "2b", "2a", "2a", "2a",~
## $ Chrom_instr              <chr> "LC", "LC", "LC", "LC", "LC", "LC", "LC", "LC~
## $ Ionisation_mode          <chr> "ESI neg", "ESI neg", "ESI neg", "ESI neg", "~
## $ FeatureFinder            <chr> "MSDIAL", "MSDIAL", "MSDIAL", "MSDIAL", "MSDI~
## $ RT_min                   <dbl> 9.317, 10.237, 5.293, 5.162, 8.240, 10.360, 8~
## $ RT_sec                   <dbl> 559, 614, 318, 310, 494, 622, 539, 653, 596, ~
## $ Retention_index          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ Precursor_ion            <chr> "M-H", "M-H", "M-H", "M-H", "M-H", "M-H", "M-~
## $ Precursor_measured_mass  <dbl> 398.9351, 387.1450, 287.0570, 463.0888, 362.9~
## $ Precursor_exact_mass     <dbl> 398.9361, 387.1444, 287.0556, 463.0877, 362.9~
## $ Mass_error_ppm           <dbl> -2.3474564, 1.5026694, 5.1446486, 2.5669870, ~
## $ FragIon1_Mass            <dbl> 79.9568, 177.0552, 125.0239, 301.0348, 318.97~
## $ FragIon2_Mass            <dbl> 98.9552, 195.0657, 177.0552, 178.9981, 168.98~
## $ FragIon3_Mass            <dbl> NA, 151.0759, 259.0606, 151.0031, 130.9920, 1~
## $ FragIon1_Formula         <chr> "[HO3S-H]-", "[C10H10O3-H]-", "[C6H6O3-H]-", ~
## $ FragIon2_Formula         <chr> "[HFO3S-H]-", "[C10H12O4-H]-", "[C10H10O3-H]-~
## $ FragIon3_Formula         <chr> NA, "[C19H12O2-H]-", "[C14H12O5-H]-", "[C7H4O~
## $ Use_category             <chr> "PFAS", "Plant natural product", "Plant natur~
## $ Precursor_Intermediate   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ Transformation_Product   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ ClassyFy_Status          <chr> "Completed", "Completed", "Completed", "Compl~
## $ Kingdom                  <chr> "Organic compounds", "Organic compounds", "Or~
## $ Superclass               <chr> "Organohalogen compounds", "Phenylpropanoids ~
## $ Class                    <chr> "Alkyl halides", "Depsides and depsidones", "~
## $ Subclass                 <chr> "Alkyl fluorides", NA, "Flavans", "Flavonoid ~
## $ Parent_Level_1           <chr> "Perfluoroalkyl sulfonic acid and derivatives~
## $ Parent_Level_2           <chr> NA, NA, "Flavanonols", NA, NA, NA, NA, NA, NA~
## $ Parent_Level_3           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "~
## $ Parent_Level_4           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~


# The intensities of the quantifier ions were extracted for each hit in LC and GC mode and combined into one data table (named ALL_hits_hm).
# columns represent the samples and rows are for the compounds.
## Rows: 2
## Columns: 37
## $ H1       <dbl> 3222909, 278155
## $ H2       <dbl> 712554, 31744
## $ H3a_rep1 <dbl> 3233942, 64710
## $ H3a_rep2 <dbl> 3964053, 80484
## $ H3a_rep3 <dbl> 4105269, 84695
## $ H3b      <dbl> 1250739, 113780
## $ H4_1     <dbl> 5262658, 1358628
## $ H4_2     <dbl> 4146854, 373944
## $ H4_3     <dbl> 4090129, 788817
## $ H5_rep1  <dbl> 560994, 144994
## $ H5_rep2  <dbl> 58131, 42220
## $ N_rep1   <dbl> 0, 12353
## $ N_rep2   <dbl> 27854, 23557
## $ N_rep3   <dbl> 24611, 36991
## $ O1a      <dbl> 720793, 53604
## $ O1b      <dbl> 788145, 62763
## $ O2       <dbl> 4254457, 1198326
## $ O3_rep1  <dbl> 852455, 188509
## $ O3_rep2  <dbl> 1012372, 217649
## $ O3_rep3  <dbl> 1039905, 229112
## $ O4       <dbl> 2589977, 72419
## $ P1a      <dbl> 356950, 35966
## $ P1b      <dbl> 254071, 34963
## $ P2a      <dbl> 217243, 44255
## $ P2b_rep1 <dbl> 1629956, 51745
## $ P2b_rep2 <dbl> 1249783, 34375
## $ P2b_rep3 <dbl> 1584438, 46440
## $ P2c      <dbl> 110981, 34299
## $ P3       <dbl> 2137605, 1530537
## $ P4a      <dbl> 2792926, 96857
## $ P4b      <dbl> 1156299, 15951
## $ P6       <dbl> 28736, 37726
## $ PWa      <dbl> 188975, 58212
## $ PWb      <dbl> 839402, 25277
## $ IT       <dbl> 868053, 33419
## $ FS       <dbl> 3776762, 126070
## $ ES       <dbl> 417475, 935234
#-------------------------- generate heatmap
heatmaply(ALL_hits_hm,
          distfun = dist,
          hclust_method = "average",
          k_row = NA, # if NA then optimum number of clusters are calculated 
          #k_col = NA,
          scale_fill_gradient_fun = ggplot2::scale_fill_gradient2(
            low = "white", 
            mid = "white",
            #mid = "snow",
            high = "red", 
            midpoint = 0, 
            limits = NULL),
          scale = "row",
          fontsize_col = 14,
          grid_gap = 0.1,
          column_text_angle = 90,
          showticklabels = c(TRUE, FALSE)) # removes y-axis tick label


#-----------------
# Generate sunburst chart

# Manually specify the color scale based on previous auto-generated color scales with satisfactory results
colors <- c("#76F670","#C2B1AF","#C170D9","#C1C0CD","#D558D1","#AAD584","#9BCED5","#879DC3","#8778C9","#F3EFF7","#D7C957","#F68ED5","#ED3FC2","#F2E663","#7FEEDE","#68C5F5","#276459","#D576CE","#F089AC","#A8C3F4","#FABFEE","#A5F447","#9BEE90","#46C5BD","#9CFE67","#7DD268","#A4B9C5","#D26395","#EBADA2","#E95DBC","#8DD075","#B776F4","#F568AB","#8F4C87","#DBA7F9","#61F982","#F6F3B6","#7DF0CD","#F0A7D1","#BBF059","#E22E75","#BAC659","#F33AE7","#FD721C","#A6E880","#D0BDA4","#41BAD2","#DFF4B0","#2EF068","#9958F0","#52AAF8","#BAC1B7","#F69737","#76BAF1","#75DC3C","#8367EA","#B8DCD6","#F34D9B","#BDF97F","#ED707B","#2146A8","#BFDAF1","#F9F7C9","#9CD9F0","#E5F551","#B25E8B","#4746D0","#1A6E46","#F19986","#7B7D49","#8CBBB3","#72CBAC","#5F8FBE","#F6DFB7","#5B8ECF","#DAEFEF","#6BAEBB","#B69FEF","#C992B7","#EEEAA0","#35A3D8","#3EFDBD","#A5AD95","#82838F","#CEF9DB","#F4E8D6","#F3E2A4","#B36CBB","#A44771","#BB7993","#96DCB5","#7835BF","#A8BDDB","#F8C231","#D01283","#C5EEBB","#F2AFB7","#CDE294","#A32FBF","#D1B42E","#E3DC87","#643E9A","#39E19C","#EE4D71","#BEADCF","#AE9B64","#C2F1A1","#D4D6F3","#45F956","#C2DD9E","#9BC51D","#F7F697","#DCE5B3","#BC6C57","#F86F97","#C58016","#E975BF","#E65626","#F3CF68","#83A8E2","#DCC3DC","#8AF3A3","#E3BBF8","#A9809F","#F70B53","#CEBEF1","#3974EB","#C675B2","#D1D7C2","#4DBE82","#F7D0B3","#AA92B3","#BF0DA8","#7997E7","#59D4A5","#76D086","#A65914","#8E68C4","#D1F1E0","#3CE520","#ECE4E2","#4C908E","#F5CFC2","#D9ADE2","#B7D945","#574CED","#E5D6CC","#53F1EE","#C6C7F0","#9283F3","#AB9CAC","#CAF893","#71BC1D","#2AA48D","#F35ADC","#D23EB7","#909674","#3FD5E7","#D923C0","#68D4F3","#E3D02D","#B1F2D4","#96E8F1","#7EE064","#498CF0","#1FAA5D","#95C9F1","#9DFA81","#609438","#E988F3","#A2F5C5","#EEE739","#B69E39","#86F17F","#8FD1C0","#EBF6E3","#9A833B","#D9C095","#6AC36A","#B59CCD","#B6D270","#DEB574","#B2F8A6","#B7C49B","#DADAE2","#BFC986","#C68AE8","#ECC4CA","#DCABC6","#E6EBCC","#D0F329","#3579CB","#EFF62C","#50D65C","#36E98D","#B58ACB","#90F1B3","#E69AD6","#DDE570","#EC695B","#ABBC32","#4FF2C4","#F5DBF8","#E6C5F2","#253C80","#CE4891","#3FE8D3","#C38585","#C75BEC","#D881AB","#5B69E5","#E775E7","#85C445","#FDE49E","#C1EDF8","#E0D598","#AFEFE2","#E689C2","#F7AEEF","#EA9BAF","#94C39D","#BD656F","#DEB8AC","#52B1B5","#D7B0F8","#8418F9","#675B76","#C38BC5","#E2F88B","#A0ACDD","#903F54","#B1E16B","#E87F45","#8BF3F5","#E7C672","#82BA4D","#BC8C6B","#33527F","#DA8B54","#67F497","#B53F65","#E6D5B5","#351CB2","#D4F66D","#C8E132","#F184DA","#F1C7E5","#E899F0","#6DF559","#A8918A","#B1D1AA","#5EDBDD","#E9AC4D","#B7E3C5","#E0F59A","#6FE4A0","#92C68D","#8EA1B8","#77B49C","#CC38EF","#50AC25","#6B2DE3","#659F62","#798AEA","#76514C","#C3BB82","#C9A0A4","#C3579D","#9EE09A","#F6A8F4","#9B6F83","#47BD22","#F43CAF","#9A4BB4","#51BEE3","#83D5D2","#BAF2F4","#8871AE","#2AE974","#9E9CE2","#F1DDEC","#95F22D","#B649C4","#9B63D6","#E2F6D2","#C931CE","#AF48A3","#F8D0A0","#AC7BAA","#B8AAE5","#428193","#86A233","#F1B189","#F28B92","#8D8EC5","#F25FF2","#73B8D8")


Group_sun <- GC_LC %>%
  mutate(across(where(is.character), str_squish)) %>%
  filter(ClassyFy_Status == "Completed") %>% 
  distinct(InChIKey, .keep_all = TRUE) %>%
  mutate(Common_name = str_replace_all(Common_name, "-", "_")) %>%
  mutate(path = paste(Superclass, Class, Subclass, Parent_Level_1, Parent_Level_2, Common_name, sep = "-")) %>%
  select(path) %>%
  group_by(path) %>%
  tally()

labels <- Group_sun$path

sund2b(Group_sun, rootLabel = "Total", colors = list(range = colors, domain = labels))



 #----------------------------------
# Histogram of use categories / functional uses

Use_category <- GC_LC %>%
  unite(., col = "Use", Use_category, Precursor_Intermediate, Transformation_Product, na.rm = TRUE, sep = ", ") %>%
  separate_rows(Use, sep = ",") %>%
  mutate(across(where(is.character), str_squish)) %>%
  group_by(Use) %>%
  tally(sort = TRUE) %>%
  ungroup()

Use_category %>% 
    plot_ly(x = ~Use, y = ~n, type = "bar") %>%
    layout(xaxis = list(categoryorder = "total descending"))


#------------------------------------------

# Network visualization of chemical groups and use groups / functional use
# Common name - Parent1 - Use

# Format data to use in visnetwork

Common_name <- GC_LC %>%
  select(Common_name) %>%
  distinct(Common_name) %>% # this will keep only one copy of compound if it appear in both GC and LC
  group_by(Common_name) %>%
  tally(sort = TRUE) %>%
  ungroup()

Parent_Level_1 <- GC_LC %>%
  select(Parent_Level_1) %>%
  group_by(Parent_Level_1) %>%
  tally(sort = TRUE) %>%
  ungroup() %>%
  na.omit()

Use_category <- GC_LC %>%
  unite(., col = "Use", Use_category, Precursor_Intermediate, Transformation_Product, na.rm = TRUE, sep = ", ") %>%
  separate_rows(Use, sep = ",") %>%
  mutate(across(where(is.character), str_squish)) %>%
  group_by(Use) %>%
  tally(sort = TRUE) %>%
  ungroup()


Group <- GC_LC %>%
  filter(ClassyFy_Status == "Completed") %>%
  unite(., col = "Use", Use_category, Precursor_Intermediate, Transformation_Product, na.rm = TRUE, sep = ", ") %>%
  separate_rows(Use, sep = ",") %>%
  mutate(across(where(is.character), str_squish))
  
  
Group1 <- Group %>%
  select(Common_name, Parent_Level_1, Use) %>%
  distinct(Common_name, .keep_all = TRUE) %>% #keep only one unique common name since LC, GC can give same compound
  group_by(Common_name, Parent_Level_1) %>%
  tally() %>%
  ungroup() %>%
  rename(from = Common_name, to = Parent_Level_1, value = n)

Group2 <- Group %>%
  select(Common_name, Parent_Level_1, Use) %>%
  group_by(Parent_Level_1, Use) %>%
  tally() %>%
  ungroup() %>%
  rename(from = Parent_Level_1, to = Use, value = n)

Group_vis <- rbind(Group1, Group2)


# visnetwork start from index 1.
Nodes <- data.frame(id = seq_along(unique(c(Group_vis$from, Group_vis$to))), label = unique(c(Group_vis$from, Group_vis$to))) %>%
  mutate(group = case_when(label %in% GC_LC$Common_name ~ "Compound",
                           label %in% GC_LC$Parent_Level_1 ~ "PL1",
                           label %in% Use_category$Use ~ "Use")) %>%
    mutate(title = label) %>%
    mutate(value = 1) %>%
    left_join(Common_name, by = c("label" = "Common_name")) %>%
    left_join(Parent_Level_1, by = c("label" = "Parent_Level_1")) %>%
    left_join(Use_category, by = c("label" = "Use")) %>%
    replace_na(list(n.x = 0, n.y = 0, n = 0)) %>%
    mutate(value = (n.x + n.y + n)) %>%  
    select(-c(n.x, n.y, n), id, label, group, value, title) %>%
    rename(size = value) %>%
    as.data.frame()

Links <- Group_vis %>%
  left_join(Nodes, b = c("from" = "label")) %>%
  mutate(from = id) %>%
  select(-id) %>%
  left_join(Nodes, by = c("to" = "label")) %>%
  mutate(to = id) %>%
  select(from, to, value) %>%
  as.data.frame()



# renaming label. NOTE: above Links code will not work after executing this.
Nodes <- Nodes %>% mutate(label = as.character(id)) %>%
  mutate(size = size*3) #making the nodes larger


visNetwork(nodes = Nodes, edges = Links, height = "1200px", width = "1200px") %>%
    visPhysics(solver = "forceAtlas2Based", stabilization = FALSE) %>%
    visIgraphLayout()



#-----------------
# Network visualization of individual compounds and use groups / functional use
# Format data to use in visnetwork


# Use distinct(from, to) here to remove duplicates
Group_vis2 <- Group %>%
    select(Common_name, Use) %>%  
    ungroup() %>%
    rename(from = Common_name, to = Use) %>%
    distinct(from, to)
  

# visnetwork start from index 1
Nodes2 <- data.frame(id = seq_along(unique(c(Group_vis2$from, Group_vis2$to))), label = unique(c(Group_vis2$from, Group_vis2$to))) %>%
  mutate(group = case_when(label %in% GC_LC$Common_name ~ "Compound",
                           label %in% Use_category$Use ~ "Use")) %>%
  mutate(title = label) %>%
  mutate(value = 1) %>%
  left_join(Common_name, by = c("label" = "Common_name")) %>%
  left_join(Use_category, by = c("label" = "Use")) %>%
  replace_na(list(n.x = 0, n.y = 0, n = 0)) %>%
  mutate(value = (n.x + n.y)) %>%
  select(-c(n.x, n.y), id, label, group, value, title) %>%
  rename(size = value) %>%
  as.data.frame()

Links2 <- Group_vis2 %>%
  left_join(Nodes2, b = c("from" = "label")) %>%
  mutate(from = id) %>%
  select(-id) %>%
  left_join(Nodes2, by = c("to" = "label")) %>%
  mutate(to = id) %>%
  select(from, to) %>%
  as.data.frame()

visNetwork(nodes = Nodes2, edges = Links2, height = "1200px", width = "1200px") %>%
    visPhysics(solver = "forceAtlas2Based", stabilization = FALSE) %>%
    visIgraphLayout()