Skip to contents

E. coli Lac operon

Click to see the code
# Coordinates and annotation for the four lac operon genes
# (one row per gene, columns aligned by position).
lacZ_operon <- data.frame(
  start = c(361249, 361926, 363231, 366428),
  end = c(361860, 363179, 366305, 367510),
  Gene_symbol = c("lacA", "lacY", "lacZ", "lacI"),
  # Every gene in this table is annotated on the minus strand.
  Strand = rep("minus", 4),
  # Gene IDs are kept as text, not numbers.
  NCBI_Gene_ID = c("945674", "949083", "945006", "945007"),
  Description = c(
    "galactoside O-acetyltransferase",
    "lactose permease",
    "beta-galactosidase",
    "DNA-binding transcriptional repressor LacI"
  )
)

# Chart of the lac operon: genes are grouped (coloured) by gene symbol.
GC_chart(lacZ_operon, group = "Gene_symbol", height = "140px") %>%
  GC_labels() %>%
  GC_scaleBar(title = "0.2 kb", scaleBarUnit = 200, y = 30) %>%
  # The {placeholders} in the tooltip template use column names of lacZ_operon.
  # No trailing comma after `formatter`: a trailing comma would pass an extra,
  # empty argument to GC_tooltip().
  GC_tooltip(
    formatter = "
    <b>ID:</b> {NCBI_Gene_ID}<br>
    <b>Description:</b> {Description}<br>
    <b>Start:</b> {start}<br>
    <b>End:</b> {end}"
  ) %>%
  GC_clusterFooter(
    align = "center",
    y = -30,
    subtitle = "<i>Escherichia coli</i> str. K-12 substr. MG1655"
  ) %>%
  # Only two bottom ticks: the smallest start and the largest end coordinate.
  GC_coordinates(
    rotate = 0,
    tickValuesBottom = c(min(lacZ_operon$start), max(lacZ_operon$end)),
    textStyle = list(fontSize = "10px", x = -2.2, y = 1.2),
    tickStyle = list(lineLength = 10)
  ) %>%
  GC_legend(FALSE)

Omphalotin gene clusters

Click to see the code
  # Two omphalotin gene clusters drawn in one chart, coloured by gene class.
  GC_chart(
    ophA_clusters,
    start = "start",
    end = "end",
    group = "class",
    cluster = "cluster"
  ) %>%
    # One footer title/subtitle per cluster, in cluster order.
    GC_clusterFooter(
      title = c("<i>Omphalotus olearius</i>", "<i>Dendrothele bispora</i>"),
      subtitle = c("Locus: 2,522 - 21,484", "Locus: 19,236 - 43,005"),
      align = "left",
      x = 50
    ) %>%
    GC_legend(position = "top") %>%
    GC_labels(label = "name") %>%
    GC_scaleBar(y = 20) %>%
    # Both axes are hidden; cluster 1 uses scale breaks, cluster 2 is reversed.
    GC_scale(cluster = 1, scale_breaks = TRUE, hidden = TRUE) %>%
    GC_scale(cluster = 2, reverse = TRUE, hidden = TRUE) %>%
    GC_tooltip(
      formatter = "<b>Gene:</b> {name}<br> <b>Start:</b> {start}<br><b>end:</b> {end}"
    )


H. sapiens Hox genes using biomaRt

Click to see the code
# Get HOX gene positions with biomaRt
library(biomaRt)

# Mart handle for the "hsapiens_gene_ensembl" dataset of the "ensembl" mart;
# it is the `mart` used by the getBM() query further down.
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

# HGNC symbols to query, one character vector per HOX cluster.
# Each symbol is the cluster prefix followed by the gene number,
# e.g. "HOXA" + 1 -> "HOXA1"; numbers absent from a range are not queried.
hox_gene_sets <- list(
  HOXA = paste0("HOXA", c(1:7, 9:11, 13)),
  HOXB = paste0("HOXB", c(1:9, 13)),
  HOXC = paste0("HOXC", c(4:6, 8:13)),
  HOXD = paste0("HOXD", c(1, 3:4, 8:13))
)

#' Look up genomic positions for a set of HGNC symbols
#'
#' Runs one `getBM()` query filtered on `hgnc_symbol` and tags every returned
#' row with the cluster it belongs to.
#'
#' @param gene_list Values passed to the `hgnc_symbol` filter (HGNC symbols).
#' @param cluster_name Label written to the `cluster` column of the result.
#' @param mart Mart object handed to `getBM()`. Defaults to the script-level
#'   `ensembl` object, so existing two-argument calls behave as before.
#' @return The `getBM()` result (columns `hgnc_symbol`, `chromosome_name`,
#'   `strand`, `start_position`, `end_position`) with an added `cluster`
#'   column. A query without hits yields a zero-row result instead of an error.
get_gene_positions <- function(gene_list, cluster_name, mart = ensembl) {
  result <- getBM(
    attributes = c(
      "hgnc_symbol", "chromosome_name", "strand",
      "start_position", "end_position"
    ),
    filters = "hgnc_symbol",
    values = gene_list,
    mart = mart
  )

  # Assign the column directly rather than via transform(): transform() fails
  # with "differing number of rows" when the query returns zero rows.
  result$cluster <- rep_len(cluster_name, nrow(result))
  result
}

# Query every gene set; the result is one data frame per cluster name.
hox_gene_positions <- base::lapply(
  names(hox_gene_sets),
  function(set_name) get_gene_positions(hox_gene_sets[[set_name]], set_name)
)

# Stack the per-cluster data frames row-wise.
combined_hox_genes <- do.call(rbind, hox_gene_positions)

# Short display name: characters 1-3 of the symbol followed by everything
# from character 5 onwards.
# NOTE(review): this drops the 4th character, e.g. "HOXA1" -> "HOX1";
# confirm that omitting the cluster letter is intended.
combined_hox_genes$name <- with(
  combined_hox_genes,
  paste0(
    substr(hgnc_symbol, 1, 3),
    substr(hgnc_symbol, 5, nchar(hgnc_symbol))
  )
)
# Chart of the HOX genes, one cluster (row) per HOX cluster name.
GC_chart(
  combined_hox_genes,
  start = "start_position",
  end = "end_position",
  group = "cluster",
  height = "400px",
  strand = "strand",
  cluster = "cluster"
) %>%
  GC_sequence(y = 45) %>%
  GC_legend(FALSE) %>%
  # Take the cluster titles from the data frame that is actually plotted, so
  # the chart does not depend on a separately defined `human_hox_genes` object.
  GC_clusterLabel(title = unique(combined_hox_genes$cluster)) %>%
  GC_genes(
    marker = "boxarrow",
    markerHeight = 10,
    arrowheadHeight = 10,
    arrowheadWidth = 3
  ) %>%
  GC_labels(label = "name", fontSize = "10px", adjustLabels = FALSE) %>%
  # Per-item style override for the label at index 5 of cluster 2.
  GC_labels(cluster = 2, label = "name", itemStyle = list(list(index = 5, x = -4))) %>%
  # Clusters 1 and 2: axis hidden and drawn reversed.
  GC_scale(
    cluster = c(1, 2),
    hidden = TRUE,
    reverse = TRUE
  ) %>%
  GC_cluster(prevent_gene_overlap = TRUE, overlap_spacing = 20)


H. sapiens Dystrophin transcript variants

Click to see the code
# Dystrophin transcript variants: one cluster per transcript, coloured by type.
GC_chart(
  hs_dystrophin_transcripts,
  start = "start",
  end = "end",
  group = "type",
  cluster = "transcript"
) %>%
  GC_title("Dystrophin transcript variants") %>%
  # No trailing commas in the calls below: a trailing comma would pass an
  # extra, empty argument to the function.
  GC_genes(
    stroke = "#1f77b4"
  ) %>%
  # Clusters 1-4 share the range 1..max(length) but keep their axis hidden.
  GC_scale(
    cluster = c(1, 2, 3, 4),
    start = 1,
    end = max(hs_dystrophin_transcripts$length),
    hidden = TRUE
  ) %>%
  # Cluster 5 uses the same range and is the only one with a visible axis.
  GC_scale(
    cluster = 5,
    y = 25,
    ticksCount = 8,
    start = 1,
    end = max(hs_dystrophin_transcripts$length)
  ) %>%
  GC_clusterLabel(title = unique(hs_dystrophin_transcripts$transcript)) %>%
  GC_legend(
    position = "top"
  )


Saccharopolyspora erythraea Erythromycin

Click to see the code
# Colour (hex code) for each functional category, as a named list keyed by
# the category label.
colors <- as.list(c(
  "Tailoring (Hydroxylation)" = "#d62728",
  "Precursor biosynthesis"    = "#2ca02c",
  "Tailoring (Glycosylation)" = "#e6b0aa",
  "Scaffold biosynthesis"     = "#9467bd",
  "Tailoring (Methylation)"   = "#8b0000",
  "Activation / processing"   = "#1f77b4",
  "Resistance / immunity"     = "#f7dc6f",
  "Other"                     = "#808080"
))

# Erythromycin gene cluster, coloured by the "Functions" column.
GC_chart(
  erythromycin_cluster,
  start = "Start",
  end = "End",
  height = "160px",
  group = "Functions"
) %>%
  GC_title(
    title = "Erythromycin A biosynthetic gene cluster from <i>Saccharopolyspora erythraea</i>",
    height = 40,
    align = "left"
  ) %>%
  GC_genes(marker = "boxarrow", marker_size = "small") %>%
  GC_tooltip(
    formatter = "<b>{Identifiers}</b>
                 <br>{Product}
                 <br>{start} - {end}({Strand})"
  ) %>%
  GC_scale(start = 778000, end = 832000, ticksCount = 8) %>%
  # Legend entries sorted alphabetically, with "Other" moved to the end.
  GC_legend(
    legendTextOptions = list(fontSize = "12px"),
    order = c(setdiff(sort(erythromycin_cluster$Functions), "Other"), "Other")
  ) %>%
  # Apply the category -> colour mapping defined in `colors`.
  GC_color(customColors = colors)


Saccharopolyspora erythraea Erythromycin BlastP

Click to see the code
### Erythromycin BlastP: all clusters
GC_chart(
  erythromycin_BlastP,
  cluster = "cluster",
  strand = "strand",
  group = "Functions",
  height = 700
) %>%
  GC_title(title = "BlastP Results") %>%
  # Gene labels are requested for cluster 1 only.
  GC_labels("Gene", cluster = 1) %>%
  # Links are computed per "Gene" group but not drawn (show_links = FALSE).
  GC_links(
    group = "Gene",
    show_links = FALSE,
    labelStyle = list(fontSize = "10px")
  ) %>%
  GC_clusterLabel()
Click to see the code
library(dplyr)
### Erythromycin BlastP: two selected clusters, with links drawn
erythromycin_BlastP %>%
  # Keep only the rows of the two clusters being compared.
  dplyr::filter(cluster %in% c("BGC0000055", "BGC0000054")) %>%
  GC_chart(
    cluster = "cluster",
    strand = "strand",
    group = "BlastP",
    height = 250
  ) %>%
  GC_labels("Gene", cluster = 1) %>%
  GC_genes(marker_size = "small", marker = "rbox", markerHeight = 15) %>%
  # Links between genes sharing a "BlastP" value, without identity labels.
  GC_links(
    group = "BlastP",
    show_links = TRUE,
    identity_label = FALSE
  ) %>%
  GC_clusterLabel() %>%
  GC_legend(FALSE)