Introduction

The following script analyses the density of OCT2 (POU2F2) motifs in essential vs non-essential enhancers. Lucy Skrabanek developed the approach. Her original script has been modified and stripped down to the key analysis presented in our paper.
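The following data files are required to run this script (both are loaded in the "Load required data" section below): (1) `LCR.constituent.enhancers.and.TSS.2kb.categorized.Rda`, the categorized constituent enhancers / TSS regions (object `gr.re`), and (2) `BCL6locus-matches_OCT2_MA0507.1.Rda`, the OCT2 (MA0507.1) motif matches across the BCL6 locus.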

Please feel free to email me with any questions, comments or suggestions.
info at jchellmuth.com

Load required libraries

library(tidyverse)
library(dplyr)
library(GenomicRanges)
library(rtracklayer)

Load required data

Note: updated links / files will be provided shortly.

# constituent enhancers (this .Rda is expected to provide the GRanges `gr.re`):
load("~/Sync/CRISPRscan_BCL6_analysis/191112_CRISPRi_LY7_hg38/H1_CE_definition/LCR.constituent.enhancers.and.TSS.2kb.categorized.Rda") # gr.re
# OCT2 motif matches (this .Rda is expected to provide the data frame `res`):
load("BCL6locus-matches_OCT2_MA0507.1.Rda")
# load() creates objects silently; fail early if the expected ones are missing.
stopifnot(exists("gr.re"), exists("res"))

# limit matches to rel. score > 80 (of note, lower thresholds also produce significantly higher density in essential enhancers vs non-essential)
res <- res %>% filter(score.pct > 80)

# calculate spacing / distance between octamer motifs:
# spacing = start of the next motif on the same sequence minus this motif's start.
# The last motif of a sequence has no downstream neighbour, so its spacing is NA
# (not 0): a fabricated zero distance would otherwise enter the statistics.
res <- res %>%
  arrange(sequence, start) %>%
  group_by(sequence) %>%
  mutate(spacing = dplyr::lead(start) - start) %>%
  ungroup() %>%
  as.data.frame() # back to a plain data.frame for makeGRangesFromDataFrame()

# convert TF motif match results to GRanges object
gr.res <- makeGRangesFromDataFrame(res,
                                   seqnames.field = "sequence",
                                   keep.extra.columns = TRUE)

# overlap motif matches and constituent enhancers
# Build the overlap set from the hit pairs themselves, so every row of gr.ov is
# aligned with the regulatory element (RE) it overlaps. Combining
# subsetByOverlaps() (one row per motif) with subjectHits() (one entry per
# motif-RE pair) breaks as soon as a motif overlaps more than one RE.
hits <- findOverlaps(query = gr.res, subject = gr.re)
gr.ov <- gr.res[queryHits(hits)]

# assign names and categories of the overlapping REs:
re.hit <- gr.re[subjectHits(hits)]
gr.ov$re <- re.hit$re
gr.ov$category <- re.hit$category

# make df for plots:
df.ov <- data.frame(gr.ov)
df.ov <- df.ov %>% arrange(start)

# limit analysis to CE in LCR key region (motif start strictly inside the window):
lcr.window <- c(187892000, 187992000)
df.ov <- df.ov %>% filter(start > lcr.window[1] & start < lcr.window[2])

# t test
# Compare OCT2 motif spacing between the two enhancer categories in df.ov:
#   x = spacing of motifs in "constituent.enhancer" regions
#   y = spacing of motifs in "essential.enhancer" regions
# t.test() is called with its defaults, i.e. an unpaired, two-sided Welch test
# (no equal-variance assumption). The argument expressions are kept inline
# because t.test() echoes them verbatim in the "data:" line of its output.
t.test(df.ov %>% filter(category=="constituent.enhancer") %>% pull(spacing),
       df.ov %>% filter(category=="essential.enhancer") %>% pull(spacing))
## 
##  Welch Two Sample t-test
## 
## data:  df.ov %>% filter(category == "constituent.enhancer") %>% pull(spacing) and df.ov %>% filter(category == "essential.enhancer") %>% pull(spacing)
## t = 3.3777, df = 368.11, p-value = 0.0008089
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   9.600922 36.355520
## sample estimates:
## mean of x mean of y 
##  72.40541  49.42718
# plot
# Violin + jittered points of the distance between adjacent OCT2 sites,
# split by enhancer category (constituent vs essential).
plot.categories <- c("constituent.enhancer", "essential.enhancer")
# Named colours bind each colour to its category explicitly instead of relying
# on level order (same mapping as the default alphabetical order).
category.colors <- c(constituent.enhancer = "grey50",
                     essential.enhancer = "dodgerblue3")

# Drop rows without a spacing value (if any) so the layers do not warn.
p.spacing <- ggplot(df.ov %>% filter(category %in% plot.categories, !is.na(spacing)),
                    aes(x = category, y = spacing, color = category)) +
  geom_violin() +
  geom_jitter(width = 0.15, alpha = 0.3) +
  labs(x = "", y = "distance between adjacent\nOCT2 sites") +
  theme_linedraw() +
  theme(panel.grid = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(),
        legend.position = "none") +
  scale_color_manual(values = category.colors)
print(p.spacing)

# Save exactly this plot rather than whatever last_plot() happens to be.
ggsave("OCT2.site.distance.pdf", plot = p.spacing, width = 3, height = 3)

Session info

## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] rtracklayer_1.44.4   GenomicRanges_1.36.1 GenomeInfoDb_1.20.0 
##  [4] IRanges_2.18.3       S4Vectors_0.22.1     BiocGenerics_0.30.0 
##  [7] forcats_0.4.0        stringr_1.4.0        dplyr_0.8.3         
## [10] purrr_0.3.3          readr_1.3.1          tidyr_1.0.0         
## [13] tibble_2.1.3         ggplot2_3.2.1        tidyverse_1.3.0     
## 
## loaded via a namespace (and not attached):
##  [1] Biobase_2.44.0              httr_1.4.1                 
##  [3] jsonlite_1.6                modelr_0.1.5               
##  [5] assertthat_0.2.1            GenomeInfoDbData_1.2.1     
##  [7] cellranger_1.1.0            Rsamtools_2.0.3            
##  [9] yaml_2.2.0                  pillar_1.4.2               
## [11] backports_1.1.5             lattice_0.20-38            
## [13] glue_1.3.1                  digest_0.6.23              
## [15] XVector_0.24.0              rvest_0.3.5                
## [17] colorspace_1.4-1            htmltools_0.4.0            
## [19] Matrix_1.2-18               XML_3.98-1.20              
## [21] pkgconfig_2.0.3             broom_0.5.2                
## [23] haven_2.2.0                 zlibbioc_1.30.0            
## [25] scales_1.1.0                BiocParallel_1.18.1        
## [27] generics_0.0.2              farver_2.0.1               
## [29] withr_2.1.2                 SummarizedExperiment_1.14.1
## [31] lazyeval_0.2.2              cli_1.1.0                  
## [33] magrittr_1.5                crayon_1.3.4               
## [35] readxl_1.3.1                evaluate_0.14              
## [37] fs_1.3.1                    nlme_3.1-142               
## [39] xml2_1.2.2                  tools_3.6.1                
## [41] hms_0.5.2                   lifecycle_0.1.0            
## [43] matrixStats_0.55.0          munsell_0.5.0              
## [45] reprex_0.3.0                DelayedArray_0.10.0        
## [47] Biostrings_2.52.0           compiler_3.6.1             
## [49] rlang_0.4.2                 grid_3.6.1                 
## [51] RCurl_1.95-4.12             rstudioapi_0.10            
## [53] bitops_1.0-6                labeling_0.3               
## [55] rmarkdown_1.18              gtable_0.3.0               
## [57] DBI_1.0.0                   R6_2.4.1                   
## [59] GenomicAlignments_1.20.1    lubridate_1.7.4            
## [61] knitr_1.26                  zeallot_0.1.0              
## [63] stringi_1.4.3               Rcpp_1.0.3                 
## [65] vctrs_0.2.0                 dbplyr_1.4.2               
## [67] tidyselect_0.2.5            xfun_0.11