Skip to content

A pipeline to call intron and intergenic eRNAs from PRO-seq Data

Notifications You must be signed in to change notification settings

monnieb92/PROseq_Intron_Intergenic_Enhancers

Repository files navigation

eRNA peak calling

This is performed for each timepoint and replicate.

Call peaks using macs2

$MACS2 callpeak -t ${BAM}/10767-MB-${ids[$i]}.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam -q 0.01 -f BAM -g mm --call-summits -n $OUTPUT/10767-MB-${times[$i]}-PS.q0.01.fBAM.gmm.callsummits

Annotate Peaks with HOMER

annotatePeaks.pl $OUTPUT/10767-MB-${times[$i]}-PS.q0.01.fBAM.gmm.callsummits_peaks.narrowPeak $FASTA -size 200 -strand both -gtf $ANNO > $OUTPUT2/10767-MB-${times[$i]}-PS.q0.01.fBAM.gmm.callsummits_peaks.annotated.bothstrand.txt

Filter for only peaks in introns and intergenic regions and not within 750bp of TSS using R

added 150bp to each peak (this will merge peaks that might very close together)

T0hA <- read_delim("/Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-macs2.q0.01/10767-PS-annotations/10767-MB-0hA-PS.q0.01.fBAM.gmm.callsummits_peaks.annotated.bothstrand.txt")
names(T0hA)[1] <- "PeakID" 
T0hA_macs <- read_delim("/Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-macs2.q0.01/10767-MB-0hA-PS.q0.01.fBAM.gmm.callsummits_peaks.narrowPeak", col_names = FALSE)
Macs2colnames <- c("Chrom","Start", "End", "PeakID", "score", "Strand", "signalValue", "pValue", "qValue", "peak")
colnames(T0hA_macs) <- Macs2colnames

T0hA_int <- subset(T0hA, grepl("(intron|intergenic)", tolower(Annotation))) %>%
  dplyr::filter(`Distance to TSS` > 750 | `Distance to TSS` < -750) %>%
  left_join(T0hA_macs, by = "PeakID", suffix = c("_anno", "_macs2")) %>% 
  dplyr::select(Chrom, Start_macs2, End_macs2,PeakID, "Peak Score", Strand_macs2) %>%
  dplyr::mutate(Start_macs2 = Start_macs2 - 150, 
                End_macs2 = End_macs2 + 150) %>%
  as.data.frame()
write_tsv(T0hA_int,"10767-MB-0hA-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt", col_names = FALSE)

Concatenate peaks from all time points

##concatentate
cat /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-0hA-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-0hB-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-0hC-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-15mA-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-15mB-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-15mC-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-30mA-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-30mB-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-30mC-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-1hA-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-1hB-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-R_PROseq/10767-MB-PROseq-Routput/10767-MB-1hC-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.outsideTSS750.plusminus150.bothstrand.txt > 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.bed.txt
#sort bed files
sort -k1,1 -k2,2n 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.bed.txt > 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.bed.txt
#mergepeaks
bedtools merge -i 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.bed.txt > 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.bed
#saf file for featurecounts
awk -v OFS="\t" 'BEGIN {print "GeneID","Chr","Start","End","Strand"} { print "Peak_"NR,$1,$2,$3,"."}' 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.bed > 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.saf
#replace positive strand
sed 's/\./+/g' 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.saf >10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.pos.saf

Feature counts of the possible intron and intergenic regions

#!/bin/bash 

featureCounts -F SAF -T 18 -s 2 -O -a 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.pos.saf  -o 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150_SubreadCounts.reversestrand.possaf.txt \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0001.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0002.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0003.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0004.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0005.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0006.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0007.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0008.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0009.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0010.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0011.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0012.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam

featureCounts -F SAF -T 18 -s 1 -O -a 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.pos.saf  -o 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150_SubreadCounts.forwardstrand.possaf.txt \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0001.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0002.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0003.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0004.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0005.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0006.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0007.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0008.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0009.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0010.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0011.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0012.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam

featureCounts -F SAF -T 18 -s 0 -O -a 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.sorted.merged.pos.saf -o 10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150_SubreadCounts.allcounts.possaf.txt \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0001.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0002.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0003.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0004.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0005.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0006.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0007.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0008.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0009.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0010.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0011.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam \
  /Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-BAM/10767-MB-0012.75.adap.trail.15bp.revcomp.mm39.F4q10.sorted.bam

Filter for peaks with bidirectional transcription

Counts tables filtered > 0.001 & _rev !=0


counts_strand <- read.table("/Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-FeatureCounts/10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150_SubreadCounts.forwardstrand.possaf.txt", header = TRUE)
CountColumns <- c("PeakID","Chrom","Start","End","Strand","Length","minusdT_A","minusdT_B","minusdT_C","plusdT15m_A","plusdT15m_B","plusdT15m_C","plusdT30m_A","plusdT30m_B","plusdT30m_C","plusdT1h_A","plusdT1h_B","plusdT1h_C")
colnames(counts_strand) <- CountColumns
counts_reverse <- read.table("/Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-FeatureCounts/10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150_SubreadCounts.reversestrand.possaf.txt", header = TRUE)
colnames(counts_reverse) <- CountColumns
counts_total <- read.table("/Volumes/Mbomber_7_14TB/10767-MB-MEL_Mtg16_PROSeq/10767-MB-FeatureCounts/10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150_SubreadCounts.allcounts.possaf.txt", header = TRUE)
colnames(counts_total) <- CountColumns
AllCounts <- counts_strand %>% left_join(counts_reverse, by = "PeakID", suffix=c("_fwd", "_rev")) %>%
  left_join(counts_total, by = "PeakID", suffix=c("_rev", "_both"))

AllCounts_filtered <- AllCounts %>%
  dplyr::filter((`minusdT_A_rev` != 0 ) & (`minusdT_A_fwd`/`minusdT_A_rev`) > 0.001 & (`minusdT_B_rev` != 0 ) & (`minusdT_B_fwd`/`minusdT_B_rev`) > 0.001 & (`minusdT_C_rev` != 0 ) & (`minusdT_C_fwd`/`minusdT_C_rev`) > 0.001 |  (`plusdT1h_A_rev` != 0 ) & (`plusdT1h_A_fwd`/`plusdT1h_A_rev`) > 0.001 & (`plusdT1h_B_rev` != 0 ) & (`plusdT1h_B_fwd`/`plusdT1h_B_rev`) > 0.001  & (`plusdT1h_C_rev` != 0 ) &  (`plusdT1h_C_fwd`/`plusdT1h_C_rev`) > 0.001 ) %>%
  glimpse()

AllCounts_filtered_bed <- AllCounts_filtered %>%
  dplyr::select("Chrom", "Start", "End", "PeakID","Length","Strand")

write_tsv(AllCounts_filtered_bed, "10767-MB-AllPeaks-PS.q0.01.fBAM.gmm.callsummits_peaks.intron.intergenic.filtered.outsideTSS750.plusminus150.greaterthan0.001.bed", col_names = FALSE)

Differential analysis using NRSA normalization factors and DESeq2

use the counts table from both strands to perform the differential anslysis (all counts)

countData1h <- AllCounts_filtered[,c(41:43,50:52)]

colData_1h <- data.frame(condition = factor(c("DMSO", "DMSO","DMSO", "dT1h","dT1h","dT1h"), levels = c("DMSO", "dT1h")), sample = c("minusdT_A","minusdT_B","minusdT_C", "dT1h_A","dT1h_B","dT1h_C"))

dds1h <- DESeqDataSetFromMatrix(countData = countData1h, colData = colData_1h, design = ~condition)
## these size factors are output from NRSA
sizeFactors(dds1h) <- c(1/0.721057608621947,1/0.994705249048991,1/1.288104653729,1/1.05075548011527,1/0.910653829570098,1/1.12421859250555)
#dds <- DESeq(dds)
dds1h <- estimateDispersions(dds1h)
dds1h <- nbinomWaldTest(dds1h)

##Create PCA plot of normalized data

vsd<-varianceStabilizingTransformation(dds1h)
z <- plotPCA(vsd, intgroup=c("condition"))
nudge <- position_nudge(y = 1)
z + geom_text(aes(label = vsd$sample), position = nudge, size=2)
 
normdata <- counts(dds1h,normalized=TRUE)
colnames(normdata)<-paste("norm",c("minusdT_A","minusdT_B","minusdT_C", "dT1h_A","dT1h_B","dT1h_C"),sep="_")
 
result_eRNA_1h <- results(dds1h,contrast=c("condition","dT1h","DMSO"))
result_eRNA_1h_table <- cbind(AllCounts_filtered[,c(1:5)],countData1h,normdata,result_eRNA_1h)
glimpse(result_eRNA_1h_table)
write_tsv(result_eRNA_1h_table,"10767-MB-result_eRNA_1h_table.includeintron.intergentic.excludenearTSS750.plusminus150.filtered0.001.txt")
#create a MA plot of differential changes 
plotMA(result_eRNA_1h, ylim=c(-7.5,5),main="eRNA",cex.lab=1.75, cex.axis=1.75 ,cex.main=1.75, alpha=0.05)

About

A pipeline to call intron and intergenic eRNAs from PRO-seq Data

Topics

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy