$ conda install -c bioconda fastp $ conda install -c bioconda bowtie2 $ mkdir ~/bowtie2_index # Pre-built indexΛೖΕΔͨΊͷσΟϨΫτϦΛ࡞͢Δ $ cd ~/bowtie2_index # # Pre-built indexΛೖΕΔͨΊͷσΟϨΫτϦʹҠಈ͢Δ $ wget ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/mm10.zip $ unzip mm10.zip $ cd ~/bowtie2_index $ wget ftp://ftp.ncbi.nlm.nih.gov/genomes/archive/old_genbank/Eukaryotes/vertebrates_mammals/ Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/ GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.bowtie_index.tar.gz $ tar xvzf bowtie2_index/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.bowtie_index.tar.gz $ mkdir ~/tools $ cd ~/tools $ python3 -m venv MACS2/ $ source MACS2/bin/activate $ pip install --upgrade pip $ pip install numpy $ pip install cython $ git clone https://github.com/taoliu/MACS.git $ cd MACS $ git checkout remotes/origin/macs2python3 $ python setup_w_cython.py install $ macs2 --help # ϔϧϓϝοηʔδ͕ग़ྗ͞ΕΕOK $ deactivate $ source ~/tools/MACS2/bin/activate $ brew install samtools $ conda install -c bioconda homer $ perl /anaconda3/share/homer-*/configureHomer.pl -install hg38 $ perl /anaconda3/share/homer-*/configureHomer.pl -install mm10 $ conda install -c bioconda deeptools $ deeptools --version $ brew install r $ brew cask install rstudio $ brew install igv $ brew install bedtools $ open -a RStudio > if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") > BiocManager::install("ChIPpeakAnno") $ mkdir ~/gencode $ cd ~/gencode $ wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M20/ gencode.vM20.annotation.gtf.gz # $ gzip -d gencode.vM20.annotation.gtf.gz $ ls gencode.vM20.annotation.gtfɹ# gencode.vM20.annotation.gtf͕Ͱ͖ͨ͜ͱΛ֬ೝ͢Δɻ # σʔλऔಘ $ mkdir ~/chipseq # ChIP-seqղੳ༻ͷσΟϨΫτϦΛ࡞͢Δ $ cd ~/chipseq # ChIP-seqղੳ༻ͷσΟϨΫτϦҠಈ͢Δ $ mkdir fastq # FASTQϑΝΠϧΛೖΕΔσΟϨΫτϦΛ࡞͢Δ $ cd ~/chipseq/fastq $ wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR520/004/SRR5208824/SRR5208824.fastq.gz $ cd ~/chipseq/fastq $ wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR520/008/SRR5208828/SRR5208828.fastq.gz $ cd ~/chipseq/fastq $ wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR520/008/SRR5208838/SRR5208838.fastq.gz $ cd ~/chipseq/fastq $ cp SRR5208824.fastq.gz BRD4_ChIP_IFNy.R1.fastq.gz $ cp SRR5208828.fastq.gz IRF1_ChIP_IFNy.R1.fastq.gz $ cp SRR5208838.fastq.gz Input_DNA.R1.fastq.gz $ cd ~/chipseq # ղੳ $ mkdir fastqc $ cd ~/chipseq $ fastqc -o fastqc fastq/BRD4_ChIP_IFNy.R1.fastq.gz $ cd ~/chipseq $ fastqc -o fastqc fastq/IRF1_ChIP_IFNy.R1.fastq.gz $ fastqc -o fastqc fastq/Input_DNA.R1.fastq.gz $ cd ~/chipseq $ mkdir fastp # fastpͱ͍͏໊લͷσΟϨΫτϦΛ࡞Δ $ cd ~/chipseq $ fastp -i fastq/BRD4_ChIP_IFNy.R1.fastq.gz -o fastp/BRD4_ChIP_IFNy.R1.trim.fastq.gz --html fastp/ BRD4_ChIP_IFNy.fastp.html ɹಉ༷ʹIRF1_ChIP_IFNy.R1.fastq.gzɺInput_DNA.R1.fastq.gzʹରͯ͠fastpΛ࣮ߦ͢Δɻ $ cd ~/chipseq $ fastp -i fastq/IRF1_ChIP_IFNy.R1.fastq.gz -o fastp/IRF1_ChIP_IFNy.R1.trim.fastq.gz --html fastp/ IRF1_ChIP_IFNy.fastp.html $ fastp -i fastq/Input_DNA.R1.fastq.gz -o fastp/Input_DNA.R1.trim.fastq.gz --html fastp/ Input_DNA.fastp.html ɹfastpσΟϨΫτϦʹҎԼͷϑΝΠϧ͕Ͱ͖ͯΔ͜ͱΛ֬ೝ͢Δɻ $ cd ~/chipseq $ open fastp/BRD4_ChIP_IFNy.fastp.html $ system_profiler SPHardwareDataType | grep Cores $ cd ~/chipseq $ mkdir bowtie2 $ cd ~/chipseq $ bowtie2 -p 2 -x data/external/bowtie2_index/mm10 \ -U fastp/BRD4_ChIP_IFNy.R1.trim.fastq.gz > bowtie2/BRD4_ChIP_IFNy.trim.sam $ cd ~/chipseq $ bowtie2 -p 2 -x data/external/bowtie2_index/mm10 \ -U fastp/IRF1_ChIP_IFNy.R1.trim.fastq.gz > bowtie2/IRF1_ChIP_IFNy.trim.sam $ bowtie2 -p 2 -x data/external/bowtie2_index/mm10 \ -U fastp/Input_DNA.R1.trim.fastq.gz > bowtie2/Input_DNA.trim.sam $ cd ~/chipseq $ samtools view -bhS -F 0x4 -q 42 bowtie2/BRD4_ChIP_IFNy.trim.sam | samtools sort -T bowtie2/ BRD4_ChIP_IFNy.trim - > bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam $ cd ~/chipseq $ samtools view -bhS -F 0x4 -q 42 bowtie2/IRF1_ChIP_IFNy.trim.sam | samtools sort -T bowtie2/ IRF1_ChIP_IFNy.trim - > bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam $ samtools view -bhS -F 0x4 -q 42 bowtie2/Input_DNA.trim.sam | samtools sort -T bowtie2/Input_DNA.trim - > bowtie2/Input_DNA.trim.uniq.bam $ cd ~/chipseq $ samtools index bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam $ samtools index bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam $ samtools index bowtie2/Input_DNA.trim.uniq.bam $ cd ~/chipseq $ mkdir macs2 # MACS2ͷग़ྗ݁ՌΛอଘ͢ΔσΟϨΫτϦΛ࡞͢ΔʢඞਢͰͳ͍ʣ $ cd ~/chipseq $ source ~/tools/MACS2/bin/activate # MACS2͕Πϯετʔϧ͞ΕͨڥΓସ͑Δ $ macs2 callpeak -t bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam \ -c bowtie2/Input_DNA.trim.uniq.bam -f BAM -g mm -n BRD4_ChIP_IFNy --outdir macs2 -B -q 0.01 $ macs2 callpeak -t bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam \ -c bowtie2/Input_DNA.trim.uniq.bam -f BAM -g mm -n IRF1_ChIP_IFNy --outdir macs2 -B -q 0.01 $ deactivate # ݩͷڥΓସ͑Δɻ $ head macs2/BRD4_ChIP_IFNy_peaks.narrowPeak $ head macs2/BRD4_ChIP_IFNy_summits.bed $ cd ~/chipseq $ wc -l macs2/*_peaks.narrowPeak $ cd ~/chipseq $ bedtools intersect -u -a macs2/BRD4_ChIP_IFNy_peaks.narrowPeak -b macs2/ IRF1_ChIP_IFNy_peaks.narrowPeak > macs2/ BRD4_ChIP_IFNy_peaks.overlapped_with_IRF1_ChIP_IFNy_peaks.narrowPeak $ cd ~/chipseq $ bedtools intersect -u -a macs2/IRF1_ChIP_IFNy_peaks.narrowPeak -b macs2/ BRD4_ChIP_IFNy_peaks.narrowPeak > macs2/ IRF1_ChIP_IFNy_peaks.overlapped_with_BRD4_ChIP_IFNy_peaks.narrowPeak $ cd ~/chipseq $ bedtools intersect -v -a macs2/BRD4_ChIP_IFNy_peaks.narrowPeak -b macs2/ IRF1_ChIP_IFNy_peaks.narrowPeak > macs2/ BRD4_ChIP_IFNy_peaks.not_overlapped_with_IRF1_ChIP_IFNy_peaks.narrowPeak $ bedtools intersect -v -a macs2/IRF1_ChIP_IFNy_peaks.narrowPeak -b macs2/ BRD4_ChIP_IFNy_peaks.narrowPeak > macs2/ IRF1_ChIP_IFNy_peaks.not_overlapped_with_BRD4_ChIP_IFNy_peaks.narrowPeak $ cd ~/chipseq $ wc -l macs2/*overlapped*.narrowPeak $ cd ~/chipseq $ mkdir deeptools # deepToolsͷग़ྗ݁ՌΛอଘ͢ΔσΟϨΫτϦΛ࡞͢Δ $ cd ~/chipseq $ bamCoverage -b bowtie2/BRD4_ChIP_IFNy.trim.uniq.bam -o deeptools/BRD4_ChIP_IFNy.trim.uniq.bw -of bigwig --normalizeUsing CPM $ cd ~/chipseq $ bamCoverage -b bowtie2/IRF1_ChIP_IFNy.trim.uniq.bam -o deeptools/IRF1_ChIP_IFNy.trim.uniq.bw -of bigwig --normalizeUsing CPM $ bamCoverage -b bowtie2/Input_DNA.trim.uniq.bam -o deeptools/Input_DNA.trim.uniq.bw -of bigwig -- normalizeUsing CPM $ igv $ cd ~/chipseq $ mkdir homer $ cd ~/chipseq $ mkdir homer/BRD4_ChIP_IFNy $ findMotifsGenome.pl macs2/BRD4_ChIP_IFNy_summits.bed mm10 homer/BRD4_ChIP_IFNy -size 200 -mask $ cd ~/chipseq $ mkdir homer/IRF1_ChIP_IFNy $ findMotifsGenome.pl macs2/IRF1_ChIP_IFNy_summits.bed mm10 homer/IRF1_ChIP_IFNy -size 200 -mask $ cd ~/chipseq $ computeMatrix scale-regions \ --regionsFileName ~/gencode/gencode.vM20.annotation.gtf \ --scoreFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.bw \ --outFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \ --upstream 1000 --downstream 1000 \ --skipZeros $ cd ~/chipseq $ plotProfile -m deeptools/BRD4_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \ -out deeptools/metagene_BRD4_ChIP_IFNy_gencode_vM20_gene.pdf \ --plotTitle "GENCODE vM20 genes" $ cd ~/chipseq $ plotHeatmap -m deeptools/BRD4_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \ -out deeptools/heatmap_BRD4_ChIP_IFNy_gencode_vM20_gene.pdf \ --plotTitle "GENCODE vM20 genes" $ cd ~/chipseq $ computeMatrix scale-regions \ --regionsFileName ~/gencode/gencode.vM20.annotation.gtf \ --scoreFileName deeptools/IRF1_ChIP_IFNy.trim.uniq.bw \ --outFileName deeptools/IRF1_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \ --upstream 1000 --downstream 1000 \ --skipZeros $ plotProfile -m deeptools/IRF1_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \ -out deeptools/metagene_IRF1_ChIP_IFNy_gencode_vM20_gene.pdf \ --plotTitle "GENCODE vM20 genes" $ plotHeatmap -m deeptools/IRF1_ChIP_IFNy.trim.uniq.matrix_gencode_vM20_gene.txt.gz \ -out deeptools/heatmap_IRF1_ChIP_IFNy_gencode_vM20_gene.pdf \ --plotTitle "GENCODE vM20 genes" $ cd ~/ $ computeMatrix reference-point \ --regionsFileName macs2/IRF1_ChIP_IFNy_summits.bed \ --scoreFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.bw \ --referencePoint center \ --upstream 1000 \ --downstream 1000 \ --outFileName deeptools/BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.matrix.txt.gz \ --skipZeros $ plotProfile -m deeptools/BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.matrix.txt.gz \ -out deeptools/aggregation_BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.pdf \ --regionsLabel "IRF1_ChIP_IFNy Peaks" $ plotHeatmap -m deeptools/BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.matrix.txt.gz \ -out deeptools/heatmap_BRD4_ChIP_IFNy.trim.IRF1_ChIP_IFNy_summits.pdf \ --samplesLabel "BRD4_ChIP_IFNy" \ --regionsLabel "IRF1_ChIP_IFNy Peaks" $ computeMatrix scale-regions \ --regionsFileName ../data/gencode/gencode.vM20.annotation.gtf \ --scoreFileName deeptools/BRD4_ChIP_IFNy.trim.uniq.bw \ deeptools/IRF1_ChIP_IFNy.trim.uniq.bw \ --outFileName deeptools/chipseq_matrix_gencode_vM20_gene.txt.gz \ --upstream 1000 --downstream 1000 \ --skipZeros $ plotHeatmap -m deeptools/chipseq_matrix_gencode_vM20_gene.txt.gz \ -out deeptools/heatmap_BRD4_ChIP_IFNy_gencode_vM20_gene.k3.pdf \ --kmeans 3 \ --plotTitle "GENCODE vM20 genes" $ cd ~/chipseq $ cut -f 1,2,3,4,5,6 macs2/BRD4_ChIP_IFNy_peaks.narrowPeak > macs2/BRD4_ChIP_IFNy_peaks.narrowPeak.bed $ cut -f 1,2,3,4,5,6 macs2/IRF1_ChIP_IFNy_peaks.narrowPeak > macs2/IRF1_ChIP_IFNy_peaks.narrowPeak.bed $ cd ~/chipseq $ head macs2/*.narrowPeak.bed $ cd ~/chipseq $ open -a RStudio > library(ChIPpeakAnno) > gr1 <- toGRanges("macs2/BRD4_ChIP_IFNy_peaks.narrowPeak", format="narrowPeak", header=FALSE) > gr2 <- toGRanges("macs2/IRF1_ChIP_IFNy_peaks.narrowPeak", format="narrowPeak", header=FALSE) > ol <- findOverlapsOfPeaks(gr1, gr2) # ϐʔΫಉ࢜ͷॏͳΓΛௐࠪ͢Δ > makeVennDiagram(ol, NameOfPeaks=c(“BRD4”, “IRF1”)) # ॏͳΓΛϕϯਤͱͯ͠ՄࢹԽ͢Δ > BiocManager::install("TxDb.Mmusculus.UCSC.mm10.ensGene") # ϚεήϊϜmm10ͷҨࢠϞσϧͷύοέʔδΛμ ϯϩʔυ͢Δ > library(TxDb.Mmusculus.UCSC.mm10.ensGene) # ϚεήϊϜmm10ͷҨࢠϞσϧͷύοέʔδΛϩʔυ͢Δ > annoData <- toGRanges(TxDb.Mmusculus.UCSC.mm10.ensGene) > seqlevelsStyle(gr1) <- seqlevelsStyle(annoData) # છ৭ମ໊ͷελΠϧΛἧ͑Δ > anno1 <- annotatePeakInBatch(gr1, AnnotationData=annoData) # ϐʔΫΛ࠷͍ۙసࣸ։࢝ʢTSSʣʹׂΓͯΔ > pie1(table(anno1$insideFeature), main="BRD4") # ϐʔΫ͕Ҩࢠ͔ΒΈͯͲͷྖҬʹஔ͍ͨͷ͔Λɺԁάϥϑͱͯ͠ දࣔ͢Δ > seqlevelsStyle(gr2) <- seqlevelsStyle(annoData) # છ৭ମ໊ͷελΠϧΛἧ͑Δ > anno2 <- annotatePeakInBatch(gr2, AnnotationData=annoData) # ϐʔΫΛ࠷͍ۙసࣸ։࢝ʢTSSʣʹׂΓͯΔ > pie1(table(anno2$insideFeature), main="IRF2") # ϐʔΫ͕Ҩࢠ͔ΒΈͯͲͷྖҬʹஔ͍ͨͷ͔Λɺԁάϥϑͱͯ͠ දࣔ͢Δ > overlaps <- ol$peaklist[["gr1///gr2"]] > aCR <- assignChromosomeRegion(overlaps, nucleotideLevel=FALSE, precedence=c("Promoters", "immediateDownstream", "fiveUTRs", "threeUTRs", "Exons", "Introns"), TxDb=TxDb.Mmusculus.UCSC.mm10.ensGene) > pie1(aCR$percentage, main="BRD4 & IRF1") > BiocManager::install("EnsDb.Mmusculus.v79") > library(EnsDb.Mmusculus.v79) > anno1$feature[is.na(anno1$feature)] <- "." # ΤϥʔΛආ͚ΔͨΊʹ NA ΛϐϦΦυʹม͑Δ > anno1$geneName <- mapIds(EnsDb.Mmusculus.v79, keys=anno1$feature, column = "GENENAME", keytype="GENEID") > anno1[1:2] if(!dir.exists("ChIPpeakAnno")) dir.create("ChIPpeakAnno") df_anno1 <- as.data.frame(anno1) write.table(df_anno1, "ChIPpeakAnno/BRD4_ChIP_IFNy_peaks.annot.txt", sep="\t", quote=F) ChIP-seqղੳͷྫ >200ߦͷίϚϯυ https://github.com/yuifu/ngsdat2_epigenome_chipseq/blob/master/chipseq.md