利用usearch生成OTU表

2021-08-31 21:28:23 字數 4310 閱讀 6797

解壓序列檔案

gzip -d *.gz

重新命名fq檔案為fastq

rename 's/fq/fastq/' *.fq

rename 's/\.1\.fq/\_r1\.fastq/' *.fq

rename 's/\.2\.fq/\_r2\.fastq/' *.fq

rename 's/raw\.split\.//' *.fastq

拼接序列

usearch -fastq_mergepairs *r1*.fastq -relabel @ -fastq_maxdiffs 10 -fastq_pctid 80 -fastqout merged.fq

刪除引物和barcode序列

usearch -fastx_truncate merged.fq -stripleft 26 -stripright 27 -fastqout stripped.fq

訓練集小寫轉成大寫

cat trainset16_022016.pds.fasta | tr a-z A-Z >trainset16.fasta

將所有序列統一調整到同一條鏈的方向(反向互補的序列會被轉正;its序列用unite99作為參考序列)

usearch -orient stripped.fq -db /home/kxf/documents/database/trainset16.fasta -fastqout orient.fq #細菌甲烷菌

質量控制並刪除質量資訊(設定maximum expected error為1)

usearch -fastq_filter orient.fq -fastq_maxee 1.0 -fastaout filtered.fa

去除嵌合體

usearch -uchime2_ref filtered.fa -db /home/kxf/documents/database/trainset16.fasta -uchimeout out.txt -strand plus -mode sensitive #細菌甲烷菌原蟲

usearch -uchime2_ref filtered.fa -db /home/kxf/documents/database/unitev6_sh_97_s.fasta -uchimeout out.txt -strand plus -mode sensitive #真菌

vsearch -uchime_ref filtered.fa -nonchimeras outvsearch.fa -db /home/kxf/documents/database/silva_132_ssuref_nr99_tax_silva.fasta

檢視序列長度分佈

usearch -fastq_eestats2 stripped.fq -output eestats2.txt -length_cutoffs 100,300,10

mothur > summary.seqs(fasta=outvsearch.fa,processors=88)

序列修剪到相同長度(配對its不需要修剪)

usearch -fastx_truncate outvsearch.fa -trunclen 250 -fastaout reads250.fa

mothur > screen.seqs(fasta=outvsearch.fa,start=95%value,end=5%value, maxhomop=10)

序列去冗餘

usearch -fastx_uniques outvsearch.good.fa -minuniquesize 8 -fastaout uniques.fasta -sizeout -relabel uniq # mothur 修剪

usearch -fastx_uniques reads250.fa -minuniquesize 4 -fastaout uniques.fasta -sizeout -relabel uniq

刪除數量少的序列

usearch -sortbysize uniques.fasta -fastaout uniques.sorted.fasta -minsize 2 && mv uniques.sorted.fasta uniques.fasta

聚成otu

usearch -cluster_otus uniques.fasta -otus otus.fa -relabel otu

生成otu表(輸入檔案要用未去冗餘的序列)

usearch -otutab filtered.fa -otus otus.fa -mothur_shared_out otutab.txt -mapout map.txt #mothur

usearch -otutab reads250.fa -otus otus.fa -otutabout otutab.txt -mapout map.txt

多樣性指數

otu的樹檔案

計算距離矩陣

usearch -calc_distmx otus.fa -tabbedout mx.txt -maxdist 0.2 -termdist 0.3

利用距離矩陣生成tree檔案

usearch -cluster_aggd mx.txt -treeout clusters.tree -clusterout clusters.txt -id 0.80 -linkage min

注釋資訊

mothur >classify.seqs(fasta=otus.fa, reference=/home/kxf/documents/database/silva.nr_v132.align, taxonomy=/home/kxf/documents/database/silva.nr_v132.tax, cutoff=60, processors=88) #細菌,原蟲

mothur >classify.seqs(fasta=otus.fa, reference=/home/kxf/documents/database/rim_db_14_07.fasta, taxonomy=/home/kxf/documents/database/rim_db_14_07_c.txt, processors=88) #產甲烷菌

mothur >classify.seqs(fasta=otus.fa, reference=/home/kxf/documents/database/unitev6_sh_97_s.fasta, taxonomy=/home/kxf/documents/database/unitev6_sh_97_s.tax, processors=88) #真菌its

稀釋性曲線

rarefaction.single(shared=otutab.txt)

計算α多樣性指數

usearch -alpha_div otutab.txt -output alpha.txt #32位不夠

summary.single(shared=current)

計算β多樣性指數

dist.shared(shared=current, calc=thetayc-jclass-braycurtis)

count.seqs(shared=current)

unifrac.unweighted(tree=clusters.tree, count=current, distance=lt, processors=88, random=f)

unifrac.weighted(tree=clusters.tree, count=current, distance=lt, processors=88, random=f)

pcoa(phylip=clusters.tree1.unweighted.phylip.dist)

pcoa(phylip=clusters.tree1.weighted.phylip.dist)

pcoa(phylip=otutab.braycurtis.usearch.lt.dist)

pcoa(phylip=otutab.jclass.usearch.lt.dist)

pcoa(phylip=otutab.thetayc.usearch.lt.dist)

system(rm -f *.rabund)

system(mv *.dist *.loadings *.axes -t result/pcoa/)

system(mv *.tree otus.fa *.rarefaction otutab.txt otutab.usearch.count_table otutab.groups.summary *.taxonomy otutab.groups.rarefaction -t result/)

建立帶注釋資訊的otu表

create.database(shared=otutab.txt,constaxonomy=otus.nr_v132.wang.taxonomy, repfasta=otus.fa,count=otutab.usearch.count_table)

利用mysql生成UUID

在日常的開發環境中,我們經常使用uuid來當做主鍵,一般我們使用的都是現成的工具類,但其實mysql就可以生成uuid。mysql生成uuid使用的是 uuid() 函式,如下圖。但是生成的這個uuid中間是有「-」隔開的,我們一般是不要的,所以需要將「-」去掉。這裡我們使用 replace() 函式來實現去掉「-」,例如 select replace(uuid(), '-', ''); 這樣,...

利用certbot auto生成證書

cd /mnt/runtime; mkdir certbot-auto; wget <certbot-auto 下載網址>; chmod a+x certbot-auto; ./certbot-auto certonly -d domain.com --manual --preferred-challenges dns-01 --server <ACME 伺服器網址>;登入網域名稱控制台,按...

利用XML生成Excel

先用excel將我們要生成的表格設計好,然後另存為xml;將生成的book1.xml複製到專案中並開啟,找到 Table 節點,將節點的 ss:ExpandedRowCount="2" 刪除掉;往下會看到我們剛輸入的標題了吧:<Row><Cell ss:StyleID="s23"><Data ss:Type="String">...