AnalyticalandBioanalytical,Analytical

转换成 7
andBioanalyticalChemistry ElectronicSupplementaryMaterial Serogroup-levelresolutionofthe“Super-7”Shigatoxin-producingEscherichiacoliusingnanoporesingle-moleculeDNAsequencingAdamPeritz,eC.Paoli,Chin-YiChen,AndrewG.Gehring Fig.S1PythonscriptusedtoparsetheBLASTresultsbasedonthenumberofsignificantalignments #-*-coding:utf-8-*"""CreatedonThuSep2910:06:262016 @author:aperitz"""#intializeimportarraystrt_txt='Query='db_txt='gnl|Super7db|'align_txt='>'read_file='/Users/aperitz//python/super7test5'#read_file='/Users/aperitz//python/blasttest10.fa'writefile_less3hit='/Users/aperitz//python/less3blasthts.fa'writefile_great3hit='/Users/aperitz//python/great3blasthts.fa'#writefile_nohit='/Users/aperitz//python/noblasthts.fa'file_path='/Users/aperitz//python/'my_file=open(read_file,'r')scr_cutoff=0.5t=0line_lst=[]t=0t=0line=''sig_align_len=array.array('i',[])p_sigalign_len=array.array('b',[])tr=array.array('i',[])tr=[0]*10tr=array.array('i',[])tr=[0]*8#[0]=nohit.[1]=1hit,[2]=1hitshortalign,[3]=2hits,[4]=2hitshortalign#[5]=multihit,[6]=multihitshortalignstrain_array=array.array('b',[])scr_diff_array=array.array('b',[])##readlines1-15infile,readsFileheader#defskip_intro(my_file,strt_txt,line_lst): line=my_file.readline()whilenotline.startswith(strt_txt): line=my_file.readline()ifline.startswith(strt_txt): line_lst.append(line)return(line_lst)# definput_read(my_file,strt_txt,line,line_lst):##readsaReadandcopiesReadinformationtolistline_lst# line=my_file.readline()line_lst.append(line)whilenotline.startswith(strt_txt): line=my_file.readline()line_lst.append(line)ifline=='': return(line_lst)return(line_lst)#defget_no_query_lines(line_lst):##producesanarraycontaininglinenumberinreadwheresignificantalignmentstart# tr=0tr=0sigalign_no=array.array('i',[])line=line_lst[tr]whilenotline.startswith('Effective'): tr=tr+1line=line_lst[tr]ifline.startswith('>'): sigalign_no.append(tr)tr=tr+1ifline=='':return(sigalign_no)return(sigalign_no)#defcalc_sig_align_len(line_lst,sigalign_no,tr):#calculateslengthofalignmentforreachsignificantalignmentsig_align_len=array.array('i',[])tr=0i=0#print'align_counter=',trforiinrange(tr):read_strt=get_align_strt(line_lst,sigalign_no,i)tr=end_tr(line_lst,sigalign_no,i)read_end=get_align_end(tr,line_lst)align_len=read_end-read_strtsig_align_len.append(align_len)return(sig_align_len)#defsig_tr(line_lst,db_txt,tr):#Countthenumberofsignificantalignmentsforareadtr=
0 tr=11#significantalignmentsstartat11line read_line=line_lst[tr] ifread_line.startswith('\n'): # tr[0]=tr[0]+
1 return(tr) else: whileread_line.startswith(db_txt): tr=tr+
1 tr=tr+tr read_line=line_lst[tr] ifread_line=='\n': return(tr) # #readsfileto1stsignificantalignment # def parse_file(line_lst,align_txt,tr,scr_cutoff,sig_align_len,hit_ cntr,file_path,strain_array,scr_diff_array,tr): # #parsesalignmentfilebasedonthenumberofhitsorscoreor alignmentlength # wrtfile_name0='0blasthit.fa' wrtfile_name1='1blasthit.fa' wrtfile_name2='1blasthitshortalign.fa' wrtfile_name3='2blasthit.fa' wrtfile_name4='2blasthitshortalign.fa' wrtfile_name5='3ormoreblasthit.fa' wrtfile_name6='3ormoreblasthitshortalign.fa' wrtfile_name7='unsureblasthit.fa' # comp_sigalign_len= comp_align_len(line_lst,sig_align_len,tr) strain_array=p_strain(line_lst,strain_array,tr) scr_diff_array=scr_calc(line_lst,scr_cutoff,tr) iftr==0: new_file=file_path+wrtfile_name0 write_file(line_lst,new_file) tr[0]=tr[0]+
1 return() eliftr==1: p_sigalign_len[0]: #signifalignmentislessthan75%ofreadlengthcountas1short alignblasthit new_file=file_path+wrtfile_name2 write_file(line_lst,new_file) tr[2]=tr[2]+
1 return() else: get_strain(line_lst,tr,strain_array) new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return()eliftr==2:#testtoseeifsignificantalignmentlengthiswithcertainpercentageofreadlength#if2alignmentsarefromsameO-groupwriteto1hitfileifstrain_array[0]==strain_array[1]:ifp_sigalign_len[0]: get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return()else:#signifalignmentislessthan75%ofreadlengthcountas2shortalignblasthitnew_file=file_path+wrtfile_name4write_file(line_lst,new_file)tr[4]=tr[4]+1return()else:#Oantigennotsamefirstcheckscrdifferentialthenlengthifscr_diff_array[(tr-2)]:#scr_diffbetweenalign1&2is>=0.5*scoreofalign1#lengthisshortp_sigalign_len[0]: new_file=file_path+wrtfile_name4write_file(line_lst,new_file)tr[4]=tr[4]+1return()else:get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return()#scrdiffbetweenalign1&2is<0.5*scoreofalign1else:p_sigalign_len[0]:#iflenofalign2<75%ofreadlencountas1hit,elsecountas2hitp_sigalign_len[1]: new_file=file_path+wrtfile_name4write_file(line_lst,new_file)tr[4]=tr[4]+1return()else:new_file=file_path+wrtfile_name7 write_file(line_lst,new_file)tr[7]=tr[7]+1return() else:p_sigalign_len[1]:get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return()else:new_file=file_path+wrtfile_name3write_file(line_lst,new_file)tr[3]=tr[3]+1return() else:#tr>=3: ifscr_diff_array[0]:p_sigalign_len[0]:new_file=file_path+wrtfile_name6write_file(line_lst,new_file)tr[6]=tr[6]+1return()else:get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return() #1checkScoreDifferentialofSigalign2&3else: #ifscoredifferntialof2&3is>50%of2ifscr_diff_array[1]: #ChecktoseeifOantibenofsigalign1=OantigenofsigAlign2ifstrain_array[0]==strain_array[1]: #IfOantigensarethesamecheckthelengthoffirstsigAligntoreadlength p_sigalign_len[0]:#iffirstsigalignis<=75%ofreadlength,checklengthof2ndsigalign p_sigalign_len[1]:#2ndsigalignlengthshort,assumerestofalgnareshort new_file=file_path+wrtfile_name6write_file(line_lst,new_file)tr[6]=tr[6]+1return()else:new_file=file_path+wrtfile_name5write_file(line_lst,new_file) tr[5]=tr[5]+1return()else:p_sigalign_len[1]:new_file=file_path+wrtfile_name5write_file(line_lst,new_file)tr[5]=tr[5]+1return()else: get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return() else:p_sigalign_len[0]:p_sigalign_len[1]:new_file=file_path+wrtfile_name6write_file(line_lst,new_file)tr[6]=tr[6]+1return()else:new_file=file_path+wrtfile_name5write_file(line_lst,new_file)tr[5]=tr[5]+1return()else:p_sigalign_len[1]: get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return() else:new_file=file_path+wrtfile_name3write_file(line_lst,new_file)tr[3]=tr[3]+1return() else:p_sigalign_len[0]:p_sigalign_len[1]:new_file=file_path+wrtfile_name6write_file(line_lst,new_file)tr[6]=tr[6]+1return()else:new_file=file_path+wrtfile_name5write_file(line_lst,new_file) tr[5]=tr[5]+1return()else:p_sigalign_len[1]:get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return()else:p_sigalign_len[2]: ifstrain_array[0]==strain_array[1]: get_strain(line_lst,tr,strain_array)new_file=file_path+wrtfile_name1write_file(line_lst,new_file)tr[1]=tr[1]+1return() else:new_file=file_path+wrtfile_name5write_file(line_lst,new_file)tr[5]=tr[5]+1return() else:new_file=file_path+wrtfile_name5write_file(line_lst,new_file)tr[5]=tr[5]+1 return() #defscr_calc(line_lst,scr_cutoff,tr):#calculatesscoredifferentialoffirstthreelinesandreturnstrueorfalsebasedondifferential#returnsanarrayofbooleanscoreparedtoapercentageofscore#scr_diff_array[i]isTrueifscr_diff[i]>=scr_cutoff*scr_int[i]#andFalseifscr_diff[i]=int(scr_cutoff*scr_int[i]) scr_diff_array[i]=foo # scr_diff_array.insert(i,foo) #iffoo1orfoo2: # foo3=True #else: # foo3=False return(scr_diff_array) # return(scr_diff_array[1]) # defwrite_file(line_lst,new_file): data_file=open(new_file,'a+') data_file.writelines(line_lst[0:-1]) data_file.close() line_lst[0:-1]=[]#initializeline_lst return() # defwrite_strain_file(tr,tr,t,file_path): write_str=['']*10 hit_str=['']*
9 ret_str='\n' strain_lst=[] hit_lst=[] strainfile_name=file_path+'t.fa' read_str='TotalnumberofReads=' strain_file=open(strainfile_name,'a+') strain_lst.append('O26=') strain_lst.append('O45=') strain_lst.append('O103=') strain_lst.append('O111=') strain_lst.append('O121=') strain_lst.append('O145=') strain_lst.append('O157=') strain_lst.append('O14=') strain_lst.append('O55=') strain_lst.append('Other=') hit_lst.append('Numberofreadswithnohits=') hit_lst.append('Numberofreadswithasinglehit=') hit_lst.append('Numberofreadswithasinglehitbutshort alignment=') hit_lst.append('Numberofreadswithatwohits=') hit_lst.append('Numberofreadswithatwohitsbutshort alignment=') hit_lst.append('Numberofreadswith3ormorehits=') hit_lst.append('Numberofreadswith3ormorehitsbutshortalignment=') hit_lst.append('unsurenumberofhits=')#tr=get_strain(line_lst,tr) read_str=read_str+repr(t)+ret_strstrain_file.write(read_str)strain_file.write(ret_str)forxinrange
(8): hit_str[x]=hit_lst[x]+repr(tr[x])+ret_strstrain_file.write(hit_str[x])strain_file.write(ret_str)forxinrange(10):write_str[x]=strain_lst[x]+repr(tr[x])+ret_strstrain_file.write(write_str[x])strain_file.close()return()#t_query(my_file,strt_txt):t=0line2='foo'whileline2!
='':line2=my_file.readline()ifline2.startswith(strt_txt): t=t+1my_file.close()return(t)#p_align_len(line_lst,sig_align_len,tr):#paressignificantalignmentlengthtoreadlength.Ifsignificant#alignmentlengthis<75%ofreadlengthreturnTruealign_perc=array.array('i',[])i=p_sigalign_len=array.array('b',[])sigalign_no=get_no_query_lines(line_lst)foo=calc_sig_align_len(line_lst,sigalign_no,tr)p_sigalign_len=[True]*trread_len=0read_len=get_read_len(line_lst)read_len_fl=float(read_len)#print'readlength=',read_len,'alignlength=',fooforiinrange(tr): align_val=int((foo[i]/read_len_fl)*100)align_perc.append(align_val)ifalign_perc[i]<75: comp_sigalign_len[i]=Trueelse: comp_sigalign_len[i]=Falsep_sigalign_len)# defget_read_len(line_lst):##Returnsthelengthoftheread# read_len=0read_len_line=line_lst[7]read_str=read_len_line[7:15]#print'readstring=',read_strread_len=int(read_str)return(read_len)#defget_align_strt(line_lst,sigalign_no,i):##ReturnsthestartpositionontheRead(Query)strand#read_str=''x=7line=line_lst[sigalign_no[i]+7]whileline[x]!
='': read_str=read_str+line[x]x=x+1#read_str=line[7:13]read_strt=int(read_str)return(read_strt)#defget_align_end(tr,line_lst):#REturnstheendbasenumberofalignmentline=line_lst[tr-5]#lastQuerylineis-5linesfrom>t=0read_str='foo'end_str=''whileread_str!
='':t=t-1read_str=line[t]end_str=end_str+read_strend_str=end_str.lstrip('\n')end_str=end_str.rstrip('')end_str=end_str[::-1]read_end=int(end_str)return(read_end)#defend_tr(line_lst,sigalign_no,i):#Countsthenumberoflinesinasignificantalignmenttogetlastbasepositiontr=sigalign_no[i]+7line=line_lst[tr]whilenotline.startswith('>'):#marksnewalignmenttr=tr+1line=line_lst[tr] ifline.startswith('Lambda'):#orline.startswith('Score'):#hitendofread tr=tr-1return(tr)else:ifline.startswith('Score'): return(tr)return(tr)#p_strain(line_lst,strain_array,tr):strain_array=['']*trtr=11#significantalignmentsstartat11lineread_line=line_lst[tr]signif_tr=0x=0#whileread_line.startswith(db_txt):forsignif_trinrange(tr): strain_str=''read_line=line_lst[tr]ifread_line.startswith('\n'): return(tr)foo=read_line.find(':')tr=tr+1iffoo!
=-1: whileread_line[foo]!
='O':strain_str=strain_str+read_line[foo]foo=foo-1 strain_str=strain_str[::-1]strain_str='O'+strain_str.rstrip(':')strain_array[x]=strain_strx=x+1return(strain_array)#defget_strain(line_lst,tr,strain_array):#countsthenumberofreadswithsinglealignmentsforeachofthe"SuperSeven"#strain_array=p_strain(line_lst,strain_array,tr)x=0ifstrain_array[x]=='O26':tr[0]=tr[0]+1elifstrain_array[x]=='O45':tr[1]=tr[1]+1elifstrain_array[x]=='O103':tr[2]=tr[2]+1elifstrain_array[x]=='O111':tr[3]=tr[3]+1elifstrain_array[x]=='O121':tr[4]=tr[4]+1elifstrain_array[x]=='O145':tr[5]=tr[5]+
1 elifstrain_array[x]=='O157':tr[6]=tr[6]+1 elifstrain_array[x]=='O14':tr[7]=tr[7]+1 elifstrain_array[x]=='O55':tr[8]=tr[8]+1 else:tr[9]=tr[9]+
1 #signif_tr=signif_tr+1return() #t=t_query(my_file,strt_txt)my_file=open(read_file,'r')#skipintrolinesline_lst=skip_intro(my_file,strt_txt,line_lst)##copyReadinformationtoline_lst#x=0tr=0forxinrange(t): line_lst=input_read(my_file,strt_txt,line,line_lst)read_len=get_read_len(line_lst)#tr=sig_tr(line_lst,db_txt,tr) parse_file(line_lst,align_txt,tr,scr_cutoff,sig_align_len,hit_tr,file_path,strain_array,scr_diff_array,tr) x=x+1tr=tr+1print'readnumber=',xprint'tr=',trwrite_strain_file(tr,tr,t,file_path)my_file.close() TableS1ThedetailsofBLASTresultsforthe58ONTMinIONlibrarysequencesthatalignedtoasingleO-antigencluster Serogroup[O-antigendatabasesequencelength(bp)] O26[13,270] O45[14,483] O103[12,003] MinIONSequenceReadLength(bp)8707564366698579 678775477903550273647215696857468469883510152652110440 5934 BLASTScorea 11954bits(6473) 8892bits(4815) 9738bits5273 6311bits(3417) 6338bits(3432) 9005bits(4876) 10050bits(5442) 5489bits(2972) 9540bits(5166) 8780bits(4754)9214bits(4989)6778bits(3670) 10083bits(5460) 10770bits(5832) 10754bits(5823) 8983bits(4864) 13200bits(7148) 4634bits(2509) Identitiesb 8179/8908(92%) 5460/5734(95%) 6405/6891(93%) 7204/8867(81%) 6141/7307(84%) 5828/6234(93%) 6005/6241(96%) 6891/7633(90%) 6510/7170(91%) 6095/6663(91%) 6510/7170(91%) 5291/5993(88%) 7678/8620(89%) 8391/9475(89%) 8318/9383(89%) 6141/6686(92%) 10002/11200(89%) 4537/540(84%) Gapsc 496/8908(6%) 194/5734(3%) 319/6891(5%) 921/8867(10%) 753/7307(10%) 279/6234(4%) 181/6241(3%) 289/4422(6%) 481/7633(6%) 409/6663(6%) 402/7170(6%) 434/5993(7%) 668/8620(8%) 782/9475(8%) 730/9383(8%) 374/6686(6%) 916/11200(8%) 591/5403(11%) O111[14,516] O121[15,155] 91277960679548547506 58751005573635971632310096596883357446 978915539841112134465481705630 11367bits(6155) 10728bits(5809) 8961bits(4852) 7539bits(4082) 5886bits(3187) 8335bits(4513)16083bits(8709)9289bits(5030)5123bits(2774)10187bits(5516)11110bits(6016)4573bits(2476)10746bits(5819)11947bits(6469) 14187bits(7682) 19145bits(10367) 16856bits(7503) 16325bits(8840) 7448bits(4033) 3195bits(1730) 5199bits(2815) 8591/9622(89%) 7559/8308(91%) 6437/7114(90%) 4665/4912(95%) 5125/596(86%) 748/9622(8%) 503/8308(6%) 461/7114(6%) 177/4912(4%) 524/5963(9%) 52623/6096(92%) 9800/10255(95%) 361/10255(90%) 5147/6188(83%) 6171/6446(96%) 7370/7945(93%) 5244/6446(81%) 7746/08578(90%) 7257/7585(96%) 85626/8881(96%) 13143/14325(92%) 8253/8566(96%) 10355/10994(94%) 4524/4729(96%) 5580/7271(77%) 4376/5052(87%) 328/6096(5%) 361/10255(4%) 499/7539(7%) 582/6188(9%) 209/6446(3%) 408/7945(5%) 727/6446(11%) 526/8578(6%) 264/7585(3%) 267/8881(3%) 826/14325(6%) 250/8566(4%) 476/10994(4%) 162/4729(3%) 937/7271(13%) 417/5052(8%) O145[15,475] 1184954261223068071090271338826 13193bits(7144) 7160bits(3877) 11686bits(8755) 4039bits(2655) 11300bits(6199) 4841bits(2621) 10757bits(5825) 10196/1151389(%) 4716/5071(93%) 11399/12536(91%) 5731/7080(81%) 8477/9489(89%) 5127/6194(83%) 8428/9520(89%) 836/11513(7%) 257/5071(5%) 740/12536(6%) 756/7080(11%) 667/9489(7%) 744/6194(12%) 837/9520(9%) O157[14,002] 6484 4658bits4633/5538602/5538 (2522) (84%) (11%) 6784 10946bits6651/6953239/6953 (5927) (96%) (3%) 6409 6340bits5629/6583576/6583 (3433) (86%) (9%) 9503 6220bits6610/8013871/8013 (3368) (82%) (11%) 4994 5867bits4737/5398476/5398 (3177) (88%) (9%) 4831 5400bits4455/5114426/5114 (2249) (87%) (8%) 4566 6691bits4239/4501184/4501 (3623) (94%) (4%) 6095 9710bits5926/6207211/6207 (5258) (95%) (3%) 8161 9995bits6541/7020342/7020 (5412) (93%) (5%) 4962 6396bits4684/5205358/5205 (3463) (90%) (7%) 9522 15564bits9346/9728308/9728 (8428) (96%) (3%) 7081 11956bits6941/7135157/7135 (6474) (97%) (2%) aAllExpectationvalueswere0.0bTheaverageidentityscorewas85.4%withamaximumof97%andaminimumof77%cTheaveragegapscorewas6.3%withamaximumof12%andaminimumof2%

标签: #怎么看 #换行 #邮箱 #cdr #平局 #雷暴 #csgo #枪法