#!/usr/bin/python
# gets the protein sequence for a list of swissprot accessions
import sys
# Path of the UniProtKB/Swiss-Prot flat file that is searched.
database = '/mnt/raid/uniprot_sprot.dat'


def find_sequences(db_lines, accessions):
    """Collect the sequences of the requested accessions in one pass.

    db_lines   -- iterable of lines of a UniProt flat file (for example an
                  open file object).  Only three kinds of line are
                  interpreted: 'AC' lines (accessions, ';'-terminated),
                  the 'SQ' line (everything after it, up to the entry
                  terminator, is sequence data) and '//' (end of entry).
    accessions -- iterable of accession strings to look for; empty strings
                  are ignored.

    Returns a dict mapping each accession that was found to its sequence
    with all blanks removed.  An accession matches an entry when it occurs
    anywhere on any of the entry's AC lines; if several entries list the
    same accession, the first one in the file wins.  Scanning stops as
    soon as every requested accession has been found.
    """
    wanted = set(acc for acc in accessions if acc)
    found = {}
    if not wanted:
        # Nothing to look for: do not read the database at all.
        return found

    entry_accessions = []  # accessions seen on the current entry's AC lines
    matched = []           # those of them that are still wanted
    residues = []          # sequence chunks of the current (matched) entry
    in_sequence = False    # True between the SQ line and the '//' line

    for line in db_lines:
        tag = line[:2]
        if tag == '//':
            if matched:
                sequence = ''.join(residues)
                for acc in matched:
                    found[acc] = sequence
                wanted.difference_update(matched)
            # Reset all per-entry state so nothing leaks into the next entry.
            entry_accessions, matched, residues = [], [], []
            in_sequence = False
            if not wanted:
                break
        elif in_sequence:
            # Only pay for collecting residues when the entry is wanted.
            if matched:
                residues.append(line.strip().replace(' ', ''))
        elif tag == 'AC':
            # An entry may have several AC lines, each with several
            # accessions; the first field is the 'AC' tag itself.
            entry_accessions.extend(
                token.strip(';') for token in line.split()[1:])
        elif tag == 'SQ':
            # The SQ header itself carries no residues; they start on the
            # following line.
            in_sequence = True
            matched = [acc for acc in entry_accessions if acc in wanted]

    # Tolerate a final entry that is not closed by a '//' line.
    if matched:
        sequence = ''.join(residues)
        for acc in matched:
            found[acc] = sequence
    return found


def write_fasta(out, accessions, sequences):
    """Write one FASTA record per requested accession, in request order.

    out        -- object with a write() method.
    accessions -- accessions in the order they were requested; duplicates
                  produce duplicate records.
    sequences  -- dict as returned by find_sequences().

    Accessions that were not found, or whose sequence is empty, are skipped.
    """
    for acc in accessions:
        sequence = sequences.get(acc)
        if sequence:
            out.write('>%s\n%s\n' % (acc, sequence))


def main(argv):
    """Run the script: argv[1] names a file with one accession per line.

    The result is written to 'seq_' + argv[1].  The prefix is prepended to
    the argument exactly as given, so the list file is expected to be named
    without a directory part.  Returns the process exit status.
    """
    if len(argv) < 2:
        sys.stderr.write('usage: %s ACCESSION_LIST_FILE\n' % argv[0])
        return 2
    fn = argv[1]
    ofn = 'seq_' + fn
    with open(fn) as listing:
        accessions = [line.strip() for line in listing]
    with open(database) as db:
        sequences = find_sequences(db, accessions)
    # The output file is always (re)created, even when nothing was found.
    with open(ofn, 'w') as out:
        write_fasta(out, accessions, sequences)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))