#!/usr/bin/python
# gets the protein sequence for a list of swissprot accessions
import os
import sys
# Default location of the UniProtKB/Swiss-Prot flat file that is searched.
database = '/mnt/raid/uniprot_sprot.dat'


def _read_accessions(path):
    """Return the non-blank, whitespace-stripped lines of *path*, in file order."""
    with open(path) as handle:
        stripped = (line.strip() for line in handle)
        return [acc for acc in stripped if acc]


def _collect_sequences(db_path, wanted):
    """Scan the flat file at *db_path* once and return {accession: sequence}.

    Only accessions present in *wanted* are collected.  As in the original
    script, an entry matches when the first accession listed on one of its
    'AC' lines equals a wanted accession, and the first matching entry in
    the file wins.  The sequence is every line between the entry's 'SQ'
    line and its terminating '//' line, stripped and with spaces removed.
    """
    found = {}
    remaining = set(wanted)
    if not remaining:
        return found

    hits = set()          # wanted accessions matched by the entry being read
    in_sequence = False   # True between a matched entry's 'SQ' line and '//'
    chunks = []           # sequence fragments of the current matched entry

    with open(db_path) as db:
        for line in db:
            tag = line[:2]
            if tag == '//':
                if in_sequence:
                    sequence = ''.join(chunks)
                    for acc in hits:
                        found[acc] = sequence
                    remaining -= hits
                # Reset per-entry state so nothing leaks into the next entry.
                hits = set()
                in_sequence = False
                chunks = []
                if not remaining:
                    # Every requested accession is resolved; stop reading.
                    break
            elif in_sequence:
                chunks.append(line.strip().replace(' ', ''))
            elif tag == 'SQ':
                in_sequence = bool(hits)
            elif tag == 'AC':
                fields = line.split()
                # Guard against a bare 'AC' line with no accession on it.
                if len(fields) > 1:
                    acc = fields[1].strip(';')
                    if acc in remaining:
                        hits.add(acc)

    # The file ended inside a matched entry without a closing '//' line:
    # keep what was read, as the original loop did.
    if in_sequence:
        sequence = ''.join(chunks)
        for acc in hits:
            found[acc] = sequence
    return found


def main(fn, db_path=database, ofn=None):
    """Write the sequences of the accessions listed in *fn* to a new file.

    fn      -- text file with one accession per line.
    db_path -- flat file to search (defaults to the module-level `database`).
    ofn     -- output path; defaults to the name of *fn* prefixed with
               'seq_', placed in the same directory as *fn*.

    One '>accession' line followed by one sequence line is written per
    input accession that was found with a non-empty sequence, in input
    order (an accession listed twice is written twice).  The output file
    is created even when nothing is found.  Returns the output path.
    """
    if ofn is None:
        # Prefix only the file name so that an input path containing a
        # directory does not turn into 'seq_<dir>/<file>'.
        folder, base = os.path.split(fn)
        ofn = os.path.join(folder, 'seq_' + base)

    accessions = _read_accessions(fn)
    sequences = _collect_sequences(db_path, accessions)

    with open(ofn, 'w') as out:
        for acc in accessions:
            sequence = sequences.get(acc)
            if sequence:
                out.write('>%s\n%s\n' % (acc, sequence))
    return ofn


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: %s ACCESSION_LIST_FILE' % sys.argv[0])
    main(sys.argv[1])