import sys,dbcnf
# Database connection object and a cursor on it; both come from the
# project-local dbcnf module.
db = dbcnf.db
dbc = db.cursor()

# The input path is the first command-line argument; the output path is
# that same string with a 'tmd_' prefix.
fn = sys.argv[1]
ofn = 'tmd_' + fn

# Column names, in the order they are written to the output file.
fields = ['gene', 'tmd', 'secreted', 'gpi', 'mito']
def prediction_priority(prediction):
    """Decide which prediction has priority, as there can only be one.

    Args:
        prediction: A ``(tmd, loc, gpi)`` sequence (for example one row of
            the ``SELECT tmd,loc,gpi`` query), or ``None`` when no row was
            found. ``tmd`` must be convertible with ``int()``.

    Returns:
        A dict with at most one entry, chosen in this order:

        1. ``{'gpi': '1'}`` when gpi is 'G', loc is 'S' and tmd < 2
        2. ``{'tmd': '>9'}`` when tmd > 9
        3. ``{'tmd': str(tmd)}`` when tmd > 0
        4. ``{'secreted': '1'}`` when loc is 'S'
        5. ``{'mito': '1'}`` when loc is 'M'

        An empty dict is returned when nothing applies or when
        ``prediction`` is ``None``.

    Raises:
        ValueError, TypeError: If ``tmd`` cannot be converted to an int.
    """
    # A lookup that matched no row hands us None; report "no prediction"
    # instead of failing while unpacking it.
    if prediction is None:
        return {}

    # Unpack in the body rather than in the signature: tuple parameters
    # are Python-2-only syntax, while this form works on Python 2 and 3.
    tmd, loc, gpi = prediction
    tmd_count = int(tmd)

    if gpi == 'G' and loc == 'S' and tmd_count < 2:
        return {'gpi': '1'}
    if tmd_count > 9:
        return {'tmd': '>9'}
    if tmd_count > 0:
        # Keep the value as it was supplied, not the int-normalised form.
        return {'tmd': str(tmd)}
    if loc == 'S':
        return {'secreted': '1'}
    if loc == 'M':
        return {'mito': '1'}
    return {}
# Build the tab-separated report: a header row, then one row per gene in
# which only the winning prediction column is filled in.
out_lines = ['\t'.join(fields)]
count_dict = {}  # category label -> number of genes assigned to it

# One gene identifier per input line. Duplicates collapse via the set, and
# blank lines are dropped so an empty identifier is never queried.
with open(fn) as gene_file:
    genelist = set(line.strip() for line in gene_file)
genelist.discard('')

# Iterate in sorted order so the report is reproducible between runs.
for gene in sorted(genelist):
    dbc.execute('SELECT tmd,loc,gpi FROM beta WHERE protein=%s', (gene,))
    prediction_dict = prediction_priority(dbc.fetchone())
    if prediction_dict:
        # prediction_priority returns at most one entry, so this is the
        # gene's single category.
        cat = next(iter(prediction_dict))
        # tmd predictions are tallied per value, e.g. 'tmd3' or 'tmd>9'.
        if cat == 'tmd':
            cat += prediction_dict[cat]
        count_dict[cat] = count_dict.get(cat, 0) + 1
    # NOTE(review): genes with no prediction still get a row (gene column
    # only); confirm this is intended rather than skipping them.
    prediction_dict['gene'] = gene
    out_lines.append('\t'.join(prediction_dict.get(x, '') for x in fields))

# Join once at the end instead of growing a string inside the loop.
out_text = '\n'.join(out_lines) + '\n'
with open(ofn, 'w') as out_file:
    out_file.write(out_text)

# Per-category summary on stdout, one "label<TAB>count" line each.
for kv in sorted(count_dict.items()):
    print('\t'.join(map(str, kv)))