#!/usr/bin/python
import dbcnf,re
# Counts the distinct peptide sequences of one table that contain the
# ``glyco`` sequence pattern and/or an ``N[`` bracket annotation, and prints
# the tallies with percentages.
#
# The work is done inside main() so that importing this file does not touch
# the database; running it as a script prints the same report as before.

# Minimum group probability a peptide must exceed to be selected.
gp=0.9
# Table the peptides are read from.
tn='csc_min6_final'

# N, any residue except P, S or T, any residue except P.
# NOTE(review): presumably the N-glycosylation sequon -- confirm.
glyco=re.compile(r'N[^P][ST][^P]')
# Shortest bracketed run, e.g. "[115]"; removed before pattern matching.
# NOTE(review): presumably mass-modification annotations -- confirm.
noMass=re.compile(r'\[.+?\]')

# Report labels, in the same order as the tuple built by count_peptides().
_LABELS=('pattern','nopattern','pattern+mass','pattern+nomass',
         'nopattern+mass','nopattern+nomass')


def rowlist(x):
    """Return the first column of every row left on cursor *x* as a tuple.

    An empty result set yields an empty tuple instead of raising IndexError.
    """
    return tuple(row[0] for row in x.fetchall())


def count_peptides(peptides):
    """Classify peptide strings by pattern match and ``N[`` annotation.

    Empty or None entries are skipped and are not part of any count.
    The pattern is searched on the sequence with bracketed text removed;
    the annotation test looks for the literal ``N[`` in the raw sequence.

    Returns a 6-tuple of counts: (pattern, nopattern, pattern+mass,
    pattern+nomass, nopattern+mass, nopattern+nomass).
    """
    pattern_mass=pattern_nomass=nopattern_mass=nopattern_nomass=0
    for peptide in peptides:
        if not peptide:
            continue
        # search() is enough: only the existence of a match matters.
        has_pattern=glyco.search(noMass.sub('',peptide)) is not None
        has_mass='N[' in peptide
        if has_pattern and has_mass:
            pattern_mass+=1
        elif has_pattern:
            pattern_nomass+=1
        elif has_mass:
            nopattern_mass+=1
        else:
            nopattern_nomass+=1
    return (pattern_mass+pattern_nomass,
            nopattern_mass+nopattern_nomass,
            pattern_mass,pattern_nomass,
            nopattern_mass,nopattern_nomass)


def _percent(count,total):
    """Return *count* as a percentage of *total*; 0.0 when *total* is 0."""
    if not total:
        return 0.0
    return float(count)/float(total)*100.0


def report_lines(counts):
    """Format the 6-tuple from count_peptides() as the report lines.

    Percentages are relative to pattern + nopattern (the first two counts)
    and are truncated to whole numbers by the ``%d`` conversion.
    """
    total=counts[0]+counts[1]
    return ['%d (%d%%) %s' % (count,_percent(count,total),label)
            for count,label in zip(counts,_LABELS)]


def main():
    """Query the peptides, then print the header and the six report lines."""
    dbc=dbcnf.db.cursor()
    try:
        # The table name is a module constant, so interpolating it is safe;
        # the threshold goes through the driver's parameter binding.
        dbc.execute('SELECT DISTINCT peptidesequence FROM %s WHERE groupprobability>%%s' % tn,(gp,))
        peptides=rowlist(dbc)
    finally:
        dbc.close()
    print('%d peptides in %s at groupprobability>%2.1f' % (len(peptides),tn,gp))
    for line in report_lines(count_peptides(peptides)):
        print(line)


if __name__=='__main__':
    main()