import xml.etree.cElementTree as ET
import logging
import sys
def KGML2Graph(xmlfile, filter_by = ()):
    """Parse a KGML pathway file into node labels, node types and edges.

    Args:
        xmlfile: Path or file object accepted by ``ET.parse``.
        filter_by: Currently unused; kept so existing callers keep working.

    Returns:
        A 5-tuple ``(pathway_name, pathway_title, pathway_labels,
        pathway_types, pathway_edges)`` where

        * ``pathway_name`` / ``pathway_title`` are the ``name`` and ``title``
          attributes of the root element,
        * ``pathway_labels`` maps each entry ``id`` to the ``name`` attribute
          of its ``<graphics>`` child (``None`` when that child or attribute
          is absent),
        * ``pathway_types`` maps each entry ``id`` to its ``type`` attribute,
        * ``pathway_edges`` is a list of ``[entry1, entry2]`` pairs, one per
          ``<relation>`` element, in document order.

    Raises:
        NotImplementedError: If the root element's ``org`` attribute is
            ``'ec'``.
    """
    tree = ET.parse(xmlfile)
    root = tree.getroot()

    if root.get('org') == 'ec':
        raise NotImplementedError('Didn\'t implement EC pathways yet')

    pathway_title = root.get('title')
    pathway_name = root.get('name')

    pathway_labels = {}
    pathway_types = {}
    # iter() replaces the deprecated getiterator(), which newer Python
    # versions no longer provide.
    for entry in tree.iter('entry'):
        node_id = entry.get('id')
        node_type = entry.get('type')
        # Lazy %-formatting: no TypeError when an attribute is missing.
        logging.debug('%s %s', node_type, node_id)

        # An entry without a <graphics> child simply has no label.
        graphics = entry.find('graphics')
        node_title = graphics.get('name') if graphics is not None else None
        logging.debug(node_title)

        pathway_labels[node_id] = node_title
        pathway_types[node_id] = node_type

    pathway_edges = [
        [rel.get('entry1'), rel.get('entry2')]
        for rel in tree.iter('relation')
    ]

    # <reaction> elements are intentionally not parsed: their result was never
    # part of the return value, and the old loop read the substrate loop
    # variable while iterating products, so it failed on reactions that had
    # products but no substrates.
    return pathway_name, pathway_title, pathway_labels, pathway_types, pathway_edges
# Script body: parse the KGML file named by the first command-line argument
# and print one tab-separated line per relation whose two endpoints are both
# 'gene' entries: <label 1> <label 2> <pathway title> <pathway name>.
name, title, labels, types, edges = KGML2Graph(sys.argv[1])
for e1, e2 in edges:
    if types[e1] == 'gene' and types[e2] == 'gene':
        # Strip the '...' marker from the labels before printing.
        row = (
            labels[e1].replace('...', ''),
            labels[e2].replace('...', ''),
            title,
            name,
        )
        print('%s\t%s\t%s\t%s' % row)