#!/usr/bin/perl
# Get exon positions (chromosome, absolute and gene-relative coordinates) for a list of ENSEMBL gene ids
# 2010-01-05, Nick Fankhauser

use lib "/opt/ensembl/modules";
use Bio::EnsEMBL::Registry;
use strict;
# The registry is driven through class methods, so the class name itself
# serves as the invocant for every registry call below.
my $registry = 'Bio::EnsEMBL::Registry';

# Populate the registry from the database server ensembldb.ensembl.org,
# connecting with the 'anonymous' account.
$registry->load_registry_from_db(
	-host => 'ensembldb.ensembl.org',
	-user => 'anonymous',
);

# Build a tab-separated table of exon positions for one gene.
#
# Arguments:
#   $gene_symbol - label copied verbatim into the first output column
#   $ensembl_id  - gene stable id handed to fetch_by_stable_id()
#
# Returns a string holding one "\n"-terminated line per (transcript, exon)
# pair with the columns:
#   gene symbol, gene id, transcript id, exon id, strand, phase,
#   seq region name, exon start, exon start - gene start, exon end - gene start
#
# Returns the empty string (after a warning on STDERR) when no id is given or
# the gene cannot be fetched, so a single bad id does not abort a whole run.
# Dies if the registry cannot supply a gene adaptor.
sub exon_positions {
	my ($gene_symbol, $ensembl_id) = @_;

	unless ( defined $ensembl_id && length $ensembl_id ) {
		my $label = defined $gene_symbol ? " for '$gene_symbol'" : "";
		warn "exon_positions: no gene id given$label, skipping\n";
		return "";
	}

	my $gene_adaptor = $registry->get_adaptor( 'Human', 'Core', 'Gene' )
		or die "exon_positions: no Human/Core/Gene adaptor available from the registry\n";

	# An id that is unknown to the database yields undef rather than an object.
	my $gene = $gene_adaptor->fetch_by_stable_id( $ensembl_id );
	unless ( defined $gene ) {
		warn "exon_positions: gene '$ensembl_id' not found, skipping\n";
		return "";
	}

	# Reference point for the relative coordinates; constant for all exons.
	my $gene_start = $gene->start();

	my $lines = "";
	# Plain iteration: the array behind the returned reference is left intact.
	foreach my $tr ( @{ $gene->get_all_Transcripts() } ) {
		my $tr_stable_id = $tr->stable_id();
		foreach my $exon ( @{ $tr->get_all_Exons() } ) {
			my $exon_stable_id = $exon->stable_id();
			my $exon_strand    = $exon->strand();
			my $exon_phase     = $exon->phase();
			my $exon_start     = $exon->start();
			my $exon_chr       = $exon->seq_region_name();
			my $relative_start = $exon_start - $gene_start;
			my $relative_end   = $exon->end() - $gene_start;
			$lines .= join( "\t",
				$gene_symbol, $ensembl_id, $tr_stable_id, $exon_stable_id,
				$exon_strand, $exon_phase, $exon_chr,
				$exon_start, $relative_start, $relative_end,
			) . "\n";
		}
	}
	return $lines;
}

# Main program: read a tab-separated gene list (gene symbol, ENSEMBL gene id)
# from the file named by the first command line argument and write the exon
# table to "<input file>.pos.txt".
my $fn = shift;	# get filename from first command line argument
die "Usage: $0 <gene_list_file>\n" unless defined $fn && length $fn;
my $ofn = $fn . ".pos.txt";

# Three-argument open keeps characters such as '>' or '|' in the file name
# from being interpreted as an open mode; failures are reported immediately.
open( my $in,  '<', $fn )  or die "Cannot open input file '$fn': $!\n";
open( my $out, '>', $ofn ) or die "Cannot open output file '$ofn': $!\n";

# Unbuffer the output handle so the rows of each gene reach the file as soon
# as they are printed; results already written survive a later failure.
{
	my $previous = select($out);
	$| = 1;
	select($previous);
}

print {$out} "gene\tgene_id\ttranscript_id\texon_id\tstrand\tphase\tchr\tabs_start\trel_start\trel_end\n"
	or die "Cannot write to '$ofn': $!\n";

while ( my $line = <$in> ) {	# loop through the file
	chomp $line;
	$line =~ s/\r\z//;	# tolerate CRLF line endings
	next if length($line) < 2;

	# split line into columns (Gene_symbol\tENSEMBL_id)
	my ( $gene_symbol, $ensembl_id ) = split /\t/, $line;
	unless ( defined $ensembl_id && length $ensembl_id ) {
		warn "Skipping line $. of '$fn': expected gene_symbol<TAB>ensembl_id\n";
		next;
	}

	print {$out} exon_positions( $gene_symbol, $ensembl_id )
		or die "Cannot write to '$ofn': $!\n";
}

close($in);
# Write errors can surface only at close time, so check it.
close($out) or die "Cannot close output file '$ofn': $!\n";