use lib "/opt/ensembl/modules";
use Bio::EnsEMBL::Registry;
use strict;
my $debug=0;
my $range=10000;
my $registry = 'Bio::EnsEMBL::Registry';
$registry->load_registry_from_db( -host => 'ensembldb.ensembl.org', -user => 'anonymous' );
my $slice_adaptor = $registry->get_adaptor( 'Mouse', 'Core', 'Slice' );
my $tr_adaptor = $registry->get_adaptor( 'Mouse', 'Core', 'Transcript' );
my $gene_adaptor = $registry->get_adaptor( 'Mouse', 'Core', 'Gene' );
sub chromo_pos {
my $adaptor=shift;
my $chromo=shift;
my $pos1=int(shift);
my $pos2=int(shift);
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', $chromo, $pos1, $pos2 );
my @transcripts = @{ $adaptor->fetch_all_by_Slice($slice) };
return @transcripts;
}
sub output {
my $s=shift;
my $fn=shift;
my $logfile="$fn.log";
unless (-r $logfile) {open(L,">$logfile");close L}
open(L,">>$logfile");print L $s;close L;
print $s;
}
sub xref {
my $a=shift;
return '-' if not $a;
my $db_entries = $a->get_all_DBEntries();
my %rh=();
my $other_id;
foreach my $dbe ( @{$db_entries} ) {
$rh{$dbe->dbname()}=$dbe->display_id();
$other_id=$dbe->display_id();
}
if ( defined $rh{ "UniGene" } ) {return $rh{ "UniGene" };}
if ( defined $rh{ "RefSeq_dna" } ) {return $rh{ "RefSeq_dna" };}
return $other_id;
}
sub ens {my $a=shift;return '-' if not $a;return $a->stable_id();}
sub symbol {my $a=shift;return '-' if not $a;return $a->external_name();}
sub strand {my $a=shift;return '-' if not $a;return $a->strand();}
sub display_genes {
my $array_ref=shift;
my $info=shift;
print "\n";
foreach ( @{$array_ref} ) {
print "Symbol: ",symbol($_),"\n";
print "ID: ",ens($_),"\n";
print "Start: ",$_->start(),"\n";
print "End: ",$_->end(),"\n";
print "Strand: ",$_->strand(),"\n";
print "\n";
}
}
sub find_genes {
my $chromo=int(shift);
my $start_pos=shift;
my $end_pos=shift;
my $info=shift;
my $fn=shift;
my $middle=int(($start_pos + $end_pos) / 2.0);
my @trans_list=chromo_pos($tr_adaptor,$chromo,$middle-$range,$middle+$range);
display_genes(\@trans_list,$info) if $debug;
my %dists=();
foreach ( @trans_list ) {
my $distance_to_start;
if ($_->strand()==1) {$distance_to_start=($_->start()-$range)}
else {$distance_to_start=$_->end()-$range}
$dists{$distance_to_start}=$_;
}
my $down;
my $up;
my $down_distance=0;
my $up_distance=0;
foreach my $d (sort { $a <=> $b } keys %dists){
next if $d>0;
$up=$dists{$d};
if ($dists{$d}->strand()==1) {$up_distance=-1*$d;}
else {$up_distance=$d}
}
foreach my $d (sort { $a <=> $b } keys %dists){
next if $d<0;
$down=$dists{$d};
if ($dists{$d}->strand()==1) {$down_distance=-1*$d;}
else {$down_distance=$d}
last;
}
output( sprintf("%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\t%s\t%d\n", $info,
ens($down), symbol($down), xref($down), strand($down), $down_distance,
ens($up), symbol($up), xref($up), strand($up), $up_distance),$fn);
}
my $fn=shift;
die if not -r $fn;
my $header1="Symbol\tChromosome\tStart-Pos\tEnd-Pos\t";
my $header2="Down-Ensembl\tDown-Symbol\tDown-ID\tDown-Strand\tDown-Distance\t";
my $header3="Up-Ensembl\tUp-Symbol\tUp-ID\tUp-Strand\tUp-Distance\n";
my $header=$header1.$header2.$header3;
open (FILE, $fn);
my $cnt=0;
unlink("$fn.log");
output($header,$fn);
while (<FILE>) {
$cnt++;
next if $cnt<2;
my @line_split=split("\t");
my $gene_symbol=$line_split[0];
my $chromo=$line_split[1];
$chromo=~s/chr//;
my $start_pos=$line_split[2];
my $end_pos=$line_split[3];
next if not $chromo;
next if not $start_pos;
next if not $end_pos;
my $info="$gene_symbol\t$chromo\t$start_pos\t$end_pos";
find_genes($chromo,$start_pos,$end_pos,$info,$fn);
}
close (FILE);