use lib "/opt/ensembl/modules";
use Bio::EnsEMBL::Registry;
use strict;
my $registry = 'Bio::EnsEMBL::Registry';
$registry->load_registry_from_db(
-host => 'ensembldb.ensembl.org',
-user => 'anonymous'
);
my $slice_adaptor = $registry->get_adaptor( 'Mouse', 'Core', 'Slice' );
my $tr_adaptor = $registry->get_adaptor( 'Mouse', 'Core', 'Transcript' );
my $gene_adaptor = $registry->get_adaptor( 'Mouse', 'Core', 'Gene' );
sub chromo_pos {
my $chromo=int(shift);
my $pos1=int(shift);
my $pos2=int(shift);
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', $chromo, $pos1, $pos2 );
my @transcripts = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
return $transcripts[0]
}
sub chromo_pos2 {
my $chromo=int(shift);
my $pos1=int(shift);
my $pos2=int(shift);
my $slice = $slice_adaptor->fetch_by_region( 'chromosome', $chromo, $pos1, $pos2 );
my @transcripts = @{ $tr_adaptor->fetch_all_by_Slice($slice) };
return $transcripts[0]
}
sub gene_search_unlim {
my $chromo=int(shift);
my $p1=shift;
my $p2=shift;
my $p1f=shift;
my $p2f=shift;
my $i=0;
while ( not chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i+=10000}
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=1000}
$i+=1000;
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=100}
$i+=100;
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=10}
$i+=10;
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=1}
return $i+1;
}
sub gene_search10000 {
my $chromo=int(shift);
my $p1=shift;
my $p2=shift;
my $p1f=shift;
my $p2f=shift;
my $i=0;
for ($i=0;$i<=10000;$i+=1000) {last if chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f)}
if (not chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f)) {return 0}
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=1000}
$i+=1000;
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=100}
$i+=100;
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=10}
$i+=10;
while ( chromo_pos($chromo,$p1+$i*$p1f,$p2+$i*$p2f) ) {$i-=1}
return $i+1;
}
sub output {
my $s=shift;
my $fn=shift;
my $logfile="$fn.log";
unless (-r $logfile) {open(L,">$logfile");close L}
open(L,">>$logfile");print L $s;close L;
print $s;
}
sub xref {
my $db_entries = shift->get_all_DBEntries();
my %rh=();
my $other_id;
foreach my $dbe ( @{$db_entries} ) {
$rh{$dbe->dbname()}=$dbe->display_id();
$other_id=$dbe->display_id();
}
if ( defined $rh{ "UniGene" } ) {return $rh{ "UniGene" };}
if ( defined $rh{ "RefSeq_dna" } ) {return $rh{ "RefSeq_dna" };}
return $other_id;
}
sub find_genes {
my $chromo=int(shift);
my $start_pos=shift;
my $end_pos=shift;
my $info=shift;
my $fn=shift;
my $gene_pos_plus=gene_search10000($chromo,$start_pos,$end_pos,0,1);
my $tr_plus=chromo_pos($chromo,$start_pos,$end_pos+$gene_pos_plus);
my $tr_plus2=chromo_pos2($chromo,$start_pos,$end_pos+$gene_pos_plus);
my $gene_pos_minus=gene_search10000($chromo,$start_pos,$end_pos,-1,0);
my $tr_minus=chromo_pos($chromo,$start_pos-$gene_pos_minus,$end_pos);
my $tr_minus2=chromo_pos2($chromo,$start_pos-$gene_pos_minus,$end_pos);
my $plus_sign="";
my $plus_ens="-";
my $plus_name="-";
my $plus_id="-";
my $plus_strand="-";
my $plus_distance="-";
if ($tr_plus) {
$plus_strand=$tr_plus->strand();
if ($plus_strand==1) {$plus_sign="-";}
else {$plus_sign="+";}
$plus_name=$tr_plus->external_name();
$plus_ens=$tr_plus->stable_id();
$plus_id=xref($tr_plus2);
if ($plus_strand==1) {$plus_distance=$tr_plus->start();}
else {$plus_distance=$tr_plus->end();}
}
my $minus_sign="";
my $minus_ens="-";
my $minus_name="-";
my $minus_id="-";
my $minus_strand="-";
my $minus_distance="-";
if ($tr_minus) {
$minus_strand=$tr_minus->strand();
if ($minus_strand==1) {$minus_sign="+";}
else {$minus_sign="-";}
$minus_name=$tr_minus->external_name();
$minus_ens=$tr_minus->stable_id();
$minus_id=xref($tr_minus2);
if ($minus_strand==1) {$minus_distance=$gene_pos_minus - $tr_minus->start();}
else {$minus_distance=$gene_pos_minus - $tr_minus->end();}
}
output( sprintf("%s\tOut\t%s\t%s\t%s\t%s\t%s%d\t%s\t%s\t%s\t%s\t%s%d\n", $info,
$plus_ens, $plus_name, $plus_id, $plus_strand, $plus_sign, $plus_distance,
$minus_ens, $minus_name, $minus_id, $minus_strand, $minus_sign, $minus_distance),$fn);
}
my $fn=shift;
die if not -r $fn;
my $header1="Symbol\tChromosome\tStart-Pos\tEnd-Pos\tOutOrIn\t";
my $header2="Down-Ensembl\tDown-Symbol\tDown-ID\tDown-Strand\tDown-Distance\t";
my $header3="Up-Ensembl\tUp-Symbol\tUp-ID\tUp-Strand\tUp-Distance\n";
my $header=$header1.$header2.$header3;
open (FILE, $fn);
my $cnt=0;
unlink("$fn.log");
output($header,$fn);
while (<FILE>) {
$cnt++;
next if $cnt<2;
my @line_split=split("\t");
my $gene_symbol=$line_split[0];
my $chromo=$line_split[1];
$chromo=~s/chr//;
my $start_pos=$line_split[2];
my $end_pos=$line_split[3];
next if not $chromo;
next if not $start_pos;
next if not $end_pos;
my $info="$gene_symbol\t$chromo\t$start_pos\t$end_pos";
my $tr=chromo_pos($chromo,$start_pos,$end_pos);
my $tr2=chromo_pos2($chromo,$start_pos,$end_pos);
if ($tr) {
my $name=$tr->external_name();
my $id=xref($tr2);
my $distance;
if ($tr->strand()==1) {$distance=$tr->start();}
else {$distance=$tr->end();}
my $strand=$tr->strand();
my $ens=$tr->stable_id();
output( sprintf("%s\tIn\t%s\t%s\t%s\t%s\t%s%d\t%s\t%s\t%s\t%s\t%s%d\n", $info,
$ens, $name, $id, $strand, "", $distance, $ens, $name, $id, $strand, "", $distance),$fn);
} else {
find_genes($chromo,$start_pos,$end_pos,$info,$fn);
}
}
close (FILE);