#!/usr/bin/perl
package refseq_exon_intron_sizes;

# Populates, at load time, five exported hashes describing RefSeq transcripts:
#
#   %refseq_chrom         transcript name    -> chromosome
#   %refseq_txStart       transcript name    -> transcription start
#   %refseq_txEnd         transcript name    -> transcription end
#   %refseq_exon_sizes    "<transcript>.<n>" -> exon size
#   %refseq_intron_sizes  "<transcript>.<n>" -> intron size
#
# Exons and introns are numbered from 1 in transcript order: ascending
# genomic order when the alignment strand is "+", descending otherwise.
#
# Only the alignment(s) of each transcript with the highest matches/qSize
# ratio (which must be at least 0.9) are considered.
#
# NOTE(review): rows are read positionally from "select *".  The indices used
# below presumably correspond to a refSeqAli table with a leading "bin" column
# and a refGene table without one -- confirm against the actual schema.

use strict;
use warnings;
use DBI;
use Exporter;

our @ISA    = qw(Exporter);
our @EXPORT = qw(
    %refseq_exon_sizes
    %refseq_intron_sizes
    %refseq_chrom
    %refseq_txStart
    %refseq_txEnd
);

# Chromosomes whose alignments are processed, in processing order.
my @chroms = qw(
    chr1  chr2  chr3  chr4  chr5  chr6  chr7  chr8  chr9  chr10
    chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20
    chr21 chr22 chrX  chrY  chrM
);

# Connection settings (placeholders to be filled in by the installer).
my $db   = "DBI:mysql:DATABASE_NAME;host=HOST_NAME";
my $user = "USER_NAME";
my $pass = "PASSWORD";

our %refseq_chrom        = ();
our %refseq_txStart      = ();
our %refseq_txEnd        = ();
our %refseq_exon_sizes   = ();
our %refseq_intron_sizes = ();

# Records $size under $key in the hash referenced by $size_of.
#
# If a different, non-zero size is already stored for $key, the larger of the
# two is kept (to be less stringent when a size cutoff is chosen later; most
# variances are minor anyway) and 1 is returned to signal the disagreement.
# Otherwise $size is stored and 0 is returned.
sub _record_largest_size {
    my ($size_of, $key, $size) = @_;

    my $known = $size_of->{$key};
    if ($known && $known != $size) {
        $size_of->{$key} = $size if $size > $known;
        return 1;
    }

    $size_of->{$key} = $size;
    return 0;
}

my $dbh = DBI->connect($db, $user, $pass)
    or die "Couldn't connect to database: " . $DBI::errstr;

# ---------------------------------------------------------------------------
# Pass 1: best matches/qSize ratio seen for each transcript name.
# ---------------------------------------------------------------------------
my %percentages = ();
{
    my $sth = $dbh->prepare(
        q{select * from refSeqAli where (matches/qSize) >= .9 and tName NOT LIKE '%random'}
    ) or die "Couldn't prepare statement: " . $dbh->errstr;
    $sth->execute()
        or die "Couldn't execute statement: " . $sth->errstr;

    my $saw_rows = 0;
    while (my @row = $sth->fetchrow_array) {
        $saw_rows = 1;
        my ($matches, $name, $qSize) = @row[1, 10, 11];
        my $ratio = $matches / $qSize;

        if (!$percentages{$name} || $ratio > $percentages{$name}) {
            $percentages{$name} = $ratio;
        }
    }
    print "Nothing\n" unless $saw_rows;

    $sth->finish;
}

# ---------------------------------------------------------------------------
# Pass 2: for every best-ratio alignment on a listed chromosome, look up the
# matching refGene record and collect its exon and intron sizes.
# ---------------------------------------------------------------------------

# Transcripts for which two records disagreed on an exon or intron size.
my %transcripts_that_vary = ();

# Both statements are prepared once and re-executed with bound values instead
# of being rebuilt from interpolated strings on every iteration.
my $ali_sth = $dbh->prepare(
    q{select * from refSeqAli where tName = ? and (matches/qSize) >= .9}
) or die "Couldn't prepare statement: " . $dbh->errstr;

my $gene_sth = $dbh->prepare(
    q{select * from refGene where name = ? and chrom = ? and txStart = ?}
) or die "Couldn't prepare statement: " . $dbh->errstr;

foreach my $chrom (@chroms) {
    $ali_sth->execute($chrom)
        or die "Couldn't execute statement: " . $ali_sth->errstr;

    ALIGNMENT:
    while (my @row = $ali_sth->fetchrow_array) {
        my ($matches, $strand, $name, $qSize, $tStart) = @row[1, 9, 10, 11, 16];

        # Skip alignments that are not the best one for this transcript.
        next ALIGNMENT unless $percentages{$name} == $matches / $qSize;

        $gene_sth->execute($name, $chrom, $tStart)
            or die "Couldn't execute statement: " . $gene_sth->errstr;

        # Only the first matching refGene row is used; release the rest.
        my @gene = $gene_sth->fetchrow_array;
        $gene_sth->finish;
        next ALIGNMENT unless @gene;

        my $transcript = $gene[0];

        # The first record seen for a transcript fixes its location.
        if (!$refseq_chrom{$transcript}) {
            $refseq_chrom{$transcript}   = $gene[1];
            $refseq_txStart{$transcript} = $gene[3];
            $refseq_txEnd{$transcript}   = $gene[4];
        }

        my $exon_count  = $gene[7];
        my @exon_starts = split /,/, $gene[8];
        my @exon_ends   = split /,/, $gene[9];

        # Exons: size is end minus start of the same entry.
        for my $i (0 .. $#exon_starts) {
            my $exon_size = $exon_ends[$i] - $exon_starts[$i];
            if ($exon_size < 1) {
                print "Error: Exon size should not be less than one.\n";
            }

            my $ordinal   = $strand eq "+" ? $i + 1 : $exon_count - $i;
            my $exon_name = $transcript . "." . $ordinal;

            if (_record_largest_size(\%refseq_exon_sizes, $exon_name, $exon_size)) {
                $transcripts_that_vary{$transcript} = 1;
            }
        }

        # Introns: gap between the end of one exon and the start of the next.
        for my $i (0 .. $#exon_starts - 1) {
            my $intron_size = $exon_starts[$i + 1] - $exon_ends[$i];
            if ($intron_size < 0) {
                print "Error: Intron size should not be less than zero.\n";
            }

            my $ordinal     = $strand eq "+" ? $i + 1 : $exon_count - $i - 1;
            my $intron_name = $transcript . "." . $ordinal;

            if (_record_largest_size(\%refseq_intron_sizes, $intron_name, $intron_size)) {
                $transcripts_that_vary{$transcript} = 1;
            }
        }
    }
}

$ali_sth->finish;
$dbh->disconnect;

# Debugging aid: list the transcripts whose sizes disagreed between records.
#print join("\n", sort keys %transcripts_that_vary);

# A plain true value (rather than "return 1;") lets the file load as a module
# and also run directly as a script.
1;