#!/usr/bin/perl
package Phase;
#The standard phase record layout is: scf_begin_end_sRNA_totalsRNA_peek_phaseScore_phaseLength
#***********************************test begin***************************************#
# NOTE: this call is a top-level statement, so it runs whenever the file is
# executed or loaded; the file names below are placeholders to be replaced.
checkPhaseSignals("your_name", "your_name.fa", "your_name.map", "your_reference.fa", "your_name_phase.info", "your_name_most_abundance_sRNA.fa");
#checkPhaseSignals("internet_smRNA", "internet_smRNA.fa", "internet_smRNA.map", "internet_reference.fa", "internet_phase.info", "internet_most_abundance_sRNA.fa");
#	checkPhaseSignals("GmaxsRNA", "GmaxsRNA.fa", "GmaxsRNA.map", "Gmax_189.fa", "Gmax_phase.info", "Gmax_most_abundance_sRNA.fa");
#**********************************test end*****************************************#
# Entry point: load the reference and the sRNA/mapping data, then scan every
# mapped scaffold for phased clusters, once with a 21-nt and once with a
# 24-nt phase length.
#
# Arguments (positional):
#   sample name, sRNA FASTA file, mapping file, reference FASTA file,
#   phase-info output file, most-abundant-sRNA output file.
#
# The working tables are package-level variables on purpose: @scfs is read
# directly by privateBuildMapRnaHash and privateCheck.
# Returns whatever the last privateCheck call returns.
sub checkPhaseSignals{
	my($sample, $rnaFile, $mapFile, $genomeFile, $phaseOut, $topsRNAOut) = @_;

	# Start from empty tables so repeated calls do not see stale data.
	%$_ = () for (\%genomeHash, \%mapHash, \%rnaHash, \%rnaSeqHash);
	@scfs = ();

	buildGenomeHash($genomeFile, \%genomeHash);
	privateBuildMapRnaHash($rnaFile, $mapFile, \%rnaHash, \%mapHash, \%rnaSeqHash);

	# Both passes append to the same two output files.
	my $status;
	foreach my $phaseLength (21, 24){
		$status = privateCheck($phaseLength, $sample, $phaseOut, \%genomeHash, \%mapHash, \%rnaSeqHash, $topsRNAOut);
	}
	return $status;
}

# Read a FASTA file into a hash of sequence name => sequence.
#
# Arguments:
#   $genome     - path of the FASTA file to read.
#   $genomeHash - hash reference that receives one entry per record; the key
#                 is the first whitespace-delimited word after '>', the value
#                 is the concatenation of the record's sequence lines.
#
# Dies if the file cannot be opened (previously the failure was silent and
# the hash simply stayed empty).
# No entry is created for text that precedes the first header, so the hash no
# longer receives a spurious "" key.
sub buildGenomeHash{
	my($genome, $genomeHash) = @_;
	open(my $genomeFh, '<', $genome)
		or die "buildGenomeHash: cannot open genome file '$genome': $!";
	my $title;      # name of the record currently being read; undef before the first header
	my $seq = "";
	while(my $line = <$genomeFh>){
		chomp $line;
		$line =~ s/\r\z//;  # tolerate CRLF files so sequence lengths are not inflated
		if($line =~ />(\S+)/){
			# A new header closes the previous record, if there was one.
			$$genomeHash{$title} = $seq if defined $title;
			$title = $1;
			$seq = "";
		}else{
			$seq .= $line;
		}
	}
	# Do not forget the last record.
	$$genomeHash{$title} = $seq if defined $title;
	close $genomeFh;
}

# Load the sRNA FASTA file and the mapping file into lookup tables.
#
# Arguments:
#   $rna        - sRNA FASTA path; headers look like ">name count".
#   $map        - tab-separated mapping file; the columns used are
#                 0 = sRNA name, 1 = scaffold, 2 = start, 3 = end, 4 = strand.
#   $rnaHash    - hash ref filled with name => count (second header word).
#   $mapHash    - hash ref filled as {scaffold}{position}{key}; see below.
#   $rnaSeqHash - hash ref filled with name => sequence line.
#
# Keys written under $$mapHash{scaffold}{start}:
#   "<length><strand>"             => count  (keeps the strand for plotting)
#   "<length>ignoreStrandRnaName"  => sRNA name
# and, only for lengths 20..24, under the phase position (start, or start + 2
# for "-" strand hits):
#   "<length>"                     => count
#   "<length>rnaName"              => sRNA name
#
# Side effects: appends each newly seen scaffold to the package-level @scfs
# (first-seen order) and finally empties the package-level %rnaHash.
# Dies if either input file cannot be opened.
sub privateBuildMapRnaHash{
	my($rna, $map, $rnaHash, $mapHash, $rnaSeqHash) = @_;
	my $minPhaseLength = 20;
	my $maxPhaseLength = 24;

	open(my $rnaFh, '<', $rna)
		or die "privateBuildMapRnaHash: cannot open sRNA file '$rna': $!";
	my $rnaName;    # name from the most recent header line
	while(my $line = <$rnaFh>){
		chomp $line;
		# A blank line (e.g. at end of file) must not overwrite the stored sequence.
		next if $line eq "";
		if($line =~ />(\S+)\s+(\S+)/){
			$rnaName = $1;
			$$rnaHash{$rnaName} = $2;
		}elsif(defined $rnaName){
			$$rnaSeqHash{$rnaName} = $line;
		}
	}
	close $rnaFh;

	open(my $mapFh, '<', $map)
		or die "privateBuildMapRnaHash: cannot open map file '$map': $!";
	# Hash of scaffolds already in @scfs: constant-time membership test
	# instead of a linear scan of @scfs for every mapping line.
	my %seenScf = map { $_ => 1 } @scfs;
	while(my $line = <$mapFh>){
		chomp $line;
		my @fields = split /\t/, $line;
		# Name, scaffold, start and end are required; skip blank/truncated lines.
		next if @fields < 4;
		my($name, $scf, $start, $end) = @fields;
		my $strand = defined $fields[4] ? $fields[4] : "";

		push @scfs, $scf unless $seenScf{$scf}++;

		my $rnaLength = $end - $start + 1;
		# Strand-aware entry, kept so that sRNA plots can use the strand.
		$$mapHash{$scf}{$start}{$rnaLength.$strand} = $$rnaHash{$name};
		$$mapHash{$scf}{$start}{$rnaLength."ignoreStrandRnaName"} = $name;

		next if($rnaLength < $minPhaseLength || $rnaLength > $maxPhaseLength);

		# Minus-strand hits are registered 2 nt downstream of their start.
		my $phasePos = $strand eq "-" ? $start + 2 : $start;
		$$mapHash{$scf}{$phasePos}{$rnaLength} = $$rnaHash{$name};
		$$mapHash{$scf}{$phasePos}{$rnaLength."rnaName"} = $name;
	}
	close $mapFh;

	# Empties the package-level %rnaHash (the hash checkPhaseSignals passes in
	# as $rnaHash); a different hash passed by another caller is left as is.
	%rnaHash = ();
}
# Scan every scaffold listed in the package-level @scfs for phased sRNA
# regions of one phase length and append the findings to two files.
#
# Arguments:
#   $phaseLength           - phase register length in nt (called with 21 and 24).
#   $sampleName            - accepted for interface compatibility; not used here.
#   $out                   - phase-info file, opened in append mode.
#   $genomeHash            - hash ref scaffold => sequence (only the length is used).
#   $mapHash               - mapping table built by privateBuildMapRnaHash.
#   $rnaSeqHash            - hash ref sRNA name => sequence.
#   $mostAbundancesRNAFile - FASTA file (append mode) receiving, per region, the
#                            most abundant in-phase sRNA.
#
# Each reported region is one tab-separated line in $out:
#   scaffold, begin, end, total sRNA, in-phase sRNA, position of the best
#   score, best score, phase length.
#
# Dies if an output file cannot be opened or closed; previously a failed open
# silently discarded every result, and the sRNA handle was never closed.
sub privateCheck{
	my($phaseLength, $sampleName, $out, $genomeHash, $mapHash, $rnaSeqHash, $mostAbundancesRNAFile) = @_;
	my $scoreThreshold = 1.4;   # a position counts as a phase signal above this score

	open(my $phaseInfoFh, '>>', $out)
		or die "privateCheck: cannot open phase info file '$out' for appending: $!";
	open(my $sRNAFh, '>>', $mostAbundancesRNAFile)
		or die "privateCheck: cannot open sRNA file '$mostAbundancesRNAFile' for appending: $!";

	foreach my $scf (@scfs){
		# A scaffold missing from the reference has length 0 and is skipped by the loop.
		my $scfLength = defined $$genomeHash{$scf} ? length($$genomeHash{$scf}) : 0;
		my $phaseBegin = 0;
		my $phaseEnd = 0;
		my $isPhase = 0;          # 1 while an open region is being extended
		my $maxScore = 0;         # best score seen since the last reported region
		my $maxIndex = 0;         # position of that best score
		my $newPhaseCenter = 0;   # last position whose score exceeded the threshold
		my $i = 0;
		# NOTE(review): a region that is still open when the scaffold ends is
		# never written out - confirm whether that is intended.
		while($i < $scfLength){
			my $phaseScore = privateCheckPhaseSignal($scf, $i, $phaseLength, $mapHash);
			if($phaseScore > $maxScore){
				$maxScore = $phaseScore;
				$maxIndex = $i;
			}
			if($isPhase == 1){ # a region is open: try to extend it
				if($phaseScore > $scoreThreshold){
					$phaseEnd = $i + 4 * $phaseLength - 1;
					$newPhaseCenter = $i;
					$i = $i + $phaseLength;
				}
				else{
					if($i > $newPhaseCenter + 7 * $phaseLength - 1){
						# No signal for 7 phase lengths: close the region, report
						# it, and resume scanning 8 phase lengths further on.
						$i = $i + 8 * $phaseLength;
						my $sRNAInfo = privateGetTotalAndPhasesRNANum($scf, $phaseBegin, $phaseEnd, $phaseLength, $mapHash);
						my @spsRNAInfo = split /\t/, $sRNAInfo;
						$sRNAInfo = $spsRNAInfo[0]."\t".$spsRNAInfo[1];
						my $mostAbundancesRNAName = $spsRNAInfo[2];
						print {$sRNAFh} ">".$mostAbundancesRNAName."\n".$$rnaSeqHash{$mostAbundancesRNAName}."\n";
						print {$phaseInfoFh} $scf."\t".$phaseBegin."\t".$phaseEnd."\t".$sRNAInfo."\t".$maxIndex."\t".$maxScore."\t".$phaseLength."\n";
						$isPhase = 0;
						$maxScore = 0;
					}else{
						$i = $i + $phaseLength;  # move on to the next register position
					}
				}
			}else{
				if($phaseScore > $scoreThreshold){ # a new phase signal was detected
					$isPhase = 1;
					$phaseBegin = $i - 4 * $phaseLength;
					my $j = 1;
					$newPhaseCenter = $i;
					while($phaseBegin < 0){  # pull a negative begin back onto the scaffold, in whole phase steps
						$phaseBegin += $j * $phaseLength;
						$j++;
					}
					$phaseEnd = $i + 4 * $phaseLength - 1;
					$i = $i + $phaseLength;
				}else{  # still no phase signal: advance one nucleotide
					$i++;
				}
			}
		}
	}
	close $sRNAFh
		or die "privateCheck: error closing '$mostAbundancesRNAFile': $!";
	close $phaseInfoFh
		or die "privateCheck: error closing '$out': $!";
}
# Write a per-position profile around a phase score maximum to $file.
#
# Arguments:
#   $scf         - scaffold name.
#   $begin, $end - accepted for interface compatibility; both are overwritten
#                  below, so the values passed in have no effect.
#   $maxIndex    - centre position; the profile covers
#                  $maxIndex - 7*$phaseLength .. $maxIndex + 7*$phaseLength.
#   $phaseLength - phase register length in nt.
#   $mapHash     - mapping table ({scaffold}{position}{length} => count).
#   $file        - output path; overwritten.
#
# Each output line holds four tab-separated values: x, phase score, x, sRNA
# count, where x starts at 0 and grows by 1/$phaseLength per position.
#
# Dies if $file cannot be opened or closed. The count lookup is guarded level
# by level so that it does not create empty entries in the caller's table.
sub privateGetPhaseScoreAndsRNANum{
	my($scf, $begin, $end, $maxIndex, $phaseLength, $mapHash,$file) = @_;
	open(my $profileFh, '>', $file)
		or die "privateGetPhaseScoreAndsRNANum: cannot open '$file' for writing: $!";
	$begin = $maxIndex - 7 * $phaseLength;
	$end = $maxIndex + 7 * $phaseLength - 1;
	my $xPos = 0;
	my $increment = 1 / $phaseLength;
	foreach my $pos ($begin..($end + 1)){
		my $phaseScore = privateCheckPhaseSignal($scf, $pos, $phaseLength, $mapHash);
		my $sRNANum = 0;
		if(exists $$mapHash{$scf}
			&& exists $$mapHash{$scf}{$pos}
			&& exists $$mapHash{$scf}{$pos}{$phaseLength}){
			$sRNANum = $$mapHash{$scf}{$pos}{$phaseLength};
		}
		print {$profileFh} $xPos."\t".$phaseScore."\t".$xPos."\t".$sRNANum."\n";
		$xPos += $increment;
	}
	close $profileFh
		or die "privateGetPhaseScoreAndsRNANum: error closing '$file': $!";
}



# Compute the phase score of one position.
#
# Eight register positions are probed: $begin + k * $phaseLength for
# k = -4 .. 3. With n = number of probed positions that carry an sRNA of
# length $phaseLength and s = sum of their counts, the score is
#   (n - 2) * log(1 + s)
# and negative results are reported as 0.
#
# Arguments:
#   $scf         - scaffold name.
#   $begin       - position being scored.
#   $phaseLength - phase register length in nt.
#   $mapHash     - mapping table ({scaffold}{position}{length} => count).
#
# The table is only read: every level is fetched as a plain value, so probing
# a position that has no data no longer creates an empty hash for it. This
# matters because the sub is called for every scanned position.
sub privateCheckPhaseSignal{
	my($scf, $begin, $phaseLength, $mapHash) = @_;
	my $byPosition = $$mapHash{$scf};
	return 0 unless defined $byPosition;   # unknown scaffold: nothing occupied, score 0

	my $occupied = 0;
	my $countSum = 1;   # starts at 1 so that log() is never taken of 0
	foreach my $window (-4..3){
		my $entry = $$byPosition{$begin + $window * $phaseLength};
		next unless defined $entry && exists $$entry{$phaseLength};
		$countSum += $$entry{$phaseLength};
		$occupied++;
	}
	my $score = ($occupied - 2) * log($countSum);
	return $score > 0 ? $score : 0;
}
# Summarise the sRNAs of length $phaseLength inside one region.
#
# Arguments:
#   $scf          - scaffold name.
#   $begin, $end  - region bounds; only positions p with
#                   p + $phaseLength - 1 <= $end are considered.
#   $phaseLength  - phase register length in nt.
#   $mapHash      - mapping table ({scaffold}{position}{length} => count,
#                   {..}{"<length>rnaName"} => sRNA name).
#
# Returns one tab-separated string:
#   number of occupied positions, number of those in register with $begin,
#   name of the in-register sRNA with the highest count ("" if none).
#
# The table is only read; positions without data are not created in it.
sub privateGetTotalAndPhasesRNANum{
	my($scf, $begin, $end, $phaseLength, $mapHash) = @_;
	my $totalsRNANum = 0;
	my $phasesRNANum = 0;
	my $mostAbundancesRNAName = "";
	my $mostAbundancesRNANum = 0;

	my $byPosition = $$mapHash{$scf};
	if(defined $byPosition){
		foreach my $pos ($begin..$end){
			# An sRNA starting here would run past the end of the region.
			last if($pos + $phaseLength - 1 > $end);
			my $entry = $$byPosition{$pos};
			next unless defined $entry && exists $$entry{$phaseLength};
			if(($pos - $begin) % $phaseLength == 0){
				$phasesRNANum++;
				if($$entry{$phaseLength} > $mostAbundancesRNANum){
					$mostAbundancesRNANum = $$entry{$phaseLength};
					$mostAbundancesRNAName = $$entry{$phaseLength."rnaName"};
				}
			}
			$totalsRNANum++;
		}
	}
	return $totalsRNANum."\t".$phasesRNANum."\t".$mostAbundancesRNAName;
}
# Membership test by string equality.
#   $array - array reference to search.
#   $x     - value to look for.
# Returns 1 as soon as an element equal to $x (compared with eq) is met,
# and -1 when no element matches.
sub find{
	my($array, $x) = @_;
	my $lastIndex = $#{$array};
	for(my $k = 0; $k <= $lastIndex; $k++){
		return 1 if $$array[$k] eq $x;
	}
	return -1;
}

