Demo entry 5990687

sop

   

Submitted by anonymous on Sep 13, 2016 at 04:02
Language: Perl. Code size: 3.6 kB.

#!/usr/bin/perl -w

# Count the reads of each sample.
# Usage: perl 0_getRsum.pl -i input/parameter.txt

use strict;
use warnings;
use Getopt::Std;

# Filled in by getopts(): the parameter file given with -i.
our $opt_i;
# Refuse to run silently when the option parsing fails or -i is missing;
# without a parameter file none of the stages in main() would do anything.
getopts("i:") or die "Usage: perl 0_getRsum.pl -i input/parameter.txt\n";
die "Usage: perl 0_getRsum.pl -i input/parameter.txt\n" unless $opt_i;

# Settings derived from the parameter file by getParameter().
our $path;          # value of the "path=" entry
our $isamplef;      # sample list file: $path followed by the "sf=" value
our $dataf;         # data folder: $path followed by the "dataf=" value
our $of;            # output file: "$outputFolder/" followed by the "tmpof=" value
our $outputFolder;  # value of the "ofolder=" entry

# Names of the files in $dataf that match _R1 or _R2 (set by getDataFiles()).
our @iDataFiles;
# Not referenced by the code visible in this file.
our %sampleReadInfo;

main();
exit;

# Run the three stages in order.  A stage is skipped when its sub is not
# defined or when the globals it depends on are still empty.
sub main{
	# Stage 1: read the parameter file named by -i.
	if(defined(&getParameter) and $opt_i){
		getParameter();
	}
	# Stage 2: list the R1/R2 files of the data folder.
	if(defined(&getDataFiles) and $dataf){
		getDataFiles();
	}
	# Stage 3: count reads per sample and write the report.
	if(defined(&getSampleList) and $isamplef and $dataf and $of){
		getSampleList();
	}
}


# Write the read-count report to $of.
#
# Every line of the sample list $isamplef is copied to the report with its
# commas turned into tabs.  Lines that look like a sample row (a number, then
# an id starting with "QA" and three digits) get two extra columns: the read
# counts of the R1 and R2 files in @iDataFiles whose names contain the sample
# id.  The "Sample_ID" header line gets the matching column titles.  A final
# row reports the counts of the "Undetermined" R1/R2 files.
#
# Reads the globals $of, $isamplef, $dataf and @iDataFiles; takes no
# arguments.  Dies when the report or the sample list cannot be opened or the
# report cannot be closed.
sub getSampleList{
	open(my $out, '>', $of) or die "can not write $of: $!\n";
	open(my $in, '<', $isamplef) or die "can not read $isamplef: $!\n";

	# count read for each sample
	while(my $line = <$in>){
		chomp $line;
		$line =~ s/,/\t/g;
		if($line =~ /^\d+\tQA\d{3}/){
			my @fields = split(/\t/, $line);
			my $sample = $fields[1];
			my $r1n = 0;
			my $r2n = 0;
			# NOTE(review): when several files match one sample (e.g. one
			# file per lane, or ids that are prefixes of other ids) the last
			# match wins, as before - confirm whether counts should be summed.
			foreach my $fname (@iDataFiles){
				# \Q..\E: the sample id is data, not a regular expression.
				if($fname =~ /\Q$sample\E.*_R1_/){
					$r1n = _countReads($dataf.'/'.$fname);
				}
				if($fname =~ /\Q$sample\E.*_R2_/){
					$r2n = _countReads($dataf.'/'.$fname);
				}
			}
			# print sample count of r1 r2
			print {$out} $line."\t$r1n\t$r2n\n";
		}elsif($line =~ /^Sample_ID/){
			print {$out} "$line\tR1_Count\tR2_Count\n";
		}else{
			print {$out} "$line\n";
		}
	}
	close($in);

	# count of Undetermined
	my $undetR1 = 0;
	my $undetR2 = 0;
	foreach my $fname (@iDataFiles){
		if($fname =~ /Undetermined.*_R1_/){
			$undetR1 = _countReads($dataf.'/'.$fname);
		}
		if($fname =~ /Undetermined.*_R2_/){
			$undetR2 = _countReads($dataf.'/'.$fname);
		}
	}
	print {$out} "\tUndetermined\t\t\t\t\t\t\t\t\t$undetR1\t$undetR2\n";
	# Buffered write errors only show up when the handle is closed.
	close($out) or die "can not finish writing $of: $!\n";
}

# Return the number of reads in one gzip-compressed file: the number of
# decompressed lines divided by 4 (not rounded, so an incomplete last record
# shows up as a fraction).
#
# The lines are counted while streaming, so the decompressed data is never
# held in memory.  Dies when gzip cannot be started; only warns when gzip
# exits with an error, so one unreadable file does not abort the whole report.
sub _countReads{
	my ($file) = @_;
	my $nLines = 0;
	# List-form pipe open: the file name reaches gzip as one argument and is
	# never interpreted by a shell.
	open(my $fh, '-|', 'gzip', '-dc', $file) or die ("can not open $file\n");
	# A lexical line variable keeps the caller's $_ untouched.
	while(defined(my $fastqLine = <$fh>)){
		$nLines++;
	}
	# close() on a pipe reports the exit status of the child process.
	close($fh) or warn "gzip -dc $file exited abnormally (status $?); read count may be wrong\n";
	return $nLines/4;
}

# Fill @iDataFiles with the names (without directory) of the plain files in
# $dataf whose names contain "_R1" or "_R2".  Dies when $dataf cannot be
# opened as a directory.
sub getDataFiles{
	opendir(my $dh, $dataf) or die $!;
	@iDataFiles = ();
	foreach my $entry (readdir($dh)){
		next unless $entry =~ /_R1|_R2/;
		# Keep plain files only; directories and other entries are skipped.
		next unless -f "$dataf/$entry";
		push(@iDataFiles, $entry);
	}
	closedir($dh);
}

# Read the parameter file named by -i ($opt_i) and set the globals used by
# the other stages:
#   path=    -> $path
#   sf=      -> $isamplef      ($path followed by the value)
#   dataf=   -> $dataf         ($path followed by the value)
#   tmpof=   -> $of            ("$outputFolder/" followed by the value)
#   ofolder= -> $outputFolder  (created when it does not exist yet)
# When a key occurs on several lines the last one wins.
#
# Exits with status 1 when no path= entry is found.  Dies when the parameter
# file cannot be read, when sf=, tmpof= or ofolder= is missing or empty, or
# when the output folder cannot be created.
sub getParameter{
	my $tmpdataf;
	my $tmpisamplef;
	my $tmpof;
	open(my $in, '<', $opt_i) or die "can not read parameter file $opt_i: $!\n";
	while(my $line = <$in>){
		chomp $line;
		# Parameter files saved with CRLF line ends would otherwise leave a
		# carriage return at the end of every value.
		$line =~ s/\r\z//;
		$path = $1 if($line=~/path=(.*)$/);
		$tmpisamplef = $1 if($line=~/sf=(.*)$/);
		$tmpdataf = $1 if($line=~/dataf=(.*)$/);
		$tmpof = $1 if($line=~/tmpof=(.*)$/);
		$outputFolder = $1 if($line=~/ofolder=(.*)$/);
	}
	close($in);

	# A missing data directory is an error: report it on STDERR and exit
	# with a non-zero status.
	if(!$path){print STDERR "please input the data directory.\n";exit 1;}

	# Without these entries the paths below would be built from undefined
	# values and point at the wrong place.
	my %required = (sf => $tmpisamplef, tmpof => $tmpof, ofolder => $outputFolder);
	foreach my $key (sort keys %required){
		my $value = $required{$key};
		if(!defined($value) || $value eq ''){
			die "parameter file $opt_i: missing or empty '$key=' entry\n";
		}
	}
	# No dataf= entry: the data files are looked up directly under $path.
	$tmpdataf = '' unless defined $tmpdataf;

	$isamplef = $path.$tmpisamplef;
	$dataf = $path.$tmpdataf;
	# if there is no output folder, create it (builtin mkdir: no shell, and
	# a failure is reported instead of ignored)
	unless(-d $outputFolder){
		mkdir($outputFolder) or die "can not create output folder $outputFolder: $!\n";
	}
	$of = "$outputFolder/$tmpof";
}

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).