#!/usr/bin/perl -w # Invoke nucmer with the appropriate arguments for matching short # reads with some number of allowed mismatches. # # Usage: # perl nucmer_reads.pls use strict; use warnings; use POSIX qw(ceil floor); use Getopt::Std; sub usage($) { my $msg = shift; print "$msg\n\n"; print "Usage:\n"; print " perl nucmer_reads.pls [-v] \n\n"; print "Options:\n"; print " -v verbose output\n\n"; exit 1; } # Must be able to run nucmer system("nucmer --version 2> /dev/null") == 0 || die "nucmer is not in your PATH; please add it and re-run"; # Get -v option my %options=(); getopts("v",\%options); my $verbose = 0; $verbose = $options{v} if defined $options{v}; defined($ARGV[0]) || usage("Must specify minimum read length as arg 1"); my $r = $ARGV[0]; print "Min read length: $r\n" if $verbose; defined($ARGV[1]) || usage("Must specify maximum number of substitutions as arg 2"); my $d = $ARGV[1]; print "Max differences: $d\n" if $verbose; defined($ARGV[2]) || usage("Must specify reference sequence file as arg 3"); my $ref = $ARGV[2]; print "Reference seq: $ref\n" if $verbose; defined($ARGV[3]) || usage("Must specify query sequence file as arg 4"); my $qry = $ARGV[3]; print "Query seq: $qry\n" if $verbose; # Calculate nucmer parameters my $minmatch = ceil(($r-$d) / ($d+1)); my $mincluster = $minmatch; my $maxgap = $d; my $breaklen = $r - $minmatch; # Run nucmer my $cmd = "nucmer --maxmatch -b $breaklen -c $mincluster -g $maxgap -l $minmatch --nooptimize $ref $qry"; print "Command: $cmd\n" if $verbose; exec($cmd);