#!/bin/sh -

# This script needs one argument - a file of file names.

PATH=/nfs/disk100/pubseq/emboss/bin/:$PATH
export PATH

PERL_PROG='

use strict;

if (@ARGV < 1)
{ 
  die "$0 needs one argument - a file of sequence file names\n";
}

my $file = $ARGV[0];

chomp $file;

if (-e $file)
{
  my $msf_file = "$file.fasta_msf";   

  my $seqnum = 1;
  printf "$msf_file\n";

  open(LIST_FILE,$file);
  while(my $line = <LIST_FILE>)
  {
    printf "$line\n";
    chomp($line);

    #
    # run descseq to ensure unique names
    #
    my $emboss_command_line = "descseq -append -name _$seqnum $line $line.new -auto";

    open EMBOSS, "|$emboss_command_line" or
              die "cannot open pipe to descseq: $!\n";
    close EMBOSS;

    unlink("$line");
    rename("$line.new","$line");
    $seqnum++;
  }

  my $emboss_command_line = "emma \"\@$file\" -filter -stdout -osf msf  -dendoutfile /dev/null -outseq $msf_file";

  open EMBOSS, "|$emboss_command_line" or
            die "cannot open pipe to emma: $!\n";
  close EMBOSS;
  system "jalview", "$msf_file";
}
else
{
  die "$file does not exist";
}
'

PERL_PROG_FASTA='

use strict;

if (@ARGV != 1) 
{
  die "$0 needs one argument - a file of feature file names\n";
}


sub process_fasta
{
  my $feature_file = shift;
  my $fasta_results_file = shift;

  my $sequence_file;

  if ($fasta_results_file =~ /(.*).out/) {
    $sequence_file = $1;
  } else {
    die "cannot understand $fasta_results_file\n";
  }

  
  if (! -e $sequence_file) {
    if ($feature_file =~ /(.*)(\/jalview\/)(.+)$/)
    {
      $sequence_file = "$1/$sequence_file";
      if (! -e $fasta_results_file &&
          ! -e "$fasta_results_file.gz")
      {
        $fasta_results_file = "$1/$fasta_results_file";
      }
    }
    if (! -e $sequence_file)
    {
      die "cannot find $sequence_file\n";
    }
  }


  my $fasta_seq_for_alignment = "";

  open SEQ_FILE, $sequence_file or die "cannot open $sequence_file: $!\n";

  while (<SEQ_FILE>) {
    $fasta_seq_for_alignment .= $_;
  }

  close SEQ_FILE;

  if (-e $fasta_results_file) {
    open FASTA_OUTPUT, $fasta_results_file
      or die "cannot open $fasta_results_file: $!\n";
  } else {
    my $gzip_fasta_results_file = $fasta_results_file . ".gz";
    if (-e $gzip_fasta_results_file) {
      open FASTA_OUTPUT, "gzip -d < $gzip_fasta_results_file |"
        or die "cannot open $gzip_fasta_results_file: $!\n";
    } else {
      die "cannot find $fasta_results_file or $gzip_fasta_results_file\n";
    }
  }


  my $top_re = "^The best scores are:";

  my $seen_top = 0;
  my $seen_bottom = 0;

  my @protein_ids = ();

  while (<FASTA_OUTPUT>) {
    if (/$top_re/) {
      $seen_top = 1;
      next;
    }

    if ($seen_top && /^\s*$/) {
      $seen_bottom = 1;
      next;
    }

    if ($seen_top && !$seen_bottom) {
      if (/^(\S+)/) {
        if (@protein_ids < 20) {
          push @protein_ids, "$1"
        } else {
          last;
        }
      } else {
        warn "cannot understand this line:\n$_\n";
      }
    }
  }

  my %hash = ();

  @hash{@protein_ids} = (1) x @protein_ids;

  @protein_ids = sort keys %hash;

  my $protein_db = "uniprot";

  # look for each of the IDs from the FASTA output in each of the DBs

  for my $id (@protein_ids) {
    my $fetch = "getz -sf fasta -f seq [uniprot-acc:$id]";

    my $temp_seq = "";

    open FETCH, "$fetch |" or
      die "cannot open pipe to $fetch: $!\n";

    while (<FETCH>) {
      $temp_seq .= $_;
    }

    close FETCH;

    if ($? == 0) {
      $fasta_seq_for_alignment .= $temp_seq;
    } else {
      print STDERR "$id was not found in $protein_db\n";
    }
  }

  my $msf_file = "$feature_file.fasta_msf";

  my $emboss_prog = "emma";

  my $emboss_command_line = "$emboss_prog -filter -stdout -osf msf  -dendoutfile /dev/null > $msf_file";

  open EMBOSS, "|$emboss_command_line" or
     die "cannot open pipe to $emboss_prog: $!\n";

  print EMBOSS $fasta_seq_for_alignment;

  close EMBOSS;

  my $jalview_prog = "jalview";

  print STDERR "\nstarting $jalview_prog:\n";

  system "$jalview_prog", "$msf_file";
}


my $file;

while (defined ($file = <>)) {
  chomp $file;

  if (-e $file) {
    open IN_FILE, "$file\n" or die "cannot open $file\n";

    my $line;

    while (defined ($line = <IN_FILE>)) {
      if ($line  =~ m!/fasta_file="(.*)"!) {
        my $fasta_results_file = $1;

        process_fasta $file, $fasta_results_file;

        last;
      }
    }

    close IN_FILE;
  }
}'

echo "Arguments: $@"
test=`wc -l "$@" | awk '{print $1}'`

if [ $test != 1 ]
then
  perl -w -e "$PERL_PROG" "$@"
else
  perl -w -e "$PERL_PROG_FASTA" "$@"
fi

#if [ $# != 1 ]
#then
#  perl -w -e "$PERL_PROG" "$@"
#else
#  perl -w -e "$PERL_PROG_FASTA" "$@"
#fi


