#!/usr/bin/perl

# ===========================================================================
#
#                            PUBLIC DOMAIN NOTICE
#            National Center for Biotechnology Information (NCBI)
#
#  This software/database is a "United States Government Work" under the
#  terms of the United States Copyright Act.  It was written as part of
#  the author's official duties as a United States Government employee and
#  thus cannot be copyrighted.  This software/database is freely available
#  to the public for use. The National Library of Medicine and the U.S.
#  Government do not place any restriction on its use or reproduction.
#  We would, however, appreciate having the NCBI and the author cited in
#  any work or product based on this material.
#
#  Although all reasonable efforts have been taken to ensure the accuracy
#  and reliability of the software and data, the NLM and the U.S.
#  Government do not and cannot warrant the performance or results that
#  may be obtained by using this software or data. The NLM and the U.S.
#  Government disclaim all warranties, express or implied, including
#  warranties of performance, merchantability or fitness for any particular
#  purpose.
#
# ===========================================================================
#
# File Name:  nquire
#
# Author:  Jonathan Kans
#
# Version Creation Date:   8/20/12
#
# ==========================================================================

# Entrez Direct - EDirect

# use strict;
use warnings;

my ($LibDir, $ScriptName);

use File::Spec;

BEGIN
{
  my $Volume;
  ($Volume, $LibDir, $ScriptName) = File::Spec->splitpath($0);
  $LibDir = File::Spec->catpath($Volume, $LibDir, '');
  if (my $RealPathname = eval {readlink $0}) {
    do {
      $RealPathname = File::Spec->rel2abs($RealPathname, $LibDir);
      ($Volume, $LibDir, undef) = File::Spec->splitpath($RealPathname);
      $LibDir = File::Spec->catpath($Volume, $LibDir, '')
    } while ($RealPathname = eval {readlink $RealPathname});
  } else {
    $LibDir = File::Spec->rel2abs($LibDir)
  }
  $LibDir .= '/aux/lib/perl5';
}
use lib $LibDir;

use LWP::UserAgent;
use POSIX;
use URI::Escape;

# definitions

use constant false => 0;
use constant true  => 1;

# utility subroutines

sub clearflags {
  %macros = ();
  $agent = "Nquire/1.0";
  $alias = "";
  $debug = false;
  $http = "";
  $output = "";
}

sub map_macros {

  $qury = shift (@_);

  if ( $qury !~ /\(#/ ) {
    return $qury;
  }

  if ( scalar (keys %macros) > 0 ) {
    for ( keys %macros ) {
      $ky = $_;
      $vl = $macros{$_};
      $qury =~ s/\((\#$ky)\)/$vl/g;
    }
  }

  return $qury;
}

sub read_aliases {

  if ( $alias ne "" ) {
    if (open (my $PROXY_IN, $alias)) {
      while ( $thisline = <$PROXY_IN> ) {
        $thisline =~ s/\r//;
        $thisline =~ s/\n//;
        $thisline =~ s/ +/ /g;
        $thisline =~ s/> </></g;

        if ( $thisline =~ /(.+)\t(.+)/ ) {
          $ky = $1;
          $vl = $2;
          $vl =~ s/\"//g;
          $macros{"$ky"} = "$vl";
        }
      }
      close ($PROXY_IN);
    } else {
      print STDERR "Unable to open alias file '$alias'\n";
    }
  }
}

# send actual query

sub do_post {

  $urlx = shift (@_);
  $argx = shift (@_);

  $rslt = "";

  if ( $debug ) {
    print STDERR "$urlx?$argx\n";
  }

  if ( $http eq "get" or $http eq "GET" ) {
    if ( $argx ne "" ) {
      $urlx .= "?";
      $urlx .= "$argx";
    }

    $usragnt = new LWP::UserAgent (timeout => 300);
    $usragnt->agent( "$agent" );

    $res = $usragnt->get ( $urlx );

    if ( $res->is_success) {
      $rslt = $res->content;
    } else {
      print STDERR $res->status_line . "\n";
    }

    if ( $rslt eq "" ) {
      print STDERR "No do_get output returned from '$urlx'\n";
    }

    if ( $debug ) {
      print STDERR "$rslt\n";
    }

    return $rslt;
  }

  $usragnt = new LWP::UserAgent (timeout => 300);
  $usragnt->agent( "$agent" );

  $req = new HTTP::Request POST => "$urlx";
  $req->content_type('application/x-www-form-urlencoded');
  $req->content("$argx");

  $res = $usragnt->request ( $req );

  if ( $res->is_success) {
    $rslt = $res->content;
  } else {
    print STDERR $res->status_line . "\n";
  }

  if ( $rslt eq "" ) {
    if ( $argx ne "" ) {
      $urlx .= "?";
      $urlx .= "$argx";
    }
    print STDERR "No do_post output returned from '$urlx'\n";
  }

  if ( $debug ) {
    print STDERR "$rslt\n";
  }

  return $rslt;
}

# uri_escape with backslash exceptions

sub do_uri_escape {

  $patx = shift (@_);

  $rslt = "";

  while ( $patx ne "" ) {
    if ( $patx =~ /^\\\\(.+)/ ) {
      $rslt .= "\\";
      $patx = $1;
    } elsif ( $patx =~ /^\\(.)(.+)/ ) {
      $rslt .= $1;
      $patx = $2;
    } elsif ( $patx =~ /^(.)(.+)/ ) {
      $rslt .= uri_escape ($1);
      $patx = $2;
    } elsif ( $patx =~ /^(.)/ ) {
      $rslt .= uri_escape ($1);
      $patx = "";
    }
  }

  return $rslt;
}

# nquire executes an external URL query from command line arguments

my $nquire_help = qq{
Query Commands

  -get    Uses HTTP GET instead of POST
  -url    Base URL for external search

Examples

  nquire -get -url "http://collections.mnh.si.edu/services/resolver/resolver.php" \\
    -voucher "Birds:625456" |
  xtract -pattern Result -element ScientificName Country

  nquire -get -url http://w1.weather.gov/xml/current_obs/KSFO.xml |
  xtract -pattern current_observation -tab "\\n" \\
    -element weather temp_f wind_dir wind_mph

  nquire -eutils efetch.fcgi -db pubmed -id 2539356 -rettype medline -retmode text

  nquire -eutils esummary.fcgi -db pubmed -id 2539356 -version 2.0

  nquire -url "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" elink.fcgi \\
    -dbfrom protein -db protein -cmd neighbor -linkname protein_protein -id NP_476532.1

  nquire -eutils esearch.fcgi -db pubmed -term "transposition immunity Tn3" |
  xtract -pattern eSearchResult -element QueryTranslation

};

sub nquire {

  # nquire -url http://... -tag value -tag value | ...

  $url = "";
  $arg = "";
  $pfx = "";
  $amp = "";
  $pat = "";

  @args = @ARGV;
  $max = scalar @args;

  if ( $max > 0 and $ARGV[0] eq "-help" ) {
    print $nquire_help;
    return;
  }

  if ( $max < 2 ) {
    return;
  }

  $i = 0;

  # if present, -debug must be first argument, only prints generated URL (undocumented)

  if ( $i < $max ) {
    $pat = $args[$i];
    if ( $pat eq "-debug" ) {
      $i++;
      $debug = true;
    }
  }

  # if present, -http get or -get must be next

  # nquire -get -url "http://collections.mnh.si.edu/services/resolver/resolver.php" -voucher "Birds:625456"

  if ( $i < $max ) {
    $pat = $args[$i];
    if ( $pat eq "-http" ) {
      $i++;
      if ( $i < $max ) {
        $http = $args[$i];
        $i++;
      }
    } elsif ( $pat eq "-get" ) {
      $i++;
      $http = "get";
    }
  }

  # if present, -agent must be next argument (undocumented)

  if ( $i < $max ) {
    $pat = $args[$i];
    if ( $pat eq "-agent" ) {
      $i++;
      if ( $i < $max ) {
        $agent = $args[$i];
        $i++;
      }
    }
  }

  # read file of keyword shortcuts for URL expansion

  if ( $i < $max ) {
    $pat = $args[$i];
    if ( $pat eq "-alias" ) {
      $i++;
      if ( $i < $max ) {
        $alias = $args[$i];
        if ( $alias ne "" ) {
          read_aliases ();
        }
        $i++;
      }
    }
  }

  # read URL

  if ( $i < $max ) {
    $pat = $args[$i];
    if ( $pat eq "-url" ) {
      $i++;
      if ( $i < $max ) {
        $url = $args[$i];
        $url = map_macros ($url);
        $i++;
      }
    } elsif ( $pat eq "-ncbi" ) {
      # shortcut for ncbi base (undocumented)
      $i++;
      if ( $i < $max ) {
        $url = "https://www.ncbi.nlm.nih.gov";
      }
    } elsif ( $pat eq "-eutils" ) {
      # shortcut for eutils base (undocumented)
      $i++;
      if ( $i < $max ) {
        $url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
      }
    } elsif ( $pat eq "-test" ) {
      # shortcut for eutilstest base (undocumented)
      $i++;
      if ( $i < $max ) {
        $url = "https://eutilstest.ncbi.nlm.nih.gov/entrez/eutils";
      }
    } elsif ( $pat eq "-hydra" ) {
      # internal citation match request (undocumented)
      $i++;
      if ( $i < $max ) {
        $url = "https://www.ncbi.nlm.nih.gov/projects/hydra/hydra_search.cgi";
        $pat = $args[$i];
        $pat = map_macros ($pat);
        $enc = do_uri_escape ($pat);
        $arg="search=pubmed_search_citation_top_20.1&query=$enc";
        $amp = "&";
        $i++;
      }
    } elsif ( $pat eq "-revhist" ) {
      # internal sequence revision history request (undocumented)
      $i++;
      if ( $i < $max ) {
        $url = "https://www.ncbi.nlm.nih.gov/sviewer/girevhist.cgi";
        $pat = $args[$i];
        $arg="cmd=seqid&txt=on&seqid=asntext&os=PUBSEQ_OS&val=$pat";
        $amp = "&";
        $i++;
      }
    }
  }

  if ( $url eq "" ) {
    return;
  }

  # hard-coded URL aliases for common NCBI web sites

  if ( $url =~ /\(#/ ) {

    $ky = "ncbi_url";
    if ( $url =~ /\(#$ky\)/ ) {
      $vl = "https://www.ncbi.nlm.nih.gov";
      $url =~ s/\((\#$ky)\)/$vl/g;
    }

    $ky = "eutils_url";
    if ( $url =~ /\(#$ky\)/ ) {
      $vl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
      $url =~ s/\((\#$ky)\)/$vl/g;
    }
  }

  # arguments before next minus are added to base URL as /value

  $go_on = true;
  while ( $i < $max and $go_on ) {
    $pat = $args[$i];
    if ( $pat =~ /^-(.+)/ ) {
      $go_on = false;
    } else {
      $pat = map_macros ($pat);
      $url .= "/" . $pat;
      $i++;
    }
  }

  # now expect tag with minus and value[s] without, add as &tag=value[,value]

  while ( $i < $max ) {
    $pat = $args[$i];
    if ( $pat =~ /^-(.+)/ ) {
      $pat = $1;
      $pfx = $amp . "$pat=";
      $amp = "";
    } else {
      $pat =~ s/^\\-/-/g;
      $pat = map_macros ($pat);
      $enc = do_uri_escape ($pat);
      $arg .= $pfx . $enc;
      $pfx = ",";
      $amp = "&";
    }
    $i++;
  }

  if ( $debug ) {
    if ( $arg eq "" ) {
      print "$url\n";
    } else {
      print "$url?$arg\n";
    }
    return;
  }

  $output = do_post ($url, $arg);

  print "$output";
}

# initialize

clearflags ();

# execute URL request

nquire ();

# close input and output files

close (STDIN);
close (STDOUT);
close (STDERR);
