abutils
Advanced tools
| Metadata-Version: 2.1 | ||
| Name: abutils | ||
| Version: 0.4.17 | ||
| Version: 0.4.18 | ||
| Summary: Utilities for analysis of adaptive immune receptor repertoire (AIRR) data | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/briney/abutils |
@@ -19,145 +19,2 @@ LICENSE | ||
| abutils.egg-info/top_level.txt | ||
| abutils/bin/cdhit_darwin_amd64 | ||
| abutils/bin/cdhit_darwin_arm64 | ||
| abutils/bin/cdhit_linux_amd64 | ||
| abutils/bin/fastp_darwin | ||
| abutils/bin/fastp_linux | ||
| abutils/bin/fasttree_darwin_amd64 | ||
| abutils/bin/fasttree_darwin_arm64 | ||
| abutils/bin/fasttree_linux_amd64 | ||
| abutils/bin/mafft_darwin | ||
| abutils/bin/mafft_linux | ||
| abutils/bin/mmseqs_darwin_amd64 | ||
| abutils/bin/mmseqs_darwin_arm64 | ||
| abutils/bin/mmseqs_linux_amd64 | ||
| abutils/bin/muscle3_darwin | ||
| abutils/bin/muscle3_linux | ||
| abutils/bin/muscle_darwin_amd64 | ||
| abutils/bin/muscle_darwin_arm64 | ||
| abutils/bin/muscle_linux_amd64 | ||
| abutils/bin/vsearch_darwin_amd64 | ||
| abutils/bin/vsearch_darwin_arm64 | ||
| abutils/bin/vsearch_linux_amd64 | ||
| abutils/bin/mafft_darwin_amd64/in | ||
| abutils/bin/mafft_darwin_amd64/mafft.bat | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/bin/mafft | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/addsingle | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/contrafoldwrap | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/countlen | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/dash_client | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/disttbfast | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/dndblast | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/dndfast7 | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/dndpre | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/dvtditr | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/f2cl | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/filter | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/getlag | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/hex2maffttext | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/mafft-distance | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/mafft-homologs.1 | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/mafft-profile | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/mafft.1 | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/mafftash_premafft.pl | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/maffttext2hex | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/makedirectionlist | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/mccaskillwrap | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/multi2hat3s | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/nodepair | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/pairash | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/pairlocalalign | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/regtable2seq | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/replaceu | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/restoreu | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/score | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/seekquencer_premafft.pl | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/seq2regtable | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/setcore | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/setdirection | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/sextet5 | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/splittbfast | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/tbfast | ||
| abutils/bin/mafft_darwin_amd64/mafftdir/libexec/version | ||
| abutils/bin/mafft_darwin_arm64/in | ||
| abutils/bin/mafft_darwin_arm64/mafft.bat | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/bin/mafft | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/addsingle | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/contrafoldwrap | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/countlen | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/dash_client | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/disttbfast | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/dndblast | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/dndfast7 | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/dndpre | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/dvtditr | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/f2cl | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/filter | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/getlag | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/hex2maffttext | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/mafft-distance | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/mafft-homologs.1 | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/mafft-profile | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/mafft.1 | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/mafftash_premafft.pl | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/maffttext2hex | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/makedirectionlist | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/mccaskillwrap | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/multi2hat3s | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/nodepair | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/pairash | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/pairlocalalign | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/regtable2seq | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/replaceu | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/restoreu | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/score | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/seekquencer_premafft.pl | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/seq2regtable | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/setcore | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/setdirection | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/sextet5 | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/splittbfast | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/tbfast | ||
| abutils/bin/mafft_darwin_arm64/mafftdir/libexec/version | ||
| abutils/bin/mafft_linux_amd64/mafft.bat | ||
| abutils/bin/mafft_linux_amd64/mafftdir/bin/mafft | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/addsingle | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/contrafoldwrap | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/countlen | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/dash_alignments | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/dash_client | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/dash_sequences.fa | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/disttbfast | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/dndblast | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/dndfast7 | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/dndpre | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/dvtditr | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/f2cl | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/filter | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/getlag | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/hat3 | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/hex2maffttext | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/mafft-distance | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/mafft-homologs.1 | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/mafft-profile | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/mafft.1 | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/mafftash_premafft.pl | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/maffttext2hex | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/makedirectionlist | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/mccaskillwrap | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/multi2hat3s | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/nodepair | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/pairash | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/pairlocalalign | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/regtable2seq | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/replaceu | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/restoreu | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/score | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/seekquencer_premafft.pl | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/seq2regtable | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/setcore | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/setdirection | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/sextet5 | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/splittbfast | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/tbfast | ||
| abutils/bin/mafft_linux_amd64/mafftdir/libexec/version | ||
| abutils/core/__init__.py | ||
@@ -164,0 +21,0 @@ abutils/core/lineage.py |
@@ -6,3 +6,3 @@ # from .core import * | ||
| # from . import bin, cl, io, pl, tl | ||
| from . import bin, cl, io, pl, tl | ||
| from . import cl as color | ||
@@ -37,2 +37,2 @@ from .core import lineage, pair, sequence | ||
| BINARY_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "bin")) | ||
| BINARY_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "binaries")) |
+1
-1
@@ -44,3 +44,3 @@ #!/usr/bin/env python | ||
| BIN_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "bin") | ||
| BIN_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "binaries") | ||
| SYSTEM = platform.system().lower() | ||
@@ -47,0 +47,0 @@ MACHINE = platform.machine().lower().replace("x86_64", "amd64") |
@@ -6,2 +6,2 @@ # Store the version here so: | ||
| __version__ = "0.4.17" | ||
| __version__ = "0.4.18" |
+1
-1
| Metadata-Version: 2.1 | ||
| Name: abutils | ||
| Version: 0.4.17 | ||
| Version: 0.4.18 | ||
| Summary: Utilities for analysis of adaptive immune receptor repertoire (AIRR) data | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/briney/abutils |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
#! /bin/sh
# Launcher for the bundled MAFFT: resolves the directory this script lives in,
# points MAFFT_BINARIES at the bundled libexec directory and runs the real
# mafft executable with the caller's arguments unchanged.

# Resolve the script directory with POSIX constructs only. pushd/popd are not
# part of POSIX sh, so under shells such as dash they fail and leave rootdir
# pointing at the caller's working directory. CDPATH is cleared so a relative
# dirname cannot be redirected to an unrelated directory.
rootdir=$(CDPATH= cd -- "$(dirname -- "$0")" > /dev/null 2>&1 && pwd)

# Keep the historical fallback (caller's directory) if the lookup fails.
[ -n "$rootdir" ] || rootdir="$PWD"

MAFFT_BINARIES="$rootdir/mafftdir/libexec"; export MAFFT_BINARIES

# "$@" is quoted so that arguments (e.g. $1) containing spaces stay intact.
"$rootdir/mafftdir/bin/mafft" "$@"
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
| #!/usr/bin/perl | ||
| ##################################################################### | ||
| # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) | ||
| # | ||
| # Ver. Date Changelog | ||
| ##################################################################### | ||
| # 1.0 07.26.13 Initial release | ||
| # 2.0 09.03.13 Added extensive warnings and error messages | ||
| # 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs | ||
| # 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output | ||
| # 3.2 12.08.14 Removed 5-char restriction for own structure files | ||
| # | ||
| ##################################################################### | ||
| use strict; | ||
| use Getopt::Long; | ||
| use File::Path qw(make_path remove_tree); | ||
| use LWP::Simple; | ||
| use LWP::UserAgent; | ||
| # to prevent error 'Header line too long (limit is 8192)' [v3.1] | ||
| use LWP::Protocol::http; | ||
| push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); | ||
# Main flow of the MAFFTash "premafft" client:
#   1. read options and validate them,
#   2. build the multipart form (PDB id list and/or own .pdb files),
#   3. POST the job, poll until its status is "done",
#   4. download and checksum-verify the result archives,
#   5. unpack them and move instr/hat3 to the requested output paths.

# REST endpoint of the MAFFTash premafft service.
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";

# -d work directory, -p PDB id list, -o own-structure id list,
# -h hat3 output path, -i instr output path.
my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );

GetOptions
(
    'd=s' => \$WORKDIR,
    'p=s' => \$PDBLIST,
    'o=s' => \$OWNLIST,
    'h=s' => \$HAT3FILE,
    'i=s' => \$INSTRFILE,
);

print STDERR "[MAFFTash-premafft]\n";

# set temp directory (per-process scratch space, removed again by cleanup())
# NOTE(review): the name is predictable and an already existing directory is
# reused as-is; consider File::Temp::tempdir if this runs on shared hosts.
my $TMP = "/tmp/mapremafft$$";
make_path($TMP) unless -d $TMP;

######
# validation
help("Required parameter : atleast one of either '-p' or '-o'") unless ( defined $PDBLIST || defined $OWNLIST );
help("Required parameter : '-d'") if defined $OWNLIST && ! defined $WORKDIR;

$HAT3FILE  = "hat3"  unless defined $HAT3FILE;
$INSTRFILE = "instr" unless defined $INSTRFILE;

# drop a single trailing slash so "$WORKDIR/$id.pdb" has no doubled separator
$WORKDIR =~ s{/\z}{} if defined $WORKDIR;

######
# prepare inputs
print STDERR "Preparing inputs for service request...\n";

# flat key => value list handed to LWP as multipart form fields
my @files = ();
push(@files, "strweight" => "0.5");
push(@files, "premafft" => "1");

# pdb entries
if ( defined $PDBLIST )
{
    print STDERR "PDB List defined!\n";
    bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;

    my $listfile = "$TMP/pdblist.inp";

    # three-argument open: the user-supplied name is never parsed for a mode
    open(my $pdbin, '<', $PDBLIST) or bail("Error: Cannot open file $PDBLIST for reading!");
    open(my $pdbout, '>', $listfile) or bail("Error: Cannot open temporary file $listfile for writing!");

    while ( my $line = <$pdbin> )
    {
        chomp $line;

        # only lines that are exactly five word characters are forwarded
        if ( $line =~ /^(\w{5})$/ )
        {
            print {$pdbout} ">PDBID\n$1\n";
        }
    }

    close $pdbout;
    close $pdbin;

    push(@files, "inputfile" => ["$listfile"]);
}

# upload own structures
my %ownids = ();

if ( defined $OWNLIST )
{
    print STDERR "OWN List defined!\n";
    bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;

    open(my $ownin, '<', $OWNLIST) or bail("Error: Cannot open file $OWNLIST for reading!");

    while ( my $line = <$ownin> )
    {
        chomp $line;
        next unless $line =~ /^(\S+)$/;

        my $ownid   = $1;
        my $fileref = "$WORKDIR/$ownid.pdb";

        unless ( -e $fileref )
        {
            close $ownin;
            bail("Error: File $fileref does not exists!");
        }

        push(@files, "inputownfile[]" => ["$fileref"]);
        $ownids{$ownid} = 1;
    }

    close $ownin;
}

######
# start rest service
print STDERR "Sending service request...\n";

my $browser = LWP::UserAgent->new;
$browser->timeout(0);

# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);

# get response from post request
my ($status, $mafftashid) = parseResponse($postResponse->content);

my $MAXTRIES  = 3;   # download attempts per file
my $STIMER    = 4;   # base sleep interval in seconds
my $longtimer = 0;   # current poll interval

print STDERR "Request sent! Waiting for response...[$mafftashid]\n";

# wait for results until it becomes available
# NOTE(review): there is no upper bound on polling; any status other than
# "done" (including an unparseable reply) keeps the loop going.
while (1)
{
    # poll interval grows by $STIMER per round and wraps back to $STIMER
    # once it has exceeded three times $STIMER
    $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER;
    sleep $longtimer;

    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$mafftashid");

    # an HTTP failure while polling is fatal
    bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content))) unless $getResponse->is_success;

    # get response from get request
    ($status, $mafftashid) = parseResponse($getResponse->content);
    next unless ( $status eq "done" );

    # if job is finished and ready
    print STDERR "Results found!\n";

    my $csfile = "$TMP/checksum.tar.gz";
    my $try1   = 1;

    while (1)
    {
        print STDERR "Fetching Results... [Trial $try1]\n";

        unless ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile )
        {
            bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
            $try1++;
            sleep $STIMER;
            next;
        }

        # checksum list: result file name => expected checksum
        my $checklist = extractchecksum($csfile);
        bail("Error retrieving list of compressed files!") unless keys %$checklist;

        foreach my $id ( keys %$checklist )
        {
            my $checkfile = "$TMP/$id";
            my $checkid   = $checklist->{$id};
            my $try2      = 1;

            while (1)
            {
                # always start from a clean slate for this attempt
                unlink $checkfile if -e $checkfile;

                if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                {
                    my $hashid = getchecksum($checkfile);
                    #print STDERR "[hashid]$hashid [checkid]$checkid\n";

                    # an empty local checksum means "could not compute" and
                    # is accepted; only a real mismatch triggers a retry
                    last unless ( $hashid ne "" && $hashid ne $checkid );

                    unlink $checkfile if -e $checkfile;
                    bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                }
                else
                {
                    bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                }

                $try2++;
                sleep $STIMER;
            }
        }

        last;
    }

    last;
}

# make sure outputs were generated
# decompress
print STDERR "Assembling final results...\n";

# the archive may arrive in several parts; concatenate them before untarring
backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");

# list-form system: the user-supplied destination is passed to mv as one
# argument, so spaces or shell metacharacters in it cannot break the command
system("mv", "-f", "--", "$TMP/instr", $INSTRFILE) if -e "$TMP/instr";
system("mv", "-f", "--", "$TMP/hat3",  $HAT3FILE)  if -e "$TMP/hat3";

# sometimes no hat3 file is generated [v3.1]
#bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;

# warn if some ownids were omitted
if ( scalar keys(%ownids) > 0 )
{
    my %instrids = ();

    open(my $instrin, '<', $INSTRFILE) or bail("Error: Cannot open file $INSTRFILE for reading!");

    while ( my $line = <$instrin> )
    {
        chomp $line;

        # header lines look like ">", digits, "_", id
        if ( $line =~ /^>\d+_(\S+)$/ )
        {
            $instrids{$1} = 1;
        }
    }

    close $instrin;

    foreach my $id ( keys %ownids )
    {
        warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
    }
}

cleanup();
| #################### | ||
| #################### | ||
# Split a service reply into (status, job id).
#
# The reply is expected to be one "<jobid>:<status>" pair in which neither
# part contains whitespace or a colon. Anything else yields two empty strings.
# Note the return order: status first, job id second.
sub parseResponse
{
    my ($reply) = @_;

    # unrecognised reply: report "nothing known" for both fields
    return ("", "") unless $reply =~ /^([^\s:]+):([^\s:]+)$/;

    my ($jobid, $state) = ($1, $2);
    return ($state, $jobid);
}
# Read the checksum listing packed inside a gzipped tar archive.
#
# The archive content is streamed to stdout by tar. Every line made of exactly
# two whitespace-separated fields is stored as second field => first field.
# Returns a reference to that hash; it is empty when tar cannot be started
# or prints nothing usable.
sub extractchecksum
{
    my ($archive) = @_;

    my %by_name = ();

    open(my $listing, "-|", "tar -zxf $archive -O") or return \%by_name;

    while ( my $row = <$listing> )
    {
        chomp $row;
        next unless $row =~ /^(\S+)\s+(\S+)$/;
        $by_name{$2} = $1;
    }

    close $listing;

    return \%by_name;
}
# Pull the message out of an error reply of the form
#   "error":"Invalid number of inputs found."
# Returns the quoted message, or an empty string when no such field is found.
sub parseError
{
    my ($reply) = @_;

    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
    {
        return $1;
    }

    return "";
}
# Compute the MD5 checksum of a file.
#
# Uses the core Digest::MD5 module instead of shelling out to a hard-coded
# /usr/bin/md5sum or /sbin/md5. This removes the dependency on where (or
# whether) an md5 binary is installed and avoids interpolating the file name
# into a shell command.
#
# Parameter: path of the file to hash.
# Returns:   the lowercase hexadecimal MD5 digest, or "" when the file cannot
#            be opened or read (callers treat "" as "checksum unavailable").
sub getchecksum
{
    my $infile = shift;

    # loaded here so the sub does not rely on a file-level import
    require Digest::MD5;

    open(my $fh, '<', $infile) or return "";
    binmode($fh);

    # addfile croaks on read errors; turn that into the "" fallback
    my $checksum = eval { Digest::MD5->new->addfile($fh)->hexdigest };

    close $fh;

    return defined $checksum ? $checksum : "";
}
# Run a command line and throw away whatever it prints to stdout.
#
# Returns 0 only when the command could not be launched at all ($? is -1);
# returns 1 otherwise, regardless of the command's own exit status.
sub backticks
{
    my ($cmdline) = @_;

    readpipe($cmdline);

    return 0 if $? == -1;
    return 1;
}
# Abort the script: print the optional message to STDERR, remove the
# temporary directory via cleanup(), and exit with status 1.
sub bail
{
    my ($message) = @_;

    if ( defined $message )
    {
        print STDERR "$message\n";
    }

    cleanup();
    exit(1);
}
# Remove the script's temporary directory ($TMP) and everything in it.
# Does nothing when $TMP is empty or is not an existing directory.
sub cleanup
{
    return unless ( $TMP ne "" && -d $TMP );

    # collect the directory entries first, then delete them one by one
    my @entries = ();
    if ( opendir(my $dh, $TMP) )
    {
        @entries = readdir($dh);
        closedir($dh);
    }

    for my $entry (@entries)
    {
        my $path = "$TMP/$entry";
        unlink $path if -e $path;
    }

    # finally remove the directory itself (and anything still left inside)
    remove_tree($TMP);
}
# Print the usage text to the currently selected output handle, then hand the
# caller's message to bail(), which prints it to STDERR and exits.
sub help
{
    my ($reason) = @_;

    my $usage = <<'HELPME';
USAGE
./mafftash_premafft.pl -p [FILE]
./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]
PARAMETERS
-p [FILE]
FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format
-o [FILE] -d [DIRECTORY]
FILE contains a list of IDs from your own structure/pdb files (one entry per line)
for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY
-h [HATFILE]
save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory
-i [INSTRFILE]
save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory
HELPME

    print $usage;

    bail($reason);
}
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
| #!/usr/bin/perl | ||
| #################################################################################### | ||
| # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) | ||
| # | ||
| # Ver. Date Changelog | ||
| #################################################################################### | ||
| # 1.0 11.01.13 Initial release | ||
| # | ||
| # **Skipped version 2 to standardise version numbers to seekquencer.pl script** | ||
| # | ||
| # 3.0 04.24.14 Added split option -mod <mafftash-split> for output | ||
| # Uses seekquencer_v3 backend | ||
| # | ||
| # 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin | ||
| # Sets -seqa fast in seekquencer.pl | ||
| # Uses seekquencer_v4 backend | ||
| # | ||
| # 4.1 05.19.14 Added a check on running REST requests before proceeding | ||
| # to avoid server load problems | ||
| # | ||
| # 4.2 05.27.14 Seq limit processing done in seekquencer.pl script | ||
| # to avoid server load problems | ||
| # | ||
| # 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot> | ||
| # Blast limit changed from factor of 10 to -blim option | ||
| # Timing on sleep changed; added srand() for making seed | ||
| # Moved the job limit processing to server side | ||
| # | ||
| # 4.4 08.05.14 Modified to work in multiple OS | ||
| # | ||
| # | ||
| #################################################################################### | ||
| use strict; | ||
| use Getopt::Long; | ||
| use File::Path qw(make_path remove_tree); | ||
| use Cwd; | ||
| use LWP::Simple; | ||
| use LWP::UserAgent; | ||
| # to prevent error: Header line too long (limit is 8192) | ||
| use LWP::Protocol::http; | ||
| push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); | ||
# Main flow of the Seekquencer "premafft" client:
#   1. read options and validate them,
#   2. ask the server whether a new job may be submitted,
#   3. build the input file (if only id/fasta lists were given) and the form,
#   4. POST the job, poll until its status is "done",
#   5. download and checksum-verify the result files,
#   6. append the "<jobid>.out*" results to the requested output file(s).

# REST endpoint of the Seekquencer premafft service.
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";

my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );

my $OUTTYPE = "mafftash";
my $SEQDATABASE = "uniref100";
my $SEQLIMIT = 100;
my $SEQBLASTLIMIT = 100;

my $RUNMODE = "normal"; # thread|normal
my $THREADCOUNT = 3;

GetOptions
(
    'inp=s'  => \$INPUTFILE,
    'idf=s'  => \$IDLISTFILE,
    'seqf=s' => \$SEQFASTAFILE,
    'out=s'  => \$OUTPUTFILE,
    'str'    => \$STRFLAG,
    'seq'    => \$SEQFLAG,
    'seqd=s' => \$SEQDATABASE,
    'lim=i'  => \$SEQLIMIT,
    'blim=i' => \$SEQBLASTLIMIT,
    'pre'    => \$EVALFLAG,
    'noin'   => \$NOINFLAG,
    'mod=s'  => \$OUTTYPE,
    'run=s'  => \$RUNMODE,
    'trd=i'  => \$THREADCOUNT,
);

# checksum verification is skipped on Windows further below
my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;

print STDERR "[Seekquencer-premafft 4.4 on $^O]\n";

# set temp directory (per-process scratch space below the current directory)
my $CWD = getcwd;
my $TMP = "$CWD/seekpremafft$$";
make_path($TMP) unless -d $TMP;

######
# validation
help("Required parameter: define input as '-inp' or '-idf' or '-seqf'") if ( !defined $INPUTFILE && !defined $IDLISTFILE && !defined $SEQFASTAFILE );
help("'-inp' is already defined") if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) );
help("Input file $INPUTFILE does not exist (or filesize is 0)") if ( defined $INPUTFILE && (! -e $INPUTFILE || !-s $INPUTFILE) );
help("Input file $IDLISTFILE does not exist (or filesize is 0)") if ( defined $IDLISTFILE && (! -e $IDLISTFILE || !-s $IDLISTFILE) );
help("Input file $SEQFASTAFILE does not exist (or filesize is 0)") if ( defined $SEQFASTAFILE && (! -e $SEQFASTAFILE || !-s $SEQFASTAFILE) );
help("Required parameter: output file '-out'") unless ( defined $OUTPUTFILE );
help("Set either '-str' or '-seq' or dont set any at all") if ( defined $STRFLAG && defined $SEQFLAG );

help("Invalid value for '-seqd <uniref100|uniref90|uniref70|uniprot>'") if ( $SEQDATABASE ne "uniref100" && $SEQDATABASE ne "uniref90" && $SEQDATABASE ne "uniref70" && $SEQDATABASE ne "uniprot");
help("Invalid value for '-mod <fasta|mafftash|mafftash-split>'") if ( $OUTTYPE ne "fasta" && $OUTTYPE ne "mafftash" && $OUTTYPE ne "mafftash-split" );
help("Invalid value for '-run <thread|normal>'") if ( $RUNMODE ne "thread" && $RUNMODE ne "normal" );
help("Invalid value for '-trd <count>'; count should be between 1 and 5 (inclusive)") if ( $RUNMODE eq "thread" && ($THREADCOUNT <= 0 || $THREADCOUNT > 5) );

######
# check existing requests
print STDERR "Checking server status...\n";

# generate seed
srand($$);

# sleep a bit to give time for lsf response
sleep(int(rand(6))+1);

my $browser = LWP::UserAgent->new;
$browser->timeout(0);

# get: check if you can send a new request this time
my $jobsResponse = $browser->get("$BASEURL/isAllowed");

if ( $jobsResponse->is_success )
{
    my $status = parseJobQueryResponse($jobsResponse->content);
    bail("Max jobs reached. The server cannot process your request right now; try again later.", 0) unless $status > 0;
}
else
{
    bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content)));
}

######
# make a temporary input if lists were provided
unless ( defined $INPUTFILE )
{
    $INPUTFILE = "$TMP/input.homemade";

    # three-argument open: user-supplied names are never parsed for a mode
    open(my $inpf, '>', $INPUTFILE) or bail("Error writing to input file.");

    if ( defined $IDLISTFILE )
    {
        open(my $idlist, '<', $IDLISTFILE) or bail("Error reading input file.");

        while ( my $line = <$idlist> )
        {
            chomp $line;

            # first run of five word characters found anywhere on the line
            if ( $line =~ /(\w{5})/ )
            {
                print {$inpf} ">PDBID\n$1\n";
            }
        }

        close $idlist;
    }

    if ( defined $SEQFASTAFILE )
    {
        open(my $fasta, '<', $SEQFASTAFILE) or bail("Error reading input file.");

        # copied line by line so every line ends in exactly one newline
        while ( my $line = <$fasta> )
        {
            chomp $line;
            print {$inpf} "$line\n";
        }

        close $fasta;
    }

    close $inpf;
}

######
# prepare parameters
print STDERR "Preparing parameters for service request...\n";

my @parameters = ();
push(@parameters, "fileinput" => ["$INPUTFILE"]);
push(@parameters, "out_type" => $OUTTYPE);

push(@parameters, "rest_flag" => "1");
push(@parameters, "cls_flag" => "1");
push(@parameters, "pre_flag" => "1") if defined $EVALFLAG;
push(@parameters, "noin_flag" => "1") if defined $NOINFLAG;

push(@parameters, "run_mode" => $RUNMODE);
push(@parameters, "thread_count" => $THREADCOUNT) if $RUNMODE eq "thread";

# -str alone: structure search only; -seq alone: sequence search only;
# neither: both (setting both was rejected during validation)
my $want_str = ( defined $STRFLAG || !defined $SEQFLAG ) ? 1 : 0;
my $want_seq = ( !defined $STRFLAG ) ? 1 : 0;

if ( $want_str )
{
    push(@parameters, "str_flag" => "1");
    push(@parameters, "ash_flag" => "1");
}

if ( $want_seq )
{
    push(@parameters, "seq_flag" => "1");
    push(@parameters, "seq_algorithm" => "fast");
    push(@parameters, "seq_database" => $SEQDATABASE);
    push(@parameters, "seq_blastlimit" => $SEQBLASTLIMIT);
    push(@parameters, "seq_outputlimit" => $SEQLIMIT);
}

######
# start rest service
print STDERR "Sending service request...\n";

# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);

# get response from post request
my ($status, $seekid) = parseResponse($postResponse->content);

my $MAXTRIES = 3;   # download attempts per file
my $STIMER   = 5;   # base sleep interval in seconds
my $timer    = 0;   # current poll interval

print STDERR "Request sent! Waiting for response...[$seekid]\n";

# result file name => expected checksum, filled once the job is done
my $checklist = {};

# wait for results until it becomes available
# NOTE(review): there is no upper bound on polling; any status other than
# "done" keeps the loop going.
while (1)
{
    # sleeps for 5+random, 10+random, 15+random, ... 60+random, 60+random, ...
    $timer = $timer >= 60 ? 60 : $timer+$STIMER;
    sleep($timer+int(rand(4)));

    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$seekid");

    # an HTTP failure while polling is fatal
    bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content))) unless $getResponse->is_success;

    # get response from get request
    ($status, $seekid) = parseResponse($getResponse->content);
    next unless ( $status eq "done" );

    # if job is finished and ready
    print STDERR "Results found!\n";

    my $csfile = "$TMP/checksum";
    my $try1   = 1;

    while (1)
    {
        print STDERR "Fetching Results... [Trial $try1]\n";

        unless ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
        {
            bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
            $try1++;
            sleep $STIMER;
            next;
        }

        # get response from get request
        $checklist = extractchecksum($csfile);
        bail("Error retrieving list of compressed files!") unless keys %$checklist;

        foreach my $id ( sort keys %$checklist )
        {
            sleep 1;

            my $checkfile = "$TMP/$id";
            my $checkid   = $checklist->{$id};
            my $try2      = 1;

            while (1)
            {
                # always start from a clean slate for this attempt
                unlink $checkfile if -e $checkfile;

                if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                {
                    # no checksum verification on Windows
                    last if $ISWINDOWS;

                    my $hashid = getchecksum($checkfile);
                    #print STDERR "[hashid]$hashid [checkid]$checkid\n";

                    # an empty local checksum is accepted; only a real
                    # mismatch triggers a retry
                    last unless ( $hashid ne "" && $hashid ne $checkid );

                    bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                }
                else
                {
                    bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                }

                $try2++;
                sleep $STIMER;
            }
        }

        last;
    }

    last;
}

# make sure outputs were generated
# decompress
print STDERR "Assembling final results...\n";

foreach my $id ( sort keys %$checklist )
{
    # \Q...\E: the job id is matched literally, not as a regex fragment
    next unless $id =~ /^\Q$seekid\E\.out(\.str|\.seq)?/;

    # the optional suffix group is undefined for a plain "<jobid>.out"
    my $suffix = defined $1 ? $1 : "";

    bail("Error: Output file corrupted!") unless -e "$TMP/$id";
    appendToFile("$TMP/$id", "$OUTPUTFILE".$suffix);
}

cleanup();
| #################### | ||
| #################### | ||
# Split a "<jobid>:<status>" service reply into its two fields.
# Returns the list (status, jobid). Both are "" when the reply is not
# exactly two colon-separated tokens free of whitespace and colons.
sub parseResponse
{
    my ($reply) = @_;

    return ($2, $1) if $reply =~ /^([^\s:]+):([^\s:]+)$/;
    return ("", "");
}
# Read the reply of the "isAllowed" query.
# Returns the number when the reply consists only of digits, otherwise
# the default of 100.
sub parseJobQueryResponse
{
    my ($reply) = @_;

    return ( $reply =~ /^(\d+)$/ ) ? $1 : 100;
}
# Read a checksum list (one "<checksum> <filename>" pair per line) that is
# stored as plain text in $infile.
# Returns a hash reference mapping filename => checksum; the hash is empty
# when the file cannot be opened. Lines that are not exactly two
# whitespace-separated tokens are ignored.
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();

    # three-argument open: the name is used verbatim, whatever it contains
    open(my $csum, '<', $infile) or return \%dataset;
    while ( my $line = <$csum> )
    {
        # strip the line ending, including the "\r" of a CRLF list
        $line =~ s/\s+$//;
        $dataset{$2} = $1 if $line =~ /^(\S+)\s+(\S+)$/;
    }
    close $csum;

    return \%dataset;
}
# Pull the message out of an error reply of the form "error":"<message>".
# Returns the message, or the reply unchanged when it has no such field.
sub parseError
{
    my ($body) = @_;

    if ( $body =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
    {
        return $1;
    }
    return $body;
}
# Compute the MD5 checksum of $infile.
# Returns the digest as a lower-case hexadecimal string, or "" when the
# file cannot be read (the caller treats "" as "no checksum available").
#
# The digest is computed in-process with the core Digest::MD5 module, so
# it does not depend on an md5sum/md5 binary at a fixed location and it
# works for paths that contain spaces or shell metacharacters.
sub getchecksum
{
    my $infile = shift;

    # core module, loaded here so this sub is self-contained
    require Digest::MD5;

    open(my $fh, '<', $infile) or return "";
    binmode($fh);
    # addfile() dies on a read error; report that as "no checksum"
    my $checksum = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close $fh;

    return defined $checksum ? $checksum : "";
}
# Run $command through backticks and discard its output.
# Returns 0 only when $? is -1 after the call, 1 in every other case
# (a non-zero exit status of the command still yields 1).
sub backticks
{
    my ($cmd) = @_;

    my $discarded = `$cmd`;
    return 0 if $? == -1;
    return 1;
}
# Print an optional message to STDERR, remove the temporary directory and
# terminate the script.
#   $str    - message to print (skipped when undefined)
#   $status - exit status; 0 for success and 1 for error, defaults to 1
sub bail
{
    my $str = shift;
    my $status = shift;

    # test $status itself: a bare "defined" would look at $_ instead, so an
    # explicit 0 could be replaced by 1 and a missing status left undefined
    $status = 1 unless defined $status;

    print STDERR "$str\n" if defined $str;

    cleanup();
    exit($status);
}
# Delete the temporary directory $TMP together with the entries in it.
# Does nothing when $TMP is empty or is not a directory.
sub cleanup
{
    return unless ( $TMP ne "" && -d $TMP );

    opendir(MAINDIR, $TMP);
    my @entries = readdir(MAINDIR);
    closedir(MAINDIR);

    # unlink each entry first, then remove the directory tree itself
    for my $entry (@entries)
    {
        my $path = "$TMP/$entry";
        unlink $path if -e $path;
    }

    remove_tree($TMP);
}
# Append the whole content of $inpfile to $outfile, creating $outfile when
# it does not exist yet. Calls bail() when either file cannot be opened
# or when writing fails.
sub appendToFile
{
    my $inpfile = shift;
    my $outfile = shift;

    # three-argument open with lexical handles: names are used verbatim and
    # no global handle is shared with the rest of the script
    open(my $in, '<', $inpfile) or bail("Server Error: Error in reading file.");
    open(my $out, '>>', $outfile) or bail("Server Error: Error in writing to file.");

    while ( my $line = <$in> )
    {
        print {$out} $line or bail("Server Error: Error in writing to file.");
    }

    # buffered write errors (e.g. a full disk) only surface at close
    close($out) or bail("Server Error: Error in writing to file.");
    close $in;
}
# Print the usage text to standard output, then hand $str to bail(),
# which ends the script.
sub help
{
    my ($message) = @_;

    my $usage = <<'HELPME';
USAGE
./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]
PARAMETERS
-inp <INFILE>
INFILE is a FASTA-formatted file
PDB entries are written as:
>PDBID
[5-character pdbid+chain]
While sequence entries are written as:
>[id]
[sequence]
-idf <LISTFILE>
IDLISTFILE is a file containing a list of pdbids
pdbids should be a 5-character pdbid + chain
-seqf <SEQFASTA>
SEQFASTA is a fasta file
entries are written as:
>[id]
[sequence]
-out <OUTFILE>
Results are writen to a file named OUTFILE
-str
Only structures will be collected by Seekquencer
If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
-seq
Only sequences will be collected by Seekquencer
If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
OPTIONAL PARAMETERS:
-seqd <uniref100|uniref90|uniref70|uniprot>
Search Database for sequence homologs. Default value: uniref100
-lim <count>
this sets the maximum number of sequence homologs collected. Default value: 100
-blim <count>
this sets the -b and -v value when running blastall. Default value: 100
-pre
When -str is set, this will compare all structures against all using pdp-ash
This would ensure that all structures collected are matching
All structures that do not match will be removed
-noin
When set, inputs will not be included in the output
-mod <mafftash|mafftash-split|fasta>
Defines the output format
mafftash (default) will print a mafftash-formatted fasta file
mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)
fasta will print a regular fasta file
-run <thread|normal>
thread will run simultaneous jobs during blast queries (faster but takes more nodes)
normal will run sequential blast queries (slower but takes less nodes)
Default value: normal
-trd <count>
if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3
HELPME
    print $usage;

    bail($message);
}
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
#! /bin/sh
# Launcher for the bundled MAFFT: locates the directory this script lives
# in, points MAFFT_BINARIES at its libexec directory and runs mafft with
# the caller's arguments.
#
# pushd/popd are not part of POSIX sh; under a shell without them the
# directory lookup would silently fall back to the caller's working
# directory, so the lookup is done with cd in a subshell instead.
rootdir=$(cd "`dirname "$0"`" > /dev/null 2>&1 && pwd) || rootdir="$PWD"
MAFFT_BINARIES="$rootdir/mafftdir/libexec"; export MAFFT_BINARIES;
"$rootdir/mafftdir/bin/mafft" "$@"
# $1 can have space in file name
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
| #!/usr/bin/perl | ||
| ##################################################################### | ||
| # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) | ||
| # | ||
| # Ver. Date Changelog | ||
| ##################################################################### | ||
| # 1.0 07.26.13 Initial release | ||
| # 2.0 09.03.13 Added extensive warnings and error messages | ||
| # 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs | ||
| # 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output | ||
| # 3.2 12.08.14 Removed 5-char restriction for own structure files | ||
| # | ||
| ##################################################################### | ||
| use strict; | ||
| use Getopt::Long; | ||
| use File::Path qw(make_path remove_tree); | ||
| use LWP::Simple; | ||
| use LWP::UserAgent; | ||
| # to prevent error 'Header line too long (limit is 8192)' [v3.1] | ||
| use LWP::Protocol::http; | ||
| push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); | ||
# REST endpoint of the MAFFTash premafft service
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";

# command-line values; each stays undefined unless its option is given
my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );

GetOptions(
    'd=s' => \$WORKDIR,
    'p=s' => \$PDBLIST,
    'o=s' => \$OWNLIST,
    'h=s' => \$HAT3FILE,
    'i=s' => \$INSTRFILE,
);

print STDERR "[MAFFTash-premafft]\n";

# set temp directory (name carries the process id)
my $TMP = "/tmp/mapremafft$$";
if ( !-d $TMP )
{
    make_path($TMP);
}

######
# validation
if ( !defined $PDBLIST && !defined $OWNLIST )
{
    &help("Required parameter : atleast one of either '-p' or '-o'");
}
if ( defined $OWNLIST && !defined $WORKDIR )
{
    &help("Required parameter : '-d'");
}

# default output names
defined $HAT3FILE or $HAT3FILE = "hat3";
defined $INSTRFILE or $INSTRFILE = "instr";

# drop one trailing slash from the working directory
if ( defined $WORKDIR && $WORKDIR =~ m/\/$/ )
{
    chop $WORKDIR;
}
######
# prepare inputs
#
# Builds @files, the form fields of the service request: two fixed
# settings, the list of PDB ids (when -p is given) and one upload per own
# structure file (when -o is given). %ownids records the own ids.
print STDERR "Preparing inputs for service request...\n";

my @files = ();
push(@files, "strweight" => "0.5");
push(@files, "premafft" => "1");

# pdb entries
if ( defined $PDBLIST )
{
    print STDERR "PDB List defined!\n";
    &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;

    my $listfile = "$TMP/pdblist.inp";

    # three-argument open: user-supplied names are used verbatim
    open(my $pdbin, '<', $PDBLIST) or &bail("Error: Cannot open file $PDBLIST for reading!");
    open(my $pdbout, '>', $listfile) or &bail("Error: Cannot open temporary file $listfile for writing!");
    while ( my $line = <$pdbin> )
    {
        chomp $line;
        # only lines that are exactly five word characters are kept
        print {$pdbout} ">PDBID\n$1\n" if $line =~ /^(\w{5})$/;
    }
    close $pdbout;
    close $pdbin;

    push(@files, "inputfile" => ["$listfile"]);
}

# upload own structures
my %ownids = ();
if ( defined $OWNLIST )
{
    print STDERR "OWN List defined!\n";
    &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;

    open(my $ownin, '<', $OWNLIST) or &bail("Error: Cannot open file $OWNLIST for reading!");
    while ( my $line = <$ownin> )
    {
        chomp $line;
        # one id per line, no whitespace inside
        next unless $line =~ /^(\S+)$/;
        my $ownid = $1;

        my $fileref = "$WORKDIR/$ownid.pdb";
        unless (-e $fileref)
        {
            close $ownin;
            &bail("Error: File $fileref does not exists!");
        }

        push(@files, "inputownfile[]" => ["$fileref"]);
        $ownids{$ownid} = 1;
    }
    close $ownin;
}
######
# start rest service
#
# Submits the job, polls the service until it reports "done" and downloads
# every file named in the server's checksum list into $TMP, verifying each
# download against its listed checksum.
print STDERR "Sending service request...\n";

my $browser = LWP::UserAgent->new;
$browser->timeout(0);

# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);

# get response from post request
my ($status, $mafftashid) = &parseResponse($postResponse->content);

# parseResponse() yields "" for a reply it cannot parse; polling "$BASEURL/"
# with an empty id would not address the submitted job
&bail("Error: Server did not return a job id!") if $mafftashid eq "";

my $MAXTRIES = 3;   # attempts per download before giving up
my $STIMER = 4;     # seconds: polling increment and pause between retries
my $longtimer = 0;

print STDERR "Request sent! Waiting for response...[$mafftashid]\n";

# wait for results until it becomes available
while(1)
{
    # grows by $STIMER per round and restarts at $STIMER once it has passed 3*$STIMER
    $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER;
    sleep $longtimer;

    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$mafftashid");
    &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content))) unless $getResponse->is_success;

    # parse into temporaries: an unparseable reply must not wipe the id
    # that every following request depends on
    my ($newstatus, $newid) = &parseResponse($getResponse->content);
    $status = $newstatus;
    $mafftashid = $newid if $newid ne "";
    next unless ( $status eq "done" );

    # if job is finished and ready
    print STDERR "Results found!\n";
    my $csfile = "$TMP/checksum.tar.gz";
    my $try1 = 1;
    while(1)
    {
        print STDERR "Fetching Results... [Trial $try1]\n";
        unless ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile )
        {
            &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
            $try1++;
            sleep $STIMER;
            next;
        }

        # map of file name => expected checksum
        my $checklist = &extractchecksum($csfile);
        &bail("Error retrieving list of compressed files!") unless ( scalar(keys %$checklist) > 0 );

        foreach my $id ( keys %$checklist )
        {
            # file names come from the server: never let one point outside $TMP
            &bail("Error retrieving list of compressed files! [Invalid file name]") if ( $id =~ m{[/\\]} || $id =~ /^\.\.?$/ );

            my $checkfile = "$TMP/$id";
            my $checkid = $checklist->{$id};
            my $try2 = 1;
            while(1)
            {
                unlink $checkfile if -e $checkfile;
                if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                {
                    my $hashid = &getchecksum($checkfile);
                    #print STDERR "[hashid]$hashid [checkid]$checkid\n";
                    # an empty hash means no checksum could be computed locally
                    last if ( $hashid eq "" || $hashid eq $checkid );
                    unlink $checkfile if -e $checkfile;
                    &bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                }
                else
                {
                    &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                }
                $try2++;
                sleep $STIMER;
            }
        }
        last;
    }
    last;
}
# make sure outputs were generated
# decompress
print STDERR "Assembling final results...\n";

# the downloaded archive parts are concatenated and unpacked inside $TMP
&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");

# Move the results with File::Copy (core module) instead of a shell "mv":
# $INSTRFILE and $HAT3FILE come from the command line and may contain
# spaces or shell metacharacters.
require File::Copy;
if ( -e "$TMP/instr" )
{
    File::Copy::move("$TMP/instr", $INSTRFILE) or &bail("Error: Cannot write output file $INSTRFILE!");
}
if ( -e "$TMP/hat3" )
{
    File::Copy::move("$TMP/hat3", $HAT3FILE) or warn "Warning: Cannot write output file $HAT3FILE.\n";
}

# sometimes no hat3 file is generated [v3.1]
#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;

# warn if some ownids were omitted
if ( scalar keys(%ownids) > 0 )
{
    # ids found on the ">number_id" header lines of the instr file
    my %instrids = ();
    open(my $instr, '<', $INSTRFILE) or &bail("Error: Cannot open file $INSTRFILE for reading!");
    while ( my $line = <$instr> )
    {
        chomp $line;
        $instrids{$1} = 1 if $line =~ /^>\d+_(\S+)$/;
    }
    close $instr;

    foreach my $id ( keys %ownids )
    {
        warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
    }
}

&cleanup();
| #################### | ||
| #################### | ||
# Split a "<mafftashid>:<status>" service reply into its two fields.
# Returns the list (status, mafftashid). Both are "" when the reply is not
# exactly two colon-separated tokens free of whitespace and colons.
sub parseResponse
{
    my ($reply) = @_;

    return ($2, $1) if $reply =~ /^([^\s:]+):([^\s:]+)$/;
    return ("", "");
}
# Read the checksum list packed in the gzip-compressed tar archive $infile
# (one "<checksum> <filename>" pair per line of the extracted text).
# Returns a hash reference mapping filename => checksum; the hash is empty
# when tar cannot be started or prints no usable line.
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();

    # list-form pipe open: tar is started directly, so the archive path is
    # passed as one argument and never parsed by a shell
    open(my $csum, '-|', 'tar', '-zxf', $infile, '-O') or return \%dataset;
    while ( my $line = <$csum> )
    {
        chomp $line;
        $dataset{$2} = $1 if $line =~ /^(\S+)\s+(\S+)$/;
    }
    close $csum;

    return \%dataset;
}
# Pull the message out of an error reply of the form "error":"<message>".
# Returns the message, or the reply unchanged when it has no such field,
# so that an unexpected server reply still reaches the user instead of
# being replaced by an empty string.
sub parseError
{
    my $response = shift;
    #"error":"Invalid number of inputs found."
    my $errorstr = ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ ) ? $1 : $response;
    return $errorstr;
}
# Compute the MD5 checksum of $infile.
# Returns the digest as a lower-case hexadecimal string, or "" when the
# file cannot be read (the caller treats "" as "no checksum available").
#
# The digest is computed in-process with the core Digest::MD5 module, so
# it does not depend on an md5sum/md5 binary at a fixed location and it
# works for paths that contain spaces or shell metacharacters.
sub getchecksum
{
    my $infile = shift;

    # core module, loaded here so this sub is self-contained
    require Digest::MD5;

    open(my $fh, '<', $infile) or return "";
    binmode($fh);
    # addfile() dies on a read error; report that as "no checksum"
    my $checksum = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close $fh;

    return defined $checksum ? $checksum : "";
}
# Run $command through backticks and discard its output.
# Returns 0 only when $? is -1 after the call, 1 in every other case
# (a non-zero exit status of the command still yields 1).
sub backticks
{
    my ($cmd) = @_;

    my $discarded = `$cmd`;
    return 0 if $? == -1;
    return 1;
}
# Print an optional message to STDERR, remove the temporary directory and
# leave the script with exit status 1.
sub bail
{
    my ($message) = @_;

    if ( defined $message )
    {
        print STDERR "$message\n";
    }

    &cleanup();
    exit(1);
}
# Delete the temporary directory $TMP together with the entries in it.
# Does nothing when $TMP is empty or is not a directory.
sub cleanup
{
    return unless ( $TMP ne "" && -d $TMP );

    opendir(MAINDIR, $TMP);
    my @entries = readdir(MAINDIR);
    closedir(MAINDIR);

    # unlink each entry first, then remove the directory tree itself
    for my $entry (@entries)
    {
        my $path = "$TMP/$entry";
        unlink $path if -e $path;
    }

    remove_tree($TMP);
}
# Print the usage text to standard output, then hand $str to bail(),
# which ends the script.
sub help
{
    my ($message) = @_;

    my $usage = <<'HELPME';
USAGE
./mafftash_premafft.pl -p [FILE]
./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]
PARAMETERS
-p [FILE]
FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format
-o [FILE] -d [DIRECTORY]
FILE contains a list of IDs from your own structure/pdb files (one entry per line)
for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY
-h [HATFILE]
save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory
-i [INSTRFILE]
save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory
HELPME
    print $usage;

    &bail($message);
}
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
| #!/usr/bin/perl | ||
| #################################################################################### | ||
| # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) | ||
| # | ||
| # Ver. Date Changelog | ||
| #################################################################################### | ||
| # 1.0 11.01.13 Initial release | ||
| # | ||
| # **Skipped version 2 to standardise version numbers to seekquencer.pl script** | ||
| # | ||
| # 3.0 04.24.14 Added split option -mod <mafftash-split> for output | ||
| # Uses seekquencer_v3 backend | ||
| # | ||
| # 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin | ||
| # Sets -seqa fast in seekquencer.pl | ||
| # Uses seekquencer_v4 backend | ||
| # | ||
| # 4.1 05.19.14 Added a check on running REST requests before proceeding | ||
| # to avoid server load problems | ||
| # | ||
| # 4.2 05.27.14 Seq limit processing done in seekquencer.pl script | ||
| # to avoid server load problems | ||
| # | ||
| # 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot> | ||
| # Blast limit changed from factor of 10 to -blim option | ||
| # Timing on sleep changed; added srand() for making seed | ||
| # Moved the job limit processing to server side | ||
| # | ||
| # 4.4 08.05.14 Modified to work in multiple OS | ||
| # | ||
| # | ||
| #################################################################################### | ||
| use strict; | ||
| use Getopt::Long; | ||
| use File::Path qw(make_path remove_tree); | ||
| use Cwd; | ||
| use LWP::Simple; | ||
| use LWP::UserAgent; | ||
| # to prevent error: Header line too long (limit is 8192) | ||
| use LWP::Protocol::http; | ||
| push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); | ||
# REST endpoint of the Seekquencer premafft service
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";

# command-line values without a default; undefined unless the option is given
my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );

# command-line values with a default
my ( $OUTTYPE, $SEQDATABASE ) = ( "mafftash", "uniref100" );
my ( $SEQLIMIT, $SEQBLASTLIMIT ) = ( 100, 100 );
my $RUNMODE = "normal"; # thread|normal
my $THREADCOUNT = 3;

GetOptions(
    'inp=s'  => \$INPUTFILE,
    'idf=s'  => \$IDLISTFILE,
    'seqf=s' => \$SEQFASTAFILE,
    'out=s'  => \$OUTPUTFILE,
    'str'    => \$STRFLAG,
    'seq'    => \$SEQFLAG,
    'seqd=s' => \$SEQDATABASE,
    'lim=i'  => \$SEQLIMIT,
    'blim=i' => \$SEQBLASTLIMIT,
    'pre'    => \$EVALFLAG,
    'noin'   => \$NOINFLAG,
    'mod=s'  => \$OUTTYPE,
    'run=s'  => \$RUNMODE,
    'trd=i'  => \$THREADCOUNT,
);

my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;
print STDERR "[Seekquencer-premafft 4.4 on $^O]\n";

# set temp directory (inside the current directory, name carries the process id)
my $CWD = getcwd;
my $TMP = "$CWD/seekpremafft$$";
if ( !-d $TMP )
{
    make_path($TMP);
}

######
# validation
unless ( defined $INPUTFILE || defined $IDLISTFILE || defined $SEQFASTAFILE )
{
    help("Required parameter: define input as '-inp' or '-idf' or '-seqf'");
}
if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) )
{
    help("'-inp' is already defined");
}

# every input that was given must exist and be non-empty
foreach my $candidate ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE )
{
    next unless defined $candidate;
    help("Input file $candidate does not exist (or filesize is 0)") unless ( -e $candidate && -s $candidate );
}

help("Required parameter: output file '-out'") if !defined $OUTPUTFILE;
help("Set either '-str' or '-seq' or dont set any at all") if ( defined $STRFLAG && defined $SEQFLAG );

# enumerated options
help("Invalid value for '-seqd <uniref100|uniref90|uniref70|uniprot>'") unless grep { $SEQDATABASE eq $_ } qw(uniref100 uniref90 uniref70 uniprot);
help("Invalid value for '-mod <fasta|mafftash|mafftash-split>'") unless grep { $OUTTYPE eq $_ } qw(fasta mafftash mafftash-split);
help("Invalid value for '-run <thread|normal>'") unless grep { $RUNMODE eq $_ } qw(thread normal);

if ( $RUNMODE eq "thread" )
{
    help("Invalid value for '-trd <count>'; count should be between 1 and 5 (inclusive)") if ( $THREADCOUNT <= 0 || $THREADCOUNT > 5 );
}
######
# check existing requests
print STDERR "Checking server status...\n";

# generate seed
srand($$);

# sleep a bit to give time for lsf response (1 to 6 seconds)
sleep(int(rand(6))+1);

my $browser = LWP::UserAgent->new;
$browser->timeout(0);

# get: check if you can send a new request this time
my $jobsResponse = $browser->get("$BASEURL/isAllowed");
if ( !$jobsResponse->is_success )
{
    bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content)));
}
else
{
    # a value of 0 or less means no new request is accepted right now
    my $allowed = parseJobQueryResponse($jobsResponse->content);
    if ( $allowed <= 0 )
    {
        bail("Max jobs reached. The server cannot process your request right now; try again later.", 0);
    }
}
######
# make a temporary input if lists were provided
#
# When -inp is not given, $INPUTFILE is pointed at a file inside $TMP that
# is built from the id list (-idf) and/or the fasta file (-seqf).
unless ( defined $INPUTFILE )
{
    $INPUTFILE = "$TMP/input.homemade";

    # three-argument open with lexical handles: user-supplied names are
    # used verbatim and no global handle is shared with appendToFile()
    open(my $homemade, '>', $INPUTFILE) or bail("Error writing to input file.");

    if ( defined $IDLISTFILE )
    {
        open(my $idlist, '<', $IDLISTFILE) or bail("Error reading input file.");
        while ( my $line = <$idlist> )
        {
            chomp $line;
            # the first run of five word characters on a line is taken as the id
            print {$homemade} ">PDBID\n$1\n" if $line =~ /(\w{5})/;
        }
        close $idlist;
    }

    if ( defined $SEQFASTAFILE )
    {
        open(my $fasta, '<', $SEQFASTAFILE) or bail("Error reading input file.");
        while ( my $line = <$fasta> )
        {
            # copied line by line; every line is written with a trailing newline
            chomp $line;
            print {$homemade} "$line\n";
        }
        close $fasta;
    }

    # buffered write errors only surface at close
    close($homemade) or bail("Error writing to input file.");
}
######
# prepare parameters
#
# @parameters holds the form fields of the service request as a flat
# name => value list.
print STDERR "Preparing parameters for service request...\n";

my @parameters = (
    "fileinput" => ["$INPUTFILE"],
    "out_type"  => $OUTTYPE,
    "rest_flag" => "1",
    "cls_flag"  => "1",
);
if ( defined $EVALFLAG )
{
    push(@parameters, "pre_flag" => "1");
}
if ( defined $NOINFLAG )
{
    push(@parameters, "noin_flag" => "1");
}
push(@parameters, "run_mode" => $RUNMODE);
if ( $RUNMODE eq "thread" )
{
    push(@parameters, "thread_count" => $THREADCOUNT);
}

# fields sent for a structure search
my @structure_fields = (
    "str_flag" => "1",
    "ash_flag" => "1",
);

# fields sent for a sequence search
my @sequence_fields = (
    "seq_flag"        => "1",
    "seq_algorithm"   => "fast",
    "seq_database"    => $SEQDATABASE,
    "seq_blastlimit"  => $SEQBLASTLIMIT,
    "seq_outputlimit" => $SEQLIMIT,
);

# -str: structures only; -seq: sequences only; neither: both
if ( defined $STRFLAG )
{
    push(@parameters, @structure_fields);
}
elsif ( defined $SEQFLAG )
{
    push(@parameters, @sequence_fields);
}
else
{
    push(@parameters, @structure_fields, @sequence_fields);
}
######
# start rest service
#
# Submits the job, polls the service until it reports "done", downloads
# every file named in the server's checksum list and appends the result
# files to $OUTPUTFILE.
print STDERR "Sending service request...\n";

# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);

# get response from post request
my ($status, $seekid) = parseResponse($postResponse->content);

# parseResponse() yields "" for a reply it cannot parse; polling "$BASEURL/"
# with an empty id would not address the submitted job
bail("Error: Server did not return a job id!") if $seekid eq "";

my $MAXTRIES = 3;   # attempts per download before giving up
my $STIMER = 5;     # seconds: polling increment and pause between retries
my $timer = 0;

print STDERR "Request sent! Waiting for response...[$seekid]\n";

my $checklist = {};

# wait for results until it becomes available
while(1)
{
    # sleeps for 5+random, 10+random, 15+random, ... 60+random, 60+random, ...
    $timer = $timer >= 60 ? 60 : $timer+$STIMER;
    sleep($timer+int(rand(4)));

    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$seekid");
    bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content))) unless $getResponse->is_success;

    # parse into temporaries: an unparseable reply must not wipe the id
    # that every following request depends on
    my ($newstatus, $newid) = parseResponse($getResponse->content);
    $status = $newstatus;
    $seekid = $newid if $newid ne "";
    next unless ( $status eq "done" );

    # if job is finished and ready
    print STDERR "Results found!\n";
    my $csfile = "$TMP/checksum";
    my $try1 = 1;
    while(1)
    {
        print STDERR "Fetching Results... [Trial $try1]\n";
        unless ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
        {
            bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
            $try1++;
            sleep $STIMER;
            next;
        }

        # map of file name => expected checksum
        $checklist = extractchecksum($csfile);
        bail("Error retrieving list of compressed files!") unless ( scalar(keys %$checklist) > 0 );

        foreach my $id ( sort keys %$checklist )
        {
            # file names come from the server: never let one point outside $TMP
            bail("Error retrieving list of compressed files! [Invalid file name]") if ( $id =~ m{[/\\]} || $id =~ /^\.\.?$/ );

            sleep 1;
            my $checkfile = "$TMP/$id";
            my $checkid = $checklist->{$id};
            my $try2 = 1;
            while(1)
            {
                unlink $checkfile if -e $checkfile;
                if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                {
                    # checksum verification is skipped on Windows
                    last if $ISWINDOWS;
                    my $hashid = getchecksum($checkfile);
                    #print STDERR "[hashid]$hashid [checkid]$checkid\n";
                    # an empty hash means no checksum could be computed locally
                    last if ( $hashid eq "" || $hashid eq $checkid );
                    bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                }
                else
                {
                    bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                }
                $try2++;
                sleep $STIMER;
            }
        }
        last;
    }
    last;
}

# make sure outputs were generated
# decompress
print STDERR "Assembling final results...\n";
foreach my $id ( sort keys %$checklist )
{
    if ( $id =~ /^\Q$seekid\E\.out(\.str|\.seq)?/ )
    {
        # $1 is undefined for the plain ".out" file
        my $suffix = defined $1 ? $1 : "";
        bail("Error: Output file corrupted!") unless -e "$TMP/$id";
        appendToFile("$TMP/$id", $OUTPUTFILE.$suffix);
    }
}

cleanup();
| #################### | ||
| #################### | ||
# Split a "<jobid>:<status>" service reply into its two fields.
# Returns the list (status, jobid). Both are "" when the reply is not
# exactly two colon-separated tokens free of whitespace and colons.
sub parseResponse
{
    my ($reply) = @_;

    return ($2, $1) if $reply =~ /^([^\s:]+):([^\s:]+)$/;
    return ("", "");
}
# Read the reply of the "isAllowed" query.
# Returns the number when the reply consists only of digits, otherwise
# the default of 100.
sub parseJobQueryResponse
{
    my ($reply) = @_;

    return ( $reply =~ /^(\d+)$/ ) ? $1 : 100;
}
# Read a checksum list (one "<checksum> <filename>" pair per line) that is
# stored as plain text in $infile.
# Returns a hash reference mapping filename => checksum; the hash is empty
# when the file cannot be opened. Lines that are not exactly two
# whitespace-separated tokens are ignored.
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();

    # three-argument open: the name is used verbatim, whatever it contains
    open(my $csum, '<', $infile) or return \%dataset;
    while ( my $line = <$csum> )
    {
        # strip the line ending, including the "\r" of a CRLF list
        $line =~ s/\s+$//;
        $dataset{$2} = $1 if $line =~ /^(\S+)\s+(\S+)$/;
    }
    close $csum;

    return \%dataset;
}
# Pull the message out of an error reply of the form "error":"<message>".
# Returns the message, or the reply unchanged when it has no such field.
sub parseError
{
    my ($body) = @_;

    if ( $body =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
    {
        return $1;
    }
    return $body;
}
# Compute the MD5 checksum of $infile.
# Returns the digest as a lower-case hexadecimal string, or "" when the
# file cannot be read (the caller treats "" as "no checksum available").
#
# The digest is computed in-process with the core Digest::MD5 module, so
# it does not depend on an md5sum/md5 binary at a fixed location and it
# works for paths that contain spaces or shell metacharacters.
sub getchecksum
{
    my $infile = shift;

    # core module, loaded here so this sub is self-contained
    require Digest::MD5;

    open(my $fh, '<', $infile) or return "";
    binmode($fh);
    # addfile() dies on a read error; report that as "no checksum"
    my $checksum = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close $fh;

    return defined $checksum ? $checksum : "";
}
# Run a command line through backticks, discarding its output.
# Returns 1 when the command ran and exited with status 0, otherwise 0.
# (Checking only for $? == -1 would report success for every command that
# could be launched, no matter how it exited.)
sub backticks
{
    my $command = shift;
    `$command`;
    return ($? == 0) ? 1 : 0;
}
# Print an optional message to STDERR, remove the scratch directory and exit.
#   $str    - message to print (skipped when undefined)
#   $status - exit status: 0 for success, 1 for error; defaults to 1
sub bail
{
    my $str = shift;
    my $status = shift;
    # test $status itself: a bare "defined" would test $_ instead, which made
    # the exit status depend on whatever $_ happened to hold
    $status = 1 unless defined $status;
    print STDERR "$str\n" if defined $str;
    cleanup();
    exit($status);
}
# Remove the scratch directory $TMP together with everything inside it.
# Does nothing when $TMP is unset, empty or not a directory.
sub cleanup
{
    return if ( !defined $TMP || $TMP eq "" || !-d $TMP );
    # remove_tree deletes the directory contents itself, so no separate
    # readdir/unlink pass (which also hit "." and "..") is needed
    remove_tree($TMP);
}
# Append the contents of $inpfile to $outfile (created when missing).
# Calls bail() when the input cannot be read or the output cannot be written.
sub appendToFile
{
    my $inpfile = shift;
    my $outfile = shift;
    # three-argument opens with lexical handles: file names are taken
    # literally and no global handle is shared with the main script
    open(my $in, '<', $inpfile) or bail("Server Error: Error in reading file.");
    open(my $out, '>>', $outfile) or bail("Server Error: Error in writing to file.");
    while ( my $line = <$in> )
    {
        print {$out} $line;
    }
    # buffered write errors (e.g. disk full) only surface on close
    close($out) or bail("Server Error: Error in writing to file.");
    close $in;
}
# Print the usage text to STDOUT, then hand the given message to bail(),
# which prints it to STDERR and terminates the script.
# Text fixes: the -idf argument is consistently called LISTFILE and the
# "writen" typo is corrected.
sub help
{
    my $str = shift;
    my $usage = <<'HELPME';
USAGE
./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]
PARAMETERS
-inp <INFILE>
INFILE is a FASTA-formatted file
PDB entries are written as:
>PDBID
[5-character pdbid+chain]
While sequence entries are written as:
>[id]
[sequence]
-idf <LISTFILE>
LISTFILE is a file containing a list of pdbids
pdbids should be a 5-character pdbid + chain
-seqf <SEQFASTA>
SEQFASTA is a fasta file
entries are written as:
>[id]
[sequence]
-out <OUTFILE>
Results are written to a file named OUTFILE
-str
Only structures will be collected by Seekquencer
If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
-seq
Only sequences will be collected by Seekquencer
If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
OPTIONAL PARAMETERS:
-seqd <uniref100|uniref90|uniref70|uniprot>
Search Database for sequence homologs. Default value: uniref100
-lim <count>
this sets the maximum number of sequence homologs collected. Default value: 100
-blim <count>
this sets the -b and -v value when running blastall. Default value: 100
-pre
When -str is set, this will compare all structures against all using pdp-ash
This would ensure that all structures collected are matching
All structures that do not match will be removed
-noin
When set, inputs will not be included in the output
-mod <mafftash|mafftash-split|fasta>
Defines the output format
mafftash (default) will print a mafftash-formatted fasta file
mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)
fasta will print a regular fasta file
-run <thread|normal>
thread will run simultaneous jobs during blast queries (faster but takes more nodes)
normal will run sequential blast queries (slower but takes less nodes)
Default value: normal
-trd <count>
if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3
HELPME
    print $usage;
    bail($str);
}
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
#! /bin/bash
# sh -> bash for debian. By J. R. Peterson. 2015/Jun.
# Launcher: resolves the directory this script lives in, points MAFFT_BINARIES
# at the bundled libexec directory and runs the bundled mafft with all arguments.
rootdir="$(cd "$(dirname "$0")" > /dev/null 2>&1; pwd)"
export MAFFT_BINARIES="$rootdir/mafftdir/libexec"
# "$@" passes every argument through unchanged, so input file names can have spaces
"$rootdir/mafftdir/bin/mafft" "$@"
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
| #!/usr/bin/perl | ||
| ##################################################################### | ||
| # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) | ||
| # | ||
| # Ver. Date Changelog | ||
| ##################################################################### | ||
| # 1.0 07.26.13 Initial release | ||
| # 2.0 09.03.13 Added extensive warnings and error messages | ||
| # 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs | ||
| # 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output | ||
| # 3.2 12.08.14 Removed 5-char restriction for own structure files | ||
| # | ||
| ##################################################################### | ||
| use strict; | ||
| use Getopt::Long; | ||
| use File::Path qw(make_path remove_tree); | ||
| use LWP::Simple; | ||
| use LWP::UserAgent; | ||
| # to prevent error 'Header line too long (limit is 8192)' [v3.1] | ||
| use LWP::Protocol::http; | ||
| push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); | ||
# REST endpoint used for all requests of this script
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";

# command-line settings: -d work directory, -p PDB id list, -o own-structure
# id list, -h hat3 output file, -i instr output file
my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );
my %optspec =
(
    'd=s' => \$WORKDIR,
    'p=s' => \$PDBLIST,
    'o=s' => \$OWNLIST,
    'h=s' => \$HAT3FILE,
    'i=s' => \$INSTRFILE,
);
GetOptions(%optspec);
print STDERR "[MAFFTash-premafft]\n";

# per-process scratch directory (removed again by cleanup)
my $TMP = "/tmp/mapremafft$$";
if ( !-d $TMP )
{
    make_path($TMP);
}

######
# validation
if ( !defined $PDBLIST && !defined $OWNLIST )
{
    help("Required parameter : atleast one of either '-p' or '-o'");
}
if ( defined $OWNLIST && !defined $WORKDIR )
{
    help("Required parameter : '-d'");
}
# default output names
$HAT3FILE = "hat3" if !defined $HAT3FILE;
$INSTRFILE = "instr" if !defined $INSTRFILE;
# drop the last character of the work directory when it ends in a slash
if ( defined $WORKDIR && $WORKDIR =~ m/\/$/g )
{
    chop $WORKDIR;
}
######
# prepare inputs
print STDERR "Preparing inputs for service request...\n";
# form fields of the request, as a flat name => value list; array-ref values
# name files to upload
my @files = ();
push(@files, "strweight" => "0.5");
push(@files, "premafft" => "1");
# pdb entries
if ( defined $PDBLIST )
{
    print STDERR "PDB List defined!\n";
    &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;
    my $listfile = "$TMP/pdblist.inp";
    # three-argument opens with lexical handles: user-supplied names are taken
    # literally instead of being parsed for mode characters
    open(my $pdbin, '<', $PDBLIST) or &bail("Error: Cannot open file $PDBLIST for reading!");
    open(my $pdbout, '>', $listfile) or &bail("Error: Cannot open temporary file $listfile for writing!");
    while ( my $line = <$pdbin> )
    {
        chomp $line;
        # only lines that are exactly one 5-character id are forwarded
        if ( $line =~ /^(\w{5})$/ )
        {
            print {$pdbout} ">PDBID\n$1\n";
        }
    }
    # write errors only surface on close; do not upload a truncated list
    close($pdbout) or &bail("Error: Cannot write temporary file $listfile!");
    close $pdbin;
    push(@files, "inputfile" => ["$listfile"]);
}
# upload own structures
my %ownids = ();
if ( defined $OWNLIST )
{
    print STDERR "OWN List defined!\n";
    &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;
    open(my $ownin, '<', $OWNLIST) or &bail("Error: Cannot open file $OWNLIST for reading!");
    while ( my $line = <$ownin> )
    {
        chomp $line;
        next unless $line =~ /^(\S+)$/;
        my $ownid = $1;
        # every listed id needs a matching "<id>.pdb" in the work directory
        my $fileref = "$WORKDIR/$ownid.pdb";
        unless ( -e $fileref )
        {
            close $ownin;
            &bail("Error: File $fileref does not exists!");
        }
        push(@files, "inputownfile[]" => ["$fileref"]);
        $ownids{$ownid} = 1;
    }
    close $ownin;
}
######
# start rest service
print STDERR "Sending service request...\n";
my $browser = LWP::UserAgent->new;
$browser->timeout(0);
# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);
# get response from post request
my ($status, $mafftashid) = &parseResponse($postResponse->content);
# without a job id every poll below would query "$BASEURL/" and never finish
&bail("Error: Unexpected response from server; no job id received!") if $mafftashid eq "";
my $MAXTRIES = 3;
my $STIMER = 4;
my $longtimer = 0;
print STDERR "Request sent! Waiting for response...[$mafftashid]\n";
# wait for results until it becomes available
while(1)
{
    # wait 4, 8, 12, 16 seconds, then start over at 4
    $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER;
    sleep $longtimer;
    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$mafftashid");
    unless ( $getResponse->is_success )
    {
        &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content)));
    }
    # get response from get request; keep the known job id when a reply
    # cannot be parsed, so the next poll still addresses this job
    my ($polledstatus, $polledid) = &parseResponse($getResponse->content);
    $status = $polledstatus;
    $mafftashid = $polledid if $polledid ne "";
    next unless ( $status eq "done" );
    # if job is finished and ready
    print STDERR "Results found!\n";
    my $csfile = "$TMP/checksum.tar.gz";
    my $try1 = 1;
    while(1)
    {
        print STDERR "Fetching Results... [Trial $try1]\n";
        if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile )
        {
            # name => checksum of every file to download
            my $checklist = &extractchecksum($csfile);
            &bail("Error retrieving list of compressed files!") unless ( %$checklist );
            foreach my $id ( keys %$checklist )
            {
                # names come from the server and are used as local file names:
                # refuse anything that could leave the scratch directory
                &bail("Error retrieving list of compressed files! [Invalid file name]") if ( $id =~ m{[/\\]} || $id =~ /^\.\.?$/ );
                my $checkfile = "$TMP/$id";
                my $checkid = $checklist->{$id};
                my $try2 = 1;
                while(1)
                {
                    unlink $checkfile if -e $checkfile;
                    if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                    {
                        my $hashid = &getchecksum($checkfile);
                        # an empty local checksum means none could be computed;
                        # the download is accepted unverified in that case
                        last if ( $hashid eq "" || $hashid eq $checkid );
                        unlink $checkfile if -e $checkfile;
                        &bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                    }
                    else
                    {
                        &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                    }
                    $try2++;
                    sleep $STIMER;
                }
            }
            last;
        }
        else
        {
            &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
            $try1++;
            sleep $STIMER;
        }
    }
    last;
}
# make sure outputs were generated
# decompress
print STDERR "Assembling final results...\n";
&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");
# move the results with File::Copy instead of "mv" through the shell, so
# user-supplied output names with spaces or shell metacharacters are handled
# literally
require File::Copy;
if ( -e "$TMP/instr" )
{
    File::Copy::move("$TMP/instr", $INSTRFILE) or &bail("Error: Cannot write output file $INSTRFILE!");
}
if ( -e "$TMP/hat3" )
{
    File::Copy::move("$TMP/hat3", $HAT3FILE) or &bail("Error: Cannot write output file $HAT3FILE!");
}
# sometimes no hat3 file is generated [v3.1]
#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;
# warn if some ownids were omitted
if ( scalar keys(%ownids) > 0 )
{
    # collect the ids named on ">NUMBER_ID" lines of the instr file
    my %instrids = ();
    open(my $instrf, '<', $INSTRFILE) or &bail("Error: Cannot open file $INSTRFILE for reading!");
    while ( my $line = <$instrf> )
    {
        chomp $line;
        if ( $line =~ /^>\d+_(\S+)$/ )
        {
            $instrids{$1} = 1;
        }
    }
    close $instrf;
    foreach my $id ( keys %ownids )
    {
        warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
    }
}
&cleanup();
| #################### | ||
| #################### | ||
# Split a "<mafftashid>:<status>" reply into its two fields.
# Returns the list (status, mafftashid); both are empty strings when the reply
# does not have exactly that shape.
sub parseResponse
{
    my ($reply) = @_;
    my ($jobid, $state) = ( $reply =~ /^([^\s:]+):([^\s:]+)$/ );
    return ("", "") unless defined $jobid;
    return ($state, $jobid);
}
# Read the checksum list stored inside a gzipped tar archive.
# The archive members are streamed through "tar -zxf <file> -O"; every line of
# the form "<checksum> <name>" is recorded. Returns a hash reference mapping
# name => checksum (empty when tar cannot be started or prints nothing usable).
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();
    # list-form pipe open: the archive name is passed to tar as one argument
    # and is never interpreted by a shell
    open(my $csum, '-|', 'tar', '-zxf', $infile, '-O') or return \%dataset;
    while ( my $line = <$csum> )
    {
        chomp $line;
        if ( $line =~ /^(\S+)\s+(\S+)$/ )
        {
            $dataset{$2} = $1;
        }
    }
    close $csum;
    return \%dataset;
}
# Pull the message out of a reply containing "error":"<message>".
# Returns an empty string when the reply has no such field.
sub parseError
{
    my ($reply) = @_;
    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
    {
        return $1;
    }
    return "";
}
# Compute the MD5 checksum of a file with the system's md5 tool.
# Returns the hex digest, or "" when neither /usr/bin/md5sum nor /sbin/md5 is
# executable, the tool cannot be started, or it prints no digest.
sub getchecksum
{
    my $infile = shift;
    # md5 binary check
    my @md5cmd = ();
    if ( -x "/usr/bin/md5sum" )
    {
        @md5cmd = ("/usr/bin/md5sum");
    }
    elsif ( -x "/sbin/md5" )
    {
        @md5cmd = ("/sbin/md5", "-q");
    }
    return "" unless @md5cmd;
    my $checksum = "";
    # list-form pipe open: the file name is passed as a single argument and
    # never parsed by a shell, so paths with spaces or metacharacters work
    open(my $md5out, '-|', @md5cmd, $infile) or return "";
    while ( my $line = <$md5out> )
    {
        # the digest is the first field; md5sum prefixes the line with a
        # backslash when it had to escape characters in the file name
        if ( $line =~ /^\\?([0-9a-fA-F]+)(?:\s|$)/ )
        {
            $checksum = $1;
            last;
        }
    }
    close $md5out;
    return $checksum;
}
# Run a command line through backticks, discarding its output.
# Returns 1 when the command ran and exited with status 0, otherwise 0.
# (Checking only for $? == -1 would report success for every command that
# could be launched, no matter how it exited.)
sub backticks
{
    my $command = shift;
    `$command`;
    return ($? == 0) ? 1 : 0;
}
# Print an optional message to STDERR, remove the scratch directory and
# terminate the script with exit status 1.
sub bail
{
    my ($message) = @_;
    if ( defined $message )
    {
        print STDERR "$message\n";
    }
    cleanup();
    exit(1);
}
# Remove the scratch directory $TMP together with everything inside it.
# Does nothing when $TMP is unset, empty or not a directory.
sub cleanup
{
    return if ( !defined $TMP || $TMP eq "" || !-d $TMP );
    # remove_tree deletes the directory contents itself, so no separate
    # readdir/unlink pass (which also hit "." and "..") is needed
    remove_tree($TMP);
}
# Print the usage text to STDOUT, then hand the given message to bail(),
# which prints it to STDERR and terminates the script.
sub help
{
    my ($reason) = @_;
    my $usage = <<'HELPME';
USAGE
./mafftash_premafft.pl -p [FILE]
./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]
PARAMETERS
-p [FILE]
FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format
-o [FILE] -d [DIRECTORY]
FILE contains a list of IDs from your own structure/pdb files (one entry per line)
for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY
-h [HATFILE]
save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory
-i [INSTRFILE]
save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory
HELPME
    print $usage;
    bail($reason);
}
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
| #!/usr/bin/perl | ||
| #################################################################################### | ||
| # Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp) | ||
| # | ||
| # Ver. Date Changelog | ||
| #################################################################################### | ||
| # 1.0 11.01.13 Initial release | ||
| # | ||
| # **Skipped version 2 to standardise version numbers to seekquencer.pl script** | ||
| # | ||
| # 3.0 04.24.14 Added split option -mod <mafftash-split> for output | ||
| # Uses seekquencer_v3 backend | ||
| # | ||
| # 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin | ||
| # Sets -seqa fast in seekquencer.pl | ||
| # Uses seekquencer_v4 backend | ||
| # | ||
| # 4.1 05.19.14 Added a check on running REST requests before proceeding | ||
| # to avoid server load problems | ||
| # | ||
| # 4.2 05.27.14 Seq limit processing done in seekquencer.pl script | ||
| # to avoid server load problems | ||
| # | ||
| # 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot> | ||
| # Blast limit changed from factor of 10 to -blim option | ||
| # Timing on sleep changed; added srand() for making seed | ||
| # Moved the job limit processing to server side | ||
| # | ||
| # 4.4 08.05.14 Modified to work in multiple OS | ||
| # | ||
| # | ||
| #################################################################################### | ||
| use strict; | ||
| use Getopt::Long; | ||
| use File::Path qw(make_path remove_tree); | ||
| use Cwd; | ||
| use LWP::Simple; | ||
| use LWP::UserAgent; | ||
| # to prevent error: Header line too long (limit is 8192) | ||
| use LWP::Protocol::http; | ||
| push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0); | ||
# REST endpoint used for all requests of this script
my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";

# command-line settings; the initialised ones carry the documented defaults
my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );
my $OUTTYPE = "mafftash";
my $SEQDATABASE = "uniref100";
my $SEQLIMIT = 100;
my $SEQBLASTLIMIT = 100;
my $RUNMODE = "normal"; # thread|normal
my $THREADCOUNT = 3;
my %optspec =
(
    'inp=s' => \$INPUTFILE,
    'idf=s' => \$IDLISTFILE,
    'seqf=s' => \$SEQFASTAFILE,
    'out=s' => \$OUTPUTFILE,
    'str' => \$STRFLAG,
    'seq' => \$SEQFLAG,
    'seqd=s' => \$SEQDATABASE,
    'lim=i' => \$SEQLIMIT,
    'blim=i' => \$SEQBLASTLIMIT,
    'pre' => \$EVALFLAG,
    'noin' => \$NOINFLAG,
    'mod=s' => \$OUTTYPE,
    'run=s' => \$RUNMODE,
    'trd=i' => \$THREADCOUNT,
);
GetOptions(%optspec);
my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;
print STDERR "[Seekquencer-premafft 4.4 on $^O]\n";

# per-process scratch directory below the current directory
my $CWD = getcwd;
my $TMP = "$CWD/seekpremafft$$";
if ( !-d $TMP )
{
    make_path($TMP);
}

######
# validation
if ( !defined $INPUTFILE && !defined $IDLISTFILE && !defined $SEQFASTAFILE )
{
    help("Required parameter: define input as '-inp' or '-idf' or '-seqf'");
}
if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) )
{
    help("'-inp' is already defined");
}
# every input that was given must exist and be non-empty
if ( defined $INPUTFILE && (! -e $INPUTFILE || !-s $INPUTFILE) )
{
    help("Input file $INPUTFILE does not exist (or filesize is 0)");
}
if ( defined $IDLISTFILE && (! -e $IDLISTFILE || !-s $IDLISTFILE) )
{
    help("Input file $IDLISTFILE does not exist (or filesize is 0)");
}
if ( defined $SEQFASTAFILE && (! -e $SEQFASTAFILE || !-s $SEQFASTAFILE) )
{
    help("Input file $SEQFASTAFILE does not exist (or filesize is 0)");
}
if ( !defined $OUTPUTFILE )
{
    help("Required parameter: output file '-out'");
}
if ( defined $STRFLAG && defined $SEQFLAG )
{
    help("Set either '-str' or '-seq' or dont set any at all");
}
# enumerated options must carry one of their known values
unless ( grep { $SEQDATABASE eq $_ } qw(uniref100 uniref90 uniref70 uniprot) )
{
    help("Invalid value for '-seqd <uniref100|uniref90|uniref70|uniprot>'");
}
unless ( grep { $OUTTYPE eq $_ } qw(fasta mafftash mafftash-split) )
{
    help("Invalid value for '-mod <fasta|mafftash|mafftash-split>'");
}
unless ( grep { $RUNMODE eq $_ } qw(thread normal) )
{
    help("Invalid value for '-run <thread|normal>'");
}
if ( $RUNMODE eq "thread" && ($THREADCOUNT <= 0 || $THREADCOUNT > 5) )
{
    help("Invalid value for '-trd <count>'; count should be between 1 and 5 (inclusive)");
}
######
# check existing requests
print STDERR "Checking server status...\n";
# seed the random generator with the process id
srand($$);
# sleep a bit (1 to 6 seconds) to give time for lsf response
my $startdelay = int(rand(6)) + 1;
sleep($startdelay);
my $browser = LWP::UserAgent->new;
$browser->timeout(0);
# get: check if you can send a new request this time
my $jobsResponse = $browser->get("$BASEURL/isAllowed");
unless ( $jobsResponse->is_success )
{
    bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content)));
}
# a value of 0 means the server accepts no new job right now; that case ends
# the script with exit status 0
my $allowed = parseJobQueryResponse($jobsResponse->content);
if ( $allowed <= 0 )
{
    bail("Max jobs reached. The server cannot process your request right now; try again later.", 0);
}
######
# make a temporary input if lists were provided
unless ( defined $INPUTFILE )
{
    $INPUTFILE = "$TMP/input.homemade";
    # three-argument opens with lexical handles: user-supplied names are taken
    # literally instead of being parsed for mode characters
    open(my $inpf, '>', $INPUTFILE) or bail("Error writing to input file.");
    if ( defined $IDLISTFILE )
    {
        open(my $idlist, '<', $IDLISTFILE) or bail("Error reading input file.");
        while ( my $line = <$idlist> )
        {
            chomp $line;
            # the first run of 5 word characters on a line is taken as the id
            if ( $line =~ /(\w{5})/ )
            {
                print {$inpf} ">PDBID\n$1\n";
            }
        }
        close $idlist;
    }
    if ( defined $SEQFASTAFILE )
    {
        open(my $fasta, '<', $SEQFASTAFILE) or bail("Error reading input file.");
        while ( my $line = <$fasta> )
        {
            # copied line by line; chomp + "\n" guarantees a final newline
            chomp $line;
            print {$inpf} "$line\n";
        }
        close $fasta;
    }
    # write errors only surface on close; do not upload a truncated input
    close($inpf) or bail("Error writing to input file.");
}
######
# prepare parameters
print STDERR "Preparing parameters for service request...\n";
# -str requests structures only, -seq sequences only, neither requests both
my $wantstructures = ( defined $STRFLAG || !defined $SEQFLAG ) ? 1 : 0;
my $wantsequences = defined $STRFLAG ? 0 : 1;
# form fields of the request, as a flat name => value list; the array-ref
# value names the file to upload
my @parameters =
(
    "fileinput" => ["$INPUTFILE"],
    "out_type" => $OUTTYPE,
    "rest_flag" => "1",
    "cls_flag" => "1",
);
push(@parameters, "pre_flag" => "1") if defined $EVALFLAG;
push(@parameters, "noin_flag" => "1") if defined $NOINFLAG;
push(@parameters, "run_mode" => $RUNMODE);
push(@parameters, "thread_count" => $THREADCOUNT) if $RUNMODE eq "thread";
if ( $wantstructures )
{
    push(@parameters,
        "str_flag" => "1",
        "ash_flag" => "1",
    );
}
if ( $wantsequences )
{
    push(@parameters,
        "seq_flag" => "1",
        "seq_algorithm" => "fast",
        "seq_database" => $SEQDATABASE,
        "seq_blastlimit" => $SEQBLASTLIMIT,
        "seq_outputlimit" => $SEQLIMIT,
    );
}
######
# start rest service
print STDERR "Sending service request...\n";
# post: running a mafftash job
my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);
# get response from post request
my ($status, $seekid) = parseResponse($postResponse->content);
# without a job id every poll below would query "$BASEURL/" and never finish
bail("Error: Unexpected response from server; no job id received!") if $seekid eq "";
my $MAXTRIES = 3;
my $STIMER = 5;
my $timer = 0;
print STDERR "Request sent! Waiting for response...[$seekid]\n";
# name => checksum of every result file; filled once the job is done
my $checklist = {};
# wait for results until it becomes available
while(1)
{
    # sleeps for 5+random, 10+random, 15+random, 20+random, 25+random, 30+random ,,, 60+random, 60+random,,,
    $timer = $timer >= 60 ? 60 : $timer+$STIMER;
    sleep($timer+int(rand(4)));
    # get: get results for mafftash job
    my $getResponse = $browser->get("$BASEURL/$seekid");
    unless ( $getResponse->is_success )
    {
        bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content)));
    }
    # get response from get request; keep the known job id when a reply
    # cannot be parsed, so the next poll still addresses this job
    my ($polledstatus, $polledid) = parseResponse($getResponse->content);
    $status = $polledstatus;
    $seekid = $polledid if $polledid ne "";
    next unless ( $status eq "done" );
    # if job is finished and ready
    print STDERR "Results found!\n";
    my $csfile = "$TMP/checksum";
    my $try1 = 1;
    while(1)
    {
        print STDERR "Fetching Results... [Trial $try1]\n";
        if ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
        {
            # get response from get request
            $checklist = extractchecksum($csfile);
            bail("Error retrieving list of compressed files!") unless ( %$checklist );
            foreach my $id ( sort keys %$checklist )
            {
                # names come from the server and are used as local file names:
                # refuse anything that could leave the scratch directory
                bail("Error retrieving list of compressed files! [Invalid file name]") if ( $id =~ m{[/\\]} || $id =~ /^\.\.?$/ );
                sleep 1;
                my $checkfile = "$TMP/$id";
                my $checkid = $checklist->{$id};
                my $try2 = 1;
                while(1)
                {
                    unlink $checkfile if -e $checkfile;
                    if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
                    {
                        # no checksum verification on Windows
                        last if $ISWINDOWS;
                        my $hashid = getchecksum($checkfile);
                        # an empty local checksum means none could be computed;
                        # the download is accepted unverified in that case
                        last if ( $hashid eq "" || $hashid eq $checkid );
                        bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
                    }
                    else
                    {
                        bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
                    }
                    $try2++;
                    sleep $STIMER;
                }
            }
            last;
        }
        else
        {
            bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
            $try1++;
            sleep $STIMER;
        }
    }
    last;
}
# Stitch the downloaded result pieces together into the requested output file(s),
# then remove the scratch directory.
print STDERR "Assembling final results...\n";
for my $piece ( sort keys %{$checklist} )
{
    # only entries whose name starts with "<jobid>.out" are results; an optional
    # ".str" / ".seq" suffix is carried over to the output file name
    next unless $piece =~ /^$seekid\.out(\.str|\.seq)?/;
    my $suffix = $1;
    my $fetched = "$TMP/$piece";
    bail("Error: Output file corrupted!") unless -e $fetched;
    appendToFile($fetched, "$OUTPUTFILE".$suffix);
}
cleanup();
| #################### | ||
| #################### | ||
# Split a "<jobid>:<status>" reply into its two fields.
# Returns the list (status, jobid); both are empty strings when the reply
# does not have exactly that shape.
sub parseResponse
{
    my ($reply) = @_;
    my ($jobid, $state) = ( $reply =~ /^([^\s:]+):([^\s:]+)$/ );
    return ("", "") unless defined $jobid;
    return ($state, $jobid);
}
# Interpret the reply of the "isAllowed" query.
# Returns the number contained in the reply when it consists of digits only,
# otherwise the fallback value 100.
sub parseJobQueryResponse
{
    my ($reply) = @_;
    return ( $reply =~ /^(\d+)$/ ) ? $1 : 100;
}
# Read a plain-text checksum list ("<checksum> <name>" per line).
# Returns a hash reference mapping name => checksum; lines that do not consist
# of exactly two whitespace-separated fields are ignored. An unreadable file
# yields an empty hash.
sub extractchecksum
{
    my $infile = shift;
    my %dataset = ();
    # three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters or whitespace trimming) and no global
    # handle is shared with the rest of the script
    open(my $csum, '<', $infile) or return \%dataset;
    while ( my $line = <$csum> )
    {
        chomp $line;
        if ( $line =~ /^(\S+)\s+(\S+)$/ )
        {
            $dataset{$2} = $1;
        }
    }
    close $csum;
    return \%dataset;
}
# Pull the message out of a reply containing "error":"<message>".
# When no such field is present the reply itself is returned unchanged.
sub parseError
{
    my ($reply) = @_;
    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
    {
        return $1;
    }
    return $reply;
}
# Compute the MD5 checksum of a file with the system's md5 tool.
# Returns the hex digest, or "" when neither /usr/bin/md5sum nor /sbin/md5 is
# executable, the tool cannot be started, or it prints no digest.
sub getchecksum
{
    my $infile = shift;
    # md5 binary check
    my @md5cmd = ();
    if ( -x "/usr/bin/md5sum" )
    {
        @md5cmd = ("/usr/bin/md5sum");
    }
    elsif ( -x "/sbin/md5" )
    {
        @md5cmd = ("/sbin/md5", "-q");
    }
    return "" unless @md5cmd;
    my $checksum = "";
    # list-form pipe open: the file name is passed as a single argument and
    # never parsed by a shell, so paths with spaces or metacharacters work
    open(my $md5out, '-|', @md5cmd, $infile) or return "";
    while ( my $line = <$md5out> )
    {
        # the digest is the first field; md5sum prefixes the line with a
        # backslash when it had to escape characters in the file name
        if ( $line =~ /^\\?([0-9a-fA-F]+)(?:\s|$)/ )
        {
            $checksum = $1;
            last;
        }
    }
    close $md5out;
    return $checksum;
}
# Run a command line through backticks, discarding its output.
# Returns 1 when the command ran and exited with status 0, otherwise 0.
# (Checking only for $? == -1 would report success for every command that
# could be launched, no matter how it exited.)
sub backticks
{
    my $command = shift;
    `$command`;
    return ($? == 0) ? 1 : 0;
}
# Print an optional message to STDERR, remove the scratch directory and exit.
#   $str    - message to print (skipped when undefined)
#   $status - exit status: 0 for success, 1 for error; defaults to 1
sub bail
{
    my $str = shift;
    my $status = shift;
    # test $status itself: a bare "defined" would test $_ instead, which made
    # the exit status depend on whatever $_ happened to hold
    $status = 1 unless defined $status;
    print STDERR "$str\n" if defined $str;
    cleanup();
    exit($status);
}
# Remove the scratch directory $TMP together with everything inside it.
# Does nothing when $TMP is unset, empty or not a directory.
sub cleanup
{
    return if ( !defined $TMP || $TMP eq "" || !-d $TMP );
    # remove_tree deletes the directory contents itself, so no separate
    # readdir/unlink pass (which also hit "." and "..") is needed
    remove_tree($TMP);
}
# Append the contents of $inpfile to $outfile (created when missing).
# Calls bail() when the input cannot be read or the output cannot be written.
sub appendToFile
{
    my $inpfile = shift;
    my $outfile = shift;
    # three-argument opens with lexical handles: file names are taken
    # literally and no global handle is shared with the main script
    open(my $in, '<', $inpfile) or bail("Server Error: Error in reading file.");
    open(my $out, '>>', $outfile) or bail("Server Error: Error in writing to file.");
    while ( my $line = <$in> )
    {
        print {$out} $line;
    }
    # buffered write errors (e.g. disk full) only surface on close
    close($out) or bail("Server Error: Error in writing to file.");
    close $in;
}
# Print the usage text to STDOUT, then hand the given message to bail(),
# which prints it to STDERR and terminates the script.
# Text fixes: the -idf argument is consistently called LISTFILE and the
# "writen" typo is corrected.
sub help
{
    my $str = shift;
    my $usage = <<'HELPME';
USAGE
./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]
PARAMETERS
-inp <INFILE>
INFILE is a FASTA-formatted file
PDB entries are written as:
>PDBID
[5-character pdbid+chain]
While sequence entries are written as:
>[id]
[sequence]
-idf <LISTFILE>
LISTFILE is a file containing a list of pdbids
pdbids should be a 5-character pdbid + chain
-seqf <SEQFASTA>
SEQFASTA is a fasta file
entries are written as:
>[id]
[sequence]
-out <OUTFILE>
Results are written to a file named OUTFILE
-str
Only structures will be collected by Seekquencer
If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
-seq
Only sequences will be collected by Seekquencer
If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
OPTIONAL PARAMETERS:
-seqd <uniref100|uniref90|uniref70|uniprot>
Search Database for sequence homologs. Default value: uniref100
-lim <count>
this sets the maximum number of sequence homologs collected. Default value: 100
-blim <count>
this sets the -b and -v value when running blastall. Default value: 100
-pre
When -str is set, this will compare all structures against all using pdp-ash
This would ensure that all structures collected are matching
All structures that do not match will be removed
-noin
When set, inputs will not be included in the output
-mod <mafftash|mafftash-split|fasta>
Defines the output format
mafftash (default) will print a mafftash-formatted fasta file
mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)
fasta will print a regular fasta file
-run <thread|normal>
thread will run simultaneous jobs during blast queries (faster but takes more nodes)
normal will run sequential blast queries (slower but takes less nodes)
Default value: normal
-trd <count>
if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3
HELPME
    print $usage;
    bail($str);
}
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is too big to display
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
937413
-99.62%82
-63.56%