# Page residue from the web view this file was copied from;
# workflow run title: "Esearch input (#47)" (#106).

# Builds the Kalamari taxonomy, validates both the full and the filtered
# taxonomy, and checks that every taxid referenced by the source tables is
# present in the generated taxonomy dumps.
on:
  push:
    branches: [master, dev, validate-taxonomy]

name: Validate taxonomy

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # The ubuntu-20.04 hosted image has been retired by GitHub, so jobs
        # pinned to it never start; use the next pinned LTS image instead.
        os: ['ubuntu-22.04']
        # Quoted so the version stays a string (5.30 would otherwise be 5.3).
        perl: ['5.32']
    name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"

      # The repository is checked out into ./Kalamari rather than the
      # workspace root; every later path is relative to that.
      - name: checkout my repo
        uses: actions/checkout@v4
        with:
          path: Kalamari

      # Appending to $GITHUB_PATH only affects *subsequent* steps, so the
      # PATH echoed here does not yet contain Kalamari/bin.
      - name: update PATH
        run: |
          echo "$GITHUB_WORKSPACE/Kalamari/bin" >> "$GITHUB_PATH"
          echo "$PATH"
          echo ""
          cat "$GITHUB_PATH"

      - name: build taxonomy
        run: |
          echo "$PATH"
          bash Kalamari/bin/buildTaxonomy.sh
          bash Kalamari/bin/filterTaxonomy.sh
          ls -lhR Kalamari/share/kalamari-*/taxonomy

      - name: validate taxonomy
        run: |
          perl Kalamari/bin/validateTaxonomy.pl Kalamari/share/kalamari-*/taxonomy

      - name: validate filtered taxonomy
        run: |
          perl Kalamari/bin/validateTaxonomy.pl Kalamari/share/kalamari-*/taxonomy/filtered

      # For each of nodes.dmp and names.dmp: collect the first tab-separated
      # field of every line as the set of known taxids, then verify that
      # columns 3 and 4 of every data row in chromosomes.tsv and plasmids.tsv
      # appear in that set. The step fails if any taxid is missing.
      - name: matching taxids
        run: |
          # Assign and export separately so a failing `ls` is not masked.
          taxdir=$(\ls -d Kalamari/share/kalamari-*/taxonomy)
          export taxdir
          echo "Making sure that all taxids in chromosomes.tsv and plasmids.tsv are present in nodes.tsv and names.tsv"

          # Check both dump files before failing so one run reports everything.
          status=0
          for dmp in nodes.dmp names.dmp; do
            export dmp
            echo "Checking taxids against $dmp"
            # -q suppresses the per-file headers tail prints for multiple
            # files; -n +2 skips the header row of each table.
            tail -q -n +2 Kalamari/src/chromosomes.tsv Kalamari/src/plasmids.tsv \
              | perl -F'\t' -lane '
                  BEGIN {
                    my $path = "$ENV{taxdir}/$ENV{dmp}";
                    # Fail loudly if the dump is missing instead of treating
                    # it as an empty taxonomy.
                    open(my $fh, "<", $path) or die "Cannot open $path: $!";
                    while (my $line = <$fh>) {
                      my ($taxid) = split(/\t/, $line);
                      $taxid{$taxid}++;
                    }
                    close($fh);
                  }
                  for my $t ($F[2], $F[3]) {
                    if (!$taxid{$t}) {
                      print "Could not find $t taxid";
                      $missing++;
                    }
                  }
                  # Setting $? inside END changes the exit status of perl.
                  END { $? = 1 if $missing }
                ' || status=1
          done
          exit "$status"