-
Notifications
You must be signed in to change notification settings - Fork 3
47 lines (46 loc) · 1.95 KB
/
validateTaxonomy.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Builds the Kalamari taxonomy from a fresh checkout, validates the full and
# filtered taxonomy directories, and checks that every taxid referenced by
# src/chromosomes.tsv and src/plasmids.tsv exists in the built taxonomy dumps.
name: Validate taxonomy

on:
  push:
    branches: [master, dev, esearch-input]

jobs:
  build:
    name: Perl ${{ matrix.perl }} on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # The ubuntu-20.04 hosted image has been retired by GitHub, so a job
        # requesting it is never picked up. Pin a supported image instead.
        os: ['ubuntu-22.04']
        # Quoted so the version stays a string rather than a float.
        perl: ['5.32']
    steps:
      - name: Set up perl
        uses: shogo82148/actions-setup-perl@v1
        with:
          perl-version: ${{ matrix.perl }}
          multi-thread: "true"

      # Checked out into ./Kalamari so the paths below are stable.
      - name: checkout my repo
        uses: actions/checkout@v4
        with:
          path: Kalamari

      # Entries appended to $GITHUB_PATH only take effect in later steps, so
      # the PATH echoed here is still the old one; the file is printed to show
      # what was queued.
      - name: update PATH
        run: |
          echo "$GITHUB_WORKSPACE/Kalamari/bin" >> "$GITHUB_PATH"
          echo "$PATH"
          echo ""
          cat "$GITHUB_PATH"

      - name: build taxonomy
        run: |
          echo "$PATH"
          bash Kalamari/bin/buildTaxonomy.sh
          bash Kalamari/bin/filterTaxonomy.sh
          ls -lhR Kalamari/share/kalamari-*/taxonomy

      - name: validate taxonomy
        run: |
          perl Kalamari/bin/validateTaxonomy.pl Kalamari/share/kalamari-*/taxonomy

      - name: validate filtered taxonomy
        run: |
          perl Kalamari/bin/validateTaxonomy.pl Kalamari/share/kalamari-*/taxonomy/filtered

      # Cross-checks columns 3 and 4 of the chromosome/plasmid tables (header
      # line skipped) against the first tab-separated field of each line in
      # nodes.dmp and names.dmp. Every missing taxid is reported, and the step
      # fails if any were missing in either file.
      - name: matching taxids
        run: |
          # Assign before exporting so a failing `ls` is not masked by `export`.
          taxdir=$(\ls -d Kalamari/share/kalamari-*/taxonomy)
          export taxdir
          echo "Making sure that all taxids in chromosomes.tsv and plasmids.tsv are present in nodes.dmp and names.dmp"
          status=0
          for dmp in nodes.dmp names.dmp; do
            export dmp
            echo "Checking $dmp"
            # `|| status=1` keeps `bash -e` from stopping at the first failing
            # file, so both dumps are always checked before the step exits.
            tail -q -n +2 Kalamari/src/chromosomes.tsv Kalamari/src/plasmids.tsv |
              perl -F'\t' -lane '
                BEGIN {
                  @rows = `cat $ENV{taxdir}/$ENV{dmp}`;
                  for $row (@rows) { ($taxid) = split(/\t/, $row); $known{$taxid}++; }
                }
                for my $t ($F[2], $F[3]) {
                  if (!$known{$t}) { print "Could not find $t taxid"; $missing++; }
                }
                END { $? = 1 if $missing; }
              ' || status=1
          done
          exit "$status"