This repository has been archived by the owner on May 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompseq.yaml
181 lines (181 loc) · 8.25 KB
/
compseq.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# Mobyle program description for `compseq` (see `description` below).
#
# Layout: the program holds an `inputs` paragraph and an `outputs` paragraph;
# each paragraph nests further paragraphs and parameters under `children`.
# NOTE(review): the `format` values look like Python expressions evaluated
# with `value` (the user's input) and `vdef` (the declared default) in scope
# -- confirm against the Mobyle command builder.
!mobyle/program
name: compseq
title: compseq
description: Calculate the composition of unique words in sequences
inputs: !mobyle/inputparagraph
  children:
    - !mobyle/inputprogramparagraph
      prompt: Input section
      name: input
      children:
        - !mobyle/inputprogramparameter
          prompt: Sequence option
          format: ("", " -sequence=" + str(value))[value is not None]
          simple: true
          argpos: 1
          mandatory: true
          name: sequence
          command: false
          type: !mobyle/formattedtype
            format_terms:
              - 'EDAM_format:1927'
              - 'EDAM_format:2200'
              - 'EDAM_format:1935'
              - 'EDAM_format:1936'
              - 'EDAM_format:1948'
              - 'EDAM_format:1957'
              - 'EDAM_format:2188'
            data_terms: ['EDAM_data:2044']
        - !mobyle/inputprogramparameter
          comment: "This is a file previously produced by 'compseq' that can be\
            \ used to set the expected frequencies of words in this analysis.\
            \ \n The word size in the current run must be the same as the one\
            \ in this results file. Obviously, you should use a file produced\
            \ from protein sequences if you are counting protein sequence word\
            \ frequencies, and you must use one made from nucleotide frequencies\
            \ if you are analysing a nucleotide sequence."
          prompt: Program compseq output file (optional)
          format: ("", " -infile=" + str(value))[value is not None]
          argpos: 2
          name: infile
          command: false
          type: !mobyle/formattedtype
            data_terms: ['EDAM_data:2048']
    - !mobyle/inputprogramparagraph
      prompt: Required section
      name: required
      children:
        - !mobyle/inputprogramparameter
          comment: "This is the size of word (n-mer) to count. \n Thus if you want\
            \ to count codon frequencies for a nucleotide sequence, you should\
            \ enter 3 here."
          prompt: Word size to consider (e.g. 2=dimer) (value greater than or equal
            to 1)
          # The option is only emitted when the value differs from the default.
          format: ("", " -word=" + str(value))[value is not None and value!=vdef]
          simple: true
          argpos: 3
          mandatory: true
          name: word
          command: false
          type: !mobyle/integertype {default: 2}
          ctrls:
            - message: Value greater than or equal to 1 is required
              test:
                value: {'#gte': '1'}
    - !mobyle/inputprogramparagraph
      prompt: Additional section
      name: additional
      children:
        - !mobyle/inputprogramparameter
          comment: "The normal behaviour of 'compseq' is to count the frequencies\
            \ of all words that occur by moving a window of length 'word' up\
            \ by one each time. \n This option allows you to move the window\
            \ up by the length of the word each time, skipping over the intervening\
            \ words. \n You can count only those words that occur in a single\
            \ frame of the word by setting this value to a number other than\
            \ zero. \n If you set it to 1 it will only count the words in frame\
            \ 1, 2 will only count the words in frame 2 and so on."
          prompt: Frame of word to look at (0=all frames) (value greater than or
            equal to 0)
          # The option is only emitted when the value differs from the default.
          format: ("", " -frame=" + str(value))[value is not None and value!=vdef]
          argpos: 4
          name: frame
          command: false
          type: !mobyle/integertype {default: 0}
          ctrls:
            - message: Value greater than or equal to 0 is required
              test:
                value: {'#gte': '0'}
        - !mobyle/inputprogramparameter
          comment: "The amino acid code B represents Asparagine or Aspartic acid\
            \ and the code Z represents Glutamine or Glutamic acid. \n These\
            \ are not commonly used codes and you may wish not to count words\
            \ containing them, just noting them in the count of 'Other' words."
          prompt: Ignore the amino acids b and z and just count them as 'other'
          # Inverted flag: a false value selects " -noignorebz", a true value
          # adds nothing (assuming Python tuple indexing by bool(value)).
          format: (" -noignorebz", "")[ bool(value) ]
          argpos: 5
          name: ignorebz
          command: false
          type: !mobyle/booleantype {default: false}
        - !mobyle/inputprogramparameter
          comment: Set this to be true if you also wish to count words in the
            reverse complement of a nucleic sequence.
          prompt: Count words in the forward and reverse sense
          format: ("", " -reverse")[ bool(value) ]
          argpos: 6
          name: reverse
          command: false
          type: !mobyle/booleantype {default: false}
        - !mobyle/inputprogramparameter
          comment: "If this is set true then the expected frequencies of words are\
            \ calculated from the observed frequency of single bases or residues\
            \ in the sequences. \n If you are reporting a word size of 1 (single\
            \ bases or residues) then there is no point in using this option\
            \ because the calculated expected frequency will be equal to the\
            \ observed frequency. \n Calculating the expected frequencies like\
            \ this will give an approximation of the expected frequencies that\
            \ you might get by using an input file of frequencies produced by\
            \ a previous run of this program. If an input file of expected word\
            \ frequencies has been specified then the values from that file will\
            \ be used instead of this calculation of expected frequency from\
            \ the sequence, even if 'calcfreq' is set to be true."
          prompt: Calculate expected frequency from sequence
          format: ("", " -calcfreq")[ bool(value) ]
          argpos: 7
          name: calcfreq
          command: false
          type: !mobyle/booleantype {default: false}
    - !mobyle/inputprogramparagraph
      prompt: Output section
      name: output
      children:
        - !mobyle/inputprogramparameter
          comment: This is the results file.
          prompt: Name of the output file (outfile)
          format: ("" , " -outfile=" + str(value))[value is not None]
          argpos: 8
          name: outfile
          command: false
          type: !mobyle/stringtype {default: outfile.composition}
        - !mobyle/inputprogramparameter
          comment: You can make the output results file much smaller if you do not
            display the words with a zero count.
          prompt: Display the words that have a frequency of zero
          # Inverted flag: a false value selects " -nozerocount", a true value
          # adds nothing (assuming Python tuple indexing by bool(value)).
          format: (" -nozerocount", "")[ bool(value) ]
          argpos: 9
          name: zerocount
          command: false
          type: !mobyle/booleantype {default: false}
        - !mobyle/inputprogramparameter
          prompt: Turn off any prompting
          # Constant string, independent of any user value; the parameter is
          # hidden.
          format: '" -auto -stdout"'
          argpos: 10
          name: auto
          command: false
          hidden: true
          type: !mobyle/stringtype {}
outputs: !mobyle/outputparagraph
  children:
    - !mobyle/outputprogramparagraph
      prompt: Output section
      name: output
      children:
        - !mobyle/outputprogramparameter
          prompt: Outfile_out option
          # Refers to the `outfile` input parameter by name.
          filenames: outfile
          name: outfile_out
          type: !mobyle/formattedtype
            data_terms: ['EDAM_data:2048']
        - !mobyle/outputprogramparameter
          prompt: Standard output
          filenames: '"compseq.out"'
          name: stdout
          output_type: stdout
          type: !mobyle/formattedtype
            data_terms: ['EDAM_data:2048']
        - !mobyle/outputprogramparameter
          prompt: Standard error
          filenames: '"compseq.err"'
          name: stderr
          # NOTE(review): unlike the stdout entry, no `output_type` is declared
          # here -- confirm whether `output_type: stderr` is expected.
          type: !mobyle/formattedtype
            data_terms: ['EDAM_data:2048']
operations: ['EDAM_operation:0236']
topics: ['EDAM_topic:0157']
documentation_links:
  - 'http://bioweb2.pasteur.fr/docs/EMBOSS/compseq.html'
  - 'http://emboss.sourceforge.net/docs/themes'
command: compseq
env: {}