-
Notifications
You must be signed in to change notification settings - Fork 17
/
Pythonizer.pm
477 lines (458 loc) · 17.4 KB
/
Pythonizer.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
package Pythonizer;
#
## ABSTRACT: Supplementary subroutines for pythonizer
## Includes logging subroutine: logme, abend, out, getopts and helpme
## Copyright Nikolai Bezroukov, 2019-2020.
## Licensed under Perl Artistic license
# Ver Date Who Modification
# ===== ========== ======== ==============================================================
# 00.00 2019/10/10 BEZROUN Initial implementation. Limited by the rule "one statement-one line"
# 00.10 2019/11/19 BEZROUN The prototype is able to process the minimal test (with multiple errors) but still
# 00.11 2019/11/19 BEZROUN autocommit now allow to save multiple modules in addition to the main program
# 00.12 2019/12/27 BEZROUN Notions of ValType was introduced in preparation of introduction of pre_processor.pl version 0.2
# 00.20 2020/02/03 BEZROUN getline was moved from pythonyzer.
# 00.30 2020/08/05 BEZROUN preprocess_line was folded into getline.
# 00.40 2020/08/17 BEZROUN getops is now implemented in Softpano.pm to allow the repretion of option letter to set the value of options ( -ddd)
# 00.50 2020/08/24 BEZROUN Option -p added
# 00.60 2020/08/25 BEZROUN __DATA__ and __END__ processing added
# 00.61 2020/08/25 BEZROUN POD processing added Option - r (refactor) added
# 00.70 2020/09/03 BEZROUN Stack manipulation defined more completly and moved from main script to Pythonizer.om
# 00.80 2020/09/17 BEZROUN Basic global varibles detection added. Global statement now is generated for each subroutine
# 00.90 2020/10/12 BEZROUN Option -l added. Output format improved. Many small fixes
use v5.10.1;
use warnings;
use strict 'subs';
use feature 'state';
use Perlscan qw(tokenize $TokenStr @ValClass @ValPerl @ValPy @ValType);
use Softpano qw(abend logme out getopts standard_options);
require Exporter;
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
@ISA = qw(Exporter);
@EXPORT = qw(preprocess_line correct_nest getline prolog output_line @LocalSub %GlobalVar);
our ($IntactLine, $output_file, $NextNest,$CurNest, $line);
$::TabSize=3;
$::breakpoint=0;
$NextNest=$CurNest=0;
$MAXNESTING=9;
$VERSION = '0.80';
$refactor=0; # option -r flag (invocation of pre-pythonizer)
$PassNo=0; # EXTERNAL VAR: 0 -- the first pass ( reading from @InputTextA); 1 -- the second pass(reading from STDIN)
$InLineNo=0; # counter, pointing to the current like in InputTextA during the first pass
%LocalSub=(); # list of local subs
%GlobalVar=(); # generated "external" declaration with the list of global variables.
$maxlinelen=188;
#
#::prolog -- Decode parameter for the pythonizer. all parameters are exported
#
sub prolog
{
getopts("hd:v:r:b:t:l:",\%options);
#
# Three standard otpiotn -h, -v and -d
#
standard_options(\%options);
#
# Custom options specific for the application
#
if( exists $options{'r'} ){
if( $options{'r'} eq ''){
$refactor='./pre_pythonizer.pl';
}else{
if( -f $options{'r'} ){
$refactor=$options{'r'};
}else{
logme('S',"The Script $options{'r'} does not exist (may be you need to specify path to the file)\n");
exit 255;
}
}
unless (-x $refactor ){
logme('S',"File $options{'r'} specifed in option -r is not executable\n");
exit 255;
}
}
if( exists $options{'b'} ){
unless ($options{'b'}){
logme('S',"Option -b should have a numberic value. There is no default.");
exit 255;
}
if( $options{'b'}>0 && $options{'b'}<9000 ){
$::breakpoint=$options{'b'};
($::debug) && logme('W',"Breakpoint set to line $::breakpoint");
}else{
logme('S',"Wrong value of option -b ( breakpoint): $options('b')\n");
exit 255;
}
}
if( exists $options{'t'} ){
$options{'t'}=1 if $options{'t'} eq '';
if( $options{'t'}>1 && $options{'t'}<10 ){
$::TabSize=$options{'t'};
}else{
logme('S',"Range for options -t (tab size) is 2-10. You specified: $options('t')\n");
exit 255;
}
}
if( exists $options{'w'} ){
if($options{'w'}<100 && $options{'w'}>256 ){
$maxlinelen=$options{'w'};
if( $maxlinelen//2==1 ){
$maxlinelen-=1;
}
}else{
logme('S',"Incorrect value for length of the line in protocol of tranlation: $options{'w'}\n Minimum is 100. Max is 256. Default value 188 is assumned \n");
}
}#
#
# Application arguments
#
if( scalar(@ARGV)==1 ){
$fname=$ARGV[0];
unless( -f $fname ){
abend("Input file $fname does not exist");
}
$source_file=substr($ARGV[0],0,rindex($ARGV[0],'.'));
$output_file=$source_file.'.py';
out("Results of transcription are written to the file $output_file");
if( $refactor ){
out("Option -r (refactor) was specified. file refactored using $refactor as the fist pass over the source code");
`pre_pythonizer -v 0 $fname`;
}
unless( -f "$fname.bak" ){
`cp -p $fname $fname.bak && tr -d "\r" < $fname.bak > $fname`; # just in case
}
$fsize=-s $fname;
if ($fsize<10){
abend("The size of the file is $fsize. Nothing to do. Exiting");
}
unless( -r $fname ){
abend("File does not have read permissions for the user");
}
open (STDIN, '<',$fname) || die("Can't open $fname for reading $!");
}else{
abend("Input file should be supplied as the first argument");
}
if( $debug){
print STDERR "ATTENTION!!! Working in debugging mode debug=$debug\n";
}
out("=" x 121,"\n");
get_globals();
close STDIN;
open (STDIN, '<',$fname) || die("Can't open $fname for reading");
$PassNo=1;
open(SYSOUT,'>',$output_file) || die("Can't open $output_file for writing");
return;
} # prolog
sub get_globals
#
# This suroutine creates two hashes
# 1. hash $GlobalVar with declations of global variables used in particula surtutine
# 2. %LocalSub; -- list of subs in the program
#
{
#
# Arrays and hashes for varible analyses
#
my ( $varname, $subname, $CurSubName,$i,$k,$var_usage_in_subs);
my %DeclaredVarH=(); # list of my varibles in the current subroute
my %VarSubMap=(); # matrix var/sub that allows to create list of global for each sub
$CurSubName='main';
$LocalSub{'main'}=1;
while(1){
if( scalar(@Perlscan::BufferValClass)==0 ){
$line=getline(); # get the first meaningful line, skipping commenets and POD
last unless(defined($line));
if( $::debug==2 && $.== $::breakpoint ){
say STDERR "\n\n === Line $. Perl source: $line ===\n";
$DB::single = 1;
}
Perlscan::tokenize($line);
}else{
#process token buffer -- Oct 9, 2020 --NNB
@ValClass=@Perlscan::BufferValClass;
$TokenStr=join('',@ValClass);
@ValPerl=@Perlscan::BufferValPerl;
@ValPy=@Perlscan::BufferValPy;
}
unless(defined($ValClass[0])){
next;
}
if( $ValClass[0] eq 't' && $ValPerl[0] eq 'my' ){
for($i=1; $i<=$#ValClass; $i++ ){
last if( $ValClass[$i] eq '=' );
if( $ValClass[$i] =~/[sah]/ ){
$DeclaredVarH{$ValPy[$i]}=1; # this hash is need only for particular sub
}
}
if( $i<$#ValClass ){
for( $k=$i+1; $k<@ValClass; $k++ ){
if( $ValClass[$k]=~/[sah]/ ){
next if exists($DeclaredVarH{$ValPy[$k]});
next if( defined($ValType[$k]) && $ValType[$k] eq 'X');
$VarSubMap{$ValPy[$k]}{$CurSubName}='+';
}
} # for
}
}elsif( $ValPerl[0] eq 'sub' && $#ValClass==1 ){
$CurSubName=$ValPy[1];
$LocalSub{$CurSubName}=1;
%DeclaredVarH=(); # this is the list of my varible for given sub; does not needed for any other sub
}else{
for( $k=0; $k<@ValClass; $k++ ){
if( $ValClass[$k]=~/[sah]/ ){
next if exists($DeclaredVarH{$ValPy[$k]});
next if( defined($ValType[$k]) && $ValType[$k] eq 'X');
$VarSubMap{$ValPy[$k]}{$CurSubName}='+';
if( $ValPy[$k] =~/[\[\(]/){
say "=== Pass 1 INTERNAL ERROR in processing line $InLineNo Special variable is $ValPerl[$k] as $ValPy[$k]\n";
$DB::single = 1;
}
}
} # for
} # statements
} # while
foreach $varname (keys %VarSubMap ){
next if( $varname=~/[\(\[]/ );
next if( length($varname)==1 );
$var_usage_in_subs=scalar(keys %{$VarSubMap{$varname}} );
if( $var_usage_in_subs>1){
# Varible that is present in multiple subs assumed to be global
foreach $subname (keys %{$VarSubMap{$varname}} ){
$GlobalVar{$subname}.=','.$varname;
}
}
}
($::debug) && out("\nDETECTED GLOBAL VARIABLES:");
foreach $subname (keys %GlobalVar ){
$GlobalVar{$subname}='global '.substr($GlobalVar{$subname},1);
($::debug) && out "\t$subname: $GlobalVar{$subname}";
}
($::debug) && out("\nList of local subroutines:");
($::debug) && out(join(' ', keys %LocalSub));
#here we have already populated array Sub2name with the list of subs and $global_list with the list of global variables
}
sub get_here
#
#Extract here string with delimiter specified as the first argument
#
{
my $here_str;
while (substr($line,0,length($_[0])) ne $_[0] ){
$here_str.=$line;
$line=getline();
}
return '""""'."\n".$here_str."\n".'"""""'."\n";
} # get_here
sub getline
#
#get input line. It has now ability to buffer line, which will be scanned by tokeniser next.
#
{
state @buffer; # buffer to "postponed lines. Used for translation of postfix conditinals among other things.
return $line if( scalar(@Perlscan::BufferValClass)>0 ); # block input if we process token buffer Oct 8, 2020 -- NNB
if( scalar(@_)>0 ){
push(@buffer,@_); # buffer lines in the order they listed; they will be injected in the next call;
#if (scalar(@_)==3){
# say join('|',@_);
# $DB::single = 1;
#}
return;
}
while(1 ){
#
# firs we perform debufferization
#
if( scalar(@buffer) ){
$line=shift(@buffer);
}else{
$line=<>;
return $line unless (defined($line)); # End of file
}
chomp($line);
if( length($line)==0 || $line=~/^\s*$/ ){
output_line('') if( $PassNo); # blank line
next;
}elsif( $line =~ /^\s*(#.*$)/ ){
# pure comment lines
output_line('',$1) if( $PassNo);
next;
}elsif( $line =~ /^__DATA__/ || $line =~ /^__END__/){
# data block
return undef if( $PassNo==0 );
open(SYSDATA,'>',"$source_file.data") || abend("Can't open file $source_file.data for writing. Check permissions" );
logme('W',"Tail data after __DATA__ or __END__ line are detected in Perl Script. They are written to a separate file $source_file.data");
while( $line=<> ){
print SYSDATA $line;
}
close SYSDATA;
return $line;
}elsif( substr($line,0,1) eq '='){
# POD block
output_line('',q['''']);
while($line=<>){
last if( $line eq '=cut');
output_line('',$line) if( $PassNo);
}
output_line('',q['''']) if( $PassNo);
}
if( substr($line,-1,1) eq "\r" ){
chop($line);
}
$line =~ s/\s+$//; # trim tailing blanks
$line =~ s/^\s+//; # trim leading blanks
if ($line eq '{' || $line eq '}') {
$IntactLine='';
}else{
$IntactLine=$line;
}
return $line;
}
}
#::output_line -- Output line shifted properly to the current nesting level
# arg 1 -- actual PseudoPython generated line
# arg 2 -- tail comment (added Dec 28, 2019)
# arg 3 -- copy without processing ( (added Sep 3, 2020))
sub output_line
{
return if ($PassNo==0); # no output during the first pass
my $line=(scalar(@_)==0 ) ? $IntactLine : $_[0];
my $tailcomment=(scalar(@_)==2 ) ? $_[1] : '';
my $indent=' ' x $::TabSize x $CurNest;
my $flag=( defined($main::TrStatus) && $main::TrStatus < 0 ) ? 'F' : ' ';
my $len=length($line);
my $prefix=sprintf('%4u',$.)." |".sprintf('%2u',$CurNest)." | ".sprintf('%1s',$flag)." |";
my $zone_size=($maxlinelen-length($prefix))/2; # length of prefix is 20
my $start_of_comment_zone=$zone_size+length($prefix); # the start of comment_zone is 20+80=100.
# So the total line length=180
my $orig_tail_len=length($tailcomment);
my $i;
if( $tailcomment){
$tailcomment=($tailcomment=~/^\s+(.*)$/ ) ? $indent.$1 : $indent.$tailcomment;
}
# Special case of empty line or "pure" comment that needs to be indented
if( $len==0 ){
if( $::TrStatus < 0 ){
out($prefix,join(' ',@::ValPy)." #FAIL $IntactLine");
say SYSOUT join(' ',@::ValPy)." #FAIL $IntactLine";
}else{
out($prefix,$tailcomment);
say SYSOUT $tailcomment;
}
return;
}
if( scalar(@_)<3){
$line=($line=~/^\s+(.*)$/ )? $indent.$1 : $indent.$line;
}
say SYSOUT $line;
$line=$prefix.$line;
$len=length($line); # new length woth prefix containing line no and nesting
my (@lineblock,$filler);
if( scalar(@_)==1){
# no tailcomment
if( $IntactLine=~/^\s+(.*)$/ ){
$IntactLine=$1;
}
if( $len > $start_of_comment_zone ){
# long line
if( length($IntactLine) > $zone_size ){
out($line);
if (index($IntactLine,"\n")==-1){
print_intactline($IntactLine,$zone_size,$start_of_comment_zone);
}else{
@lineblock=split("\n",$IntactLine);
print_intactline($lineblock[0],$zone_size,$start_of_comment_zone);
for(my $i=1; $i<@lineblock;$i++){
print_intactline($lineblock[$i],$zone_size,$start_of_comment_zone);
}
}
}else{
out($line,' #PL: ',$IntactLine);
}
}else{
# short line without tail comment
$filler=' ' x ($start_of_comment_zone-$len);
if (index($IntactLine,"\n")==-1){
out($line,$filler,' #PL: ',$IntactLine);
}else{
@lineblock=split("\n",$IntactLine);
out($line,$filler,' #PL: ',$lineblock[0]); # its short so this is OK
for( $i=1; $i<@lineblock;$i++){
print_intactline($lineblock[$i],$zone_size,$start_of_comment_zone);
}
}
}
}else{
#line with tail comment
$i=index($tailcomment,"\n");
if($i==-1) {
out($line,' ',$tailcomment); # output with the original comment instead of Perl source
print_intactline(substr($IntactLine,0,-$orig_tail_len),$zone_size,$start_of_comment_zone); # print Perl source
}else{
@lineblock=split("\n",$IntactLine);
out($line,' ',$tailcomment); # output with tail comment instead of Perl comment
print_intactline(substr($lineblock[0],0,-$orig_tail_len),$zone_size,$start_of_comment_zone);
for( $i=1; $i<@lineblock;$i++){
print_intactline($lineblock[$i],$zone_size,$start_of_comment_zone);
}
}
}
} # output_line
sub print_intactline
{
my ($IntactLine,$zone_size,$start_of_comment_zone)=@_;
my $filling=' ' x $start_of_comment_zone;
if( length($IntactLine) > $zone_size ){
out( $filling,' #PL: ',substr($IntactLine,0,$zone_size));
out( $filling,' #+ : ',substr($IntactLine,$zone_size));
}else{
out($filling,' #PL: ',$IntactLine);
}
}
sub correct_nest
# Ensure proper indenting of the lines. Accepts two arguments
# if no arguments given it sets $CurNest=$NextNest;
# If only 1 ARG given inrements/decreaments $NextNest;
# NOTE: If zero is given sets NextNest to zero.
# if two arguments are given increments/decrements both NexNext and $CurNest
# NOTE: Special case -- if 0,0 is passed both set to zero
# Each argument checked against the min and max threholds befor processing. If the threshold exceeded the operation ignored.
{
my $delta;
if( scalar(@_)==0 ){
# if no arguments given set NextNest equal to CurNest
$CurNest=$NextNest;
return;
}
$delta=$_[0];
if( $delta==0 && scalar(@_)==1 ){
$NextNest=0;
return;
}
if( $NextNest+$delta > $MAXNESTING ){
if ($::debug>2) {
logme('E',"Attempt to set next nesting level above the treshold($MAXNESTING) ignored");
$DB::single = 1;
}
}elsif( $NextNest+$delta < 0 ){
if ($::debug>2 ) {
logme('S',"Attempt to set the next nesting level below zero ignored");
$DB::single = 1;
}
}else{
$NextNest+=$delta;
}
if( scalar(@_)==2){
$delta=$_[1];
if( $delta==0 && $_[0]==0){
$CurNest=$NextNest=0;
return;
}
if( $delta+$CurNest>$MAXNESTING ){
logme('E',"Attempt to set current nesting level above the treshold($MAXNESTING) ignored");
}elsif( $delta+$CurNest<0){
logme('S',"Attempt to set the curent nesting level below zero ignored");
}else{
$CurNest+=$delta;
}
}
}
1;