-
Notifications
You must be signed in to change notification settings - Fork 5
/
kt-vycisti_fuvi
executable file
·71 lines (54 loc) · 1.6 KB
/
kt-vycisti_fuvi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Pomocny skript pre vycistenie zaznamov v subore *.FPU. Vykona prekodovanie do UTF8, odstranenie neplatnych znakov,
spojenie zaznamov nachadzajucich sa na viacerych riadkoch Vysledny subor je generovany na STDOUT (standardny vystup).
Pouzitie: kt-vycisti_fuvi.py subor.fpu [kodovanie] > opraveny_subor.fpu
"""
import re
import sys
import codecs
# Fragments of a record that is spread over several input lines; they are
# collected here until the line that terminates the record has been read.
spajac_zaznamov = []
# True while no partial record is pending (the previous record was complete).
uplnost_riadku = True
def neplatnost_znakov(znak):
    """Return True when *znak* may be kept in the output, False otherwise.

    Despite its name ("invalidity of characters") this is a *keep* predicate,
    which is the sense ``filter()`` expects.

    A character is rejected when it is

    * a C0 control character (U+0000 - U+001F, which includes TAB),
    * DEL or a C1 control character (U+007F - U+009F), or
    * not encodable as ``latin2`` (ISO 8859-2).

    :param znak: a single character (string of length 1).
    :returns: ``True`` to keep the character, ``False`` to drop it.
    :raises TypeError: if *znak* is not a string of length 1 (raised by ``ord``).
    """
    kod = ord(znak)
    # The bound used to be ``< 31``, which let U+001F slip through although
    # every other control character was rejected.
    if kod < 32 or 127 <= kod <= 159:
        return False
    try:
        znak.encode('latin2')
    except UnicodeError:
        # Only encoding failures mean "invalid character"; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed.
        return False
    return True
# Input file name (required first argument).
try:
    fpusubor = sys.argv[1]
except IndexError:
    # stdout carries the cleaned data and is normally redirected to a file,
    # so the usage text goes to stderr where the user will actually see it.
    sys.stderr.write(__doc__ + "\n")
    sys.exit(2)

# Source encoding (optional second argument).
try:
    kodovanie = sys.argv[2]
except IndexError:
    kodovanie = 'IBM852'  # default; the original note here reads "CP1250"
sub = codecs.open(fpusubor, "r", encoding=kodovanie)
riadok = sub.readline()
reg1 = re.compile("^\.")
reg2 = re.compile("^$")
reg3 = re.compile(";$")
while riadok:
riadok = filter(neplatnost_znakov, riadok.rstrip())
if reg1.match(riadok):
print riadok.rstrip().encode('utf-8')
elif reg2.match(riadok):
print riadok.rstrip().encode('utf-8')
else:
if reg3.search(riadok) and uplnost_riadku == True:
print riadok.rstrip().encode('utf-8')
elif reg3.search(riadok) and uplnost_riadku == False:
spojeny_zaznam = ''.join(spajac_zaznamov)
print spojeny_zaznam.rstrip().encode('utf-8') + riadok.rstrip().encode('utf-8')
uplnost_riadku = True
spajac_zaznamov = []
else:
spajac_zaznamov.append(riadok.rstrip())
uplnost_riadku = False
riadok = sub.readline()
# vim: set ts=4 sts=4 sw=4 noet: