Merge branch 'devel'

new release (v2.8)
garabik · Dec 31, 2020 · c772aaa · c772aaa
2 parents ea1b7bb + 435d071
commit c772aaa
Show file tree

Hide file tree

Showing 7 changed files with 168 additions and 84 deletions.
diff --git a/debian/changelog b/debian/changelog
@@ -1,3 +1,12 @@
+unicode (2.8-1) unstable; urgency=low
+
+  * display ASCII table (either traditional or the EU–UK Trade and Cooperation
+    Agreement version)
+  * tidy up manpage (closes: #972047) (closes: #972063)
+  * fix decoding paracode arguments (closes: #939196)
+
+ -- Radovan Garabík <[email protected]>  Wed, 30 Dec 2020 17:13:32 +0100
+
 unicode (2.7-1) unstable; urgency=low
 
   * add East Asian width

diff --git a/debian/control b/debian/control
@@ -2,12 +2,13 @@ Source: unicode
 Section: utils
 Priority: optional
 Maintainer: Radovan Garabík <[email protected]>
-Build-Depends: debhelper (>= 4), dh-python
+Build-Depends: debhelper (>= 4), dh-python, python3
 Standards-Version: 4.3.0
 
 Package: unicode
 Architecture: all
 Depends: ${misc:Depends}, ${python3:Depends}
+Suggests: bzip2
 Recommends: unicode-data
 Description: display unicode character properties
  unicode is a simple command line utility that displays

diff --git a/paracode b/paracode
@@ -201,7 +201,7 @@ def main():
     (options, args) = parser.parse_args()
 
     if args:
-        to_convert = ' '.join(args).decode('utf-8')
+        to_convert = decode(' '.join(args), 'utf-8')
     else:
         to_convert = None
 

diff --git a/paracode.1 b/paracode.1
@@ -4,46 +4,47 @@
 paracode \- command line Unicode conversion tool
 .SH SYNOPSIS
 .B paracode
-.RI [ -t tables ] 
+.RB [ \-t
+.IR tables ]
 string
 .SH DESCRIPTION
 This manual page documents the
 .B paracode
 command.
 .PP
-\fBparacode\fP exploits the full power of the Unicode standard to convert the text
-into visually similar stream of glyphs, while using completely different codepoints.
-It is an excellent didactic tool demonstrating the principles and advanced use of
-the Unicode standard.
+\fBparacode\fP exploits the full power of the Unicode standard to convert
+the text into a visually similar stream of glyphs, while using completely
+different codepoints.
+It is an excellent didactic tool demonstrating the principles and advanced
+use of the Unicode standard.
 .PP
 \fBparacode\fP is a command line tool working as
 a filter, reading standard input in UTF-8 encoding and writing to
 standard output.
-
+.
 .SH OPTIONS
 .TP
 .BI \-t tables
-.BI \-\-tables
+.BI \-\-tables tables
 
 Use given list of conversion tables, separated by a plus sign.
 
 Special name 'all' selects all the tables.
 
-Note that selecting 'other', 'cyrillic_plus' and 'cherokee' tables (and 'all') 
+Note that selecting 'other', 'cyrillic_plus' and 'cherokee' tables (and 'all')
 makes use of rather esoteric characters, and not all fonts contain them.
 
-
 Special table 'mirror' uses quite different character substitution,
 is not selected automatically with 'all' and does not work well
 with anything except plain ascii alphabetical characters.
 
 Example:
 
-paracode -t cyrillic+greek+cherokee
+paracode \-t cyrillic+greek+cherokee
 
-paracode -t cherokee  <input >output
+paracode \-t cherokee  <input >output
 
-paracode -r -t mirror  <input >output
+paracode \-r \-t mirror  <input >output
 
 
 
@@ -60,16 +61,16 @@ other
 cherokee
 
 all
-
+.
 .TP
-.BI \-r
-
-Display text in reverse order after conversion, best used together with -t mirror.
+.B \-r
 
+Display text in reverse order after conversion,
+best used together with \-t mirror.
+.
 .SH SEE ALSO
-iconv(1)
-
-
+.BR iconv (1)
+.
 .SH AUTHOR
 Radovan Garab\('ik <garabik @ kassiopeia.juls.savba.sk>
 

diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
 
 
 setup(name='unicode',
-      version='2.7',
+      version='2.8',
       scripts=['unicode', 'paracode'],
 #      entry_points={'console_scripts': [
 #          'unicode = unicode:main',

diff --git a/unicode b/unicode
@@ -66,7 +66,7 @@ else: # python2
 
 from optparse import OptionParser
 
-VERSION='2.7'
+VERSION='2.8'
 
 
 # list of terminals that support bidi
@@ -635,7 +635,8 @@ def print_characters(clist, maxcount, format_string, query_wikipedia=0, query_wi
         if maxcount:
             counter += 1
         if counter > options.maxcount:
-            out("\nToo many characters to display, more than %s, use --max 0 (or other value) option to change it\n" % options.maxcount)
+            sys.stdout.flush()
+            sys.stderr.write("\nToo many characters to display, more than %s, use --max 0 (or other value) option to change it\n" % options.maxcount)
             return
         properties = get_unicode_properties(c)
         ordc = ord(c)
@@ -809,6 +810,49 @@ def is_range(s, typ):
 def unescape(s):
     return s.replace(r'\n', '\n')
 
+ascii_cc_names = ('NUL', 'SOH', 'STX', 'ETX', 'EOT', 'ENQ', 'ACK', 'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'SO', 'SI', 'DLE', 'DC1', 'DC2', 'DC3', 'DC4', 'NAK', 'SYN', 'ETB', 'CAN', 'EM', 'SUB', 'ESC', 'FS', 'GS', 'RS', 'US')
+
+def display_ascii_table():
+    print('Dec Hex    Dec Hex    Dec Hex  Dec Hex  Dec Hex  Dec Hex   Dec Hex   Dec Hex')
+    for row in range(0, 16):
+        for col in range(0, 8):
+            cp = 16*col+row
+            ch = chr(cp) if 32<=cp else ascii_cc_names[cp]
+            ch = 'DEL' if cp==127 else ch
+            frm = '{:3d} {:02X} {:2s}'
+            if cp < 32:
+                frm = '{:3d} {:02X} {:4s}'
+            elif cp >= 96:
+                frm = '{:4d} {:02X} {:2s}'
+            cell = frm.format(cp, cp, ch)
+            print(cell, end='')
+        print()
+
+brexit_ascii_diffs = {
+ 30: ' ',
+ 31: ' ',
+ 34: "'",
+123: '{}{',
+125: '}}',
+127: ' ',
+128: ' ',
+129: ' ',
+        }
+
+def display_brexit_ascii_table():
+    print(' + | 0    1    2    3    4    5    6    7    8    9')
+    print('---+-----------------------------------------------')
+    for row in range(30, 130, 10):
+        print('{:3d}'.format(row), end='|')
+        for col in range(0, 10):
+            cp = col+row
+            ch = brexit_ascii_diffs.get(cp, chr(cp))
+            cell = ' {:3s} '.format(ch)
+            print(cell, end='')
+        print()
+
+
+
 format_string_default = '''{yellow}{bold}U+{ordc:04X} {name}{default}
 {green}UTF-8:{default} {utf8} {green}UTF-16BE:{default} {utf16be} {green}Decimal:{default} {decimal} {green}Octal:{default} {octal}{opt_additional}
 {pchar}{opt_flipcase}{opt_uppercase}{opt_lowercase}
@@ -884,6 +928,14 @@ def main():
           action="store_const", dest="format_string",
           const='{pchar} U+{ordc:04X} {name}\n',
           help="Brief format")
+    parser.add_option("--ascii",
+          action="store_const", dest="ascii_table",
+          const=True,
+          help="Display ASCII table")
+    parser.add_option("--brexit-ascii", "--brexit",
+          action="store_const", dest="brexit_ascii_table",
+          const=True,
+          help="Display ASCII table (EU–UK Trade and Cooperation Agreement version)")
 
     global options
     (options, arguments) = parser.parse_args()
@@ -899,6 +951,15 @@ def main():
         print (textwrap.fill(' '.join(all_encodings)))
         sys.exit()
 
+    if options.ascii_table:
+        display_ascii_table()
+        sys.exit()
+
+    if options.brexit_ascii_table:
+        display_brexit_ascii_table()
+        sys.exit()
+
+
     if len(arguments)==0:
         parser.print_help()
         sys.exit()