for release v1.6

prohippo · Oct 19, 2019 · 8f00b61 · 8f00b61
1 parent fe3d398
commit 8f00b61
Show file tree

Hide file tree

Showing 4 changed files with 56 additions and 26 deletions.
diff --git a/PyEllyManual.pdf b/PyEllyManual.pdf
diff --git a/README.txt b/README.txt
@@ -20,21 +20,21 @@ and extended entity extraction, ambiguity handling, sentence recognition,
 support for large external dictionaries, and a general procedural
 framework for translating text from UTF-8 to UTF-8.
 
-The latest version has been completely rewritten in mostly object-oriented
+The latest versions have been completely rewritten in mostly object-oriented
 Python 2.7. It completed multiple stages of beta testing in 2014 and may
 now be downloaded from GitHub at https://github.com/prohippo/pyelly.git .
 Further development and refinement are ongoing.
 
 To learn how to use PyElly, see the PyEllyManual.pdf file in the same
-directory as this README.txt file. The manual has over a hundred sixty pages
+directory as this README.txt file. The manual has about 170 pages
 of information, including an overview of basic linguistics. Documentation
 of individual Python source files can be generated as needed with the
 Python pydoc utility from PyElly source files.
 
-At present, PyElly consists of 66 Python modules comprising almost eleven
+At present, PyElly consists of 67 Python modules comprising almost eleven
 thousand lines of source code. There are also various definition files
-to support basic various language processing capabilities. The PyElly
-download includes rules for some example applications illustraing the most
+to implement various basic language processing capabilities. The PyElly
+download includes rules for example applications illustrating the most
 basic kinds of computational linguistics; these include
 
 * indexing - remove stopwords and get stems for content words from raw
@@ -50,10 +50,10 @@ basic kinds of computational linguistics; these include
 * disambig - disambiguation of phrases with WordNet concept information.
 * chemic   - recognition of chemical names in text
 
-These illustrate what you can do with PyElly and also serve as a basis for
-comprehensive integration testing. Other applications will be added to the
-PyElly package on GitHub in the future. You may use any of them as models for
-building your own PyElly applications.
+These illustrate the many different things you can do with PyElly and also
+serve as a basis for comprehensive integration testing. Other applications
+will be added to the PyElly package on GitHub in the future. You may use
+any of them as models for building your own PyElly applications.
 
 PyElly is free software released under a BSD open-source license for
 educational and other uses. Be advised that the current software and
@@ -310,7 +310,7 @@ Release Notes:
                       fix bug in disambiguation with type 0 rules
                       extend "test" integration testing for new patterns
                       extend "marking" application rules
-                      clean up "doctor rules"
+                      clean up "doctor" rules
                       clean up and extend documentation
  1.3.16  - 21aug2016  add another recognizer for space chars
                       fix bug in pattern matching with spaces
@@ -623,7 +623,7 @@ Release Notes:
                       integrate template matching in PyElly processing
                       fix bug in ellySurvey because of new vocabularyTable
                       fix minor bug in cognitiveDefiner
-                      extend "test" appplication rules
+                      extend "test" application rules
                       extend "test" integration testing for templates
                       clean up "marking" rules
                       update documentation
@@ -672,29 +672,32 @@ Release Notes:
                       update documentation
  1.5.8.1 - 29nov2018  fix problem with suffix removal after prefix removal
                       extend "chemic" rules
-                      extend "chemic integration testing
+                      extend "chemic" integration testing
                       update documentation
  1.5.8.2 - 07dec2018  fix patternTable bug in handling Unicode prime char
                       extend "chemic" rules
-                      extend "chemic integration testing
+                      extend "chemic" integration testing
                       update documentation
  1.5.8.3 - 10dec2018  handle Greek letters properly in ellyBuffer
                       extend "chemic" rules
-                      extend "chemic integration testing
+                      extend "chemic" integration testing
                       update documentation
  1.5.8.4 - 21dec2018  handle Greek letters properly in patternTable
                       handle Greek letters properly in ellyWildcard matching
                       clarify ellyToken print representation, clean up code
                       extend "chemic" rules
-                      extend "chemic integration testing
+                      extend "chemic" integration testing
                       update documentation
  1.5.8.5 - 10jul2019  extend default suffix rules
                       update documentation
+ 1.6     - 19oct2019  add support for Chinese Unicode input
+                      fix problem with language initialization in ellyBase
+                      update documentation
 
 
 New versions will be for non-cosmetic changes in PyElly code. This typically
 will require regenerating any previously saved *.elly.bin files to ensure
-correct operation. Changes only to PyElly sample application definition files,
+correct operation. Changes only to PyElly example application definition files,
 unit testing input or key files, and PyElly documentation will be made from time
 to time, but these will leave version numbers the same, if they are the only
 changes. Updates are still frequent; check for the latest files. The dates above

diff --git a/ellyBase.py b/ellyBase.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # PyElly - scripting tool for analyzing natural language
 #
-# ellyBase.py : 10jul2019 CPM
+# ellyBase.py : 17oct2019 CPM
 # ------------------------------------------------------------------------------
 # Copyright (c) 2013, Clinton Prentiss Mah
 # All rights reserved.
@@ -34,9 +34,16 @@
 """
 
 import sys
-import ellySession
 import ellyConfiguration
+
+if __name__ == '__main__':
+    arg = sys.argv[1:]
+    if len(arg) > 0 and arg[0].lower() == '-zh':
+        ellyConfiguration.language = arg.pop(0)[1:].upper()
+
+
 import ellyDefinition
+import ellySession
 import ellyPickle
 import ellyToken
 import ellyChar
@@ -73,7 +80,7 @@
 
 # version ID
 
-release = 'v1.5.8.5'                    # current version of PyElly software
+release = 'v1.6'                        # current version of PyElly software
 
 def _timeModified ( basn , filn ):
 
@@ -267,6 +274,7 @@ def __init__ ( self , system , restore=None ):
 
 #       print '0:' , len(d.stb.ntname) , 'syntactic categories'
 
+#       print 'base language=' , ellyConfiguration.language
         mtb = d.mtb if d != None else None
         self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)
 
@@ -409,6 +417,7 @@ def translate ( self , text , plsb=False ):
             return ''
 #       print 'list' , list(text)
         self.sbu.refill(text)           # put text to translate into input buffer
+#       print 'refilled=' , self.sbu
 
         try:
             while True:
@@ -939,15 +948,22 @@ def _show ( typm , syms ):
 
 #   print 'stdin=' , si.encoding , 'stdout=' , so.encoding
 
+#
+#   command line arguments were partly processed at the beginning of this file!
+#   (necessary to instantiate SubstitutionBuffer properly for Chinese characters)
+#
+
     try:
-        syst = sys.argv[1] if len(sys.argv) > 1 else 'test'  # which rule definitions to run
-        dpth = int(sys.argv[2]) if len(sys.argv) > 2 else -1 # depth of parse tree reporting
+        syst = arg[0] if len(arg) > 0 else 'test'   # which rule definitions to run
+        dpth = int(arg[1]) if len(arg) > 1 else -1  # depth of parse tree reporting
     except ValueError , e:
         print >> sys.stderr , e
         sys.exit(1)
 
-    print 'release=' , 'PyElly' , release
-    print 'system =' , syst
+    print 'release  =' , 'PyElly' , release
+    print 'system   =' , syst
+
+    print 'language =  ' , ellyConfiguration.language
     try:
         eb = EllyBase(syst)
 #       print 'eb=' , eb
@@ -978,11 +994,13 @@ def _show ( typm , syms ):
 
         so.write('> ')
         line = si.readline()
+#       print 'input line=' , line
         l = line.decode('utf8')
+#       print 'decoded   =' , l
         if len(l) == 0 or l[0] == '\n': break
 #       print 'input:' , type(line) , '->' , type(l) , l
         txt = list(l.strip())
-#       print 'txt=' , txt
+        print 'txt=' , txt
         so.write('\n')
         lo = eb.translate(txt,True)
         if lo == None:

diff --git a/ellyMain.py b/ellyMain.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python
 # PyElly - scripting tool for analyzing natural language
 #
-# ellyMain.py : 01nov2018 CPM
+# ellyMain.py : 15oct2019 CPM
 # ------------------------------------------------------------------------------
 # Copyright (c) 2013, Clinton Prentiss Mah
 # All rights reserved.
@@ -49,8 +49,13 @@
 
 a = sys.argv[1:]  # drop invocation name of module in argument list
 
+#
+# interpret command line flags
+#
+
 while len(a) > 0 and a[0][0] == '-': # check for commandline flag
     flg = a.pop(0)
+#   print 'flg=' , flg
     if   flg == '-b':                # batch processing?
         interact = False
     elif flg == '-d':                # limit parse tree display depth?
@@ -63,6 +68,9 @@
         plsb = True
     elif flg == '-noLang':           # turn off default language analysis?
         ellyConfiguration.language = ''
+    elif len(flg) == 8 and flg[:6] == '-lang=':
+        ellyConfiguration.language = flg[6:].upper()
+#       print 'language=' , ellyConfiguration.language
 
 import stopExceptions
 import ellySentenceReader
@@ -76,7 +84,7 @@
 
 if interact:
     print "PyElly" , ellyBase.release + "," , "Natural Language Filtering"
-    print "Copyright 2014-2018 under BSD open-source license by C.P. Mah"
+    print "Copyright 2014-2019 under BSD open-source license by C.P. Mah"
     print "All rights reserved"
     print ""
 
@@ -126,6 +134,7 @@
     b = rdr.getNext()            # get next sentence
     if b == None: break          # EOF check
     if len(b) == 0: continue     # ignore empty lines
+#   print 'main b=' , b
 
     if interact:
         print 'translating' , b