Attempting to parse free-form ANSI text.
Frederic Rentsch
anthra.norell at vtxmail.ch
Mon Oct 23 08:56:35 EDT 2006
Paul McGuire wrote:
> "Michael B. Trausch" <"mike$#at^&nospam!%trauschus"> wrote in message
> news:GsGdnTIYc-lXaafYnZ2dnUVZ_sCdnZ2d at comcast.com...
>
>> Alright... I am attempting to find a way to parse ANSI text from a
>> telnet application. However, I am experiencing a bit of trouble.
>>
>> What I want to do is have all ANSI sequences _removed_ from the output,
>> save for those that manage color codes or text presentation (in short,
>> the ones that are ESC[#m (with additional #s separated by ; characters).
>> The ones that are left, the ones that are the color codes, I want to
>> act on, and remove from the text stream, and display the text.
>>
>>
> Here is a pyparsing-based scanner/converter, along with some test code at
> the end. It takes care of partial escape sequences, and strips any
> sequences of the form
> "<ESC>[##;##;...<alpha>", unless the trailing alpha is 'm'.
> The pyparsing project wiki is at http://pyparsing.wikispaces.com.
>
> -- Paul
>
> from pyparsing import *
>
>
snip
>
>
> test = """\
> This is a test string containing some ANSI sequences.
> Sequence 1: ~[10;12m
> Sequence 2: ~[3;4h
> Sequence 3: ~[4;5m
> Sequence 4; ~[m
> Sequence 5; ~[24HNo more escape sequences.
> ~[7""".replace('~',chr(27))
>
> leftOver = processInputString(test)
>
>
> Prints:
> This is a test string containing some ANSI sequences.
> Sequence 1:
> <change color attributes to ['1012']>
>
I doubt we should concatenate numbers.
> Sequence 2:
>
> Sequence 3:
> <change color attributes to ['45']>
>
> Sequence 4;
> <change color attributes to ['']>
>
> Sequence 5;
> No more escape sequences.
>
> <found partial escape sequence ['\x1b[7'], tack it on front of next>
>
>
>
Another one of Paul's elegant pyparsing solutions. To satisfy my own
curiosity, I tried to see how SE stacked up and devoted more time than I
really should to finding out. In the end I don't know if it was worth
the effort, but having made it I might as well just throw it in.
The following code does everything Mike needs to do, except interact
with wx. It is written to run stand-alone. To incorporate it into
Mike's class, the functions would become methods and the globals would
become instance attributes. Running it does this:
>>> chunk_1 = """This is a test string containing some ANSI sequences.
Sequence 1 Valid code, invalid numbers: \x1b[10;12mEnd of sequence 1
Sequence 2 Not an 'm'-code: \x1b[30;4;77hEnd of sequence 2
Sequence 3 Color setting code: \x1b[30;45mEnd of sequence 3
Sequence 4 Parameter setting code: \x1b[7mEnd of sequence 4
Sequence 5 Color setting code spanning calls: \x1b[3"""
>>> chunk_2 = """7;42mEnd of sequence 5
Sequence 6 Invalid code: \x1b[End of sequence 6
Sequence 7 A valid code at the end: \x1b[9m
"""
>>> init ()
>>> process_text (chunk_1)
>>> process_text (chunk_2)
>>> print output
This is a test string containing some ANSI sequences.
Sequence 1 Valid code, invalid numbers: >>! Ignoring unknown number 10
!<< >>! Ignoring unknown number 12 !<< End of sequence 1
Sequence 2 Not an 'm'-code: End of sequence 2
Sequence 3 Color setting code: >>setting foreground BLACK<< >>setting
background MAGENTA<< End of sequence 3
Sequence 4 Parameter setting code: >>Calling parameter setting function
7<< End of sequence 4
Sequence 5 Color setting code spanning calls: >>setting foreground
GREY<< >>setting background GREEN<< End of sequence 5
Sequence 6 Invalid code: nd of sequence 6
Sequence 7 A valid code at the end: >>Calling parameter setting
function 9<<
###################
And here it goes:
def init():
    """Create the module-level objects used by process_text() and _set_wx().

    Sets these globals:

    * ``Pre_Processor`` -- SE translator built from the screening
      definitions below.
    * ``digits_re`` -- compiled regular expression matching a run of digits.
    * ``Colors`` -- SE translator from ANSI color numbers to color names.
    * ``truncated_escape_hold`` -- empty string; holds an escape sequence
      that was cut off at the end of a chunk.
    * ``output`` -- empty string; collects the result (for testing only).

    To add to AnsiTextCtrl.__init__ (): there the globals would become
    instance attributes.
    """
    global output  #-> For testing
    global Pre_Processor, digits_re, Colors, truncated_escape_hold  # global -> instance attributes

    # 're' is imported here because no module-level import is visible.
    import re
    # SEL is less import overhead but doesn't have interactive development
    # features (not needed in production versions).
    import SE

    # Screening out all ansi escape sequences except those controlling color.
    # Regular expression r'[\x80-\xff\r]' would work fine but is four times
    # slower than 127 fixed definitions.
    # NOTE(review): range(128, 255) stops at 254, so character 255 is not
    # covered although the comment above mentions \xff -- confirm intent.
    grit = '\n'.join(['(%d)=' % i for i in range(128, 255)]) + ' (13)= '
    all_escapes = r'\x1b\[\d*(;\d*)*[A-Za-z]'
    color_escapes = r'\x1b\[\d*(;\d*)*m'
    # 'all_escapes' also matches what 'color_escapes' matches. With identical
    # regular expression matches it is the last definition that applies.
    # Other than that, the order of definitions is irrelevant to precedence.
    Pre_Processor = SE.SE('%s ~%s~= ~%s~==' % (grit, all_escapes, color_escapes))  # SEL.SEL for production

    # Isolating digits. Raw string so that '\d' is not read as a string escape.
    digits_re = re.compile(r'\d+')

    # Color numbers to color names. The definitions are kept flush left so
    # the string handed to SE is exactly the one in the original posting.
    Colors = SE.SE('''
30=BLACK 40=BLACK
31=RED 41=RED
32=GREEN 42=GREEN
33=YELLOW 43=YELLOW
34=BLUE 44=BLUE
35=MAGENTA 45=MAGENTA
36=CYAN 46=CYAN
37=GREY 47=GREY
39=GREY 49=BLACK
<EAT>
''')

    truncated_escape_hold = ''  #-> self.truncated_escape_hold
    output = ''  #-> For testing only
# What follows replaces all others of Mike's methods
def process_text(text):
    """Process one chunk of terminal text and append the result to ``output``.

    The chunk is run through ``Pre_Processor`` and split at ESC. The text
    before the first ESC is appended to ``output`` unchanged. Every later
    section is split at its first 'm': the part before it is the
    ';'-separated parameter list, each parameter being passed to _set_wx(),
    and the part after it is appended to ``output``.

    A section containing no 'm' is treated as an escape sequence truncated
    by the end of the chunk. It is stored in ``truncated_escape_hold``
    (the last such section wins) and prepended to the text of the next call.

    text -- the next chunk of raw input. init() must have been called first.
    """
    global output  #-> For testing
    global truncated_escape_hold

    # Consume the held fragment exactly once; leaving it set would prepend
    # the same fragment again on every later call.
    pending, truncated_escape_hold = truncated_escape_hold, ''
    # Join before pre-processing so that an escape sequence split across two
    # calls is seen whole by Pre_Processor (a split non-color sequence can
    # only be screened out once it is complete).
    purged_text = Pre_Processor(pending + text)
    # Text is now clean except for color codes beginning with ESC
    ansi_controlled_sections = purged_text.split('\x1b')

    # Each section starts with a color control, except the first one
    # (leftmost split-off). str.split always returns at least one item.
    #-> self.AppendText(ansi_controlled_sections[0])  #-> For real
    output += ansi_controlled_sections[0]  #-> For testing

    last_index = len(ansi_controlled_sections) - 1
    for index, section in enumerate(ansi_controlled_sections[1:], 1):
        if section == '':
            if index == last_index:
                # The chunk ended right after ESC: keep the ESC so the
                # sequence can be completed by the next chunk.
                truncated_escape_hold = '\x1b'
            continue
        try:
            escape_ansi_controlled_section, data = section.split('m', 1)
        except ValueError:  # Truncated escape
            # Restore ESC removed by split ('\x1b')
            truncated_escape_hold = '\x1b' + section
            continue
        for escape in escape_ansi_controlled_section.split(';'):
            match = digits_re.search(escape)
            if match is not None:
                _set_wx(match.group())
            elif escape in ('', '['):
                # An omitted parameter (as in ESC[m) defaults to 0.
                _set_wx('0')
            else:
                output += ' >>!!!Invalid number %s!!!<<< ' % escape  #-> For testing
        #-> self.AppendText(data)  #-> For real
        output += data  #-> For testing
def _set_wx(n):
    """Act on one numeric parameter of an 'm' escape sequence.

    n -- the parameter as a string of digits.

    Numbers 0 to 9 select a parameter setting function. Numbers that
    ``Colors`` translates to a name set the foreground (30-39) or the
    background (40-49) color. Anything else is reported as unknown. In this
    stand-alone version every action is only recorded in ``output``.
    """
    global output  # For testing only

    int_n = int(n)
    if 0 <= int_n <= 9:
        # NOTE(review): the posting had "self._number_to_method (n)()",
        # which calls the dict; indexing is presumably what was meant.
        #-> self._number_to_method[n](self)  #-> For real
        output += ' >>Calling parameter setting function %s<< ' % n  #-> For testing
        return

    color = Colors(n)
    if color and 30 <= int_n < 50:
        if 40 <= int_n:
            #-> self.AnsiBGColor = color  #-> For real
            output += ' >>setting background %s<< ' % color  #-> For testing
        else:
            #-> self.AnsiFGColor = color  #-> For real
            output += ' >>setting foreground %s<< ' % color  #-> For testing
        return

    output += ' >>!!!Ignoring unknown number %s!!!<< ' % n  #-> For testing
#-> For real requires this in addition:
#->
#-> # Methods controlled by 'm' code 0 to 9: # Presumably 'm'?
#->
#-> def _0 (self):
#-> self.AnsiFGColor = 'GREY'
#-> self.AnsiBGColor = 'BLACK'
#-> self.AnsiFontSize = 9
#-> self.AnsiFontFamily = wx.FONTFAMILY_TELETYPE
#-> self.AnsiFontStyle = wx.FONTSTYLE_NORMAL
#-> self.AnsiFontWeight = wx.FONTWEIGHT_NORMAL
#-> self.AnsiFontUnderline = False
#->
#-> def _1 (self): self.AnsiFontWeight = wx.FONTWEIGHT_BOLD
#-> def _2 (self): self.AnsiFontWeight = wx.FONTWEIGHT_LIGHT
#-> def _3 (self): self.AnsiFontStyle = wx.FONTSTYLE_ITALIC
#-> def _4 (self): self.AnsiFontUnderline = True
#-> def _5 (self): pass
#-> def _7 (self): self.AnsiFGColor, self.AnsiBGColor = self.AnsiBGColor, self.AnsiFGColor
#-> def _8 (self): self.AnsiFGColor = self.AnsiBGColor
#-> def _9 (self): pass
#->
#->
#-> _number_to_method = {
#-> '0' : _0,
#-> '1' : _1,
#-> '2' : _2,
#-> '3' : _3,
#-> '4' : _4,
#-> '7' : _7,
#-> '8' : _8,
#-> '9' : _9,
#-> }
################
The most recent version of SE is now 2.3 with a rare malfunction
corrected. (SE from http://cheeseshop.python.org/pypi/SE/2.2%20beta)
More information about the Python-list
mailing list