[IPython-dev] IPython handles code input as latin1 instead of the system encoding

Eduardo Habkost ehabkost at raisama.net
Mon Oct 11 09:06:49 EDT 2010


On Sat, Oct 09, 2010 at 10:29:39PM -0700, Fernando Perez wrote:
<snip>
> > In [1]: import sys, locale
> >
> > In [2]: print sys.stdin.encoding,locale.getdefaultlocale()
> > UTF-8 ('en_US', 'UTF8')
> >
> > In [3]: print repr(u'áé')
> > u'\xc3\xa1\xc3\xa9'
> 
> Thanks for the report.  We've made a lot of improvements to our
> unicode handling recently, and I think it's all OK now.  With current
> trunk:
> 
> IPython 0.11.alpha1.git -- An enhanced Interactive Python.
> ?         -> Introduction and overview of IPython's features.
> %quickref -> Quick reference.
> help      -> Python's own help system.
> object?   -> Details about 'object', use 'object??' for extra details.
> 
> In [1]: import sys, locale
> 
> In [2]: print repr(u'áé')
> u'\xe1\xe9'
> 
> 
> Let us know again if you have any remaining problems.

Hi,

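I just built and installed IPython from the latest git master (commit
4e2d3af2a82b31fb523497eccb7ca0cfebd9d169). Things look worse now: entering a
non-ASCII string literal crashes IPython with a UnicodeDecodeError. The crash
report is below.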


[ipython/master]$ ipython
Python 2.6.2 (r262:71600, Jun  4 2010, 18:28:04)
Type "copyright", "credits" or "license" for more information.

IPython 0.11.alpha1.git -- An enhanced Interactive Python.
?         -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help      -> Python's own help system.
object?   -> Details about 'object', use 'object??' for extra details.

In [1]: repr("áé")
ERROR: An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (82, 0))

---------------------------------------------------------------------------
UnicodeDecodeError                            Python 2.6.2: /usr/bin/python
                                                   Mon Oct 11 09:55:38 2010
A problem occured executing Python code.  Here is the sequence of function
calls leading up to the error, with the most recent (innermost) call last.
/usr/bin/ipython in <module>()
      1
      2
      3
      4
      5
      6
      7 #!/usr/bin/python
      8 """Terminal-based IPython entry point.
      9
---> 10 Note: this is identical to IPython/frontend/terminal/scripts/ipython for now.
        global launch_new_instance = <function launch_new_instance at 0x91e95a4>
     11 Once 0.11 is closer to release, we will likely need to reorganize the script
     12 entry points."""
     13
     14 from IPython.frontend.terminal.ipapp import launch_new_instance
     15
     16 launch_new_instance()
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31

/usr/lib/python2.6/site-packages/IPython/frontend/terminal/ipapp.pyc in launch_new_instance()
    646 def load_default_config(ipython_dir=None):
    647     """Load the default config file from the default ipython_dir.
    648
    649     This is useful for embedded shells.
    650     """
    651     if ipython_dir is None:
    652         ipython_dir = get_ipython_dir()
    653     cl = PyFileConfigLoader(default_config_file_name, ipython_dir)
    654     config = cl.load_config()
    655     return config
    656
    657
    658 def launch_new_instance():
    659     """Create and run a full blown IPython instance"""
    660     app = IPythonApp()
--> 661     app.start()
    662
    663
    664 if __name__ == '__main__':
    665     launch_new_instance()
    666
    667
    668
    669
    670
    671
    672
    673
    674
    675
    676

/usr/lib/python2.6/site-packages/IPython/core/application.pyc in start(self=<IPython.frontend.terminal.ipapp.IPythonApp object at 0xb769a4ec>)
    196         # Merge all config objects into a single one the app can then use
    197         self.merge_configs()
    198         self.log_master_config()
    199
    200         # Construction phase
    201         self.pre_construct()
    202         self.construct()
    203         self.post_construct()
    204
    205         # Done, flag as such and
    206         self._initialized = True
    207
    208     def start(self):
    209         """Start the application."""
    210         self.initialize()
--> 211         self.start_app()
    212
    213     #-------------------------------------------------------------------------
    214     # Various stages of Application creation
    215     #-------------------------------------------------------------------------
    216
    217     def create_crash_handler(self):
    218         """Create a crash handler, typically setting sys.excepthook to it."""
    219         self.crash_handler = self.crash_handler_class(self)
    220         sys.excepthook = self.crash_handler
    221
    222     def create_default_config(self):
    223         """Create defaults that can't be set elsewhere.
    224
    225         For the most part, we try to set default in the class attributes
    226         of Configurables.  But, defaults the top-level Application (which is

/usr/lib/python2.6/site-packages/IPython/frontend/terminal/ipapp.pyc in start_app(self=<IPython.frontend.terminal.ipapp.IPythonApp object at 0xb769a4ec>)
    626         try:
    627             fname = self.extra_args[0]
    628         except:
    629             pass
    630         else:
    631             try:
    632                 self._exec_file(fname)
    633             except:
    634                 self.log.warn("Error in executing file in user namespace: %s" %
    635                               fname)
    636                 self.shell.showtraceback()
    637
    638     def start_app(self):
    639         if self.master_config.Global.interact:
    640             self.log.debug("Starting IPython's mainloop...")
--> 641             self.shell.mainloop()
    642         else:
    643             self.log.debug("IPython not interactive, start_app is no-op...")
    644
    645
    646 def load_default_config(ipython_dir=None):
    647     """Load the default config file from the default ipython_dir.
    648
    649     This is useful for embedded shells.
    650     """
    651     if ipython_dir is None:
    652         ipython_dir = get_ipython_dir()
    653     cl = PyFileConfigLoader(default_config_file_name, ipython_dir)
    654     config = cl.load_config()
    655     return config
    656

/usr/lib/python2.6/site-packages/IPython/frontend/terminal/interactiveshell.pyc in mainloop(self=<IPython.frontend.terminal.interactiveshell.TerminalInteractiveShell object at 0x8fd62ac>, display_banner=None)
    183     def mainloop(self, display_banner=None):
    184         """Start the mainloop.
    185
    186         If an optional banner argument is given, it will override the
    187         internally created default banner.
    188         """
    189
    190         with nested(self.builtin_trap, self.display_trap):
    191
    192             # if you run stuff with -c <cmd>, raw hist is not updated
    193             # ensure that it's in sync
    194             self.history_manager.sync_inputs()
    195
    196             while 1:
    197                 try:
--> 198                     self.interact(display_banner=display_banner)
        global N = undefined
        global R = undefined
        global t = undefined
        global updatet = undefined
        global user_ns_hiddenR = undefined
        global _pylab_magic_runt = undefined
        global magic_run = undefined
        global guit = undefined
        global ns = undefined
        global sN = undefined
        global usr = undefined
        global lib = undefined
        global python2 = undefined
        global site = undefined
        global packages = undefined
        global IPython = undefined
        global frontend = undefined
        global terminal = undefined
        global interactiveshell.pyt = undefined
        global enable_pylab = undefined
        global s = undefined
        global c = undefined
        global C = undefined
        global _ = undefined
        global d = undefined
        global S = undefined
        global Ask = undefined
        global the = undefined
        global shell = undefined
        global to = undefined
        global exit.Can = undefined
        global be = undefined
        global overiden = undefined
        global used = undefined
        global a = undefined
        global callback.N = undefined
        global R3 = undefined
        global RS = undefined
        global interactiveshell.pyRv = undefined
        global i = undefined
        global o = undefined
        global q9 = undefined
        global n = undefined
        global sJ = undefined
        global Handle = undefined
        global interactive = undefined
        global exit.This = undefined
        global method = undefined
        global calls = undefined
        global ask_exit = undefined
        global callback.s = undefined
        global Gd = undefined
        global g = undefined
        global GHd = undefined
        global Toggle = undefined
        global autoindent = undefined
        global on = undefined
        global off = undefined
        global available.s = undefined
        global Automatic = undefined
        global indentation = undefined
        global OFFt = undefined
        global ONN = undefined
        global shellt = undefined
        global set_autoindentR = undefined
        global parameter_s = undefined
        global magic_autoindent = undefined
        global Paste = undefined
        global execute = undefined
        global pre = undefined
        global formatted = undefined
        global code = undefined
        global block = undefined
        global clipboard.You = undefined
        global must = undefined
        global terminate = undefined
        global two = undefined
        global minus = undefined
        global signs = undefined
        global alone = undefined
        global line.You = undefined
        global can = undefined
        global also = undefined
        global provide = undefined
        global your = undefined
        global own = undefined
        global sentinel = undefined
        global new = undefined
        global this = undefined
        global operation = undefined
        global The = undefined
        global dedented = undefined
        global prior = undefined
        global execution = undefined
        global enable = undefined
        global of = undefined
        global definitions.characters = undefined
        global at = undefined
        global beginning = undefined
        global line = undefined
        global are = undefined
        global ignored = undefined
        global allow = undefined
        global pasting = undefined
        global directly = undefined
        global e = undefined
        global mails = undefined
        global diff = undefined
        global files = undefined
        global doctests = undefined
        global continuation = undefined
        global prompt = undefined
        global stripped.The = undefined
        global executed = undefined
        global assigned = undefined
        global variable = undefined
        global named = undefined
        global later = undefined
        global editing.You = undefined
        global name = undefined
        global an = undefined
        global argument = undefined
        global e.g..This = undefined
        global assigns = undefined
        global pasted = undefined
        global string = undefined
        global without = undefined
        global dedenting = undefined
        global executing = undefined
        global it = undefined
        global preceding = undefined
        global still = undefined
        global stripped = undefined
        global re = <module 're' from '/usr/lib/python2.6/re.pyc'>
        global executes = undefined
        global previously = undefined
        global entered = undefined
        global by = undefined
        global cpaste.Do = undefined
        global alarmed = undefined
        global garbled = undefined
        global output = undefined
        global Windows = undefined
        global readline = undefined
        global bug.Just = undefined
        global press = undefined
        global enter = undefined
        global type = undefined
        global again = undefined
        global will = undefined
        global what = undefined
        global was = undefined
        global just = undefined
        global pasted.IPython = undefined
        global statements = undefined
        global magics = undefined
        global escapes = undefined
        global supported = undefined
        global yet.See = undefined
        global paste = undefined
        global automatically = undefined
        global pull = undefined
        global clipboard.s = undefined
        global rs = undefined
        global modet = undefined
        global stringt = undefined
        global rNt = undefined
        global ss = undefined
        global parse_optionsRx = undefined
        global has_keyt = undefined
        global _rerun_pastedt = undefined
        global gett = undefined
        global _strip_pasted_lines_for_codet = undefined
        global _get_pasted_linest = undefined
        global _execute_block = undefined
        global optst = undefined
        global argst = undefined
        global part = undefined
        global sentinelt = undefined
        global magic_cpaste = undefined
        global pN = undefined
        global p = undefined
        global clipboard.The = undefined
        global text = undefined
        global pulled = undefined
        global clipboard = undefined
        global user = undefined
        global intervention = undefined
        global printed = undefined
        global back = undefined
        global screen = undefined
        global before = undefined
        global unless = undefined
        global q = undefined
        global flag = undefined
        global given = undefined
        global force = undefined
        global quiet = undefined
        global mode.The = undefined
        global Options = undefined
        global r = undefined
        global cpaste.q = undefined
        global mode = undefined
        global do = undefined
        global echo = undefined
        global terminal.IPython = undefined
        global cpaste = undefined
        global manually = undefined
        global into = undefined
        global until = undefined
        global you = undefined
        global mark = undefined
        global its = undefined
        global end.t = undefined
        global rqR = undefined
        global Nt = undefined
        global qs = undefined
        global Rx = undefined
        global RY = undefined
        global clipboard_getR = undefined
        global splitlinesRF = undefined
        global pycolorizet = undefined
        global endswithR = undefined
        global textR = undefined
        global RF = undefined
        global magic_paste4 = undefined
        global __name__t = undefined
        global __module__R = undefined
        global R4 = undefined
        global Rj = undefined
        global RE = undefined
        global embeddedt = undefined
        global embedded_activeR = undefined
        global editort = undefined
        global pagerR = undefined
        global R2 = undefined
        global RD = undefined
        global propertyR1 = undefined
        global RA = undefined
        global RC = undefined
        global RG = undefined
        global RQ = undefined
        global RO = undefined
        global R_ = undefined
        global Rk = undefined
        global Rv = undefined
        global Rb = undefined
        global interactiveshell.pyR = undefined
        global sL = undefined
        global U = undefined
        global contextlibR = undefined
        global reR = undefined
        global IPython.core.errorR = undefined
        global IPython.core.usageR = undefined
        global IPython.core.inputlistR = undefined
        global IPython.core.interactiveshellR = undefined
        global IPython.lib.inputhookR = undefined
        global IPython.lib.pylabtoolsR = undefined
        global IPython.utils.terminalR = undefined
        global IPython.utils.processR = undefined
        global IPython.utils.warnR = undefined
        global IPython.utils.textR = undefined
        global IPython.utils.traitletsR = undefined
        global Rr = undefined
        global register = undefined
        global module = undefined
        global s. = undefined
    199                     #self.interact_with_readline()
    200                     # XXX for testing of a readline-decoupled repl loop, call
    201                     # interact_with_readline above
    202                     break
    203                 except KeyboardInterrupt:
    204                     # this should not be necessary, but KeyboardInterrupt
    205                     # handling seems rather unpredictable...
    206                     self.write("\nKeyboardInterrupt in interact()\n")
    207
    208     def interact(self, display_banner=None):
    209         """Closely emulate the interactive Python console."""
    210
    211         # batch run -> do not interact
    212         if self.exit_now:
    213             return

/usr/lib/python2.6/site-packages/IPython/frontend/terminal/interactiveshell.pyc in interact(self=<IPython.frontend.terminal.interactiveshell.TerminalInteractiveShell object at 0x8fd62ac>, display_banner=False)
    270                      'Because of how pdb handles the stack, it is impossible\n'
    271                      'for IPython to properly format this particular exception.\n'
    272                      'IPython will resume normal operation.')
    273             except:
    274                 # exceptions here are VERY RARE, but they can be triggered
    275                 # asynchronously by signal handlers, for example.
    276                 self.showtraceback()
    277             else:
    278                 self.input_splitter.push(line)
    279                 more = self.input_splitter.push_accepts_more()
    280                 if (self.SyntaxTB.last_syntax_error and
    281                     self.autoedit_syntax):
    282                     self.edit_syntax_error()
    283                 if not more:
    284                     source_raw = self.input_splitter.source_raw_reset()[1]
--> 285                     self.run_cell(source_raw)
    286
    287         # We are off again...
    288         __builtin__.__dict__['__IPYTHON__active'] -= 1
    289
    290         # Turn off the exit flag, so the mainloop can be restarted if desired
    291         self.exit_now = False
    292
    293     def raw_input(self, prompt='', continue_prompt=False):
    294         """Write a prompt and read a line.
    295
    296         The returned line does not include the trailing newline.
    297         When the user enters the EOF key sequence, EOFError is raised.
    298
    299         Optional inputs:
    300

/usr/lib/python2.6/site-packages/IPython/core/interactiveshell.pyc in run_cell(self=<IPython.frontend.terminal.interactiveshell.TerminalInteractiveShell object at 0x8fd62ac>, cell='repr("\xc3\xa1\xc3\xa9")\n')
   2078         # - increment the global execution counter (we need to pull that out
   2079         # from outputcache's control; outputcache should instead read it from
   2080         # the main object).
   2081         # - do any logging of input
   2082         # - update histories (raw/translated)
   2083         # - then, call plain run_source (for single blocks, so displayhook is
   2084         # triggered) or run_code (for multiline blocks in exec mode).
   2085         #
   2086         # Once this is done, we'll be able to stop using runlines and we'll
   2087         # also have a much cleaner separation of logging, input history and
   2088         # output cache management.
   2089         #################################################################
   2090
   2091         # We need to break up the input into executable blocks that can be run
   2092         # in 'single' mode, to provide comfortable user behavior.
-> 2093         blocks = self.input_splitter.split_blocks(cell)
   2094
   2095         if not blocks:
   2096             return
   2097
   2098         # Store the 'ipython' version of the cell as well, since that's what
   2099         # needs to go into the translated history and get executed (the
   2100         # original cell may contain non-python syntax).
   2101         ipy_cell = ''.join(blocks)
   2102
   2103         # Store raw and processed history
   2104         self.history_manager.store_inputs(ipy_cell, cell)
   2105
   2106         self.logger.log(ipy_cell, cell)
   2107         # dbg code!!!
   2108         if 0:

/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in split_blocks(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, lines=[])
    514                 # block.  Thus, we must put the line back into the input buffer
    515                 # so that it starts a new block on the next pass.
    516                 #
    517                 # 2. the second case is detected in the line before the actual
    518                 # dedent happens, so , we consume the line and we can break out
    519                 # to start a new block.
    520
    521                 # Case 1, explicit dedent causes a break.
    522                 # Note: check that we weren't on the very last line, else we'll
    523                 # enter an infinite loop adding/removing the last line.
    524                 if  _full_dedent and lines and not next_line.startswith(' '):
    525                     lines.append(next_line)
    526                     break
    527
    528                 # Otherwise any line is pushed
--> 529                 self.push(next_line)
    530
    531                 # Case 2, full dedent with full block ready:
    532                 if _full_dedent or \
    533                        self.indent_spaces==0 and not self.push_accepts_more():
    534                     break
    535             # Form the new block with the current source input
    536             blocks.append(self.source_reset())
    537
    538         #return blocks
    539         # HACK!!! Now that our input is in blocks but guaranteed to be pure
    540         # python syntax, feed it back a second time through the AST-based
    541         # splitter, which is more accurate than ours.
    542         return split_blocks(''.join(blocks))
    543
    544     #------------------------------------------------------------------------

/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in push(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, lines='repr("\xc3\xa1\xc3\xa9")')
    981         # class by hand line by line, we need to temporarily switch out to
    982         # 'line' mode, do a single manual reset and then feed the lines one
    983         # by one.  Note that this only matters if the input has more than one
    984         # line.
    985         changed_input_mode = False
    986
    987         if self.input_mode == 'cell':
    988             self.reset()
    989             changed_input_mode = True
    990             saved_input_mode = 'cell'
    991             self.input_mode = 'line'
    992
    993         # Store raw source before applying any transformations to it.  Note
    994         # that this must be done *after* the reset() call that would otherwise
    995         # flush the buffer.
--> 996         self._store(lines, self._buffer_raw, 'source_raw')
    997
    998         try:
    999             push = super(IPythonInputSplitter, self).push
   1000             for line in lines_list:
   1001                 if self._is_complete or not self._buffer or \
   1002                    (self._buffer and self._buffer[-1].rstrip().endswith(':')):
   1003                     for f in transforms:
   1004                         line = f(line)
   1005
   1006                 out = push(line)
   1007         finally:
   1008             if changed_input_mode:
   1009                 self.input_mode = saved_input_mode
   1010         return out
   1011

/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in _store(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, lines='repr("\xc3\xa1\xc3\xa9")', buffer=['repr("\xc3\xa1\xc3\xa9")\n'], store='source_raw')
    592                 self.indent_spaces, self._full_dedent = self._find_indent(line)
    593
    594     def _store(self, lines, buffer=None, store='source'):
    595         """Store one or more lines of input.
    596
    597         If input lines are not newline-terminated, a newline is automatically
    598         appended."""
    599
    600         if buffer is None:
    601             buffer = self._buffer
    602
    603         if lines.endswith('\n'):
    604             buffer.append(lines)
    605         else:
    606             buffer.append(lines+'\n')
--> 607         setattr(self, store, self._set_source(buffer))
    608
    609     def _set_source(self, buffer):
    610         return ''.join(buffer).encode(self.encoding)
    611
    612
    613 #-----------------------------------------------------------------------------
    614 # Functions and classes for IPython-specific syntactic support
    615 #-----------------------------------------------------------------------------
    616
    617 # RegExp for splitting line contents into pre-char//first word-method//rest.
    618 # For clarity, each group in on one line.
    619
    620 line_split = re.compile("""
    621              ^(\s*)              # any leading space
    622              ([,;/%]|!!?|\?\??)  # escape character or characters

/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in _set_source(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, buffer=['repr("\xc3\xa1\xc3\xa9")\n'])
    595         """Store one or more lines of input.
    596
    597         If input lines are not newline-terminated, a newline is automatically
    598         appended."""
    599
    600         if buffer is None:
    601             buffer = self._buffer
    602
    603         if lines.endswith('\n'):
    604             buffer.append(lines)
    605         else:
    606             buffer.append(lines+'\n')
    607         setattr(self, store, self._set_source(buffer))
    608
    609     def _set_source(self, buffer):
--> 610         return ''.join(buffer).encode(self.encoding)
    611
    612
    613 #-----------------------------------------------------------------------------
    614 # Functions and classes for IPython-specific syntactic support
    615 #-----------------------------------------------------------------------------
    616
    617 # RegExp for splitting line contents into pre-char//first word-method//rest.
    618 # For clarity, each group in on one line.
    619
    620 line_split = re.compile("""
    621              ^(\s*)              # any leading space
    622              ([,;/%]|!!?|\?\??)  # escape character or characters
    623              \s*(%?[\w\.\*]*)    # function/method, possibly with leading %
    624                                  # to correctly treat things like '?%magic'
    625              (\s+.*$|$)          # rest of line

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 6: ordinal not in range(128)

Hit <Enter> to quit this message (your terminal may close):
-----------------------------

-- 
Eduardo



More information about the IPython-dev mailing list