[IPython-dev] IPython handles code input as latin1 instead of the system encoding
Eduardo Habkost
ehabkost at raisama.net
Mon Oct 11 09:06:49 EDT 2010
On Sat, Oct 09, 2010 at 10:29:39PM -0700, Fernando Perez wrote:
<snip>
> > In [1]: import sys, locale
> >
> > In [2]: print sys.stdin.encoding,locale.getdefaultlocale()
> > UTF-8 ('en_US', 'UTF8')
> >
> > In [3]: print repr(u'áé')
> > u'\xc3\xa1\xc3\xa9'
>
> Thanks for the report. We've made a lot of improvements to our
> unicode handling recently, and I think it's all OK now. With current
> trunk:
>
> IPython 0.11.alpha1.git -- An enhanced Interactive Python.
> ? -> Introduction and overview of IPython's features.
> %quickref -> Quick reference.
> help -> Python's own help system.
> object? -> Details about 'object', use 'object??' for extra details.
>
> In [1]: import sys, locale
>
> In [2]: print repr(u'áé')
> u'\xe1\xe9'
>
>
> Let us know again if you have any remaining problems.
Hi,
I just built and installed from the latest git (commit
4e2d3af2a82b31fb523497eccb7ca0cfebd9d169). Things look worse now: entering a
non-ASCII string literal crashes IPython with a UnicodeDecodeError. The crash
report is below.
[ipython/master]$ ipython
Python 2.6.2 (r262:71600, Jun 4 2010, 18:28:04)
Type "copyright", "credits" or "license" for more information.
IPython 0.11.alpha1.git -- An enhanced Interactive Python.
? -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help -> Python's own help system.
object? -> Details about 'object', use 'object??' for extra details.
In [1]: repr("áé")
ERROR: An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (82, 0))
---------------------------------------------------------------------------
UnicodeDecodeError Python 2.6.2: /usr/bin/python
Mon Oct 11 09:55:38 2010
A problem occured executing Python code. Here is the sequence of function
calls leading up to the error, with the most recent (innermost) call last.
/usr/bin/ipython in <module>()
1
2
3
4
5
6
7 #!/usr/bin/python
8 """Terminal-based IPython entry point.
9
---> 10 Note: this is identical to IPython/frontend/terminal/scripts/ipython for now.
global launch_new_instance = <function launch_new_instance at 0x91e95a4>
11 Once 0.11 is closer to release, we will likely need to reorganize the script
12 entry points."""
13
14 from IPython.frontend.terminal.ipapp import launch_new_instance
15
16 launch_new_instance()
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/usr/lib/python2.6/site-packages/IPython/frontend/terminal/ipapp.pyc in launch_new_instance()
646 def load_default_config(ipython_dir=None):
647 """Load the default config file from the default ipython_dir.
648
649 This is useful for embedded shells.
650 """
651 if ipython_dir is None:
652 ipython_dir = get_ipython_dir()
653 cl = PyFileConfigLoader(default_config_file_name, ipython_dir)
654 config = cl.load_config()
655 return config
656
657
658 def launch_new_instance():
659 """Create and run a full blown IPython instance"""
660 app = IPythonApp()
--> 661 app.start()
662
663
664 if __name__ == '__main__':
665 launch_new_instance()
666
667
668
669
670
671
672
673
674
675
676
/usr/lib/python2.6/site-packages/IPython/core/application.pyc in start(self=<IPython.frontend.terminal.ipapp.IPythonApp object at 0xb769a4ec>)
196 # Merge all config objects into a single one the app can then use
197 self.merge_configs()
198 self.log_master_config()
199
200 # Construction phase
201 self.pre_construct()
202 self.construct()
203 self.post_construct()
204
205 # Done, flag as such and
206 self._initialized = True
207
208 def start(self):
209 """Start the application."""
210 self.initialize()
--> 211 self.start_app()
212
213 #-------------------------------------------------------------------------
214 # Various stages of Application creation
215 #-------------------------------------------------------------------------
216
217 def create_crash_handler(self):
218 """Create a crash handler, typically setting sys.excepthook to it."""
219 self.crash_handler = self.crash_handler_class(self)
220 sys.excepthook = self.crash_handler
221
222 def create_default_config(self):
223 """Create defaults that can't be set elsewhere.
224
225 For the most part, we try to set default in the class attributes
226 of Configurables. But, defaults the top-level Application (which is
/usr/lib/python2.6/site-packages/IPython/frontend/terminal/ipapp.pyc in start_app(self=<IPython.frontend.terminal.ipapp.IPythonApp object at 0xb769a4ec>)
626 try:
627 fname = self.extra_args[0]
628 except:
629 pass
630 else:
631 try:
632 self._exec_file(fname)
633 except:
634 self.log.warn("Error in executing file in user namespace: %s" %
635 fname)
636 self.shell.showtraceback()
637
638 def start_app(self):
639 if self.master_config.Global.interact:
640 self.log.debug("Starting IPython's mainloop...")
--> 641 self.shell.mainloop()
642 else:
643 self.log.debug("IPython not interactive, start_app is no-op...")
644
645
646 def load_default_config(ipython_dir=None):
647 """Load the default config file from the default ipython_dir.
648
649 This is useful for embedded shells.
650 """
651 if ipython_dir is None:
652 ipython_dir = get_ipython_dir()
653 cl = PyFileConfigLoader(default_config_file_name, ipython_dir)
654 config = cl.load_config()
655 return config
656
/usr/lib/python2.6/site-packages/IPython/frontend/terminal/interactiveshell.pyc in mainloop(self=<IPython.frontend.terminal.interactiveshell.TerminalInteractiveShell object at 0x8fd62ac>, display_banner=None)
183 def mainloop(self, display_banner=None):
184 """Start the mainloop.
185
186 If an optional banner argument is given, it will override the
187 internally created default banner.
188 """
189
190 with nested(self.builtin_trap, self.display_trap):
191
192 # if you run stuff with -c <cmd>, raw hist is not updated
193 # ensure that it's in sync
194 self.history_manager.sync_inputs()
195
196 while 1:
197 try:
--> 198 self.interact(display_banner=display_banner)
global N = undefined
global R = undefined
global t = undefined
global updatet = undefined
global user_ns_hiddenR = undefined
global _pylab_magic_runt = undefined
global magic_run = undefined
global guit = undefined
global ns = undefined
global sN = undefined
global usr = undefined
global lib = undefined
global python2 = undefined
global site = undefined
global packages = undefined
global IPython = undefined
global frontend = undefined
global terminal = undefined
global interactiveshell.pyt = undefined
global enable_pylab = undefined
global s = undefined
global c = undefined
global C = undefined
global _ = undefined
global d = undefined
global S = undefined
global Ask = undefined
global the = undefined
global shell = undefined
global to = undefined
global exit.Can = undefined
global be = undefined
global overiden = undefined
global used = undefined
global a = undefined
global callback.N = undefined
global R3 = undefined
global RS = undefined
global interactiveshell.pyRv = undefined
global i = undefined
global o = undefined
global q9 = undefined
global n = undefined
global sJ = undefined
global Handle = undefined
global interactive = undefined
global exit.This = undefined
global method = undefined
global calls = undefined
global ask_exit = undefined
global callback.s = undefined
global Gd = undefined
global g = undefined
global GHd = undefined
global Toggle = undefined
global autoindent = undefined
global on = undefined
global off = undefined
global available.s = undefined
global Automatic = undefined
global indentation = undefined
global OFFt = undefined
global ONN = undefined
global shellt = undefined
global set_autoindentR = undefined
global parameter_s = undefined
global magic_autoindent = undefined
global Paste = undefined
global execute = undefined
global pre = undefined
global formatted = undefined
global code = undefined
global block = undefined
global clipboard.You = undefined
global must = undefined
global terminate = undefined
global two = undefined
global minus = undefined
global signs = undefined
global alone = undefined
global line.You = undefined
global can = undefined
global also = undefined
global provide = undefined
global your = undefined
global own = undefined
global sentinel = undefined
global new = undefined
global this = undefined
global operation = undefined
global The = undefined
global dedented = undefined
global prior = undefined
global execution = undefined
global enable = undefined
global of = undefined
global definitions.characters = undefined
global at = undefined
global beginning = undefined
global line = undefined
global are = undefined
global ignored = undefined
global allow = undefined
global pasting = undefined
global directly = undefined
global e = undefined
global mails = undefined
global diff = undefined
global files = undefined
global doctests = undefined
global continuation = undefined
global prompt = undefined
global stripped.The = undefined
global executed = undefined
global assigned = undefined
global variable = undefined
global named = undefined
global later = undefined
global editing.You = undefined
global name = undefined
global an = undefined
global argument = undefined
global e.g..This = undefined
global assigns = undefined
global pasted = undefined
global string = undefined
global without = undefined
global dedenting = undefined
global executing = undefined
global it = undefined
global preceding = undefined
global still = undefined
global stripped = undefined
global re = <module 're' from '/usr/lib/python2.6/re.pyc'>
global executes = undefined
global previously = undefined
global entered = undefined
global by = undefined
global cpaste.Do = undefined
global alarmed = undefined
global garbled = undefined
global output = undefined
global Windows = undefined
global readline = undefined
global bug.Just = undefined
global press = undefined
global enter = undefined
global type = undefined
global again = undefined
global will = undefined
global what = undefined
global was = undefined
global just = undefined
global pasted.IPython = undefined
global statements = undefined
global magics = undefined
global escapes = undefined
global supported = undefined
global yet.See = undefined
global paste = undefined
global automatically = undefined
global pull = undefined
global clipboard.s = undefined
global rs = undefined
global modet = undefined
global stringt = undefined
global rNt = undefined
global ss = undefined
global parse_optionsRx = undefined
global has_keyt = undefined
global _rerun_pastedt = undefined
global gett = undefined
global _strip_pasted_lines_for_codet = undefined
global _get_pasted_linest = undefined
global _execute_block = undefined
global optst = undefined
global argst = undefined
global part = undefined
global sentinelt = undefined
global magic_cpaste = undefined
global pN = undefined
global p = undefined
global clipboard.The = undefined
global text = undefined
global pulled = undefined
global clipboard = undefined
global user = undefined
global intervention = undefined
global printed = undefined
global back = undefined
global screen = undefined
global before = undefined
global unless = undefined
global q = undefined
global flag = undefined
global given = undefined
global force = undefined
global quiet = undefined
global mode.The = undefined
global Options = undefined
global r = undefined
global cpaste.q = undefined
global mode = undefined
global do = undefined
global echo = undefined
global terminal.IPython = undefined
global cpaste = undefined
global manually = undefined
global into = undefined
global until = undefined
global you = undefined
global mark = undefined
global its = undefined
global end.t = undefined
global rqR = undefined
global Nt = undefined
global qs = undefined
global Rx = undefined
global RY = undefined
global clipboard_getR = undefined
global splitlinesRF = undefined
global pycolorizet = undefined
global endswithR = undefined
global textR = undefined
global RF = undefined
global magic_paste4 = undefined
global __name__t = undefined
global __module__R = undefined
global R4 = undefined
global Rj = undefined
global RE = undefined
global embeddedt = undefined
global embedded_activeR = undefined
global editort = undefined
global pagerR = undefined
global R2 = undefined
global RD = undefined
global propertyR1 = undefined
global RA = undefined
global RC = undefined
global RG = undefined
global RQ = undefined
global RO = undefined
global R_ = undefined
global Rk = undefined
global Rv = undefined
global Rb = undefined
global interactiveshell.pyR = undefined
global sL = undefined
global U = undefined
global contextlibR = undefined
global reR = undefined
global IPython.core.errorR = undefined
global IPython.core.usageR = undefined
global IPython.core.inputlistR = undefined
global IPython.core.interactiveshellR = undefined
global IPython.lib.inputhookR = undefined
global IPython.lib.pylabtoolsR = undefined
global IPython.utils.terminalR = undefined
global IPython.utils.processR = undefined
global IPython.utils.warnR = undefined
global IPython.utils.textR = undefined
global IPython.utils.traitletsR = undefined
global Rr = undefined
global register = undefined
global module = undefined
global s. = undefined
199 #self.interact_with_readline()
200 # XXX for testing of a readline-decoupled repl loop, call
201 # interact_with_readline above
202 break
203 except KeyboardInterrupt:
204 # this should not be necessary, but KeyboardInterrupt
205 # handling seems rather unpredictable...
206 self.write("\nKeyboardInterrupt in interact()\n")
207
208 def interact(self, display_banner=None):
209 """Closely emulate the interactive Python console."""
210
211 # batch run -> do not interact
212 if self.exit_now:
213 return
/usr/lib/python2.6/site-packages/IPython/frontend/terminal/interactiveshell.pyc in interact(self=<IPython.frontend.terminal.interactiveshell.TerminalInteractiveShell object at 0x8fd62ac>, display_banner=False)
270 'Because of how pdb handles the stack, it is impossible\n'
271 'for IPython to properly format this particular exception.\n'
272 'IPython will resume normal operation.')
273 except:
274 # exceptions here are VERY RARE, but they can be triggered
275 # asynchronously by signal handlers, for example.
276 self.showtraceback()
277 else:
278 self.input_splitter.push(line)
279 more = self.input_splitter.push_accepts_more()
280 if (self.SyntaxTB.last_syntax_error and
281 self.autoedit_syntax):
282 self.edit_syntax_error()
283 if not more:
284 source_raw = self.input_splitter.source_raw_reset()[1]
--> 285 self.run_cell(source_raw)
286
287 # We are off again...
288 __builtin__.__dict__['__IPYTHON__active'] -= 1
289
290 # Turn off the exit flag, so the mainloop can be restarted if desired
291 self.exit_now = False
292
293 def raw_input(self, prompt='', continue_prompt=False):
294 """Write a prompt and read a line.
295
296 The returned line does not include the trailing newline.
297 When the user enters the EOF key sequence, EOFError is raised.
298
299 Optional inputs:
300
/usr/lib/python2.6/site-packages/IPython/core/interactiveshell.pyc in run_cell(self=<IPython.frontend.terminal.interactiveshell.TerminalInteractiveShell object at 0x8fd62ac>, cell='repr("\xc3\xa1\xc3\xa9")\n')
2078 # - increment the global execution counter (we need to pull that out
2079 # from outputcache's control; outputcache should instead read it from
2080 # the main object).
2081 # - do any logging of input
2082 # - update histories (raw/translated)
2083 # - then, call plain run_source (for single blocks, so displayhook is
2084 # triggered) or run_code (for multiline blocks in exec mode).
2085 #
2086 # Once this is done, we'll be able to stop using runlines and we'll
2087 # also have a much cleaner separation of logging, input history and
2088 # output cache management.
2089 #################################################################
2090
2091 # We need to break up the input into executable blocks that can be run
2092 # in 'single' mode, to provide comfortable user behavior.
-> 2093 blocks = self.input_splitter.split_blocks(cell)
2094
2095 if not blocks:
2096 return
2097
2098 # Store the 'ipython' version of the cell as well, since that's what
2099 # needs to go into the translated history and get executed (the
2100 # original cell may contain non-python syntax).
2101 ipy_cell = ''.join(blocks)
2102
2103 # Store raw and processed history
2104 self.history_manager.store_inputs(ipy_cell, cell)
2105
2106 self.logger.log(ipy_cell, cell)
2107 # dbg code!!!
2108 if 0:
/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in split_blocks(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, lines=[])
514 # block. Thus, we must put the line back into the input buffer
515 # so that it starts a new block on the next pass.
516 #
517 # 2. the second case is detected in the line before the actual
518 # dedent happens, so , we consume the line and we can break out
519 # to start a new block.
520
521 # Case 1, explicit dedent causes a break.
522 # Note: check that we weren't on the very last line, else we'll
523 # enter an infinite loop adding/removing the last line.
524 if _full_dedent and lines and not next_line.startswith(' '):
525 lines.append(next_line)
526 break
527
528 # Otherwise any line is pushed
--> 529 self.push(next_line)
530
531 # Case 2, full dedent with full block ready:
532 if _full_dedent or \
533 self.indent_spaces==0 and not self.push_accepts_more():
534 break
535 # Form the new block with the current source input
536 blocks.append(self.source_reset())
537
538 #return blocks
539 # HACK!!! Now that our input is in blocks but guaranteed to be pure
540 # python syntax, feed it back a second time through the AST-based
541 # splitter, which is more accurate than ours.
542 return split_blocks(''.join(blocks))
543
544 #------------------------------------------------------------------------
/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in push(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, lines='repr("\xc3\xa1\xc3\xa9")')
981 # class by hand line by line, we need to temporarily switch out to
982 # 'line' mode, do a single manual reset and then feed the lines one
983 # by one. Note that this only matters if the input has more than one
984 # line.
985 changed_input_mode = False
986
987 if self.input_mode == 'cell':
988 self.reset()
989 changed_input_mode = True
990 saved_input_mode = 'cell'
991 self.input_mode = 'line'
992
993 # Store raw source before applying any transformations to it. Note
994 # that this must be done *after* the reset() call that would otherwise
995 # flush the buffer.
--> 996 self._store(lines, self._buffer_raw, 'source_raw')
997
998 try:
999 push = super(IPythonInputSplitter, self).push
1000 for line in lines_list:
1001 if self._is_complete or not self._buffer or \
1002 (self._buffer and self._buffer[-1].rstrip().endswith(':')):
1003 for f in transforms:
1004 line = f(line)
1005
1006 out = push(line)
1007 finally:
1008 if changed_input_mode:
1009 self.input_mode = saved_input_mode
1010 return out
1011
/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in _store(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, lines='repr("\xc3\xa1\xc3\xa9")', buffer=['repr("\xc3\xa1\xc3\xa9")\n'], store='source_raw')
592 self.indent_spaces, self._full_dedent = self._find_indent(line)
593
594 def _store(self, lines, buffer=None, store='source'):
595 """Store one or more lines of input.
596
597 If input lines are not newline-terminated, a newline is automatically
598 appended."""
599
600 if buffer is None:
601 buffer = self._buffer
602
603 if lines.endswith('\n'):
604 buffer.append(lines)
605 else:
606 buffer.append(lines+'\n')
--> 607 setattr(self, store, self._set_source(buffer))
608
609 def _set_source(self, buffer):
610 return ''.join(buffer).encode(self.encoding)
611
612
613 #-----------------------------------------------------------------------------
614 # Functions and classes for IPython-specific syntactic support
615 #-----------------------------------------------------------------------------
616
617 # RegExp for splitting line contents into pre-char//first word-method//rest.
618 # For clarity, each group in on one line.
619
620 line_split = re.compile("""
621 ^(\s*) # any leading space
622 ([,;/%]|!!?|\?\??) # escape character or characters
/usr/lib/python2.6/site-packages/IPython/core/inputsplitter.pyc in _set_source(self=<IPython.core.inputsplitter.IPythonInputSplitter object at 0x91f002c>, buffer=['repr("\xc3\xa1\xc3\xa9")\n'])
595 """Store one or more lines of input.
596
597 If input lines are not newline-terminated, a newline is automatically
598 appended."""
599
600 if buffer is None:
601 buffer = self._buffer
602
603 if lines.endswith('\n'):
604 buffer.append(lines)
605 else:
606 buffer.append(lines+'\n')
607 setattr(self, store, self._set_source(buffer))
608
609 def _set_source(self, buffer):
--> 610 return ''.join(buffer).encode(self.encoding)
611
612
613 #-----------------------------------------------------------------------------
614 # Functions and classes for IPython-specific syntactic support
615 #-----------------------------------------------------------------------------
616
617 # RegExp for splitting line contents into pre-char//first word-method//rest.
618 # For clarity, each group in on one line.
619
620 line_split = re.compile("""
621 ^(\s*) # any leading space
622 ([,;/%]|!!?|\?\??) # escape character or characters
623 \s*(%?[\w\.\*]*) # function/method, possibly with leading %
624 # to correctly treat things like '?%magic'
625 (\s+.*$|$) # rest of line
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 6: ordinal not in range(128)
Hit <Enter> to quit this message (your terminal may close):
-----------------------------
--
Eduardo
More information about the IPython-dev
mailing list