[Python-checkins] python/dist/src/Doc/ref ref2.tex,1.34.6.4,1.34.6.5 ref3.tex,1.82.4.4,1.82.4.5

Tue, 24 Sep 2002 14:01:09 -0700

Update of /cvsroot/python/python/dist/src/Doc/ref
In directory usw-pr-cvs1:/tmp/cvs-serv18244/ref

Modified Files:
      Tag: release22-maint
	ref2.tex ref3.tex 
Log Message:
Another try at clarifying what goes into and comes out of Unicode objects.

Index: ref2.tex
===================================================================
RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref2.tex,v
retrieving revision 1.34.6.4
retrieving revision 1.34.6.5
diff -C2 -d -r1.34.6.4 -r1.34.6.5
*** ref2.tex	9 Aug 2002 20:41:19 -0000	1.34.6.4
--- ref2.tex	24 Sep 2002 21:01:06 -0000	1.34.6.5
***************
*** 377,403 ****
  \index{C}

! \begin{tableii}{l|l}{code}{Escape Sequence}{Meaning}
! \lineii{\e\var{newline}} {Ignored}
! \lineii{\e\e}	{Backslash (\code{\e})}
! \lineii{\e'}	{Single quote (\code{'})}
! \lineii{\e"}	{Double quote (\code{"})}
! \lineii{\e a}	{\ASCII{} Bell (BEL)}
! \lineii{\e b}	{\ASCII{} Backspace (BS)}
! \lineii{\e f}	{\ASCII{} Formfeed (FF)}
! \lineii{\e n}	{\ASCII{} Linefeed (LF)}
! \lineii{\e N\{\var{name}\}}
!        {Character named \var{name} in the Unicode database (Unicode only)}
! \lineii{\e r}	{\ASCII{} Carriage Return (CR)}
! \lineii{\e t}	{\ASCII{} Horizontal Tab (TAB)}
! \lineii{\e u\var{xxxx}}    {Character with 16-bit hex value \var{xxxx} (Unicode only)}
! \lineii{\e U\var{xxxxxxxx}}{Character with 32-bit hex value \var{xxxxxxxx} (Unicode only)}
! \lineii{\e v}	{\ASCII{} Vertical Tab (VT)}
! \lineii{\e\var{ooo}} {\ASCII{} character with octal value \var{ooo}}
! \lineii{\e x\var{hh}} {\ASCII{} character with hex value \var{hh}}
! \end{tableii}
  \index{ASCII@\ASCII}

! As in Standard C, up to three octal digits are accepted.  However,
! exactly two hex digits are taken in hex escapes.

  Unlike Standard \index{unrecognized escape sequence}C,
--- 377,422 ----
  \index{C}

! \begin{tableiii}{l|l|c}{code}{Escape Sequence}{Meaning}{Notes}
! \lineiii{\e\var{newline}} {Ignored}{}
! \lineiii{\e\e}	{Backslash (\code{\e})}{}
! \lineiii{\e'}	{Single quote (\code{'})}{}
! \lineiii{\e"}	{Double quote (\code{"})}{}
! \lineiii{\e a}	{\ASCII{} Bell (BEL)}{}
! \lineiii{\e b}	{\ASCII{} Backspace (BS)}{}
! \lineiii{\e f}	{\ASCII{} Formfeed (FF)}{}
! \lineiii{\e n}	{\ASCII{} Linefeed (LF)}{}
! \lineiii{\e N\{\var{name}\}}
!         {Character named \var{name} in the Unicode database (Unicode only)}{}
! \lineiii{\e r}	{\ASCII{} Carriage Return (CR)}{}
! \lineiii{\e t}	{\ASCII{} Horizontal Tab (TAB)}{}
! \lineiii{\e u\var{xxxx}}
!         {Character with 16-bit hex value \var{xxxx} (Unicode only)}{(1)}
! \lineiii{\e U\var{xxxxxxxx}}
!         {Character with 32-bit hex value \var{xxxxxxxx} (Unicode only)}{(2)}
! \lineiii{\e v}	{\ASCII{} Vertical Tab (VT)}{}
! \lineiii{\e\var{ooo}} {\ASCII{} character with octal value \var{ooo}}{(3)}
! \lineiii{\e x\var{hh}} {\ASCII{} character with hex value \var{hh}}{(4)}
! \end{tableiii}
  \index{ASCII@\ASCII}

! \noindent
! Notes:
! 
! \begin{itemize}
! \item[(1)]
!   Individual code units which form parts of a surrogate pair can be
!   encoded using this escape sequence.
! \item[(2)]
!   Any Unicode character can be encoded this way, but characters
!   outside the Basic Multilingual Plane (BMP) will be encoded using a
!   surrogate pair if Python is compiled to use 16-bit code units (the
!   default).  Individual code units which form parts of a surrogate
!   pair can be encoded using this escape sequence.
! \item[(3)]
!   As in Standard C, up to three octal digits are accepted.
! \item[(4)]
!   Unlike in Standard C, exactly two hex digits are required.
! \end{itemize}
! 

  Unlike Standard \index{unrecognized escape sequence}C,
***************
*** 428,432 ****
  escape sequence is processed while \emph{all other backslashes are
  left in the string}.  For example, the string literal
! \code{ur"\e u0062\e n"} consists of three Unicode characters:
  `LATIN SMALL LETTER B', `REVERSE SOLIDUS', and `LATIN SMALL LETTER N'.
  Backslashes can be escaped with a preceding backslash; however, both
--- 447,451 ----
  escape sequence is processed while \emph{all other backslashes are
  left in the string}.  For example, the string literal
! \code{ur"\e{}u0062\e n"} consists of three Unicode characters:
  `LATIN SMALL LETTER B', `REVERSE SOLIDUS', and `LATIN SMALL LETTER N'.
  Backslashes can be escaped with a preceding backslash; however, both

Index: ref3.tex
===================================================================
RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref3.tex,v
retrieving revision 1.82.4.4
retrieving revision 1.82.4.5
diff -C2 -d -r1.82.4.4 -r1.82.4.5
*** ref3.tex	20 Jun 2002 06:18:26 -0000	1.82.4.4
--- ref3.tex	24 Sep 2002 21:01:07 -0000	1.82.4.5
***************
*** 287,299 ****

  \item[Unicode]
! The items of a Unicode object are Unicode characters.  A Unicode
! character is represented by a Unicode object of one item and can hold
! a 16-bit value representing a Unicode ordinal.  The built-in functions
  \function{unichr()}\bifuncindex{unichr} and
! \function{ord()}\bifuncindex{ord} convert between characters and
  nonnegative integers representing the Unicode ordinals as defined in
  the Unicode Standard 3.0. Conversion from and to other encodings is
  possible through the Unicode method \method{encode} and the built-in
! function \function{unicode()}\bifuncindex{unicode}.
  \obindex{unicode}
  \index{character}
--- 287,303 ----

  \item[Unicode]
! The items of a Unicode object are Unicode code units.  A Unicode code
! unit is represented by a Unicode object of one item and can hold
! either a 16-bit or 32-bit value representing a Unicode ordinal (the
! maximum value for the ordinal is given in \code{sys.maxunicode}, and
! depends on how Python is configured at compile time).  Surrogate pairs
! may be present in the Unicode object, and will be reported as two
! separate items.  The built-in functions
  \function{unichr()}\bifuncindex{unichr} and
! \function{ord()}\bifuncindex{ord} convert between code units and
  nonnegative integers representing the Unicode ordinals as defined in
  the Unicode Standard 3.0. Conversion from and to other encodings is
  possible through the Unicode method \method{encode} and the built-in
! function \function{unicode()}.\bifuncindex{unicode}
  \obindex{unicode}
  \index{character}