@@ -66,6 +66,16 @@ def errmsg(msg, doc, pos, end=None):
6666 'b' : '\b ' , 'f' : '\f ' , 'n' : '\n ' , 'r' : '\r ' , 't' : '\t ' ,
6767}
6868
69+ def _decode_uXXXX (s , pos ):
70+ esc = s [pos + 1 :pos + 5 ]
71+ if len (esc ) == 4 and esc [1 ] not in 'xX' :
72+ try :
73+ return int (esc , 16 )
74+ except ValueError :
75+ pass
76+ msg = "Invalid \\ uXXXX escape"
77+ raise ValueError (errmsg (msg , s , pos ))
78+
6979def py_scanstring (s , end , strict = True ,
7080 _b = BACKSLASH , _m = STRINGCHUNK .match ):
7181 """Scan the string s for a JSON string. End is the index of the
@@ -115,25 +125,14 @@ def py_scanstring(s, end, strict=True,
115125 raise ValueError (errmsg (msg , s , end ))
116126 end += 1
117127 else :
118- esc = s [end + 1 :end + 5 ]
119- next_end = end + 5
120- if len (esc ) != 4 :
121- msg = "Invalid \\ uXXXX escape"
122- raise ValueError (errmsg (msg , s , end ))
123- uni = int (esc , 16 )
124- if 0xd800 <= uni <= 0xdbff :
125- msg = "Invalid \\ uXXXX\\ uXXXX surrogate pair"
126- if not s [end + 5 :end + 7 ] == '\\ u' :
127- raise ValueError (errmsg (msg , s , end ))
128- esc2 = s [end + 7 :end + 11 ]
129- if len (esc2 ) != 4 :
130- raise ValueError (errmsg (msg , s , end ))
131- uni2 = int (esc2 , 16 )
132- uni = 0x10000 + (((uni - 0xd800 ) << 10 ) | (uni2 - 0xdc00 ))
133- next_end += 6
128+ uni = _decode_uXXXX (s , end )
129+ end += 5
130+ if 0xd800 <= uni <= 0xdbff and s [end :end + 2 ] == '\\ u' :
131+ uni2 = _decode_uXXXX (s , end + 1 )
132+ if 0xdc00 <= uni2 <= 0xdfff :
133+ uni = 0x10000 + (((uni - 0xd800 ) << 10 ) | (uni2 - 0xdc00 ))
134+ end += 6
134135 char = chr (uni )
135-
136- end = next_end
137136 _append (char )
138137 return '' .join (chunks ), end
139138
0 commit comments