Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

"""Implementation of JSONDecoder 

""" 

from __future__ import absolute_import 

import re 

import sys 

import struct 

from .compat import b, u, text_type, binary_type, PY3, unichr 

from .scanner import make_scanner, JSONDecodeError 

 

def _import_c_scanstring():
    """Return ``scanstring`` from the optional ``._speedups`` module.

    Returns None when importing it raises ImportError, so the caller can
    fall back to another implementation.
    """
    try:
        from ._speedups import scanstring
    except ImportError:
        return None
    else:
        return scanstring

# scanstring from the optional ._speedups module, or None when that module
# cannot be imported
c_scanstring = _import_c_scanstring()

# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']

# Regex flags shared by every pattern compiled in this module
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

 

def _floatconstants():
    """Return the float constants ``(nan, inf, -inf)`` as a 3-tuple.

    On Python 2.6 and later the values come straight from ``float()``.
    Older interpreters unpack them from a hex-encoded 16-byte string that
    holds two doubles.
    """
    if sys.version_info >= (2, 6):
        nan, inf = float('nan'), float('inf')
    else:
        raw = '7FF80000000000007FF0000000000000'.decode('hex')
        # The struct module in Python 2.4 would get frexp() out of range here
        # when an endian is specified in the format string. Fixed in Python
        # 2.5+, so reverse each 8-byte half by hand and unpack natively.
        if sys.byteorder != 'big':
            raw = raw[:8][::-1] + raw[8:][::-1]
        nan, inf = struct.unpack('dd', raw)
    return nan, inf, -inf

 

# Float values for nan, +inf and -inf, as produced by _floatconstants()
NaN, PosInf, NegInf = _floatconstants()

# Maps the three non-standard constant literals to their float values; its
# __getitem__ is the default parse_constant of JSONDecoder
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: a (possibly empty) run of ordinary string content, matched lazily.
# Group 2: the first character that ends the run -- a double quote, a
# backslash, or a control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to
# the text it stands for (\uXXXX is handled separately by py_scanstring)
BACKSLASH = {
    '"': u('"'), '\\': u('\u005c'), '/': u('/'),
    'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
}

# Encoding used when a caller passes encoding=None
DEFAULT_ENCODING = "utf-8"

 

# The only characters allowed in the XXXX part of a \uXXXX escape.
# int(x, 16) on its own is too lenient: it also accepts a sign, surrounding
# whitespace, underscores (Python 3.6+) and non-ASCII Unicode digits.
_HEX_DIGITS = frozenset('0123456789abcdefABCDEF')


def _decode_hex4(esc, _hexdigits=_HEX_DIGITS):
    """Return the integer value of *esc* if it is exactly four hexadecimal
    digits, otherwise None."""
    if len(esc) != 4:
        return None
    for c in esc:
        if c not in _hexdigits:
            return None
    return int(esc, 16)


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
        _PY3=PY3, _maxunicode=sys.maxunicode):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError (historically documented as ValueError) on attempt to
    decode an invalid string. If strict is False then literal control
    characters are allowed in the string.

    *encoding* (``DEFAULT_ENCODING`` when None) is only used on Python 2 to
    convert non-text chunks of *s* to text. The underscore-prefixed
    parameters are bound at definition time as local lookups and are not
    meant to be passed by callers.

    ``\\uXXXX`` escapes must consist of exactly four hexadecimal digits;
    anything else raises JSONDecodeError.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, reported for unterminated strings
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not _PY3 and not isinstance(content, text_type):
                content = text_type(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                # Interpolate the offending character; previously the raw
                # '%r' placeholder leaked into the error message
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\X escape sequence %r" % (esc,)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence
            msg = "Invalid \\uXXXX escape sequence"
            uni = _decode_hex4(s[end + 1:end + 5])
            if uni is None:
                # end - 1 is the index of the backslash
                raise JSONDecodeError(msg, s, end - 1)
            end += 5
            # Check for surrogate pair on UCS-4 systems
            # Note that this will join high/low surrogate pairs
            # but will also pass unpaired surrogates through
            if (_maxunicode > 65535 and
                    uni & 0xfc00 == 0xd800 and
                    s[end:end + 2] == '\\u'):
                uni2 = _decode_hex4(s[end + 2:end + 6])
                if uni2 is None:
                    # A malformed escape after a high surrogate is an error
                    # at the position of its backslash
                    raise JSONDecodeError(msg, s, end)
                if uni2 & 0xfc00 == 0xdc00:
                    uni = 0x10000 + (((uni - 0xd800) << 10) |
                                     (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return _join(chunks), end

 

 

# Use speedup if available: c_scanstring is None when the optional
# ._speedups import failed, in which case the pure-Python scanner is used
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for `in` membership tests
WHITESPACE_STR = ' \t\n\r'

 

def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Decode the members of a JSON object.

    *state* is an ``(s, end)`` pair: the source text and the index at which
    the first key, leading whitespace or the closing ``}`` is expected
    (presumably the index just past the opening brace -- confirm against the
    scanner that calls this).

    Keys are read with ``scanstring(s, end, encoding, strict)`` and passed
    through ``memo.setdefault`` so equal keys share one object; values are
    read with ``scan_once(s, end)``.

    Returns ``(result, end)`` where *end* is the index after the closing
    brace. *result* is ``object_pairs_hook(pairs)`` when that hook is given,
    otherwise a dict, passed through *object_hook* when that is given.

    Raises JSONDecodeError when a key, ``:``, ``,`` or ``}`` is missing.
    """
    s, pos = state
    # Backwards compatibility: callers may omit the memo table
    if memo is None:
        memo = {}
    intern_key = memo.setdefault
    items = []
    # Slice instead of indexing so the end of input yields '' and falls
    # through to the specific error below rather than an IndexError
    ch = s[pos:pos + 1]
    # The usual case is that the first key's quote comes immediately
    if ch != '"':
        if ch in _ws:
            pos = _w(s, pos).end()
            ch = s[pos:pos + 1]
        if ch == '}':
            # Trivial empty object
            if object_pairs_hook is not None:
                return object_pairs_hook(items), pos + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, pos + 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, pos)
    # Step over the opening quote of the first key
    pos += 1
    while True:
        raw_key, pos = scanstring(s, pos, encoding, strict)
        key = intern_key(raw_key, raw_key)

        # Fast path for ":" directly after the key; the whitespace regex is
        # only consulted when it is not there
        if s[pos:pos + 1] != ':':
            pos = _w(s, pos).end()
            if s[pos:pos + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, pos)
        pos += 1

        # Skip whitespace before the value: one character is handled by
        # hand, longer runs by the regex. Running off the end is left for
        # scan_once to report.
        try:
            if s[pos] in _ws:
                pos += 1
                if s[pos] in _ws:
                    pos = _w(s, pos + 1).end()
        except IndexError:
            pass

        value, pos = scan_once(s, pos)
        items.append((key, value))

        # Find the delimiter after the value; '' stands for end of input
        try:
            ch = s[pos]
            if ch in _ws:
                pos = _w(s, pos + 1).end()
                ch = s[pos]
        except IndexError:
            ch = ''

        if ch == '}':
            pos += 1
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, pos)
        pos += 1

        # After a comma the next key's opening quote must follow, optionally
        # preceded by whitespace
        try:
            ch = s[pos]
            if ch in _ws:
                pos += 1
                ch = s[pos]
                if ch in _ws:
                    pos = _w(s, pos + 1).end()
                    ch = s[pos]
        except IndexError:
            ch = ''

        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, pos)
        pos += 1

    # object_pairs_hook takes priority over object_hook
    if object_pairs_hook is not None:
        return object_pairs_hook(items), pos
    result = dict(items)
    if object_hook is not None:
        result = object_hook(result)
    return result, pos

 

def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Decode the elements of a JSON array.

    *state* is an ``(s, end)`` pair: the source text and the index at which
    the first element, leading whitespace or the closing ``]`` is expected.
    Each element is read with ``scan_once(s, end)``.

    Returns ``(values, end)`` where *values* is a list and *end* is the
    index after the closing bracket. Raises JSONDecodeError when the input
    ends before the first element or an element is not followed by ``,``
    or ``]``.
    """
    s, pos = state
    items = []
    ch = s[pos:pos + 1]
    if ch in _ws:
        pos = _w(s, pos + 1).end()
        ch = s[pos:pos + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        return items, pos + 1
    if ch == '':
        raise JSONDecodeError("Expecting value or ']'", s, pos)
    while True:
        item, pos = scan_once(s, pos)
        items.append(item)
        # Slicing yields '' at the end of input, which falls through to the
        # delimiter error below
        ch = s[pos:pos + 1]
        if ch in _ws:
            pos = _w(s, pos + 1).end()
            ch = s[pos:pos + 1]
        if ch == ']':
            return items, pos + 1
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, pos)
        pos += 1

        # Skip whitespace before the next element: one character by hand,
        # longer runs with the regex. Running off the end is left for
        # scan_once to report.
        try:
            if s[pos] in _ws:
                pos += 1
                if s[pos] in _ws:
                    pos = _w(s, pos + 1).end()
        except IndexError:
            pass

 

class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Unless customised, decoding maps JSON values to Python values like this:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str, unicode      |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded to
    the corresponding ``float`` values, although they are outside the JSON
    spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """
        *encoding* is the encoding used to interpret any :class:`str`
        objects decoded by this instance (``'utf-8'`` by default). It has
        no effect when decoding :class:`unicode` objects.

        Only encodings that are a superset of ASCII currently work; text in
        any other encoding should be passed in as :class:`unicode`.

        *object_hook*, if given, is called with the :class:`dict` produced
        for every decoded JSON object, and its return value is used in
        place of that dict. This allows custom deserializations (e.g. to
        support JSON-RPC class hinting).

        *object_pairs_hook*, if given, is called with an ordered list of
        ``(key, value)`` pairs for every decoded object literal, and its
        return value is used instead of the :class:`dict`. Decoders that
        depend on the order in which pairs were decoded can be built this
        way (for example, :func:`collections.OrderedDict` remembers
        insertion order). When *object_hook* is given as well,
        *object_pairs_hook* takes priority.

        *parse_float*, if given, is called with the string of every JSON
        float to be decoded. The default is equivalent to
        ``float(num_str)``. Use it to substitute another datatype or parser
        for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if given, is called with the string of every JSON int
        to be decoded. The default is equivalent to ``int(num_str)``. Use
        it to substitute another datatype or parser for JSON integers
        (e.g. :class:`float`).

        *parse_constant*, if given, is called with one of the strings
        ``'-Infinity'``, ``'Infinity'`` or ``'NaN'``. It can be used to
        raise an exception when invalid JSON numbers are encountered.

        *strict* controls what happens when an invalid control character
        appears inside a string. With the default of ``True`` unescaped
        control characters are parse errors; with ``False`` they are
        allowed in strings.

        """
        self.encoding = DEFAULT_ENCODING if encoding is None else encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Falsy hooks fall back to the built-in conversions
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives the fully configured decoder
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document)

        Raises JSONDecodeError("Extra data") when anything other than
        whitespace follows the document.

        """
        if _PY3 and isinstance(s, binary_type):
            s = s.decode(self.encoding)
        obj, stop = self.raw_decode(s)
        # Trailing whitespace is allowed; anything else is an error
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, stop, len(s))

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        Optionally, ``idx`` can be used to specify an offset in ``s`` where
        the JSON document begins.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Raises JSONDecodeError for a negative ``idx`` and, on Python 3,
        TypeError when ``s`` is not text.

        """
        if idx < 0:
            # Ensure that raw_decode bails on negative indexes, the regex
            # would otherwise mask this behavior. #98
            raise JSONDecodeError('Expecting value', s, idx)
        if _PY3 and not isinstance(s, text_type):
            raise TypeError("Input string must be text, not bytes")
        # Skip a leading byte-order mark: either the single character
        # U+FEFF or the three-character sequence '\xef\xbb\xbf'
        if idx < len(s):
            first = ord(s[idx])
            if first == 0xfeff:
                idx += 1
            elif first == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
                idx += 3
        start = _w(s, idx).end()
        return self.scan_once(s, idx=start)