|
|
a26447 |
diff --git a/flask/json.py b/flask/json.py
|
|
|
a26447 |
index 45ba3240..1ee0586e 100644
|
|
|
a26447 |
--- a/flask/json.py
|
|
|
a26447 |
+++ b/flask/json.py
|
|
|
a26447 |
@@ -8,6 +8,7 @@
|
|
|
a26447 |
:copyright: (c) 2012 by Armin Ronacher.
|
|
|
a26447 |
:license: BSD, see LICENSE for more details.
|
|
|
a26447 |
"""
|
|
|
a26447 |
+import codecs
|
|
|
a26447 |
import io
|
|
|
a26447 |
import uuid
|
|
|
a26447 |
from datetime import datetime
|
|
|
a26447 |
@@ -111,6 +112,49 @@ def _load_arg_defaults(kwargs):
|
|
|
a26447 |
kwargs.setdefault('cls', JSONDecoder)
|
|
|
a26447 |
|
|
|
a26447 |
|
|
|
a26447 |
def detect_encoding(data):
    """Detect which UTF codec was used to encode the given bytes.

    The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is
    accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big
    or little endian. Some editors or libraries may prepend a BOM.

    Detection only looks at the first four bytes. Byte order marks are
    honoured first; without one, the position of NUL bytes is used, which
    relies on a JSON document starting with an ASCII character.

    :param data: Bytes in unknown UTF encoding.
    :return: UTF encoding name, one of ``'utf-8'``, ``'utf-8-sig'``,
        ``'utf-16'``, ``'utf-16-be'``, ``'utf-16-le'``, ``'utf-32'``,
        ``'utf-32-be'`` or ``'utf-32-le'``. Falls back to ``'utf-8'``
        when nothing else matches (including empty input).
    """
    head = data[:4]

    if head[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'

    # BOMs are checked before the NUL heuristic so that BOM-prefixed input
    # is recognised even when its first code unit contains no NUL byte.
    # UTF-32 must come before UTF-16: the UTF-32-LE BOM starts with the
    # UTF-16-LE BOM.
    if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE):
        return 'utf-32'

    if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE):
        return 'utf-16'

    # No BOM and no NUL byte: a leading ASCII character encoded as
    # UTF-16/32 would always produce a NUL, so this is UTF-8.
    if b'\x00' not in head:
        return 'utf-8'

    if len(head) == 4:
        # The order of these checks matters: the UTF-32 patterns are also
        # matched by the looser UTF-16 ones.
        if head[:3] == b'\x00\x00\x00':
            return 'utf-32-be'

        if head[::2] == b'\x00\x00':
            return 'utf-16-be'

        if head[1:] == b'\x00\x00\x00':
            return 'utf-32-le'

        if head[1::2] == b'\x00\x00':
            return 'utf-16-le'

    if len(head) == 2:
        # A single UTF-16 code unit: the NUL marks the high-order byte.
        return 'utf-16-be' if head.startswith(b'\x00') else 'utf-16-le'

    return 'utf-8'
|
|
|
a26447 |
+
|
|
|
a26447 |
+
|
|
|
a26447 |
def dumps(obj, **kwargs):
|
|
|
a26447 |
"""Serialize ``obj`` to a JSON formatted ``str`` by using the application's
|
|
|
a26447 |
configured encoder (:attr:`~flask.Flask.json_encoder`) if there is an
|
|
|
a26447 |
@@ -145,7 +189,10 @@ def loads(s, **kwargs):
|
|
|
a26447 |
"""
|
|
|
a26447 |
_load_arg_defaults(kwargs)
|
|
|
a26447 |
if isinstance(s, bytes):
|
|
|
a26447 |
- s = s.decode(kwargs.pop('encoding', None) or 'utf-8')
|
|
|
a26447 |
+ encoding = kwargs.pop('encoding', None)
|
|
|
a26447 |
+ if encoding is None:
|
|
|
a26447 |
+ encoding = detect_encoding(s)
|
|
|
a26447 |
+ s = s.decode(encoding)
|
|
|
a26447 |
return _json.loads(s, **kwargs)
|
|
|
a26447 |
|
|
|
a26447 |
|
|
|
a26447 |
diff --git a/flask/testsuite/helpers.py b/flask/testsuite/helpers.py
|
|
|
a26447 |
index 636f67fa..12e10d96 100644
|
|
|
a26447 |
--- a/flask/testsuite/helpers.py
|
|
|
a26447 |
+++ b/flask/testsuite/helpers.py
|
|
|
a26447 |
@@ -15,6 +15,8 @@ import unittest
|
|
|
a26447 |
from logging import StreamHandler
|
|
|
a26447 |
from flask.testsuite import FlaskTestCase, catch_warnings, catch_stderr
|
|
|
a26447 |
from werkzeug.http import parse_cache_control_header, parse_options_header
|
|
|
a26447 |
+
|
|
|
a26447 |
+from flask import json
|
|
|
a26447 |
from flask._compat import StringIO, text_type
|
|
|
a26447 |
|
|
|
a26447 |
|
|
|
a26447 |
@@ -29,6 +31,16 @@ def has_encoding(name):
|
|
|
a26447 |
|
|
|
a26447 |
class JSONTestCase(FlaskTestCase):
|
|
|
a26447 |
|
|
|
a26447 |
+ def test_detect_encoding(self):
|
|
|
a26447 |
+ values = (1, 't', True, False, None, [], [1,2,3], {}, {'foo': u'🐍'},)
|
|
|
a26447 |
+ encodings = ('utf-8', 'utf-8-sig', 'utf-16-le', 'utf-16-be', 'utf-16', 'utf-32-le', 'utf-32-be', 'utf-32',)
|
|
|
a26447 |
+
|
|
|
a26447 |
+ for encoding in encodings:
|
|
|
a26447 |
+ for value in values:
|
|
|
a26447 |
+ data = json.dumps(value).encode(encoding)
|
|
|
a26447 |
+ self.assert_equal(json.detect_encoding(data), encoding)
|
|
|
a26447 |
+ self.assert_equal(json.loads(data),value)
|
|
|
a26447 |
+
|
|
|
a26447 |
def test_json_bad_requests(self):
|
|
|
a26447 |
app = flask.Flask(__name__)
|
|
|
a26447 |
@app.route('/json', methods=['POST'])
|
|
|
a26447 |
@@ -38,18 +50,6 @@ class JSONTestCase(FlaskTestCase):
|
|
|
a26447 |
rv = c.post('/json', data='malformed', content_type='application/json')
|
|
|
a26447 |
self.assert_equal(rv.status_code, 400)
|
|
|
a26447 |
|
|
|
a26447 |
- def test_json_body_encoding(self):
|
|
|
a26447 |
- app = flask.Flask(__name__)
|
|
|
a26447 |
- app.testing = True
|
|
|
a26447 |
- @app.route('/')
|
|
|
a26447 |
- def index():
|
|
|
a26447 |
- return flask.request.get_json()
|
|
|
a26447 |
-
|
|
|
a26447 |
- c = app.test_client()
|
|
|
a26447 |
- resp = c.get('/', data=u'"Hällo Wörld"'.encode('iso-8859-15'),
|
|
|
a26447 |
- content_type='application/json; charset=iso-8859-15')
|
|
|
a26447 |
- self.assert_equal(resp.data, u'Hällo Wörld'.encode('utf-8'))
|
|
|
a26447 |
-
|
|
|
a26447 |
def test_jsonify(self):
|
|
|
a26447 |
d = dict(a=23, b=42, c=[1, 2, 3])
|
|
|
a26447 |
app = flask.Flask(__name__)
|
|
|
a26447 |
diff --git a/flask/wrappers.py b/flask/wrappers.py
|
|
|
a26447 |
index 1a17824a..0d6f068d 100644
|
|
|
a26447 |
--- a/flask/wrappers.py
|
|
|
a26447 |
+++ b/flask/wrappers.py
|
|
|
a26447 |
@@ -127,17 +127,10 @@ class Request(RequestBase):
|
|
|
a26447 |
if self.mimetype != 'application/json' and not force:
|
|
|
a26447 |
return None
|
|
|
a26447 |
|
|
|
a26447 |
- # We accept a request charset against the specification as
|
|
|
a26447 |
- # certain clients have been using this in the past. This
|
|
|
a26447 |
- # fits our general approach of being nice in what we accept
|
|
|
a26447 |
- # and strict in what we send out.
|
|
|
a26447 |
- request_charset = self.mimetype_params.get('charset')
|
|
|
a26447 |
+ data = _get_data(self, cache)
|
|
|
a26447 |
+
|
|
|
a26447 |
try:
|
|
|
a26447 |
- data = _get_data(self, cache)
|
|
|
a26447 |
- if request_charset is not None:
|
|
|
a26447 |
- rv = json.loads(data, encoding=request_charset)
|
|
|
a26447 |
- else:
|
|
|
a26447 |
- rv = json.loads(data)
|
|
|
a26447 |
+ rv = json.loads(data)
|
|
|
a26447 |
except ValueError as e:
|
|
|
a26447 |
if silent:
|
|
|
a26447 |
rv = None
|