# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import re
from typing import (
    Any,
    Callable,
    IO,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Union,
)
import unicodedata

from .parser import Parser


def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: None = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
) -> Any:
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON5 document) to a Python object.

    The whole stream is read with ``fp.read()`` and handed to ``loads()``;
    every keyword argument is forwarded unchanged.

    Supports almost the same arguments as ``json.load()`` except that:

    - the `cls` keyword must be left as ``None``; custom decoders are not
      supported.
    - an extra `allow_duplicate_keys` parameter supports checking for
      duplicate keys in an object; by default, this is True for
      compatibility with ``json.load()``, but if set to False and
      the object contains duplicate keys, a ValueError will be raised.
    """
    s = fp.read()
    return loads(
        s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
    )


def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: None = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
) -> Any:
    """Deserialize ``s`` (a string containing a JSON5 document) to a Python
    object.

    ``s`` may also be a ``bytes`` object, in which case it is decoded using
    ``encoding`` (``'utf-8'`` when ``encoding`` is not given).

    Supports the same arguments as ``json.loads()`` except that:

    - the `cls` keyword must be left as ``None``; custom decoders are not
      supported (this is enforced with an assertion).
    - an extra `allow_duplicate_keys` parameter supports checking for
      duplicate keys in an object; by default, this is True for
      compatibility with ``json.loads()``, but if set to False and
      the object contains duplicate keys, a ValueError will be raised.

    Hook precedence: `object_pairs_hook` wins over `object_hook`; when
    neither is given objects become plain ``dict`` instances.

    Raises:
        ValueError: if ``s`` is empty, if the parser reports an error, or
            if a duplicate key is found while `allow_duplicate_keys` is
            false.
    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        encoding = encoding or 'utf-8'
        s = s.decode(encoding)

    if not s:
        raise ValueError('Empty strings are not legal JSON5')

    parser = Parser(s, '')
    ast, err, _ = parser.parse()
    if err:
        raise ValueError(err)

    def _fp_constant_parser(s):
        # float() understands 'inf'/'nan' (with an optional sign) but not
        # the JSON5 spellings, so translate them first.
        return float(s.replace('Infinity', 'inf').replace('NaN', 'nan'))

    if object_pairs_hook:
        dictify = object_pairs_hook
    elif object_hook:

        def dictify(pairs):
            return object_hook(dict(pairs))

    else:
        dictify = dict

    if not allow_duplicate_keys:
        # Wrap whichever constructor was chosen above so duplicates are
        # rejected before the object is built.
        _orig_dictify = dictify

        def dictify(pairs):  # pylint: disable=function-redefined
            return _reject_duplicate_keys(pairs, _orig_dictify)

    parse_float = parse_float or float
    parse_int = parse_int or int
    parse_constant = parse_constant or _fp_constant_parser

    return _walk_ast(ast, dictify, parse_float, parse_int, parse_constant)


def _reject_duplicate_keys(pairs, dictify):
    """Return ``dictify(pairs)``, raising ValueError on a repeated key."""
    keys = set()
    for key, _ in pairs:
        if key in keys:
            raise ValueError(f'Duplicate key "{key}" found in object')
        keys.add(key)
    return dictify(pairs)


def _walk_ast(
    el,
    dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
    parse_float,
    parse_int,
    parse_constant,
):
    """Convert a parsed AST node into the corresponding Python value.

    ``el`` is either one of the literal strings ``'None'``, ``'True'``,
    ``'False'``, or a ``(type, value)`` pair whose type is ``'number'``,
    ``'string'``, ``'object'`` or ``'array'``.  Objects are built by
    calling ``dictify`` with a list of ``(key, value)`` pairs; numbers are
    converted with ``parse_int``, ``parse_float`` or ``parse_constant``.
    """
    if el == 'None':
        return None
    if el == 'True':
        return True
    if el == 'False':
        return False

    ty, v = el
    if ty == 'number':
        # Look for the hex prefix after any leading sign so that a signed
        # literal such as '-0x1F' is not mistaken for a decimal int (or,
        # because of the hex digit 'e', for a float).  int(..., base=16)
        # accepts both the sign and the prefix.
        # NOTE(review): assumes the parser may return the sign as part of
        # the literal text; harmless if it never does.
        if v.lstrip('+-').startswith(('0x', '0X')):
            return parse_int(v, base=16)
        if '.' in v or 'e' in v or 'E' in v:
            return parse_float(v)
        if 'Infinity' in v or 'NaN' in v:
            return parse_constant(v)
        return parse_int(v)
    if ty == 'string':
        return v
    if ty == 'object':
        pairs = []
        for key, val_expr in v:
            val = _walk_ast(
                val_expr, dictify, parse_float, parse_int, parse_constant
            )
            pairs.append((key, val))
        return dictify(pairs)
    if ty == 'array':
        return [
            _walk_ast(item, dictify, parse_float, parse_int, parse_constant)
            for item in v
        ]
    # ``el`` is a tuple here, so it must be formatted rather than
    # concatenated onto the message.
    raise ValueError(f'unknown el: {el!r}')  # pragma: no cover


def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: None = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    **kwargs,
) -> None:
    """Serialize ``obj`` to a JSON5-formatted stream to ``fp``,
    a ``.write()``-supporting file-like object.

    The text is produced by ``dumps()`` and written with a single
    ``fp.write()`` call.

    Supports the same arguments as ``json.dump()``, except that:

    - The ``cls`` keyword is not supported; custom encoders are never
      used.
    - The ``encoding`` keyword (like any other unknown keyword) is
      ignored; Unicode strings are always written.
    - By default, string keys that are legal identifiers and not reserved
      words are not quoted; if you pass ``quote_keys=True``, they will
      be. All other keys are always quoted.
    - By default, if lists and objects span multiple lines of output
      (i.e., when ``indent`` is not None), the last item will have a
      trailing comma after it. If you pass ``trailing_commas=False``,
      it will not.
    - If you use a number, a boolean, or ``None`` as a key value in a
      dict, it will be converted to the corresponding JSON string value,
      e.g. "1", "true", or "null". By default, ``dump()`` will match the
      `json` module's behavior and produce malformed JSON if you mix keys
      of different types that have the same converted value; e.g.,
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
      object with duplicated keys. If you pass
      ``allow_duplicate_keys=False``, a ValueError will be raised instead.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` is intended to produce exactly the same
    output as ``json.dump(obj, fp)``.
    """
    del kwargs
    fp.write(
        dumps(
            obj=obj,
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            cls=cls,
            indent=indent,
            separators=separators,
            default=default,
            sort_keys=sort_keys,
            quote_keys=quote_keys,
            trailing_commas=trailing_commas,
            allow_duplicate_keys=allow_duplicate_keys,
        )
    )


def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: None = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    **kwargs,
) -> str:
    """Serialize ``obj`` to a JSON5-formatted string.

    Supports the same arguments as ``json.dumps()``, except that:

    - The ``cls`` keyword is not supported; custom encoders are never
      used.
    - The ``encoding`` keyword (like any other unknown keyword) is
      ignored; Unicode strings are always written.
    - By default, string keys that are legal identifiers and not reserved
      words are not quoted; if you pass ``quote_keys=True``, they will
      be. All other keys are always quoted.
    - By default, if lists and objects span multiple lines of output
      (i.e., when ``indent`` is not None), the last item will have a
      trailing comma after it. If you pass ``trailing_commas=False``,
      it will not.
    - If you use a number, a boolean, or ``None`` as a key value in a
      dict, it will be converted to the corresponding JSON string value,
      e.g. "1", "true", or "null". By default, ``dumps()`` will match the
      `json` module's behavior and produce malformed JSON if you mix keys
      of different types that have the same converted value; e.g.,
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
      object with duplicated keys. If you pass
      ``allow_duplicate_keys=False``, a ValueError will be raised instead.

    When ``separators`` is not given it defaults to ``(', ', ': ')`` if
    ``indent`` is None and ``(',', ': ')`` otherwise. When ``default`` is
    not given, unserializable objects raise TypeError.

    Calling ``dumps(obj, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` is intended to produce exactly the same
    output as ``json.dumps(obj)``.

    Raises:
        TypeError: for an unserializable value (with no ``default``) or an
            invalid key (unless ``skipkeys`` is true).
        ValueError: for a circular reference (when ``check_circular`` is
            true), for Infinity/NaN when ``allow_nan`` is false, or for a
            duplicate key when ``allow_duplicate_keys`` is false.
    """
    # NOTE(review): ``cls`` is a named parameter, so it can never appear in
    # ``kwargs`` and this assertion cannot fire; a ``cls`` argument is in
    # effect silently ignored.  Kept as-is so existing callers that pass
    # ``cls`` keep working.
    assert kwargs.get('cls', None) is None, 'Custom encoders are not supported'
    del cls

    if separators is None:
        if indent is None:
            separators = (', ', ': ')
        else:
            separators = (',', ': ')

    default = default or _raise_type_error

    if check_circular:
        seen: Optional[Set[int]] = set()
    else:
        seen = None

    level = 1
    is_key = False

    _, v = _dumps(
        obj,
        skipkeys,
        ensure_ascii,
        check_circular,
        allow_nan,
        indent,
        separators,
        default,
        sort_keys,
        quote_keys,
        trailing_commas,
        allow_duplicate_keys,
        seen,
        level,
        is_key,
    )
    return v


def _dumps(
    obj,
    skipkeys,
    ensure_ascii,
    check_circular,
    allow_nan,
    indent,
    separators,
    default,
    sort_keys,
    quote_keys,
    trailing_commas,
    allow_duplicate_keys,
    seen: Optional[Set[int]],
    level: int,
    is_key: bool,
):
    """Serialize one value (or, when ``is_key`` is true, one object key).

    Returns a ``(flag, text)`` pair.  For keys the flag says whether the
    key is usable: ``(False, None)`` is returned for an invalid key when
    ``skipkeys`` is true.  For values only ``text`` is used by callers.

    ``seen`` holds the ``id()`` of every container currently being
    serialized (or is None when circular checking is off); ``level`` is
    the current nesting depth used to compute indentation.

    Raises:
        TypeError: for an invalid key when ``skipkeys`` is false.
        ValueError: for Infinity/NaN when ``allow_nan`` is false, or when
            a circular reference is detected.
    """
    # pylint: disable=too-many-statements
    if obj is True:
        s = 'true'
    elif obj is False:
        s = 'false'
    elif obj is None:
        s = 'null'
    elif obj == float('inf'):
        if allow_nan:
            s = 'Infinity'
        else:
            raise ValueError(
                f'Out of range float value {obj!r} is not allowed '
                'when allow_nan is False'
            )
    elif obj == float('-inf'):
        if allow_nan:
            s = '-Infinity'
        else:
            raise ValueError(
                f'Out of range float value {obj!r} is not allowed '
                'when allow_nan is False'
            )
    elif isinstance(obj, float) and math.isnan(obj):
        if allow_nan:
            s = 'NaN'
        else:
            raise ValueError(
                f'Out of range float value {obj!r} is not allowed '
                'when allow_nan is False'
            )
    elif isinstance(obj, str):
        if (
            is_key
            and _is_ident(obj)
            and not quote_keys
            and not _is_reserved_word(obj)
        ):
            # Bare identifier keys may be written without quotes.
            return True, obj
        return True, _dump_str(obj, ensure_ascii)
    elif isinstance(obj, int):
        # Subclasses of `int` and `float` may have custom
        # __repr__ or __str__ methods, but the `JSON` library
        # ignores them in order to ensure that the representation
        # are just bare numbers. In order to match JSON's behavior
        # we call the methods of the `float` and `int` class directly.
        s = int.__repr__(obj)
    elif isinstance(obj, float):
        # See comment above for int
        s = float.__repr__(obj)
    else:
        s = None

    if is_key:
        # Scalar keys are written as quoted strings; anything else is
        # not a valid key.
        if s is not None:
            return True, f'"{s}"'
        if skipkeys:
            return False, None
        raise TypeError(f'invalid key {repr(obj)}')

    if s is not None:
        return True, s

    # From here on ``obj`` is a container or needs ``default``; work out
    # the whitespace that goes before each item and before the closer.
    if indent is not None:
        end_str = ''
        if trailing_commas:
            end_str = ','
        if isinstance(indent, int):
            if indent > 0:
                indent_str = '\n' + ' ' * indent * level
                end_str += '\n' + ' ' * indent * (level - 1)
            else:
                # Zero or negative indent: newlines only.
                indent_str = '\n'
                end_str += '\n'
        else:
            indent_str = '\n' + indent * level
            end_str += '\n' + indent * (level - 1)
    else:
        indent_str = ''
        end_str = ''

    item_sep, kv_sep = separators
    item_sep += indent_str

    if seen is not None:
        i = id(obj)
        if i in seen:
            raise ValueError('Circular reference detected.')
        seen.add(i)

    # Ideally we'd use collections.abc.Mapping and collections.abc.Sequence
    # here, but for backwards-compatibility with potential old callers,
    # we only check for the two attributes we need in each case.
    if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
        s = _dump_dict(
            obj,
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level + 1,
            item_sep,
            kv_sep,
            indent_str,
            end_str,
        )
    elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
        s = _dump_array(
            obj,
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level + 1,
            item_sep,
            indent_str,
            end_str,
        )
    else:
        # Let the caller-supplied ``default`` convert the object, then
        # serialize whatever it returned at the same nesting level.
        s = _dumps(
            default(obj),
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level,
            is_key,
        )[1]

    if seen is not None:
        # The object is finished, so it may legitimately appear again
        # elsewhere in the tree.
        seen.remove(i)
    return False, s


def _dump_dict(
    obj,
    skipkeys,
    ensure_ascii,
    check_circular,
    allow_nan,
    indent,
    separators,
    default,
    sort_keys,
    quote_keys,
    trailing_commas,
    allow_duplicate_keys,
    seen,
    level,
    item_sep,
    kv_sep,
    indent_str,
    end_str,
):
    """Serialize a mapping-like ``obj`` as a JSON5 object and return it.

    ``item_sep``, ``kv_sep``, ``indent_str`` and ``end_str`` are the
    separators and whitespace already computed by ``_dumps`` for this
    nesting level.  An empty mapping, or one whose keys were all dropped
    by ``skipkeys``, is written as ``{}``.

    Raises:
        ValueError: if two keys serialize to the same text while
            ``allow_duplicate_keys`` is false.
    """
    if not obj:
        return '{}'

    if sort_keys:
        keys = sorted(obj.keys())
    else:
        keys = obj.keys()

    items = []
    new_keys = set()
    for key in keys:
        valid_key, key_str = _dumps(
            key,
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level,
            is_key=True,
        )
        if skipkeys and not valid_key:
            continue

        if not allow_duplicate_keys:
            # Compare the serialized form: 1 and "1" are different Python
            # keys but the same JSON5 key.
            if key_str in new_keys:
                raise ValueError(f'duplicate key {repr(key)}')
            new_keys.add(key_str)

        value_str = _dumps(
            obj[key],
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level,
            is_key=False,
        )[1]
        items.append(key_str + kv_sep + value_str)

    if not items:
        # Every key was skipped.  Emitting the indent and trailing comma
        # around nothing would produce malformed output such as
        # '{\n  ,\n}', so fall back to the empty-object form.
        return '{}'

    return '{' + indent_str + item_sep.join(items) + end_str + '}'


def _dump_array(
    obj,
    skipkeys,
    ensure_ascii,
    check_circular,
    allow_nan,
    indent,
    separators,
    default,
    sort_keys,
    quote_keys,
    trailing_commas,
    allow_duplicate_keys,
    seen,
    level,
    item_sep,
    indent_str,
    end_str,
):
    """Serialize a sequence-like ``obj`` as a JSON5 array and return it.

    ``item_sep``, ``indent_str`` and ``end_str`` are the separator and
    whitespace already computed by ``_dumps`` for this nesting level.
    """
    if not obj:
        return '[]'

    items = [
        _dumps(
            item,
            skipkeys,
            ensure_ascii,
            check_circular,
            allow_nan,
            indent,
            separators,
            default,
            sort_keys,
            quote_keys,
            trailing_commas,
            allow_duplicate_keys,
            seen,
            level,
            False,
        )[1]
        for item in obj
    ]
    return '[' + indent_str + item_sep.join(items) + end_str + ']'


# Characters that are always written as a fixed escape sequence.
_STR_ESCAPES = {
    '\\': '\\\\',
    '"': '\\"',
    '\u2028': '\\u2028',
    '\u2029': '\\u2029',
    '\n': '\\n',
    '\r': '\\r',
    '\b': '\\b',
    '\f': '\\f',
    '\t': '\\t',
    '\v': '\\v',
}


def _dump_str(obj, ensure_ascii):
    """Return ``obj`` as a double-quoted JSON5 string literal.

    Backslash, double quote, the common control characters and the
    U+2028/U+2029 line separators are always escaped.  When
    ``ensure_ascii`` is true every other character outside the range
    U+0020..U+007F is written as ``\\uXXXX`` (as a surrogate pair for
    characters above U+FFFF); otherwise it is written as-is.
    """
    ret = ['"']
    for idx, ch in enumerate(obj):
        escaped = _STR_ESCAPES.get(ch)
        if escaped is not None:
            ret.append(escaped)
        elif ch == '\0':
            # '\0' must not be followed by a decimal digit (that would
            # read as a legacy octal escape), so use the long form then.
            following = obj[idx + 1 : idx + 2]
            if following and following in '0123456789':
                ret.append('\\u0000')
            else:
                ret.append('\\0')
        elif not ensure_ascii:
            ret.append(ch)
        else:
            o = ord(ch)
            if 32 <= o < 128:
                ret.append(ch)
            elif o < 65536:
                ret.append(f'\\u{o:04x}')
            else:
                # Encode as a UTF-16 surrogate pair.
                val = o - 0x10000
                high = 0xD800 + (val >> 10)
                low = 0xDC00 + (val & 0x3FF)
                ret.append(f'\\u{high:04x}\\u{low:04x}')
    return ''.join(ret) + '"'


def _is_ident(k):
    """Return True if ``k`` is a non-empty string made of identifier
    characters: a start character, ``$`` or ``_`` first, then any number
    of continue characters, ``$`` or ``_``.
    """
    if not k or not _is_id_start(k[0]) and k[0] not in ('$', '_'):
        return False
    for ch in k[1:]:
        if not _is_id_continue(ch) and ch not in ('$', '_'):
            return False
    return True


def _is_id_start(ch):
    """Return True if ``ch`` is in a Unicode category allowed at the start
    of an identifier.
    """
    # NOTE(review): 'Li' does not look like a real Unicode general
    # category; it is kept because it cannot change the result.
    return unicodedata.category(ch) in (
        'Lu',
        'Ll',
        'Li',
        'Lt',
        'Lm',
        'Lo',
        'Nl',
    )


def _is_id_continue(ch):
    """Return True if ``ch`` is in a Unicode category allowed after the
    first character of an identifier.
    """
    return unicodedata.category(ch) in (
        'Lu',
        'Ll',
        'Li',
        'Lt',
        'Lm',
        'Lo',
        'Nl',
        'Nd',
        'Mn',
        'Mc',
        'Pc',
    )


# Compiled lazily on the first call to _is_reserved_word().
_reserved_word_re = None


def _is_reserved_word(k):
    """Return True if ``k`` is one of the reserved words listed below."""
    global _reserved_word_re

    if _reserved_word_re is None:
        # List taken from section 7.6.1 of ECMA-262.
        _reserved_word_re = re.compile(
            '('
            + '|'.join(
                [
                    'break',
                    'case',
                    'catch',
                    'class',
                    'const',
                    'continue',
                    'debugger',
                    'default',
                    'delete',
                    'do',
                    'else',
                    'enum',
                    'export',
                    'extends',
                    'false',
                    'finally',
                    'for',
                    'function',
                    'if',
                    'import',
                    'in',
                    'instanceof',
                    'new',
                    'null',
                    'return',
                    'super',
                    'switch',
                    'this',
                    'throw',
                    'true',
                    'try',
                    'typeof',
                    'var',
                    'void',
                    'while',
                    'with',
                ]
            )
            + ')$'
        )
    return _reserved_word_re.match(k) is not None


def _raise_type_error(obj):
    """Default ``default`` hook: reject ``obj`` with a TypeError."""
    raise TypeError(f'{repr(obj)} is not JSON5 serializable')