# This file comes from
# https://raw.githubusercontent.com/getify/JSON.minify/1e3590ddb043b62c851506d9ae8425336335d879/json_minify/__init__.py
#
# It is licensed under the following license:
# Copyright (c) 2017 Kyle Simpson <getify@gmail.com>
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""A port of the `JSON-minify` utility to the Python language.
Based on JSON.minify.js: https://github.com/getify/JSON.minify
Contributers:
- Gerald Storer
- Contributed original version
- Felipe Machado
- Performance optimization
- Pradyun S. Gedam
- Conditions and variable names changed
- Reformatted tests and moved to separate file
- Made into a PyPI Package
"""
import re
def json_minify(string, strip_space=True):
tokenizer = re.compile('"|(/\*)|(\*/)|(//)|\n|\r')
end_slashes_re = re.compile(r'(\\)*$')
in_string = False
in_multi = False
in_single = False
new_str = []
index = 0
for match in re.finditer(tokenizer, string):
if not (in_multi or in_single):
tmp = string[index:match.start()]
if not in_string and strip_space:
# replace white space as defined in standard
tmp = re.sub('[ \t\n\r]+', '', tmp)
new_str.append(tmp)
elif not strip_space:
# Replace comments with white space so that the JSON parser reports
# the correct column numbers on parsing errors.
new_str.append(' ' * (match.start() - index))
index = match.end()
val = match.group()
if val == '"' and not (in_multi or in_single):
escaped = end_slashes_re.search(string, 0, match.start())
# start of string or unescaped quote character to end string
if not in_string or (escaped is None or len(escaped.group()) % 2 == 0): # noqa
in_string = not in_string
index -= 1 # include " character in next catch
elif not (in_string or in_multi or in_single):
if val == '/*':
in_multi = True
elif val == '//':
in_single = True
elif val == '*/' and in_multi and not (in_string or in_single):
in_multi = False
if not strip_space:
new_str.append(' ' * len(val))
elif val in '\r\n' and not (in_multi or in_string) and in_single:
in_single = False
elif not ((in_multi or in_single) or (val in ' \r\n\t' and strip_space)): # noqa
new_str.append(val)
if not strip_space:
if val in '\r\n':
new_str.append(val)
elif in_multi or in_single:
new_str.append(' ' * len(val))
new_str.append(string[index:])
return ''.join(new_str)