diff --git a/adafruit_json_stream.py b/adafruit_json_stream.py index 0c08ae5..b5172d7 100644 --- a/adafruit_json_stream.py +++ b/adafruit_json_stream.py @@ -40,7 +40,9 @@ def read(self): self.i += 1 return char - def fast_forward(self, closer, *, return_object=False): + def fast_forward( + self, closer, *, return_object=False + ): # pylint: disable=too-many-branches """ Read through the stream until the character is ``closer``, ``]`` (ending a list) or ``}`` (ending an object.) Intermediate lists and @@ -62,6 +64,7 @@ def fast_forward(self, closer, *, return_object=False): # } = 125, { = 123 buffer[0] = closer - 2 + ignore_next = False while close_stack: char = self.read() count += 1 @@ -71,8 +74,14 @@ def fast_forward(self, closer, *, return_object=False): new_buffer[: len(buffer)] = buffer buffer = new_buffer buffer[count] = char - if char == close_stack[-1]: + if ignore_next: + # that character was escaped, skip it + ignore_next = False + elif char == close_stack[-1]: close_stack.pop() + elif char == ord("\\") and close_stack[-1] == ord('"'): + # if backslash, ignore the next character + ignore_next = True elif char == ord('"'): close_stack.append(ord('"')) elif close_stack[-1] == ord('"'): @@ -96,26 +105,41 @@ def next_value(self, endswith=None): if isinstance(endswith, str): endswith = ord(endswith) in_string = False + ignore_next = False while True: try: char = self.read() except EOFError: char = endswith - if not in_string and (char == endswith or char in (ord("]"), ord("}"))): - self.last_char = char - if len(buf) == 0: - return None - value_string = bytes(buf).decode("utf-8") - return json.loads(value_string) - if char == ord("{"): - return TransientObject(self) - if char == ord("["): - return TransientList(self) + in_string = False + ignore_next = False if not in_string: - in_string = char == ord('"') + # end character or object/list end + if char == endswith or char in (ord("]"), ord("}")): + self.last_char = char + if len(buf) == 0: + return None + value_string = bytes(buf).decode("utf-8") + return json.loads(value_string) + # string or sub object + if char == ord("{"): + return TransientObject(self) + if char == ord("["): + return TransientList(self) + # start a string + if char == ord('"'): + in_string = True else: - in_string = char != ord('"') + # skipping any closing or opening character if in a string + # also skipping escaped characters (like quotes in string) + if ignore_next: + ignore_next = False + elif char == ord("\\"): + ignore_next = True + elif char == ord('"'): + in_string = False + buf.append(char) @@ -130,7 +154,7 @@ def __init__(self, stream): self.finish_char = "" def finish(self): - """Consume all of the characters for this list from the stream.""" + """Consume all of the characters for this container from the stream.""" if not self.done: if self.active_child: self.active_child.finish() @@ -139,7 +163,8 @@ def finish(self): self.done = True def as_object(self): - """Consume all of the characters for this list from the stream and return as an object.""" + """Consume all of the characters for this container from the stream + and return as an object.""" if self.has_read: raise BufferError("Object has already been partly read.") @@ -183,10 +208,17 @@ class TransientObject(Transient): def __init__(self, stream): super().__init__(stream) self.finish_char = "}" - self.active_child_key = None + self.active_key = None + + def finish(self): + """Consume all of the characters for this container from the stream.""" + if self.active_key and not self.active_child: + self.done = self.data.fast_forward(",") + self.active_key = None + super().finish() def __getitem__(self, key): - if self.active_child and self.active_child_key == key: + if self.active_child and self.active_key == key: return self.active_child self.has_read = True @@ -195,12 +227,16 @@ def __getitem__(self, key): self.active_child.finish() self.done = self.data.fast_forward(",") self.active_child = None - self.active_child_key = None + self.active_key = None if self.done: raise KeyError(key) while not self.done: - current_key = self.data.next_value(":") + if self.active_key: + current_key = self.active_key + self.active_key = None + else: + current_key = self.data.next_value(":") if current_key is None: self.done = True break @@ -210,11 +246,47 @@ def __getitem__(self, key): self.done = True if isinstance(next_value, Transient): self.active_child = next_value - self.active_child_key = key + self.active_key = key return next_value self.done = self.data.fast_forward(",") raise KeyError(key) + def __iter__(self): + return self + + def _next_key(self): + """Return the next item's key, without consuming the value.""" + if self.active_key: + if self.active_child: + self.active_child.finish() + self.active_child = None + self.done = self.data.fast_forward(",") + self.active_key = None + if self.done: + raise StopIteration() + + self.has_read = True + + current_key = self.data.next_value(":") + if current_key is None: + self.done = True + raise StopIteration() + + self.active_key = current_key + return current_key + + def __next__(self): + return self._next_key() + + def items(self): + """Return iterator in the dictionary’s items ((key, value) pairs).""" + try: + while not self.done: + key = self._next_key() + yield (key, self[key]) + except StopIteration: + return + def load(data_iter): """Returns an object to represent the top level of the given JSON stream.""" diff --git a/examples/json_stream_local_file_advanced.py b/examples/json_stream_local_file_advanced.py new file mode 100644 index 0000000..2920619 --- /dev/null +++ b/examples/json_stream_local_file_advanced.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 Scott Shawcroft for Adafruit Industries +# +# SPDX-License-Identifier: Unlicense + +import sys +import time + +import adafruit_json_stream as json_stream + +# import json_stream + + +class FakeResponse: + def __init__(self, file): + self.file = file + + def iter_content(self, chunk_size): + while True: + yield self.file.read(chunk_size) + + +f = open(sys.argv[1], "rb") # pylint: disable=consider-using-with +obj = json_stream.load(FakeResponse(f).iter_content(32)) + + +def find_keys(haystack, keys): + """If we don't know the order in which the keys are, + go through all of them and pick the ones we want""" + out = {} + # iterate on the items of an object + for key in haystack: + if key in keys: + # retrieve the value only if needed + value = haystack[key] + # if it's a sub object, get it all + if hasattr(value, "as_object"): + value = value.as_object() + out[key] = value + return out + + +months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", +] + + +def time_to_date(stamp): + tt = time.localtime(stamp) + month = months[tt.tm_mon] + return f"{tt.tm_mday:2d}th of {month}" + + +def ftoc(temp): + return (temp - 32) * 5 / 9 + + +currently = obj["currently"] +print("Currently:") +print(" ", time_to_date(currently["time"])) +print(" ", currently["icon"]) + +# iterate on the content of a list +for i, day in enumerate(obj["daily"]["data"]): + day_items = find_keys(day, ("time", "summary", "temperatureHigh")) + date = time_to_date(day_items["time"]) + print( + f'On {date}: {day_items["summary"]},', + f'Max: {int(day_items["temperatureHigh"])}F', + f'({int(ftoc(day_items["temperatureHigh"]))}C)', + ) + + if i > 4: + break diff --git a/tests/test_json_stream.py b/tests/test_json_stream.py index b8197fe..7ed05c9 100644 --- a/tests/test_json_stream.py +++ b/tests/test_json_stream.py @@ -66,6 +66,38 @@ def dict_with_all_types(): """ +@pytest.fixture +def list_with_bad_strings(): + return r""" + [ + "\"}\"", + "{\"a\": 1, \"b\": [2,3]}", + "\"", + "\\\"", + "\\\\\"", + "\\x40\"", + "[[[{{{", + "]]]}}}" + ] + """ + + +@pytest.fixture +def dict_with_bad_strings(): + return r""" + { + "1": "\"}\"", + "2": "{\"a\": 1, \"b\": [2,3]}", + "3": "\"", + "4": "\\\"", + "5": "\\\\\"", + "6": "\\x40\"", + "7": "[[[{{{", + "8": "]]]}}}" + } + """ + + @pytest.fixture def list_with_values(): return """ @@ -308,6 +340,116 @@ def test_complex_dict(complex_dict): assert sub_counter == 12 +def test_bad_strings_in_list(list_with_bad_strings): + """Test loading different strings that can confuse the parser.""" + + bad_strings = [ + '"}"', + '{"a": 1, "b": [2,3]}', + '"', + '\\"', + '\\\\"', + '\\x40"', + "[[[{{{", + "]]]}}}", + ] + + assert json.loads(list_with_bad_strings) + + # get each separately + stream = adafruit_json_stream.load(BytesChunkIO(list_with_bad_strings.encode())) + for i, item in enumerate(stream): + assert item == bad_strings[i] + + +def test_bad_strings_in_list_iter(list_with_bad_strings): + """Test loading different strings that can confuse the parser.""" + + bad_strings = [ + '"}"', + '{"a": 1, "b": [2,3]}', + '"', + '\\"', + '\\\\"', + '\\x40"', + "[[[{{{", + "]]]}}}", + ] + + assert json.loads(list_with_bad_strings) + + # get each separately + stream = adafruit_json_stream.load(BytesChunkIO(list_with_bad_strings.encode())) + for i, item in enumerate(stream): + assert item == bad_strings[i] + + +def test_bad_strings_in_dict_as_object(dict_with_bad_strings): + """Test loading different strings that can confuse the parser.""" + + bad_strings = { + "1": '"}"', + "2": '{"a": 1, "b": [2,3]}', + "3": '"', + "4": '\\"', + "5": '\\\\"', + "6": '\\x40"', + "7": "[[[{{{", + "8": "]]]}}}", + } + + # read all at once + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_bad_strings.encode())) + assert stream.as_object() == bad_strings + + +def test_bad_strings_in_dict_all_keys(dict_with_bad_strings): + """Test loading different strings that can confuse the parser.""" + + bad_strings = { + "1": '"}"', + "2": '{"a": 1, "b": [2,3]}', + "3": '"', + "4": '\\"', + "5": '\\\\"', + "6": '\\x40"', + "7": "[[[{{{", + "8": "]]]}}}", + } + + # read one after the other with keys + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_bad_strings.encode())) + assert stream["1"] == bad_strings["1"] + assert stream["2"] == bad_strings["2"] + assert stream["3"] == bad_strings["3"] + assert stream["4"] == bad_strings["4"] + assert stream["5"] == bad_strings["5"] + assert stream["6"] == bad_strings["6"] + assert stream["7"] == bad_strings["7"] + assert stream["8"] == bad_strings["8"] + + +def test_bad_strings_in_dict_skip_some(dict_with_bad_strings): + """Test loading different strings that can confuse the parser.""" + + bad_strings = { + "1": '"}"', + "2": '{"a": 1, "b": [2,3]}', + "3": '"', + "4": '\\"', + "5": '\\\\"', + "6": '\\x40"', + "7": "[[[{{{", + "8": "]]]}}}", + } + + # read some, skip some + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_bad_strings.encode())) + assert stream["2"] == bad_strings["2"] + assert stream["5"] == bad_strings["5"] + assert stream["8"] == bad_strings["8"] + + def test_complex_dict_grabbing(complex_dict): """Test loading a complex dict and grabbing specific keys.""" @@ -543,3 +685,78 @@ def test_as_object_grabbing_multiple_subscriptable_levels_again_after_passed_rai assert next(dict_1["sub_list"]) == "a" with pytest.raises(KeyError, match="sub_dict"): dict_1["sub_dict"]["sub_dict_name"] + + +def test_iterating_keys(dict_with_keys): + """Iterate through keys of a simple object.""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + output = list(stream) + assert output == ["field_1", "field_2", "field_3"] + + +def test_iterating_keys_get(dict_with_keys): + """Iterate through keys of a simple object and get values.""" + + the_dict = json.loads(dict_with_keys) + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + for key in stream: + value = stream[key] + assert value == the_dict[key] + + +def test_iterating_items(dict_with_keys): + """Iterate through items of a simple object.""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + output = list(stream.items()) + assert output == [("field_1", 1), ("field_2", 2), ("field_3", 3)] + + +def test_iterating_keys_after_get(dict_with_keys): + """Iterate through keys of a simple object after an item has already been read.""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + assert stream["field_1"] == 1 + output = list(stream) + assert output == ["field_2", "field_3"] + + +def test_iterating_items_after_get(dict_with_keys): + """Iterate through items of a simple object after an item has already been read.""" + + bytes_io_chunk = BytesChunkIO(dict_with_keys.encode()) + stream = adafruit_json_stream.load(bytes_io_chunk) + assert stream["field_1"] == 1 + output = list(stream.items()) + assert output == [("field_2", 2), ("field_3", 3)] + + +def test_iterating_complex_dict(complex_dict): + """Mix iterating over items of objects in objects in arrays.""" + + names = ["one", "two", "three", "four"] + sub_values = [None, "two point one", "three point one", None] + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + thing_num = 0 + for (index, item) in enumerate(stream.items()): + key, a_list = item + assert key == f"list_{index+1}" + for thing in a_list: + assert thing["dict_name"] == names[thing_num] + for sub_key in thing["sub_dict"]: + # break after getting a key with or without the value + # (testing finish() called from the parent list) + if sub_key == "sub_dict_name": + if thing_num in {1, 2}: + value = thing["sub_dict"][sub_key] + assert value == sub_values[thing_num] + break + thing_num += 1
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: