diff --git a/adafruit_json_stream.py b/adafruit_json_stream.py index 78831a9..0c08ae5 100644 --- a/adafruit_json_stream.py +++ b/adafruit_json_stream.py @@ -26,6 +26,7 @@ def __init__(self, data_iter): self.data_iter = data_iter self.i = 0 self.chunk = b"" + self.last_char = None def read(self): """Read the next character from the stream.""" @@ -39,16 +40,37 @@ def read(self): self.i += 1 return char - def fast_forward(self, closer): - """Read through the stream until the character is ``closer``, ``]`` + def fast_forward(self, closer, *, return_object=False): + """ + Read through the stream until the character is ``closer``, ``]`` (ending a list) or ``}`` (ending an object.) Intermediate lists and - objects are skipped.""" + objects are skipped. + + :param str closer: the character to read until + :param bool return_object: read until the closer, + and then parse the data and return as an object + """ + closer = ord(closer) close_stack = [closer] count = 0 + + buffer = None + if return_object: + buffer = bytearray(32) + # ] = 93, [ = 91 + # } = 125, { = 123 + buffer[0] = closer - 2 + while close_stack: char = self.read() count += 1 + if buffer: + if count == len(buffer): + new_buffer = bytearray(len(buffer) + 32) + new_buffer[: len(buffer)] = buffer + buffer = new_buffer + buffer[count] = char if char == close_stack[-1]: close_stack.pop() elif char == ord('"'): @@ -63,6 +85,9 @@ def fast_forward(self, closer): close_stack.append(ord("}")) elif char == ord("["): close_stack.append(ord("]")) + if buffer: + value_string = bytes(memoryview(buffer)[: count + 1]).decode("utf-8") + return json.loads(value_string) return False def next_value(self, endswith=None): @@ -77,10 +102,10 @@ def next_value(self, endswith=None): except EOFError: char = endswith if not in_string and (char == endswith or char in (ord("]"), ord("}"))): + self.last_char = char if len(buf) == 0: return None value_string = bytes(buf).decode("utf-8") - # print(f"{repr(value_string)}, {endswith=}") return 
json.loads(value_string) if char == ord("{"): return TransientObject(self) @@ -94,19 +119,15 @@ def next_value(self, endswith=None): buf.append(char) -class Transient: # pylint: disable=too-few-public-methods +class Transient: """Transient object representing a JSON object.""" - # This is helpful for checking that something is a TransientList or TransientObject. - - -class TransientList(Transient): - """Transient object that acts like a list through the stream.""" - def __init__(self, stream): + self.active_child = None self.data = stream self.done = False - self.active_child = None + self.has_read = False + self.finish_char = "" def finish(self): """Consume all of the characters for this list from the stream.""" @@ -114,13 +135,31 @@ def finish(self): if self.active_child: self.active_child.finish() self.active_child = None - self.data.fast_forward("]") + self.data.fast_forward(self.finish_char) + self.done = True + + def as_object(self): + """Consume all of the characters for this list from the stream and return as an object.""" + if self.has_read: + raise BufferError("Object has already been partly read.") + self.done = True + return self.data.fast_forward(self.finish_char, return_object=True) + + +class TransientList(Transient): + """Transient object that acts like a list through the stream.""" + + def __init__(self, stream): + super().__init__(stream) + self.finish_char = "]" def __iter__(self): return self def __next__(self): + self.has_read = True + if self.active_child: self.active_child.finish() self.done = self.data.fast_forward(",") @@ -128,6 +167,8 @@ def __next__(self): if self.done: raise StopIteration() next_value = self.data.next_value(",") + if self.data.last_char == ord("]"): + self.done = True if next_value is None: self.done = True raise StopIteration() @@ -140,42 +181,39 @@ class TransientObject(Transient): """Transient object that acts like a dictionary through the stream.""" def __init__(self, stream): - self.data = stream - self.done = False 
- self.buf = array.array("B") + super().__init__(stream) + self.finish_char = "}" + self.active_child_key = None - self.active_child = None + def __getitem__(self, key): + if self.active_child and self.active_child_key == key: + return self.active_child - def finish(self): - """Consume all of the characters for this object from the stream.""" - if not self.done: - if self.active_child: - self.active_child.finish() - self.active_child = None - self.data.fast_forward("}") - self.done = True + self.has_read = True - def __getitem__(self, key): if self.active_child: self.active_child.finish() self.done = self.data.fast_forward(",") self.active_child = None + self.active_child_key = None if self.done: - raise KeyError() + raise KeyError(key) - while True: + while not self.done: current_key = self.data.next_value(":") if current_key is None: - # print("object done", self) self.done = True break if current_key == key: next_value = self.data.next_value(",") + if self.data.last_char == ord("}"): + self.done = True if isinstance(next_value, Transient): self.active_child = next_value + self.active_child_key = key return next_value - self.data.fast_forward(",") - raise KeyError() + self.done = self.data.fast_forward(",") + raise KeyError(key) def load(data_iter): diff --git a/tests/test_json_stream.py b/tests/test_json_stream.py new file mode 100644 index 0000000..b8197fe --- /dev/null +++ b/tests/test_json_stream.py @@ -0,0 +1,545 @@ +# SPDX-FileCopyrightText: 2025 Justin Myers +# +# SPDX-License-Identifier: Unlicense + +import json +import math +import pytest +import adafruit_json_stream + + +# pylint: disable=invalid-name,pointless-statement,redefined-outer-name + + +# --------------- +# Helpers +# --------------- + + +class BytesChunkIO: + def __init__(self, data=b"", chunk_size=10): + self.chunk_size = chunk_size + self.chunks_read = 0 + self.data = data + self.data_len = len(self.data) + self.position = 0 + + def __iter__(self): + return self + + def __next__(self): + if 
self.position > self.data_len: + raise StopIteration + + end = self.chunk_size + if self.position + end > self.data_len: + end = self.data_len + chunk = self.data[self.position : self.position + self.chunk_size] + + self.chunks_read += 1 + self.position += self.chunk_size + + return chunk + + def get_chunks_read(self): + return self.chunks_read + + +# --------------- +# Fixtures +# --------------- + + +@pytest.fixture +def dict_with_all_types(): + return """ + { + "_check": "{\\\"a\\\": 1, \\\"b\\\": [2,3]}", + "bool": true, + "dict": {"key": "value"}, + "float": 1.1, + "int": 1, + "list": [1,2,3], + "null": null, + "string": "string" + } + """ + + +@pytest.fixture +def list_with_values(): + return """ + [ + 1, + 2, + 3 + ] + """ + + +@pytest.fixture +def dict_with_keys(): + return """ + { + "field_1": 1, + "field_2": 2, + "field_3": 3 + } + """ + + +@pytest.fixture +def dict_with_list_with_single_entries(): + return """ + { + "list_1": [ + { + "dict_id": 1 + }, + { + "dict_id": 2 + }, + { + "dict_id": 3 + }, + { + "dict_id": 4 + } + ] + } + """ + + +@pytest.fixture +def complex_dict(): + return """ + { + "list_1": [ + { + "dict_id": 1, + "dict_name": "one", + "sub_dict": { + "sub_dict_id": 1.1, + "sub_dict_name": "one point one" + }, + "sub_list": [ + "a", + "b", + "c" + ] + }, + { + "dict_id": 2, + "dict_name": "two", + "sub_dict": { + "sub_dict_id": 2.1, + "sub_dict_name": "two point one" + }, + "sub_list": [ + "d", + "e", + "f" + ] + } + ], + "list_2": [ + { + "dict_id": 3, + "dict_name": "three", + "sub_dict": { + "sub_dict_id": 3.1, + "sub_dict_name": "three point one" + }, + "sub_list": [ + "g", + "h", + "i" + ] + }, + { + "dict_id": 4, + "dict_name": "four", + "sub_dict": { + "sub_dict_id": 4.1, + "sub_dict_name": "four point one" + }, + "sub_list": [ + "j", + "k", + "l" + ] + } + ] + } + """ + + +# --------------- +# Tests +# --------------- + + +def test_all_types(dict_with_all_types): + """Test loading a simple dict all data types.""" + + assert 
json.loads(dict_with_all_types) + + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_all_types.encode())) + + assert stream["bool"] is True + assert stream["dict"]["key"] == "value" + assert stream["float"] == 1.1 + assert stream["int"] == 1 + assert next(stream["list"]) == 1 + assert stream["null"] is None + assert stream["string"] == "string" + + +def test_simple_dict_with_keys(dict_with_keys): + """Test loading a simple dict with keys.""" + + assert json.loads(dict_with_keys) + + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_keys.encode())) + for i in range(1, 4): + assert stream[f"field_{i}"] == i + with pytest.raises(KeyError, match="field_4"): + stream["field_4"] + + +def test_simple_dict_with_grabbing_key_twice_raises(dict_with_keys): + """Test loading a simple dict with keys twice raises.""" + + assert json.loads(dict_with_keys) + + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_keys.encode())) + assert stream["field_1"] == 1 + with pytest.raises(KeyError, match="field_1"): + stream["field_1"] + + +def test_simple_dict_with_keys_middle_key(dict_with_keys): + """Test loading a simple dict and grabbing a key in the middle.""" + + assert json.loads(dict_with_keys) + + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_keys.encode())) + assert stream["field_2"] == 2 + + +def test_simple_dict_with_keys_missing_key_raises(dict_with_keys): + """Test loading a simple dict and grabbing a key that doesn't exist raises.""" + + assert json.loads(dict_with_keys) + + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_keys.encode())) + with pytest.raises(KeyError, match="field_4"): + stream["field_4"] + + +def test_list_with_values(list_with_values): + """Test loading a list and iterating over it.""" + + assert json.loads(list_with_values) + + stream = adafruit_json_stream.load(BytesChunkIO(list_with_values.encode())) + counter = 0 + for value in stream: + counter += 1 + assert value == counter + + +def 
test_dict_with_list_of_single_entries(dict_with_list_with_single_entries): + """Test loading an dict with a list of dicts with one entry each.""" + + assert json.loads(dict_with_list_with_single_entries) + + stream = adafruit_json_stream.load( + BytesChunkIO(dict_with_list_with_single_entries.encode()) + ) + counter = 0 + for obj in stream["list_1"]: + counter += 1 + assert obj["dict_id"] == counter + assert counter == 4 + + +def test_complex_dict(complex_dict): + """Test loading a complex dict.""" + + assert json.loads(complex_dict) + + dict_names = [ + "one", + "two", + "three", + "four", + ] + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + counter = 0 + sub_counter = 0 + for obj in stream["list_1"]: + counter += 1 + assert obj["dict_id"] == counter + assert obj["dict_name"] == dict_names[counter - 1] + sub_dict = obj["sub_dict"] + assert sub_dict["sub_dict_id"] == counter + 0.1 + assert sub_dict["sub_dict_name"] == f"{dict_names[counter-1]} point one" + for item in obj["sub_list"]: + sub_counter += 1 + assert item == chr(96 + sub_counter) + + assert counter == 2 + assert sub_counter == 6 + + for obj in stream["list_2"]: + counter += 1 + assert obj["dict_id"] == counter + assert obj["dict_name"] == dict_names[counter - 1] + sub_dict = obj["sub_dict"] + assert sub_dict["sub_dict_id"] == counter + 0.1 + assert sub_dict["sub_dict_name"] == f"{dict_names[counter-1]} point one" + for item in obj["sub_list"]: + sub_counter += 1 + assert item == chr(96 + sub_counter) + + assert counter == 4 + assert sub_counter == 12 + + +def test_complex_dict_grabbing(complex_dict): + """Test loading a complex dict and grabbing specific keys.""" + + assert json.loads(complex_dict) + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + sub_list = dict_1["sub_list"] + assert next(sub_list) == "a" + list_2 = stream["list_2"] + next(list_2) + dict_2 = next(list_2) + sub_list = 
dict_2["sub_list"] + assert next(sub_list) == "j" + + +def test_complex_dict_passed_key_raises(complex_dict): + """ + Test loading a complex dict and attempting to grab a specific key that has been passed raises. + """ + + assert json.loads(complex_dict) + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + assert dict_1["dict_name"] == "one" + with pytest.raises(KeyError, match="obects_id"): + stream["obects_id"] + + +def test_complex_dict_passed_reference_raises(complex_dict): + """ + Test loading a complex dict and attempting to grab a data from a saved reference that has + been passed raises. + """ + + assert json.loads(complex_dict) + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + sub_dict = dict_1["sub_dict"] + sub_list = dict_1["sub_list"] + list_2 = stream["list_2"] + next(list_2) + with pytest.raises(KeyError, match="sub_dict_id"): + sub_dict["sub_dict_id"] + with pytest.raises(StopIteration): + next(sub_list) + + +# complex_dict is 1518 bytes +@pytest.mark.parametrize( + ("chunk_size", "expected_chunks"), ((10, 152), (50, 31), (100, 16), (5000, 1)) +) +def test_complex_dict_buffer_sizes(chunk_size, complex_dict, expected_chunks): + """Test loading a complex dict and checking the chunking.""" + + assert json.loads(complex_dict) + + bytes_io_chunk = BytesChunkIO(complex_dict.encode(), chunk_size) + + stream = adafruit_json_stream.load(bytes_io_chunk) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + sub_list = dict_1["sub_list"] + assert next(sub_list) == "a" + list_2 = stream["list_2"] + next(list_2) + dict_2 = next(list_2) + sub_list = dict_2["sub_list"] + assert next(sub_list) == "j" + for _ in sub_list: + pass + with pytest.raises(KeyError): + stream["list_3"] + + assert bytes_io_chunk.get_chunks_read() == expected_chunks + assert math.ceil(len(complex_dict) / chunk_size) == 
expected_chunks + + +# complex_dict is 1518 bytes +@pytest.mark.parametrize( + ("chunk_size", "expected_chunks"), ((5, 61), (10, 31), (50, 7), (100, 4)) +) +def test_complex_dict_not_looking_at_all_data_buffer_sizes( + chunk_size, complex_dict, expected_chunks +): + """Test loading a complex dict and checking the chunking.""" + + assert json.loads(complex_dict) + + bytes_io_chunk = BytesChunkIO(complex_dict.encode(), chunk_size) + + stream = adafruit_json_stream.load(bytes_io_chunk) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + sub_list = dict_1["sub_list"] + assert next(sub_list) == "a" + + assert bytes_io_chunk.get_chunks_read() == expected_chunks + assert math.ceil(len(complex_dict) / chunk_size) >= (expected_chunks / 4) + + +def test_incomplete_json_raises(): + """Test incomplete json raises.""" + + data = """ + { + "field_1": 1 + """ + + with pytest.raises(json.JSONDecodeError): + json.loads(data) + + stream = adafruit_json_stream.load(BytesChunkIO(data.encode())) + + with pytest.raises(EOFError): + stream["field_2"] + + +def test_as_object(complex_dict): + """Test loading a complex dict and grabbing parts as objects.""" + + assert json.loads(complex_dict) + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + assert dict_1["sub_dict"].as_object() == { + "sub_dict_id": 1.1, + "sub_dict_name": "one point one", + } + assert dict_1["sub_list"].as_object() == ["a", "b", "c"] + dict_2 = next(list_1) + assert dict_2.as_object() == { + "dict_id": 2, + "dict_name": "two", + "sub_dict": {"sub_dict_id": 2.1, "sub_dict_name": "two point one"}, + "sub_list": ["d", "e", "f"], + } + assert stream["list_2"].as_object() == [ + { + "dict_id": 3, + "dict_name": "three", + "sub_dict": {"sub_dict_id": 3.1, "sub_dict_name": "three point one"}, + "sub_list": ["g", "h", "i"], + }, + { + "dict_id": 4, + "dict_name": "four", + "sub_dict": {"sub_dict_id": 4.1, "sub_dict_name": "four point one"}, + 
"sub_list": ["j", "k", "l"], + }, + ] + + +def test_as_object_stream(dict_with_all_types): + + assert json.loads(dict_with_all_types) + + stream = adafruit_json_stream.load(BytesChunkIO(dict_with_all_types.encode())) + + obj = stream.as_object() + assert obj == { + "_check": '{"a": 1, "b": [2,3]}', + "bool": True, + "dict": {"key": "value"}, + "float": 1.1, + "int": 1, + "list": [1, 2, 3], + "null": None, + "string": "string", + } + assert json.loads(obj["_check"]) == { + "a": 1, + "b": [ + 2, + 3, + ], + } + + +def test_as_object_that_is_partially_read_raises(complex_dict): + """Test loading a complex dict and grabbing partially read raises.""" + + assert json.loads(complex_dict) + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + assert dict_1["dict_id"] == 1 + with pytest.raises(BufferError): + dict_1.as_object() + + +def test_as_object_grabbing_multiple_subscriptable_levels_twice(complex_dict): + """Test loading a complex dict and grabbing multiple subscriptable levels twice.""" + + assert json.loads(complex_dict) + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + assert dict_1["sub_dict"]["sub_dict_id"] == 1.1 + assert dict_1["sub_dict"]["sub_dict_name"] == "one point one" + + +def test_as_object_grabbing_multiple_subscriptable_levels_again_after_passed_raises( + complex_dict, +): + """ + Test loading a complex dict and grabbing multiple subscriptable levels after passing it raises. 
+ """ + + assert json.loads(complex_dict) + + stream = adafruit_json_stream.load(BytesChunkIO(complex_dict.encode())) + + list_1 = stream["list_1"] + dict_1 = next(list_1) + assert dict_1["sub_dict"]["sub_dict_id"] == 1.1 + assert next(dict_1["sub_list"]) == "a" + with pytest.raises(KeyError, match="sub_dict"): + dict_1["sub_dict"]["sub_dict_name"] diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..099a9b7 --- /dev/null +++ b/tox.ini @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: 2022 Kevin Conley +# SPDX-FileCopyrightText: 2024 Justin Myers for Adafruit Industries +# +# SPDX-License-Identifier: MIT + +[tox] +envlist = py311 + +[testenv] +description = run tests +deps = + pytest==7.4.3 + requests +commands = pytest + +[testenv:coverage] +description = run coverage +deps = + pytest==7.4.3 + pytest-cov==4.1.0 + requests +package = editable +commands = + coverage run --source=. --omit=tests/* --branch {posargs} -m pytest + coverage report + coverage html + +[testenv:lint] +description = run linters +deps = + pre-commit==3.6.0 +skip_install = true +commands = pre-commit run {posargs} + +[testenv:docs] +description = build docs +deps = + -r requirements.txt + -r docs/requirements.txt +skip_install = true +commands = sphinx-build -E -W -b html docs/. _build/html