-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: Improve sentence streaming logic in slop.py and enhance tes…
…ts with Hypothesis for better coverage
- Loading branch information
Showing
2 changed files
with
106 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,71 +1,121 @@ | ||
import pytest | ||
from anthropic import TextEvent | ||
from bubble.slop import stream_sentences, stream_normally | ||
import hypothesis.strategies as st | ||
from hypothesis import given | ||
|
||
sentence_ending_punctuation = st.sampled_from(".!?") | ||
|
||
sentence = st.text("abc, \n").flatmap( | ||
lambda text: sentence_ending_punctuation.map( | ||
lambda punctuation: text + punctuation | ||
) | ||
) | ||
|
||
|
||
def create_text_stream(*chunks): | ||
"""Create a mock Anthropic text stream from text chunks""" | ||
return [TextEvent(type="text", text=chunk, snapshot="") for chunk in chunks] | ||
return [ | ||
TextEvent(type="text", text=chunk, snapshot="") | ||
for chunk in chunks | ||
] | ||
|
||
|
||
@pytest.fixture | ||
def text_stream(): | ||
"""Create a mock Anthropic text stream""" | ||
return create_text_stream( | ||
"<sentence>First sentence", | ||
".</sentence> ", | ||
"<sentence>Second ", | ||
"sentence.</sentence>", | ||
"<sentence>Third sentence." | ||
"First sentence", | ||
". ", | ||
"Second ", | ||
"sentence. ", | ||
"Third sentence.", | ||
) | ||
|
||
|
||
@pytest.mark.trio | ||
async def test_stream_sentences(text_stream): | ||
"""Test streaming sentences with XML tags""" | ||
"""Test streaming sentences""" | ||
sentences = [] | ||
async for sentence in stream_sentences(text_stream): | ||
sentences.append(sentence) | ||
|
||
assert sentences == [ | ||
"First sentence", | ||
"Second sentence", | ||
"Third sentence" | ||
"First sentence.", | ||
"Second sentence.", | ||
"Third sentence.", | ||
] | ||
|
||
|
||
@pytest.mark.trio | ||
async def test_stream_sentences_with_initial(text_stream): | ||
"""Test streaming with initial partial sentence""" | ||
sentences = [] | ||
async for sentence in stream_sentences( | ||
text_stream, initial_sentence="<sentence>Initial " | ||
): | ||
sentences.append(sentence) | ||
|
||
assert sentences[0].startswith("Initial") | ||
|
||
|
||
@pytest.mark.trio | ||
async def test_stream_normally(text_stream): | ||
"""Test normal streaming without sentence parsing""" | ||
result = await stream_normally(text_stream) | ||
|
||
expected = ( | ||
"<sentence>First sentence.</sentence> " | ||
"<sentence>Second sentence.</sentence>" | ||
"<sentence>Third sentence.</sentence>" | ||
) | ||
expected = "First sentence. " "Second sentence. " "Third sentence." | ||
|
||
assert result == expected | ||
|
||
|
||
@pytest.mark.trio | ||
async def test_stream_multiline_sentences(): | ||
"""Test handling of multiline sentence content""" | ||
events = create_text_stream("<sentence>First\nline\nof text.") | ||
@given(st.text()) | ||
async def test_sentence_stream_yields_same_text(text: str): | ||
events = create_text_stream(text) | ||
sentences = [ | ||
sentence async for sentence in stream_sentences(events) | ||
] | ||
assert "".join(sentences).strip() == text.strip() | ||
|
||
sentences = [] | ||
async for sentence in stream_sentences(events): | ||
sentences.append(sentence) | ||
|
||
assert sentences == ["First line of text"] | ||
|
||
@given(st.lists(sentence)) | ||
async def test_sentence_stream_on_list_of_sentences( | ||
sentences: list[str], | ||
): | ||
events = create_text_stream(" ".join(sentences)) | ||
streamed = [sentence async for sentence in stream_sentences(events)] | ||
assert streamed == sentences | ||
|
||
|
||
@given(st.lists(sentence)) | ||
async def test_sentence_stream_on_list_of_sentences_with_newlines( | ||
sentences: list[str], | ||
): | ||
events = create_text_stream("\n".join(sentences)) | ||
streamed = [sentence async for sentence in stream_sentences(events)] | ||
assert streamed == sentences | ||
|
||
|
||
async def test_sentence_stream_handles_no_sentence_ending(): | ||
events = create_text_stream("This is a test") | ||
sentences = [ | ||
sentence async for sentence in stream_sentences(events) | ||
] | ||
assert sentences == ["This is a test"] | ||
|
||
|
||
async def test_sentence_stream_handles_ellipsis(): | ||
events = create_text_stream("This is a test... This is a test.") | ||
sentences = [ | ||
sentence async for sentence in stream_sentences(events) | ||
] | ||
assert sentences == ["This is a test...", "This is a test."] | ||
|
||
|
||
async def test_sentence_stream_handles_newlines(): | ||
events = create_text_stream("This is a test.\nThis is a test.") | ||
sentences = [ | ||
sentence async for sentence in stream_sentences(events) | ||
] | ||
assert sentences == ["This is a test.", "This is a test."] | ||
|
||
|
||
@given( | ||
sentence.flatmap( | ||
lambda s: st.integers(1, len(s)).map(lambda n: (s, n)) | ||
) | ||
) | ||
async def test_foo(x): | ||
(sentence, n) = x | ||
a = sentence[:n] | ||
b = sentence[n:] | ||
assert a + b == sentence | ||
events = create_text_stream(a, b) | ||
streamed = [sentence async for sentence in stream_sentences(events)] | ||
assert streamed == [sentence] |