Skip to content

Commit

Permalink
feat(name): corruption update
Browse files Browse the repository at this point in the history
There was a bug in the corruption name: Any calls in parallel yeilded
non-deterministic results due to a side effect. In refactoring the code
to fix that, I changed it so that we can a new kind of corription.

Yes, we should do something much better.
  • Loading branch information
kierun committed Feb 19, 2024
1 parent 8f11acf commit ad92031
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 100 deletions.
111 changes: 92 additions & 19 deletions pynpc/name_corruptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,96 @@ def __init__(self, patterns: list[str]) -> None:
self.patterns = patterns
self.cursor = len(patterns)

def corrupt_once(self, name: str) -> str:
def corrupt(self, name: str, size: int = 3) -> list[str]:
"""Corrupts a name."""
starting_cursor = self.cursor
cursor = (starting_cursor + 1) % len(self.patterns)
while cursor != starting_cursor:
# grab a pattern like ("d", "t")
(pattern, replacement) = self.patterns[cursor] # type: ignore[misc]

# replace it - eg "david".replace("d", "t") => "tavit"
new_name = name.replace(pattern, replacement) # type: ignore[has-type]
if new_name != name:
# if the name changed, we're done
self.cursor = cursor
return new_name

# if not, keep going with the next pattern
cursor = (cursor + 1) % len(self.patterns)

# if we get here, we didn't find any patterns that worked
return name
names = []
start = self.cursor
for _ in range(size):
starting_cursor = start
cursor = (starting_cursor + 1) % len(self.patterns)
while True:
(pattern, replacement) = self.patterns[cursor] # type: ignore[misc]
new_name = name.replace(pattern, replacement) # type: ignore[has-type]
if new_name != name:
start = cursor
names.append(new_name)
name = new_name
break
cursor = (cursor + 1) % len(self.patterns)
if cursor == (starting_cursor - 1):
break
names.append(name)
return sorted(set(names))


if __name__ == "__main__": # pragma: no cover
"""This is test code.
As in, that's what we used to see if we were happy with the
corruption. It's super academic!
"""
from pathlib import Path

import orjson
from rich import print as lpr

data = Path(Path(__file__).resolve().parent.parent, "pynpc", "data", "name-corruption-pattern.json")
patterns = parse_patterns(orjson.loads(data.read_text()))
x = NameCorruptor(patterns)

names = (
"agatha",
"aldwin",
"althea",
"anselm",
"armin",
"bartholomew",
"berengar",
"clarice",
"constance",
"dierk",
"eadric",
"edward",
"eldrida",
"elfric",
"erna",
"eustace",
"felicity",
"finnegan",
"giselle",
"gerald",
"godric",
"gunther",
"hadrian",
"heloise",
"isolde",
"ivor",
"jocelyn",
"lancelot",
"lysandra",
"magnus",
"melisande",
"merrick",
"osborn",
"philomena",
"reginald",
"rowena",
"sabine",
"seraphina",
"sigfrid",
"tiberius",
"ulf",
"urien",
"vespera",
"wendel",
"wilfred",
"winifred",
"xenia",
"ysabel",
"zephyr",
"zinnia",
"zuriel",
"zygmund",
)
for name in names:
lpr(f'("{name}", "{" ".join(x.corrupt(name, 4))}"),')
14 changes: 6 additions & 8 deletions pynpc/npc.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,13 @@ def __init__(self, localisation: list[str] | None = None, what: str = "fantasy")

def _get_name(self, name: str, sz: int = 3) -> str:
"""Get corruptions variations from a name."""
rlog.debug("Generating name", name=name, sz=sz)
try:
first, last = name.split(" ")
first, _ = name.split(" ")
except ValueError as e:
rlog.error("Name is not in the expected format: 'first last'", name=name, error=e)
first = name
last = ""
generated = [first]
for _ in range(sz):
_next = generated[-1]
corrupted = self._corruptor.corrupt_once(_next)
generated.append(corrupted)
return f"{first} (" + " ".join(generated[1:]) + ") " + last
return "(" + " ".join(self._corruptor.corrupt(first, sz)) + ") "

def reading(self) -> Reading:
"""Return either upwards or revesed tarot cards draw."""
Expand All @@ -141,12 +136,15 @@ def generate(self) -> None:
self.name_fem = person.full_name(gender=Gender.FEMALE)
if self.name_fem != transliterate(self.name_fem):
self.name_fem += " — " + transliterate(self.name_fem)
self.name_fem += f" → {self._get_name(transliterate(self.name_fem), 3)}"
self.name_mal = person.full_name(gender=Gender.MALE)
if self.name_mal != transliterate(self.name_mal):
self.name_mal += " — " + transliterate(self.name_mal)
self.name_mal += f" → {self._get_name(transliterate(self.name_mal), 3)}"
self.name_non = person.full_name()
if self.name_non != transliterate(self.name_non):
self.name_non += " — " + transliterate(self.name_non)
self.name_non += f" → {self._get_name(transliterate(self.name_non), 3)}"
_arc = self._resources["archetypes"].get_value()
self.nature = Trait(_arc["name"], _arc["description"])
self.demeanour = self.nature
Expand Down
134 changes: 62 additions & 72 deletions tests/test_name_corruptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,85 +26,75 @@ def test_parse_multiple_sequences():
assert parse_patterns(sut) == expected


def test_corrupt_once():
corruptor = NameCorruptor(parse_patterns([["th", "ff"]]))
assert corruptor.corrupt_once("agatha") == "agaffa"


@pytest.mark.parametrize(
("name", "expected"),
[
("agatha", "agatha agadda agata agata agata"),
("aldwin", "aldwin altwin altwen altwun altwum"),
("althea", "althea alddea altea altea altea"),
("anselm", "anselm antzelm intzelm aintzelm aiwantzelm"),
("armin", "armin armen armun armum armium"),
(
"bartholomew",
"bartholomew barddolomew bartolomew partolomew fartolomew",
),
("berengar", "berengar baerengar baerungar baerumgar baeriumgar"),
("clarice", "clarice claricke clarike clarijke clarihjke"),
(
"constance",
"constance konstance khonstance khonstince khonstaince",
),
("dierk", "dierk dieyrk tieyrk tiheyrk tyeyrk"),
("eadric", "eadric eadrick eadrik eadrijk eatrijk"),
("edward", "edward edvard etvard echard echart"),
("eldrida", "eldrida eltrita eltrihta eltryta eltryta"),
("elfric", "elfric elfrick elfrik elfrijk elvrijk"),
("erna", "erna erne aerne aerne aerne"),
("eustace", "eustace eustache eusteiche eusteeche eusteeckhe"),
("felicity", "felicity velicity vhelicity vhelihcihty vhelycyty"),
("finnegan", "finnegan vinnegan vhinnegan vhennegan vhunnegan"),
("giselle", "giselle gitzelle gitzella gitzellya gihtzellya"),
("gerald", "gerald gaerald gaeralt gairalt gaidralt"),
("godric", "godric godrick godrik godrijk gotrijk"),
("gunther", "gunther gundder gunter guntaer gumtaer"),
("hadrian", "hadrian hatrian hatrihan hatryan hatryin"),
("heloise", "heloise heloihse heloyse heloize heloite"),
("isolde", "isolde isolte ihsolte ysolte izolte"),
("ivor", "ivor ivhor ihvhor yvhor yvhhor"),
("jocelyn", "jocelyn jocielyn joselyn jotzelyn jotzelyn"),
("lancelot", "lancelot lancielot lanselot lantzelot lantzelod"),
("lysandra", "lysandra lyzandra lytsandra lytzahndra lytzahntra"),
("magnus", "magnus magnaes magnees magnees magnees"),
(
"melisande",
"melisande melizande melitsande melitzahnde melitzahnte",
),
("merrick", "merrick merrickk merrikk merrijkk maerrijkk"),
("osborn", "osborn osporn osforn osvorn osvhorn"),
("philomena", "philomena ffilomena filomena vilomena vhilomena"),
("reginald", "reginald regineld reginelt regenelt regunelt"),
("rowena", "rowena rowene rowune rowume rowiume"),
("sabine", "sabine zabine tsabine tzahbine tzahbene"),
("seraphina", "seraphina seraffina serafina seravina seravhina"),
("sigfrid", "sigfrid sigvrid sigvhrid sigvhrit sihgvhriht"),
("tiberius", "tiberius tibaerius tihbaerihus tybaeryus typaeryus"),
("ulf", "ulf ulv ulvh ulvh ulvh"),
("urien", "urien urieyn uriheyn uryeyn uryeyn"),
("vespera", "vespera vhespera vhespaera vhesfaera vhesfaira"),
("wendel", "wendel wentel wuntel wumtel wiumtel"),
("wilfred", "wilfred wilvred wilvhred wilvhret wihlvhret"),
("winifred", "winifred winivred winivhred winivhret wenivhret"),
("xenia", "xenia xunia xumia xiumia xihumiha"),
("ysabel", "ysabel yzabel ytsabel ytzahbel ytzahpel"),
("zephyr", "zephyr zeffyr zefyr zevyr zevhyr"),
("zinnia", "zinnia zennia zunnia zumnia ziumnia"),
("zuriel", "zuriel zurieyl zuriheyl zuryeyl tzuryeyl"),
("zygmund", "zygmund zygmunt zygmumt zygmiumt zygmihumt"),
("agatha", "agadda agata"),
("aldwin", "altwen altwin altwum altwun"),
("althea", "alddea altea"),
("anselm", "aintzelm aiwantzelm antzelm intzelm"),
("armin", "armen armium armum armun"),
("bartholomew", "barddolomew bartolomew fartolomew partolomew"),
("berengar", "baerengar baeriumgar baerumgar baerungar"),
("clarice", "claricke clarihjke clarijke clarike"),
("constance", "khonstaince khonstance khonstince konstance"),
("dierk", "dieyrk tieyrk tiheyrk tyeyrk"),
("eadric", "eadrick eadrijk eadrik eatrijk"),
("edward", "echard echart edvard etvard"),
("eldrida", "eltrihta eltrita eltryta"),
("elfric", "elfrick elfrijk elfrik elvrijk"),
("erna", "aerne erne"),
("eustace", "eustache eusteeche eusteeckhe eusteiche"),
("felicity", "velicity vhelicity vhelihcihty vhelycyty"),
("finnegan", "vhennegan vhinnegan vhunnegan vinnegan"),
("giselle", "gihtzellya gitzella gitzelle gitzellya"),
("gerald", "gaerald gaeralt gaidralt gairalt"),
("godric", "godrick godrijk godrik gotrijk"),
("gunther", "gumtaer gundder guntaer gunter"),
("hadrian", "hatrian hatrihan hatryan hatryin"),
("heloise", "heloihse heloite heloize heloyse"),
("isolde", "ihsolte isolte izolte ysolte"),
("ivor", "ihvhor ivhor yvhhor yvhor"),
("jocelyn", "jocielyn joselyn jotzelyn"),
("lancelot", "lancielot lanselot lantzelod lantzelot"),
("lysandra", "lytsandra lytzahndra lytzahntra lyzandra"),
("magnus", "magnaes magnees"),
("melisande", "melitsande melitzahnde melitzahnte melizande"),
("merrick", "maerrijkk merrickk merrijkk merrikk"),
("osborn", "osforn osporn osvhorn osvorn"),
("philomena", "ffilomena filomena vhilomena vilomena"),
("reginald", "regenelt regineld reginelt regunelt"),
("rowena", "rowene rowiume rowume rowune"),
("sabine", "tsabine tzahbene tzahbine zabine"),
("seraphina", "seraffina serafina seravhina seravina"),
("sigfrid", "sigvhrid sigvhrit sigvrid sihgvhriht"),
("tiberius", "tibaerius tihbaerihus tybaeryus typaeryus"),
("ulf", "ulv ulvh"),
("urien", "urieyn uriheyn uryeyn"),
("vespera", "vhesfaera vhesfaira vhespaera vhespera"),
("wendel", "wentel wiumtel wumtel wuntel"),
("wilfred", "wihlvhret wilvhred wilvhret wilvred"),
("winifred", "wenivhret winivhred winivhret winivred"),
("xenia", "xihumiha xiumia xumia xunia"),
("ysabel", "ytsabel ytzahbel ytzahpel yzabel"),
("zephyr", "zeffyr zefyr zevhyr zevyr"),
("zinnia", "zennia ziumnia zumnia zunnia"),
("zuriel", "tzuryeyl zurieyl zuriheyl zuryeyl"),
("zygmund", "zygmihumt zygmiumt zygmumt zygmunt"),
],
)
def test_corrupt_several(name, expected):
data = Path(Path(__file__).resolve().parent.parent, "pynpc", "data", "name-corruption-pattern.json")
patterns = parse_patterns(loads(data.read_text()))

corruptor = NameCorruptor(patterns)
generated = [name]
for _ in range(len(expected.split(" ")) - 1):
_next = generated[-1]
corrupted = corruptor.corrupt_once(_next)
generated.append(corrupted)
assert " ".join(generated) == expected
generated = corruptor.corrupt(name, 4)
assert " ".join(generated) == expected, " ".join(generated)


def test_latesha_hanging() -> None:
"""Test that Latesha is hanging."""
data = Path(Path(__file__).resolve().parent.parent, "pynpc", "data", "name-corruption-pattern.json")
patterns = parse_patterns(loads(data.read_text()))
sut = NameCorruptor(patterns)
assert "Lataersha" in sut.corrupt("Latersha")
5 changes: 4 additions & 1 deletion tests/test_npc.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,10 @@ def test_resource_get_values(res) -> None:


def test_get_name(random) -> None:
assert "fred" in random._get_name("fred") # noqa: SLF001
sut = random._get_name("fred") # noqa: SLF001
assert "vhred" in sut
assert "vhret" in sut
assert "vred" in sut


@pytest.mark.parametrize(
Expand Down

0 comments on commit ad92031

Please sign in to comment.