-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* feat(names): name corruptor * test(debug): added ipdb * test(coverage): Back to 100% * feat(faker): faker for fake names * chore(pre-commit): upgrades
- Loading branch information
Showing
11 changed files
with
672 additions
and
358 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Name corruption | ||
|
||
Did we use a formal implementation of | ||
[Grimm's law](https://en.wikipedia.org/wiki/Grimm%27s_law)? No. | ||
|
||
Did we use some other philological data? Also no. | ||
|
||
Did we do some careful research and testing? Again, no. | ||
|
||
We sat around with a bit of code and a file watcher and interactively updated | ||
the rules, watching 50 names update. The list is what we came up with after | ||
about 30 minutes of interactive play; it changes every name in the list by at | ||
least one corruption. |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
[ | ||
["pir", "per"], | ||
["ie", "iey"], | ||
["sa", "za", "tsa", "tzah"], | ||
["th", "dd", "t"], | ||
["gnu", "gnae"], | ||
["cel", "ciel", "sel", "tzel"], | ||
["lot", "lod"], | ||
["ric", "rick", "rik", "rijk"], | ||
["ph", "ff", "f", "v", "vh"], | ||
["na", "ne"], | ||
["er", "aer"], | ||
["dwa", "dva", "tva", "cha"], | ||
["ao", "ai", "aiwa", "awa", "a"], | ||
["d", "t"], | ||
["tta", "tva"], | ||
["lle", "lla", "llya"], | ||
["in", "en", "un", "um", "ium"], | ||
["i", "ih", "y"], | ||
["por", "pro"], | ||
["b", "p", "f"], | ||
["co", "ko", "kho"], | ||
["an", "in", "ain"], | ||
["zu", "tzu"], | ||
["ace", "ache", "eiche"], | ||
["tt", "t"], | ||
["ys", "iz", "it", "itz", "its", "itsa", "itsah"], | ||
["ia", "aya"], | ||
["ena", "ina", "iyna"], | ||
["era", "ira", "idra"], | ||
["ick", "ich", "ech", "eckh"] | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Name corruptor.""" | ||
from itertools import pairwise | ||
|
||
|
||
def parse_patterns(sequences: list[list[str]]) -> list[str]: | ||
"""Parse patterns.""" | ||
results = [] # type: ignore[var-annotated] | ||
for seq in sequences: | ||
results.extend(pairwise(seq)) | ||
return results | ||
|
||
|
||
class NameCorruptor:
    """Apply substitution rules to a name, one rule per call.

    The corruptor holds an ordered list of ``(pattern, replacement)`` pairs
    and a cursor recording which pair was applied last, so successive calls
    to :meth:`corrupt_once` continue through the list instead of restarting.
    """

    def __init__(self, patterns: list[tuple[str, str]]) -> None:
        """Store the substitution pairs and mark that none has been applied.

        Args:
            patterns: ``(pattern, replacement)`` pairs, tried in list order.
        """
        self.patterns = patterns
        # One past the last valid index: a sentinel meaning "no pattern has
        # been applied yet".
        self.cursor = len(patterns)

    def corrupt_once(self, name: str) -> str:
        """Return ``name`` with the next applicable substitution applied.

        Scanning starts just after the cursor and wraps around the pattern
        list. The pattern at the cursor (the one applied most recently) is
        not retried. The first pattern whose replacement changes the name is
        applied to every occurrence, and the cursor moves to that pattern.

        Args:
            name: The name to corrupt.

        Returns:
            The corrupted name, or ``name`` unchanged when no pattern applies
            (including when there are no patterns at all).
        """
        total = len(self.patterns)
        if not total:
            # Nothing to apply; also avoids a modulo-by-zero below.
            return name

        start = self.cursor
        # At most ``total`` candidates are visited, so the scan always
        # terminates, even while the cursor still holds its initial sentinel.
        for offset in range(1, total + 1):
            index = (start + offset) % total
            if index == start:
                # Wrapped back to the most recently applied pattern: stop.
                break

            # grab a pattern like ("d", "t")
            pattern, replacement = self.patterns[index]

            # replace it - eg "david".replace("d", "t") => "tavit"
            new_name = name.replace(pattern, replacement)
            if new_name != name:
                # the name changed, so remember where we got to and stop
                self.cursor = index
                # TODO: put 'relax' back in
                return new_name

        # no pattern changed the name
        return name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Name corruptor tests.""" | ||
from pathlib import Path | ||
|
||
import pytest | ||
from orjson import loads | ||
|
||
from pynpc.name_corruptor import NameCorruptor, parse_patterns | ||
|
||
|
||
def test_parse_single_sequence():
    """One three-item sequence yields its two consecutive pairs."""
    parsed = parse_patterns([["a", "b", "c"]])
    assert parsed == [("a", "b"), ("b", "c")]
|
||
|
||
def test_parse_multiple_sequences():
    """Pairs from several sequences are concatenated in input order."""
    sequences = [["a", "b", "c"], ["d", "e", "f"]]
    parsed = parse_patterns(sequences)
    assert parsed == [("a", "b"), ("b", "c"), ("d", "e"), ("e", "f")]
|
||
|
||
def test_corrupt_once():
    """A single matching rule is applied to the name."""
    rules = parse_patterns([["th", "ff"]])
    result = NameCorruptor(rules).corrupt_once("agatha")
    assert result == "agaffa"
|
||
|
||
@pytest.mark.parametrize(
    ("name", "expected"),
    [
        ("agatha", "agatha agadda agata agata agata"),
        ("aldwin", "aldwin altwin altwen altwun altwum"),
        ("althea", "althea alddea altea altea altea"),
        ("anselm", "anselm antzelm intzelm aintzelm aiwantzelm"),
        ("armin", "armin armen armun armum armium"),
        (
            "bartholomew",
            "bartholomew barddolomew bartolomew partolomew fartolomew",
        ),
        ("berengar", "berengar baerengar baerungar baerumgar baeriumgar"),
        ("clarice", "clarice claricke clarike clarijke clarihjke"),
        (
            "constance",
            "constance konstance khonstance khonstince khonstaince",
        ),
        ("dierk", "dierk dieyrk tieyrk tiheyrk tyeyrk"),
        ("eadric", "eadric eadrick eadrik eadrijk eatrijk"),
        ("edward", "edward edvard etvard echard echart"),
        ("eldrida", "eldrida eltrita eltrihta eltryta eltryta"),
        ("elfric", "elfric elfrick elfrik elfrijk elvrijk"),
        ("erna", "erna erne aerne aerne aerne"),
        ("eustace", "eustace eustache eusteiche eusteeche eusteeckhe"),
        ("felicity", "felicity velicity vhelicity vhelihcihty vhelycyty"),
        ("finnegan", "finnegan vinnegan vhinnegan vhennegan vhunnegan"),
        ("giselle", "giselle gitzelle gitzella gitzellya gihtzellya"),
        ("gerald", "gerald gaerald gaeralt gairalt gaidralt"),
        ("godric", "godric godrick godrik godrijk gotrijk"),
        ("gunther", "gunther gundder gunter guntaer gumtaer"),
        ("hadrian", "hadrian hatrian hatrihan hatryan hatryin"),
        ("heloise", "heloise heloihse heloyse heloize heloite"),
        ("isolde", "isolde isolte ihsolte ysolte izolte"),
        ("ivor", "ivor ivhor ihvhor yvhor yvhhor"),
        ("jocelyn", "jocelyn jocielyn joselyn jotzelyn jotzelyn"),
        ("lancelot", "lancelot lancielot lanselot lantzelot lantzelod"),
        ("lysandra", "lysandra lyzandra lytsandra lytzahndra lytzahntra"),
        ("magnus", "magnus magnaes magnees magnees magnees"),
        (
            "melisande",
            "melisande melizande melitsande melitzahnde melitzahnte",
        ),
        ("merrick", "merrick merrickk merrikk merrijkk maerrijkk"),
        ("osborn", "osborn osporn osforn osvorn osvhorn"),
        ("philomena", "philomena ffilomena filomena vilomena vhilomena"),
        ("reginald", "reginald regineld reginelt regenelt regunelt"),
        ("rowena", "rowena rowene rowune rowume rowiume"),
        ("sabine", "sabine zabine tsabine tzahbine tzahbene"),
        ("seraphina", "seraphina seraffina serafina seravina seravhina"),
        ("sigfrid", "sigfrid sigvrid sigvhrid sigvhrit sihgvhriht"),
        ("tiberius", "tiberius tibaerius tihbaerihus tybaeryus typaeryus"),
        ("ulf", "ulf ulv ulvh ulvh ulvh"),
        ("urien", "urien urieyn uriheyn uryeyn uryeyn"),
        ("vespera", "vespera vhespera vhespaera vhesfaera vhesfaira"),
        ("wendel", "wendel wentel wuntel wumtel wiumtel"),
        ("wilfred", "wilfred wilvred wilvhred wilvhret wihlvhret"),
        ("winifred", "winifred winivred winivhred winivhret wenivhret"),
        ("xenia", "xenia xunia xumia xiumia xihumiha"),
        ("ysabel", "ysabel yzabel ytsabel ytzahbel ytzahpel"),
        ("zephyr", "zephyr zeffyr zefyr zevyr zevhyr"),
        ("zinnia", "zinnia zennia zunnia zumnia ziumnia"),
        ("zuriel", "zuriel zurieyl zuriheyl zuryeyl tzuryeyl"),
        ("zygmund", "zygmund zygmunt zygmumt zygmiumt zygmihumt"),
    ],
)
def test_corrupt_several(name, expected):
    """Repeated corruption with the shipped rules yields the expected chain.

    ``expected`` is the space-separated chain starting with ``name`` itself;
    each later word is the result of corrupting the previous one once.
    """
    project_root = Path(__file__).resolve().parent.parent
    pattern_file = project_root / "pynpc" / "data" / "name-corruption-pattern.json"
    corruptor = NameCorruptor(parse_patterns(loads(pattern_file.read_text())))

    # One corruption per word after the first, i.e. one per separating space.
    steps = expected.count(" ")
    chain = [name]
    for _ in range(steps):
        chain.append(corruptor.corrupt_once(chain[-1]))

    assert " ".join(chain) == expected
Oops, something went wrong.