-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdiff.py
30 lines (22 loc) · 904 Bytes
/
diff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Module imports
from datasets import load_dataset
from tqdm import tqdm
import os
import difflib
def word_diff(caption_0, caption_1):
    """Return the words that differ position-by-position between two captions.

    Both captions are lower-cased and split on single spaces, then compared
    word by word at the same index.

    Args:
        caption_0: First caption string.
        caption_1: Second caption string.

    Returns:
        A ``(diff_0, diff_1)`` tuple of lists. ``diff_0[k]`` and ``diff_1[k]``
        are the words of ``caption_0`` and ``caption_1`` at the k-th reported
        mismatching position.
    """
    words_0 = caption_0.lower().split(" ")
    words_1 = caption_1.lower().split(" ")
    diff_0 = []
    diff_1 = []
    seen = set()
    # zip() stops at the shorter caption, so captions with different word
    # counts are compared over their common prefix instead of raising
    # IndexError.
    for word_0, word_1 in zip(words_0, words_1):
        if word_0 == word_1:
            continue
        # Report a pair only when neither word was part of an earlier
        # mismatch, so a swap such as "a b" / "b a" is listed once.
        if word_0 not in seen and word_1 not in seen:
            diff_0.append(word_0)
            diff_1.append(word_1)
        # Every mismatching word is remembered, even when the pair itself
        # was skipped above.
        seen.add(word_0)
        seen.add(word_1)
    return diff_0, diff_1


def main():
    """Print the differing words for every example of the Winoground test split."""
    # The Hugging Face access token is read from the HF_TOKEN environment
    # variable (create one under Profile -> Settings -> Access Tokens ->
    # New Token). A missing variable raises KeyError.
    auth_token = os.environ["HF_TOKEN"]
    # NOTE(review): newer `datasets` releases deprecate `use_auth_token` in
    # favour of `token` -- confirm against the installed version.
    winoground = load_dataset("facebook/winoground", use_auth_token=auth_token)["test"]
    for example in winoground:
        diff_0, diff_1 = word_diff(example["caption_0"], example["caption_1"])
        print(' '.join(diff_0), ' '.join(diff_1))


if __name__ == "__main__":
    main()