-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathp098.py
162 lines (147 loc) · 5.68 KB
/
p098.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#! /usr/bin/env python
# https://projecteuler.net/problem=98
#
# Given the input file 'words.txt', find words which are are anagrams
# of each other and can both be mapped to square numbers, using the
# same letter/digit mapping. e.g:
#
# CARE = 1296 = 36^2 (C = 1, A = 2, R = 9, E = 6)
# RACE = 9216 = 96^2 (C = 1, A = 2, R = 9, E = 6)
#
# Print out the largest such square that can be found this way.
#
# Strategy:
#
# We can eliminate unnecessary work by:
# 1. Skipping any words in the dictionary that have no anagrams.
# 2. Calculating only square numbers for a given word length at
# a time
# 2. Starting our search from the longest words in the dictionary.
# Since the objective is only to find the largest square, we
# may return immediately once it is certain that we cannot find a
# larger square.
#
# To determine quickly whether a word maps to a given square number,
# we may define a canonical "lexical pattern" for a string by
# iterating over its characters, and replacing each unseen one with A,
# B, C and so on. For example, both the word APPROPRIATE and the
# number 12234235167 map to the pattern ABBCDBCEAFG.
#
# By precalculating a list of square numbers and their lexical patterns,
# we can determine quickly which (if any) squares match the pattern for
# a given word.
#
# To solve:
#
# 1. Read the list of input words.
# a. Hash the word into an anagram bucket.
# b. Eliminate any words that have no anagrams in the dictionary.
#
# 2. Iterate over the anagram buckets, starting with the longest word length.
# a. Calculate the square numbers that have the same number of digits.
# Group them by lexical pattern.
# a. Calculate each word's lexical pattern and look up the squares
# (if any) with the same pattern.
# b. Loop through each word/square pair, and determine whether
# the anagrammed word also maps to a square.
import csv
import math
import time
def read_words(infile):
"""Reads words stored in CSV file INFILE.
Returns a dict of words grouped into anagram buckets.
"""
words = []
with open(infile, 'r') as f:
csv_reader = csv.reader(f)
for row in csv_reader:
words = words + row
return words
def collect_anagrams(words):
"""Groups each word in WORDS (a list of strings) into anagram groups.
Returns a dict in which each key is a string of letters ordered
alphabetically, and its value is a set of words which all anagram
to that key.
"""
anagrams = {}
for w in words:
anagram_key = ''.join(sorted(w))
anagrams.setdefault(anagram_key, set())
anagrams[anagram_key].add(w)
return anagrams
def lexical_pattern(s):
"""Returns the lexical pattern represented by string s.
Note: results are only well-defined for strings with less than 26
distinct symbols.
"""
cipher = {}
nextkey = 0
pattern = ''
for sym in s:
if sym not in cipher:
cipher[sym] = chr(ord('A') + nextkey)
nextkey += 1
pattern += cipher[sym]
return pattern
def translate(word1, key, word2):
"""Returns the string that word2 maps to, using word1 and key as
the key for a char-to-char mapping.
Raises KeyError if word2 includes any characters not in word1.
It is assumed that the letters in word1 are mapped consistently
to the chars in key, i.e. that every occurrence of char X in word1
is mapped to the same char Y in the key. If this is not the case,
the last mapping found is used.
"""
key = dict(zip(word1, key))
return ''.join(key[sym] for sym in word2)
def calculate_squares(ndigits):
"Returns a list of square numbers which are each NDIGITS long."
root = int(math.sqrt(10**ndigits - 1))
squares = []
while root > 0 and len(str(root*root)) == ndigits:
squares.append(root*root)
root -= 1
return squares
def findmaxsquare(inputfile):
words = read_words(inputfile)
all_anagrams = collect_anagrams(words)
# Select only anagram groups with at least two words.
all_anagrams = {k: group
for (k, group) in all_anagrams.iteritems()
if len(group) > 1}
# Iterate over anagram groups, starting with the longest words,
# and look for words that map to square numbers.
maxsq = 0
lastgroup = None
cached_squares = {}
for group in sorted(all_anagrams.keys(), key=len, reverse=True):
# If we have found at least one word/square match, and have
# moved on to a shorter group, we're not going to find a larger
# square and can quit.
if maxsq > 0 and lastgroup is not None and len(group) < len(lastgroup):
break
lastgroup = group
anagrams = all_anagrams[group]
# Get a list of square numbers with the same number of digits
ndigits = len(group)
if ndigits not in cached_squares:
cached_squares[ndigits] = calculate_squares(ndigits)
squares = cached_squares[ndigits]
matches = [ (word, sq)
for word in anagrams for sq in squares
if lexical_pattern(word) == lexical_pattern(str(sq)) ]
# For each word/square match, check whether any of the word's anagrams
# maps to another square.
for word, sq in matches:
for ana in anagrams:
if word != ana:
sq2 = int(translate(word, str(sq), ana))
if sq2 in squares:
maxsq = max(sq, sq2, maxsq)
return maxsq
if __name__ == '__main__':
t1 = time.clock()
maxsq = findmaxsquare(inputfile='p098_words.txt')
runtime = time.clock() - t1
print maxsq
print "{} seconds".format(runtime)