-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcapitalisation.jl
204 lines (177 loc) · 7.63 KB
/
capitalisation.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Check capitalisation of data names and category attributes
#
# Some notes: 'distance_derived' is acceptable so a more complex
# construction to catch things like 'distance_DA_su' is required.
#
# Regular-expression fragments (written as plain strings) for name components
# that carry their own capitalisation. `canonical_case` lowercases each
# fragment to find it caselessly in a name, and then requires the fragment
# exactly as written here to match the name with its original case.
# The first fragment that matches caselessly decides the result, so the order
# of this tuple matters.
# `\$` is an escaped dollar sign, i.e. the regex end-of-string anchor.
# Because the whole fragment is passed through `lowercase`, avoid regex
# escapes whose meaning depends on letter case when adding entries.
const proper_names = ("Wyckoff","Cartn","_H_M\$","_H_M_","_Hall",
"Schoenflies","Patterson","Seitz","Friedel",
"R_factor","F_calc","Fcalc","Flack","Fox","Cromer_Mann",
"Laue","Rogers","Bijvoet",
"F_complex","F_meas","F_squared","Rmerge","A_calc",
"B_calc","A_meas","B_meas","Bravais",
"B_equiv","U_equiv","B[_]*iso","U[_]*iso",
"matrix_B([_]|\$)","matrix_U([_]|\$)","U_[1-3]+","B_[1-3]+","Uij","Bij",
"UBij","av_R_","TIJ","_T_max","_T_min","F_000","RGB",
"^IT_","label_[ADH]","distance_[DAH]+(_|\$)","symmetry_[DAH]\$",
"angle_[DAH]+(_|\$)",
"Cambridge","units_Z","CAS\$","ISBN","CSD","Medline",
"ASTM","ISSN","^COD\$","NCA","^NH","MDF","NBS","PDB","PDF",
"_CCDC_","DOI\$","ORCID\$","IUCr\$","IUPAC\$",
"SMILES\$","InChI","_RMS","_Cu\$","_Mo\$","ADP",
"I_over_I","I_over_netI","I_net","R_Fsqd","^R_I_","Lp_factor",
"R_I_factor","I_over_suI","meas_F","_S_",
"^R\$","^RT\$","^T\$","^B\$","^Ro\$","EPINET","_IZA\$",
"RCSR","_SP\$","TOPOS\$","Voronoi","Stokes_I","Stokes_Q",
"Stokes_U","Stokes_V",
# Powder dictionary
"_wR_","_len_Q\$", "March-Dollase", "March_Dollase",
"_len_Q_su\$", "Chebyshev",
# ms_CIF
"Legendre","_site_U_",
# rho_CIF
"^P[0-9]{2}", "^P[0-9]_[0-9]", "^Pc", "^Pv",
# mag_CIF
"Fourier", "magn_OG"
)
"""
Exceptions to the regular expressions in `proper_names`, stored as
`(category_pattern, object_pattern)` pairs of regular-expression strings.

If the first pattern caselessly matches the category name, the second
pattern is applied to the object name: it is first matched caselessly, and
if that succeeds it must also match the object name with its original case
(see `is_special_case`).
"""
const special_cases = (("march_dollase\$","^r\$"),)
"""
    CapitalCheck

Visitor state used while checking capitalisation.

# Fields
- `iscat`: set by the `scalar_item` rule when `_definition.scope` is
  `Category`; cleared at the end of the `save_frame` rule.
- `isfunc`: set by the `scalar_item` rule when `_name.category_id` is
  `function`; cleared at the end of the `save_frame` rule.
- `code_items`: attribute names whose values must start with a capital
  letter unless the attribute also appears in `enums`.
- `enums`: for each attribute name, the values accepted for it.
"""
mutable struct CapitalCheck <: Visitor_Recursive
    iscat::Bool
    isfunc::Bool
    code_items::Vector{String}
    enums::Dict{String,Vector{Union{Nothing,String}}}
end
# Checker with no code-valued attributes and no enumerations registered.
CapitalCheck() = CapitalCheck(
    false, false, String[], Dict{String,Vector{Union{Nothing,String}}}()
)

# Checker whose code-valued attributes and enumerated values are taken from
# the dictionary `d` via `list_code_defs` and `get_enums`.
function CapitalCheck(d::DDLm_Dictionary)
    return CapitalCheck(false, false, list_code_defs(d), get_enums(d))
end
"""
    all_upper(s)

Return `true` if every character of `s` is uppercase, or if the first run of
characters outside `a-z` spans the whole of `s` (so digits, underscores and
similar characters are accepted alongside capitals).
"""
function all_upper(s)
    all(isuppercase, s) && return true
    run = match(r"[^a-z]+", String(s))
    return run !== nothing && length(run.match) == length(s)
end
"""
    all_lower(s)

Return `true` if every character of `s` is lowercase, or if the first run of
characters outside `A-Z` spans the whole of `s` (so digits, underscores and
similar characters are accepted alongside lowercase letters).
"""
function all_lower(s)
    all(islowercase, s) && return true
    run = match(r"[^A-Z]+", String(s))
    return run !== nothing && length(run.match) == length(s)
end
# Visitor rule for a single `attribute value` pair.
#
# * `_definition.id` values without a "." must pass `all_upper` (2.1.10).
# * `_definition.scope` == "Category" and `_name.category_id` == "function"
#   are recorded on `cc` for later use.
# * Values of attributes listed in `cc.code_items` but absent from `cc.enums`
#   must not begin with a non-uppercase letter (2.1.12).
# * Values of attributes present in `cc.enums` must be one of the listed
#   values (2.1.13).
@rule scalar_item(cc::CapitalCheck,tree) = begin
    attribute = tree.children[1]
    # The three attributes below are distinct strings, so at most one branch runs.
    if attribute == "_definition.id"
        v = traverse_to_value(tree.children[2])
        if !(occursin(".",v) || all_upper(v))
            print_err(get_line(tree),"Category names should be uppercase in category definition for $v",err_code = "2.1.10")
        end
    elseif attribute == "_definition.scope"
        if traverse_to_value(tree.children[2]) == "Category"
            cc.iscat = true
        end
    elseif attribute == "_name.category_id"
        if traverse_to_value(tree.children[2]) == "function"
            cc.isfunc = true
        end
    end
    has_enum = haskey(cc.enums, attribute)
    if !has_enum && attribute in cc.code_items
        v = traverse_to_value(tree.children[2],delims=false)
        initial = v[1]
        if isletter(initial) && !isuppercase(initial)
            print_err(get_line(tree),"Attribute values for $attribute should be capitalised",err_code="2.1.12")
        end
    end
    if has_enum
        v = traverse_to_value(tree.children[2],firstok=true,delims=false)
        poss = cc.enums[attribute]
        if !(v in poss)
            print_err(get_line(tree),"Attribute value $v for $attribute does not follow that used in the reference dictionary",err_code="2.1.13")
        end
    end
end
# Visitor rule for a loop: every value belonging to an attribute that has an
# entry in `cc.enums` must be one of the listed values (2.1.13).
#
# The children are read as: one leading token, then the data-name tokens, then
# the values, starting at the first child that is not a `Lerche.Token`.
# Values are assigned to data names cyclically, in order.
@rule loop(cc::CapitalCheck,tree) = begin
    boundary = findfirst(x-> !isa(x,Lerche.Token),tree.children)
    # No non-token child means there are no values to check.
    if boundary !== nothing
        dnames = String.(tree.children[2:boundary-1])
        # Run to the last child. The earlier upper bound,
        # length(tree.children[boundary:end]), was the number of values rather
        # than the last index, so the final boundary-1 values were skipped.
        for i in boundary:length(tree.children)
            dname = dnames[mod1(i - boundary + 1, length(dnames))]
            if dname in keys(cc.enums)
                poss = cc.enums[dname]
                val = traverse_to_value(tree.children[i],firstok=true,delims=false)
                if !(val in poss) && !(String(val) in poss)
                    print_err(get_line(tree.children[i]),"Attribute value $val for $dname is not capitalised as in the reference dictionary", err_code="2.1.13")
                end
            end
        end
    end
end
# Visitor rule for a save frame. It relies on `cc.iscat` / `cc.isfunc`, which
# the `scalar_item` rule sets, and clears both before returning.
#
# Checks performed:
# * the frame name (text after the first five characters of the first child)
#   is all upper case for categories and all lower case for ordinary items
#   (4.3.1);
# * for categories, `_name.category_id` and `_name.object_id` pass
#   `all_upper` (2.1.10);
# * for ordinary items, both names have canonical case, with the object name
#   also allowed to be a listed special case (2.1.11);
# * for functions, the object name starts with a capital and contains no
#   underscore (2.1.14).
@rule save_frame(cc::CapitalCheck,tree) = begin
    if cc.iscat
        if !all_upper(tree.children[1][6:end])
            print_err(get_line(tree),"Save frame name is not all upper case for category definition",err_code = "4.3.1")
        end
    elseif !cc.isfunc
        if !all_lower(tree.children[1][6:end])
            print_err(get_line(tree), "Save frame name $(tree.children[1]) is not all lower case for item definition", err_code = "4.3.1")
        end
    end
    category_hits = Lerche.find_pred(tree,x->x.children[1]=="_name.category_id")
    if !isempty(category_hits)
        category_node = first(category_hits)
        object_node = first(Lerche.find_pred(tree,x->x.children[1]=="_name.object_id"))
        name = traverse_to_value(category_node.children[2])
        object = traverse_to_value(object_node.children[2])
        if cc.iscat && !(all_upper(name) && all_upper(object))
            print_err(get_line(tree),"Save frame for $object does not have capitalised category names in _name.category_id or _name.object_id",err_code="2.1.10")
        end
        if !(cc.iscat || cc.isfunc)
            if !(canonical_case(name) && (canonical_case(object) || is_special_case(name,object)))
                print_err(get_line(tree),"Save frame for $object does not have canonical case for category/object names $name/$object",err_code="2.1.11")
            end
        end
        if cc.isfunc && (!isuppercase(object[1]) || occursin("_",object))
            print_err(get_line(tree),"Function name should be CamelCase",err_code="2.1.14")
        end
    end
    # Reset the per-definition flags so they do not leak into the next frame.
    cc.iscat = false
    cc.isfunc = false
end
"""
    canonical_case(name)

Return `true` if `name` is capitalised canonically.

The entries of `proper_names` are tried in order. The first entry whose
lowercased pattern matches the lowercased `name` decides the result: `true`
if the pattern as written also matches `name`, otherwise `false`. If no entry
matches, the result is `all_lower(name)`.
"""
function canonical_case(name)
    lowered = String(lowercase(name))
    for pn in proper_names
        # Skip fragments that do not occur in the name at all, ignoring case.
        occursin(Regex(lowercase(pn)), lowered) || continue
        if occursin(Regex(pn), String(name))
            return true
        end
        @debug "Expected $pn in $name"
        return false
    end
    return all_lower(name)
end
"""
    is_special_case(name, object)

Return `true` only if the category `name` and object name `object` are
covered by an entry of `special_cases` and `object` is written with the case
that entry prescribes.

For each `(c, o)` pair, `c` is matched caselessly against `name`. If it
matches and `o` matches `object` caselessly, the result is whether `o` also
matches `object` with its original case. If no pair applies, the result is
`false`. It used to be `true`, which made the caller's
`!canonical_case(object) && !is_special_case(name, object)` test fail for
every ordinary name, so non-canonical object names were never reported.
"""
function is_special_case(name, object)
    lname = String(lowercase(name))
    lobject = String(lowercase(object))
    for (c, o) in special_cases
        occursin(Regex(lowercase(c)), lname) || continue
        @debug "Potential special case $name.$object"
        occursin(Regex(lowercase(o)), lobject) || continue
        if occursin(Regex(o), String(object))
            @debug "$name.$object passes"
            return true
        end
        @debug "Expected $name.$o for $name.$object"
        return false
    end
    # No special-case entry applies to this category/object pair.
    return false
end
"""
    list_code_defs(d)

Return the keys of `d` that are code-valued, i.e. those `k` for which
`d[k][:type]` has a `contents` property whose value `contents[]` equals
"Code". Keys whose `:type` entry has no `contents` property are excluded.
"""
function list_code_defs(d)
    is_code(key) = begin
        type_info = d[key][:type]
        :contents in propertynames(type_info) && type_info.contents[] == "Code"
    end
    return filter(is_code, collect(keys(d)))
end