# Copyright 2018 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
"""API for Tricium analyzers to use.
This recipe module is intended to support different kinds of
analyzer recipes, including:
* Recipes that wrap one or more legacy analyzers.
* Recipes that accumulate comments one by one.
* Recipes that wrap other tools and parse their output.
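
A minimal usage sketch (hedged: the DEPS entry and file paths are
illustrative and depend on how your recipe pulls in this module):

    DEPS = ['recipe_engine/tricium']

    def RunSteps(api):
      api.tricium.add_comment(
          'Spellchecker/Typo', 'Possible typo.', 'src/foo.py',
          start_line=12, end_line=12)
      api.tricium.write_comments()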
"""
import fnmatch
import os

from google.protobuf import json_format

from recipe_engine import recipe_api

from PB.go.chromium.org.luci.common.proto.findings import findings as findings_pb
from PB.tricium.data import Data

from . import legacy_analyzers
BINARY_FILE_EXTENSIONS = ['.png']
class TriciumApi(recipe_api.RecipeApi):
"""TriciumApi provides basic support for Tricium."""
  # Expose pre-defined analyzers, as well as the LegacyAnalyzer class.
LegacyAnalyzer = legacy_analyzers.LegacyAnalyzer
analyzers = legacy_analyzers.Analyzers
# The limit on the number of comments that can be added via this recipe.
#
# Any comments added after this threshold is reached will be dropped.
_comments_num_limit = 1000
def __init__(self, **kwargs):
"""Sets up the API.
Initializes an empty list of comments for use with
add_comment and write_comments.
"""
super().__init__(**kwargs)
self._comments = []
self._findings = []
def add_comment(
self,
category,
message,
path,
start_line=0,
end_line=0,
start_char=0,
end_char=0,
suggestions=(),
):
"""Adds one comment to accumulate.
For semantics of start_line, start_char, end_line, end_char, see Gerrit doc
https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#comment-range
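
    Example (a hedged sketch; the category, message, and path are
    illustrative, and each `suggestions` entry is a dict mirroring the
    JSONPB form of tricium.Data.Suggestion):

        api.tricium.add_comment(
            'Lint/Typo',
            'Did you mean "color"?',
            'src/main.cc',
            start_line=5,
            end_line=5,
            start_char=2,
            end_char=7,
            suggestions=[{
                'description': 'Fix the spelling.',
                'replacements': [{
                    'path': 'src/main.cc',
                    'replacement': 'color',
                    'start_line': 5,
                    'end_line': 5,
                    'start_char': 2,
                    'end_char': 7,
                }],
            }])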
"""
    # Build the Tricium comment proto.
comment = Data.Comment()
comment.category = category
comment.message = message
comment.path = path
comment.start_line = start_line
comment.end_line = end_line
comment.start_char = start_char
comment.end_char = end_char
    # Convert the comment to a LUCI finding as well.
if not self.m.buildbucket.build.input.gerrit_changes:
raise ValueError('missing gerrit_changes in the build input')
cl = self.m.buildbucket.build.input.gerrit_changes[0]
gerrit_change_ref = findings_pb.Location.GerritChangeReference(
host=cl.host,
project=cl.project,
change=cl.change,
patchset=cl.patchset,
)
loc = findings_pb.Location(
gerrit_change_ref=gerrit_change_ref,
file_path=(path or '/COMMIT_MSG'),
)
if start_line:
loc.range.start_line = start_line
loc.range.start_column = start_char
      # Tricium allows specifying start_line while leaving end_line empty;
      # that maps to the line field in the RobotCommentInput
      # https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#robot-comment-input.
      # However, that combination is not allowed in a range, so explicitly
      # fall back to start_line when end_line is not set.
loc.range.end_line = end_line or start_line
loc.range.end_column = end_char
finding = findings_pb.Finding(
category=category,
location=loc,
        message=message,
severity_level=findings_pb.Finding.SEVERITY_LEVEL_WARNING,
)
for s in suggestions:
json_format.ParseDict(s, comment.suggestions.add())
      # Also convert the suggestion to LUCI finding fixes.
fix = finding.fixes.add(description=s.get('description', ''))
for tr_rep in s.get('replacements', []):
loc = findings_pb.Location(
gerrit_change_ref=gerrit_change_ref,
file_path=(tr_rep.get('path', None) or '/COMMIT_MSG'),
)
if tr_rep.get('start_line', 0):
loc.range.start_line = tr_rep.get('start_line', 0)
loc.range.start_column = tr_rep.get('start_char', 0)
loc.range.end_line = tr_rep.get('end_line', 0)
loc.range.end_column = tr_rep.get('end_char', 0)
fix.replacements.add(
location=loc,
new_content=tr_rep.get('replacement', ''),
)
self.validate_comment(comment)
self._add_comment(comment, finding)
@staticmethod
def validate_comment(comment):
"""Validates comment to comply with Tricium/Gerrit requirements.
    Raises ValueError on the first detected problem.
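
    For example (an illustrative sketch):

        c = Data.Comment(path='src/foo.py')  # file-level comment: valid
        TriciumApi.validate_comment(c)
        c.start_line, c.end_line = 3, 1      # inverted range: invalid
        TriciumApi.validate_comment(c)       # raises ValueError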
"""
if comment.start_line < 0:
raise ValueError('start_line must be 1-based, but %d given' %
(comment.start_line,))
if comment.start_line == 0:
for attr in ('end_line', 'start_char', 'end_char'):
value = getattr(comment, attr)
if value:
raise ValueError('start_line is 0, implying file level comment, '
'but %s is %d instead of 0' % (attr, value))
return
if comment.start_line > comment.end_line and comment.end_line != 0:
# TODO(tandrii): it's probably better to require end_line always set.
raise ValueError('start_line must be <= end_line, but %d..%d given' %
(comment.start_line, comment.end_line))
if comment.start_char < 0:
raise ValueError('start_char must be 0-based, but %d given' %
(comment.start_char,))
if comment.end_char < 0:
raise ValueError('end_char must be 0-based, but %d given' %
(comment.end_char,))
if (comment.start_line == comment.end_line and
comment.start_char >= comment.end_char and comment.end_char > 0):
raise ValueError(
'(start_line, start_char) must be before (end_line, end_char), '
'but (%d,%d) .. (%d,%d) given' % (
comment.start_line,
comment.start_char,
comment.end_line,
comment.end_char,
))
if os.path.isabs(comment.path):
      raise ValueError('path must be relative to the input directory, but '
                       'got absolute path %s' % (comment.path,))
  def _add_comment(self, comment, finding=None):
    """Appends the comment, and its finding, if not already recorded."""
if comment not in self._comments:
self._comments.append(comment)
if finding and finding not in self._findings:
self._findings.append(finding)
  def write_comments(self, upload_findings=True):
    """Emits the results accumulated by `add_comment` and `run_legacy`.

    Writes the comments to the "tricium" output property, which is read
    by the Tricium service, and optionally uploads the corresponding
    LUCI findings via ResultDB.
    """
results = Data.Results()
results.comments.extend(self._comments)
step = self.m.step('write results', [])
if len(results.comments) > self._comments_num_limit:
# We don't yet know how many of these comments are included in changed
# lines and would be posted. Add a warning to try to help with
# clarification in the case that Tricium unexpectedly emits no comments.
      step.presentation.status = self.m.step.WARNING
      step.presentation.step_text = (
          '%d comments created; Tricium may refuse to post comments if '
          'there are too many in changed lines. This build sends only the '
          'first %d comments.' % (len(results.comments),
                                  self._comments_num_limit))
comments = results.comments[:self._comments_num_limit]
del results.comments[:]
results.comments.extend(comments)
# The "tricium" output property is read by the Tricium service.
step.presentation.properties['tricium'] = self.m.proto.encode(
results, 'JSONPB', indent=0, preserving_proto_field_name=False)
if self.m.resultdb.enabled and upload_findings and self._findings:
self.m.findings.upload_findings(self._findings)
return step
def run_legacy(self,
analyzers,
input_base,
affected_files,
commit_message,
emit=True):
"""Runs legacy analyzers.
This function internally accumulates the comments from the analyzers it
runs to the same global storage used by `add_comment()`. By default it
emits comments from legacy analyzers to the tricium output property,
along with any comments previously created by calling `add_comment()`
directly, after running all the specified analyzers.
Args:
    * analyzers (List(LegacyAnalyzer)): Analyzers to run.
* input_base (Path): The Tricium input dir, generally a checkout base.
* affected_files (List(str)): Paths of files in the change, relative
to input_base.
* commit_message (str): Commit message from Gerrit.
    * emit (bool): Whether to write results to the tricium output
      property. If False, the caller is responsible for calling
      `write_comments` to emit the comments added by the legacy
      analyzers. This is useful for recipes that run a mixture of custom
      analyzers (using `add_comment()` to store comments) and legacy
      analyzers.
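
    Example (a hedged sketch; assumes `analyzers.SPELLCHECKER` is one of
    the pre-defined legacy analyzers, and the checkout path and file
    list are illustrative):

        api.tricium.run_legacy(
            [api.tricium.analyzers.SPELLCHECKER],
            input_base=api.path.start_dir / 'checkout',
            affected_files=['README.md'],
            commit_message='Fix typos.')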
"""
self._write_files_data(affected_files, commit_message, input_base)
# For each analyzer, download the CIPD package, run it and accumulate
# results. Note: Each analyzer could potentially be run in parallel.
for analyzer in analyzers:
with self.m.step.nest(analyzer.name) as presentation:
# Check analyzer.path_filters and conditionally skip.
        if not _matches_path_filters(affected_files, analyzer.path_filters):
          presentation.step_text = 'skipped due to path filters'
          continue
try:
analyzer_dir = self.m.path.cleanup_dir / analyzer.name
output_base = analyzer_dir / 'out'
package_dir = analyzer_dir / 'package'
self._fetch_legacy_analyzer(package_dir, analyzer)
results = self._run_legacy_analyzer(
package_dir,
analyzer,
input_dir=input_base,
output_dir=output_base)
          # Accumulate the analyzer's comments and show step results. If
          # one analyzer fails, continue running the rest.
for comment in results.comments:
suggestions = [
json_format.MessageToDict(s)
for s in comment.suggestions or ()
]
self.add_comment(
comment.category,
comment.message,
comment.path,
start_line=comment.start_line,
end_line=comment.end_line,
start_char=comment.start_char,
end_char=comment.end_char,
suggestions=suggestions)
num_comments = len(results.comments)
presentation.step_text = '%s comment(s)' % num_comments
presentation.logs['result'] = self.m.proto.encode(
results, 'JSONPB')
except self.m.step.StepFailure:
presentation.step_text = 'failed'
# The tricium data dir with files.json is written in the checkout cache
# directory and should be cleaned up.
self.m.file.rmtree('clean up tricium data dir', input_base / 'tricium')
if emit:
self.write_comments()
  def is_binary(self, path):
    """Returns True if the file's extension marks it as binary."""
_, ext = self.m.path.splitext(self.m.path.basename(path))
return ext in BINARY_FILE_EXTENSIONS
def _write_files_data(self, affected_files, commit_message, base_dir):
"""Writes a Files input message to a file.
Args:
* affected_files (List(str)): File paths. This should
be relative to `base_dir`.
* commit_message (str): The commit message from Gerrit.
* base_dir (Path): Input files base directory.
"""
files = Data.Files()
files.commit_message = commit_message
for path in affected_files:
# TODO(qyearsley): Set the is_binary and status fields for each file.
# Analyzers use these fields to determine whether to skip files.
f = files.files.add()
f.path = path
f.is_binary = self.is_binary(path)
data_dir = self._ensure_data_dir(base_dir)
self.m.file.write_proto(
'write files.json',
data_dir / 'files.json',
files,
'JSONPB',
# Tricium analyzers expect camelCase field names.
encoding_kwargs={'preserving_proto_field_name': False})
def _read_results(self, base_dir):
"""Reads a Tricium Results message from a file.
Args:
* base_dir (Path): A directory. Generally this will
be the same as the -output arg passed to the analyzer.
Returns: Results protobuf message.
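
    The file content is JSONPB of tricium.Data.Results, for example
    (illustrative):

        {"comments": [{"category": "Lint/Typo",
                       "message": "Possible typo.",
                       "path": "src/main.cc"}]}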
"""
data_dir = self._ensure_data_dir(base_dir)
results_json = self.m.file.read_text(
'read results',
data_dir / 'results.json',
test_data='{"comments":[]}')
return json_format.Parse(results_json, Data.Results())
def _ensure_data_dir(self, base_dir):
"""Creates the Tricium data directory if it doesn't exist.
Simple Tricium analyzers assume that data is input/output from a
particular subpath relative to the input/output paths passed.
Args:
* base_dir (Path): A directory, could be either the -input
or -output passed to a Tricium analyzer.
Returns: Tricium data file directory inside base_dir.
"""
data_dir = base_dir / 'tricium' / 'data'
self.m.file.ensure_directory('ensure tricium data dir', data_dir)
return data_dir
def _fetch_legacy_analyzer(self, package_dir, analyzer):
"""Fetches an analyzer package from CIPD.
Args:
* packages_dir (Path): The path to fetch to.
* analyzer (LegacyAnalyzer): Analyzer package to fetch.
"""
ensure_file = self.m.cipd.EnsureFile()
ensure_file.add_package(analyzer.package, version=analyzer.version)
self.m.cipd.ensure(package_dir, ensure_file)
def _run_legacy_analyzer(self, package_dir, analyzer, input_dir, output_dir):
"""Runs a simple legacy analyzer executable and returns the results.
Args:
* package_dir (Path): The directory where the analyzer CIPD package
contents have been unpacked to.
* analyzer (LegacyAnalyzer): Analyzer object to run.
* input_dir (Path): The Tricium input dir, which is expected to contain
files as well as the metadata at tricium/data/files.json.
* output_dir (Path): The directory to write results into.
"""
# Some analyzers depend on other files in the CIPD package, so cwd is
# expected to be the directory with the analyzer.
with self.m.context(cwd=package_dir):
cmd = [
package_dir / analyzer.executable, '-input', input_dir, '-output',
output_dir
] + analyzer.extra_args
self.m.step('run analyzer',
cmd).presentation.logs['cmd'] = ' '.join(str(c) for c in cmd)
return self._read_results(output_dir)
def _matches_path_filters(files, patterns):
  """Returns True if `patterns` is empty or any file matches a pattern."""
  if not patterns:
    return True
  return any(fnmatch.fnmatch(f, p) for p in patterns for f in files)