forked from PyCQA/pydocstyle
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpep257.py
executable file
·703 lines (542 loc) · 21.2 KB
/
pep257.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
#! /usr/bin/env python
"""Static analysis tool for checking docstring conventions and style.
About
-----
Currently implemented checks cover most of PEP257:
http://www.python.org/dev/peps/pep-0257/
After PEP257 is covered and tested, other checks might be added,
e.g. NumPy docstring conventions are the first candidate:
https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt
The main repository of this program is located at:
http://github.com/GreenSteam/pep257
Creating own checks
-------------------
In order to add your own check, create a function in "Check functions"
section below. The function should take 3 parameters:
docstring : str
Docstring to check, as it is in file (with quotes).
context : str
Docstring's context (e.g. function's source code).
is_script : bool
Whether the docstring is script with #! or not.
Depending on 1st parameter name, the function will be called with
different type of docstring:
* module_docstring
* function_docstring
* class_docstring
* method_docstring
* def_docstring (i.e. function-docstrings + method-docstrings)
* docstring (i.e. all above docstring types)
E.g. the following function will be fed only class-docstrings:
def your_check(class_docstring, context, is_script):
pass
If for a certain function, class, etc. a docstring does not exist,
then `None` will be passed, which should be taken into account.
To signify that a check passed successfully simply `return` from the
check function. If a check failed, return `True`. If a check failed
and you can provide the precise position where it failed, return a
tuple (start_position, end_position), where start and end positions
are integers specifying where in `context` the failure occurred.
Also, see examples in "Check functions" section.
"""
# Program version; parse_options() hands it to OptionParser(version=...).
__version__ = '0.2.3'
from curses.ascii import isascii
import inspect
from optparse import OptionParser
from os import walk
from os.path import abspath, basename, expanduser, isdir, isfile
from os.path import join as path_join
import re
import sys
import tokenize as tk
try:
from StringIO import StringIO
except ImportError:
# Python 3.0 and later
from io import StringIO
try:
    # Merely naming the builtins raises NameError where they do not exist.
    all
    any
except NameError:
    # Python 2.4 and earlier

    def all(iterable):
        """Return False at the first falsy element, True otherwise."""
        for item in iterable:
            if not item:
                return False
        return True

    def any(iterable):
        """Return True at the first truthy element, False otherwise."""
        for item in iterable:
            if item:
                return True
        return False
try:
    # Naming the builtin raises NameError where it does not exist.
    next
except NameError:
    # Python 2.5 and earlier

    def next(obj):
        """Advance iterator `obj` through its ``next()`` method."""
        advance = obj.next
        return advance()
#
# Helper functions
#
def cached(f):
    """A decorator that caches function results.

    Results are kept in a dictionary private to the decorated function and
    keyed by the call's positional arguments plus its keyword arguments.
    Keyword arguments are sorted by name, so ``f(a=1, b=2)`` and
    ``f(b=2, a=1)`` share one cache entry. Every argument must be hashable.

    No cache expiration is currently done.

    """
    cache = {}

    def cached_func(*args, **kwargs):
        # Keyword names are unique, so sorting the (name, value) pairs never
        # needs to compare two values with each other.
        key = (args, tuple(sorted(kwargs.items())))
        try:
            return cache[key]
        except KeyError:
            pass
        # Computed outside the `except` block so that an exception raised by
        # `f` is not chained to the cache miss; nothing is stored if it raises.
        res = f(*args, **kwargs)
        cache[key] = res
        return res

    return cached_func
def yield_list(f):
    """Decorate generator function `f` so that calling it returns a list."""
    def list_returning(*arg, **kw):
        # Exhaust the iterable produced by `f` right away.
        return list(f(*arg, **kw))

    return list_returning
def remove_comments(s):
    """Return `s` with every ``#`` comment removed up to the end of its line.

    The newline that ends a comment is kept, so the number of lines does not
    change. The scan is purely textual: a ``#`` inside a string literal is
    treated as the start of a comment as well.

    """
    # The `*` is essential: without it only the `#` and the single character
    # after it are dropped and the rest of the comment text stays behind.
    return re.sub(r'#[^\n]*', '', s)
def abs_pos(marker, source):
    """Convert a (line, character) marker into an offset within `source`.

    A marker on line 1 has no preceding lines, so its offset is simply its
    character value; each earlier line adds its full length, newline included.

    """
    line_number, column = marker
    preceding = StringIO(source).readlines()[:line_number - 1]
    return sum([len(text) for text in preceding]) + column
def rel_pos(abs_pos, source):
    """Convert an offset within `source` into a (line, character) pair.

    The returned line is counted from 1 and the character is the offset
    measured from the start of that line. An offset past the end of `source`
    fails the assertion.

    """
    lines = StringIO(source).readlines()
    kept = len(source)
    assert kept >= abs_pos
    # Discard trailing lines until the kept text no longer reaches beyond
    # the requested offset; `kept` is always the total length of `lines`.
    while kept > abs_pos:
        kept -= len(lines.pop())
    return len(lines) + 1, abs_pos - kept
def get_summary_line_info(thedocstring):
    """Return the (summary_line, line_number) tuple for a docstring.

    `thedocstring` is the docstring literal as written in the file, quotes
    included. 'summary_line' is the pep257 summary line with surrounding
    whitespace stripped, and 'line_number' is the zero-based docstring line
    it was taken from: 0 when the docstring has a single line or its first
    line is not blank, otherwise 1. Checks that deal with the summary line
    should go through this function so they all treat it the same way.

    """
    # NOTE(review): eval() runs whatever expression the literal contains;
    # only feed it docstrings taken from source you trust.
    text_lines = eval(thedocstring).split('\n')
    opening = text_lines[0].strip()
    if opening or len(text_lines) == 1:
        return opening, 0
    return text_lines[1].strip(), 1
#
# Parsing
#
def parse_module_docstring(source):
    """Return the string literal that opens `source`, or None.

    Comments and line breaks are skipped. If the first remaining token is a
    string it is returned exactly as written (quotes included); any other
    token means there is no leading docstring.

    """
    skippable = (tk.COMMENT, tk.NEWLINE, tk.NL)
    for token in tk.generate_tokens(StringIO(source).readline):
        kind, value = token[0], token[1]
        if kind in skippable:
            continue
        if kind == tk.STRING:  # first STRING should be docstring
            return value
        return None
    return None
def parse_docstring(source, what=''):
    """Return the docstring literal found in `source`, or None.

    A string literal that opens `source` is returned as is. When `what`
    starts with 'module' nothing else is looked at. Otherwise `source` is
    taken to be a `def` or `class` block and the token right after its first
    INDENT is returned if it is a string.

    """
    leading = parse_module_docstring(source)
    if what.startswith('module') or leading:
        return leading
    tokens = tk.generate_tokens(StringIO(source).readline)
    try:
        # Consume tokens up to and including the first INDENT.
        while next(tokens)[0] != tk.INDENT:
            pass
        kind, value = next(tokens)[:2]
    except StopIteration:
        # No indented body at all, hence no docstring.
        return None
    if kind == tk.STRING:  # STRING after INDENT is a docstring
        return value
    return None
@yield_list
def parse_top_level(source, keyword):
    """Parse top-level functions or classes.

    Yield the source text of every block that starts with `keyword` ('def'
    or 'class') in column 0. Each piece runs from the keyword up to the
    position of the DEDENT token that brings the code back to column 0.
    A definition that never reaches such a DEDENT before the tokens run out
    is not yielded.

    """
    token_gen = tk.generate_tokens(StringIO(source).readline)
    kind, value, char = None, None, None
    try:
        while True:
            # Scan forward to the next `keyword` that starts a line.
            while not (kind == tk.NAME and value == keyword and char == 0):
                kind, value, (line, char), _, _ = next(token_gen)
            start = line, char
            # The block ends at the DEDENT that returns to column 0.
            while not (kind == tk.DEDENT and value == '' and char == 0):
                kind, value, (line, char), _, _ = next(token_gen)
            end = line, char
            yield source[abs_pos(start, source): abs_pos(end, source)]
    except StopIteration:
        # Running out of tokens is the normal way to finish. It has to be
        # caught here: since PEP 479 a StopIteration escaping a generator
        # body is turned into RuntimeError instead of ending the generator.
        return
@cached
def parse_functions(source):
    """Return the source text of each top-level `def` block in `source`."""
    return parse_top_level(source, keyword='def')
@cached
def parse_classes(source):
    """Return the source text of each top-level `class` block in `source`."""
    return parse_top_level(source, keyword='class')
def skip_indented_block(token_gen):
    """Consume tokens through the end of the next indented block.

    Tokens are read from `token_gen` up to the first INDENT and then until
    the DEDENT that balances it. That closing token is returned as a
    (kind, value, start, end, raw) tuple; None is returned if the tokens run
    out before the block is closed.

    """
    token = next(token_gen)
    while token[0] != tk.INDENT:
        token = next(token_gen)
    depth = 1
    for token in token_gen:
        kind = token[0]
        if kind == tk.INDENT:
            depth += 1
        elif kind == tk.DEDENT:
            depth -= 1
        if depth == 0:
            return tuple(token)
    return None
@cached
@yield_list
def parse_methods(source):
    """Return the source text of each `def` found inside top-level classes.

    The text of all top-level classes is joined and scanned for `def`
    keywords. Each piece runs from the `def` up to the position of the
    DEDENT token that closes its indented body; definitions nested inside
    that body are skipped together with it.

    """
    source = ''.join(parse_classes(source))
    token_gen = tk.generate_tokens(StringIO(source).readline)
    kind, value, char = None, None, None
    try:
        while True:
            # Scan forward to the next `def` keyword.
            while not (kind == tk.NAME and value == 'def'):
                kind, value, (line, char), _, _ = next(token_gen)
            start = line, char
            kind, value, (line, char), _, _ = skip_indented_block(token_gen)
            end = line, char
            yield source[abs_pos(start, source): abs_pos(end, source)]
    except StopIteration:
        # Running out of tokens is the normal way to finish. It has to be
        # caught here: since PEP 479 a StopIteration escaping a generator
        # body is turned into RuntimeError instead of ending the generator.
        return
def parse_contexts(source, kind):
    """Return the list of contexts that checks of the given `kind` run on.

    `kind` is one of the docstring keywords used as first parameter name of
    check functions. Any other value gives None.

    """
    if kind == 'module_docstring':
        contexts = [source]
    elif kind == 'function_docstring':
        contexts = parse_functions(source)
    elif kind == 'class_docstring':
        contexts = parse_classes(source)
    elif kind == 'method_docstring':
        contexts = parse_methods(source)
    elif kind == 'def_docstring':
        contexts = parse_functions(source) + parse_methods(source)
    elif kind == 'docstring':
        # For the module itself the context is its docstring literal (or
        # None when there is none), not the whole source.
        module_context = [parse_module_docstring(source)]
        contexts = (module_context + parse_functions(source) +
                    parse_classes(source) + parse_methods(source))
    else:
        contexts = None
    return contexts
#
# Framework
#
class Error(object):

    """Error in docstring style.

    * Stores relevant data about the error,
    * provides format for printing an error,
    * provides __lt__ method to sort errors.

    """

    # options that define how errors are printed
    explain = False
    range = False
    quote = False

    def __init__(self, filename, source, docstring, context,
                 explanation, start=None, end=None):
        """Record the error and work out where it sits in `source`.

        `start` and `end` are offsets relative to `context`. Without them
        the error spans the docstring: it starts where `docstring` is found
        in `context` and is as long as `docstring`.

        """
        self.filename = filename
        self.source = source
        self.docstring = docstring
        self.context = context
        self.explanation = explanation.strip()
        # Offset of the first occurrence of the context within the file.
        context_offset = source.find(context)
        if start is None:
            start = context.find(docstring)
        self.start = context_offset + start
        self.line, self.char = rel_pos(self.start, self.source)
        if end is None:
            self.end = self.start + len(docstring)
        else:
            self.end = context_offset + end
        self.end_line, self.end_char = rel_pos(self.end, self.source)

    def __str__(self):
        """Format the error according to the class-level print options."""
        parts = [self.filename, ':%d:%d' % (self.line, self.char)]
        if self.range:
            parts.append('..%d:%d' % (self.end_line, self.end_char))
        if self.explain:
            parts.append(': ' + self.explanation + '\n')
        else:
            # Only the first line of the explanation is shown.
            parts.append(': ' + self.explanation.split('\n')[0].strip())
        if self.quote:
            quoted = self.source[self.start:self.end].strip()
            parts.append('\n> ' + quoted.replace('\n', '\n> ') + '\n')
        return ''.join(parts)

    def __lt__(self, other):
        """Order errors by file name, then by start offset."""
        mine = (self.filename, self.start)
        theirs = (other.filename, other.start)
        return mine < theirs
@yield_list
def find_checks(keyword):
    """Yield every module-level function whose first parameter is `keyword`.

    Check functions are recognised purely by the name of their first
    parameter (e.g. 'docstring', 'class_docstring'), so all functions in
    this module's globals are inspected.

    """
    # inspect.getargspec() is gone since Python 3.11 and, where it exists on
    # Python 3, rejects annotated or keyword-only signatures. Prefer
    # getfullargspec() and fall back only on interpreters that lack it.
    # In both results item 0 is the list of positional parameter names.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    for function in globals().values():
        if inspect.isfunction(function):
            args = get_spec(function)[0]
            if args and args[0] == keyword:
                yield function
@yield_list
def check_source(source, filename):
    """Run every check on `source` and yield an Error for each failure.

    For each docstring keyword, all check functions registered for it are
    run on all matching contexts of `source`. A check reports failure by
    returning True or a (start, end) pair of positions within the context.

    """
    keywords = ['module_docstring', 'function_docstring',
                'class_docstring', 'method_docstring',
                'def_docstring', 'docstring']  # TODO? 'nested_docstring']
    # Files starting with #! and files named test_* both count as scripts.
    if source.startswith('#!'):
        is_script = True
    else:
        is_script = basename(filename).startswith('test_')
    for keyword in keywords:
        for check in find_checks(keyword):
            for context in parse_contexts(source, keyword):
                docstring = parse_docstring(context, keyword)
                result = check(docstring, context, is_script)
                if not result:
                    continue
                # A bare True carries no positions; anything else is
                # unpacked into Error's start/end arguments.
                if result is True:
                    positions = []
                else:
                    positions = result
                yield Error(filename, source, docstring, context,
                            check.__doc__, *positions)
def find_input_files(filenames):
    """Return a list of input files.

    `filenames` is a list of names of files or directories. Every name is
    expanded to an absolute path first. A file is taken as is; a directory
    is walked recursively and contributes its ".py" files, sorted by name
    within each directory. Anything else is reported with print_error and
    left out.

    """
    paths = [abspath(expanduser(name)) for name in filenames]
    found = []
    for path in paths:
        if isdir(path):
            for root, _dirs, files in walk(path):
                for entry in sorted(files):
                    if entry.endswith(".py"):
                        found.append(path_join(root, entry))
        elif isfile(path):
            found.append(path)
        else:
            print_error("%s is not a file or directory" % path)
    return found
def check_files(filenames):
    r"""Return list of docstring style errors found in files.

    `filenames` is a list of files and/or directories, expanded with
    find_input_files. Each file is read, checked with check_source and
    closed again. The errors are returned as formatted strings.

    Example
    -------
    >>> import pep257
    >>> pep257.check_files(['one.py', 'two.py'])
    ['one.py:23:1 PEP257 Use u\"\"\" for Unicode docstrings.']

    """
    errors = []
    for filename in find_input_files(filenames):
        handle = open(filename)
        try:
            # Close the file even when reading fails, instead of leaving
            # that to the garbage collector.
            source = handle.read()
        finally:
            handle.close()
        errors.extend(check_source(source, filename))
    return [str(e) for e in errors]
def parse_options():
    """Parse the command line and return what OptionParser.parse_args gives.

    Three on/off flags are defined: -e/--explain, -r/--range and -q/--quote.

    """
    flags = (
        ('-e', '--explain', 'show explanation of each error'),
        ('-r', '--range', 'show error start..end positions'),
        ('-q', '--quote', 'quote erroneous lines'),
    )
    parser = OptionParser(version=__version__)
    for short_name, long_name, description in flags:
        parser.add_option(short_name, long_name, action='store_true',
                          help=description)
    return parser.parse_args()
def print_error(message):
    """Write `message` and a newline to standard error, then flush it."""
    stream = sys.stderr
    stream.write(message)
    stream.write('\n')
    stream.flush()
def main(options, arguments):
    """Check the files named in `arguments` and return the exit status.

    `options` supplies the explain/range/quote settings that control how
    errors are printed. Files that cannot be opened, read or tokenized are
    reported and skipped. All errors found are printed in sorted order.
    The result is 1 if any error was found and 0 otherwise.

    """
    print('=' * 80)
    print('Note: checks are relaxed for scripts (with #!) compared to modules')
    Error.explain = options.explain
    Error.range = options.range
    Error.quote = options.quote
    errors = []
    for filename in find_input_files(arguments):
        try:
            handle = open(filename)
        except IOError:
            print_error("Error opening file %s" % filename)
            continue
        try:
            errors.extend(check_source(handle.read(), filename))
        except IOError:
            print_error("Error reading file %s" % filename)
        except tk.TokenError:
            print_error("Error parsing file %s" % filename)
        finally:
            handle.close()
    for error in sorted(errors):
        print_error(str(error))
    if errors:
        return 1
    return 0
#
# Check functions
#
def check_modules_have_docstrings(module_docstring, context, is_script):
    """All modules should have docstrings.

    All modules should normally have docstrings.

    """
    if module_docstring:
        # NOTE(review): eval() runs whatever the literal contains; only
        # check source you trust.
        if eval(module_docstring).strip():
            return
        # Present, but nothing except whitespace inside.
        return True
    # Missing altogether: point at the start of the module, capped at
    # 79 characters.
    return 0, min(79, len(context))
def check_def_has_docstring(def_docstring, context, is_script):
    """Exported definitions should have docstrings.

    ...all functions and classes exported by a module should also have
    docstrings. Public methods (including the __init__ constructor)
    should also have docstrings.

    """
    if is_script:
        return  # assume nothing is exported
    # The second word of the context normally looks like "name(args):".
    # Cut it at the parenthesis so that the test below sees the bare name;
    # otherwise "__init__(self):" never ends with "__" and every special
    # method would be mistaken for a private one.
    def_name = context.split()[1].split('(')[0]
    if def_name.startswith('_') and not def_name.endswith('__'):
        return  # private, not exported
    if not def_docstring:
        # Missing: point at the whole first line of the definition.
        return 0, len(context.split('\n')[0])
    # NOTE(review): eval() runs whatever the literal contains; only check
    # source you trust.
    if not eval(def_docstring).strip():
        return True
def check_class_has_docstring(class_docstring, context, is_script):
    """Exported classes should have docstrings.

    ...all functions and classes exported by a module should also have
    docstrings.

    """
    if is_script:
        return  # assume nothing is exported
    words = context.split()
    if words[1].startswith('_'):
        return  # not exported
    if class_docstring:
        # NOTE(review): eval() runs whatever the literal contains; only
        # check source you trust.
        if not eval(class_docstring).strip():
            return True
        return
    # Missing: point at the whole first line of the class.
    header = context.split('\n')[0]
    return 0, len(header)
def check_triple_double_quotes(docstring, context, is_script):
    r"""Use \"\"\"triple double quotes\"\"\".

    For consistency, always use \"\"\"triple double quotes\"\"\" around
    docstrings. Use r\"\"\"raw triple double quotes\"\"\" if you use any
    backslashes in your docstrings. For Unicode docstrings, use
    u\"\"\"Unicode triple-quoted strings\"\"\".

    """
    if not docstring:
        return
    # Accepted openings: plain, raw and unicode triple double quotes.
    for opening in ('"""', 'r"""', 'u"""'):
        if docstring.startswith(opening):
            return
    return True
def check_backslashes(docstring, context, is_script):
    r"""Use r\"\"\" if any backslashes in your docstrings.

    Use r\"\"\"raw triple double quotes\"\"\" if you use any backslashes
    (\\) in your docstrings.

    """
    if not docstring:
        return
    if docstring.startswith('r"""'):
        return
    # Not a raw literal: any backslash in the text as written is flagged.
    if "\\" in docstring:
        return True
def check_unicode_docstring(docstring, context, is_script):
    r"""Use u\"\"\" for Unicode docstrings.

    For Unicode docstrings, use u\"\"\"Unicode triple-quoted strings\"\"\".

    """
    if not docstring:
        return
    # A character is outside ASCII exactly when its code point exceeds 127;
    # testing that directly keeps this check free of the curses module.
    has_non_ascii = any(ord(char) > 127 for char in docstring)
    if has_non_ascii and not docstring.startswith('u"""'):
        return True
def check_one_liners(docstring, context, is_script):
    """One-liner docstrings should fit on one line with quotes.

    The closing quotes are on the same line as the opening quotes.
    This looks better for one-liners.

    """
    if not docstring:
        return
    lines = docstring.split('\n')
    if len(lines) <= 1:
        return
    # Count the lines of the literal that contain at least one letter.
    wordy = 0
    for line in lines:
        for ch in line:
            if ch.isalpha():
                wordy += 1
                break
    if wordy == 1:
        return True
def check_no_blank_before(def_docstring, context, is_script):
    """No blank line before docstring in definitions.

    There's no blank line either before or after the docstring.

    """
    if not def_docstring:
        return
    # Text of the definition up to the docstring, with comments removed.
    header = remove_comments(context.partition(def_docstring)[0])
    # Whatever follows the last colon should hold a single line break.
    after_colon = header.rpartition(':')[2]
    if after_colon.count('\n') > 1:
        return True
def check_ends_with_period(docstring, context, is_script):
    """First line should end with a period.

    The [first line of a] docstring is a phrase ending in a period.

    """
    if not docstring:
        return
    summary_line = get_summary_line_info(docstring)[0]
    if summary_line.endswith('.'):
        return
    return True
def check_imperative_mood(def_docstring, context, is_script):
    """First line should be in imperative mood ('Do', not 'Does').

    [Docstring] prescribes the function or method's effect as a command:
    ("Do this", "Return that"), not as a description; e.g. don't write
    "Returns the pathname ...".

    """
    if not def_docstring:
        return
    # NOTE(review): eval() runs whatever the literal contains; only check
    # source you trust.
    words = eval(def_docstring).split()
    if not words:
        return
    first_word = words[0]
    # A single trailing "s" is flagged; a double "ss" is let through.
    if first_word.endswith('ss'):
        return
    if first_word.endswith('s'):
        return True
def check_no_signature(def_docstring, context, is_script):
    """First line should not be function's or method's "signature".

    The one-line docstring should NOT be a "signature" reiterating
    the function/method parameters (which can be obtained by introspection).

    """
    if not def_docstring:
        return
    # Name of the definition: second word of the header, cut at "(".
    header = context.split(def_docstring)[0]
    def_name = header.split()[1].split('(')[0]
    # NOTE(review): eval() runs whatever the literal contains; only check
    # source you trust.
    first_line = eval(def_docstring).split('\n')[0]
    # Spaces are dropped so that "name (args)" is caught as well.
    squeezed = first_line.replace(' ', '')
    if (def_name + '(') in squeezed:
        return True
def check_return_type(def_docstring, context, is_script):
    """Return value type should be mentioned.

    However, the nature of the return value cannot be determined by
    introspection, so it should be mentioned.

    """
    if is_script or not def_docstring:
        return
    if 'return' in def_docstring.lower():
        return
    tokens = list(tk.generate_tokens(StringIO(context).readline))
    # A `return` followed directly by a comment or line break returns
    # nothing worth documenting.
    # not very precise (tk.OP ';' is not taken into account)
    bare = (tk.COMMENT, tk.NL, tk.NEWLINE)
    for index, token in enumerate(tokens):
        if token[1] == 'return' and tokens[index + 1][0] not in bare:
            return True
def check_blank_after_summary(docstring, context, is_script):
    """Blank line missing after one-line summary.

    Multi-line docstrings consist of a summary line just like a one-line
    docstring, followed by a blank line, followed by a more elaborate
    description. The summary line may be used by automatic indexing tools;
    it is important that it fits on one line and is separated from the
    rest of the docstring by a blank line.

    """
    if not docstring:
        return
    # NOTE(review): eval() runs whatever the literal contains; only check
    # source you trust.
    lines = eval(docstring).split('\n')
    if len(lines) <= 1:
        return
    line_number = get_summary_line_info(docstring)[1]
    follower = line_number + 1
    # Fails when nothing follows the summary or the next line has text.
    if follower >= len(lines) or lines[follower].strip():
        return True
def check_indent(docstring, context, is_script):
    """The entire docstring should be indented same as code.

    The entire docstring is indented the same as the quotes at its
    first line.

    """
    if not docstring:
        return
    # NOTE(review): eval() runs whatever the literal contains; only check
    # source you trust.
    lines = eval(docstring).split('\n')
    if len(lines) == 1:
        return
    # Leading whitespace of every non-blank line after the first one.
    indents = [len(line) - len(line.lstrip())
               for line in lines[1:] if line.strip()]
    if not indents:
        return
    # Width of the text that precedes the opening quotes on their line.
    quote_column = len(context.split(docstring)[0].split('\n')[-1])
    if min(indents) != quote_column:
        return True
def check_blank_before_after_class(class_docstring, context, is_script):
    """Class docstring should have 1 blank line around them.

    Insert a blank line before and after all docstrings (one-line or
    multi-line) that document a class -- generally speaking, the class's
    methods are separated from each other by a single blank line, and the
    docstring needs to be offset from the first method by a blank line;
    for symmetry, put a blank line between the class header and the
    docstring.

    """
    if not class_docstring:
        return
    before, after = context.split(class_docstring)[:2]

    def blank_flags(text):
        # One flag per line of `text`: True where the line is blank.
        return [not line.strip() for line in text.split('\n')]

    # Before: a line with text, one blank line, then the (blank) indent
    # in front of the opening quotes.
    if blank_flags(before)[-3:] != [False, True, True]:
        return True
    # After: the rest of the docstring's line, one blank line, then text --
    # unless nothing but blank lines follows at all.
    after_blanks = blank_flags(after)
    if after_blanks[:3] != [True, True, False] and not all(after_blanks):
        return True
def check_blank_after_last_paragraph(docstring, context, is_script):
    """Multiline docstring should end with 1 blank line.

    The BDFL recommends inserting a blank line between the last
    paragraph in a multi-line docstring and its closing quotes,
    placing the closing quotes on a line by themselves.

    """
    if not docstring:
        return
    # NOTE(review): eval() runs whatever the literal contains; only check
    # source you trust.
    lines = eval(docstring).split('\n')
    if len(lines) == 1:
        return
    # Expected ending: a line with text, one blank line, and the blank
    # remainder in front of the closing quotes.
    tail = [not line.strip() for line in lines[-3:]]
    if tail != [False, True, True]:
        return True
if __name__ == '__main__':
    try:
        # parse_options() yields the (options, arguments) pair main() takes;
        # main()'s result becomes the process exit status.
        options, arguments = parse_options()
        sys.exit(main(options, arguments))
    except KeyboardInterrupt:
        # Leave quietly when interrupted with Ctrl-C.
        pass