2020-09-05 21:23:52 +00:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
# Wireshark - Network traffic analyzer
|
|
|
|
|
# By Gerald Combs <gerald@wireshark.org>
|
|
|
|
|
# Copyright 1998 Gerald Combs
|
|
|
|
|
#
|
|
|
|
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
|
|
|
|
|
|
import os
|
2023-06-15 01:14:36 +00:00
|
|
|
|
import sys
|
2020-09-05 21:23:52 +00:00
|
|
|
|
import re
|
|
|
|
|
import subprocess
|
|
|
|
|
import argparse
|
|
|
|
|
import signal
|
|
|
|
|
from collections import Counter
|
|
|
|
|
|
|
|
|
|
# Looks for spelling errors among strings found in source or documentation files.
|
2023-09-29 01:40:08 +00:00
|
|
|
|
# N.B.,
|
|
|
|
|
# - To run this script, you should install pyspellchecker (not spellchecker) using pip.
|
|
|
|
|
# - Because of colouring, you may want to pipe into less -R
|
|
|
|
|
|
2020-09-05 21:23:52 +00:00
|
|
|
|
|
2021-09-08 22:33:59 +00:00
|
|
|
|
# TODO: check structured doxygen comments?
|
|
|
|
|
|
2020-09-05 21:23:52 +00:00
|
|
|
|
# For text colouring/highlighting.
|
|
|
|
|
class bcolors:
    """ANSI terminal escape sequences for colouring/highlighting output."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    ADDED = '\033[45m'       # magenta background
    WARNING = '\033[93m'
    FAIL = '\033[91m'        # used to highlight unrecognised words
    ENDC = '\033[0m'         # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Try to exit soon after Ctrl-C is pressed.
should_exit = False

def signal_handler(sig, frame):
    """SIGINT handler: set a flag that the processing loops poll, so the
    script can exit at the next convenient point rather than mid-file."""
    global should_exit
    should_exit = True
    print('You pressed Ctrl+C - exiting')

# Register the handler for Ctrl-C.
signal.signal(signal.SIGINT, signal_handler)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Create spellchecker, and augment with some Wireshark words.
# NOTE: requires the third-party 'pyspellchecker' package (imported as 'spellchecker').
from spellchecker import SpellChecker

# Set up our dict with words from text file.
# Assumes the script is run from the top of the Wireshark source tree,
# where ./tools/wireshark_words.txt exists.
spell = SpellChecker()
spell.word_frequency.load_text_file('./tools/wireshark_words.txt')


# Track words that were not found, for the summary report at the end.
missing_words = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Split camelCase string into separate words.
|
|
|
|
|
# Split camelCase string into separate words.
def camelCaseSplit(identifier):
    """Return the list of word fragments making up a camelCase identifier.

    e.g. 'camelCaseSplit' -> ['camel', 'Case', 'Split'].
    An empty identifier yields an empty list.
    """
    # The pattern contains only non-capturing groups, so findall() returns
    # the whole of each match - equivalent to finditer() + m.group(0).
    return re.findall(r'.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)', identifier)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# A File object contains all of the strings to be checked for a given file.
|
|
|
|
|
class File:
    """Holds all of the strings to be checked for one file, and checks them.

    Code files (.c/.cpp) contribute only their string literals; any other
    file contributes every whitespace-separated word.
    """

    def __init__(self, file):
        self.file = file
        # Strings collected via add(); spellCheck() examines these.
        self.values = []

        filename, extension = os.path.splitext(file)
        # Only C/C++ sources are treated as code files.
        self.code_file = extension in {'.c', '.cpp'}

        with open(file, 'r', encoding="utf8") as f:
            contents = f.read()

            if self.code_file:
                # Remove comments so as not to trip up RE.
                contents = removeComments(contents)

            # Find protocol name and add to dict.
            # N.B. doesn't work when a variable is used instead of a literal for the protocol name...
            matches = re.finditer(r'proto_register_protocol\s*\([\n\r\s]*\"(.*)\",[\n\r\s]*\"(.*)\",[\n\r\s]*\"(.*)\"', contents)
            for m in matches:
                # Group 3 is the third argument of proto_register_protocol().
                protocol = m.group(3)
                # Add to dict.
                spell.word_frequency.load_words([protocol])
                spell.known([protocol])
                print('Protocol is: ' + bcolors.BOLD + protocol + bcolors.ENDC)

    # Add a string found in this file.
    def add(self, value):
        # On Windows the value is stored UTF-8-encoded (bytes); spellCheck()
        # normalises with str() before use.
        self.values.append(value.encode('utf-8') if sys.platform.startswith('win') else value)

    # Whole word is not recognised, but is it 2 words concatenated (without camelcase) ?
    def checkMultiWords(self, word):
        if len(word) < 6:
            return False

        # Don't consider if mixed cases.
        if not (word.islower() or word.isupper()):
            # But make an exception if only the first letter is uppercase..
            if not word == (word[0].upper() + word[1:]):
                return False

        # Try splitting into 2 words recognised at various points.
        # Allow 3-letter words.
        length = len(word)
        for idx in range(3, length-3):
            word1 = word[0:idx]
            word2 = word[idx:]

            # spell.unknown() returns the subset not recognised; empty -> both known.
            if not spell.unknown([word1, word2]):
                return True

        # No simple 2-way split worked; try multi-way splits.
        return self.checkMultiWordsRecursive(word)

    # If word before 'id' is recognised, accept word.
    def wordBeforeId(self, word):
        if word.lower().endswith('id'):
            if not spell.unknown([word[0:len(word)-2]]):
                return True
            # NOTE(review): implicitly returns None (falsy) when the prefix
            # is not recognised - callers only test truthiness, so this works.
        else:
            return False

    def checkMultiWordsRecursive(self, word):
        # Is 'word' a chain of known words (each at least 4 letters) that
        # exactly covers the whole of 'word'?
        length = len(word)
        #print('word=', word)
        if length < 4:
            return False

        for idx in range(4, length+1):
            w = word[0:idx]
            if not spell.unknown([w]):
                if idx == len(word):
                    # Prefix is known and consumes the whole word.
                    return True
                else:
                    # Prefix known; recurse on the remainder.
                    if self.checkMultiWordsRecursive(word[idx:]):
                        return True

        return False

    def numberPlusUnits(self, word):
        # Accept tokens such as '100ms' or '2ghz': digits followed by a known unit.
        m = re.search(r'^([0-9]+)([a-zA-Z]+)$', word)
        if m:
            # NOTE(review): "ms" appears twice in this set literal (harmless).
            if m.group(2).lower() in { "bit", "bits", "gb", "kbps", "gig", "mb", "th", "mhz", "v", "hz", "k",
                                       "mbps", "m", "g", "ms", "nd", "nds", "rd", "kb", "kbit", "ghz",
                                       "khz", "km", "ms", "usec", "sec", "gbe", "ns", "ksps", "qam", "mm" }:
                return True
        return False

    # Check the spelling of all the words we have found
    def spellCheck(self):

        num_values = len(self.values)
        for value_index,v in enumerate(self.values):
            # Poll the Ctrl-C flag so long runs can be interrupted.
            if should_exit:
                exit(1)

            # Values may be bytes on Windows (see add()); normalise to str.
            v = str(v)

            # Ignore includes.
            if v.endswith('.h'):
                continue

            # Store original (as want to include for context in error report).
            original = str(v)

            # Replace most punctuation with spaces, and eliminate common format specifiers.
            v = v.replace('.', ' ')
            v = v.replace(',', ' ')
            v = v.replace('`', ' ')
            v = v.replace(':', ' ')
            v = v.replace(';', ' ')
            v = v.replace('"', ' ')
            v = v.replace('\\', ' ')
            v = v.replace('+', ' ')
            v = v.replace('|', ' ')
            v = v.replace('(', ' ')
            v = v.replace(')', ' ')
            v = v.replace('[', ' ')
            v = v.replace(']', ' ')
            v = v.replace('{', ' ')
            v = v.replace('}', ' ')
            v = v.replace('<', ' ')
            v = v.replace('>', ' ')
            v = v.replace('_', ' ')
            v = v.replace('-', ' ')
            v = v.replace('/', ' ')
            v = v.replace('!', ' ')
            v = v.replace('?', ' ')
            v = v.replace('=', ' ')
            v = v.replace('*', ' ')
            v = v.replace('%', ' ')
            v = v.replace('#', ' ')
            v = v.replace('&', ' ')
            v = v.replace('@', ' ')
            v = v.replace('$', ' ')
            v = v.replace('^', ' ')
            v = v.replace('®', '')
            v = v.replace("'", ' ')
            # NOTE(review): '"' was already replaced above, and since '%' has
            # already become a space, the '%u'/'%d'/'%s' replacements below can
            # never match - candidates for removal or reordering.
            v = v.replace('"', ' ')
            v = v.replace('%u', '')
            v = v.replace('%d', '')
            v = v.replace('%s', '')

            # Split into words.
            value_words = v.split()
            # Further split up any camelCase words.
            words = []
            for w in value_words:
                words += camelCaseSplit(w)

            # Check each word within this string in turn.
            for word in words:
                # Strip trailing digits from word.
                word = word.rstrip('1234567890')

                # Quote marks found in some of the docs...
                word = word.replace('“', '')
                word = word.replace('”', '')

                # Single and collective possession
                if word.endswith("’s"):
                    word = word[:-2]
                if word.endswith("s’"):
                    word = word[:-2]

                # Skip number+units tokens like '100ms'.
                if self.numberPlusUnits(word):
                    continue

                # Report words (> 4 chars) the checker doesn't recognise, unless
                # they can be explained as concatenations or '<word>id'.
                if len(word) > 4 and spell.unknown([word]) and not self.checkMultiWords(word) and not self.wordBeforeId(word):
                    print(self.file, value_index, '/', num_values, '"' + original + '"', bcolors.FAIL + word + bcolors.ENDC,
                          ' -> ', '?')

                    # TODO: this can be interesting, but takes too long!
                    # bcolors.OKGREEN + spell.correction(word) + bcolors.ENDC
                    global missing_words
                    missing_words.append(word)
|
|
|
|
|
|
2020-11-22 00:42:06 +00:00
|
|
|
|
def removeWhitespaceControl(code_string):
    """Replace literal escape sequences (backslash-n/r/t) with spaces.

    These are the two-character source-code sequences, not actual control
    characters.
    """
    for escape in ('\\n', '\\r', '\\t'):
        code_string = code_string.replace(escape, ' ')
    return code_string
|
|
|
|
|
|
2020-12-16 09:59:50 +00:00
|
|
|
|
# Remove any contractions from the given string.
|
2020-11-22 00:42:06 +00:00
|
|
|
|
def removeContractions(code_string):
    """Remove known contractions from the given string.

    Each contraction is removed in four variants: as listed, capitalized,
    with a straight apostrophe, and capitalized with a straight apostrophe.
    (Fix: the original list contained "let’s" twice, causing redundant
    replace passes; the duplicate has been removed.)
    """
    contractions = [ "wireshark’s", "don’t", "let’s", "isn’t", "won’t", "user’s", "hasn’t", "you’re", "o’clock", "you’ll",
                     "you’d", "developer’s", "doesn’t", "what’s", "haven’t", "can’t", "you’ve",
                     "shouldn’t", "didn’t", "wouldn’t", "aren’t", "there’s", "packet’s", "couldn’t", "world’s",
                     "needn’t", "graph’s", "table’s", "parent’s", "entity’s", "server’s", "node’s",
                     "querier’s", "sender’s", "receiver’s", "computer’s", "frame’s", "vendor’s", "system’s",
                     "we’ll", "asciidoctor’s", "protocol’s", "microsoft’s", "wasn’t" ]
    for c in contractions:
        # Remove all casing/apostrophe variants of this contraction.
        for variant in (c,
                        c.capitalize(),
                        c.replace('’', "'"),
                        c.capitalize().replace('’', "'")):
            code_string = code_string.replace(variant, "")
    return code_string
|
|
|
|
|
|
2020-09-05 21:23:52 +00:00
|
|
|
|
def removeComments(code_string):
    """Strip C-style and C++-style comments from source text."""
    code_string = re.sub(re.compile(r"/\*.*?\*/", re.DOTALL), "" , code_string) # C-style comment
    # Avoid matching // where it is allowed, e.g., https://www... or file:///...
    # The lookbehinds exclude '//' preceded by ':', '/', a quote (with optional
    # spacing), 'file:/', or ', ' - heuristics for URL-like and quoted text.
    code_string = re.sub(re.compile(r"(?<!:)(?<!/)(?<!\")(?<!\"\s\s)(?<!file:/)(?<!\,\s)//.*?\n" ) ,"" , code_string) # C++-style comment
    return code_string
|
|
|
|
|
|
|
|
|
|
def removeSingleQuotes(code_string):
    """Remove/neutralise quoting constructs that would confuse string extraction.

    NOTE(review): despite the name, this handles escaped backslashes and
    double-quote constructs plus the ellipsis character - confirm intent.
    The replacement order matters and is preserved from the original.
    """
    substitutions = (
        ('\\\\', " "),        # Separate at \\
        ('\"\\\\\"', ""),
        ("\\\"", " "),
        ("'\"'", ""),
        ('…', ' '),
    )
    for old, new in substitutions:
        code_string = code_string.replace(old, new)
    return code_string
|
|
|
|
|
|
|
|
|
|
def removeHexSpecifiers(code_string):
    """Remove hex constants ('0x' followed by hex digits) from the string.

    Fix: the original looped re.search() + str.replace(), removing every
    occurrence of the first match's exact text each iteration.  That could
    corrupt longer hex numbers sharing the matched prefix (e.g. for
    '0x1f 0x1fa', removing all '0x1f' leaves a stray 'a'), and was O(n^2).
    A single re.sub() pass removes each hex number exactly where it occurs.
    """
    return re.sub(r'0x[0-9a-fA-F]*', '', code_string)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Create a File object that knows about all of the strings in the given file.
|
|
|
|
|
# Create a File object that knows about all of the strings in the given file.
def findStrings(filename):
    """Read filename and return a File holding every string to spellcheck."""
    with open(filename, 'r', encoding="utf8") as f:
        text = f.read()

    # Remove comments & embedded quotes so as not to trip up RE.
    for cleaner in (removeContractions,
                    removeWhitespaceControl,
                    removeSingleQuotes,
                    removeHexSpecifiers):
        text = cleaner(text)

    # Create file object.
    result = File(filename)

    # What we check depends upon file type.
    if result.code_file:
        # Code, so only check string literals.
        text = removeComments(text)
        for m in re.finditer(r'\"([^\"]*)\"', text):
            result.add(m.group(1))
    else:
        # A documentation file, so examine all words.
        for token in text.split():
            result.add(token)

    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Test for whether the given file was automatically generated.
|
|
|
|
|
# Test for whether the given file was automatically generated.
def isGeneratedFile(filename):
    """Return True if filename looks machine-generated, else False.

    Only .c files are considered.  A few known generated files are accepted
    by name; otherwise the first few lines are scanned for generated-file
    notices.  (Fix: use a 'with' block so the file handle is closed on every
    path, instead of manual open()/close() calls; the marker checks are
    collapsed into a tuple + any().)
    """
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        return False

    if not filename.endswith('.c'):
        return False

    # These files are generated, but the notice is further in than we scan below.
    if filename.endswith('pci-ids.c') or filename.endswith('services-data.c') or filename.endswith('manuf-data.c'):
        return True

    # Notices that identify a generated file.
    markers = ('Generated automatically',
               'Autogenerated from',
               'is autogenerated',
               'automatically generated by Pidl',
               'Created by: The Qt Meta Object Compiler',
               'This file was generated',
               'This filter was automatically generated',
               'This file is auto generated, do not edit!',
               'this file is automatically generated')

    with open(os.path.join(filename), 'r', encoding="utf8") as f_read:
        for line_no, line in enumerate(f_read):
            # The comment to say that its generated is near the top, so give up
            # once we get a few lines down.
            if line_no > 10:
                break
            if any(line.find(marker) != -1 for marker in markers):
                return True

    # OK, looks like a hand-written file!
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def isAppropriateFile(filename):
    """Return True if this kind of file should be spellchecked."""
    base, ext = os.path.splitext(filename)
    # CMake-related files are skipped entirely.
    if 'CMake' in filename:
        return False
    # Source and documentation extensions, plus README files of any extension.
    return ext in {'.adoc', '.c', '.cpp', '.pod', '.txt'} or base.endswith('README')
|
2020-09-05 21:23:52 +00:00
|
|
|
|
|
|
|
|
|
|
2021-09-27 08:17:16 +00:00
|
|
|
|
def findFilesInFolder(folder, recursive=True):
    """Return appropriate, non-generated files found in folder.

    Recurses into subfolders unless recursive is False.  Returns None early
    (implicitly) if Ctrl-C was pressed during a recursive walk.
    """
    result = []

    if recursive:
        for root, _subfolders, names in os.walk(folder):
            for name in names:
                # Bail out promptly on Ctrl-C.
                if should_exit:
                    return
                candidate = os.path.join(root, name)
                if isAppropriateFile(candidate) and not isGeneratedFile(candidate):
                    result.append(candidate)
    else:
        for entry in sorted(os.listdir(folder)):
            candidate = os.path.join(folder, entry)
            if isAppropriateFile(candidate) and not isGeneratedFile(candidate):
                result.append(candidate)

    return result
|
|
|
|
|
|
|
|
|
|
|
2021-01-25 11:24:03 +00:00
|
|
|
|
# Check the given file.
|
2020-09-05 21:23:52 +00:00
|
|
|
|
def checkFile(filename):
    """Spellcheck all of the strings found in the given file."""
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    # Gather the file's strings and check them.
    findStrings(filename).spellCheck()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#################################################################
# Main logic.

# command-line args. Controls which files should be checked.
# If no args given, will just scan epan/dissectors folder.
parser = argparse.ArgumentParser(description='Check spellings in specified files')
parser.add_argument('--file', action='append',
                    help='specify individual file to test')
parser.add_argument('--folder', action='store', default='',
                    help='specify folder to test')
parser.add_argument('--no-recurse', action='store_true', default='',
                    help='do not recurse inside chosen folder')
parser.add_argument('--commits', action='store',
                    help='last N commits to check')
parser.add_argument('--open', action='store_true',
                    help='check open files')

args = parser.parse_args()


# Get files from wherever command-line args indicate.
files = []
if args.file:
    # Add specified file(s)
    for f in args.file:
        if not os.path.isfile(f):
            print('Chosen file', f, 'does not exist.')
            exit(1)
        else:
            files.append(f)
elif args.commits:
    # Get files affected by specified number of commits.
    command = ['git', 'diff', '--name-only', 'HEAD~' + args.commits]
    files = [f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()]
    # Filter files (may have been deleted by a later commit).
    files = list(filter(lambda f : os.path.exists(f) and isAppropriateFile(f) and not isGeneratedFile(f), files))
elif args.open:
    # Unstaged changes.
    command = ['git', 'diff', '--name-only']
    files = [f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()]
    # Filter files.
    files = list(filter(lambda f : isAppropriateFile(f) and not isGeneratedFile(f), files))
    # Staged changes.
    command = ['git', 'diff', '--staged', '--name-only']
    files_staged = [f.decode('utf-8')
                    for f in subprocess.check_output(command).splitlines()]
    # Filter files.
    files_staged = list(filter(lambda f : isAppropriateFile(f) and not isGeneratedFile(f), files_staged))
    # Merge staged files in, avoiding duplicates.
    for f in files_staged:
        if not f in files:
            files.append(f)
else:
    # By default, scan dissectors directory
    folder = os.path.join('epan', 'dissectors')
    # But overwrite with any folder entry.
    if args.folder:
        folder = args.folder
        if not os.path.isdir(folder):
            print('Folder', folder, 'not found!')
            exit(1)

    # Find files from folder.
    print('Looking for files in', folder)
    files = findFilesInFolder(folder, not args.no_recurse)


# If scanning a subset of files, list them here.
print('Examining:')
if args.file or args.folder or args.commits or args.open:
    if files:
        print(' '.join(files), '\n')
    else:
        print('No files to check.\n')
else:
    print('All dissector modules\n')


# Now check the chosen files.
for f in files:
    # Check this file.
    checkFile(f)
    # But get out if control-C has been pressed.
    if should_exit:
        exit(1)


# Show the most commonly not-recognised words.
print('')
counter = Counter(missing_words).most_common(100)
if len(counter) > 0:
    for c in counter:
        print(c[0], ':', c[1])

# Show error count.
print('\n' + bcolors.BOLD + str(len(missing_words)) + ' issues found' + bcolors.ENDC + '\n')
|