Spelling improvements

Improve script by ignoring common contractions, dealing with
e.g. \n within strings, and finding multiple concatenated words even
when no camelCase is used.

Also includes some actual spelling fixes.
This commit is contained in:
Martin Mathieson 2020-11-22 00:42:06 +00:00 committed by Wireshark GitLab Utility
parent 7d16b7b251
commit df651aca91
15 changed files with 68 additions and 17 deletions

View File

@ -91,7 +91,7 @@ file; B<editcap -F> provides a list of the available output formats.
=item -a E<lt>framenum:commentE<gt>
For the specifiqed frame number, assign the given comment string.
For the specified frame number, assign the given comment string.
Can be repeated for multiple frames. Quotes should be used with comment
strings that include spaces.

View File

@ -149,7 +149,7 @@ More details can be found on the
=== RTSP
In the Real Time Streaming Protocol (RTSP) menu the user can check the Paket Counter window. It shows Total RTCP Packets and divided into RTSP Response Packets, RTSP Request Packets and Other RTSP packets. The user can filter, copy or save the data into a file.
In the Real Time Streaming Protocol (RTSP) menu the user can check the Packet Counter window. It shows the total number of RTSP packets, divided into RTSP Response Packets, RTSP Request Packets and Other RTSP packets. The user can filter, copy or save the data into a file.
{missing}

View File

@ -1033,7 +1033,7 @@ image::wsug_graphics/ws-bytes-pane.png[{screenshot-attrs}]
The “Packet Bytes” pane shows a canonical
https://en.wikipedia.org/wiki/Hex_dump[hex dump] of the packet data. Each line
contains the data offset, sixteen hexadecimal bytes, and sixteen ASCII bytes.
Non-printalbe bytes are replaced with a period (“.”).
Non-printable bytes are replaced with a period (“.”).
Depending on the packet data, sometimes more than one page is available, e.g.
when Wireshark has reassembled some packets into a single chunk of data. (See

View File

@ -4614,7 +4614,7 @@ void proto_register_kerberos(void) {
static ei_register_info ei[] = {
{ &ei_kerberos_missing_keytype, { "kerberos.missing_keytype", PI_DECRYPTION, PI_WARN, "Missing keytype", EXPFILL }},
{ &ei_kerberos_decrypted_keytype, { "kerberos.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decryted keytype", EXPFILL }},
{ &ei_kerberos_decrypted_keytype, { "kerberos.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decrypted keytype", EXPFILL }},
{ &ei_kerberos_learnt_keytype, { "kerberos.learnt_keytype", PI_SECURITY, PI_CHAT, "Learnt keytype", EXPFILL }},
{ &ei_kerberos_address, { "kerberos.address.unknown", PI_UNDECODED, PI_WARN, "KRB Address: I don't know how to parse this type of address yet", EXPFILL }},
{ &ei_krb_gssapi_dlglen, { "kerberos.gssapi.dlglen.error", PI_MALFORMED, PI_ERROR, "DlgLen is not the same as number of bytes remaining", EXPFILL }},

View File

@ -1393,7 +1393,7 @@ void proto_register_spnego(void) {
};
static ei_register_info ei[] = {
{ &ei_spnego_decrypted_keytype, { "spnego.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decryted keytype", EXPFILL }},
{ &ei_spnego_decrypted_keytype, { "spnego.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decrypted keytype", EXPFILL }},
{ &ei_spnego_unknown_header, { "spnego.unknown_header", PI_PROTOCOL, PI_WARN, "Unknown header", EXPFILL }},
};

View File

@ -280,7 +280,7 @@ void proto_register_sv(void) {
{ "source", "sv.meas_quality.source", FT_UINT32, BASE_HEX, VALS(sv_q_source_vals), Q_SOURCE_MASK, NULL, HFILL}},
{ &hf_sv_phsmeas_q_test,
{ "test", "sv.meas_quality.teset", FT_BOOLEAN, 32, NULL, Q_TEST, NULL, HFILL}},
{ "test", "sv.meas_quality.test", FT_BOOLEAN, 32, NULL, Q_TEST, NULL, HFILL}},
{ &hf_sv_phsmeas_q_operatorblocked,
{ "operator blocked", "sv.meas_quality.operatorblocked", FT_BOOLEAN, 32, NULL, Q_OPERATORBLOCKED, NULL, HFILL}},

View File

@ -5605,7 +5605,7 @@ static const value_string valstr_011_430_FLS[] = {
{ 1, "On Stand" },
{ 2, "Taxiing for Departure" },
{ 3, "Taxiing for Arrival" },
{ 4, "Ruwnay for Departure" },
{ 4, "Runway for Departure" },
{ 5, "Runway for Arrival" },
{ 6, "Hold for Departure" },
{ 7, "Hold for arrival" },

View File

@ -4092,7 +4092,7 @@ proto_register_gsm_a_common(void)
{ &hf_gsm_a_att,
{ "ATT", "gsm_a.att",
FT_BOOLEAN, 8, TFS(&gsm_a_att_value), 0x01,
"ttach-detach allowed", HFILL }
"Attach-detach allowed", HFILL }
},
{ &hf_gsm_a_nmo_1,
{ "NMO I", "gsm_a.nmo_1",

View File

@ -15527,7 +15527,7 @@ proto_register_gsm_rlcmac(void)
}
},
{ &hf_packet_compact_cell_sel_gprs_penalty_time,
{ "GPRS_PENALTY_TIME", "gsm_rlcmac.dl.psi3_compact_cell_sel_gprs_panelty_time",
{ "GPRS_PENALTY_TIME", "gsm_rlcmac.dl.psi3_compact_cell_sel_gprs_penalty_time",
FT_UINT8, BASE_DEC, NULL, 0x0,
NULL, HFILL
}

View File

@ -8745,7 +8745,7 @@ void proto_register_kerberos(void) {
static ei_register_info ei[] = {
{ &ei_kerberos_missing_keytype, { "kerberos.missing_keytype", PI_DECRYPTION, PI_WARN, "Missing keytype", EXPFILL }},
{ &ei_kerberos_decrypted_keytype, { "kerberos.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decryted keytype", EXPFILL }},
{ &ei_kerberos_decrypted_keytype, { "kerberos.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decrypted keytype", EXPFILL }},
{ &ei_kerberos_learnt_keytype, { "kerberos.learnt_keytype", PI_SECURITY, PI_CHAT, "Learnt keytype", EXPFILL }},
{ &ei_kerberos_address, { "kerberos.address.unknown", PI_UNDECODED, PI_WARN, "KRB Address: I don't know how to parse this type of address yet", EXPFILL }},
{ &ei_krb_gssapi_dlglen, { "kerberos.gssapi.dlglen.error", PI_MALFORMED, PI_ERROR, "DlgLen is not the same as number of bytes remaining", EXPFILL }},

View File

@ -1469,7 +1469,7 @@ static const value_string etc_model_id_vals[] = {
{ ETC_MODEL_ID_SOURCE_FOUR_DIMMER, "Source Four Dimmer" },
{ ETC_MODEL_ID_KILLSWITCH_WIRELESS, "Killswitch Wireless" },
{ ETC_MODEL_ID_KILLSWITCH_DMX, "Killswitch DMX" },
{ ETC_MODEL_ID_KILLSWITCH_ETHERNET, "Killswitch Etherner" },
{ ETC_MODEL_ID_KILLSWITCH_ETHERNET, "Killswitch Ethernet" },
{ ETC_MODEL_ID_KILLSWITCH_TRANSMITTER, "Killswitch Transmitter" },
{ ETC_MODEL_ID_DMX_ZONE_CONTROLLER_SINGLE_DIMMER, "DMX Zone Controller, Single Dimmer" },
{ ETC_MODEL_ID_DMX_ZONE_CONTROLLER_RELAY, "DMX Zone Controller, Relay" },

View File

@ -1955,7 +1955,7 @@ void proto_register_spnego(void) {
};
static ei_register_info ei[] = {
{ &ei_spnego_decrypted_keytype, { "spnego.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decryted keytype", EXPFILL }},
{ &ei_spnego_decrypted_keytype, { "spnego.decrypted_keytype", PI_SECURITY, PI_CHAT, "Decrypted keytype", EXPFILL }},
{ &ei_spnego_unknown_header, { "spnego.unknown_header", PI_PROTOCOL, PI_WARN, "Unknown header", EXPFILL }},
};

View File

@ -576,7 +576,7 @@ void proto_register_sv(void) {
{ "source", "sv.meas_quality.source", FT_UINT32, BASE_HEX, VALS(sv_q_source_vals), Q_SOURCE_MASK, NULL, HFILL}},
{ &hf_sv_phsmeas_q_test,
{ "test", "sv.meas_quality.teset", FT_BOOLEAN, 32, NULL, Q_TEST, NULL, HFILL}},
{ "test", "sv.meas_quality.test", FT_BOOLEAN, 32, NULL, Q_TEST, NULL, HFILL}},
{ &hf_sv_phsmeas_q_operatorblocked,
{ "operator blocked", "sv.meas_quality.operatorblocked", FT_BOOLEAN, 32, NULL, Q_OPERATORBLOCKED, NULL, HFILL}},

View File

@ -13,7 +13,6 @@ import signal
from collections import Counter
# Looks for spelling errors among strings found in source or documentation files.
# TODO: deal with contractions - pyspellcheck doesn't seem to handle apostrophes.
# For text colouring/highlighting.
class bcolors:
@ -76,7 +75,7 @@ class File:
# Find protocol name and add to dict.
# N.B. doesn't work when a variable is used instead of a literal for the protocol name...
matches = re.finditer(r'proto_register_protocol\s*\([\n\r\s]*\"(.*)\",[\n\r\s]*\"(.*)\",[\n\r\s]*\"(.*)\"', contents)
matches = re.finditer(r'proto_register_protocol\s*\([\n\r\s]*\"(.*)\",[\n\r\s]*\"(.*)\",[\n\r\s]*\"(.*)\"', contents)
for m in matches:
protocol = m.group(3)
# Add to dict.
@ -108,6 +107,15 @@ class File:
if not spell.unknown([word1, word2]):
return True
# Run through, looking for any number of separate words.
next_word_start = 0
for idx in range(1, length+1):
w = word[next_word_start:idx]
if len(w) > 3 and not spell.unknown([w]):
next_word_start = idx
if next_word_start == length:
return True
return False
@ -164,7 +172,6 @@ class File:
v = v.replace('%u', '')
v = v.replace('%d', '')
v = v.replace('%s', '')
v = v.replace('\\n', ' ')
# Split into words.
value_words = v.split()
@ -190,6 +197,21 @@ class File:
global missing_words
missing_words.append(word)
def removeWhitespaceControl(code_string):
    # Swap each escaped whitespace sequence as it is written inside a source
    # string literal (a backslash followed by n, r or t) for a single space.
    # Real newline/tab characters in the input are left untouched.
    for escape_sequence in ('\\n', '\\r', '\\t'):
        code_string = code_string.replace(escape_sequence, ' ')
    return code_string
def removeContractions(code_string):
    # Remove known contractions and possessives from code_string, returning
    # the result. Each entry is stripped in two spellings: with the
    # typographic apostrophe (U+2019) and with the plain ASCII apostrophe.
    #
    # The entries must carry their apostrophe: without it, ordinary words such
    # as "users" or "packets" (and substrings like "lets" or "cant") would be
    # deleted from the text as well. Likewise str.replace('', x) inserts x
    # between every character, so the apostrophe being swapped has to be named
    # explicitly.
    # NOTE(review): apostrophe positions for the possessives ("User's",
    # "Developer's", "packet's") are reconstructed - confirm against upstream.
    contractions = [ "Wireshark's", "don't", "let's", "isn't", "won't", "User's", "user's", "hasn't", "you're", "o'clock", "you'll",
                     "you'd", "Developer's", "doesn't", "what's", "Don't", "Let's", "haven't", "can't", "you've",
                     "shouldn't", "didn't", "wouldn't", "aren't", "there's", "packet's", "couldn't" ]
    for c in contractions:
        # Typographic form first, then the ASCII form.
        code_string = code_string.replace(c.replace("'", "\u2019"), "")
        code_string = code_string.replace(c, "")
    return code_string
def removeComments(code_string):
code_string = re.sub(re.compile(r"/\*.*?\*/",re.DOTALL ) ,"" ,code_string) # C-style comment
# Remove this for now as it can get tripped up if it sees https://www.... within a string!
@ -219,7 +241,8 @@ def findStrings(filename):
contents = f.read()
# Remove comments & embedded quotes so as not to trip up RE.
contents = removeComments(contents)
contents = removeContractions(contents)
contents = removeWhitespaceControl(contents)
contents = removeSingleQuotes(contents)
contents = removeHexSpecifiers(contents)
@ -228,6 +251,7 @@ def findStrings(filename):
# What we check depends upon file type.
if file.code_file:
contents = removeComments(contents)
# Code so only checking strings.
matches = re.finditer(r'\"([^\"]*)\"', contents)
for m in matches:

View File

@ -1,12 +1,16 @@
0x%02x
0x%08x
16bit
1xrtt
32bit
3gpp2
80211n
accelerometer
accessor
accessors
acked
acknack
acknowledgement
acp133
activations
@ -31,6 +35,7 @@ amperes
analyzers
analyzes
annexc
anonsvn
appdata
appid
arfcn
@ -88,6 +93,8 @@ btmesh
btsdp
btsnoop
bugzilla
buildbot
builtin
byte
byteorder
cablelabs
@ -124,6 +131,7 @@ charset
charsets
checkbox
checkout
choco
chocolatey
chunked
ciphered
@ -137,6 +145,7 @@ cmake
cmdcontrol
cmstatus
codebook
codecs
codepoint
codeset
codingrate
@ -218,6 +227,8 @@ demultiplexer
demultiplexers
deobfuscated
deprecated
dequeued
dereg
deregister
deregistered
deregistering
@ -258,6 +269,7 @@ dsmcc
dstport
dumpcap
duple
eapol
earfcn
ebcdic
ecdhe
@ -274,9 +286,11 @@ encapsulator
enciphered
encrypt
encrypting
endace
endian
endianness
enrollee
entityid
entryid
enumerations
epasv
@ -291,9 +305,11 @@ ettarr
etype
eutra
eutran
eventlog
extattr
extcap
extensibility
exthdr
extrainformation
failover
fiber
@ -310,6 +326,7 @@ format0
fortigate
fortinet
fpiur
fraghdr
framenum
framenumber
framenun
@ -330,6 +347,7 @@ geonw
geran
getattr
getnext
getter
gigabit
gigamon
github
@ -352,6 +370,7 @@ hashed
hazelcast
heuristic
hfarr
hfill,
HI2Operations
hnbap
homeplug
@ -484,6 +503,8 @@ modulo
mpeg4
mpsse
mrcpv
msbuild
msgid
msgsend
mtftp
mtrace
@ -562,6 +583,7 @@ onoff
ontime
opcode
opcodes
openstreetmap
openvpn
opnum
optimizations
@ -665,6 +687,7 @@ rebooted
reboots
recalculate
recalculating
recalibrate
recognizer
recomputed
reconf
@ -923,6 +946,7 @@ underrun
undissected
unencrypted
unescaped
unexported
unformatted
unfragmented
unhandled
@ -940,6 +964,7 @@ unjoining
unknown1
unlink
unlinked
unmappable
unmarshal
unoptimized
unparsable
@ -999,6 +1024,7 @@ version7
versioned
versioning
virtualization
vlans
vocoder
volerr
wakeup
@ -1006,6 +1032,7 @@ webcam
websocket
wideband
wikipedia
wikis
wimax
winpcap
winspool