/* * Ezpwd Reed-Solomon -- Reed-Solomon encoder / decoder library * * Copyright (c) 2014, Hard Consulting Corporation. * * Ezpwd Reed-Solomon is free software: you can redistribute it and/or modify it under the terms of * the GNU General Public License as published by the Free Software Foundation, either version 3 of * the License, or (at your option) any later version. See the LICENSE file at the top of the * source tree. Ezpwd Reed-Solomon is also available under Commercial license. c++/ezpwd/rs_base * is redistributed under the terms of the LGPL, regardless of the overall licensing terms. * * Ezpwd Reed-Solomon is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. */ #ifndef _EZPWD_CORRECTOR #define _EZPWD_CORRECTOR #include "rs" #include "serialize" namespace ezpwd { // // best_avg -- collect , guesses, and return the unambiguous best one // typedef std::map> // (, (, )) best_avg_base_t; class best_avg : public best_avg_base_t { public: using best_avg_base_t::begin; using best_avg_base_t::end; using best_avg_base_t::insert; using best_avg_base_t::find; using best_avg_base_t::iterator; using best_avg_base_t::const_iterator; using best_avg_base_t::value_type; using best_avg_base_t::mapped_type; // // add -- add the given pct to the current average for str // iterator add( const std::string &str, int pct ) { iterator i = find( str ); if ( i == end() ) i = insert( i, value_type( str, mapped_type() )); i->second.first += 1; i->second.second += pct; return i; } // // best -- return the unambiguously best value (average is >, or == but longer), or end() // const_iterator best() const { const_iterator top = end(); bool uni = false; for ( const_iterator i = begin(); i != end(); ++i ) { if ( top == end() or i->second.second/i->second.first > top->second.second/top->second.first or ( i->second.second/i->second.first == top->second.second/top->second.first and i->first.size() > top->first.size())) { top = i; uni = true; } else if ( i->second.second/i->second.first == top->second.second/top->second.first and i->first.size() == top->first.size()) { uni = false; } } return uni ? top : end(); } // // evaluation -- process a (,(,)) into (,) // sort -- return a multimap indexed by --> // output -- output the : , sorted by average // static std::pair evaluation( const value_type &val ) { return std::pair( val.second.second/val.second.first, val.first ); } typedef std::multimap sorted_t; sorted_t sort() const { sorted_t dst; std::transform( begin(), end(), std::inserter( dst, dst.begin() ), evaluation ); return dst; } std::ostream &output( std::ostream &lhs ) const { for ( auto i : sort() ) lhs << std::setw( 16 ) << i.second << ": " << std::setw( 3 ) << i.first << std::endl; return lhs; } }; } // namespace ezpwd std::ostream &operator<<( std::ostream &lhs, const ezpwd::best_avg &rhs ) { return rhs.output( lhs ); } namespace ezpwd { // // ezpwd::corrector -- Apply statistical corrections to a string, returning the confidence // // All methods are static; no instance is required, as this is primarily used to create // external language APIs. // template < size_t PARITY, size_t N = 64, typename SERIAL = serialize::base< N, serialize::ezpwd< N >>> class corrector { public: static std::ostream &output( std::ostream &lhs ) { lhs << "corrector"; return lhs; } // // parity() -- Returns 'PARITY' base-N symbols of R-S parity to the supplied password // static std::string parity( const std::string &password ) { std::string parity; rscodec.encode( password, parity ); SERIAL::encode( parity ); return parity; } // // encode() -- append PARITY base-N parity symbols to password // // The supplied password buffer size must be sufficient to contain PARITY additional // symbols, plus the terminating NUL. Returns the resultant encoded password size // (excluding the NUL). // static size_t encode( std::string &password ) { password += parity( password ); return password.size(); } static size_t encode( char *password, size_t size ) // maximum available size { size_t len = ::strlen( password ); // length w/o terminating NUL if ( len + PARITY + 1 > size ) throw std::runtime_error( "ezpwd::rspwd::encode password buffer has insufficient capacity" ); std::string par = parity( std::string( password, password + len )); if ( par.size() != PARITY ) throw std::runtime_error( "ezpwd::rspwd::encode computed parity with incorrect size" ); std::copy( par.begin(), par.end(), password + len ); len += PARITY; password[len] = 0; return len; } // // decode([,...]) -- Applies R-S error correction on the encoded string, removing parity // // Up to 'PARITY' Reed-Solomon parity symbols are examined, to determine if the supplied // string is a valid R-S codeword and hence very likely to be correct. Optionally supply a // vector of erasure positions. // // An optional 'minimum' final password length may be provided; no R-S parity is assumed // to exist in the first 'minimum' password characters (default: PARITY). This prevents // accidentally finding valid R-S codewords in passwords of known minimum length; validation // codes, for example. Likewise, the optional 'maximum' allows us to limit the number of // parity symbols that may be assumed to be missing from the end of the codeword. // // Returns a confidence strength rating, which is the ratio: // // 100 - ( errors * 2 + erasures ) * 100 / parity // // if an R-S codeword was solved, and 0 otherwise. If a codeword is solved, but the number // of errors and erasures corrected indicates that all parity was consumed, the caller may // opt to not use the corrected string, because there is a chance that our R-S polynomial // was overwhelmed with errors and actually returned an incorrect codeword. Therefore, // solving a codeword using all available parity results in 100 - PARITY * 100 / PARITY == // 0, which indicates that there is no certainty of correctness; all R-S parity resources // were used in error/erasure recover, with none left to confirm that the result is actually // correct. If only zero-strength results are achieved, the longest will be returned (the // full, original string). // // Supports the following forms of error/erasure: // // 0) Full parity. All data and parity supplied, and an R-S codeword is solved. // // 1) Partial parity. All data and some parity supplied; remainder are deemed erasures. // // If PARITY > 2, then up to PARITY/2-1 trailing parity terms are marked as erasures. // If the R-S codeword is solved and a safe number of errors are found, then we can have // reasonable confidence that the string is correct. // // 1a) Erase errors. Permute the combinations of up to PARITY-1 erasures. // // o) Raw password. No parity terms supplied; not an R-S codeword // // If none of the error/erasure forms succeed, the password is returned unmodified. // // If a non-zero 'minimum' or 'maximum' are provided, they constrain the possible // resultant password sizes that will be attempted. // static int decode( std::string &password, const std::vector &erasures, size_t minimum = PARITY,//always deemed at least 1 size_t maximum = 0 ) // if 0, no limit { int confidence; best_avg best; // Full/Partial parity. Apply some parity erasure if we have some erasure/correction // capability while maintaining at least one excess parity symbol for verification. // This can potentially result in longer password being returned, if the R-S decoder // accidentally solves a codeword. // // For example, if PARITY=3 (or 4) then (PARITY+1)/2 == 2, and we would only attempt up // to 1 parity erasure. This would leave 1 parity symbol to replace the 1 erasure, and // 1 remaining to validate the integrity of the password. // // The password must be long enough to contain at least 1 non-parity symbol, and the // designated number of non-erased parity symbols! However, by convention we'll demand // that the password contain at least PARITY symbols -- any less, and we can // accidentally correct the few remaining password symbols. // // Also, if any parity symbols won't decode (eg. were entered in error), we must deem // them to be erasures, too, and if the number of erasures exceeds the capacity of the // R-S codec, it'll fail (throw an exception, or at best solve with 0 confidence). for ( size_t era = 0 // how many parity symbols to deem erased ; era < (PARITY+1)/2 ; ++era ) { if ( password.size() < ( minimum ? minimum : 1 ) + PARITY - era ) { #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Rejected too short password \"" << password << std::string( era, '_' ) << "\"" << " (" << era << " parity skipped)" << std::endl; #endif continue; // too few password symbols to start checking parity } if ( maximum and password.size() > maximum + PARITY - era ) { #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Rejected too long password \"" << password << std::string( era, '_' ) << "\"" << " (" << era << " parity skipped)" << std::endl; #endif continue; // too few parity symbols erased to start checking parity } // Copy password, adding 'era' additional NULs std::string fixed( password.size() + era, 0 ); std::copy( password.begin(), password.end(), fixed.begin() ); // Decode the base-N parity, denoting any invalid (mistyped or trailing NUL) symbols // as erasures (adjust erasure offsets to be from start of password, not start of // parity). All newly added 'era' symbols will be NUL, and will be invalid. After // decoding parity, if we've slipped below our minimum R-S capacity threshold // (ie. because of mistyped parity symbols), don't attempt. std::vector all_era; SERIAL::decode( fixed.begin() + fixed.size() - PARITY, fixed.begin() + fixed.size(), &all_era, 0, serialize::ws_invalid, serialize::pd_invalid ); if ( all_era.size() >= (PARITY+1)/2 ) { #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Rejected low parity password \"" << password << std::string( era, '_' ) << "\"" << " (" << all_era.size() << " parity erasures + " << era << " skipped)" << std::endl; #endif continue; // Too many missing parity symbols } if ( all_era.size() + erasures.size() > PARITY ) { #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Rejected hi erasure password \"" << password << std::string( era, '_' ) << "\"" << " (" << all_era.size() + erasures.size() << " total erasures + " << era << " skipped)" << std::endl; #endif continue; // Total erasures beyond capacity } for ( auto &o : all_era ) o += fixed.size() - PARITY; std::copy( erasures.begin(), erasures.end(), std::back_inserter( all_era )); // Enough parity to try to decode. A successful R-S decode with 0 (remaining) // confidence indicates a successfully validated R-S codeword! Use it (ex. parity). try { std::vector position; int corrects= rscodec.decode( fixed, all_era, &position ); confidence = strength( corrects, all_era, position ); fixed.resize( fixed.size() - PARITY ); if ( confidence >= 0 ) best.add( fixed, confidence ); #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Reed-Solomon w/ " << era << " of " << PARITY << " parity erasures " << std::setw( 3 ) << confidence << "% confidence: \"" << password << "\" ==> \"" << fixed << "\" (corrects: " << corrects << ", erasures at " << all_era << ", fixed at " << position << "): " << std::endl << best; #endif } catch ( std::exception &exc ) { #if defined( DEBUG ) && DEBUG >= 2 // should see only when ezpwd::reed_solomon<...>::decode fails output( std::cout ) << " invalid part parity password: " << exc.what() << std::endl; #endif } } // Partial parity, but below threshold for usable error detection. For the first 1 to // (PARITY+1)/2 parity symbols (eg. for PARITY == 3, (PARITY+1)/2 == 1 ), we cannot // perform meaningful error or erasure detection. However, if we see that the terminal // symbols match the R-S symbols we expect from a correct password, we'll ascribe a // partial confidence due to the matching parity symbols. // // password: sock1t // w/ 3 parity: sock1tkeB // password ----^^^^^^ // ^^^--- parity // for ( size_t era = (PARITY+1)/2 // how many parity symbols are not present ; era < PARITY ; ++era ) { if ( password.size() < ( minimum ? minimum : 1 ) + PARITY - era ) { #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Rejected too short password \"" << password << std::string( era, '_' ) << "\"" << std::endl; #endif continue; // too few password symbols to start checking parity } if ( maximum and password.size() > maximum + PARITY - era ) { #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Rejected too long password \"" << password << std::string( era, '_' ) << "\"" << " (" << era << " parity skipped)" << std::endl; #endif continue; // too few parity symbols erased to start checking parity } std::string fixed = password; size_t len = password.size() - ( PARITY - era ); fixed.resize( len ); encode( fixed ); auto differs = std::mismatch( fixed.begin(), fixed.end(), password.begin() ); size_t par_equ = differs.second - password.begin(); if ( par_equ < len || par_equ > len + PARITY ) throw std::runtime_error( "miscomputed R-S parity matching length" ); par_equ -= len; // At least one parity symbol is requires to give any confidence if ( par_equ > 0 ) { std::string basic( fixed.begin(), fixed.begin() + len ); confidence = par_equ * 100 / PARITY; // each worth a normal parity symbol best.add( basic, confidence ); #if defined( DEBUG ) && DEBUG >= 1 output( std::cout ) << " Check Chars. w/ " << era << " of " << PARITY << " parity missing " << std::setw( 3 ) << confidence << "% confidence: \"" << password << "\" ==> \"" << basic << " (from computed: \"" << fixed << "\")" << ": " << std::endl << best; #endif } } // Select the best guess and return its confidence. Otherwise, use raw password? If no // error/erasure attempts succeeded (if no 'best' w/ confidence >= 0), then we'll use // the raw password w/ 0 confidence, if it meets the minimum/maximum length // requirements. confidence = -1; if ( password.size() >= ( minimum ? minimum : 1 ) and ( maximum == 0 or password.size() <= maximum )) confidence = 0; typename best_avg::const_iterator bi = best.best(); #if defined( DEBUG ) output( std::cout ) << " Selected " << ( bi != best.end() ? "corrected" : "unmodified" ) << " password \"" << ( bi != best.end() ? bi->first : password ) << "\" of length " << ( minimum ? minimum : 1) << "-" << maximum << " (vs. \"" << password << "\") w/ confidence " << (bi != best.end() ? bi->second.second : confidence ) << "%, from: " << std::endl << best; #endif if ( bi != best.end() ) { auto better = best.evaluation( *bi ); // --> (,) password = better.second; confidence = better.first; } return confidence; } static int decode( std::string &password, size_t minimum = PARITY, size_t maximum = 0 ) { return decode( password, std::vector(), minimum, maximum ); } // // decode(,,,) -- C interface to decode() // // Traditional C interface. The provided NUL-terminated password+parity is decoded // (parity removed), and the confidence % is returned. // // If any failure occurs, a -'ve value will be returned, and the supplied password // buffer will be used to contain an error description. // static int decode( char *password, // NUL terminated size_t siz, // available size size_t minimum = PARITY,//minimum resultant password length size_t maximum = 0 ) // maximum '' { std::string corrected( password ); int confidence; try { confidence = decode( corrected, minimum, maximum ); if ( corrected.size() + 1 > siz ) throw std::runtime_error( "password buffer has insufficient capacity" ); std::copy( corrected.begin(), corrected.end(), password ); password[corrected.size()] = 0; } catch ( std::exception &exc ) { confidence = -1; ezpwd::streambuf_to_buffer sbf( password, siz ); std::ostream( &sbf ) << "corrector<" << PARITY << "> failed: " << exc.what(); } return confidence; } // // rscodec -- A ?-bit RS(N-1,N-1-PARITY) Reed-Solomon codec // // Encodes and decodes R-S symbols over the lower 6 bits of the supplied data. Requires // that the last N (parity) symbols of the data are in the range [0,63]. The excess bits on // the data symbols are masked and restored during decoding. // static const ezpwd::RS rscodec; }; template < size_t PARITY, size_t N, typename SERIAL > const ezpwd::RS corrector::rscodec; } // namespace ezpwd template < size_t PARITY, size_t N, typename SERIAL > std::ostream &operator<<( std::ostream &lhs, const ezpwd::corrector &rhs ) { return rhs.output( lhs ); } #endif // _EZPWD_CORRECTOR