#!/usr/bin/env python3
# ~ Thanks:
# ~ https://github.com/kolypto/py-password-strength/blob/master/password_strength/stats.py

import re
import unicodedata
from collections import Counter
from functools import wraps
from math import log


def cached_property(f):
    """ Read-only property whose value is computed once per instance.

    On first access the wrapped method is called and its result is stored on
    the instance under the attribute name ``'__' + f.__name__``; subsequent
    accesses return the stored value without calling the method again.

    :param f: Method taking only ``self``
    :type f: callable
    :rtype: property
    """
    name = '__' + f.__name__

    @wraps(f)
    def wrapper(self):
        if not hasattr(self, name):
            setattr(self, name, f(self))
        return getattr(self, name)
    return property(wrapper)


class PasswordStats:
    """ PasswordStats allows to calculate statistics on a password.

    It considers a password as a unicode string, and all statistics are
    unicode-based.

    Most statistics are cached on first access, so they are not recomputed
    if `password` is reassigned afterwards.
    """

    def __init__(self, password):
        """
        :param password: The password to analyze
        :type password: str
        """
        self.password = password

    #region Statistics

    @cached_property
    def alphabet(self):
        """ Get alphabet: set of used characters

        :rtype: set
        """
        return set(self.password)

    @cached_property
    def alphabet_cardinality(self):
        """ Get alphabet cardinality: alphabet length

        :rtype: int
        """
        return len(self.alphabet)

    @cached_property
    def char_categories_detailed(self):
        """ Character count per unicode category, detailed format.

        See: http://www.unicode.org/reports/tr44/#GC_Values_Table

        :returns: Counter( unicode-character-category: count )
        :rtype: collections.Counter
        """
        return Counter(map(unicodedata.category, self.password))

    @cached_property
    def char_categories(self):
        """ Character count per top-level category

        The following top-level categories are defined:

        - L: Letter
        - M: Mark
        - N: Number
        - P: Punctuation
        - S: Symbol
        - Z: Separator
        - C: Other

        :return: Counter( unicode-character-category: count )
        :rtype: collections.Counter
        """
        c = Counter()
        for cat, n in self.char_categories_detailed.items():
            # The first letter of a detailed category is its top-level category
            c[cat[0]] += n
        return c

    #endregion

    #region Counters

    @cached_property
    def length(self):
        """ Get password length

        :rtype: int
        """
        return len(self.password)

    @cached_property
    def letters(self):
        """ Count all letters

        :rtype: int
        """
        return self.char_categories['L']

    @cached_property
    def letters_uppercase(self):
        """ Count uppercase letters

        :rtype: int
        """
        return self.char_categories_detailed['Lu']

    @cached_property
    def letters_lowercase(self):
        """ Count lowercase letters

        :rtype: int
        """
        return self.char_categories_detailed['Ll']

    @cached_property
    def numbers(self):
        """ Count numbers

        :rtype: int
        """
        return self.char_categories['N']

    def count(self, *categories):
        """ Count characters of the specified classes only

        :param categories: Top-level character categories to count (e.g. 'L', 'N')
        :type categories: Iterable
        :rtype: int
        """
        return sum(n for cat, n in self.char_categories.items()
                   if cat in categories)

    def count_except(self, *categories):
        """ Count characters of all classes except the specified ones

        :param categories: Top-level character categories to exclude from count
        :type categories: Iterable
        :rtype: int
        """
        return sum(n for cat, n in self.char_categories.items()
                   if cat not in categories)

    @cached_property
    def special_characters(self):
        """ Count special characters

        Special characters is everything that's not a letter or a number

        :rtype: int
        """
        return self.count_except('L', 'N')

    #endregion

    #region Security

    @cached_property
    def combinations(self):
        """ The number of possible combinations with the current alphabet

        :rtype: int
        """
        return self.alphabet_cardinality ** self.length

    @cached_property
    def entropy_bits(self):
        """ Get information entropy bits: log2 of the number of possible passwords

        https://en.wikipedia.org/wiki/Password_strength

        An empty password has 0.0 bits of entropy.

        :rtype: float
        """
        if not self.password:
            # log(0) is undefined; an empty password carries no information
            return 0.0
        return self.length * log(self.alphabet_cardinality, 2)

    @cached_property
    def entropy_density(self):
        """ Get information entropy density factor, ranged {0 .. 1}.

        This is ratio of entropy_bits() to max bits a password of this length
        could have. E.g. if all characters are unique -- then it's 1.0.
        If half of the characters are reused once -- then it's 0.5.

        Edge cases: an empty password yields 0.0, and a single-character
        password (whose only character is trivially unique) yields 1.0.

        :rtype: float
        """
        if self.length == 0:
            return 0.0
        if self.length == 1:
            # log(1, 1) is 0/0; all (one) characters are unique
            return 1.0

        # Simplifying:
        # entropy_bits / (length * log(length, 2)) =
        # = log(alphabet_cardinality, 2) / log(length, 2) =
        # = log(alphabet_cardinality, length)
        return log(self.alphabet_cardinality, self.length)

    def strength(self, weak_bits=30):
        """ Get password strength as a number normalized to range {0 .. 1}.

        Normalization is done in the following fashion:

        1. If entropy_bits <= weak_bits -- linear in range{0.0 .. 0.33} (weak)
        2. If entropy_bits > weak_bits -- an exponential curve that starts
           at 0.33 and approaches 1.0 asymptotically; it reaches 0.95 at
           entropy_bits == weak_bits*4

        :param weak_bits: Minimum entropy bits a medium password should have.
            Must be positive (it is used as a divisor).
        :type weak_bits: int
        :return: Normalized password strength:

            * <0.33 is WEAK
            * <0.66 is MEDIUM
            * >0.66 is STRONG

        :rtype: float
        """
        WEAK_MAX = 0.333333333

        if self.entropy_bits <= weak_bits:
            return WEAK_MAX * self.entropy_bits / weak_bits

        HARD_BITS = weak_bits*3
        HARD_VAL = 0.950

        # Here, we want a function that:
        # 1. f(x)=0.333 at x=0
        # 2. f(x)=0.950 at x=HARD_BITS
        # 3. f(x) has an asymptote of 1.0 (normalization)

        # First, the function:
        #       f(x) = 1 - (1-WEAK_MAX)*2^( -k*x)

        # Now, the equation:
        #       f(HARD_BITS) = HARD_VAL
        #       1 - (1-WEAK_MAX)*2^( -k*HARD_BITS) = HARD_VAL
        #                        2^( -k*HARD_BITS) = (1 - HARD_VAL) / (1-WEAK_MAX)
        #       k = -log2((1 - HARD_VAL) / (1-WEAK_MAX)) / HARD_BITS
        k = -log((1 - HARD_VAL) / (1-WEAK_MAX), 2) / HARD_BITS

        # The curve is evaluated with an offset of weak_bits so that it joins
        # the linear segment at WEAK_MAX. Consequently HARD_VAL is reached at
        # entropy_bits == weak_bits + HARD_BITS.
        x = self.entropy_bits - weak_bits
        return 1 - (1-WEAK_MAX)*pow(2, -k*x)

    #endregion

    #region Detectors

    _repeated_patterns_rex = re.compile(r'((.+?)\2+)', re.UNICODE | re.DOTALL | re.IGNORECASE)

    @cached_property
    def repeated_patterns_length(self):
        """ Detect and return the length of repeated patterns.

        You will probably be comparing it with the length of the password
        itself and ban if it's longer than 10%

        :return: Total length of all substrings that consist of a pattern
            repeated two or more times in a row (case-insensitive)
        :rtype: int
        """
        # findall() yields (whole repeated substring, repeated unit) pairs
        return sum(len(substring)
                   for substring, _ in self._repeated_patterns_rex.findall(self.password))

    # NOTE: adjacent literals are concatenated into ONE string, so a match may
    # run across the boundary between two of the sequences below.
    _sequences = (
        'abcdefghijklmnopqrstuvwxyz'  # Alphabet
        'qwertyuiopasdfghjklzxcvbnm'  # Keyboard
        '~!@#$%^&*()_+-='  # Keyboard special, top row
        '01234567890'  # Numbers
    )
    _sequences = _sequences + _sequences[::-1]  # reversed

    @cached_property
    def sequences_length(self):
        """ Detect and return the length of used sequences:

        - Alphabet letters: abcd...
        - Keyboard letters: qwerty, etc
        - Keyboard special characters in the top row: ~!@#$%^&*()_+
        - Numbers: 0123456

        Only runs of 3 or more characters are counted. Matching is
        case-sensitive: the reference sequences are lowercase.

        :return: Total length of character sequences that are subsets of the common sequences
        :rtype: int
        """
        # TODO: Optimize this. I'm sure there is a better way!...
        password = self.password
        sequences = self._sequences
        sequences_length = 0

        # Iterate through the string, with manual variable (to allow skips)
        i = 0
        while i < len(password):
            # Remaining, not yet examined part of the password
            tail = password[i:]

            # Try every occurrence of the current character in the reference
            # sequences (a character may appear multiple times) and keep the
            # longest common prefix found.
            common_length = 1
            j = sequences.find(tail[0])
            while j != -1:
                common_here = 0
                for a, b in zip(tail, sequences[j:]):
                    if a != b:
                        break
                    common_here += 1

                # If it's longer than previous discoveries -- store it
                common_length = max(common_length, common_here)
                j = sequences.find(tail[0], j + 1)

            # Long enough to be considered a sequence?
            if common_length > 2:
                sequences_length += common_length

            # Next: skip to the end of the detected sequence
            i += common_length
        return sequences_length

    @cached_property
    def weakness_factor(self):
        """ Get weakness factor as a float in range {0 .. 1}

        This detects the portion of the string that contains:

        * repeated patterns
        * sequences

        E.g. a value of 1.0 means the whole string is weak, and 0.5 means half
        of the string is weak.

        An empty password yields 0.0: it contains no weak portion (and its
        strength is already 0).

        Typical usage:

            password_strength = (1 - weakness_factor) * strength

        :return: Weakness factor
        :rtype: float
        """
        if self.length == 0:
            # Avoid dividing by zero for an empty password
            return 0.0
        return min(1.0, (self.repeated_patterns_length + self.sequences_length) / self.length)

    #endregion