#!/usr/bin/env python3

# ~ Thanks:
# ~ https://github.com/kolypto/py-password-strength/blob/master/password_strength/stats.py

import re
import unicodedata
from collections import Counter
from functools import wraps
from math import log, log2


def cached_property(f):
    """ Property that will replace itself with a calculated value.

    The wrapped method is evaluated on first access; its result is stored on
    the instance under the attribute ``'__' + f.__name__`` and served from
    there on every later access. Only a getter is defined, so the resulting
    property cannot be assigned to.

    :param f: Method taking only ``self`` whose result should be cached
    :type f: callable
    :return: Property wrapping ``f`` with per-instance caching
    :rtype: property
    """
    name = '__' + f.__name__

    @wraps(f)
    def wrapper(self):
        # Compute lazily: only the first access pays for the calculation
        if not hasattr(self, name):
            setattr(self, name, f(self))
        return getattr(self, name)

    return property(wrapper)


class PasswordStats(object):
    """ PasswordStats allows to calculate statistics on a password.

    It considers a password as a unicode string, and all statistics are unicode-based.

    Statistics are calculated lazily and cached on the instance at first
    access, so values that were already read are not refreshed if ``password``
    is reassigned afterwards.
    """

    def __init__(self, password):
        """ Remember the password to analyze

        :param password: The password to calculate statistics for
        :type password: str
        """
        self.password = password

    #region Statistics

    @cached_property
    def alphabet(self):
        """ Get alphabet: set of used characters

        :rtype: set
        """
        return set(self.password)

    @cached_property
    def alphabet_cardinality(self):
        """ Get alphabet cardinality: alphabet length

        :rtype: int
        """
        return len(self.alphabet)

    @cached_property
    def char_categories_detailed(self):
        """ Character count per unicode category, detailed format.

        See: http://www.unicode.org/reports/tr44/#GC_Values_Table

        :returns: Counter( unicode-character-category: count )
        :rtype: collections.Counter
        """
        return Counter(map(unicodedata.category, self.password))

    @cached_property
    def char_categories(self):
        """ Character count per top-level category

        The following top-level categories are defined:

        - L: Letter
        - M: Mark
        - N: Number
        - P: Punctuation
        - S: Symbol
        - Z: Separator
        - C: Other

        :return: Counter( unicode-character-category: count )
        :rtype: collections.Counter
        """
        totals = Counter()
        for category, n in self.char_categories_detailed.items():
            # The first letter of a detailed category is its top-level category
            totals[category[0]] += n
        return totals

    #endregion

    #region Counters

    @cached_property
    def length(self):
        """ Get password length

        :rtype: int
        """
        return len(self.password)

    @cached_property
    def letters(self):
        """ Count all letters

        :rtype: int
        """
        return self.char_categories['L']

    @cached_property
    def letters_uppercase(self):
        """ Count uppercase letters

        :rtype: int
        """
        return self.char_categories_detailed['Lu']

    @cached_property
    def letters_lowercase(self):
        """ Count lowercase letters

        :rtype: int
        """
        return self.char_categories_detailed['Ll']

    @cached_property
    def numbers(self):
        """ Count numbers

        :rtype: int
        """
        return self.char_categories['N']

    def count(self, *categories):
        """ Count characters of the specified classes only

        :param categories: Top-level character categories to count (e.g. 'L', 'N')
        :type categories: Iterable
        :rtype: int
        """
        return sum(n for category, n in self.char_categories.items()
                   if category in categories)

    def count_except(self, *categories):
        """ Count characters of all classes except the specified ones

        :param categories: Top-level character categories to exclude from count
        :type categories: Iterable
        :rtype: int
        """
        return sum(n for category, n in self.char_categories.items()
                   if category not in categories)

    @cached_property
    def special_characters(self):
        """ Count special characters

        Special characters is everything that's not a letter or a number

        :rtype: int
        """
        return self.count_except('L', 'N')

    #endregion

    #region Security

    @cached_property
    def combinations(self):
        """ The number of possible combinations with the current alphabet

        :rtype: int
        """
        return self.alphabet_cardinality ** self.length

    @cached_property
    def entropy_bits(self):
        """ Get information entropy bits: log2 of the number of possible passwords

        https://en.wikipedia.org/wiki/Password_strength

        An empty password has no entropy, so 0.0 is returned for it.

        :rtype: float
        """
        if self.length == 0:
            # log2(0) is undefined; an empty password carries no information
            return 0.0
        return self.length * log2(self.alphabet_cardinality)

    @cached_property
    def entropy_density(self):
        """ Get information entropy density factor, ranged {0 .. 1}.

        This is ratio of entropy_bits() to max bits a password of this length could have.
        E.g. if all characters are unique -- then it's 1.0.
        If the alphabet size is the square root of the length (e.g. 'aabb') -- then it's 0.5.

        Degenerate lengths, for which the logarithm is undefined:

        * an empty password gives 0.0
        * a single-character password gives 1.0 (all of its characters are unique)

        :rtype: float
        """
        if self.length == 0:
            return 0.0
        if self.length == 1:
            # log base 1 is undefined; the only character is trivially unique
            return 1.0

        # Simplifying:
        # entropy_bits / (length * log(length, 2)) =
        # = log(alphabet_cardinality, 2) / log(length, 2) =
        # = log(alphabet_cardinality, length)
        return log(self.alphabet_cardinality, self.length)

    def strength(self, weak_bits=30):
        """ Get password strength as a number normalized to range {0 .. 1}.

        Normalization is done in the following fashion:

        1. If entropy_bits <= weak_bits -- linear in range {0.0 .. 0.33} (weak)
        2. If entropy_bits > weak_bits -- the value starts at 0.33 and grows
           asymptotically towards 1.0, reaching 0.95 at entropy_bits == weak_bits*4

        :param weak_bits: Minimum entropy bits a medium password should have. Must be positive.
        :type weak_bits: int
        :return: Normalized password strength:

            * <0.33 is WEAK
            * <0.66 is MEDIUM
            * >0.66 is STRONG

        :rtype: float
        :raises ValueError: if weak_bits is not positive
        """
        if weak_bits <= 0:
            # Zero would divide by zero below; negatives yield meaningless values
            raise ValueError('weak_bits must be positive')

        WEAK_MAX = 0.333333333
        HARD_VAL = 0.950

        bits = self.entropy_bits
        if bits <= weak_bits:
            return WEAK_MAX * bits / weak_bits

        hard_bits = weak_bits * 3

        # Here, we want a function of x = entropy_bits - weak_bits that:
        # 1. f(x)=0.333 at x=0
        # 2. f(x)=0.950 at x=hard_bits
        # 3. f(x) has an asymptote of 1.0 (normalization)
        #
        # First, the function:
        #       f(x) = 1 - (1-WEAK_MAX)*2^( -k*x)
        #
        # Now, the equation:
        #       f(hard_bits) = HARD_VAL
        #       1 - (1-WEAK_MAX)*2^( -k*hard_bits) = HARD_VAL
        #                        2^( -k*hard_bits) = (1 - HARD_VAL) / (1-WEAK_MAX)
        #       k = -log2((1 - HARD_VAL) / (1-WEAK_MAX)) / hard_bits
        k = -log2((1 - HARD_VAL) / (1 - WEAK_MAX)) / hard_bits

        # The offset makes the curve continue from WEAK_MAX at entropy_bits == weak_bits
        return 1 - (1 - WEAK_MAX) * pow(2, -k * (bits - weak_bits))

    #endregion

    #region Detectors

    # Group 1 is the whole repeated run, group 2 is the shortest repeating unit
    _repeated_patterns_rex = re.compile(r'((.+?)\2+)', re.UNICODE | re.DOTALL | re.IGNORECASE)

    @cached_property
    def repeated_patterns_length(self):
        """ Detect and return the length of repeated patterns.

        You will probably be comparing it with the length of the password itself and ban if it's longer than 10%

        :return: Total length of all substrings made of a pattern repeated at least twice (case-insensitive)
        :rtype: int
        """
        return sum(len(match.group(1))
                   for match in self._repeated_patterns_rex.finditer(self.password))

    # NOTE: the adjacent literals are concatenated into ONE string on purpose:
    # sequences_length() searches it with str.find(). A detected run may
    # therefore span the junction between two of the sequences below.
    _sequences = (
        'abcdefghijklmnopqrstuvwxyz'  # Alphabet
        'qwertyuiopasdfghjklzxcvbnm'  # Keyboard
        '~!@#$%^&*()_+-='  # Keyboard special, top row
        '01234567890'  # Numbers
    )
    _sequences = _sequences + _sequences[::-1]  # reversed

    @cached_property
    def sequences_length(self):
        """ Detect and return the length of used sequences:

        - Alphabet letters: abcd...
        - Keyboard letters: qwerty, etc
        - Keyboard special characters in the top row: ~!@#$%^&*()_+
        - Numbers: 0123456

        Both directions are detected. Matching is case-sensitive, and only
        runs longer than 2 characters are counted.

        :return: Total length of character sequences that are subsets of the common sequences
        :rtype: int
        """
        password = self.password
        sequences = self._sequences
        password_end = len(password)
        sequences_end = len(sequences)

        total = 0
        pos = 0
        while pos < password_end:
            # Longest run starting at `pos`; a lone character counts as 1 so we always advance
            longest = 1

            # A character may appear multiple times in the sequences: try every occurrence
            start = sequences.find(password[pos])
            while start != -1:
                # The first character matches by construction; extend while both sides agree
                run = 1
                while (pos + run < password_end
                       and start + run < sequences_end
                       and password[pos + run] == sequences[start + run]):
                    run += 1
                longest = max(longest, run)
                start = sequences.find(password[pos], start + 1)

            # Only runs of 3+ characters are considered a sequence
            if longest > 2:
                total += longest

            # Next: skip to the end of the detected run
            pos += longest

        return total

    @cached_property
    def weakness_factor(self):
        """ Get weakness factor as a float in range {0 .. 1}

        This detects the portion of the string that contains:

        * repeated patterns
        * sequences

        E.g. a value of 1.0 means the whole string is weak, and 0.5 means half of the string is weak.
        An empty password has no weak portion to measure, so 0.0 is returned for it.

        Typical usage:

            password_strength = (1 - weakness_factor) * strength

        :return: Weakness factor
        :rtype: float
        """
        if self.length == 0:
            return 0.0
        # A character may be counted by both detectors, hence the cap at 1.0
        weak_length = self.repeated_patterns_length + self.sequences_length
        return min(1.0, weak_length / self.length)

    #endregion