How do I create statistics for de-hashed passwords using Python?

47 Views Asked by At

MD5 Password De-Hasher

I have been working on this MD5 cracker for a day or two now, and cannot seem to work out how to implement the next part. Right now the code opens a file with usernames and hashed passwords, as well as rockyou.txt to check the hashed passwords against. It then de-hashes all the passwords that it can based on the rockyou text file, and outputs a CSV file with the usernames and passwords that it could find. Now I also want the program to output a statistics text file, where it lists:

  1. The number of passwords cracked
  2. The number of users with the same (repeated) cracked passwords
  3. The top 10 most repeated cracked passwords in plain text and the number of times each is repeated (in descending order)
  4. The number of passwords NOT cracked
  5. The number of users with the same (repeated) non-cracked passwords
#Import necessary libraries
import hashlib
import openpyxl
import csv
import chardet

# Configuration settings for file locations
# All paths are relative, so they are resolved against the current working directory.
# Input: Excel workbook holding the usernames and MD5 password hashes.
passwordFile = "Random Projects/Password Cracker/passwords.xlsx"
# Input: CSV word list whose entries are hashed and compared against the workbook.
wordListFile = "Random Projects/Password Cracker/wordlist.csv"
# Output: CSV of username / recovered plaintext password pairs.
outputFile = "Random Projects/Password Cracker/decodedPasswords.csv"
# Output: intended statistics report; not yet used by any function in this version.
statsFile = "Random Projects/Password Cracker/passwordStats.txt"

#Open word list csv
def loadWordList(wordListFile, encoding=None):
    """Build an MD5-hash -> plaintext lookup table from the word list CSV.

    Args:
        wordListFile: Path to a CSV file whose 2nd column holds the candidate words.
        encoding: Text encoding of the file. When None (the default) the
            encoding is detected with chardet, which reads the whole file once.

    Returns:
        dict mapping the lowercase hex MD5 digest of each word to the word itself.
    """
    print("Loading wordlist...")

    if encoding is None:
        # Use chardet to detect the encoding of the file
        with open(wordListFile, 'rb') as rawfile:
            detected = chardet.detect(rawfile.read())
        # chardet reports None when it cannot decide; fall back to UTF-8
        # instead of silently using the platform default encoding.
        encoding = detected['encoding'] or 'utf-8'
        # Print the encoding that will be used
        print("Detected encoding:", encoding)

    wordlist = {}

    # newline='' lets the csv module handle line endings itself
    with open(wordListFile, 'r', encoding=encoding, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines and rows without a 2nd column carry no word
            if len(row) < 2:
                continue
            word = row[1]

            # Hash the word and store it as the key, with the actual word as the value
            wordlist[hashlib.md5(word.encode()).hexdigest()] = word

    print("Wordlist loaded with", len(wordlist), "hashed words.")
    return wordlist

def decodePasswords(passwordFile, wordListFile, outputFile):
    """Look up every hash in the workbook against the word list and save the matches.

    Args:
        passwordFile: Path to the Excel workbook; usernames are read from the
            2nd column and MD5 hashes from the 3rd, starting at row 2.
        wordListFile: Path to the word list CSV passed to loadWordList.
        outputFile: Path of the CSV that receives [username, password] rows.

    Any exception is caught and reported on stdout; nothing is returned.
    """
    try:
        #Set wordlist as the loaded list
        wordlist = loadWordList(wordListFile)

        #Open the excel sheet and set as active worksheet
        workbook = openpyxl.load_workbook(passwordFile)
        worksheet = workbook.active

        #Iteration starts at row 2 (row 1 is treated as a header), so the
        #number of records is one less than max_row. Using max_row itself
        #meant the progress percentage could never reach 100%.
        total_records = worksheet.max_row - 1
        print("Excel file loaded.")   #Print the loaded message

        #Start an empty list for decoded passwords
        decoded_passwords = []

        #Records are numbered from 1, regardless of their sheet position
        data_rows = worksheet.iter_rows(min_row=2, values_only=True)
        for record_number, row in enumerate(data_rows, start=1):
            username = row[1]  # Assuming username is in the 2nd column
            md5_hash = row[2]  # Assuming hashed passwords are in the 3rd column

            #hexdigest() keys are lowercase with no padding, so normalise the
            #stored hash before the lookup
            if isinstance(md5_hash, str):
                md5_hash = md5_hash.strip().lower()

            unhashedPassword = wordlist.get(md5_hash)

            #Compare against None so an empty-string password still counts as found
            if unhashedPassword is not None:
                decoded_passwords.append([username, unhashedPassword])

                #Print progress statement for each found hash
                print(f"Password found for user {username} (Record {record_number}/{total_records}, {record_number / total_records * 100:.2f}% completed)")

        #After all possible hashes found, print number of passwords decoded
        print("Passwords decoded:", len(decoded_passwords))

        #Create the output file and write the decoded passwords to it
        with open(outputFile, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(decoded_passwords)

        print("Output file created:", outputFile)   #Print output creation statement

    #Report any failure instead of letting the script crash with a traceback
    except Exception as e:
        print("Error decoding passwords:", e)

#Run the main function
#The guard means the decode only starts when this file is executed as a script,
#not when it is imported as a module.
if __name__ == "__main__":
    decodePasswords(passwordFile, wordListFile, outputFile)

Here is what I have tried so far, but the calculations for the statistics seem off, and I have not implemented the top 10 either.

import hashlib
import openpyxl
import csv
import chardet

# Configuration settings for file locations
# All paths are relative, so they are resolved against the current working directory.
# Input: Excel workbook holding the usernames and MD5 password hashes.
passwordFile = "Random Projects/Password Cracker/passwords.xlsx"
# Input: CSV word list whose entries are hashed and compared against the workbook.
wordListFile = "Random Projects/Password Cracker/wordlist.csv"
# Output: CSV of username / recovered plaintext password pairs.
decodedOutputFile = "Random Projects/Password Cracker/decodedPasswords.csv"
# Output: plain-text statistics report.
statsOutputFile = "Random Projects/Password Cracker/passwordStats.txt"

# Open word list csv
def loadWordList(wordListFile, encoding=None):
    """Read the word list CSV and return a dict of MD5 hex digest -> plaintext word.

    Args:
        wordListFile: Path to a CSV file whose 2nd column holds the candidate words.
        encoding: Optional text encoding. Leave as None to have chardet detect
            it (this reads the entire file once before parsing).

    Returns:
        dict keyed by the lowercase hex MD5 digest of each word.
    """
    print("Loading wordlist...")

    if encoding is None:
        # Use chardet to detect the encoding of the file
        with open(wordListFile, 'rb') as rawfile:
            guess = chardet.detect(rawfile.read())
        # A None guess would make open() use the platform default; prefer UTF-8
        encoding = guess['encoding'] or 'utf-8'
        # Print the encoding that will be used
        print("Detected encoding:", encoding)

    wordlist = {}

    # Open the word file for reading; newline='' is what the csv module expects
    with open(wordListFile, 'r', encoding=encoding, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Skip blank lines and rows that have no 2nd column
            if len(row) < 2:
                continue
            word = row[1]

            # Hash the word and store it as the key, with the actual word as the value
            wordlist[hashlib.md5(word.encode()).hexdigest()] = word

    print("Wordlist loaded with", len(wordlist), "hashed words.")
    return wordlist

def decodePasswords(passwordFile, wordListFile, decodedOutputFile, statsOutputFile):
    """Crack the workbook's hashes, save the matches and write a statistics report.

    Args:
        passwordFile: Path to the Excel workbook; usernames are read from the
            2nd column and MD5 hashes from the 3rd, starting at row 2.
        wordListFile: Path to the word list CSV passed to loadWordList.
        decodedOutputFile: Path of the CSV that receives [username, password] rows.
        statsOutputFile: Path of the text file that receives the statistics.

    The report lists: passwords cracked, users sharing a cracked password,
    distinct cracked passwords that are shared, the 10 most used cracked
    passwords, passwords not cracked, users sharing an uncracked password and
    distinct uncracked hashes that are shared.

    Any exception is caught and reported on stdout; nothing is returned.
    """
    try:
        # Set wordlist as the loaded list
        wordlist = loadWordList(wordListFile)

        # Open the excel sheet and set it as the active worksheet
        workbook = openpyxl.load_workbook(passwordFile)
        worksheet = workbook.active

        # Iteration starts at row 2 (row 1 is treated as a header), so the
        # number of records is one less than max_row.
        total_records = worksheet.max_row - 1
        print("Excel file loaded.")   # Print the loaded message

        # Start an empty list for decoded passwords
        decoded_passwords = []

        # Map each password to the list of users that share it. Cracked
        # passwords are keyed by plaintext, uncracked ones by their hash.
        cracked_password_occurrences = {}
        uncracked_password_occurrences = {}

        data_rows = worksheet.iter_rows(min_row=2, values_only=True)
        for record_number, row in enumerate(data_rows, start=1):
            username = row[1]  # Assuming username is in the 2nd column
            md5_hash = row[2]  # Assuming hashed passwords are in the 3rd column

            # Rows without a hash are not user records; counting them would
            # make every blank row look like one shared uncracked password.
            if md5_hash is None:
                continue

            # hexdigest() keys are lowercase with no padding
            if isinstance(md5_hash, str):
                md5_hash = md5_hash.strip().lower()

            unhashedPassword = wordlist.get(md5_hash)

            # Compare against None so an empty-string password still counts as cracked
            if unhashedPassword is not None:
                decoded_passwords.append([username, unhashedPassword])
                cracked_password_occurrences.setdefault(unhashedPassword, []).append(username)

                # Print progress statement for each found hash
                print(f"Password found for user {username} (Record {record_number}/{total_records}, {record_number / total_records * 100:.2f}% completed)")
            else:
                uncracked_password_occurrences.setdefault(md5_hash, []).append(username)

        decoded_password_count = len(decoded_passwords)
        # Count the uncracked records directly; max_row would include the header
        uncracked_password_count = sum(
            len(usernames) for usernames in uncracked_password_occurrences.values()
        )

        # After all possible hashes found, print the number of passwords decoded
        print("Passwords decoded:", decoded_password_count)

        # "Repeated" means used by more than one user. Passwords are counted
        # once each, users are counted once per account.
        shared_cracked = [u for u in cracked_password_occurrences.values() if len(u) > 1]
        repeated_cracked_password_count = len(shared_cracked)
        users_with_repeated_cracked_passwords = sum(len(u) for u in shared_cracked)

        shared_uncracked = [u for u in uncracked_password_occurrences.values() if len(u) > 1]
        repeated_uncracked_password_count = len(shared_uncracked)
        users_with_repeated_uncracked_passwords = sum(len(u) for u in shared_uncracked)

        # Ten most used cracked passwords, most frequent first
        top_cracked_passwords = sorted(
            cracked_password_occurrences.items(),
            key=lambda item: len(item[1]),
            reverse=True,
        )[:10]

        # Create the output file and write the decoded passwords to it
        with open(decodedOutputFile, 'w', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerows(decoded_passwords)

        print("Output file created:", decodedOutputFile)   # Print output creation statement

        # Generate statistics
        stats = [
            f"Passwords Successfully Cracked: {decoded_password_count}",
            f"Users with Repeated Cracked Passwords: {users_with_repeated_cracked_passwords}",
            f"Repeated Cracked Passwords: {repeated_cracked_password_count}",
            "Top 10 Most Repeated Cracked Passwords:",
        ]
        stats.extend(
            f"  {rank}. {password}: {len(usernames)}"
            for rank, (password, usernames) in enumerate(top_cracked_passwords, start=1)
        )
        stats.append(f"Passwords NOT Cracked: {uncracked_password_count}")
        stats.append(f"Users with Repeated Uncracked Passwords: {users_with_repeated_uncracked_passwords}")
        stats.append(f"Repeated Uncracked Passwords: {repeated_uncracked_password_count}")

        # Write the statistics to the stats file
        with open(statsOutputFile, 'w', encoding='utf-8') as statsfile:
            statsfile.write("\n".join(stats))

        print(f"Statistics file created: {statsOutputFile}")

    # Report any failure instead of letting the script crash with a traceback
    except Exception as e:
        print("Error decoding passwords:", e)

# Run the main function
# The guard means the decode only starts when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    decodePasswords(passwordFile, wordListFile, decodedOutputFile, statsOutputFile)

1

There is 1 best solution below

0
Cotten32 On

Wanted to come back to this post and post the finished code that I found worked best for my needs. I ended up going with @Barmar's suggestion of using pandas data frames to aid in generation of statistics about the cracked and un-cracked passwords.

import hashlib
import openpyxl
import csv
import chardet
import pandas as pd

# Configuration settings for file locations
# All paths are relative, so they are resolved against the current working directory.
# Input: Excel workbook holding the usernames and MD5 password hashes.
passwordFile = "Random Projects/Password Cracker/passwords.xlsx"
# Input: CSV word list whose entries are hashed and compared against the workbook.
wordListFile = "Random Projects/Password Cracker/wordlist.csv"
# Output: CSV of username / recovered plaintext password pairs.
outputFile = "Random Projects/Password Cracker/decodedPasswords.csv"
# Output: plain-text statistics report.
statsFile = "Random Projects/Password Cracker/passwordStats.txt"

# Open word list csv
def loadWordList(wordListFile, encoding=None):
    """
        This function loads the wordlist file (copy of rockyou.txt) and opens it with the correct encoding.
        1.) It goes through each row and takes the word from the 2nd column.
        2.) It hashes each word with MD5 and stores the hash as the key and the plaintext as the value.
        3.) It prints a completion statement and returns the wordlist dictionary.

        wordListFile: path to the word list CSV.
        encoding: optional text encoding; when None it is detected with chardet,
                  which reads the whole file once before it is parsed.
    """

    print("Loading wordlist...")

    if encoding is None:
        print("Detecting encoding...")
        # Use chardet to detect the encoding of the file
        with open(wordListFile, 'rb') as rawfile:
            detection = chardet.detect(rawfile.read())
        # chardet reports None when it cannot decide; fall back to UTF-8
        # instead of silently using the platform default encoding.
        encoding = detection['encoding'] or 'utf-8'
        # Print the encoding that will be used
        print("Detected encoding:", encoding)

    # Start an empty dictionary
    wordlist = {}

    # Open the word file for reading; newline='' is what the csv module expects
    with open(wordListFile, 'r', encoding=encoding, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Blank lines and rows without a 2nd column carry no word
            if len(row) < 2:
                continue
            word = row[1]

            # Hash the word and store it as the key, with the actual word as the value
            wordlist[hashlib.md5(word.encode()).hexdigest()] = word

    print("Wordlist loaded with", len(wordlist), "hashed words.")
    return wordlist

def decodePasswords(passwordFile, wordListFile, outputFile, statsFile):
    """
        This function loads the password file and looks up each stored hash in the hash table built from the wordlist (rockyou.txt).
        Cracked passwords are collected as [username, plaintext] pairs; for passwords that could not be cracked it counts
        how many times each hash occurs, so that reuse statistics can be calculated later on.
        The cracked pairs are put in a pandas DataFrame, written to a csv file with "to_csv", and then handed to calcStats
        together with the uncracked hash counts.

        passwordFile: path to the Excel workbook (username in the 2nd column, MD5 hash in the 3rd, data from row 2).
        wordListFile: path to the word list CSV passed to loadWordList.
        outputFile: path of the csv file that receives the cracked passwords.
        statsFile: path of the statistics text file written by calcStats.

        Any exception is caught and reported on stdout; nothing is returned.
    """

    try:
        # Set wordlist as the loaded list
        wordlist = loadWordList(wordListFile)

        # Open the excel sheet and set as the active worksheet
        workbook = openpyxl.load_workbook(passwordFile)
        worksheet = workbook.active

        # max_row still counts the header row; calcStats expects that value,
        # but progress is measured against the data rows only.
        maxRows = worksheet.max_row
        totalRecords = maxRows - 1
        print("Excel file loaded.")  # Print the loaded message

        # Start empty lists for decoded and undecoded passwords
        decodedPasswords = []
        undecodedPasswords = []

        # Count how many users share each hash that could not be cracked
        passwordReuseCount = {}

        dataRows = worksheet.iter_rows(min_row=2, values_only=True)
        for recordNumber, row in enumerate(dataRows, start=1):
            username = row[1]  # Assuming username is in the 2nd column
            md5Hash = row[2]  # Assuming hashed passwords are in the 3rd column

            # hexdigest() keys are lowercase with no padding, so normalise the
            # stored hash before the lookup and before counting reuse
            if isinstance(md5Hash, str):
                md5Hash = md5Hash.strip().lower()

            unhashedPassword = wordlist.get(md5Hash)

            # Compare against None so an empty-string password still counts as cracked
            if unhashedPassword is not None:
                decodedPasswords.append([username, unhashedPassword])

                # Print progress statement for each found hash (this used to
                # sit in the else branch and fired for passwords NOT found)
                print(f"Password found for user {username} (Record {recordNumber}/{totalRecords}, {round(recordNumber / totalRecords * 100, 2)}% completed)")
            else:
                undecodedPasswords.append(username)  # Collect undecoded usernames
                passwordReuseCount[md5Hash] = passwordReuseCount.get(md5Hash, 0) + 1

        # Create a DataFrame from the decoded passwords
        decodedpasswordsDF = pd.DataFrame(decodedPasswords, columns=['Username', 'DecodedPassword'])

        # After all possible hashes found, print the number of passwords decoded and not cracked
        print(f"\nPasswords decoded: {len(decodedPasswords)}")
        print(f"Passwords not cracked: {len(undecodedPasswords)}")

        # Save the decoded passwords to a CSV file
        decodedpasswordsDF.to_csv(outputFile, index=False, encoding='utf-8')

        print("Output file created:", outputFile)  # Print output creation statement

        # Call the calcStats function
        calcStats(decodedpasswordsDF, passwordReuseCount, statsFile, maxRows)

    # Report any failure instead of letting the script crash with a traceback
    except Exception as e:
        print("Error decoding passwords:", e)


def calcStats(decodedpasswordsDF, passwordReuseCount, statsFile, maxRows):
    """
        This function calculates statistics for the decoded passwords and writes them to a text file:
        the totals, how many users share a cracked password, how many distinct cracked passwords are shared,
        the 10 most used cracked passwords, and the same reuse figures for the hashes that could not be cracked.

        decodedpasswordsDF: DataFrame with a 'DecodedPassword' column, one row per cracked user.
        passwordReuseCount: dict mapping each uncracked hash to the number of users that have it.
        statsFile: path of the text file to write.
        maxRows: number of rows in the sheet including the header row.

        Any exception is caught and reported on stdout; nothing is returned.
    """

    try:
        totalUsers = maxRows - 1  # Subtract 1 to exclude the header
        passwordsCracked = len(decodedpasswordsDF)
        passwordsNotCracked = totalUsers - passwordsCracked

        # Occurrences of every cracked password, most frequent first
        crackedCounts = decodedpasswordsDF['DecodedPassword'].value_counts()

        # Only passwords used by more than one user count as "the same"/"reused".
        # Summing their counts gives users; counting the entries gives passwords.
        reusedCracked = crackedCounts[crackedCounts > 1]
        usersSharingCracked = int(reusedCracked.sum())
        reusedCrackedPasswords = len(reusedCracked)

        # Top 10 most frequently used cracked passwords
        top10Passwords = crackedCounts.head(10)

        # Uncracked hashes used by more than one user, most frequent first
        reusedUncracked = sorted(
            ((md5Hash, count) for md5Hash, count in passwordReuseCount.items() if count > 1),
            key=lambda item: item[1],
            reverse=True,
        )
        usersSharingUncracked = sum(count for _, count in reusedUncracked)
        reusedUncrackedPasswords = len(reusedUncracked)

        # Build the whole report first so a failure above never leaves a
        # half-written statistics file behind.
        report = [
            "               STATISTICS OUTPUT\n",
            "--------------------------------------------------\n",
            f"Total users in the file: {totalUsers}\n",
            f"Number of passwords cracked: {passwordsCracked}\n",
            f"Number of passwords not cracked: {passwordsNotCracked}\n\n",
            "               CRACKED PASSWORD STATS\n",
            "--------------------------------------------------\n",
            f"Count of users with the same cracked passwords: {usersSharingCracked}\n",
            f"Total count of cracked passwords used more than once: {reusedCrackedPasswords}\n",
            "\nTop 10 most frequently used cracked passwords:\n",
        ]
        report.extend(f"{password}: {count} times\n" for password, count in top10Passwords.items())

        report.extend([
            "\n             UNCRACKED PASSWORD STATS\n",
            "--------------------------------------------------\n",
            f"Count of users with the same uncracked passwords: {usersSharingUncracked}\n",
            f"Total count of uncracked passwords used more than once: {reusedUncrackedPasswords}\n",
            "\nTop 10 most frequently used uncracked passwords (still hashed):\n",
        ])
        report.extend(f"{md5Hash}: {count} times\n" for md5Hash, count in reusedUncracked[:10])

        # Save the statistics to the text file
        with open(statsFile, 'w', newline='', encoding='utf-8') as statsfile:
            statsfile.writelines(report)

        # Success message to tell the user where the file was saved
        print("Statistics saved to:", statsFile)

    # Report any failure instead of letting the script crash with a traceback
    except Exception as e:
        print("Error calculating statistics:", e)

# Run the main function
# The guard means the decode only starts when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    decodePasswords(passwordFile, wordListFile, outputFile, statsFile)