For a class assignment, I'm supposed to grab the contents of a file, compute the MD5 hash and store it in a separate file. Then I'm supposed to be able to check the integrity by comparing the MD5 hash. I'm relatively new to Python and JSON, so I thought I'd try to tackle those things with this assignment as opposed to going with something I already know.
Anyway, my program reads from a file, creates a hash, and stores that hash into a JSON file just fine. The problem comes in with my integrity checking. When I return the results of the computed hash of the file, it's different from what is recorded in the JSON file even though no changes have been made to the file. Below is an example of what is happening and I pasted my code as well. Thanks in advance for the help.
For example: These are the contents of my JSON file
Content: b'I made a file to test the md5\n'
digest: 1e8f4e6598be2ea2516102de54e7e48e
This is what is returned when I try to check the integrity of the exact same file (no changes made to it):
Content: b'I made a file to test the md5\n'
digest: ef8b7bf2986f59f8a51aae6b496e8954
import hashlib
import json
import os
import fnmatch
from codecs import open
#opens the file, reads/encodes it, and returns the contents (c)
def read_the_file(f_location):
with open(f_location, 'r', encoding="utf-8") as f:
c = f.read()
f.close()
return c
def scan_hash_json(directory_content):
for f in directory_content:
location = argument + "/" + f
content = read_the_file(location)
comp_hash = create_hash(content)
json_obj = {"Directory": argument, "Contents": {"filename": str(f),
"original string": str(content), "md5": str(comp_hash)}}
location = location.replace(argument, "")
location = location.replace(".txt", "")
write_to_json(location, json_obj)
#scans the file, creates the hash, and writes it to a json file
def read_the_json(f):
f_location = "recorded" + "/" + f
read_json = open(f_location, "r")
json_obj = json.load(read_json)
read_json.close()
return json_obj
#check integrity of the file
def check_integrity(d_content):
#d_content = directory content
for f in d_content:
json_obj = read_the_json(f)
text = f.replace(".json", ".txt")
result = find(text, os.getcwd())
content = read_the_file(result)
comp_hash = create_hash(content)
print("content: " + str(content))
print(result)
print(json_obj)
print()
print("Json Obj: " + json_obj['Contents']['md5'])
print("Hash: " + comp_hash)
#find the file being searched for
def find(pattern, path):
result = ""
for root, dirs, files in os.walk(path):
for name in files:
if fnmatch.fnmatch(name, pattern):
result = os.path.join(root, name)
return result
#create a hash for the file contents being passed in
def create_hash(content):
h = hashlib.md5()
key_before = "reallyBad".encode('utf-8')
key_after = "hashKeyAlgorithm".encode('utf-8')
content = content.encode('utf-8')
h.update(key_before)
h.update(content)
h.update(key_after)
return h.hexdigest()
#write the MD5 hash to the json file
def write_to_json(arg, json_obj):
arg = arg.replace(".txt", ".json")
storage_location = "recorded/" + str(arg)
write_file = open(storage_location, "w")
json.dump(json_obj, write_file, indent=4, sort_keys=True)
write_file.close()
#variable to hold status of user (whether they are done or not)
working = 1
#while the user is not done, continue running the program
while working == 1:
print("Please input a command. For help type 'help'. To exit type 'exit'")
#grab input from user, divide it into words, and grab the command/option/argument
request = input()
request = request.split()
if len(request) == 1:
command = request[0]
elif len(request) == 2:
command = request[0]
option = request[1]
elif len(request) == 3:
command = request[0]
option = request[1]
argument = request[2]
else:
print("I'm sorry that is not a valid request.\n")
continue
#if user inputs command 'icheck'...
if command == 'icheck':
if option == '-l':
if argument == "":
print("For option -l, please input a directory name.")
continue
try:
dirContents = os.listdir(argument)
scan_hash_json(dirContents)
except OSError:
print("Directory not found. Make sure the directory name is correct or try a different directory.")
elif option == '-f':
if argument == "":
print("For option -f, please input a file name.")
continue
try:
contents = read_the_file(argument)
computedHash = create_hash(contents)
jsonObj = {"Directory": "Default", "Contents": {
"filename": str(argument), "original string": str(contents), "md5": str(computedHash)}}
write_to_json(argument, jsonObj)
except OSError:
print("File not found. Make sure the file name is correct or try a different file.")
elif option == '-t':
try:
dirContents = os.listdir("recorded")
check_integrity(dirContents)
except OSError:
print("File not found. Make sure the file name is correct or try a different file.")
elif option == '-u':
print("gonna update stuff")
elif option == '-r':
print("gonna remove stuff")
#if user inputs command 'help'...
elif command == 'help':
#display help screen
print("Integrity Checker has a few options you can use. Each option "
"must begin with the command 'icheck'. The options are as follows:")
print("\t-l <directory>: Reads the list of files in the directory and computes the md5 for each one")
print("\t-f <file>: Reads a specific file and computes its md5")
print("\t-t: Tests integrity of the files with recorded md5s")
print("\t-u <file>: Update a file that you have modified after its integrity has been checked")
print("\t-r <file>: Removes a file from the recorded md5s\n")
#if user inputs command 'exit'
elif command == 'exit':
#set working to zero and exit program loop
working = 0
#if anything other than 'icheck', 'help', and 'exit' are input...
else:
#display error message and start over
print("I'm sorry that is not a valid command.\n")