import os
import os.path
import email
import email.Parser
import sys
import time
# Marker file; its modification time is read as the time of the last run
# and is refreshed at the end of each run.
last_run_marker_file = "spam/last_training_run"
# Executable spawned to register a message as spam / not spam.
bogofilter_path = "/usr/local/bin/bogofilter"
# Maildir folders that are scanned.  The paths are relative, so they are
# resolved against the process's current working directory.
inbox_maildir = "Maildir"
spam_maildir = "Maildir/.Spam"
# Run statistics.  They are incremented through ``global`` statements in the
# functions below and printed/logged in the end-of-run summary.
processed = 0
count_spam = 0
count_not_spam = 0
registered_as_spam = 0
registered_as_not_spam = 0
# Log file object; None until init() opens it.
log_file = None
def log_message(text, msg_headers):
    """Write one line about a message to the run log.

    text -- free-form description placed at the start of the line.
    msg_headers -- object with a ``get(name)`` method; its "Subject" and
        "From" values are included in the line.

    Relies on the module-level ``log_file`` having been opened already.
    """
    subject = msg_headers.get("Subject")
    sender = msg_headers.get("From")
    log_file.write("%s Subject \"%s\" From \"%s\"\n" % (text, subject, sender))
def count_as_spam(mail_file, msg_headers):
    """Bump the module-level ``count_spam`` tally by one.

    Both arguments are ignored; they exist so this function shares the
    (mail_file, msg_headers) signature of the other per-message callbacks.
    """
    global count_spam
    count_spam = count_spam + 1
def count_as_not_spam(mail_file, msg_headers):
    """Bump the module-level ``count_not_spam`` tally by one.

    Both arguments are ignored; they exist so this function shares the
    (mail_file, msg_headers) signature of the other per-message callbacks.
    """
    global count_not_spam
    count_not_spam = count_not_spam + 1
def register_as_not_spam(mail_file, msg_headers):
    """Run ``bogofilter -n -I <mail_file>`` and record the correction.

    mail_file -- path of the message file handed to bogofilter.
    msg_headers -- parsed headers, used only for the log line.

    If bogofilter returns a non-zero status, an error is printed and the
    whole script exits with status 1.  Otherwise the
    ``registered_as_not_spam`` counter is incremented and a log line is
    written.
    """
    global registered_as_not_spam
    # P_WAIT blocks until bogofilter finishes and hands back its exit status.
    status = os.spawnl(
        os.P_WAIT, bogofilter_path,
        "bogofilter", "-n", "-I", mail_file)
    if status != 0:
        print("Error processing file %s - %d" % (mail_file, status))
        sys.exit(1)
    registered_as_not_spam = registered_as_not_spam + 1
    log_message("Message In Inbox. bogofilter thought spam", msg_headers)
def register_as_spam(mail_file, msg_headers):
    """Run ``bogofilter -s -I <mail_file>`` and record the correction.

    mail_file -- path of the message file handed to bogofilter.
    msg_headers -- parsed headers, used only for the log line.

    If bogofilter returns a non-zero status, an error is printed and the
    whole script exits with status 1.  Otherwise the
    ``registered_as_spam`` counter is incremented and a log line is written.
    """
    global registered_as_spam
    # P_WAIT blocks until bogofilter finishes and hands back its exit status.
    status = os.spawnl(
        os.P_WAIT, bogofilter_path,
        "bogofilter", "-s", "-I", mail_file)
    if status != 0:
        print("Error processing file %s - %d" % (mail_file, status))
        sys.exit(1)
    registered_as_spam = registered_as_spam + 1
    log_message("Message In Spam. bogofilter thought not spam", msg_headers)
def process_maildir(last_run_time, maildir, spam_fcns, not_spam_fcns):
    """Run callbacks for every recently changed message in ``<maildir>/cur``.

    last_run_time -- timestamp (seconds); files whose ctime is not later
        than this are skipped.
    maildir -- path of the Maildir folder; its ``cur`` subdirectory is
        scanned.
    spam_fcns -- callables invoked as ``fcn(mail_file, msg_headers)`` when
        the first comma-separated field of X-Bogosity is "Yes".
    not_spam_fcns -- callables invoked the same way for every other
        X-Bogosity value.

    The process's working directory is switched to ``<maildir>/cur`` while
    scanning, so the ``mail_file`` given to callbacks is a bare file name
    relative to that directory.  The previous working directory is restored
    on the way out, even if a callback raises.  The module-level
    ``processed`` counter is incremented once per message that reaches the
    callbacks.
    """
    global processed
    original_dir = os.getcwd()
    os.chdir("%s/cur" % maildir)
    try:
        parser = email.Parser.Parser()
        for mail_file in os.listdir("."):
            # Ignore dot-files and anything that is not a regular file.
            if mail_file.startswith(".") or not os.path.isfile(mail_file):
                continue
            # NOTE(review): getctime is the inode change time on Unix, which
            # presumably advances when a message is moved between folders --
            # confirm that is the intent.
            if os.path.getctime(mail_file) <= last_run_time:
                continue
            # Only handle names carrying a ":2," info suffix, and skip those
            # with a "T" after it (the "trashed" flag in the Maildir naming
            # convention).
            info_index = mail_file.rfind(":2,")
            if info_index == -1 or mail_file.find("T", info_index + 3) != -1:
                continue
            # Close each message file explicitly instead of leaking one
            # handle per message until garbage collection.
            mail_fp = open(mail_file)
            try:
                msg_headers = parser.parse(mail_fp, True)
            finally:
                mail_fp.close()
            bogosity = msg_headers.get("X-Bogosity")
            if bogosity is None:
                # Message carries no X-Bogosity header; nothing to compare.
                continue
            processed += 1
            if bogosity.split(",")[0] == "Yes":
                callbacks = spam_fcns
            else:
                callbacks = not_spam_fcns
            for fcn in callbacks:
                fcn(mail_file, msg_headers)
    finally:
        os.chdir(original_dir)
def process_inbox(last_run_time):
    """Scan the inbox Maildir for messages changed after ``last_run_time``.

    A message still in the inbox that is tagged as spam is registered as
    not spam; every message scanned is counted as not spam.
    """
    tagged_spam_actions = [register_as_not_spam, count_as_not_spam]
    tagged_ham_actions = [count_as_not_spam]
    process_maildir(last_run_time, inbox_maildir,
                    tagged_spam_actions, tagged_ham_actions)
def process_spam(last_run_time):
    """Scan the spam Maildir for messages changed after ``last_run_time``.

    A message in the spam folder that is not tagged as spam is registered
    as spam; every message scanned is counted as spam.
    """
    tagged_spam_actions = [count_as_spam]
    tagged_ham_actions = [register_as_spam, count_as_spam]
    process_maildir(last_run_time, spam_maildir,
                    tagged_spam_actions, tagged_ham_actions)
def get_last_run_time():
    """Return the marker file's modification time, or 0 if it is unavailable.

    The timestamp is read in a single step rather than checking for the
    file first, so the marker disappearing between a check and the read
    cannot crash the script.
    """
    try:
        return os.path.getmtime(last_run_marker_file)
    except OSError:
        # No marker (or it cannot be stat'ed): behave as if never run.
        return 0
def update_last_run_time():
    """Stamp the marker file with the current time, creating it if needed.

    Only the file's timestamps matter; the text written on creation is a
    note for anyone who opens the file.
    """
    marker_missing = not os.path.exists(last_run_marker_file)
    if marker_missing:
        marker = open(last_run_marker_file, 'w')
        marker.write("# Marker file. Ignore contents")
        marker.close()
    # Passing None sets access and modification times to "now".
    os.utime(last_run_marker_file, None)
def init():
    """Open the run log, announce the run, and return the last run time.

    The module-level ``log_file`` is opened in append mode.  The start-up
    line is written both to standard output and to the log.
    """
    global log_file
    previous_run = get_last_run_time()
    log_file = open("spam/log.txt", "a")
    banner = "Starting run. Last run %s\n" % time.ctime(previous_run)
    # The banner already ends with a newline, so write it verbatim.
    sys.stdout.write(banner)
    log_file.write(banner)
    return previous_run
def term():
    """Refresh the run marker, report the run's totals, and close the log.

    The summary line is written both to standard output and to the log.
    """
    update_last_run_time()
    totals = (processed, count_spam, count_not_spam,
              registered_as_spam, registered_as_not_spam)
    summary = "Processed %d messages. %d spam and %d not spam. Registered %d as spam and %d as not spam.\n" % totals
    # The summary already ends with a newline, so write it verbatim.
    sys.stdout.write(summary)
    log_file.write(summary)
    log_file.close()
def main():
    """Run one training pass: the spam folder first, then the inbox."""
    since = init()
    for scan_folder in (process_spam, process_inbox):
        scan_folder(since)
    term()
if __name__ == "__main__":
    # Set before any child process is spawned so that it is inherited;
    # presumably bogofilter needs this to locate the Berkeley DB shared
    # libraries -- confirm.  Any existing LD_LIBRARY_PATH is overwritten.
    berkeley_db_lib_dir = "/usr/local/BerkeleyDB.4.0/lib"
    os.environ['LD_LIBRARY_PATH'] = berkeley_db_lib_dir
    main()