import os
import os.path
import email
import email.Parser
import sys
import time

# ---- Configuration (all paths are relative to the working directory,
# ---- except the bogofilter executable) ------------------------------

# Marker file: only its modification time matters; it records when this
# program last ran.
last_run_marker_file = "spam/last_training_run"

# Location of the bogofilter executable.
bogofilter_path = "/usr/local/bin/bogofilter"

# Maildir holding the inbox.
inbox_maildir = "Maildir"

# Maildir holding the spam folder.
spam_maildir = "Maildir/.Spam"

# ---- Run-wide tallies, updated through ``global`` statements ---------
#   processed              - mails processed
#   count_spam             - spams (mails in the spam maildir)
#   count_not_spam         - not spams (mails in the inbox maildir)
#   registered_as_spam     - false negatives: spams bogofilter thought
#                            weren't spam, so they were registered as spam
#   registered_as_not_spam - false positives: non spams bogofilter thought
#                            were spam, so they were registered as normal
processed = count_spam = count_not_spam = 0
registered_as_spam = registered_as_not_spam = 0

# The open log file object; None until init() opens it.
log_file = None

def log_message(text, msg_headers):
    """Write one line about a message to the module-level log file.

    The line consists of text followed by the Subject and From values
    obtained from msg_headers.get(); a header that is absent is therefore
    written as None. log_file must already be open.
    """
    subject = msg_headers.get("Subject")
    sender = msg_headers.get("From")
    line = "%s Subject \"%s\" From \"%s\"\n" % (text, subject, sender)
    log_file.write(line)

def count_as_spam(mail_file, msg_headers):
    """Add one to the global count_spam tally.

    Neither argument is used; they are accepted so the function can be
    called as fcn(mail_file, msg_headers) like the other per-mail handlers.
    """
    global count_spam
    count_spam = count_spam + 1

def count_as_not_spam(mail_file, msg_headers):
    """Add one to the global count_not_spam tally.

    Neither argument is used; they are accepted so the function can be
    called as fcn(mail_file, msg_headers) like the other per-mail handlers.
    """
    global count_not_spam
    count_not_spam = count_not_spam + 1
    
def register_as_not_spam(mail_file, msg_headers):
    """Register the contents of mail_file with bogofilter as not spam.

    Runs bogofilter with the arguments ``-n -I mail_file`` and waits for it
    to finish. On success the global registered_as_not_spam count is
    incremented and the correction is written to the log using
    msg_headers.

    Raises SystemExit (exit status 1) if bogofilter returns a non-zero
    code; the error text is written to stderr.
    """
    global registered_as_not_spam
    rc = os.spawnl(os.P_WAIT, bogofilter_path, "bogofilter",
                   "-n", "-I", mail_file)
    if rc != 0:
        # sys.exit() given a string prints it to stderr and exits with
        # status 1, so the diagnostic no longer mixes with the run summary
        # that goes to stdout.
        sys.exit("Error processing file %s - %d" % (mail_file, rc))
    registered_as_not_spam += 1
    log_message("Message In Inbox. bogofilter thought spam", msg_headers)

def register_as_spam(mail_file, msg_headers):
    """Register the contents of mail_file with bogofilter as spam.

    Runs bogofilter with the arguments ``-s -I mail_file`` and waits for it
    to finish. On success the global registered_as_spam count is
    incremented and the correction is written to the log using
    msg_headers.

    Raises SystemExit (exit status 1) if bogofilter returns a non-zero
    code; the error text is written to stderr.
    """
    global registered_as_spam
    rc = os.spawnl(os.P_WAIT, bogofilter_path, "bogofilter",
                   "-s", "-I", mail_file)
    if rc != 0:
        # sys.exit() given a string prints it to stderr and exits with
        # status 1, so the diagnostic no longer mixes with the run summary
        # that goes to stdout.
        sys.exit("Error processing file %s - %d" % (mail_file, rc))
    registered_as_spam += 1
    log_message("Message In Spam. bogofilter thought not spam", msg_headers)

def process_maildir(last_run_time, maildir, spam_fcns, not_spam_fcns):
    """Run handlers over recently changed, bogofilter-tagged mails.

    Examines the regular, non-dot files in ``<maildir>/cur`` whose ctime is
    later than last_run_time. File names without a ``:2,`` info field, or
    with a T (trashed) flag in that field, are skipped. Only the headers of
    each remaining mail are parsed, and mails without an X-Bogosity header
    are ignored.

    For every other mail the global processed count is incremented and each
    handler is called as fcn(mail_file, msg_headers), where mail_file is
    the file name relative to ``<maildir>/cur``: the functions in spam_fcns
    when the first comma-separated field of X-Bogosity is "Yes", those in
    not_spam_fcns for any other value.

    The working directory is ``<maildir>/cur`` while handlers run and is
    restored afterwards, even if a handler raises or exits.
    """
    global processed
    # A parser to parse the email files
    parser = email.Parser.Parser()
    # Remember where we are, then change to the mail directory
    original_dir = os.getcwd()
    os.chdir("%s/cur" % maildir)
    try:
        for mail_file in os.listdir("."):
            # Drop dot files and anything that is not a regular file
            if mail_file[0] == "." or not os.path.isfile(mail_file):
                continue
            # Only process files changed after the last run. Note that
            # getctime() is the last metadata change on Unix (the creation
            # time on Windows), not strictly a creation time.
            if os.path.getctime(mail_file) <= last_run_time:
                continue
            # Don't look at those marked with a T (for trashed) in the
            # info field. When the mail folder is "compacted" these
            # messages will go away.
            info_index = mail_file.rfind(":2,")
            if info_index == -1 or "T" in mail_file[info_index + 3:]:
                continue
            # We only need the headers so only parse them; close the file
            # straight away rather than leaving it to garbage collection.
            mail_fp = open(mail_file)
            try:
                msg_headers = parser.parse(mail_fp, True)
            finally:
                mail_fp.close()
            # Only process those marked by bogofilter
            bogosity = msg_headers.get("X-Bogosity")
            if bogosity is None:
                continue
            processed += 1
            # NOTE(review): only a leading "Yes" field counts as spam; any
            # other X-Bogosity value is handled as not spam - confirm this
            # matches the header format of the installed bogofilter.
            if bogosity.split(",")[0] == "Yes":
                handlers = spam_fcns
            else:
                handlers = not_spam_fcns
            for fcn in handlers:
                fcn(mail_file, msg_headers)
    finally:
        # Back to the original dir, whatever happened above
        os.chdir(original_dir)

def process_inbox(last_run_time):
    """Process the inbox maildir for mail changed since last_run_time.

    Every processed mail is counted as not spam; mails that bogofilter had
    tagged as spam are first registered with it as not spam.
    """
    tagged_spam_handlers = [register_as_not_spam, count_as_not_spam]
    tagged_clean_handlers = [count_as_not_spam]
    process_maildir(last_run_time, inbox_maildir,
                    tagged_spam_handlers, tagged_clean_handlers)

def process_spam(last_run_time):
    """Process the spam maildir for mail changed since last_run_time.

    Every processed mail is counted as spam; mails that bogofilter had not
    tagged as spam are first registered with it as spam.
    """
    tagged_spam_handlers = [count_as_spam]
    tagged_clean_handlers = [register_as_spam, count_as_spam]
    process_maildir(last_run_time, spam_maildir,
                    tagged_spam_handlers, tagged_clean_handlers)

def get_last_run_time():
    """Return when the program was last run, in seconds since the epoch.

    The value is the modification time of last_run_marker_file, or 0 when
    that file does not exist.
    """
    if not os.path.exists(last_run_marker_file):
        return 0
    return os.path.getmtime(last_run_marker_file)

def update_last_run_time():
    """Record the current time as the moment the program was last run.

    Creates last_run_marker_file with a short placeholder line if it does
    not exist yet, then sets its access and modification times to now.
    """
    marker_missing = not os.path.exists(last_run_marker_file)
    if marker_missing:
        marker = open(last_run_marker_file, 'w')
        marker.write("# Marker file. Ignore contents")
        marker.close()
    # A times argument of None means "use the current time".
    os.utime(last_run_marker_file, None)

def init():
    """Start a run and return the time of the previous one.

    Fetches the last run time, opens spam/log.txt for appending as the
    module-level log_file, and writes a start-of-run line to both stdout
    and the log.
    """
    global log_file
    last_run_time = get_last_run_time()
    log_file = open("spam/log.txt", "a")
    banner = "Starting run. Last run %s\n" % time.ctime(last_run_time)
    # The text already ends in a newline, so it is written out as-is.
    sys.stdout.write(banner)
    log_file.write(banner)
    return last_run_time

def term():
    """Finish a run.

    Updates the last-run marker, writes the summary of the global counts to
    both stdout and the log, and closes the log file.
    """
    update_last_run_time()

    totals = (processed, count_spam, count_not_spam,
              registered_as_spam, registered_as_not_spam)
    template = "Processed %d messages. %d spam and %d not spam. Registered %d as spam and %d as not spam.\n"
    summary = template % totals
    # The text already ends in a newline, so it is written out as-is.
    sys.stdout.write(summary)
    log_file.write(summary)
    log_file.close()
    
def main():
    """Run one training pass: the spam folder first, then the inbox."""
    last_run_time = init()

    for scan_folder in (process_spam, process_inbox):
        scan_folder(last_run_time)

    term()
    
if __name__ == "__main__":
    # bogofilter uses a locally installed BerkeleyDB, so that install's
    # library directory must be on the library path before anything runs.
    os.environ["LD_LIBRARY_PATH"] = "/usr/local/BerkeleyDB.4.0/lib"
    main()