#!/usr/bin/env python

# separate confusion data into two kinds:
#
# 1. "bad hits": Gi is drawn instead of Gj
# 2. "misses": Gi was not drawn when it should have been
#
# input is text confusion log (*-cm.txt)
#
# output is a file with bad hits followed by misses
#

import sys, string, re, os

# Name this script was invoked under; used as the prefix of the usage text.
progName = os.path.basename(sys.argv[0])
# Usage text written to stderr when too few positional arguments are given.
# It lists both pid filters the option parser accepts: -e (pids to exclude)
# and -o (the only pids to include).
USAGE = ('%s: usage: %s [-e excludePids ] [-o onlyPids ] numbering cmFile1 [cmFile2 ...]\n'
         % (progName, progName))

# Keyed by pid (the digit string taken from the file name); each value is a
# dictionary of bad-hit counts keyed by gesture index.
badHits = {}
# Keyed by pid (the digit string taken from the file name); each value is a
# dictionary of miss counts keyed by gesture index.
misses = {}
# Column keys for the printed tables.  Despite the name, the driver code
# fills this with gesture indices rather than gesture names.
gestureNames = []

def readNumbering(fileName):
    """Read a tab-separated numbering file into one index map per case.

    The first line lists the gesture names.  Every following line describes
    one case: its first tab-separated field is skipped and each remaining
    field is parsed as an integer, paired with the gesture name in the same
    position.

    Returns a list with one dictionary per line after the header, in file
    order, mapping gesture name -> index.  A line with fewer indices than
    there are names maps only the leading names; a line with more indices
    than names raises IndexError, and a non-numeric field raises ValueError.
    """
    # Read everything up front so the handle is closed before any parsing
    # error can propagate.
    with open(fileName) as numberingFile:
        header = numberingFile.readline()
        caseLines = numberingFile.readlines()

    # The whole header is stripped before it is split, so a leading tab
    # (an empty label column) does not produce an empty first name.
    gNames = [name.strip() for name in header.strip().split('\t')]

    result = []
    for line in caseLines:
        indices = [int(field) for field in line.strip().split('\t')[1:]]
        caseMap = {}
        for position, index in enumerate(indices):
            caseMap[gNames[position]] = index
        result.append(caseMap)
    return result

def appendToDict(dict, key, value):
    """Append value to the list stored under key in dict.

    A new single-element list is created when key is not present yet.
    The dictionary is modified in place; nothing is returned.
    """
    # setdefault replaces the deprecated has_key() test with one lookup.
    dict.setdefault(key, []).append(value)
        
# Splits a data file path into named groups:
#   prefix      optional leading part, up to and including the last '/'
#   pid         one or more digits
#   phase       a single digit, separated from pid by '-'
#   month, day  digit runs following '_' and joined by '-'
#   rest        whatever follows the day
# e.g. 'logs/12-1_03-04-cm.txt' gives pid '12', phase '1', month '03',
# day '04' and rest '-cm.txt'.
# Written as a raw string so that \d reaches the regex engine as typed
# instead of relying on Python leaving an unrecognised string escape alone.
DATA_FILENAME_REGEX = re.compile(
    r"(?P<prefix>.*/)?(?P<pid>\d+)-(?P<phase>\d)_(?P<month>\d+)-(?P<day>\d+)(?P<rest>.*)")

def getDate(fileName):
    """Return the 'month-day' part of a data file name, e.g. '03-04'.

    The name is matched against DATA_FILENAME_REGEX.  A name that does not
    match raises AttributeError, because there is no match object to query.
    """
    parsed = DATA_FILENAME_REGEX.match(fileName)
    month = parsed.group('month')
    day = parsed.group('day')
    return month + '-' + day

def incrKey(dict, key):
    """Add one to the counter stored under key in dict.

    A key that is not present yet starts from zero, so its first increment
    stores 1.  The dictionary is modified in place; nothing is returned.
    """
    dict[key] = dict.get(key, 0) + 1

def appendUniquely(l, item):
    """Append item to the list l unless an equal element is already in it.

    The list is modified in place; nothing is returned.
    """
    if item not in l:
        l.append(item)

def printHeader(l):
    """Write the header row of a table to standard output.

    Each item of l becomes one cell consisting of a tab followed by the
    item's string form, and the row ends with a newline.  An empty l
    produces just the newline.
    """
    cells = ['\t%s' % item for item in l]
    # Cells are joined with a single space: that is the separator the
    # Python 2 print statement inserted between comma-terminated prints,
    # so the emitted bytes stay identical to the earlier output.
    sys.stdout.write(' '.join(cells) + '\n')

def printTable(gNames, dict):
    """Write a table of counts to standard output.

    gNames -- the column keys, in the order the columns are printed
    dict   -- maps each row key to a dictionary of column key -> value

    The header row is produced by printHeader(gNames).  One row follows per
    key of dict, in sorted key order: the key itself, then one cell per
    column holding a tab and the row's value for that column, or 0 when the
    row has no entry for it.
    """
    printHeader(gNames)
    for k in sorted(dict):
        row = dict[k]
        cells = [str(k)]
        for gesture in gNames:
            cells.append('\t%s' % row.get(gesture, 0))
        # Cells are joined with a single space: that is the separator the
        # Python 2 print statement inserted between comma-terminated
        # prints, so the emitted bytes stay identical to the earlier output.
        sys.stdout.write(' '.join(cells) + '\n')

def pidToCase(pidStr):
    """Return the 1-based case number for a pid given as a decimal string.

    Pids below 100 cycle through three cases (1 -> 1, 2 -> 2, 3 -> 3,
    4 -> 1, ...); pids of 100 and above alternate between two
    (100 -> 2, 101 -> 1, ...).
    """
    pid = int(pidStr)
    if pid < 100:
        caseCount = 3
    else:
        caseCount = 2
    return (pid - 1) % caseCount + 1

import getopt

# Command-line driver: parse the options, load the numbering, tally every
# confusion log that passes the pid filters, then print both tables.
try:
    optlist, args = getopt.getopt(sys.argv[1:], 'e:o:')
except getopt.GetoptError as err:
    # Unknown option or missing option argument: report it with the usage
    # text instead of dying with a traceback.
    sys.stderr.write('%s: %s\n' % (progName, err))
    sys.stderr.write(USAGE)
    sys.exit(-1)

if len(args) < 2:
    sys.stderr.write(USAGE)
    sys.exit(-1)

# Both options take ONE argument holding whitespace-separated pids,
# e.g.  -e "3 17"
exclusions = []
only = None
for (option, arg) in optlist:
    if option == '-e':
        exclusions = [int(pidText) for pidText in arg.split()]
        sys.stderr.write('exclusions: %s\n' % exclusions)
    elif option == '-o':
        only = [int(pidText) for pidText in arg.split()]

numbering = readNumbering(args[0])

# Start with one column per gesture of the first case.  This has to be a
# real list because appendUniquely() may extend it below.
gestureNames = list(range(len(numbering[0])))

files = args[1:]
for fileName in files:
    match = DATA_FILENAME_REGEX.match(fileName)
    if match is None:
        # Without a pid the file can be neither filtered nor tallied.
        sys.stderr.write('%s: cannot find a pid in file name: %s\n'
                         % (progName, fileName))
        sys.exit(-1)
    pid = match.group('pid')
    pidNum = int(pid)
    # An empty or absent -o list means "no restriction".
    if pidNum not in exclusions and (not only or pidNum in only):
        with open(fileName) as logFile:
            lines = logFile.readlines()
        # pidToCase() is 1-based; numbering is a 0-based list.
        case = pidToCase(pid) - 1
        # Several files may share a pid; their counts accumulate in the
        # same per-pid dictionaries.
        pBadHits = badHits.setdefault(pid, {})
        pMisses = misses.setdefault(pid, {})
        for line in lines:
            fields = line.strip().split('\t')
            # The second field is read but not used by this script.
            (desiredName, time, correct) = fields[:3]
            desiredIndex = numbering[case][desiredName]
            appendUniquely(gestureNames, desiredIndex)
            if correct == '0':
                # Only lines flagged '0' are expected to carry a fourth
                # field; it is counted as a bad hit for that index and as
                # a miss for the desired one.
                actualIndex = int(fields[3])
                incrKey(pMisses, desiredIndex)
                incrKey(pBadHits, actualIndex)
    else:
        sys.stderr.write('excluding: %s\n' % pid)

gestureNames.sort()

# print bad hits
sys.stdout.write('Bad hits:\n')
printTable(gestureNames, badHits)

sys.stdout.write('\n')

# print misses
sys.stdout.write('Misses\n')
printTable(gestureNames, misses)

    
