Source code for frontline

"""
Script to estimate the amount of time that Dr. Bruce Ivins spent at USAMRIID 
outside of normal work hours, based on his swipe access records at the "rear"
door.  Analysis performed for PBS Frontline's "The Anthrax Files".

Usage: python frontline.py 

Author:  Elaine Angelino <elaine at eecs dot harvard dot edu>

Copyright 2011

"""

import datetime

import numpy as np
import tabular as tb

# Lookup from datetime.date.weekday() indices (0 = Monday ... 6 = Sunday)
# to English day names.
weekday_dict = dict(enumerate([
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]))

def str2weekday(x):
    """
    Helper function to get the day of the week corresponding to a given date.

    `x` is a date string of the form 'month/day/year' (three '/'-separated
    integer fields).  The return value is the English weekday name looked up
    in `weekday_dict`, e.g. 'Tuesday'.

    Raises ValueError if `x` does not have exactly three '/'-separated
    fields, if a field is not an integer, or if the fields do not form a
    valid calendar date.

    """
    month, day, year = x.split('/')
    date = datetime.date(int(year), int(month), int(day))
    # date.weekday() numbers the days from Monday = 0, matching weekday_dict.
    return weekday_dict[date.weekday()]
def str2min(x):
    """
    Helper function to convert a time into the number of minutes since
    midnight.

    `x` is a time string of the form 'hours:minutes:seconds'.  The seconds
    field must be present but is discarded, so the result is truncated to
    whole minutes.

    Raises ValueError if `x` does not have exactly three ':'-separated
    fields or if the hours or minutes field is not an integer.

    """
    hours, minutes, _seconds = x.split(':')
    return 60 * int(hours) + int(minutes)
def compute_extra_hours():
    """
    Estimate the extent of Dr. Bruce Ivins's after hours time at USAMRIID.

    Reads the swipe records from
    'K8USAMRIIDIvinsSwipeRecords200127173-272361.txt' (relative to the
    current working directory), keeps the records that mention 'REAR', pairs
    each qualifying exit with an entry and writes one row per pair to
    'door-5pm-7am.tsv'.  The output columns are date_in, time_in, min_in,
    day_in, date_out, time_out, min_out, day_out and total_time, where
    total_time is the estimated after-hours duration in minutes.

    An exit qualifies when the most recent entry was on a Saturday or Sunday,
    or when the exit hour is 18 or later, or earlier than 6.  If no entry is
    pending for a qualifying exit, an entry at 17:00:00 on the exit's date is
    assumed.  For weekday visits that start before 17:00 and end after it,
    only the time after 17:00 is counted.  Rows whose total_time is not
    positive are dropped.

    Returns None; the result is the file written to disk.

    """
    swipe_file = 'K8USAMRIIDIvinsSwipeRecords200127173-272361.txt'
    weekend = ('Saturday', 'Sunday')
    five_pm = 17 * 60   # minutes since midnight

    # Plain text mode instead of 'rU': the 'U' flag was removed in
    # Python 3.11, and the context manager guarantees the file is closed.
    with open(swipe_file) as f:
        raw_lines = f.read().strip().split('\n')

    # Keep only lines that start like a date (leading digit, '/' within the
    # first three characters).  The two replace() calls insert spaces around
    # the year and the name -- presumably these fields are fused in the raw
    # text; verify against the input file.
    x = [r.replace('2001', '2001 ').replace('IVINS', ' IVINS')
         for r in raw_lines
         if r and r[0].isdigit() and ('/' in r[:3])]
    x = [r for r in x if 'REAR' in r]
    # Append the weekday name as the last whitespace-separated field.
    x = [r + ' ' + str2weekday(r.split()[0]) for r in x]

    # Walk the records in file order, collecting alternating
    # (entry, exit) lines for the after-hours visits.
    y = []
    last_in = None
    day_in = None
    for line in x:
        if 'DR IN' in line:
            last_in = line
            day_in = line.split()[-1]
        elif 'DR OUT' in line:
            out_hour = int(line.split()[1].split(':')[0])
            if (day_in in weekend) or (out_hour > 17) or (out_hour < 6):
                if last_in is None:
                    # No pending entry: fabricate one at 17:00:00 from the
                    # exit record itself.
                    fields = line.split()
                    fields[1] = '17:00:00'
                    last_in = ' '.join(fields).replace('OUT', 'IN')
                y += [last_in] + [line]
                # NOTE(review): the pending entry is cleared only once it has
                # been paired with a qualifying exit -- confirm this nesting
                # matches the intended behaviour.
                last_in = None

    # (date, time, minutes since midnight, weekday, direction); the direction
    # ('IN' / 'OUT') is taken from the ninth whitespace-separated field.
    recs = []
    for r in y:
        fields = r.split()
        recs.append((fields[0], fields[1], str2min(fields[1]),
                     fields[-1], fields[8]))
    in_data = [r[:4] for r in recs if r[4] == 'IN']
    out_data = [r[:4] for r in recs if r[4] == 'OUT']

    a = tb.tabarray(records=in_data,
                    names=['date_in', 'time_in', 'min_in', 'day_in'])
    b = tb.tabarray(records=out_data,
                    names=['date_out', 'time_out', 'min_out', 'day_out'])
    z = a.colstack(b)

    # An exit dated differently from its entry is treated as falling on the
    # following day, so one day's worth of minutes is added.
    min_out = np.array([
        m_out if (d_out == d_in) else m_out + (24 * 60)
        for (m_out, d_out, d_in)
        in zip(z['min_out'], z['date_out'], z['date_in'])
    ])

    # Weekend visits count from the actual entry time; weekday visits that
    # straddle 17:00 count from 17:00 only.
    min_in = np.array([
        five_pm
        if (rec['day_in'] not in weekend
            and rec['min_out'] > five_pm
            and rec['min_in'] < five_pm)
        else rec['min_in']
        for rec in z
    ])

    total_time = min_out - min_in
    z = z.colstack(tb.tabarray(columns=[total_time], names=['total_time']))
    z = z[z['total_time'] > 0]
    z.saveSV('door-5pm-7am.tsv')
def plot_hours(fin='door-5pm-7am.tsv', fout='ivins-extra-hours-2001.pdf'):
    """
    Bar chart summarizing the estimated time Dr. Bruce Ivins worked after
    hours.

    Estimated time Dr. Bruce Ivins spent at USAMRIID after hours.  The total
    number of hours is plotted for each month in 2001.  Note that the anthrax
    attacks occurred for several weeks, starting on September 18, 2001.

    Disclaimer:  I performed this analysis in November 2011.  It was not a
    part of PBS Frontline's analysis for "The Anthrax Files".

    `fin` is the separated-values file to read; it must provide a 'date_in'
    column of 'month/day/year' strings and a 'total_time' column (minutes,
    as written by compute_extra_hours).  `fout` is the path the figure is
    saved to.  All dates in `fin` must fall in the same year, otherwise an
    AssertionError is raised.

    Returns None; the result is the figure written to `fout`.

    """
    import calendar
    import pylab

    table = tb.tabarray(SVfile=fin)
    table = table[['date_in', 'total_time']]

    # Split every 'month/day/year' stamp into three integer columns.
    date_parts = [list(map(int, stamp.split('/')))
                  for stamp in table['date_in']]
    dates = tb.tabarray(records=date_parts, names=['month', 'day', 'year'])
    assert len(set(dates['year'])) == 1

    # Collapse the per-visit durations to one row per month using tabular's
    # default aggregation (presumably a sum for numeric columns -- confirm).
    per_visit = table[['total_time']].colstack(dates[['month']])
    monthly = per_visit.aggregate(On=['month'])
    monthly.sort(order=['month'])

    positions = np.arange(len(monthly))
    hours = monthly['total_time'] / 60   # minutes -> hours
    labels = [calendar.month_name[month][:3] for month in monthly['month']]

    pylab.clf()
    pylab.bar(positions, hours)
    pylab.xticks(positions + 0.4, labels)
    pylab.title('Estimated time Dr. Bruce Ivins spent at USAMRIID after hours')
    pylab.xlabel('Month (in 2001)')
    pylab.ylabel('Number of hours')
    pylab.savefig(fout)
# When run as a script, only regenerate the after-hours table; plot_hours()
# is not invoked here and has to be called separately to draw the chart.
if __name__ == '__main__': compute_extra_hours()