%%% EV_estimator.m - a MATLAB script
%%% Copyright 2008 by Samuel S.-H. Wang
%%% Noncommercial-use-only license:
%%% You may use or modify this software, but only for noncommercial purposes.
%%% To seek a commercial-use license, contact the author at sswang@princeton.edu.
% Likelihood analysis of all possible outcomes of election based
% on the meta-analytical methods of Prof. Sam Wang, Princeton University.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% EV_estimator.m
%
% This script loads 'polls.median.txt' and generates or replaces 3 CSV files:
%
% EV_estimates.csv
% all in one line:
% 2 values - medianEV for the two candidates, where a margin>0 favors the first candidate (in our case, Obama);
% 2 values - modeEV for the two candidates;
% 3 values - "safe" (>95% prob) EV for each candidate, with a third entry for undecided;
% 4 values - confidence intervals for candidate 1's EV: +/-1 sigma, then
% 95% band; and
% 1 value - number of state polls used to make the estimates.
% 1 value - (calculated by EV_metamargin and appended) the meta-margin.
%
% stateprobs.csv
% A 51-line file giving percentage probabilities for candidate #1 win of the popular vote, state by state.
% Note that for EV calculation, NE and ME were assumed to have a winner-take-all rule, but in fact they do not.
% For now this is a satisfactory approximation.
% The second field on each line is the current median polling margin.
% The third field on each line is the two-letter postal abbreviation.
%
% EV_histogram.csv
% A 538-line file giving the probability histogram of each EV outcome. Line 1 is
% the probability of candidate #1 (Obama) getting 1 EV. Line 2 is 2 EV, and so on.
% Note that 0 EV is left out of this histogram for ease of indexing.
%
% Also, a 4th file, EV_estimate_history.csv, is appended to with the same
% information as EV_estimates.csv, preceded by 1 value for the date.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This routine expects the global variables biaspct and analysisdate
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%% Initialize variables %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% State roster: two-letter postal abbreviations (comma-separated, with the
% trailing blank preserved) and the electoral votes of each, in the same order.
polls.state = 'AL,AK,AZ,AR,CA,CO,CT,DC,DE,FL,GA,HI,ID,IL,IN,IA,KS,KY,LA,ME,MD,MA,MI,MN,MS,MO,MT,NE,NV,NH,NJ,NM,NY,NC,ND,OH,OK,OR,PA,RI,SC,SD,TN,TX,UT,VT,VA,WA,WV,WI,WY ';
polls.EV = [ 9  3 10  6 55  9  7  3  3 27 ...
            15  4  4 21 11  7  6  8  9  4 ...
            10 12 17 10  6 11  3  5  5  4 ...
            15  5 31 15  3 20  7  7 21  4 ...
             8  3 11 34  5  3 13 11  5 10 ...
             3];
num_states = size(polls.EV, 2);

% safeEV slots: 1=Dem, 2=GOP, 3=uncertain.
% No state is assumed safe up front, so every electoral vote starts out in
% the "uncertain" slot and all 2^51 outcomes are considered.
safeEV(1:2) = 0;
safeEV(3) = sum(polls.EV);

% Checksum: catches a double-assigned or missing state in the table above.
if sum(safeEV) ~= 538
    warning('Electoral votes do not sum to 538!')
    safeEV
end

% Defaults for the two variables the caller may have set beforehand.
if ~exist('biaspct', 'var')
    biaspct = 0;
end
% Remember whether this run started unbiased; biaspct is changed later on.
forhistory = (biaspct == 0);
if ~exist('analysisdate', 'var')
    analysisdate = 0;
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% Load and parse polling data %%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load the poll summary table. Columns read by this script:
%   1 = median margin, 2 = SEM, 3 = poll count (summed into totalpollsused),
%   13 = date stamp (compared against analysisdate; may be absent).
polldata=load('polls.median.txt');
numlines = size(polldata,1);
if mod(numlines,num_states)>0
    warning('polls.median.txt is not a multiple of 51 lines long');
end
% Currently we are using median and effective SEM of the last 3 polls.
% To de-emphasize extreme outliers, in place of SD we use (median absolute deviation)/0.6745
% find the desired data within the file
if numlines>num_states
    % Default: the most recent block, i.e. the last num_states rows.
    ind=numlines-num_states+1;
    if analysisdate>0
        if size(polldata,2)>=13
            % First row dated on or after analysisdate.
            % NOTE(review): assumes rows are in ascending date order, as
            % implied by the default of taking the LAST block - confirm.
            % (The previous min([1 foo']) always evaluated to 1, so the
            % requested date was silently ignored.)
            foo=find(polldata(:,13)>=analysisdate,1,'first');
            if ~isempty(foo) && foo+num_states-1<=numlines
                ind=foo;
            else
                warning('No complete block of polls found for analysisdate; using the most recent block');
            end
        else
            warning('polls.median.txt has no date column; ignoring analysisdate');
        end
    end
    polldata=polldata(ind:ind+num_states-1,:);
    clear foo ind
end
% Use statistics from data file
polls.margin=polldata(:,1)';
polls.SEM=polldata(:,2)';
totalpollsused=sum(polldata(:,3))-1; % assume DC has no polls
% mock data in case we ever need to do a dry run
% Use one poll (as of 17 July)
% polls.margin=[-14 -4 -10 -7 24 3 20 81 9 -7 -6 30 -13 13 1 10 -20 -16 -19 16 13 16 8 18 -6 0 5 -16 -3 12 11 5 13 -5 0 1 -14 9 4 24 -9 -4 -15 -9 -24 34 2 12 -8 11 -13];
% polls.SEM=zeros(1,51)+4;
EV_median % where the magic happens!
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%% More calculations %%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the histogram of candidate #1's electoral votes (in percent), with a
% red vertical marker at 269 EV.
% histogram, cumulative_prob, electoralvotes, stateprobs, statename and
% medianEV(1) are not set in this file; presumably EV_median provides them.
close
plot([269 269],[0 max(histogram)*105],'-r')
hold on
bar(histogram*100)
axis([200 380 0 max(histogram)*105])
xlabel('Electoral votes for Obama');
ylabel('Probability of exact # of electoral votes (%)')
% Calculate median and confidence bands from cumulative histogram
confidenceintervals(3)=electoralvotes(find(cumulative_prob<=0.025,1,'last')); % 95-pct lower limit
confidenceintervals(1)=electoralvotes(find(cumulative_prob<=0.15865,1,'last')); % 1-sigma lower limit
confidenceintervals(2)=electoralvotes(find(cumulative_prob>=0.84135,1,'first')); % 1-sigma upper limit
confidenceintervals(4)=electoralvotes(find(cumulative_prob>=0.975,1,'first')); % 95-pct upper limit
% Cumulative probability at the first entry with at least 269 EV
probability_GOP_win=cumulative_prob(find(electoralvotes>=269,1,'first'));
% Most likely outcome. If several bins tie for the maximum, take the lowest
% one; assigning all of the tied indices to a single element would error.
modeEV(1)=find(histogram==max(histogram),1,'first');
medianEV(2)=538-medianEV(1); % assume no EV go to a third candidate
modeEV(2)=538-modeEV(1); % assume no EV go to a third candidate
% Re-calculate safe EV for each party
safeEV(1)=sum(polls.EV(stateprobs>=95));
safeEV(2)=sum(polls.EV(stateprobs<=5));
safeEV(3)=538-safeEV(1)-safeEV(2);
% States that are safe for neither candidate
uncertain=find(stateprobs<95 & stateprobs>5);
uncertainstates='';
% numel, not max(size(...)): a 1-by-0 result has max(size)==1, which would
% run the loop once and index past the end of an empty list.
for i=1:numel(uncertain)
    uncertainstates=[uncertainstates statename(uncertain(i)) ' '];
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%% Daily update first part %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Snapshot and export the unbiased results.
% Everything in this section is skipped when a nonzero bias is in effect.
if biaspct==0
    % Keep a copy of the whole workspace.
    save('EVoutput')

    % One-line summary for EV_estimates.csv. It is assembled here and
    % written out later, once the meta-margin has been appended:
    %   medianEV, modeEV, safeEV, confidenceintervals, totalpollsused
    outputs=[medianEV modeEV safeEV confidenceintervals totalpollsused];

    % Probability histogram, transposed before writing.
    dlmwrite('EV_histogram.csv',histogram')

    % State-by-state file: probability, polling margin, state name.
    % The file is rebuilt from scratch because the rows are appended
    % one at a time below.
    if exist('stateprobs.csv','file')~=0
        delete('stateprobs.csv')
    end
    for i=1:num_states
        foo=horzcat(num2str(stateprobs(i)), ',', num2str(polls.margin(i)), ',', statename(i));
        % An empty delimiter writes the text row as-is.
        dlmwrite('stateprobs.csv',foo,'-append','delimiter','')
    end
    % dlmwrite('stateprobs.csv',uncertainstates,'-append','delimiter','')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%% The meta-margin %%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Meta-margin search: find the bias at which candidate #1's median EV
% first reaches 269, then report the negative of that bias.
% NOTE(review): EV_median is a separate script; it presumably reads biaspct
% and refreshes medianEV (and related results) on each call - confirm.
% NOTE: biaspct is overwritten here and is not restored afterwards.
reality=probability_GOP_win; % keep the pre-search value before EV_median is re-run
% Starting guess: scaled distance of the median from 269 EV, offset by -2 so
% the search begins below the crossing point and can step upward.
biaspct=round((269-medianEV(1))/1.25)/10-2;
EV_median
% Raise the bias in 0.02 steps until the median reaches at least 269 EV.
while medianEV(1) < 269
biaspct=biaspct+.02;
EV_median
end
metamargin=-biaspct;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% Daily and History Update %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Append the meta-margin to the summary line and write the daily files.
% 'outputs' is only built when the run started with zero bias, so the whole
% update is gated on forhistory (biaspct itself has been overwritten by the
% meta-margin search and can no longer be tested). Without this guard a
% biased run either fails on an undefined 'outputs' or re-appends to a stale
% one left in the workspace and overwrites EV_estimates.csv with it.
if forhistory
    outputs = [outputs metamargin];
    dlmwrite('EV_estimates.csv', outputs)
    % The history line is prefixed with the date stamp from column 13,
    % so it is only written when that column is present.
    if size(polldata,2)==13
        dlmwrite('EV_estimate_history.csv',[polldata(1,13) outputs],'-append')
    end
end