%%% EV_estimator.m - a MATLAB script
%%% Copyright 2008 by Samuel S.-H. Wang
%%% Noncommercial-use-only license:
%%% You may use or modify this software, but only for noncommercial purposes.
%%% To seek a commercial-use license, contact the author at sswang@princeton.edu.

% Likelihood analysis of all possible outcomes of election based
% on the meta-analytical methods of Prof. Sam Wang, Princeton University.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% EV_estimator.m
%
% This script loads 'polls.median.txt' and generates or replaces 3 CSV files:
%
% EV_estimates.csv
%    all in one line:
%    2 values - medianEV for the two candidates, where a margin>0 favors the first candidate (in our case, Obama);
%    2 values - modeEV for the two candidates;
%    3 values - "safe" (>=95% prob) EV for each candidate, with a third entry for undecided;
%    4 values - confidence intervals for candidate 1's EV: +/-1 sigma, then
%               95% band; and
%    1 value - number of state polls used to make the estimates.
%    1 value - the meta-margin (calculated in the meta-margin section of this script and appended).
%
% stateprobs.csv
%    A 51-line file giving percentage probabilities for candidate #1 win of the popular vote, state by state.
%    Note that for EV calculation, NE and ME were assumed to have a winner-take-all rule, but in fact they do not.
%    For now this is a satisfactory approximation.
%    The second field on each line is the current median polling margin.
%    The third field on each line is the two-letter postal abbreviation.
%
% EV_histogram.csv
%    A 538-line file giving the probability histogram of each EV outcome. Line 1 is
%    the probability of candidate #1 (Obama) getting 1 EV. Line 2 is 2 EV, and so on.
%    Note that 0 EV is left out of this histogram for ease of indexing.
%
% Also, a 4th file, EV_estimate_history.csv, is updated (appended to) with the same
% information as EV_estimates.csv plus 1 value for the date.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This routine expects the global variables biaspct and analysisdate
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%% Initialize variables %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Comma-separated postal abbreviations; polls.EV below follows the same order.
polls.state = ['AL,AK,AZ,AR,CA,CO,CT,DC,DE,FL,GA,HI,ID,IL,IN,IA,KS,KY,LA,ME,MD,MA,MI,MN,MS,MO,MT,NE,NV,NH,NJ,NM,NY,NC,ND,OH,OK,OR,PA,RI,SC,SD,TN,TX,UT,VT,VA,WA,WV,WI,WY '];

% Electoral votes per state, ten states per row for readability.
polls.EV = [ 9  3 10  6 55  9  7  3  3 27 ...   % AL AK AZ AR CA CO CT DC DE FL
            15  4  4 21 11  7  6  8  9  4 ...   % GA HI ID IL IN IA KS KY LA ME
            10 12 17 10  6 11  3  5  5  4 ...   % MD MA MI MN MS MO MT NE NV NH
            15  5 31 15  3 20  7  7 21  4 ...   % NJ NM NY NC ND OH OK OR PA RI
             8  3 11 34  5  3 13 11  5 10 ...   % SC SD TN TX UT VT VA WA WV WI
             3];                                % WY
num_states = numel(polls.EV);

% safeEV slots: 1=Dem, 2=GOP, 3=uncertain.
% Do not assume any states are safe - calculate all 2^51 possibilities!
safeEV(1:3) = [0 0 sum(polls.EV)];

% Checksum to make sure no double assignment or missed assignment.
if sum(safeEV) ~= 538
    warning('Electoral votes do not sum to 538!')
    safeEV
end

% Default the caller-supplied settings when they are absent from the workspace.
if exist('biaspct','var') == 0
    biaspct = 0;
end
% Remember whether this run started unbiased; biaspct itself is changed
% later by the meta-margin search.
forhistory = (biaspct == 0);
if exist('analysisdate','var') == 0
    analysisdate = 0;
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% Load and parse polling data %%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
polldata = load('polls.median.txt');
numlines = size(polldata, 1);

% The file is expected to hold whole 51-row blocks (one row per state).
if mod(numlines, 51) ~= 0
    warning('polls.median.txt is not a multiple of 51 lines long');
end

% Currently we are using median and effective SEM of the last 3 polls.
% To de-emphasize extreme outliers, in place of SD we use (median absolute deviation)/0.6745

% Find the desired 51-row block within the file.
% Column 13 is compared against analysisdate, so it is presumably the date
% of each row. NOTE(review): this assumes rows are in ascending date order,
% which matches the default branch treating the LAST 51 rows as current -
% confirm against whatever writes polls.median.txt.
if analysisdate>0 && numlines>51
    foo=find(polldata(:,13)>=analysisdate);
    % Start at the first row on or after analysisdate. If no row qualifies,
    % fall back to the final block; the cap also keeps ind+50 within the
    % file. (The previous min([1 foo']) always evaluated to 1, so
    % analysisdate was ignored.)
    ind=min([numlines-50 foo']);
    polldata=polldata(ind:ind+50,:);
    clear foo ind
elseif numlines>51
    polldata = polldata(numlines-50:numlines,:);
end

% Use statistics from data file
polls.margin=polldata(:,1)';
polls.SEM=polldata(:,2)';
totalpollsused=sum(polldata(:,3))-1; % assume DC has no polls

% mock data in case we ever need to do a dry run
% Use one poll (as of 17 July)
% polls.margin=[-14 -4 -10 -7 24 3 20 81 9 -7 -6 30 -13 13 1 10 -20 -16 -19 16 13 16 8 18 -6 0 5 -16 -3 12 11 5 13 -5 0 1 -14 9 4 24 -9 -4 -15 -9 -24 34 2 12 -8 11 -13];
% polls.SEM=zeros(1,51)+4;

% EV_median is an external script. The code below reads histogram,
% cumulative_prob, electoralvotes, medianEV(1) and (later) stateprobs
% after this call, so it presumably defines them.
EV_median % where the magic happens!

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%% More calculations %%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Plot the histogram (in percent), with a red vertical marker at 269 EV
close
plot([269 269],[0 max(histogram)*105],'-r')
hold on
bar(histogram*100)
axis([200 380 0 max(histogram)*105])
xlabel('Electoral votes for Obama');
ylabel('Probability of exact # of electoral votes (%)')

% Calculate median and confidence bands from cumulative histogram
confidenceintervals(3)=electoralvotes(find(cumulative_prob<=0.025,1,'last'));   % 95-pct lower limit
confidenceintervals(1)=electoralvotes(find(cumulative_prob<=0.15865,1,'last')); % 1-sigma lower limit
confidenceintervals(2)=electoralvotes(find(cumulative_prob>=0.84135,1,'first')); % 1-sigma upper limit
confidenceintervals(4)=electoralvotes(find(cumulative_prob>=0.975,1,'first'));   % 95-pct upper limit

% Cumulative probability at the first EV count >= 269
probability_GOP_win=cumulative_prob(find(electoralvotes>=269,1,'first'));

% Mode = index of the histogram peak. Taking the index output of max()
% yields a single value even when several bins tie for the maximum; the
% previous find(histogram==max(histogram)) returned a vector in that case
% and the scalar assignment failed.
[peakprob, peakindex]=max(histogram);
modeEV(1)=peakindex;
clear peakprob peakindex

medianEV(2)=538-medianEV(1); % assume no EV go to a third candidate
modeEV(2)=538-modeEV(1); % assume no EV go to a third candidate

% Re-calculate safe EV for each party
% stateprobs holds candidate 1's win probability per state, in percent.
% A state is "safe" for candidate 1 at >=95 and for candidate 2 at <=5;
% everything else is counted as uncertain.
safeEV(1)=sum(polls.EV(stateprobs>=95));
safeEV(2)=sum(polls.EV(stateprobs<=5));
safeEV(3)=538-safeEV(1)-safeEV(2);

% Build a space-separated list of the uncertain states' names.
% numel() is used for the loop bound because max(size(x)) is 1 for a
% 1-by-0 empty result, which made the loop run once and index past the end
% when there were no uncertain states.
uncertain=find(stateprobs>5 & stateprobs<95);
uncertainstates='';
for i=1:numel(uncertain)
    uncertainstates=[uncertainstates statename(uncertain(i)) ' '];
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%% Daily update first part %%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Write a file of unbiased statewise percentage probabilities
% Only write this file if bias is zero!
if biaspct==0
    % Snapshot the whole workspace to EVoutput.mat
    save('EVoutput')

    % Today's output
    % dlmwrite('EV_estimates.csv',medianEV)
    % dlmwrite('EV_estimates.csv',modeEV,'-append')
    % dlmwrite('EV_estimates.csv',safeEV,'-append')
    % dlmwrite('EV_estimates.csv',confidenceintervals,'-append')
    % dlmwrite('EV_estimates.csv',totalpollsused,'-append')
    % The values are collected here; the file itself is written at the end
    % of the script, after the meta-margin has been appended.
    outputs=[medianEV modeEV safeEV confidenceintervals totalpollsused];

    % Export probability histogram
    dlmwrite('EV_histogram.csv',histogram')

    % Export state-by-state percentage probabilities as CSV, including 2-letter state abbreviations
    % The file is rebuilt from scratch because rows are appended one at a time.
    if exist('stateprobs.csv','file')
        delete('stateprobs.csv')
    end
    for i=1:num_states
        % Each row: probability,margin,state name - assembled as text and
        % written with an empty delimiter so the characters stay contiguous.
        foo=[num2str(stateprobs(i)) ',' num2str(polls.margin(i)) ',' statename(i)];
        dlmwrite('stateprobs.csv',foo,'-append','delimiter','')
    end
    % dlmwrite('stateprobs.csv',uncertainstates,'-append','delimiter','')
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%% The meta-margin %%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Search for the bias at which candidate 1's median EV first reaches 269;
% the meta-margin is the negative of that bias.
% NOTE(review): this overwrites biaspct and re-runs EV_median, so anything
% EV_median sets (medianEV, histogram, ...) reflects the biased run after
% this point - confirm callers reset biaspct before running the script again.
reality=probability_GOP_win;

% Initial guess derived from the current median EV, offset by -2 so the
% stepping loop below approaches the threshold from below.
biaspct=round((269-medianEV(1))/1.25)/10-2;
EV_median
while medianEV(1) < 269
    biaspct=biaspct+.02;   % step the bias in 0.02 increments
    EV_median
end
metamargin=-biaspct;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% Daily and History Update %%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Write today's estimates (with the meta-margin appended) and extend the
% history file.
% `outputs` is only built when the run started with biaspct==0, which is
% exactly what forhistory records. Guarding on it keeps a biased run from
% failing on an undefined `outputs`, or from appending to a stale `outputs`
% left in the workspace by an earlier run and overwriting EV_estimates.csv
% with it. This matches the daily-update section, which also writes its
% files only for unbiased runs.
if forhistory
    outputs = [outputs metamargin];
    dlmwrite('EV_estimates.csv', outputs)

    % The history row is the value in column 13 of the first data row
    % followed by the estimates; it is only written when the data file has
    % exactly 13 columns.
    if size(polldata,2)==13
        dlmwrite('EV_estimate_history.csv',[polldata(1,13) outputs],'-append')
    end
end