Code

See More Code at my Github:
github

______________________________________________________________________


Below is a brief demonstration of how I implemented Python, Matlab, and R on three large data projects ranging from 200 observations to half a million. The first is an automated process that scans Department of Energy Green Button energy files. The second implemented in Matlab models 200 electric vehicles on the island of Lanai. The last in R shows my methodology for a targeted marketing statistical model that identifies the statistically significant parameter in our target market for electric vehicles.
______________________________________________________________________

 

python
Users submitted zipped and unzipped energy and gas 15 minute, hourly, and monthly data. I want just the energy data and not the gas. I also want to take the 15 minute data that exists and resample to hourly data using Python. The modules that I use to extract this XML data and place the results into one spreadsheet include Numpy for array handling, Pandas for dataframe structures and resampling, etree for xml reading, regex for regular expressions, and many others.


__author__ = 'john.horn'

from GreenButtonGrap import *

FolderWithZips = FolderContainingZippedGreenButtonFiles

OutputPathUnzip = FolderContainingUnzippedFiles
XMLSummaryDataPath = FolderPathToContainSummerizedGreenButtonData
XMLT_Location = FilePathContainingGreenButtonXLSTFile

# Unzip the files and move all unzipped and apriori unzipped xmls to the destination folder
#Also rename the xml unzipped files so that it contains the applicant ID #

DeleteContentsOfOutputFolder(OutputPathUnzip)
UnzipFilesInFolder(FolderWithZips,OutputPathUnzip)
MoveXMLInFolder(FolderWithZips,OutputPathUnzip)

#Loop through each xml file and find the interval data we are interested in
GreenButtonXML(OutputPathUnzip,XMLSummaryDataPath,XMLT_Location)

# INDIVIDUAL MODULES TO PROCESS ZIPPED GREENBUTTON FILES

#Overwrite Previous Results
def DeleteContentsOfOutputFolder(OutputFolderContainUnzip):
    import os
    for the_file in os.listdir(OutputFolderContainUnzip):
        file_path = os.path.join(OutputFolderContainUnzip, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception, e:
            print e

#Unzip XML Files
def UnzipFilesInFolder(FolderContainZips, OutputFolderContainUnzip):
    import zipfile
    import os
    import re
    RenameList = {}
    if not os.path.exists(OutputFolderContainUnzip):
        os.makedirs(OutputFolderContainUnzip)

    filename = os.listdir(FolderContainZips)
    for file in filename:
        fileName, fileExtension = os.path.splitext(file)

        if str(fileExtension) == '.zip':
            CompleteFile = FolderContainZips + str(file)
            fh = open(CompleteFile, 'rb')
            z = zipfile.ZipFile(fh)
            SurveyId = re.search(r'(\w+\-\w+\-)',str(file))
            SurveyId = SurveyId.group(1)
            #print SurveyId
            for name in z.namelist():
                newName = SurveyId + name
                RenameList[name] = newName
                #print name
                #print newName
                outpath = OutputFolderContainUnzip
                z.extract(name, outpath)
            fh.close()

    UnzipFiles = os.listdir(OutputFolderContainUnzip)
    os.chdir(str(OutputFolderContainUnzip))
    for file in UnzipFiles:
        if file in RenameList:
            MyRename = str(RenameList[file])
            OldFile = str(file)
            OldFile = os.path.join(OutputFolderContainUnzip, OldFile)
            MyRename = os.path.join(OutputFolderContainUnzip, MyRename)
            os.rename(OldFile,MyRename)

#Move XML Files
def MoveXMLInFolder(FolderContainsXML,OutputFolderContainXML):
    import os
    from shutil import copyfile

    if not os.path.exists(OutputFolderContainXML):
        os.makedirs(OutputFolderContainXML)

    filename = os.listdir(FolderContainsXML)
    for file in filename:
        fileName, fileExtension = os.path.splitext(file)

        if str(fileExtension) == '.xml':
            CompleteFile = FolderContainsXML + str(file)
            copyfile(CompleteFile, OutputFolderContainXML + str(file))

#Extract the XML using etree

def transform(xmlPath, xslPath):

    from lxml import etree
    import re
# read xsl file
    xslRoot = etree.fromstring(open(xslPath).read())

    transform = etree.XSLT(xslRoot)

# read xml
    xmlRoot = etree.fromstring(open(xmlPath).read())

# transform xml with xslt
    transRoot = transform(xmlRoot)

# return transformation result
    SearchText = str(etree.tostring(transRoot))

    StartTime = re.findall(r'
<td>(\w+-\w+-\w+\s\w+:\w+)',SearchText)
    EndTime = re.findall(r'</span>(\w+-\w+-\w+\s\w+:\w+)',SearchText)
    value = re.findall(r'"right">([+-]?\d+)</td>
',SearchText)
    return StartTime, value
#if __name__ == '__main__':
#    print(transform('./s0101m.mul0.xml', './tipitaka-latn.xsl'))

#Main Module to sort the Gas and Energy Green Button files and extract data from XML
def GreenButtonXML(InputFolderWithXML,OutputFolderWithSummaryData,XMLT_Location):
    import os
    import sys
    import re
    import numpy as np
    from pandas import *
    #import pandas as pd
    from datetime import datetime
    import collections

    if not os.path.exists(OutputFolderWithSummaryData):
        os.makedirs(OutputFolderWithSummaryData)

    MyXmlFiles = os.listdir(InputFolderWithXML)
    Counter = 0
    a = {}
    #print len(MyXmlFiles)
    Denom = len(MyXmlFiles)
    for xmlFile in MyXmlFiles:
        Counter += 1

        print str(100*Counter/Denom) + "% Complete"
        #print Counter
        print xmlFile

        if (re.search('Gas', xmlFile)) or (os.path.getsize(InputFolderWithXML + str(xmlFile)) == 0):
            GasRecord = True
        else:
            GasRecord = False

        if GasRecord == False:
            if xmlFile.endswith('.xml'):
                try:
                    NewOutput = transform(xmlFile, XMLT_Location)
                    StartTime = NewOutput[0]
                    WattHours = NewOutput[1]
                    ClientID = re.search(r'(\d+)-', xmlFile)
                    deleteList = []
                    for dup in sorted(list_duplicates(StartTime)):
                        deleteList.append(dup[1][0])

                    #Ideally you would take the 15 min interval over the 1 hour time step
                    #Need to find duplicates in the StartTime
                    #Take the smaller of the Watthours as this is probably the 15 min interval

                    deleteList = [int(i) for i in deleteList]
                    deleteList.sort(reverse=True)

                    for deleteCount in deleteList:
                        del StartTime[deleteCount]
                        del WattHours[deleteCount]
                    print(WattHours)
                    print(StartTime)
                    a[str(ClientID.group(1))] = Series(WattHours, index=StartTime)
                    df = DataFrame(a)
                except AssertionError:
                    print "error at ID: " + ClientID.group(1)

df.to_csv(OutputFolderWithSummaryData + 'GBSummarized.csv')

from collections import defaultdict

def list_duplicates(seq):
    tally = defaultdict(list)
    for i,item in enumerate(seq):
        tally[item].append(i)
    return ((key,locs) for key,locs in tally.items()
                            if len(locs)>1)

______________________________________________________________________
matlab
The goal of this project was to model 200 electric vehicles on the island of Lanai, Hawaii and optimize the solar panel capacity, # chargers, and battery storage to support the vehicles. Ridership was randomized and an ensemble or random models was created to arrive at average conditions over many runs.


VehiclesInTrip = 4;
NumberOfDays = 1;
CityBatteryCapacity = 0;
ResortBatteryCapacity = 0;
NumChargersResort = 40;
NumChargersCity = 40;
SolarCapacity = 175;

   %RELEASE THE CARS!
            for Count3 = 1:length(Junction_Table(:,3))
               %Release Resort Cars
               if Junction_Table(Count3,3) == 1
                   for Quantity1 = 1:Junction_Table(Count3,2)
                       % Ask if there is an available car in the Resort if
                       % not skip that for loop
                    if length(Lanai_Resort(:,1)) == 0
                        Overbooked_Resort(Quarter) = Overbooked_Resort(Quarter) + 1;
                        continue
                    end

                   Lanai_Resort(length(Lanai_Resort(:,1)),2) = Lanai_Resort(length(Lanai_Resort(:,1)),2) - Junction_Table(Count3,4);
                   Lanai_Resort(length(Lanai_Resort(:,1)),4) = Lanai_Resort(length(Lanai_Resort(:,1)),4) + Junction_Table(Count3,6);

                   %Switching Locations
                   if Junction_Table(Count3,5) == 1
                   Lanai_Resort(length(Lanai_Resort(:,1)),3) = 0;
                   else
                   Lanai_Resort(length(Lanai_Resort(:,1)),3) = 1;
                   end

                   CarsDriving = [CarsDriving; Lanai_Resort(length(Lanai_Resort(:,1)),:)];
                   Lanai_Resort(length(Lanai_Resort(:,1)),:) = [];

                   %Possible skip location

                   end

               else
               %Release City Cars
                   for Quantity2 = 1:Junction_Table(Count3,2)

                       % Ask if there is an available car in the Resort if
                       % not skip that for loop
                    if length(Lanai_City(:,1)) == 0
                        Overbooked_City(Quarter) = Overbooked_City(Quarter) + 1;
                        continue
                    end

                   Lanai_City(length(Lanai_City(:,1)),2) = Lanai_City(length(Lanai_City(:,1)),2) - Junction_Table(Count3,4);
                   Lanai_City(length(Lanai_City(:,1)),4) = Lanai_City(length(Lanai_City(:,1)),4) + Junction_Table(Count3,6);

                   %Switching Locations
                   if Junction_Table(Count3,5) == 1
                   Lanai_City(length(Lanai_City(:,1)),3) = 1;
                   else
                   Lanai_City(length(Lanai_City(:,1)),3) = 0;
                   end

                   CarsDriving = [CarsDriving; Lanai_City(length(Lanai_City(:,1)),:)];
                   Lanai_City(length(Lanai_City(:,1)),:) = [];

                   %Possible skip location
                   end

               end

            end
     %RELEASE THE CARS!

        end
     %RECEIVE THE CARS!
CarsAtResort(Quarter) =  length(Lanai_Resort(:,1));
CarsAtCity(Quarter) = length(Lanai_City(:,1));
CarsOutDriving(Quarter) = length(CarsDriving(:,1));

     %for ReceiveSkim = 1:length(CarsDriving(:,1))
     CurrentRow = length(CarsDriving(:,1)) + 1;

     while CurrentRow ~= 1
     CurrentRow = CurrentRow - 1;

     if Quarter == CarsDriving(CurrentRow,4)
        %fprintf('the quarter %d \n', Quarter)

         if CarsDriving(CurrentRow,3) == 0
             City_Charging(Quarter) = City_Charging(Quarter) + (CarBatteryCapacity - CarsDriving(CurrentRow,2));
             %ASSUMPTION INSTANT CHARGING
         %    CarsDriving(CurrentRow,2) = 24;
             Lanai_City = [CarsDriving(CurrentRow,:); Lanai_City];
             CarsDriving(CurrentRow,:) = [];
         else
             Resort_Charging(Quarter) = Resort_Charging(Quarter) + (CarBatteryCapacity - CarsDriving(CurrentRow,2));
             %ASSUMPTION INSTANT CHARGING
        %     CarsDriving(CurrentRow,2) = 24;
             Lanai_Resort = [CarsDriving(CurrentRow,:); Lanai_Resort];
             CarsDriving(CurrentRow,:) = [];
         end

     end

     end

     %RECEIVE THE CARS!

     %SORT THE VEHICLES (WE WANT TO SORT THE VEHICLES EVERY 15 MINUTES
Lanai_Resort = sortrows(Lanai_Resort,2);
Lanai_City = sortrows(Lanai_City,2);
     %SORT THE VEHICLES

%CHARGE THE VEHICLES

                ChargerResortEffective = NumChargersResort;
                ChargerCityEffective = NumChargersCity;

                %Charge Resort Vehicles
                if length(Lanai_Resort(:,2)) < NumChargersResort
                ChargerResortEffective = length(Lanai_Resort(:,2));
                end

                ChargingQuantResort = Insol_kWh2(Quarter)/ChargerResortEffective;

                if ChargingQuantResort > 1.875
                    ChargingQuantResort = 1.875;
                    %fprintf('going over resort max at Q: %f \n', Quarter)
                end

                for CounterChargeResort = 1:ChargerResortEffective
                    ChargeResortDeficit = CarBatteryCapacity - Lanai_Resort(CounterChargeResort,2);
                    if ChargingQuantResort > ChargeResortDeficit
                       WastedEResortSupplyDemand(Quarter) = WastedEResortSupplyDemand(Quarter)+ (ChargingQuantResort - ChargeResortDeficit);
                       EnergyDelivered = ChargeResortDeficit;
                       %disp('testForErrorHereResort1')
                       testForErrorHereResort1 = 1;
                    else
                        EnergyDelivered = ChargingQuantResort;
                    end
                    Used_Energy_Resort(Quarter) = Used_Energy_Resort(Quarter) + EnergyDelivered;
                    LeftOverEResort(Quarter) = LeftOverEResort(Quarter) - EnergyDelivered;
                    Lanai_Resort(CounterChargeResort,2) = Lanai_Resort(CounterChargeResort,2) + ChargingLoss*EnergyDelivered;
                end

                %Charge City Vehicles
                if length(Lanai_City(:,2)) < NumChargersCity
                ChargerCityEffective = length(Lanai_City(:,2));
                end

                ChargingQuantCity = Insol_kWh2(Quarter)/ChargerCityEffective;

                if ChargingQuantCity > 1.875
                    ChargingQuantCity = 1.875;
                    %fprintf('going over city max at Q: %f \n', Quarter)
                end

                for CounterChargeCity = 1:ChargerCityEffective
                    ChargeCityDeficit = CarBatteryCapacity - Lanai_City(CounterChargeCity,2);
                    if ChargingQuantCity > ChargeCityDeficit
                       WastedECitySupplyDemand(Quarter) = WastedECitySupplyDemand(Quarter) + (ChargingQuantCity - ChargeCityDeficit);
                       EnergyDelivered = ChargeCityDeficit;
                       %disp('testForErrorHereCity1')
                       testForErrorHereCity1 = 1;
                    else
                        EnergyDelivered = ChargingQuantCity;
                    end
                    Used_Energy_City(Quarter) = Used_Energy_City(Quarter) + EnergyDelivered;
                    LeftOverECity(Quarter) = LeftOverECity(Quarter) - EnergyDelivered;
                    Lanai_City(CounterChargeCity,2) = Lanai_City(CounterChargeCity,2) + ChargingLoss*EnergyDelivered;
                end

%CHARGE THE Local Batteries
                    if Quarter > 1
                    CityBatteryLevel(Quarter) = CityBatteryLevel(Quarter - 1) + StationaryChargingLoss*LeftOverECity(Quarter);
                    end

                    if CityBatteryLevel(Quarter) > CityBatteryCapacity
                        CityBatteryLevel(Quarter) = CityBatteryCapacity;
                    end

                     if (CityBatteryCapacity - CityBatteryLevel(Quarter))  < LeftOverECity(Quarter)
                         LeftOverECity(Quarter) = LeftOverECity(Quarter) - (CityBatteryCapacity - CityBatteryLevel(Quarter));
                     else
                         LeftOverECity(Quarter) = 0;
                     end

                     if Quarter > 1
                    ResortBatteryLevel(Quarter) = ResortBatteryLevel(Quarter - 1) + StationaryChargingLoss*LeftOverEResort(Quarter);
                     end

                    if ResortBatteryLevel(Quarter) > ResortBatteryCapacity
                        ResortBatteryLevel(Quarter) = ResortBatteryCapacity;
                    end

                     if (ResortBatteryCapacity - ResortBatteryLevel(Quarter))  < LeftOverEResort(Quarter)
                        LeftOverEResort(Quarter) = LeftOverEResort(Quarter) - (ResortBatteryCapacity - ResortBatteryLevel(Quarter));
                        else
                         LeftOverEResort(Quarter) = 0;
                     end

        %CHARGE VEHICLES AT NIGHT         

         DayScaler = floor(Quarter/97);

         QuarterTest = Quarter - (96*DayScaler);
         %ONLY AT NIGHT

         if 84 < (QuarterTest) | (QuarterTest) < 24

                ChargerResortEffective = NumChargersResort;
                ChargerCityEffective = NumChargersCity;

                %Charge City Vehicles
                if length(Lanai_City(:,2)) < NumChargersCity
                ChargerCityEffective = length(Lanai_City(:,2));
                end

                ChargingQuantCity = CityBatteryLevel(Quarter)/ChargerCityEffective;

                if ChargingQuantCity > 1.875
                    ChargingQuantCity = 1.875;
                end

                for CounterChargeCity = 1:ChargerCityEffective
                    ChargeCityDeficit = CarBatteryCapacity - Lanai_City(CounterChargeCity,2);
                    if ChargingQuantCity > ChargeCityDeficit
                       EnergyDelivered = ChargeCityDeficit;
                       %fprintf('if %f \n', EnergyDelivered)
                    else
                        EnergyDelivered = ChargingQuantCity;
                       %fprintf('else %f \n', EnergyDelivered)
                    end
                    Used_Energy_City(Quarter) = Used_Energy_City(Quarter) + EnergyDelivered;
                    CityBatteryLevel(Quarter) = CityBatteryLevel(Quarter) - EnergyDelivered;
                    Lanai_City(CounterChargeCity,2) = Lanai_City(CounterChargeCity,2) + ChargingLoss*EnergyDelivered;
                end

                %disp(EnergyDelivered)
                %fprintf('energy delivered city: %f \n', EnergyDelivered)
                %fprintf('charger quant city and resort: %f & %f \n', ChargingQuantCity, ChargingQuantResort)

                %Charge Resort Vehicles
                if length(Lanai_Resort(:,2)) < NumChargersResort
                ChargerResortEffective = length(Lanai_Resort(:,2));
                end

                ChargingQuantResort = ResortBatteryLevel(Quarter)/ChargerResortEffective;

                if ChargingQuantResort > 1.875
                    ChargingQuantResort = 1.875;
                end

                for CounterChargeResort = 1:ChargerResortEffective
                    ChargeResortDeficit = CarBatteryCapacity - Lanai_Resort(CounterChargeResort,2);
                    if ChargingQuantResort > ChargeResortDeficit
                       EnergyDelivered = ChargeResortDeficit;
                    else
                        EnergyDelivered = ChargingQuantResort;
                    end
                    Used_Energy_Resort(Quarter) = Used_Energy_Resort(Quarter) + EnergyDelivered;
                    ResortBatteryLevel(Quarter) = ResortBatteryLevel(Quarter) - EnergyDelivered;
                    Lanai_Resort(CounterChargeResort,2) = Lanai_Resort(CounterChargeResort,2) + ChargingLoss*EnergyDelivered;
                end

         end
    end

    TotalEUsed(Repeat) = (sum(Used_Energy_Resort) + sum(Used_Energy_City));
    TotalEWastedFromVehicleSat = (sum(LeftOverECity) + sum(LeftOverEResort) - sum(WastedEResortSupplyDemand) - sum(WastedECitySupplyDemand));
    TotalEWastedFromChargingSat = sum(WastedEResortSupplyDemand) + sum(WastedECitySupplyDemand);
    TotalEWasted(Repeat) = TotalEWastedFromVehicleSat + TotalEWastedFromChargingSat;

    LeftOverECityFormatted(Repeat) = sum(LeftOverECity) - sum(WastedECitySupplyDemand);
    LeftOverEResortFormatted(Repeat) = sum(LeftOverEResort) - sum(WastedEResortSupplyDemand);
    WastedECityChargerRun(Repeat) = sum(WastedECitySupplyDemand);
    WastedEResortChargerRun(Repeat) = sum(WastedEResortSupplyDemand);
    TripCountLog(Repeat) = Counterooni;

end

figure;

x = linspace(1, NumberOfDays*NumOfQuarters/4,length(Resort_Charging));
plot(x,Resort_Charging, 'LineWidth', 2, 'Color', 'green');
title('Resort Charging Demand');
xlabel('Time(hour)');
ylabel('kwh demand');

hold on

title(sprintf('EV Load versus Time; %d Vehicles/Hour; Two %d kW Panels',4*VehiclesInTrip,SolarCapacity));
xlabel('Time(hour)');
ylabel('kWh Demand');
plot(x,City_Charging, 'LineWidth', 2, 'Color', 'blue')

plot(Insol_kWh_Display, 'LineWidth', 2, 'Color', 'red')

h1eg1 = legend('Resort', 'City', 'Ave. Insolation', 'Location', 'NorthEast');

h_xlabel = get(gca,'XLabel');
set(h_xlabel,'FontSize',14);
h_ylabel = get(gca,'YLabel');
set(h_ylabel,'FontSize',14);
h_Title = get(gca,'Title');
set(h_Title,'FontSize',14);

hold off
figure(2);

x = linspace(1, NumberOfDays*NumOfQuarters/4,length(Resort_Charging));
plot(x, CarsAtResort, 'LineWidth', 2, 'Color', 'green')

hold on
plot(x,CarsAtCity, 'LineWidth', 2, 'Color', 'blue')
plot(x,CarsOutDriving, 'LineWidth', 2, 'Color', 'black')
%plot(x,Overbooked_City, 'LineWidth', 2, 'Color', 'red')
%plot(x,Overbooked_Resort, 'LineWidth', 2, 'Color', 'magenta')

title(sprintf('Vehicle Tracking; %d Vehicles/Hour; Two %d kW Panels',4*VehiclesInTrip, SolarCapacity));
xlabel('Time(hour)');
ylabel('# Vehicles');
h1eg1 = legend('Resort', 'City','Driving','Location', 'NorthEast');
%h1eg1 = legend('Resort', 'City','Driving','Resort Overbooked', 'City Overbooked', 'Location', 'NorthEast');

h_xlabel = get(gca,'XLabel');
set(h_xlabel,'FontSize',14);
h_ylabel = get(gca,'YLabel');
set(h_ylabel,'FontSize',14);
h_Title = get(gca,'Title');
set(h_Title,'FontSize',14);

hold off
figure(3);
x = linspace(1, NumberOfDays*NumOfQuarters/4,length(Resort_Charging));
plot(x,Used_Energy_City, 'LineWidth', 2, 'Color', 'blue')

hold on
plot(x,Used_Energy_Resort, 'LineWidth', 2, 'Color', 'green')
title(sprintf('Solar & Battery Energy Used to Charge Vehicles(kWh); %d Vehicles/Hour; Two %d kW Panels',4*VehiclesInTrip, SolarCapacity));
xlabel('Time(hour)');
ylabel('Energy (kWh)');
h1eg1 = legend('City', 'Resort', 'Location', 'NorthEast');

h_xlabel = get(gca,'XLabel');
set(h_xlabel,'FontSize',14);
h_ylabel = get(gca,'YLabel');
set(h_ylabel,'FontSize',14);
h_Title = get(gca,'Title');
set(h_Title,'FontSize',14);

LeftOverECityFormatted = LeftOverECity - WastedECitySupplyDemand;
LeftOverEResortFormatted = LeftOverEResort - WastedEResortSupplyDemand;

figure(4)
hold off
plot(x,LeftOverECityFormatted, 'LineWidth', 2, 'Color', 'blue');
title(sprintf('Energy Not Captured by Vehicles and Batteries(kWh); %d Vehicles/Hour, Two %d kW Panels',4*VehiclesInTrip, SolarCapacity));
xlabel('Time(hour)');
ylabel('Energy (kWh)');
hold on
plot(x,LeftOverEResortFormatted, 'LineWidth', 2, 'Color', 'green');
plot(x,WastedECitySupplyDemand, 'LineWidth', 2, 'Color', [0 0 0.5]);
plot(x,WastedEResortSupplyDemand, 'LineWidth', 2, 'Color', [0 0.7 0]);
h1eg1 = legend('City Charger Saturation', 'Resort Charger Saturation', 'City Vehicle Saturation', 'Resort Vehicle Saturation', 'Location', 'NorthWest');

h_xlabel = get(gca,'XLabel');
set(h_xlabel,'FontSize',14);
h_ylabel = get(gca,'YLabel');
set(h_ylabel,'FontSize',14);
h_Title = get(gca,'Title');
set(h_Title,'FontSize',14);

%  figure(5)
%  plot(x, round(ResortBatteryLevel), 'LineWidth', 2, 'Color', 'green');
%  hold on
%  title(sprintf('Stationary Battery Levels; %d Vehicles/Hour, Two %d kW Panels',4*VehiclesInTrip, SolarCapacity));
%  xlabel('Time(hour)');
%  ylabel('Energy Stored (kWh)');
%  plot(x, round(CityBatteryLevel), 'LineWidth', 2, 'Color', 'blue')
%  h1eg1 = legend('Resort', 'City', 'Location', 'NorthEast');
%
%  hold off
%
% %CityMean(Repeat) = mean(Lanai_City(:,2));
% %ReportMean(Repeat) = mean(Lanai_Resort(:,2));
%
%
% %mean(CityMean)
% %mean(ReportMean)
fprintf('Two %dkW Systems \n',SolarCapacity)
fprintf('%d Charging Stations at each site \n',NumChargersResort)
fprintf('Average Day %d Run over %d cycles \n', NumberOfDays,CycleRepeats)
fprintf('Mean Battery Level at City: %f (kWh) \n',mean(Lanai_City(:,2)))
fprintf('Mean Battery Level at Resort: %f (kWh) \n',mean(Lanai_Resort(:,2)))
% % % %  Resort_Fit = polyfit(x,Resort_Charging,1);
% % % %  City_Fit = polyfit(x,City_Charging,1);
% % % %  fprintf('Slope Resort: %f \n', Resort_Fit(1))
% % % %  fprintf('Slope City: %f \n', City_Fit(1))
fprintf('total Solar Energy / day: %f \n',(2*sum(Insol_kWh2))/NumberOfDays)

fprintf('total energy used / day: %f \n',mean(TotalEUsed)/NumberOfDays)
fprintf('total energy wasted / day: %f \n',mean(TotalEWasted)/NumberOfDays)
fprintf('Lossed %f kWh/day due to Charging Station saturation \n', mean((LeftOverECityFormatted + LeftOverEResortFormatted)/NumberOfDays))
fprintf('Lossed %f kWh/day due to Vehicle Battery saturation \n', mean((WastedEResortChargerRun + WastedECityChargerRun)/NumberOfDays))
fprintf('Mean Trip Count/day: %f +/- %f \n', mean(TripCountLog/NumberOfDays), std(TripCountLog/NumberOfDays))

%LeftOverECityFormatted
%LeftOverEResortFormatted
%WastedEResortChargerRun
%WastedECityChargerRun

% fprintf('total energy wasted + used: %f \n', TotalEUsed + TotalEWasted)
% fprintf('total energy in Resort battery: %f, \n', sum(ResortBatteryLevel))
% fprintf('total energy in City battery: %f, \n', sum(CityBatteryLevel))
%
% if (round(sum(TotalEWasted + TotalEUsed)) == round(2*sum(Insol_kWh2)))
%     disp('Match')
% else
%     disp('No Match')
% end
%
% fprintf('Difference is: %f , LeftOverEResort is: %f, WastedEResortSupplyDemand is: %f \n,',TotalEUsed + TotalEWasted - (2*sum(Insol_kWh2)), sum(LeftOverEResort), sum(WastedEResortSupplyDemand))
% fprintf('Difference is: %f , LeftOverECity is: %f, WastedECitySupplyDemand is: %f \n,',TotalEUsed + TotalEWasted - (2*sum(Insol_kWh2)), sum(LeftOverECity), sum(WastedECitySupplyDemand))
%
% if testForErrorHereResort1 == 1
%     disp('testForErrorHereResort1 = TRUE')
% else
%     disp('testForErrorHereResort1 = FALSE')
% end
% if testForErrorHereCity1 == 1
%     disp('testForErrorHereCity1 = TRUE')
% else
%     disp('testForErrorHereCity1 = FALSE')
% end

______________________________________________________________________
R
This R code uses consumer demographics to predict which part of the population to target based on features such as political affiliation, income, residence features, and many others. The point of this code is to demonstrate how I begin with raw data in R and clean the data, look for interesting correlations, conduct model selection, normalize the coefficients, and conduct a multivariate regression in R.

library(lattice)
library(leaps)

#Load Data
#-----------------------------------------------------------------------------------

MyData <- read.csv("ForR_Analysis.csv")

summary(MyData)

#Filter Data Ranges
#-----------------------------------------------------------------------------------
FiltData <- MyData

colnames(FiltData) <- c("ID", "CVRP","APN","YEAR","SQFT", "LTV","BEDS","INCOME","OCC","BATHS","GARAGE","POOL","POLIT","PARTY")
#YearBuilt make a low threshold

FiltData <- FiltData[- which(FiltData$YEAR < 1850),]
#bldgliving make a low thresholed
FiltData <- FiltData[- which(FiltData$SQFT <= 300),]
#No. of bedroom make a max threshold
FiltData <- FiltData[- which(FiltData$BEDS > 10),]
#LTV make a max threshold
FiltData <- FiltData[- which(FiltData$LTV > 400),]
#Median income make a low threshold
FiltData <- FiltData[- which(FiltData$INCOME == 0),]

#Remove NA's
#-----------------------------------------------------------------------------------

FiltData <- FiltData[complete.cases(FiltData),]
summary(FiltData)

#Cross Correlation Plot
#-----------------------------------------------------------------------------------
#We need to install the levelplot package

#Baths modify values
CROSSCOR <- FiltData[,c("CVRP","YEAR","SQFT","BEDS","LTV","INCOME","OCC","GARAGE","POOL","POLIT")]

summary(CROSSCOR)

cor(CROSSCOR)

levelplot(cor(CROSSCOR),  scales=list(x=list(rot=90)))

summary(FiltData)

#Variable Center and Scale
#-------------------------------------------------------------------

X <- data.frame(matrix(ncol = 0, nrow = nrow(FiltData)))
X$SQFT <- (FiltData$SQFT - mean(FiltData$SQFT))/(2*sd(FiltData$SQFT))
X$YEAR <- (FiltData$YEAR - mean(FiltData$YEAR))/(2*sd(FiltData$YEAR))
X$BEDS <- (FiltData$BEDS - mean(FiltData$BEDS))/(2*sd(FiltData$BEDS))
X$LTV <- (FiltData$LTV - mean(FiltData$LTV))/(2*sd(FiltData$LTV))
X$INCOME <- (FiltData$INCOME - mean(FiltData$INCOME))/(2*sd(FiltData$INCOME))
X$POLIT <- (FiltData$POLIT - mean(FiltData$POLIT))/(2*sd(FiltData$POLIT))

X$GARAGE <- (FiltData$GARAGE - mean(FiltData$GARAGE))
X$POOL <- (FiltData$POOL - mean(FiltData$POOL))
X$OCC <- (FiltData$OCC - mean(FiltData$OCC))

#Linear Regression
mylogit <- glm(FiltData$CVRP ~ YEAR + SQFT + BEDS + LTV + INCOME + OCC + GARAGE + POOL + POLIT, data = X, family = "binomial")
summary(mylogit)

#--------------------------------------------------------------------------

null=lm(FiltData$CVRP~1, data=FiltData)
full=lm(FiltData$CVRP ~ YEAR + SQFT + BEDS + LTV + INCOME + OCC + GARAGE + POOL + POLIT, data = FiltData)
step(null, scope=list(lower=null, upper=full), direction="forward")

#Backward elimination
step(full, data=FiltData, direction="backward")

#Stepwise Regression
step(null, scope = list(upper=full), data=FiltData, direction="both") 

#Model Selection
#BIC 

# The leaps package in R contains the regsubsets() function
# for model selection. This function handles best subset
# as well as stepwise (forward and backward) regression. We
# begin with best subset regression.

# The regsubsets() function will run best subset regression
# by default. It will only save information on the single best
# model for each subset size.

# install.packages("leaps") # run to install first

###subsets <- regsubsets(lpsa ~ .,data=train)
###summary(subsets)

# To have regsubsets save information on every possible model,
# use the optional argument nbest and set really.big = TRUE. The
# nbest refers to the maximum number of subsets of each size to
# save. The maximum number of subsets for any given size is
# choose(p,floor(p/2)), where p = total number of predictors.
# Note that choose(n,k) returns the binomial coefficent for
# n choose k, and floor() is the floor function.

choose(9,0:9) # total number of models for each subset size.
# Syntax: notice that we computed 9 binomial coefficents at once.
choose(9,floor(9/2)) # 8 choose 4

#12
#subsets <- regsubsets(MyData$PART~ X$CVRP + X$BEDROOMS + X$POOL + X$ELEC + X$GAS + X$POLIT + X$BUILT + X$LTV + X$ANNCOR  + X$MEDINCOMEACS + X$SEEDSPV + X$EIGTH, data=MyData,nbest=924, nvmax = 7,really.big=TRUE)

subsets <- regsubsets(FiltData$CVRP ~ YEAR + SQFT + BEDS + LTV + INCOME + OCC + GARAGE + POOL + POLIT, data = FiltData, nbest=126, nvmax = 9,really.big=TRUE)

# The summary(subsets) object has a component called bic, which

# contains the BIC for every model saved in the subsets object.

bic <- summary(subsets)$bic

summary(subsets)$which[which(bic == min(bic)),]

########## Mallow's Cp

# The summary(subsets) object has a component called cp, which
# contains the Mallow's Cp statistic for every model saved in
# the subsets object.

cp <- summary(subsets)$cp

# To determine the model which minimizes Mallow's Cp, find the
# index of the model which minimizes the Cp and extract the model
# from the summary(subsets) object.

summary(subsets)$which[which(cp == min(cp)),]

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s