5.2 Data prepration

There are a lot of columns, and a lot of weird names, so let’s clean things up a little bit. Let’s keep only the rows for which Fuel.Type.Code is ELEC, so that we are dealing with only electric vehicle charging stations. This data set also has ethanol, hydrogen, etc., but we don’t want to focus on those. Let’s also rename some columns, and transform the Status.Code into something more human readable. Since there are some NA values in the lev2 and lev3 columns, we’ll replace each NA with 0. We’ll also clean up some network names,

Code
library(leaflet)
library(htmltools)
library(tidyverse)
library(pubtheme)

d = readRDS('data/EVstations.rds')
colnames(d) = tolower(colnames(d))

d = d %>%
  select(-matches('cng|lng|lpg|hydrogen|e85|bd[.]blends|french|^ng[.]|plus4|level1')) %>%
  filter(fuel.type.code == 'ELEC') %>%
  rename(lev2    = ev.level2.evse.num, 
         lev3    = ev.dc.fast.count, 
         network = ev.network, 
         lat     = latitude, 
         lon     = longitude) %>%
  filter(lat > 20, 
         lon < -60,
         lat < 55, 
         lon >-130) %>%
  mutate(status = case_when(status.code == 'E' ~ 'avail', 
                            status.code == 'P' ~ 'planned', 
                            status.code == 'T' ~ 'temp.unavail', 
                            TRUE ~ ''), 
         lev2  = ifelse(is.na(lev2), 0, lev2), 
         lev3  = ifelse(is.na(lev3), 0, lev3), 
         lev23 = lev2 + lev3) %>%
  mutate(network = ifelse(network == 'Tesla Destination', 
                          'Tesla', 
                          network), 
         network = gsub('é', 'E', network), 
         network = gsub('É', 'E', network), 
         network = gsub('SemaCharge', 'Blink', network), ## https://www.semaconnect.com/
         network = gsub(' Network', '', network)) 

head(d,2)
  fuel.type.code                    station.name        street.address
1           ELEC        LADWP - Truesdale Center    11797 Truesdale St
2           ELEC LADWP - West LA District Office 1394 S Sepulveda Blvd
  intersection.directions        city state   zip station.phone status.code
1                          Sun Valley    CA 91352                         E
2                         Los Angeles    CA 90024                         E
  expected.date groups.with.access.code access.days.time cards.accepted lev2
1                               Private   Fleet use only                  39
2                               Private                                    4
  lev3 ev.other.info       network ev.network.web geocode.status      lat
1    3               Non-Networked                           GPS 34.24832
2    0               Non-Networked                         200-8 34.05254
        lon date.last.confirmed   id              updated.at owner.type.code
1 -118.3880          2023-01-10 1517 2023-02-15 22:45:41 UTC              LG
2 -118.4485          2023-01-10 1519 2023-02-15 22:45:41 UTC              LG
  federal.agency.id federal.agency.name  open.date       ev.connector.types
1                NA                     1999-10-15 CHADEMO J1772 J1772COMBO
2                NA                     2020-02-28                    J1772
  country access.code access.detail.code federal.agency.code facility.type
1      US     private                                              UTILITY
2      US     private                                              UTILITY
  ev.pricing ev.on.site.renewable.source restricted.access rd.blends
1                                                               <NA>
2       Free                                                    <NA>
  rd.blended.with.biodiesel rd.maximum.biodiesel.level nps.unit.name
1                      <NA>                         NA              
2                      <NA>                         NA              
  maximum.vehicle.class src status lev23
1                       csv  avail    42
2                       csv  avail     4

Which are the biggest networks?

Code
dn = d %>% 
  filter(status == 'avail') %>%
  group_by(network) %>% 
  summarise(lev2 = sum(lev2), 
            lev3 = sum(lev3)) %>%
  mutate(tot = lev2 + lev3)

dn %>% arrange(desc(lev2))
dn %>% arrange(desc(lev3))
dn %>% arrange(desc(tot ))
# A tibble: 43 × 4
   network             lev2  lev3   tot
   <chr>              <dbl> <dbl> <dbl>
 1 ChargePoint        58662  2168 60830
 2 Non-Networked      23879  1479 25358
 3 Tesla              14931 21812 36743
 4 Blink              11075    88 11163
 5 FLO                 6180   453  6633
 6 EV Connect          3863   487  4350
 7 Circuit électrique  3631   839  4470
 8 Volta               2922   104  3026
 9 AMPUP               2879     5  2884
10 POWERFLEX           2473    28  2501
# ℹ 33 more rows
# A tibble: 43 × 4
   network             lev2  lev3   tot
   <chr>              <dbl> <dbl> <dbl>
 1 Tesla              14931 21812 36743
 2 Electrify America    166  3542  3708
 3 eVgo                 362  2618  2980
 4 ChargePoint        58662  2168 60830
 5 Non-Networked      23879  1479 25358
 6 Circuit électrique  3631   839  4470
 7 SHELL_RECHARGE      1979   576  2555
 8 FCN                  120   546   666
 9 EV Connect          3863   487  4350
10 FLO                 6180   453  6633
# ℹ 33 more rows
# A tibble: 43 × 4
   network             lev2  lev3   tot
   <chr>              <dbl> <dbl> <dbl>
 1 ChargePoint        58662  2168 60830
 2 Tesla              14931 21812 36743
 3 Non-Networked      23879  1479 25358
 4 Blink              11075    88 11163
 5 FLO                 6180   453  6633
 6 Circuit électrique  3631   839  4470
 7 EV Connect          3863   487  4350
 8 Electrify America    166  3542  3708
 9 Volta               2922   104  3026
10 eVgo                 362  2618  2980
# ℹ 33 more rows

Let’s create a new network column where all networks other than the top 5 networks with Level 2 charging stations a grouped together as an “Other” category. Likewise with Level 3 charging stations.

Code
top5lev2 = c('ChargePoint', 
             'Non-Networked', 
             'Tesla', 
             'Blink', 
             'FLO')

top5lev3 = c('Tesla',
             'Electrify America', 
             'eVgo', 
             'ChargePoint',
             'Non-Networked')
d = d %>%
  mutate(network2 = ifelse(network %in% top5lev2, network, 'Other'), 
         network3 = ifelse(network %in% top5lev3, network, 'Other'), 
         network2 = factor(network2,  levels = c(top5lev2, 'Other')), 
         network3 = factor(network3,  levels = c(top5lev3, 'Other')))
head(d,2)
  fuel.type.code                    station.name        street.address
1           ELEC        LADWP - Truesdale Center    11797 Truesdale St
2           ELEC LADWP - West LA District Office 1394 S Sepulveda Blvd
  intersection.directions        city state   zip station.phone status.code
1                          Sun Valley    CA 91352                         E
2                         Los Angeles    CA 90024                         E
  expected.date groups.with.access.code access.days.time cards.accepted lev2
1                               Private   Fleet use only                  39
2                               Private                                    4
  lev3 ev.other.info       network ev.network.web geocode.status      lat
1    3               Non-Networked                           GPS 34.24832
2    0               Non-Networked                         200-8 34.05254
        lon date.last.confirmed   id              updated.at owner.type.code
1 -118.3880          2023-01-10 1517 2023-02-15 22:45:41 UTC              LG
2 -118.4485          2023-01-10 1519 2023-02-15 22:45:41 UTC              LG
  federal.agency.id federal.agency.name  open.date       ev.connector.types
1                NA                     1999-10-15 CHADEMO J1772 J1772COMBO
2                NA                     2020-02-28                    J1772
  country access.code access.detail.code federal.agency.code facility.type
1      US     private                                              UTILITY
2      US     private                                              UTILITY
  ev.pricing ev.on.site.renewable.source restricted.access rd.blends
1                                                               <NA>
2       Free                                                    <NA>
  rd.blended.with.biodiesel rd.maximum.biodiesel.level nps.unit.name
1                      <NA>                         NA              
2                      <NA>                         NA              
  maximum.vehicle.class src status lev23      network2      network3
1                       csv  avail    42 Non-Networked Non-Networked
2                       csv  avail     4 Non-Networked Non-Networked