>

R 버전 3.4.2

동일한 데이터 프레임의 다른 변수를 기반으로, 조건에 따라 3개의 새 변수를 만들려고 합니다. 의도한 결과는 얻었지만, 다른 방법(dplyr 사용)으로는 훨씬 쉽게 얻을 수 있을 것 같은 출력을 만드는 데 꽤 많은 줄의 코드가 필요했습니다.

재현 가능한 예는 다음과 같습니다.

# Reproducible example data: one row per (city, transport type) pair together
# with the number of vehicles recorded for that pair.
city <- c(
  "London", "London", "Leeds", "Leeds", "Leeds", "Nottingham", "Glasgow",
  "Belfast", "Belfast", "Oxford", "Oxford", "Southampton", "Aberdeen",
  "Bath", "Bath", "Bath", "Preston", "Preston", "Liverpool", "Derby",
  "Hereford"
)
transport <- c(
  "cars", "scooters", "cars", "scooters", "bikes", "cars", "scooters",
  "cars", "bikes", "scooters", "bikes", "bikes", "scooters", "cars",
  "scooters", "bikes", "scooters", "bikes", "bikes", "cars", "bikes"
)
# Counts are stored as numbers rather than quoted strings so that arithmetic
# on df$number (sums, ratios) works without a conversion step; quoted values
# would end up as a factor or character column in the data frame.
number <- c(
  153, 21, 267, 87, 13, 95, 17, 199, 8, 34, 5,
  23, 40, 142, 79, 28, 37, 22, 19, 83, 23
)
df <- data.frame(city, transport, number)

제가 원하는 것은 각 도시에서 교통수단 유형별 비율을 구하는 것입니다.

> df
          city transport number pct.cars pct.scooters pct.bikes
1       London      cars    153    87.93        12.07      0.00
2       London  scooters     21    87.93        12.07      0.00
3        Leeds      cars    267    72.75        23.71      3.54
4        Leeds  scooters     87    72.75        23.71      3.54
5        Leeds     bikes     13    72.75        23.71      3.54
6   Nottingham      cars     95   100.00         0.00      0.00
7      Glasgow  scooters     17     0.00       100.00      0.00
8      Belfast      cars    199    96.14         0.00      3.86
9      Belfast     bikes      8    96.14         0.00      3.86
10      Oxford  scooters     34     0.00        87.18     12.82
11      Oxford     bikes      5     0.00        87.18     12.82
12 Southampton     bikes     23     0.00         0.00    100.00
13    Aberdeen  scooters     40     0.00       100.00      0.00
14        Bath      cars    142    57.03        31.73     11.24
15        Bath  scooters     79    57.03        31.73     11.24
16        Bath     bikes     28    57.03        31.73     11.24
17     Preston  scooters     37     0.00        62.71     37.29
18     Preston     bikes     22     0.00        62.71     37.29
19   Liverpool     bikes     19     0.00         0.00    100.00
20       Derby      cars     83   100.00         0.00      0.00
21    Hereford     bikes     23     0.00         0.00    100.00

위의 데이터 프레임을 생성하는 코드는 다음과 같습니다:

# Convert df to a tibble and add the three percentage columns, initialised to
# 0 so that they exist (as doubles) before being filled in.
# as_tibble() replaces tbl_df(), which dplyr has deprecated; mutate() recycles
# a scalar 0 to every row, so no explicit rep() is needed.
df <- df %>%
  as_tibble() %>%
  mutate(
    pct.cars = 0,
    pct.scooters = 0,
    pct.bikes = 0
  )

# Fill pct.cars / pct.scooters / pct.bikes on every row with the share (in
# percent) that each transport type contributes to that row's city total.
# A transport type that does not occur in a city gets 0.
#
# df$number may hold digit strings or factor labels rather than numbers, so a
# numeric copy is made first; going through as.character() reads factor labels
# (not factor codes) and leaves already-numeric values unchanged. df$number
# itself is not modified.
counts <- as.numeric(as.character(df$number))
city_key <- as.character(df$city)

# Total count per city, indexed by city name.
city_total <- tapply(counts, city_key, sum)

for (kind in c("cars", "scooters", "bikes")) {
  # Rows of other transport types are multiplied by FALSE (0), so the per-city
  # sum only counts `kind`. Cities without that type sum to 0, which removes
  # the need for one branch per present/absent combination, and repeated
  # (city, transport) rows are added together instead of being skipped.
  kind_total <- tapply(counts * (df$transport == kind), city_key, sum)

  # Look the per-city figures up for every row. A city whose total is 0
  # yields NaN here (0 / 0).
  share <- kind_total[city_key] / city_total[city_key] * 100

  # as.vector() drops the names and dim that tapply() results carry, so a
  # plain numeric column is stored.
  df[[paste0("pct.", kind)]] <- as.vector(share)
}

더 쉬운 해결책이나 제안(dplyr 사용)이 있는 분이 계시면 알려 주시면 대단히 감사하겠습니다. 미리 감사드립니다!

  • 답변 # 1

    이것은 어때요?

    # For each row, compute its share (in percent) of the city's total count and
    # spread that share into one perc.<transport> column per transport type.
    # Columns for the other transport types are NA after spread() and are then
    # set to 0.
    df %>%
        group_by(city) %>%
        # number is converted via character before any arithmetic
        mutate(number = as.numeric(as.character(number))) %>%
        mutate(
            pct_value = number / sum(number) * 100,
            pct_name = paste0("perc.", transport)
        ) %>%
        spread(pct_name, pct_value) %>%
        replace(., is.na(.), 0) %>%
        ungroup()
    #   city     transport number perc.bikes perc.cars perc.scooters
    #   <fct>    <fct>      <dbl>      <dbl>     <dbl>         <dbl>
    # 1 Aberdeen scooters   40.0        0          0           100
    # 2 Bath     bikes      28.0       11.2        0             0
    # 3 Bath     cars      142          0         57.0           0
    # 4 Bath     scooters   79.0        0          0            31.7
    # 5 Belfast  bikes       8.00       3.86       0             0
    # 6 Belfast  cars      199          0         96.1           0
    # 7 Derby    cars       83.0        0        100             0
    # 8 Glasgow  scooters   17.0        0          0           100
    # 9 Hereford bikes      23.0      100          0             0
    #10 Leeds    bikes      13.0        3.54       0             0
    
    

    또는 모든 백분율 열의 항목을 채우려면:

    # Compute each row's share (in percent) of its city's total, spread the
    # shares into perc.<transport> columns, then copy the values across the rows
    # of the same city so that every row carries all of its city's percentages.
    df %>%
        group_by(city) %>%
        # number is converted via character before any arithmetic
        mutate(number = as.numeric(as.character(number))) %>%
        mutate(
            pct_value = number / sum(number) * 100,
            pct_name = paste0("perc.", transport)
        ) %>%
        spread(pct_name, pct_value) %>%
        # Two passes: pull known values upwards first, then push them downwards.
        fill(perc.bikes, perc.cars, perc.scooters, .direction = "up") %>%
        fill(perc.bikes, perc.cars, perc.scooters, .direction = "down") %>%
        # Anything still NA after both passes is replaced by 0.
        replace(., is.na(.), 0) %>%
        ungroup()
    ## A tibble: 21 x 6
    #   city     transport number perc.bikes perc.cars perc.scooters
    #   <fct>    <fct>      <dbl>      <dbl>     <dbl>         <dbl>
    # 1 Aberdeen scooters   40.0        0          0           100
    # 2 Bath     bikes      28.0       11.2       57.0          31.7
    # 3 Bath     cars      142         11.2       57.0          31.7
    # 4 Bath     scooters   79.0       11.2       57.0          31.7
    # 5 Belfast  bikes       8.00       3.86      96.1           0
    # 6 Belfast  cars      199          3.86      96.1           0
    # 7 Derby    cars       83.0        0        100             0
    # 8 Glasgow  scooters   17.0        0          0           100
    # 9 Hereford bikes      23.0      100          0             0
    #10 Leeds    bikes      13.0        3.54      72.8          23.7
    ## ... with 11 more rows
    
    

    <hr> 업데이트

    도시별 백분율만 담긴 표를 원한다면 다음과 같이 할 수 있습니다.

    # One row per city with the percentage of each transport type.
    # number is overwritten with its share of the city total and transport with
    # the target column name, so spread() leaves only city plus the perc.*
    # columns; fill = 0 covers transport types a city does not have.
    df %>%
        group_by(city) %>%
        mutate(
            number = as.numeric(as.character(number)),
            # Divide by the city total of number; no ntot column exists in this
            # pipeline.
            number = number / sum(number) * 100,
            transport = paste0("perc.", transport)) %>%
        spread(transport, number, fill = 0) %>%
        ungroup()
        ## A tibble: 13 x 4
        #   city        perc.bikes perc.cars perc.scooters
        #   <fct>            <dbl>     <dbl>         <dbl>
        # 1 Aberdeen          0          0           100
        # 2 Bath             11.2       57.0          31.7
        # 3 Belfast           3.86      96.1           0
        # 4 Derby             0        100             0
        # 5 Glasgow           0          0           100
        # 6 Hereford        100          0             0
        # 7 Leeds             3.54      72.8          23.7
        # 8 Liverpool       100          0             0
        # 9 London            0         87.9          12.1
        #10 Nottingham        0        100             0
        #11 Oxford           12.8        0            87.2
        #12 Preston          37.3        0            62.7
        #13 Southampton     100          0             0
    
    

  • 답변 # 2

    때로는 테이블/행렬 연산으로 간단하게 해결할 수 있습니다.

    # Convert the counts to numeric (via character, so factor labels rather than
    # factor codes are read), cross-tabulate the summed counts by city and
    # transport, and normalise along margin 1 (city) so each row sums to 1.
    df[["number"]] <- as.numeric(as.character(df[["number"]]))
    prop.table(xtabs(number ~ city + transport, data = df), margin = 1)
    #             transport
    #city               bikes       cars   scooters
    #  Aberdeen    0.00000000 0.00000000 1.00000000
    #  Bath        0.11244980 0.57028112 0.31726908
    #  Belfast     0.03864734 0.96135266 0.00000000
    #  Derby       0.00000000 1.00000000 0.00000000
    #  Glasgow     0.00000000 0.00000000 1.00000000
    #  Hereford    1.00000000 0.00000000 0.00000000
    #  Leeds       0.03542234 0.72752044 0.23705722
    #  Liverpool   1.00000000 0.00000000 0.00000000
    #  London      0.00000000 0.87931034 0.12068966
    #  Nottingham  0.00000000 1.00000000 0.00000000
    #  Oxford      0.12820513 0.00000000 0.87179487
    #  Preston     0.37288136 0.00000000 0.62711864
    #  Southampton 1.00000000 0.00000000 0.00000000
    
    

    또한 margin 값 하나만 바꾸면(1 → 2) 각 교통수단 유형 안에서 도시별 비율도 구할 수 있는 유연성이 있습니다.

    # Same cross-tabulation, normalised along margin 2 (transport) so each
    # transport column sums to 1.
    prop.table(xtabs(number ~ city + transport, data = df), margin = 2)
    
    

  • 이전 javascript - 부트 스트랩 아코디언 - 시작 및 종료 카드의 요소에서 별도의 함수 호출
  • 다음 android - 네이티브 디버그 로깅 멈춤 반응