6

My data is structured as follows:

# Example data: Price.1 and Price.2 observed per Id and Date, with each row
# tagged by a group. Id 101 has two rows on 1997-01-04.
DT <- data.table(
  Id = c(1, 1, 1, 1, 10, 100, 100, 101, 101, 101),
  Date = as.Date(c(
    "1997-01-01", "1997-01-02", "1997-01-03", "1997-01-04", "1997-01-02",
    "1997-01-02", "1997-01-04", "1997-01-03", "1997-01-04", "1997-01-04"
  )),
  group = rep(c(1, 2), each = 5),
  Price.1 = c(29, 25, 14, 26, 30, 16, 13, 62, 12, 6),
  Price.2 = c(4, 5, 6, 6, 8, 2, 3, 5, 7, 8)
)

>DT
     Id       Date group Price.1 Price.2
 1:   1 1997-01-01     1      29       4
 2:   1 1997-01-02     1      25       5
 3:   1 1997-01-03     1      14       6
 4:   1 1997-01-04     1      26       6
 5:  10 1997-01-02     1      30       8
 6: 100 1997-01-02     2      16       2
 7: 100 1997-01-04     2      13       3
 8: 101 1997-01-03     2      62       5
 9: 101 1997-01-04     2      12       7
10: 101 1997-01-04     2       6       8

I am trying to cast it (using dcast.data.table):

# Four separate casts: each price column is spread once by Date and once by
# group, summing wherever several rows fall into the same cell.
# `fun.aggregate` is spelled out in full rather than relying on partial
# matching of `fun`.
dcast.data.table(DT, Id ~ Date, fun.aggregate = sum, value.var = "Price.1")
dcast.data.table(DT, Id ~ group, fun.aggregate = sum, value.var = "Price.1")
dcast.data.table(DT, Id ~ Date, fun.aggregate = sum, value.var = "Price.2")
dcast.data.table(DT, Id ~ group, fun.aggregate = sum, value.var = "Price.2")

But rather than four separate outputs, I am trying to get a single table like the following:

    Id 1997-01-01 1997-01-02 1997-01-03 1997-01-04  1  2   Price
1:   1         29         25         14         26 94  0 Price.1
2:  10          0         30          0          0 30  0 Price.1
3: 100          0         16          0         13  0 29 Price.1
4: 101          0          0         62         18  0 80 Price.1
5:   1          4          5          6          6 21  0 Price.2
6:  10          0          8          0          0  8  0 Price.2
7: 100          0          2          0          3  0  5 Price.2
8: 101          0          0          5         15  0 20 Price.2

My current work-around uses rbind, cbind, and merge:

# Work-around: for each price column, merge its by-Date cast with its by-group
# cast on Id, stack the two merged tables, then label each row with the price
# column it came from.
cbind(
  rbind(
    merge(
      dcast.data.table(DT, Id ~ Date, fun.aggregate = sum, value.var = "Price.1"),
      dcast.data.table(DT, Id ~ group, fun.aggregate = sum, value.var = "Price.1"),
      by = "Id", all.x = TRUE # TRUE, not T: T is an ordinary reassignable variable
    ),
    merge(
      dcast.data.table(DT, Id ~ Date, fun.aggregate = sum, value.var = "Price.2"),
      dcast.data.table(DT, Id ~ group, fun.aggregate = sum, value.var = "Price.2"),
      by = "Id", all.x = TRUE
    )
  ),
  # Four Id rows per price column, in the same order as the rbind() above.
  Price = rep(c("Price.1", "Price.2"), each = 4)
)

Is there an existing and cleaner way to do this?

2
  • If each Id only belongs to one group, you could spin off that mapping and thereby simplify your life. Commented Jul 8, 2015 at 16:38
  • That is one crazy reshaping problem! Commented Jul 8, 2015 at 16:41

3 Answers 3

5

I make the assumption that each Id maps to a unique group and get rid of that variable, but otherwise this is essentially the same as @user227710's answer.

# Keep the Id -> group mapping aside, then drop `group` from DT (by reference)
# so that only Id and Date remain as id variables for the reshape.
Idg <- unique(DT[, list(Id, group)])
DT[, group := NULL]

# Melt the price columns to long form, then cast to one column per Date,
# summing duplicates and filling empty cells with 0.
# NOTE(review): `margins` is a reshape2::dcast feature; data.table's dcast
# documents it as not implemented -- confirm which dcast is dispatched here.
res <- dcast(
  melt(DT, id.vars = c("Id", "Date")),
  variable + Id ~ Date,
  fun.aggregate = sum,
  margins = "Date",
  fill = 0,
  value.var = "value"
)

# ...and to bring the group back:
# setDT() is needed before data.table 1.9.5, where using dcast.data.table is
# another option.
setDT(res)
setkeyv(res, "Id")
res[Idg][order(variable, Id)]

which gives

   variable  Id 1997-01-01 1997-01-02 1997-01-03 1997-01-04 (all) group
1:  Price.1   1         29         25         14         26    94     1
2:  Price.1  10          0         30          0          0    30     1
3:  Price.1 100          0         16          0         13    29     2
4:  Price.1 101          0          0         62         18    80     2
5:  Price.2   1          4          5          6          6    21     1
6:  Price.2  10          0          8          0          0     8     1
7:  Price.2 100          0          2          0          3     5     2
8:  Price.2 101          0          0          5         15    20     2
Sign up to request clarification or add additional context in comments.

Comments

4

This was really trial and error; I hope it works.

library(data.table) # version 1.9.4
library(reshape2)

# Long format: one row per Id/Date/group and price variable, with the values
# collected in a `Price` column.
kk <- melt(
  DT,
  id.vars = c("Id", "Date", "group"),
  measure.vars = c("Price.1", "Price.2"),
  value.name = "Price"
)

# Wide format: one column per Date, summing rows that share a cell.
# Use of `margins` borrowed from @Frank.
dcast(
  kk,
  Id + variable + group ~ Date,
  value.var = "Price",
  fun.aggregate = sum, # spelled out in full; `fun` relied on partial matching
  margins = "Date"
)


#    Id variable group 1997-01-01 1997-01-02 1997-01-03 1997-01-04 (all)
# 1   1  Price.1     1         29         25         14         26    94
# 2   1  Price.2     1          4          5          6          6    21
# 3  10  Price.1     1          0         30          0          0    30
# 4  10  Price.2     1          0          8          0          0     8
# 5 100  Price.1     2          0         16          0         13    29
# 6 100  Price.2     2          0          2          0          3     5
# 7 101  Price.1     2          0          0         62         18    80
# 8 101  Price.2     2          0          0          5         15    20

Comments

0

And just to compare, a solution in dplyr (as I have yet to learn how to get my brain to melt things properly.)

    # aggregate the data completely 
## (rows 9 & 10 need to be collapsed, and spread works on a single key)
# Sum both price columns within each Id/Date/group, fold Id and group into a
# single `id_grp` key, then attach that key's totals as s1 (Price.1) and
# s2 (Price.2).
DTT <- 
    DT %>% 
    group_by(Id, Date, group) %>% 
    summarise(Price.1 = sum(Price.1), Price.2 = sum(Price.2)) %>% 
    # NOTE(review): no `by` is given, so the join keys are whatever columns the
    # two tables share; it does not appear to add any columns -- confirm that
    # this step is needed.
    left_join(DT) %>% 
    unite(id_grp, Id, group, sep = "_") %>% 
    group_by(id_grp) %>% 
    mutate(s1 = sum(Price.1), s2 = sum(Price.2))
# Long to wide for the first price set: drop Price.2, spread Price.1 into one
# column per Date, and tag the rows with Price = "Price.1".
DW1 <- 
    DTT %>% 
    select(-Price.2) %>% 
    spread(Date, Price.1) %>% 
    mutate(Price = "Price.1")
# Long to wide for the second price set: drop Price.1, spread Price.2 into one
# column per Date, and tag the rows with Price = "Price.2".
DW2 <- 
    DTT %>% 
    select(-Price.1) %>% 
    spread(Date, Price.2) %>% 
    mutate(Price = "Price.2")
# Stack the two wide tables, split `id_grp` back into Id and group, and route
# each row's total into the column for its group:
#   n1 -- for group 1 rows, s1 or s2 depending on the last character of Price;
#         0 for every other row.
#   n2 -- the same for group 2 rows.
# The final select() keeps Id, the columns whose names start with "19",
# n1/n2 renamed to "1"/"2", and Price; the helper columns g and p are dropped.
DWFin <- 
    bind_rows(DW1,DW2) %>% 
    separate(id_grp, c("Id", "group")) %>% 
    mutate(g = group, p = str_sub(Price, -1),
           n1 = ifelse(group == 1 & p == 1, s1, ifelse(group == 1 & p == 2, s2, 0)),
           n2 = ifelse(group == 2 & p == 2, s2, ifelse(group == 2 & p == 1, s1, 0))) %>% 
    select(Id, starts_with("19"), "1" = n1, "2" = n2, Price)
# Print the combined result.
DWFin

Source: local data table [8 x 8]

# tbl_dt [8 × 8]
     Id `1997-01-01` `1997-01-02` `1997-01-03` `1997-01-04`   `1`   `2`   Price
  <chr>        <dbl>        <dbl>        <dbl>        <dbl> <dbl> <dbl>   <chr>
1     1           29           25           14           26    94     0 Price.1
2    10           NA           30           NA           NA    30     0 Price.1
3   100           NA           16           NA           13     0    29 Price.1
4   101           NA           NA           62           18     0    80 Price.1
5     1            4            5            6            6    21     0 Price.2
6    10           NA            8           NA           NA     8     0 Price.2
7   100           NA            2           NA            3     0     5 Price.2
8   101           NA           NA            5           15     0    20 Price.2

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.