I have a tibble songs which is too big to share here. It doesn't really matter, though; the problem applies to any tibble that only has dbl values.
The idea is that I have one row I selected before. It can be any one of them, without any previous knowledge. The first thing I did was to filter it out:
songs2 <- songs %>%
anti_join(choice)
This works.
By the way, choice has a single row.
Now, I create a second tibble (third, but second in this post) called dist, which only has dbl values (and therefore shares columns with choice). I want to subtract the values in choice from each row in dist.
I tried writing this:
for (i in seq_along(distUseful)) {
dist <- dist %>%
mutate_(distUseful[i] = (.data[[i]] - choice[[i]]))
}
But it doesn't work:
> for (i in seq_along(distUseful)) {
+ dist <- dist %>%
+ mutate_(distUseful[i] = (.data[[i]] - choice[[i]]))
Error: unexpected '=' in:
" dist <- dist %>%
mutate_(distUseful[i] ="
> }
Error: unexpected '}' in "}"
EDIT: This is the first 10 rows in songs2, as requested in the comments.
structure(list(acousticness = c(0.991, 0.643, 0.993, 0.000173,
0.295, 0.996, 0.992, 0.996, 0.996, 0.00682), artists = c("['Mamie Smith']",
"[\"Screamin' Jay Hawkins\"]", "['Mamie Smith']", "['Oscar Velazquez']",
"['Mixe']", "['Mamie Smith & Her Jazz Hounds']", "['Mamie Smith']",
"['Mamie Smith & Her Jazz Hounds']", "['Francisco Canaro']",
"['Meetya']"), danceability = c(0.598, 0.852, 0.647, 0.73, 0.704,
0.424, 0.782, 0.474, 0.469, 0.571), duration_ms = c(168333, 150200,
163827, 422087, 165224, 198627, 195200, 186173, 146840, 476304
), energy = c(0.224, 0.517, 0.186, 0.798, 0.707, 0.245, 0.0573,
0.239, 0.238, 0.753), explicit = c(FALSE, FALSE, FALSE, FALSE,
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE), id = c("0cS0A1fUEUd1EW3FcF8AEI",
"0hbkKFIJm7Z05H8Zl9w30f", "11m7laMUgmOKqI3oYzuhne", "19Lc5SfJJ5O1oaxY0fpwfh",
"2hJjbsLCytGsnAHfdsLejp", "3HnrHGLE9u2MjHtdobfWl9", "5DlCyqLyX2AOVDTjjkDZ8x",
"02FzJbHtqElixxCmrpSCUa", "02i59gYdjlhBmbbWhf8YuK", "06NUxS2XL3efRh0bloxkHm"
), instrumentalness = c(0.000522, 0.0264, 1.76e-05, 0.801, 0.000246,
0.799, 1.61e-06, 0.186, 0.96, 0.873), key = c(5, 5, 0, 2, 10,
5, 5, 9, 8, 8), liveness = c(0.379, 0.0809, 0.519, 0.128, 0.402,
0.235, 0.176, 0.195, 0.149, 0.092), loudness = c(-12.628, -7.261,
-12.098, -7.311, -6.036, -11.47, -12.453, -9.712, -18.717, -6.943
), mode = c(0, 0, 1, 1, 0, 1, 1, 1, 1, 1), name = c("Keep A Song In Your Soul",
"I Put A Spell On You", "Golfing Papa", "True House Music - Xavier Santos & Carlos Gomix Remix",
"Xuniverxe", "Crazy Blues - 78rpm Version", "Don't You Advertise Your Man",
"Arkansas Blues", "La Chacarera - Remasterizado", "Broken Puppet - Original Mix"
), popularity = c(12, 7, 4, 17, 2, 9, 5, 0, 0, 0), release_date = c("1920",
"1920-01-05", "1920", "1920-01-01", "1920-10-01", "1920", "1920",
"1920", "1920-07-08", "1920-01-01"), speechiness = c(0.0936,
0.0534, 0.174, 0.0425, 0.0768, 0.0397, 0.0592, 0.0289, 0.0741,
0.0446), tempo = c(149.976, 86.889, 97.6, 127.997, 122.076, 103.87,
85.652, 78.784, 130.06, 126.993), valence = c(0.634, 0.95, 0.689,
0.0422, 0.299, 0.477, 0.487, 0.366, 0.621, 0.119), year = c(1920,
1920, 1920, 1920, 1920, 1920, 1920, 1920, 1920, 1920)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
This is choice:
structure(list(acousticness = 0.511, danceability = 0.403, duration_ms = 117395,
instrumentalness = 0.896, liveness = 0.108, loudness = -8.126,
popularity = 65, speechiness = 0.0514, tempo = 135.047, valence = 0.192), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
And finally:
distUseful <- c("acousticness", "danceability", "duration_ms", "duration_ms", "instrumentalness", "liveness", "loudness", "popularity", "speechiness", "tempo", "valence")
EDIT 2: Just an afterthought: if you take the loop I cited earlier and run a single iteration by hand (with a variable of your choice), it works. In fact, the problem lies in the first part, distUseful[i] =, as per the error messages and by playing with the code.
EDIT 3: As an example, here's what happens if this is done only to the first column (so the first one is correct and the rest didn't change):
> dist %>%
+ mutate(acousticness = (dist[[1]] - choice[[1]]))
# A tibble: 174,388 x 10
acousticness danceability duration_ms instrumentalness liveness loudness popularity speechiness tempo valence
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.48 0.598 168333 0.000522 0.379 -12.6 12 0.0936 150. 0.634
2 0.132 0.852 150200 0.0264 0.0809 -7.26 7 0.0534 86.9 0.95
3 0.482 0.647 163827 0.0000176 0.519 -12.1 4 0.174 97.6 0.689
4 -0.511 0.73 422087 0.801 0.128 -7.31 17 0.0425 128. 0.0422
5 -0.216 0.704 165224 0.000246 0.402 -6.04 2 0.0768 122. 0.299
6 0.485 0.424 198627 0.799 0.235 -11.5 9 0.0397 104. 0.477
7 0.481 0.782 195200 0.00000161 0.176 -12.5 5 0.0592 85.7 0.487
8 0.485 0.474 186173 0.186 0.195 -9.71 0 0.0289 78.8 0.366
9 0.485 0.469 146840 0.96 0.149 -18.7 0 0.0741 130. 0.621
10 -0.504 0.571 476304 0.873 0.092 -6.94 0 0.0446 127. 0.119
dput(head(data_frame_name, 10)) would help. songs2, choice and distUseful. choice from song2 rows here?