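When you join two data frames, the result will contain more rows than the left table if the table being joined in has repeated IDs: each left row is matched to every right row with the same ID. See below.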
No ID repetitions = the join returns exactly one row per ID, as expected.
# Example 1: every id occurs exactly once in both tables, so the left join
# returns exactly one row per id.
library(tidyverse) # left_join() comes from dplyr, which tidyverse attaches

df1 <- data.frame(
  id = c(1, 2, 3),
  y = c(20, 30, 40)
)
df2 <- data.frame(
  id = c(1, 2, 3),
  x = rnorm(3, mean = 30, sd = 5) # random draws, so x differs on every run
)

# Name the join key explicitly instead of letting dplyr infer it from the
# shared column names (the inferred form also prints a "Joining with" message).
df <- left_join(df1, df2, by = "id")
df
id y x
1 1 20 30.09988
2 2 30 29.41230
3 3 40 23.84746
ID repetitions = the join still runs, but it returns extra (duplicated) rows
# Example 2: id 2 appears twice in data2, so the single data1 row with id 2
# matches two rows and is duplicated in the joined result.
library(tidyverse) # left_join() comes from dplyr, which tidyverse attaches

data1 <- data.frame(
  id = c(1, 2, 3),
  y = c(20, 23, 40)
)
data2 <- data.frame(
  id = c(1, 2, 2), # repeated id
  x = c(60, 70, 80)
)

# The join key is named explicitly. To catch this kind of fan-out up front,
# check anyDuplicated(data2$id) before joining, or (dplyr >= 1.1) pass
# relationship = "one-to-one" so the join errors instead of adding rows.
dataf <- left_join(data1, data2, by = "id")
dataf # has 4 rows rather than 3
id y x
1 1 20 60
2 2 23 70
3 2 23 80
4 3 40 NA
Bo\(^2\)m =)