So i have three data frame having this information
> dim(result)
[1] 652265 10
> dim(rat)
[1] 107190 2
> dim(mouse)
[1] 219016 2
> names(result)
[1] "ensembl_gene_id" "ensembl_transcript_id"
[3] "external_gene_name" "mmusculus_homolog_ensembl_gene"
[5] "mmusculus_homolog_associated_gene_name" "rnorvegicus_homolog_ensembl_gene"
[7] "rnorvegicus_homolog_associated_gene_name" "celegans_homolog_ensembl_gene"
[9] "celegans_homolog_associated_gene_name" "celegans_homolog_ensembl_gene_transcript_id"
> names(rat)
[1] "rnorvegicus_homolog_ensembl_gene" "rnorvegicus_homolog_ensembl_transcript_id"
> names(mouse)
[1] "mmusculus_homolog_ensembl_gene" "mmusculus_homolog_ensembl_transcript_id"
My dataframes are as such
head(result)
ensembl_gene_id ensembl_transcript_id external_gene_name mmusculus_homolog_ensembl_gene
1 ENSG00000198888 ENST00000361390 MT-ND1 ENSMUSG00000064341
2 <NA> <NA> <NA> <NA>
3 ENSG00000198804 ENST00000361624 MT-CO1 ENSMUSG00000064351
4 ENSG00000198712 ENST00000361739 MT-CO2 ENSMUSG00000064354
5 <NA> <NA> <NA> <NA>
6 <NA> <NA> <NA> <NA>
mmusculus_homolog_associated_gene_name rnorvegicus_homolog_ensembl_gene
1 mt-Nd1 ENSRNOG00000030644
2 <NA> <NA>
3 mt-Co1 ENSRNOG00000034234
4 mt-Co2 ENSRNOG00000030371
5 <NA> <NA>
6 <NA> <NA>
rnorvegicus_homolog_associated_gene_name celegans_homolog_ensembl_gene
1 Mt-nd1 WBGene00010959
2 <NA> <NA>
3 Mt-co1 WBGene00010964
4 Mt-co2 WBGene00010965
5 <NA> <NA>
6 <NA> <NA>
celegans_homolog_associated_gene_name celegans_homolog_ensembl_gene_transcript_id
1 MTCE.11.1
2 <NA> <NA>
3 ctc-1 MTCE.26.1
4 ctc-2 MTCE.31.1
5 <NA> <NA>
6 <NA> <NA>
> head(rat)
rnorvegicus_homolog_ensembl_gene rnorvegicus_homolog_ensembl_transcript_id
1 ENSRNOG00000033395 ENSRNOT00000044979
2 ENSRNOG00000070901 ENSRNOT00000119508
3 ENSRNOG00000031391 ENSRNOT00000045306
4 ENSRNOG00000067904 ENSRNOT00000099225
5 ENSRNOG00000067904 ENSRNOT00000099225
6 ENSRNOG00000067904 ENSRNOT00000099225
> head(mouse)
mmusculus_homolog_ensembl_gene mmusculus_homolog_ensembl_transcript_id
1 ENSMUSG00000064341 ENSMUST00000082392
2 ENSMUSG00000064345 ENSMUST00000082396
3 ENSMUSG00000064351 ENSMUST00000082402
4 ENSMUSG00000064354 ENSMUST00000082405
5 ENSMUSG00000064356 ENSMUST00000082407
6 ENSMUSG00000064357 ENSMUST00000082408
I tried sqldf based solution since dplyr was stuck when i tried
joined_data <- sqldf("
SELECT *
FROM result
LEFT OUTER JOIN rat ON result.rnorvegicus_homolog_ensembl_gene = rat.rnorvegicus_homolog_ensembl_gene
LEFT OUTER JOIN mouse ON result.mmusculus_homolog_ensembl_gene = mouse.mmusculus_homolog_ensembl_gene
")
The issue is it just hit the upper limit of the system i have with me
Now what is the optimized way to do the same if any suggestion or help would be really appreciated
UPDATE_data
dput(head(result))
structure(list(ensembl_gene_id = c("ENSG00000198888", NA, "ENSG00000198804",
"ENSG00000198712", NA, NA), ensembl_transcript_id = c("ENST00000361390",
NA, "ENST00000361624", "ENST00000361739", NA, NA), external_gene_name = c("MT-ND1",
NA, "MT-CO1", "MT-CO2", NA, NA), mmusculus_homolog_ensembl_gene = c("ENSMUSG00000064341",
NA, "ENSMUSG00000064351", "ENSMUSG00000064354", NA, NA), mmusculus_homolog_associated_gene_name = c("mt-Nd1",
NA, "mt-Co1", "mt-Co2", NA, NA), rnorvegicus_homolog_ensembl_gene = c("ENSRNOG00000030644",
NA, "ENSRNOG00000034234", "ENSRNOG00000030371", NA, NA), rnorvegicus_homolog_associated_gene_name = c("Mt-nd1",
NA, "Mt-co1", "Mt-co2", NA, NA), celegans_homolog_ensembl_gene = c("WBGene00010959",
NA, "WBGene00010964", "WBGene00010965", NA, NA), celegans_homolog_associated_gene_name = c("",
NA, "ctc-1", "ctc-2", NA, NA), celegans_homolog_ensembl_gene_transcript_id = c("MTCE.11.1",
NA, "MTCE.26.1", "MTCE.31.1", NA, NA)), row.names = c(NA, 6L), class = "data.frame")
dput(head(rat))
structure(list(rnorvegicus_homolog_ensembl_gene = c("ENSRNOG00000033395",
"ENSRNOG00000070901", "ENSRNOG00000031391", "ENSRNOG00000067904",
"ENSRNOG00000067904", "ENSRNOG00000067904"), rnorvegicus_homolog_ensembl_transcript_id = c("ENSRNOT00000044979",
"ENSRNOT00000119508", "ENSRNOT00000045306", "ENSRNOT00000099225",
"ENSRNOT00000099225", "ENSRNOT00000099225")), row.names = c(NA,
6L), class = "data.frame")
dput(head(mouse))
structure(list(mmusculus_homolog_ensembl_gene = c("ENSMUSG00000064341",
"ENSMUSG00000064345", "ENSMUSG00000064351", "ENSMUSG00000064354",
"ENSMUSG00000064356", "ENSMUSG00000064357"), mmusculus_homolog_ensembl_transcript_id = c("ENSMUST00000082392",
"ENSMUST00000082396", "ENSMUST00000082402", "ENSMUST00000082405",
"ENSMUST00000082407", "ENSMUST00000082408")), row.names = c(NA,
6L), class = "data.frame")

For a dataset of this size, you won't be able to simply join your data in a naive way and keep working on it interactively. The best approach will depend on what you expect to to with it.
Maybe you'll need to work in a sample. Maybe split data in chunks of manageable size. Maybe loading them into an Spark instance. Or maybe perform the calculations you need using SQL.
If all you want is the data joined, you can just load them into your database, perform the join and then export to CSV or another format. No need to load them into a data frame.
State your intent and others can help you better.