with a1 as (
select 1 as user_id,'a' as v1,'x' as v2,1 as target_variable
union all
select 2,'a','y',2
union all
select 3,'b','y',3
union all
select 4,'b','z',4
union all
select 5,'c','z',5
union all
select 6,'d','x',6
union all
select 7,'f','w',7
),
a2 as (
select user_id,v1 as v1_v2_union,target_variable
from a1
union all
select user_id,v2,target_variable
from a1
),
a3 as (
select v1,string_agg(v2) as v2s,concat(v1,',',string_agg(v2)) as v1_v2
from a1
group by 1
),
a5 as (
select v1,v1_v2,max(target_variable) as target_variable
from(
select *
from a3
left join a2 on a3.v1_v2 like concat('%',a2.v1_v2_union,'%'))
group by 1,2
)
select user_id,max(target_variable) as target_variable
from(
select user_id,a5.*
from a5
left join a1 on v1_v2 like concat('%',a1.v1,'%') or v1_v2 like concat('%',a1.v2,'%')
)
group by user_id
order by user_id
If you take the above data as input you can see that user_id from 1 to 6 are related through v1 and v2 I am trying to find the max(target_variable) considering the relation between the ids
After running the above code I am getting the following as my output
| user_id | target_variable |
|---|---|
| 1 | 6 |
| 2 | 6 |
| 3 | 6 |
| 4 | 5 |
| 5 | 5 |
| 6 | 6 |
| 7 | 7 |
The Output should be for user_1d 1 to 6 the max(target_variable)=6 and for user_id=7 the max(target_variable)=7
| user_id | target_variable |
|---|---|
| 1 | 6 |
| 2 | 6 |
| 3 | 6 |
| 4 | 6 |
| 5 | 6 |
| 6 | 6 |
| 7 | 7 |