Finding and Grouping associations in Python

27 Views Asked by At
import pandas as pd

# Define the data as a DataFrame with column headers
data = pd.DataFrame({
    "Plant": ["A299", "A299", "A299", "A299", "T299", "T299", "T299", "T299", "M199", "M199", "M199", "M199", "A299", "A299", "T299", "A499", "A499", "A499", "A499", "A499", "A299", "A299"],
    "Material": ["M1", "M1", "M1", "M1", "M1", "M1", "M1", "M1", "M1", "M2", "M2", "M3", "M6", "M6", "M8", "C1", "C1", "C1", "C1", "C2", "M9", "M9"],
    "Region": ["R1", "R2", "R3", "R4", "R1", "R2", "R3", "R4", "R1", "R2", "R2", "R2", "R4", "R5", "R7", "R1", "R2", "R3", "R4", "R9", "R9", "R4"]
})

# Collect, for every material, the set of regions it appears in.
# Only the Material and Region columns are read; Plant is not used here.
# Materials are keyed in order of first appearance in the table.
material_regions = {}
for material_name, region_name in zip(data["Material"], data["Region"]):
    material_regions.setdefault(material_name, set()).add(region_name)

# Group registry: maps a numeric group id to the set of materials in that group
groups = {}
# Id that will be given to the next newly created group
group_id = 1


def has_common_element(set1, set2):
    """Return True when set1 and set2 share at least one element, else False."""
    return not set1.isdisjoint(set2)

# Greedily place each material: it joins the first existing group that already
# holds a material sharing at least one region with it; otherwise it opens a
# new group under the next free id.
# NOTE: this is a single pass in dictionary order and existing groups are never
# revisited, so a material that was placed early is not pulled into a group
# that a later material would link it to.
for candidate, candidate_regions in material_regions.items():
    home = next(
        (
            gid
            for gid, members in groups.items()
            if any(
                has_common_element(candidate_regions, material_regions[member])
                for member in members
            )
        ),
        None,
    )
    if home is None:
        groups[group_id] = {candidate}
        group_id += 1
    else:
        groups[home].add(candidate)

# Merge pass: for every group, start from a copy of its materials and absorb
# any other group that has a material in common with the set built so far
final_groups = {}
for gid, members in groups.items():
    merged = set(members)
    for other_gid, other_members in groups.items():
        if other_gid == gid:
            continue  # never compare a group with itself
        if has_common_element(merged, other_members):
            merged |= other_members
    final_groups[gid] = merged

# Invert final_groups so each material can be looked up to get its group id.
# If a material occurs in more than one group, the last group visited wins.
material_to_group = {
    member: gid
    for gid, members in final_groups.items()
    for member in members
}

# Map every region to a group id: for each group, gather the regions of all
# of its materials and tag them with that group's id.
# A region reached by several groups keeps the id of the last group processed,
# because later assignments overwrite earlier ones.
region_to_group = {}
for gid, members in final_groups.items():
    covered = set()
    covered.update(*(material_regions[member] for member in members))
    region_to_group.update((region_name, gid) for region_name in covered)

# Report the group of every material, in the mapping's insertion order
for name in material_to_group:
    print(f"{name} is in Group {material_to_group[name]}")

# Report the group of every region, in the mapping's insertion order
for name in region_to_group:
    print(f"Region {name} is in Group {region_to_group[name]}")

With the code above, I cannot get C2 tagged under Group 1. C2 is used in Region R9, and the only other material that goes into Region R9 is M9. But M9 is already in Group 1 because of Region R4. I know it might be tricky, but is it possible to modify the code so that C2 is also tagged in Group 1 because of its association with M9? Please advise.

This is the desired output; "Group" is the output field that I expect: [image: enter image description here]

Updated code using the NetworkX package (connected-components approach), as advised by @mozway:

import networkx as nx

# Define the table data as a list of 3-tuples.
# Each tuple is (plant, material, region), in the same row order as the
# Plant/Material/Region columns of the DataFrame version of this table.
# Duplicate rows (e.g. M199/M2/R2) are kept as they appear in the table.
data = [
    ("A299", "M1", "R1"),
    ("A299", "M1", "R2"),
    ("A299", "M1", "R3"),
    ("A299", "M1", "R4"),
    ("T299", "M1", "R1"),
    ("T299", "M1", "R2"),
    ("T299", "M1", "R3"),
    ("T299", "M1", "R4"),
    ("M199", "M1", "R1"),
    ("M199", "M2", "R2"),
    ("M199", "M2", "R2"),
    ("M199", "M3", "R2"),
    ("A299", "M6", "R4"),
    ("A299", "M6", "R5"),
    ("T299", "M8", "R7"),
    ("A499", "C1", "R1"),
    ("A499", "C1", "R2"),
    ("A499", "C1", "R3"),
    ("A499", "C1", "R4"),
    ("A499", "C2", "R9"),
    ("A299", "M9", "R9"),
    ("A299", "M9", "R4"),
]

# Build an undirected graph in which every material and every region is a
# node, and each table row contributes one material-region edge.
# The plant column is not used for grouping.
# NOTE: materials and regions share one node namespace; a label used as both
# a material and a region would become a single node whose "type" is whatever
# was set last.
G = nx.Graph()
for _plant, mat, reg in data:
    G.add_node(mat, type="material")
    G.add_node(reg, type="region")
    G.add_edge(mat, reg)

# Each connected component of the graph is one group
components = list(nx.connected_components(G))

# Number the components from 1 and record, for every material node, the
# number of the component it belongs to (region nodes are skipped)
material_to_group = {
    node: number
    for number, members in enumerate(components, start=1)
    for node in members
    if G.nodes[node]["type"] == "material"
}

# Report each material's group, ordered alphabetically by material name
for name in sorted(material_to_group):
    print(f"{name} is in Group {material_to_group[name]}")
0

There are 0 best solutions below