SqlAlchemy association tables resulting with duplicate records

34 Views Asked by At

Given the following schema

class Header(Base):
    __tablename__ = 'header'
    id = Column(Integer, primary_key=True)

class Row(Base):
    __tablename__ = 'row'
    id = Column(Integer, primary_key=True)
    header_id = Column(Integer)

    tags = relationship("Tag", secondary="row_tag", back_populates="rows")

class Tag(Base):
    __tablename__ = 'tag'
    id = Column(Integer, primary_key=True)
    rows = relationship("Row", secondary="row_tag", back_populates="tags")

class RowTag(Base):
    __tablename__ = 'row_tag'
    row_id = Column(Integer, ForeignKey('row.id'), primary_key=True)
    tag_id = Column(Integer, ForeignKey('tag.id'), primary_key=True)

When I commit a Header object that has tags attached to its rows, each row is duplicated by the number of its attached tags.

I understand that SQLAlchemy interprets this as new records for the association table RowTag, resulting in the duplication of Row records.

How to avoid this duplication in rows, and ensure that SQLAlchemy recognizes the existing tags and doesn't create duplicates?

Simplified example of my application

DB and Imports

from sqlalchemy import Column, Integer, ForeignKey
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.orm import relationship


class MySQLAlchemy(SQLAlchemy):
    """A custom SQLAlchemy class to handle multiple databases sessions."""


db = MySQLAlchemy()
Model = db.Model

Schema

class Header(Model):
    __tablename__ = 'header'
    id = Column(Integer, primary_key=True)


class Row(Model):
    __tablename__ = 'row'
    id = Column(Integer, primary_key=True)
    header_id = Column(Integer)

    tags = relationship("Tag", secondary="row_tag", back_populates="rows")


class Tag(Model):
    __tablename__ = 'tag'
    id = Column(Integer, primary_key=True)
    rows = relationship("Row", secondary="row_tag", back_populates="tags")


class RowTag(Model):
    __tablename__ = 'row_tag'
    row_id = Column(Integer, ForeignKey('row.id'), primary_key=True)
    tag_id = Column(Integer, ForeignKey('tag.id'), primary_key=True)

Functions

def insert_rows(header_id, tags):
    """
        Simplified version of inserting rows function,
        more rows attributes come with the request, instead of doing a dummy loop.
    """
    ready_rows = []
    for i in range(3):
        row1 = Row({"id": i, "header_id": header_id})
        row1.tags.extend(tags)

        ready_rows.append(row1)

        # If I uncomment this line here, rows are inserted without any duplication and each has 2 tags attached in the association table
        # session.add(row1)
        # session.flush()

    return ready_rows


def new_page():
    # Tags are selected from database
    tags = [tag1, tag2]

    # Add header
    header = Header({"id": 1})
    session.add(header)
    session.flush()

    # Add rows
    ready_rows = insert_rows(header.id, tags)

    # I add rows with bulk_save_objects here, and each row is inserted twice (or by the number of tags attached to it)
    session.bulk_save_objects(ready_rows)
    session.flush()   

    # Commit at last
    session.commit()

    return None, 200


# Run
new_page()

0

There are 0 best solutions below