Given the following schema
class Header(Base):
__tablename__ = 'header'
id = Column(Integer, primary_key=True)
class Row(Base):
__tablename__ = 'row'
id = Column(Integer, primary_key=True)
header_id = Column(Integer)
tags = relationship("Tag", secondary="row_tag", back_populates="rows")
class Tag(Base):
__tablename__ = 'tag'
id = Column(Integer, primary_key=True)
rows = relationship("Row", secondary="row_tag", back_populates="tags")
class RowTag(Base):
__tablename__ = 'row_tag'
row_id = Column(Integer, ForeignKey('row.id'), primary_key=True)
tag_id = Column(Integer, ForeignKey('tag.id'), primary_key=True)
When I commit a Header object that has tags attached to its rows, each row is duplicated by the number of its attached tags.
I understand that SQLAlchemy interprets this as new records for the association table RowTag, resulting in the duplication of Row records.
How to avoid this duplication in rows, and ensure that SQLAlchemy recognizes the existing tags and doesn't create duplicates?
Simplified example of my application
DB and Imports
from sqlalchemy import Column, Integer, ForeignKey
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.orm import relationship
class MySQLAlchemy(SQLAlchemy):
"""A custom SQLAlchemy class to handle multiple databases sessions."""
db = MySQLAlchemy()
Model = db.Model
Schema
class Header(Model):
__tablename__ = 'header'
id = Column(Integer, primary_key=True)
class Row(Model):
__tablename__ = 'row'
id = Column(Integer, primary_key=True)
header_id = Column(Integer)
tags = relationship("Tag", secondary="row_tag", back_populates="rows")
class Tag(Model):
__tablename__ = 'tag'
id = Column(Integer, primary_key=True)
rows = relationship("Row", secondary="row_tag", back_populates="tags")
class RowTag(Model):
__tablename__ = 'row_tag'
row_id = Column(Integer, ForeignKey('row.id'), primary_key=True)
tag_id = Column(Integer, ForeignKey('tag.id'), primary_key=True)
Functions
def insert_rows(header_id, tags):
"""
Simplified version of inserting rows function,
more rows attributes come with the request, instead of doing a dummy loop.
"""
ready_rows = []
for i in range(3):
row1 = Row({"id": i, "header_id": header_id})
row1.tags.extend(tags)
ready_rows.append(row1)
# If I uncomment this line here, rows are inserted without any duplication and each has 2 tags attached in the association table
# session.add(row1)
# session.flush()
return ready_rows
def new_page():
# Tags are selected from database
tags = [tag1, tag2]
# Add header
header = Header({"id": 1})
session.add(header)
session.flush()
# Add rows
ready_rows = insert_rows(header.id, tags)
# I add rows with bulk_save_objects here, and each row is inserted twice (or by the number of tags attached to it)
session.bulk_save_objects(ready_rows)
session.flush()
# Commit at last
session.commit()
return None, 200
# Run
new_page()