SQLAlchemy - query with multiple func.count() in results

32 Views Asked by At

I've been playing around with building a flask/SQLalchemy site which allows users to post travel experiences - grouping these by trip, and being able to post various pictures and videos under each trip. I'm hoping to be able to query users or trips based on certain criteria and display the number of pictures and videos posted by a given user or under a given trip. Was hoping to build a query that provides picture quantity and video quantity along with the trip or user object.

Here are my models:

from flask import Flask, render_template, url_for, request, redirect,send_file
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.sql import select
from sqlalchemy.sql.expression import func, or_
from datetime import datetime
import time

app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///test_v1.db'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

db = SQLAlchemy(app)
app.app_context().push()

class User(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(30), unique = False, nullable=False)
    trips = db.relationship('Trips', backref='user', lazy=True)

    def __repr__(self):
        return f"User('{self.id}','{self.name}')"

class Trips(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    location = db.Column(db.String(20), unique = False, nullable=False)
    departure_date = db.Column(db.DateTime, nullable=False)
    return_date = db.Column(db.DateTime, nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey('user.id'), nullable=False)
    pics = db.relationship('Pictures', backref='trips', lazy=True)
    vids = db.relationship('Videos', backref='trips', lazy=True)

    def __repr__(self):
        return f"Trips('{self.id}','{self.location}')"

class Pictures(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    file_name = db.Column(db.String(40), unique = False, nullable=False)
    date_taken = db.Column(db.DateTime, nullable=False)
    trip_id = db.Column(db.Integer, db.ForeignKey('trips.id'), nullable=False)

    def __repr__(self):
        return f"Pictures('{self.id}','{self.file_name}')"

class Videos(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    file_name = db.Column(db.String(40), unique = False, nullable=False)
    date_taken = db.Column(db.DateTime, nullable=False)
    length = db.Column(db.Float, nullable=False)
    trip_id = db.Column(db.Integer, db.ForeignKey('trips.id'), nullable=False)

    def __repr__(self):
        return f"Videos('{self.id}','{self.file_name}','{self.length}')"

db.create_all()

I've figured out how to query the trips and get a column of picture counts as follows:

pic_count=db.session.query(Trips,func.count(Pictures.id)).join(Pictures,Trips.id==Pictures.trip_id).group_by(Pictures.trip_id)

however if I try to extend this by also joining with the videos table the total count seems to get screwed up - like it's multiplying the pictures qty by the videos qty.

Is it even possible to query in this way?

1

There are 1 best solutions below

2
Ian Wilson On BEST ANSWER

When you have to count across different groups I would use subqueries.

    with Session(conn) as session:
        # Count up pictures per trip.
        picture_subq = select(
            Picture.trip_id,
            func.count(Picture.id).label('picture_count')
        ).group_by(Picture.trip_id).subquery()
        # Count up videos per trip.
        video_subq = select(
            Video.trip_id,
            func.count(Video.id).label('video_count')
        ).group_by(Video.trip_id).subquery()
        # Now get trips with those counts.
        # - We use outerjoin because some trips have no matching
        # picture and/or video count and we want those trips
        # to still be included.
        # - When those trips are included that missing count would
        # normally be NULL/None but we use coalesce() to tell
        # the database to convert that value to 0.
        # - When referencing columns on the subqueries we need to 
        # use `.c.` to access the columns.
        q = select(
            Trip,
            func.coalesce(picture_subq.c.picture_count, 0),
            func.coalesce(video_subq.c.video_count, 0)
        ).outerjoin(
            picture_subq, Trip.id == picture_subq.c.trip_id
        ).outerjoin(
            video_subq, Trip.id == video_subq.c.trip_id)
        for trip, picture_count, video_count in session.execute(q).all():
            print (trip.id, picture_count, video_count)

If the join-operator is a "LEFT JOIN" or "LEFT OUTER JOIN", then after the ON or USING filtering clauses have been applied, an extra row is added to the output for each row in the original left-hand input dataset that does not match any row in the right-hand dataset. The added rows contain NULL values in the columns that would normally contain values copied from the right-hand input dataset.