Using cross join of subquery in python Peewee

41 Views Asked by At

I want to convert the following sql query to python peewee -

WITH common_subquery AS (
    SELECT
        t1.fly_from,
        t1.airlines as first_airline,
        t1.flight_numbers as first_flight_number,
        t1.link_to as first_link,

        t1.departure_to,
        t1.fly_to AS connection_at,
        t2.airlines as second_airline,
        t2.flight_numbers as second_flight_number,
        t2.link_to as second_link,
        
        t2.fly_to,
        t1.arrival_to AS landing_at_connection,
        t2.departure_to AS departure_from_connection,
        t2.arrival_to,
        CAST((julianday(t2.departure_to) - julianday(t1.arrival_to)) * 24 AS INTEGER) AS duration_hours,
        t1.discount_price + t2.discount_price AS total_price
    FROM
        flights AS t1
        JOIN flights AS t2 ON t1.flight_hash = t2.flight_hash
    WHERE
        (t2.fly_from != t1.fly_from)
        AND (t1.fly_from != t2.fly_to)
    ORDER BY
        total_price ASC
)

SELECT
    t1.fly_from as source,
    t1.first_airline as source_outbound_airline,
    t1.first_flight_number as source_outbound_flight_number,
    t1.first_link as source_outbound_link,
    t1.departure_to as outbound_departure, 
    t1.landing_at_connection, 
    t1.connection_at as outbound_connection,
    t1.second_airline as connection_outbound_airline, 
    t1.second_flight_number as connection_outbound_flight_number, 
    t1.second_link as connection_outbound_link,
    t1.departure_from_connection,
    t1.arrival_to as destination_arrival,
    t1.fly_to as destination,
    t2.first_airline as inbound_connection_airline,
    t2.first_flight_number as inbound_connection_flight_number,
    t2.first_link as inbound_connection_link, 
    t2.departure_to as return_departure, 
    t2.landing_at_connection as return_arrival,
    t2.connection_at as inbound_connection,
    t2.second_airline as inbound_airline,
    t2.second_flight_number as inbound_flight_number,
    t2.second_link as inbound_link, 
    t2.departure_from_connection as return_departure_from,
    t2.arrival_to as return_destination_arrival,
    CEIL((t1.total_price + t2.total_price) / 100.0) * 100 AS round_total_price,
    FLOOR((julianday(t2.departure_from_connection) - julianday(t1.arrival_to))) AS days_in_dest
FROM
    common_subquery AS t1
    CROSS JOIN common_subquery AS t2
WHERE
    (julianday(t2.departure_from_connection) - julianday(t1.arrival_to)) BETWEEN 5 AND 8
    AND t1.duration_hours < 24
    AND t2.duration_hours < 24
    AND t1.fly_to = t2.fly_from
    AND t1.fly_from like '%TLV%' and t1.fly_to like '%PRG%'
ORDER BY
    round_total_price ASC,
    t1.duration_hours ASC,
    t2.duration_hours ASC;

The peewee model is -

class Flights(Model):
    fly_from = CharField()
    fly_to = CharField()
    nights = IntegerField()
    days_off = IntegerField()
    price = IntegerField()
    discount_price = IntegerField()
    airlines = CharField()
    flight_numbers = CharField()
    departure_to = DateTimeField()
    arrival_to = DateTimeField()
    departure_from = DateTimeField()
    arrival_from = DateTimeField()
    link_to = CharField()
    link_from = CharField()
    month = IntegerField()
    date_of_scan = DateTimeField()
    holiday_name = CharField()
    special_date = BooleanField(default=False)
    is_connection_flight = BooleanField(default=False)
    flight_hash = CharField(default="")

    class Meta:
        database = db

I was able to recreate the subquery, but the issue is that I am not able to cross join the subquery.

for example -

subquery = Flight.select(...).order_by(...) 

I can't figure out how to cross join it in another query with itself. I want to get something like -

   subquery.select().join(subquery, CROSS)

is that possible using peewee? I can do it using python, but I want to have that calculations on the database engine side

1

There are 1 best solutions below

0
coleifer On

The best way to get this accomplished is to "fake" the alias in the cross join. Here is a minimal example (I've left out some of the selections, but you should hopefully get the idea):

FA = Flight.alias('f2')
subq = (Flight
        .select(Flight.fly_from, Flight.airlines.alias('first_airline'),
                Flight.flight_numbers.alias('first_flight_number'),
                ...
                FA.airlines.alias('second_airline'), FA.fly_to)
        .join(FA, on=(Flight.flight_hash == FA.flight_hash))
        .where(
            (Flight.fly_from != FA.fly_from) &
            (Flight.fly_from != FA.fly_to))
        .order_by(Flight.discount_price + FA.discount_price))
        
# Create our CTE and an alias to it.
cte = subq.cte('common_subquery')
t2 = cte.alias('t2')

# Now build the full query. Note in the join that we're
# joining on a dummy Table.
q = (cte
     .select_from(
         cte.c.fly_from.alias('source'),
         ...
         t2.c.first_airline.alias('inbound_connection_airline'))
     .join(Table('common_subquery', alias='t2'), JOIN.CROSS)
     .where(
         (cte.c.duration_hours < 24) &
         (t2.c.duration_hours < 24))
     .order_by(cte.c.duration_hours, t2.c.duration_hours))

The above will produce the following SQL:

WITH "common_subquery" AS (
    SELECT "t1"."fly_from", "t1"."airlines" AS "first_airline", 
           "t1"."flight_numbers" AS "first_flight_number", 
           "f2"."airlines" AS "second_airline", "f2"."fly_to" 
    FROM "flight" AS "t1" 
    INNER JOIN "flight" AS "f2" ON ("f2"."flight_hash" = "t1"."flight_hash") 
    WHERE (
        ("f2"."fly_from" != "t1"."fly_from") AND 
        ("f2"."fly_to" != "t1"."fly_from")) 
    ORDER BY ("t1"."discount_price" + "f2"."discount_price")
)
SELECT 
    "common_subquery"."fly_from" AS "source", 
    "t2"."first_airline" AS "inbound_connection_airline" 
FROM "common_subquery" 
CROSS JOIN "common_subquery" AS "t2" 
WHERE (
    ("common_subquery"."duration_hours" < 24) AND 
    ("t2"."duration_hours" < 24)) 
ORDER BY "common_subquery"."duration_hours", "t2"."duration_hours"