sql >> Base de Datos >> RDS >> Mysql

¿Cómo construir un método híbrido que cuente la cantidad de registros de los últimos X días?

A continuación se muestra el fragmento de código (casi) completo:

# ... omitted import statements and session configuration
from datetime import datetime, timedelta

from sqlalchemy import Column, Date, Integer, func, select
from sqlalchemy.ext.hybrid import hybrid_method
from sqlalchemy.orm import aliased, object_session

def _date(date_str):
    return datetime.strptime(date_str, "%Y-%m-%d")

class Match(Base):
    """ORM model for a match, with a hybrid count of recent earlier matches.

    ``match_count`` is a hybrid method:

    * called on a loaded instance it runs one ``COUNT`` query through that
      instance's session and returns an ``int``;
    * called on the class it returns a labeled scalar subquery that can be
      placed in the column list of a larger query.

    Both variants count rows with a smaller ``match_id`` whose ``date`` lies
    in the inclusive window ``[date - timespan_days, date]``.
    """

    __tablename__ = "match"

    match_id = Column(Integer, primary_key=True)
    date = Column(Date, nullable=False)

    def __repr__(self):
        return f"<Match(date={self.date!r}, match_id={self.match_id!r})>"

    @hybrid_method
    def match_count(self, timespan_days):
        """Count earlier matches within ``timespan_days`` days of this match.

        Instance-level ("in-memory") variant: issues one query per instance,
        so prefer the class-level expression when counting for many rows.

        :param timespan_days: size of the look-back window, in days.
        :return: number of matching rows (``int``).
        """
        cut_off = self.date - timedelta(days=timespan_days)
        # Uses the session this instance is attached to.
        sess = object_session(self)
        M = Match
        q = (
            sess.query(M)
            .filter(M.date <= self.date)
            .filter(M.date >= cut_off)
            # option-1: count every other match, including same-day ones:
            #   .filter(M.match_id != self.match_id)
            # option-2 (used): only matches with a smaller id
            .filter(M.match_id < self.match_id)
        )
        return q.count()

    @match_count.expression
    def match_count(cls, timespan_days):
        """SQL expression variant of ``match_count``.

        :param timespan_days: size of the look-back window, in days.
        :return: a scalar subquery labeled ``match_count`` that is correlated
            to the enclosing query on ``match``.
        """
        # Alias the table so the subquery can compare "other" rows against
        # the row of the enclosing query.
        M = aliased(Match, name="other")
        # Rendered as ``match.date - :param``; on PostgreSQL ``date - integer``
        # subtracts whole days.
        # NOTE(review): other backends may need a different interval
        # expression -- confirm before using this elsewhere.
        cut_off = cls.date - timespan_days
        q = (
            # List form of ``select()`` as used by SQLAlchemy 1.3.
            select([func.count(M.match_id)])
            .where(M.date <= cls.date)
            .where(M.date >= cut_off)
            # option-1: count every other match, including same-day ones:
            #   .where(M.match_id != cls.match_id)
            # option-2 (used): only matches with a smaller id
            .where(M.match_id < cls.match_id)
        )
        return q.label("match_count")

def test():
    """Insert sample matches and print ``match_count(3)`` computed both ways.

    Relies on the module-level ``engine`` and ``session`` from the omitted
    configuration.
    NOTE(review): assumes the ``match`` table already exists and is empty --
    confirm against the omitted setup code.
    """
    from sys import version_info as py_version
    from sqlalchemy import __version__ as sa_version

    print(f"PY version={py_version}")
    print(f"SA version={sa_version}")
    print(f"SA engine={engine.name}")
    print("=" * 80)

    # 1. test data (two matches share 2020-01-05 to exercise the id tie-break)
    matches = [
        Match(date=_date(day))
        for day in (
            "2020-01-01",
            "2020-01-02",
            "2020-01-03",
            "2020-01-05",
            "2020-01-05",
            "2020-01-10",
        )
    ]
    session.add_all(matches)
    session.commit()
    print("=" * 80)

    # 2. test query in "in-memory" (one COUNT query per instance)
    for m in session.query(Match):
        print(m, m.match_count(3))
    print("=" * 80)

    # 3. test query on "SQL" (single query with a correlated subquery)
    q = session.query(Match, Match.match_count(3))
    for match, match_count in q:
        print(match, match_count)
    print("=" * 80)

# Run the demo only when the file is executed as a script.
if __name__ == "__main__":
    test()

El código anterior produce el siguiente resultado:

PY version=sys.version_info(major=3, minor=8, micro=1, releaselevel='final', serial=0)
SA version=1.3.20
SA engine=postgresql
<Match(date=datetime.date(2020, 1, 1), match_id=1)> 0
<Match(date=datetime.date(2020, 1, 2), match_id=2)> 1
<Match(date=datetime.date(2020, 1, 3), match_id=3)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=4)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=5)> 3
<Match(date=datetime.date(2020, 1, 10), match_id=6)> 0
<Match(date=datetime.date(2020, 1, 1), match_id=1)> 0
<Match(date=datetime.date(2020, 1, 2), match_id=2)> 1
<Match(date=datetime.date(2020, 1, 3), match_id=3)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=4)> 2
<Match(date=datetime.date(2020, 1, 5), match_id=5)> 3
<Match(date=datetime.date(2020, 1, 10), match_id=6)> 0

mientras que la consulta `q` tiene el siguiente aspecto (en PostgreSQL):

SELECT match.match_id,
       match.date,
  (SELECT count(other.match_id) AS count_1
   FROM match AS other
   WHERE other.match_id < match.match_id
     AND other.date <= match.date
     AND other.date >= match.date - %(date_1)s) AS match_count
FROM match

Un punto que me gustaría señalar es que la verificación "en memoria" no es muy eficiente, porque hay que consultar la base de datos una vez por cada instancia de `Match`. Por lo tanto, usaría la última consulta (la versión en SQL) siempre que sea posible.