How do I fix the inefficiency of a Django query in a loop?

How can the following code be made more efficient (for example, how to replace a loop with a query)?

def get_question(datetime_now, questions_queryset, user):
    """Return the question whose latest schedule for ``user`` is the newest.

    For every question in ``questions_queryset`` the most recently added
    ``Schedule`` belonging to ``user`` is looked up (one query per question).
    A question is skipped when it has no such schedule, or when that
    schedule's ``datetime_show_next`` is unset or earlier than
    ``datetime_now``.  Among the remaining questions, the one whose schedule
    has the greatest ``datetime_added`` is returned; on a tie the question
    iterated last wins.

    Returns ``None`` when no question qualifies (including an empty
    ``questions_queryset``).
    """
    best_schedule = None
    best_question = None
    # HOW TO ELIMINATE THE FOLLOWING LOOP AND REPLACE WITH A QUERY?
    for question in questions_queryset:
        try:
            # Positional field name: the ``field_name`` keyword of latest()
            # was deprecated in Django 2.0 and later removed.
            schedule = (Schedule.objects
                        .filter(question=question, user=user)
                        .latest('datetime_added'))
        except ObjectDoesNotExist:
            schedule = None
        if schedule is None:
            continue
        # datetime_show_next is nullable; comparing None with a datetime
        # raises TypeError, so schedules without a value are skipped.
        if (schedule.datetime_show_next is None or
                schedule.datetime_show_next < datetime_now):
            continue
        if (best_schedule is None or
                schedule.datetime_added >= best_schedule.datetime_added):
            best_schedule = schedule
            best_question = question

    return best_question

      



models.py

from django.contrib.auth.models import User
from django.db.models import DateTimeField, ForeignKey, Model, TextField

class Question(Model):
    """A question, stored as a single free-form text field."""
    # The text of the question.
    question = TextField()

class Schedule(Model):
    """A schedule entry tying a Question to an (optional) User."""
    # Filled in automatically when the row is first created (auto_now_add).
    datetime_added = DateTimeField(auto_now_add=True)
    # Nullable, defaults to None; presumably the moment the question is next
    # due to be shown -- confirm against the code that writes it.
    datetime_show_next = DateTimeField(null=True, default=None)
    # NOTE(review): no on_delete is passed to the ForeignKey fields below;
    # Django 2.0+ requires that argument -- confirm the targeted version.
    question = ForeignKey(Question)
    # The user may be NULL in the database.
    user = ForeignKey(User, null=True)

      

+3


source to share


2 answers


You can use `Subquery`, as in this answer (fooobar.com/questions/240063/...), or `Prefetch` (fooobar.com/questions/240061/...).

Here's one way to accomplish this with `Prefetch`:

# Prefetch, in one extra query, only the schedules that belong to this user.
user_schedules = Schedule.objects.filter(user=user)
schedules_prefetch = Prefetch('schedule_set', queryset=user_schedules)
prefetched_questions = questions_queryset.prefetch_related(schedules_prefetch)
for question in prefetched_questions:
    # The schedules are already loaded, so pick the newest one in Python
    # instead of issuing another query per question.
    candidates = list(question.schedule_set.all())
    if candidates:
        schedule = max(candidates, key=lambda item: item.datetime_added)
    else:
        # No schedule of this user exists for the question.
        schedule = None

      



Here's a usage example with `Subquery` (it may not work as-is, but it will give you a general idea):

from django.db.models import OuterRef, Subquery
# Schedules of this user for the question of the outer query row, newest
# first, so that slicing with [:1] selects the latest one.
schedules = (Schedule.objects
             .filter(user=user, question=OuterRef('pk'))
             .order_by('-datetime_added'))
# A Subquery used as an annotation must be limited to a single column, hence
# values('pk'): every question is annotated with the primary key of its
# latest schedule (None when the user has no schedule for that question).
questions_queryset = (questions_queryset
                      .annotate(latest_schedule=Subquery(
                          schedules.values('pk')[:1])))
for question in questions_queryset:
    # Primary key of the latest Schedule (or None), not a Schedule instance.
    schedule = question.latest_schedule

      

+4


source


    # Get the question ids
    question_ids = questions_queryset.values_list('id', flat=True)

    # Get the most recently added schedule of this user among those
    # questions.  latest() returns exactly one object and raises
    # Schedule.DoesNotExist when nothing matches.  The field name is passed
    # positionally because the ``field_name`` keyword was deprecated in
    # Django 2.0 and later removed.
    schedule = (Schedule.objects
                .filter(question__in=question_ids, user=user)
                .latest('datetime_added'))

    # Schedule.objects.get() is only an alternative when the filter can match
    # at most one row: it raises MultipleObjectsReturned otherwise.

      



+2


source







All Articles