How do I fix the inefficiency of a django query in a loop?
How can the following code be made more efficient (for example, how to replace a loop with a query)?
def get_question(datetime_now, questions_queryset, user):
    """Return the question whose latest schedule for ``user`` is the newest.

    For each question in ``questions_queryset`` the most recently added
    ``Schedule`` belonging to ``user`` is looked up.  That schedule is
    eligible when its ``datetime_show_next`` is set and is not earlier than
    ``datetime_now``.  Among eligible schedules the one with the greatest
    ``datetime_added`` wins; on a tie the question iterated later wins.

    Returns the winning question, or ``None`` when no schedule is eligible
    (including when ``questions_queryset`` is empty).
    """
    best_schedule = None
    best_question = None
    # HOW TO ELIMINATE THE FOLLOWING LOOP AND REPLACE WITH A QUERY?
    for question in questions_queryset:
        try:
            schedule = (Schedule.objects
                        .filter(question=question, user=user)
                        .latest(field_name='datetime_added'))
        except ObjectDoesNotExist:
            # This user has no schedule for the question at all.
            schedule = None
        if schedule is None:
            continue
        # The field is nullable; a schedule without a next-show time can
        # never satisfy the ">= now" condition (and comparing None with a
        # datetime would raise TypeError).
        if schedule.datetime_show_next is None:
            continue
        if schedule.datetime_show_next < datetime_now:
            continue
        if (best_schedule is None or
                schedule.datetime_added >= best_schedule.datetime_added):
            best_schedule = schedule
            best_question = question
    return best_question
models.py
from django.contrib.auth.models import User
from django.db.models import (
    CASCADE,
    DateTimeField,
    ForeignKey,
    Model,
    TextField,
)
class Question(Model):
    """A question, stored as free text."""

    # Body of the question.
    question = TextField()
class Schedule(Model):
    """A scheduling record that links a question to an (optional) user."""

    # Set automatically when the row is first created.
    datetime_added = DateTimeField(auto_now_add=True)
    # Nullable; defaults to None.
    datetime_show_next = DateTimeField(null=True, default=None)
    # on_delete is mandatory from Django 2.0 on; CASCADE was the implicit
    # default before that, so behaviour is unchanged.
    question = ForeignKey(Question, on_delete=CASCADE)
    user = ForeignKey(User, null=True, on_delete=CASCADE)
+3
source to share
2 answers
You can use Subquery, as in this answer (fooobar.com/questions/240063/...),
or Prefetch, as in this one (fooobar.com/questions/240061/...).
Here's one way to accomplish this with Prefetch:
from django.db.models import Prefetch

# Restrict the prefetched reverse relation to this user's schedules, so they
# are loaded up front rather than queried once per question.
schedules_prefetch = Prefetch(
    'schedule_set',
    queryset=Schedule.objects.filter(user=user))
for question in questions_queryset.prefetch_related(schedules_prefetch):
    try:
        # using max here so it wouldn't do another DB hit
        schedule = max(question.schedule_set.all(),
                       key=lambda x: x.datetime_added)
    except ValueError:
        # max() raises ValueError on an empty sequence, i.e. the user has
        # no schedule for this question.
        schedule = None
Here's a usage example for Subquery
(it may not work as written, but it will give you the general idea):
from django.db.models import OuterRef, Subquery

# Newest first, so that the [:1] slice below selects the latest schedule
# (ascending order would select the oldest one instead).
schedules = (Schedule.objects
             .filter(user=user, question=OuterRef('pk'))
             .order_by('-datetime_added'))
# A Subquery used as an annotation must return a single column, hence
# values('pk'); select a different column here if another value is needed.
questions_queryset = (questions_queryset
                      .annotate(latest_schedule=Subquery(
                          schedules.values('pk')[:1])))
for question in questions_queryset:
    # Primary key of the user's latest schedule for this question, or None
    # when the user has no schedule for it.
    schedule = question.latest_schedule
+4
source to share
# Get the question ids
question_ids = questions_queryset.values_list('id', flat=True)
# Get the most recently added schedule of this user across those questions.
# latest() returns a single object even when many rows match (get() would
# raise MultipleObjectsReturned in that case), but it raises DoesNotExist
# when no row matches at all.
try:
    schedule = (Schedule.objects
                .filter(question__in=question_ids, user=user)
                .latest(field_name='datetime_added'))
except Schedule.DoesNotExist:
    schedule = None
+2
source to share