How to iterate over nextPageToken using GoogleDrive Python quickstart
My goal is to have a list of all items and folders in all Google Drive. I start by trying to make sure the script works on its own. I read the cover for the REST API Documentation and ended up finding this code, which can also be found here .
from __future__ import print_function
import httplib2
import os
import sys
from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
reload(sys)
sys.setdefaultencoding('utf-8')
try:
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Drive API Python Quickstart'
def get_credentials():
"""Gets valid user credentials from storage.
If nothing has been stored, or if the stored credentials are invalid,
the OAuth2 flow is completed to obtain the new credentials.
Returns:
Credentials, the obtained credential.
"""
home_dir = os.path.expanduser('~')
credential_dir = os.path.join(home_dir, '.credentials')
if not os.path.exists(credential_dir):
os.makedirs(credential_dir)
credential_path = os.path.join(credential_dir,
'drive-python-quickstart.json')
store = Storage(credential_path)
credentials = store.get()
if not credentials or credentials.invalid:
flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
flow.user_agent = APPLICATION_NAME
if flags:
credentials = tools.run_flow(flow, store, flags)
else: # Needed only for compatibility with Python 2.6
credentials = tools.run(flow, store)
print('Storing credentials to ' + credential_path)
return credentials
def main():
"""Shows basic usage of the Google Drive API.
Creates a Google Drive API service object and outputs the names and IDs
for up to 10 files.
"""
credentials = get_credentials()
http = credentials.authorize(httplib2.Http())
service = discovery.build('drive', 'v3', http=http)
results = service.files().list(
pageSize=1000,fields="nextPageToken, files(mimeType, name)").execute()
items = results.get('files', [])
if not items:
print('No files found.')
else:
print('Files:')
for item in items:
print('{0} ({1})'.format(item['name'], item['mimeType']))
if __name__ == '__main__':
main()
My problem is with nextPageToken
and how to use it correctly. The max value PageSize
is 1000, so I have to loop over nextPageToken
, extract it from the resulting JSON, put it back in the original loop (line 66?) To get another 1000 results. How to do it?
source to share
Look at the Apache Google documentation for File: List Method
In the fields of your request you are requesting nextPageToken
, the result will contain the token for nextPage (if next page exists). The result will be something like this:
{
...,
"nextPageToken": "V1*3|0|XXXXXX",
"files": [
{
...
},...
]
}
you can retrieve nextPageToken value like:
token = results.get('nextPageToken', None)
The List method can take a string parameter pageToken
:
A marker to continue with the previous list request on the next page. This should be set to "nextPageToken" from the previous Reply.
Just set the pageToken parameter in the following request to get the following page of results:
results = service.files().list(
pageSize=1000,
pageToken=token,
fields="nextPageToken, files(mimeType, name)").execute()
items = results.get('files', [])
Now you can easily create a loop to get the result.
source to share
I'll try to demonstrate the concept to you, but you will follow the Python implementation. Short answer: nextPageToken . nextPageToken
allow you to get results from the next page.
When you make a GET request, the nextPageToken is always included in the response, so if you had 1000 results but only needed to display 20 pages, you can get the remaining 980 files using nextPageToken.
Run this url and you should see something like:
"kind": "drive#fileList",
"nextPageToken": "V1*3|0|CjkxOHY2aDdROE9JYkJGWUJEaU5Ybm1OVURSemJTcWFMa2lRQlVJSnVxYmI2YkYzMmhnVHozeWkwRnASBxCqqcG4kis",
"incompleteSearch": false,
The meaning nextPageToken
here is what you use to navigate to the next page. When you go to the next page and get more results, a new one will be created nextPageToken
for you until you view / get all the results (980-1000).
source to share
I had problems with this. I haven't read the example closely enough to notice that nextPageToken and newStartPageToken are not the same.
I decomposed the functions a bit and added a loop. Basically, return startPageToken and follow the same function / call the function as needed.
from __future__ import print_function
import httplib2
import os
#julian
import time
from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
try:
import argparse
flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
flags = None
# If modifying these scopes, delete your previously saved credentials
# at ~/.credentials/drive-python-quickstart.json
SCOPES = 'https://www.googleapis.com/auth/drive.metadata.readonly'
CLIENT_SECRET_FILE = 'client_secret.json'
APPLICATION_NAME = 'Drive API Python Quickstart'
def get_credentials():
"""Gets valid user credentials from storage.
If nothing has been stored, or if the stored credentials are invalid,
the OAuth2 flow is completed to obtain the new credentials.
Returns:
Credentials, the obtained credential.
"""
home_dir = os.path.expanduser('~')
credential_dir = os.path.join(home_dir, '.credentials')
if not os.path.exists(credential_dir):
os.makedirs(credential_dir)
credential_path = os.path.join(credential_dir,'drive-python-quickstart.json')
store = Storage(credential_path)
credentials = store.get()
if not credentials or credentials.invalid:
flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
flow.user_agent = APPLICATION_NAME
if flags:
credentials = tools.run_flow(flow, store, flags)
else: # Needed only for compatibility with Python 2.6
credentials = tools.run(flow, store)
print('Storing credentials to ' + credential_path)
return credentials
def main():
"""Shows basic usage of the Google Drive API.
Creates a Google Drive API service object and outputs the names and IDs
for up to 10 files.
"""
credentials = get_credentials()
http = credentials.authorize(httplib2.Http())
service = discovery.build('drive', 'v3', http=http)
saved_start_page_token = StartPage_v3(service)
saved_start_page_token = DetectChanges_v3(service, saved_start_page_token)
starttime=time.time()
while True:
saved_start_page_token = DetectChanges_v3(service, saved_start_page_token)
time.sleep(10.0 - ((time.time() - starttime) % 10.0))
def StartPage_v3(service):
response = service.changes().getStartPageToken().execute()
print('Start token: %s' % response.get('startPageToken'))
return response.get('startPageToken')
def DetectChanges_v3(service, saved_start_page_token):
# Begin with our last saved start token for this user or the
# current token from getStartPageToken()
page_token = saved_start_page_token;
while page_token is not None:
response = service.changes().list(pageToken=page_token, spaces='drive').execute()
for change in response.get('changes'):
# Process change
mimeType = change.get('file').get('mimeType')
print( 'Change found for: %s' % change)
if 'newStartPageToken' in response:
# Last page, save this token for the next polling interval
saved_start_page_token = response.get('newStartPageToken')
page_token = response.get('nextPageToken')
return saved_start_page_token
if __name__ == '__main__':
main()
source to share