How do I count the subsections of each chapter in a text file and generate bookmark entries from it?
I have text files with the CONTENTS of the book.
I have to create an index.txt file that ghostscript can read.
The text file is available HERE and looks like this:
Chapter 1 Introduction 1
Chapter 2 Fundamental Observations 7
2.1 Dark night sky 7
2.2 Isotropy and homogeneity 11
2.3 Redshift proportional to distance 15
2.4 Types of particles 22
2.5 Cosmic microwave background 28
Chapter 3 Newton Versus Einstein 32
3.1 Equivalence principle 33
3.2 Describing curvature 39
3.3 Robertson-Walker metric 44
3.4 Proper distance 47
This should be changed to:
[/Count -0 /Page 7 /Title (Chapter: 1 Introduction ) /OUT pdfmark
[/Count -5 /Page 13 /Title (Chapter: 2 Fundamental Observations ) /OUT pdfmark
[/Count 0 /Page 13 /Title (Chapter: 2.1 Dark night sky ) /OUT pdfmark
[/Count 0 /Page 17 /Title (Chapter: 2.2 Isotropy and homogeneity ) /OUT pdfmark
[/Count 0 /Page 21 /Title (Chapter: 2.3 Redshift proportional to distance ) /OUT pdfmark
[/Count 0 /Page 28 /Title (Chapter: 2.4 Types of particles ) /OUT pdfmark
[/Count 0 /Page 34 /Title (Chapter: 2.5 Cosmic microwave background ) /OUT pdfmark
[/Count -4 /Page 38 /Title (Chapter: 3 Newton Versus Einstein ) /OUT pdfmark
[/Count 0 /Page 39 /Title (Chapter: 3.1 Equivalence principle ) /OUT pdfmark
[/Count 0 /Page 45 /Title (Chapter: 3.2 Describing curvature ) /OUT pdfmark
[/Count 0 /Page 50 /Title (Chapter: 3.3 Robertson-Walker metric ) /OUT pdfmark
[/Count 0 /Page 53 /Title (Chapter: 3.4 Proper distance ) /OUT pdfmark
In the output above, please note that:
Count = number of subchapters in the given chapter
Page = page given in the table of contents + 6
How can we do this?
So far I have tried this.
def get_Count_Page_and_Title(bookmark, offset=6):
    """Write one pdfmark entry to ``temp_index.txt`` per line of *bookmark*.

    This is a work-in-progress stub: every input line is replaced by the same
    hard-coded example entry, so neither the content of the line nor
    ``offset`` influences the output yet.

    Args:
        bookmark: Path of the table-of-contents text file to read.
        offset: Currently unused.
    """
    # Placeholder entry that stands in for the real, per-line conversion.
    placeholder = (
        r'[/Count -0 /Page 0 /Title (Chapter: 1 Introduction ) /OUT pdfmark'
    )
    with open(bookmark, 'r') as fi, open('temp_index.txt', 'w') as fo:
        for _ in fi:
            print(placeholder, file=fo)
Some related links:
python reading a text file
Read a .txt file line by line in Python
source to share
Here's one way to parse your file. This code uses simple line matching to distinguish between chapter and subchapter lines. It then groups each subchapter under its parent chapter. Finally, it iterates over this data to generate the desired output.
Code:
def print_count_page_and_title(data, page_offset=0):
    """Print ghostscript pdfmark bookmark entries for a table of contents.

    Each non-blank line of *data* must end with a page number, for example
    ``Chapter 2 Fundamental Observations 7`` or ``2.1 Dark night sky 7``.
    A line starting with ``Chapter`` opens a new chapter; any other line is a
    subchapter of the most recent chapter.  Lines that appear before the
    first chapter are printed as top-level entries without children.  Blank
    lines are ignored.

    Args:
        data: Iterable of text lines (an open file, a list of strings, ...).
        page_offset: Number added to every page number read from *data*.

    Raises:
        ValueError: If a line has no page number, or the page number is not
            an integer.  All lines are parsed before anything is printed.
    """
    def parse(text):
        """Split ``'<title> <page>'`` into ``(title, shifted page number)``."""
        # Split on any whitespace run so tabs or repeated spaces before the
        # page number do not end up in the title.
        parts = text.rsplit(None, 1)
        if len(parts) != 2:
            raise ValueError('missing page number in line: %r' % text)
        title, page = parts
        return title, int(page) + page_offset

    def ps_escape(title):
        """Escape the characters that are special in a PostScript string."""
        return (title.replace('\\', '\\\\')
                .replace('(', '\\(')
                .replace(')', '\\)'))

    leading = []    # entries seen before the first chapter line
    chapters = []   # list of ((title, page), [(sub_title, sub_page), ...])
    for line in data:
        text = line.strip()
        if not text:
            continue
        if text.startswith('Chapter'):
            chapters.append((parse(text), []))
        elif chapters:
            chapters[-1][1].append(parse(text))
        else:
            leading.append(parse(text))

    # A negative /Count is written on chapter lines, followed by that many
    # subchapter lines with /Count 0.
    fmt_chapter = '[/Count -%d /Page %d /Title (%s) /OUT pdfmark'
    fmt_sub_chapter = '[/Count 0 /Page %d /Title (%s) /OUT pdfmark'

    for title, page in leading:
        print(fmt_sub_chapter % (page, ps_escape(title)))
    for (title, page), sub_chapters in chapters:
        print(fmt_chapter % (len(sub_chapters), page, ps_escape(title)))
        for sub_title, sub_page in sub_chapters:
            print(fmt_sub_chapter % (sub_page, ps_escape(sub_title)))
# test_data is the StringIO object built in the "Test data" snippet below;
# define it before running this call.
print_count_page_and_title(test_data, page_offset=6)
Test data:
from io import StringIO
# In-memory stand-in for the table-of-contents file: the first and last
# (empty) pieces produced by splitting the triple-quoted text are dropped and
# every remaining line is stripped before being re-joined with newlines.
test_data = StringIO(
    '\n'.join(
        entry.strip()
        for entry in """
Chapter 1 Introduction 1
Chapter 2 Fundamental Observations 7
2.1 Dark night sky 7
2.2 Isotropy and homogeneity 11
2.3 Redshift proportional to distance 15
2.4 Types of particles 22
2.5 Cosmic microwave background 28
Chapter 3 Newton Versus Einstein 32
3.1 Equivalence principle 33
3.2 Describing curvature 39
3.3 Robertson-Walker metric 44
3.4 Proper distance 47
""".split('\n')[1:-1]
    )
)
Results:
[/Count -0 /Page 7 /Title (Chapter 1 Introduction) /OUT pdfmark
[/Count -5 /Page 13 /Title (Chapter 2 Fundamental Observations) /OUT pdfmark
[/Count 0 /Page 13 /Title (2.1 Dark night sky ) /OUT pdfmark
[/Count 0 /Page 17 /Title (2.2 Isotropy and homogeneity ) /OUT pdfmark
[/Count 0 /Page 21 /Title (2.3 Redshift proportional to distance ) /OUT pdfmark
[/Count 0 /Page 28 /Title (2.4 Types of particles ) /OUT pdfmark
[/Count 0 /Page 34 /Title (2.5 Cosmic microwave background ) /OUT pdfmark
[/Count -4 /Page 38 /Title (Chapter 3 Newton Versus Einstein) /OUT pdfmark
[/Count 0 /Page 39 /Title (3.1 Equivalence principle ) /OUT pdfmark
[/Count 0 /Page 45 /Title (3.2 Describing curvature ) /OUT pdfmark
[/Count 0 /Page 50 /Title (3.3 Robertson-Walker metric ) /OUT pdfmark
[/Count 0 /Page 53 /Title (3.4 Proper distance) /OUT pdfmark
source to share
First, a million thanks to @Stephen Rauch.
Using the above code:
If we have any PDF document and we want to bookmark it, we can use the following code:
Note: we need to write the output of the above code to a text file index.txt
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Author : Bhishan Poudel; Physics PhD Student, Ohio University
# Date : Jan 22, 2017
#
# Imports
import io
import subprocess
import os
from pdfrw import PdfReader, PdfWriter
from natsort import natsorted
import glob
def create_bookmarked_pdf(inpdf, outpdf, index='index.txt'):
    """Create a bookmarked copy of *inpdf* by running ghostscript.

    Args:
        inpdf: Path of the source PDF.
        outpdf: Path of the PDF that ghostscript writes.
        index: Path of the pdfmark file passed to ghostscript ahead of the
            input PDF.  Defaults to ``index.txt``.

    Returns:
        The exit status of the ``gs`` process.

    Raises:
        OSError: If the ``gs`` executable cannot be started.
    """
    # Pass the arguments as a list (no shell) so that file names containing
    # spaces or shell metacharacters reach gs unchanged.
    command = [
        'gs',
        '-sDEVICE=pdfwrite',
        '-q',
        '-dBATCH',
        '-dNOPAUSE',
        '-sOutputFile=' + outpdf,
        index,
        '-f',
        inpdf,
    ]
    print('{} {} {}'.format('Creating : ', outpdf, ''))
    return subprocess.call(command)
def main():
    """Bookmark ``ryden.pdf`` and write the result to ``output.pdf``."""
    inpdf = 'ryden.pdf'
    outpdf = 'output.pdf'
    create_bookmarked_pdf(inpdf, outpdf)
    # index.txt is not deleted afterwards; the cleanup step that used to
    # follow here was disabled.
if __name__ == "__main__":
    import time

    # Record the start both as a timestamp (for the elapsed time) and as
    # readable text (for the report).
    program_begin_time = time.time()
    begin_ctime = time.ctime()

    # Run the main program
    main()

    program_end_time = time.time()
    end_ctime = time.ctime()

    # Break the elapsed seconds down into days, hours, minutes and seconds.
    seconds = program_end_time - program_begin_time
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)

    print("\nBegin time: ", begin_ctime)
    print("End time: ", end_ctime, "\n")
    # Adjacent literals are joined at compile time, so no source indentation
    # leaks into the printed message.
    print("Time taken: {0: .0f} days, {1: .0f} hours, "
          "{2: .0f} minutes, {3: f} seconds.".format(d, h, m, s))
source to share
I will modify the above answer very slightly so that I can read data from a text file and write to another text file.
The code is below:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Author : Stephen Rauch
# Modified by : Bhishan Poudel; Physics PhD Student, Ohio University
# Date : Mar 5, 2017
# pastebin link for index.txt: http://pastebin.com/LP8KXAmU
def print_count_page_and_title(data, page_offset=0, outfile='index.txt'):
    """Write ghostscript pdfmark bookmark entries for a table of contents.

    Each non-blank line of *data* must end with a page number, for example
    ``Chapter 2 Fundamental Observations 7`` or ``2.1 Dark night sky 7``.
    A line starting with ``Chapter`` opens a new chapter; any other line is a
    subchapter of the most recent chapter.  Lines that appear before the
    first chapter are written as top-level entries without children.  Blank
    lines are ignored.

    Args:
        data: Iterable of text lines (an open file, a list of strings, ...).
        page_offset: Number added to every page number read from *data*.
        outfile: Path of the UTF-8 text file to write.  Defaults to
            ``index.txt``.

    Raises:
        ValueError: If a line has no page number, or the page number is not
            an integer.  Raised before *outfile* is opened, so an existing
            file is not truncated by bad input.
    """
    def parse(text):
        """Split ``'<title> <page>'`` into ``(title, shifted page number)``."""
        # Split on any whitespace run so tabs or repeated spaces before the
        # page number do not end up in the title.
        parts = text.rsplit(None, 1)
        if len(parts) != 2:
            raise ValueError('missing page number in line: %r' % text)
        title, page = parts
        return title, int(page) + page_offset

    def ps_escape(title):
        """Escape the characters that are special in a PostScript string."""
        return (title.replace('\\', '\\\\')
                .replace('(', '\\(')
                .replace(')', '\\)'))

    leading = []    # entries seen before the first chapter line
    chapters = []   # list of ((title, page), [(sub_title, sub_page), ...])
    for line in data:
        text = line.strip()
        if not text:
            continue
        if text.startswith('Chapter'):
            # e.g. 'Chapter 1 Introduction 1' -> ('Chapter 1 Introduction', 1)
            chapters.append((parse(text), []))
        elif chapters:
            chapters[-1][1].append(parse(text))
        else:
            leading.append(parse(text))

    # A negative /Count is written on chapter lines, followed by that many
    # subchapter lines with /Count 0.
    fmt_chapter = '[/Count -%d /Page %d /Title (%s) /OUT pdfmark'
    fmt_sub_chapter = '[/Count 0 /Page %d /Title (%s) /OUT pdfmark'

    print('Creating: ', outfile)
    # The context manager closes the file even if a write fails.
    with open(outfile, 'w', encoding='utf-8') as fo:
        for title, page in leading:
            print(fmt_sub_chapter % (page, ps_escape(title)), file=fo)
        for (title, page), sub_chapters in chapters:
            print(fmt_chapter % (len(sub_chapters), page, ps_escape(title)),
                  file=fo)
            for sub_title, sub_page in sub_chapters:
                print(fmt_sub_chapter % (sub_page, ps_escape(sub_title)),
                      file=fo)
if __name__ == "__main__":
    # Stream the table of contents straight from the file; the context
    # manager closes the handle once the bookmarks have been written.
    with open('toc_ryden.txt', 'r', encoding='utf-8') as toc_file:
        print_count_page_and_title(toc_file, page_offset=6)
source to share