How do I update the subsection numbers in this chapter in a text file?

Question

How do I update the subsection numbers in this chapter in a text file?

I have text files with the CONTENTS of the book.
I have to create an index.txt file that ghostscript can read.

The text file is available HERE and looks like this:

Chapter 1 Introduction 1
Chapter 2 Fundamental Observations 7
2.1 Dark night sky   7
2.2 Isotropy and homogeneity  11
2.3 Redshift proportional to distance  15
2.4 Types of particles  22
2.5 Cosmic microwave background  28
Chapter 3 Newton Versus Einstein 32
3.1 Equivalence principle  33
3.2 Describing curvature  39
3.3 Robertson-Walker metric  44
3.4 Proper distance 47

This should be changed to:

[/Count -0 /Page 7 /Title (Chapter: 1 Introduction ) /OUT pdfmark
[/Count -5 /Page 13 /Title (Chapter: 2 Fundamental Observations ) /OUT pdfmark
[/Count 0 /Page 13 /Title (Chapter: 2.1 Dark night sky   ) /OUT pdfmark
[/Count 0 /Page 17 /Title (Chapter: 2.2 Isotropy and homogeneity  ) /OUT pdfmark
[/Count 0 /Page 21 /Title (Chapter: 2.3 Redshift proportional to distance  ) /OUT pdfmark
[/Count 0 /Page 28 /Title (Chapter: 2.4 Types of particles  ) /OUT pdfmark
[/Count 0 /Page 34 /Title (Chapter: 2.5 Cosmic microwave background  ) /OUT pdfmark
[/Count -4 /Page 38 /Title (Chapter: 3 Newton Versus Einstein ) /OUT pdfmark
[/Count 0 /Page 39 /Title (Chapter: 3.1 Equivalence principle  ) /OUT pdfmark
[/Count 0 /Page 45 /Title (Chapter: 3.2 Describing curvature  ) /OUT pdfmark
[/Count 0 /Page 50 /Title (Chapter: 3.3 Robertson-Walker metric  ) /OUT pdfmark
[/Count 0 /Page 53 /Title (Chapter: 3.4 Proper distance   ) /OUT pdfmark

In the output above, note that:

Count = number of subchapters in the given chapter  
Page = page given in the table of contents + 6

How can we do this?

So far I have tried this.

def get_Count_Page_and_Title(bookmark, offset=6):
    """Write one pdfmark line to ``temp_index.txt`` per line of ``bookmark``.

    Incomplete attempt: every input line is replaced by the same hard-coded
    pdfmark string, and ``offset`` is accepted but not used yet.
    """
    with open(bookmark, 'r') as fi, open('temp_index.txt', 'w') as fo:
        for line in fi:
            # Placeholder: the line just read is discarded and overwritten
            # with a fixed example entry instead of being parsed.
            line = r'[/Count -0 /Page 0 /Title (Chapter: 1 Introduction ) /OUT pdfmark'
            print(line, file = fo)

Some related links:
python reading a text file
Read a .txt file line by line in Python

+3

python file numpy ghostscript

Bhishan Poudel March 25 17 at 23:49

source to share

3 answers

First, a million thanks to @Stephen Rauch.
Using the above code:
If we have any PDF document and we want to bookmark it, we can use the following code:

Note: the output of Stephen Rauch's code must first be written to a text file named index.txt.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Author      : Bhishan Poudel; Physics PhD Student, Ohio University
# Date        : Jan 22, 2017
#
# Imports
import io
import subprocess
import os
from pdfrw import PdfReader, PdfWriter
from natsort import natsorted
import glob


def create_bookmarked_pdf(inpdf, outpdf):
    """Create a bookmarked copy of a PDF with Ghostscript.

    Runs ``gs`` with the ``pdfwrite`` device, giving it the pdfmark file
    ``index.txt`` (looked up in the current working directory) followed by
    the input PDF.

    Args:
        inpdf: Path of the source PDF.
        outpdf: Path of the PDF to be written.
    """
    # Pass an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters reach Ghostscript unchanged and cannot
    # be interpreted by a shell.
    command = [
        'gs',
        '-sDEVICE=pdfwrite',
        '-q',
        '-dBATCH',
        '-dNOPAUSE',
        '-sOutputFile=' + outpdf,
        'index.txt',
        '-f',
        inpdf,
    ]
    print('{} {} {}'.format('Creating : ', outpdf, ''))
    subprocess.call(command)


def main():
    """Build ``output.pdf``, a bookmarked copy of ``ryden.pdf``.

    The bookmark definitions are taken from ``index.txt``, which is left on
    disk afterwards (its removal was disabled in the original script).
    """
    # create clickable index in pdf
    inpdf = 'ryden.pdf'
    outpdf = 'output.pdf'
    create_bookmarked_pdf(inpdf, outpdf)


if __name__ == "__main__":
    import time

    # Record the start of the run, as a timestamp and in readable form.
    program_begin_time = time.time()
    begin_ctime = time.ctime()

    # Run the main program
    main()

    # Report the elapsed wall-clock time split into days/hours/minutes/seconds.
    program_end_time = time.time()
    end_ctime = time.ctime()
    seconds = program_end_time - program_begin_time
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    d, h = divmod(h, 24)
    print("\nBegin time: ", begin_ctime)
    print("End   time: ", end_ctime, "\n")
    # Adjacent literals are concatenated, so no source indentation leaks
    # into the message (a backslash continuation inside the string would
    # embed the leading spaces of the next line).
    print("Time taken: {0: .0f} days, {1: .0f} hours, "
          "{2: .0f} minutes, {3: f} seconds.".format(d, h, m, s))

+1

Bhishan Poudel 26 Mar 17 at 2:06

source to share

I modified Stephen Rauch's answer very slightly so that it reads the data from one text file and writes the result to another text file.
The code is below:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Author      : Stephen Rauch
# Modified by : Bhishan Poudel; Physics PhD Student, Ohio University
# Date        : Mar 5, 2017
# pastebin link for index.txt: http://pastebin.com/LP8KXAmU


def print_count_page_and_title(data, page_offset=0, outfile='index.txt'):
    """Write pdfmark bookmark lines for a table of contents to a file.

    A line starting with ``Chapter`` opens a new chapter; every other
    non-blank line is recorded as a subsection of the most recent chapter.
    The text after the last space of each line is its page number. The
    input is parsed completely before the output file is opened, so a
    malformed input does not truncate an existing file.

    Args:
        data: Iterable of table-of-contents lines.
        page_offset: Value added to every page number.
        outfile: Path of the pdfmark file to write (UTF-8).

    Raises:
        ValueError: If a line has no page number, the page number is not an
            integer, or a subsection line appears before any chapter line.
    """
    def escape(title):
        # Titles are emitted inside a PostScript string "( ... )".
        # Backslash is replaced first so the escapes added for the
        # parentheses are not doubled.
        return (title.replace('\\', '\\\\')
                .replace('(', '\\(')
                .replace(')', '\\)'))

    # Each entry: (chapter title, chapter page, [(sub title, sub page), ...])
    chapters = []
    for lineno, raw in enumerate(data, 1):
        line = raw.strip()
        if not line:
            continue  # tolerate blank lines in the input
        parts = line.rsplit(' ', 1)
        if len(parts) != 2:
            raise ValueError(
                'line %d has no page number: %r' % (lineno, line))
        title, page = parts[0], int(parts[1]) + page_offset
        if line.startswith('Chapter'):
            chapters.append((title, page, []))
        elif chapters:
            chapters[-1][2].append((title, page))
        else:
            raise ValueError(
                'line %d is a subsection before any chapter: %r'
                % (lineno, line))

    fmt_chapter = '[/Count -%d /Page %d /Title (%s) /OUT pdfmark'
    fmt_sub_chapter = '[/Count 0 /Page %d /Title (%s) /OUT pdfmark'

    print('Creating: ', outfile)
    # The context manager closes the file even if a write fails.
    with open(outfile, 'w', encoding='utf-8') as fo:
        for title, page, subs in chapters:
            print(fmt_chapter % (len(subs), page, escape(title)), file=fo)
            for sub_title, sub_page in subs:
                print(fmt_sub_chapter % (sub_page, escape(sub_title)),
                      file=fo)

if __name__ == "__main__":
    # Read the whole table of contents up front; the context manager closes
    # the input file as soon as the lines have been loaded.
    with open('toc_ryden.txt', 'r', encoding='utf-8') as toc_file:
        test_data = toc_file.readlines()
    print_count_page_and_title(test_data, page_offset=6)

+1

Bhishan Poudel 28 Mar 17 at 20:30

source to share

Stephen Rauch · Accepted Answer · 2017-03-26T01:24:18+0000

Here's one way to parse your file. This code uses simple line matching to distinguish between chapter and subchapter lines. It then groups each subchapter under the chapter it belongs to. Finally, it iterates over this data to generate the desired result.

Code:

def print_count_page_and_title(data, page_offset=0):
    """Print one pdfmark bookmark line per table-of-contents entry.

    A line starting with ``Chapter`` opens a new chapter; every other
    non-blank line is recorded as a subsection of the most recent chapter.
    The text after the last space of each line is its page number. Chapter
    entries carry the negated number of their subsections as ``/Count``.

    Args:
        data: Iterable of table-of-contents lines.
        page_offset: Value added to every page number.

    Raises:
        ValueError: If a line has no page number, the page number is not an
            integer, or a subsection line appears before any chapter line.
    """
    def escape(title):
        # Titles are emitted inside a PostScript string "( ... )".
        # Backslash is replaced first so the escapes added for the
        # parentheses are not doubled.
        return (title.replace('\\', '\\\\')
                .replace('(', '\\(')
                .replace(')', '\\)'))

    # Each entry: (chapter title, chapter page, [(sub title, sub page), ...])
    chapters = []
    for lineno, raw in enumerate(data, 1):
        line = raw.strip()
        if not line:
            continue  # tolerate blank lines in the input
        parts = line.rsplit(' ', 1)
        if len(parts) != 2:
            raise ValueError(
                'line %d has no page number: %r' % (lineno, line))
        title, page = parts[0], int(parts[1]) + page_offset
        if line.startswith('Chapter'):
            chapters.append((title, page, []))
        elif chapters:
            chapters[-1][2].append((title, page))
        else:
            raise ValueError(
                'line %d is a subsection before any chapter: %r'
                % (lineno, line))

    fmt_chapter = '[/Count -%d /Page %d /Title (%s) /OUT pdfmark'
    fmt_sub_chapter = '[/Count 0 /Page %d /Title (%s) /OUT pdfmark'

    for title, page, subs in chapters:
        print(fmt_chapter % (len(subs), page, escape(title)))
        for sub_title, sub_page in subs:
            print(fmt_sub_chapter % (sub_page, escape(sub_title)))

# NOTE: `test_data` must already be defined when this call runs; it is built
# in the "Test data" snippet.
print_count_page_and_title(test_data, page_offset=6)

Test data:

from io import StringIO

# In-memory stand-in for the table-of-contents file: the first (empty) line
# of the literal is dropped, every remaining line is stripped of its
# surrounding whitespace, and the lines are joined with newlines.
test_data = StringIO('\n'.join(
    row.strip() for row in """
    Chapter 1 Introduction 1
    Chapter 2 Fundamental Observations 7
    2.1 Dark night sky   7
    2.2 Isotropy and homogeneity  11
    2.3 Redshift proportional to distance  15
    2.4 Types of particles  22
    2.5 Cosmic microwave background  28
    Chapter 3 Newton Versus Einstein 32
    3.1 Equivalence principle  33
    3.2 Describing curvature  39
    3.3 Robertson-Walker metric  44
    3.4 Proper distance 47
""".splitlines()[1:]))

Results:

[/Count -0 /Page 7 /Title (Chapter 1 Introduction) /OUT pdfmark
[/Count -5 /Page 13 /Title (Chapter 2 Fundamental Observations) /OUT pdfmark
[/Count 0 /Page 13 /Title (2.1 Dark night sky  ) /OUT pdfmark
[/Count 0 /Page 17 /Title (2.2 Isotropy and homogeneity ) /OUT pdfmark
[/Count 0 /Page 21 /Title (2.3 Redshift proportional to distance ) /OUT pdfmark
[/Count 0 /Page 28 /Title (2.4 Types of particles ) /OUT pdfmark
[/Count 0 /Page 34 /Title (2.5 Cosmic microwave background ) /OUT pdfmark
[/Count -4 /Page 38 /Title (Chapter 3 Newton Versus Einstein) /OUT pdfmark
[/Count 0 /Page 39 /Title (3.1 Equivalence principle ) /OUT pdfmark
[/Count 0 /Page 45 /Title (3.2 Describing curvature ) /OUT pdfmark
[/Count 0 /Page 50 /Title (3.3 Robertson-Walker metric ) /OUT pdfmark
[/Count 0 /Page 53 /Title (3.4 Proper distance) /OUT pdfmark

How do I update the subsection numbers in this chapter in a text file?

More articles: