Problem writing individual JSON records in Python

Hopefully someone can tell me why this is happening. I have three movies here, and I am trying to fetch the JSON data for each of them and write it to a separate file called data_fetch.txt. The site I'm using is omdbapi.com. I can fetch the JSON for every movie successfully, but for some strange reason, when it is written to the file, it is all written as one record. So when I open the file I just wrote and count the records, the counter outputs only 1 record when there should be 3 separate records, one JSON document for each of the 3 movies. The reason I would like to split each JSON document into its own record is that I want to get the "genre" value, which is in the JSON. Also, I'm not sure whether this is related to the problem, but when I decode and print each JSON record in the Python interpreter, I get this error: ValueError: Extra data

Here's the code:

import urllib2, time, csv, json, re
def get_url(title):
    """Return the omdbapi.com lookup URL for ``title``.

    ``title`` is converted with ``str()`` and appended unchanged to the
    query prefix; no escaping is performed here.
    """
    return 'http://www.omdbapi.com/?i=&t=' + str(title)

def get_json():
    """Fetch each movie's JSON from omdbapi.com into data_fetch.txt.

    Every response body is echoed to stdout and written to the file exactly
    as received; nothing is inserted between consecutive responses.
    """
    movies = [['2010', 'Colin Firth', "The King Speech "], ['2009', 'Jeff Bridges', 'Crazy Heart '], ['2008', 'Sean Penn', 'Milk ']]
    out = open('data_fetch.txt', 'wb')
    for _year, _actor, raw_title in movies:
        # replace() leaves a title without spaces untouched, so no separate
        # check is needed before encoding the spaces as %20.
        title = raw_title.strip().replace(' ', '%20')
        html_doc = urllib2.urlopen(get_url(title)).read()
        out.write(html_doc)
        print(html_doc)
        # Wait five seconds before moving on to the next movie.
        time.sleep(5)
    out.close()
def counter():
    """Return the number of lines in data_fetch.txt.

    The file is always closed before returning; the original ``close()``
    call sat after the ``return`` statement and was never executed.
    """
    # Read as bytes and let splitlines() find the line breaks: it recognises
    # '\n', '\r' and '\r\n', the same endings the 'rU' mode treated as line
    # terminators, without relying on the 'U' flag that newer Python
    # versions reject.
    with open('data_fetch.txt', 'rb') as records:
        return len(records.read().splitlines())

print(counter())  # this only prints 1

def json_data():
    """Decode and print each line of data_fetch.txt as a JSON document.

    Raises:
        ValueError: if a line is not exactly one JSON document, for example
            when several documents were written back to back on one line.
    """
    # The with-block closes the file even when json.loads raises, which the
    # original open()/close() pair did not.
    with open('data_fetch.txt', 'rU') as records:
        for entry in records:
            print(json.loads(entry))
json_data()  # ValueError: Extra data:

      

Hope someone can help me, I have been doing this for the last 3 hours. I am using Python 2.7.

Update: here's the content for my file: data_fetch.txt:

{"Title": "King's Speech", "Year": "2010", "Evaluated": "R", "Released": "December 25, 2010", "Runtime": "118 mins", Genre ":" Biography, Drama, History "," Director ":" Tom Hooper "," Writer ":" David Seidler (Screenplay) "," Actors ":" Colin Firth, Helena Bonham Carter, Derek Jacoby, Robert Portal "," Story " : "The story of King George VI of the United Kingdom of Great Britain and Northern Ireland, his impromptu ascension to the throne and a speech therapist who helped an insecure monarch to become worthy of it." Language ": English", "Country": "UK, USA, Australia", "Awards": "Won 4 Oscars, 104 more victories and 136 nominations "," Poster ":" http://ia.media-imdb.com/images/M/ MV5BMzU5MjEwMTg2Nl5BMl5BanBnXkFtZTcwNzM3MTYxNA @ @ ._ V1_SX300.jpg"," Metascore ":" 88 "," imdbRating ":" 8.1 "," imdbVotes ":" 355037 "," imdbID ":" tt1504320 "," Type ":" movie "" Answer ":" True "} { "Title": "Crazy Heart", "Year": "2009", "Rated": "R", "Released": "05 February 2010", "Runtime": "112 mins", "Genre": " Drama, Music, Romance ", Director": "Scott Cooper", "Writer": "Scott Cooper, Thomas Cobb (novel)", "Actors": "Jeff Bridges, James Keane, Anna Felix, Paul Herman", " Plot ":"A withered country musician is forced to reevaluate his dysfunctional life during a doomed romance that also inspires him. " Language ":" English, Spanish "," Country ":" USA "," Awards ":" Won 2 Oscars. Another 35 wins and 18 nominations. 
"," Poster ":" http://ia.media-imdb.com/images/M/ MV5BMTU0NDc5NjgzNl5BMl5BanBnXkFtZTcwNzc0NDIzMw @@ ._ V1_SX300.jpg "," Metascore ":" 83 "," imdbRating ":" 7,3 "," imdbVotes ":" 55547 "," imdbID ":" tt1263670 "," Type ":" movie "," Answer ":" True "} {" Name ":" Milk "," Year ":" 2008 "," Rated ":" R "," Released ":" January 30, 2009 "," Runtime ":" 128 mins " , "Genre": "Biography, Drama, History", "Director": "Gus Van Sant", "Writer": "Dustin Lance Black", "Actors": "Sean Penn, Emile Hirsch, Josh Brolin, Diego Luna" , "Plot": "The story of Harvey Milk and his struggles as an American gay activist,who fought for gay rights and became California's first officially open homosexual. " Language ":" English "," Country ":" USA "," Awards ":" Won 2 Oscars. Another 62 wins and 95 nominations. "," Poster ":" http://ia.media-imdb.com/images/M/MV5BMTI2OTM5NjUzMV5BMl5BanBnXkFtZTcwMzY1MTM5MQ @ @ ._ V1_SX300.jpg "," Metascore ":" 84 "," imdbRating ":" 7,7 "," imdbVotes ":" 114699 "," imdbVotes ":" tbt13 ":" imd3 " , "answer": "True"}

+3


source to share


1 answer


You need to write a newline character after each record; otherwise everything will be written on one line:

# Terminate each response with a newline so that every record ends up on its own line.
file.write(html_doc + '\n')

      



By the way, a few improvements to your code:

import urllib2, time, csv, json, re

def get_url(title):
    """Build the omdbapi.com query URL for a movie title.

    Surrounding whitespace is stripped from ``title`` and every remaining
    space is replaced with ``%20`` before it is appended to the base URL.
    """
    encoded = '%20'.join(title.strip().split(' '))
    return 'http://www.omdbapi.com/?i=&t=' + encoded


def get_json(lst, filename):
    """Fetch the record for every movie in ``lst`` and save them all as JSON.

    Args:
        lst: iterable of ``(year, actor, title)`` triples; only the title is
            used for the lookup.
        filename: path of the file to write; existing content is replaced.

    Each response is decoded before it is stored, so the file holds a single
    JSON array with one object per movie.

    Raises:
        ValueError: if a response body is not valid JSON.
    """
    data = []
    for _year, _actor, title in lst:
        response = urllib2.urlopen(get_url(title)).read()
        # Decode here: dumping the raw response strings would encode them a
        # second time, and json.load would then hand back plain strings
        # instead of dictionaries whose fields can be looked up.
        data.append(json.loads(response))
        # Wait five seconds before issuing the next request.
        time.sleep(5)

    # save data to file
    with open(filename, 'wb') as f:
        json.dump(data, f)


def json_data(filename):
    """Return the decoded JSON content of ``filename``.

    Raises:
        ValueError: if the file does not contain a valid JSON document.
    """
    # Plain 'r' is enough: JSON treats every newline form as insignificant
    # whitespace, so the 'U' flag changed nothing for json.load, and newer
    # Python versions refuse that flag altogether.
    with open(filename, 'r') as f:
        return json.load(f)


def counter(filename):
    """Return how many top-level entries the JSON stored in ``filename`` has."""
    records = json_data(filename)
    return len(records)

# Movies to look up, one [year, actor, title] entry each.
lst = [
    ['2010', 'Colin Firth', "The King Speech "],
    ['2009', 'Jeff Bridges', 'Crazy Heart '],
    ['2008', 'Sean Penn', 'Milk '],
]
filename = 'data_fetch.txt'

# Download everything first, then report the record count and the records.
get_json(lst, filename)
print(counter(filename))
print(json_data(filename))

      

+1


source