BeautifulSoup - Extract json from JS

I am playing with BeautifulSoup and I am looking for a way to get a specific JSON string from inside a `<script>` element.

Here's the JS:

<script>window.pinball = window.pinball || [];
window.pinball.push(['add', {"srp_cleanup":"inactive","book_visit":"inactive","my_visits":"inactive"}]);
window.Rent = window.Rent || {};
window.Rent.zutron  = {"error_div":".js-generic-error","host":"rent","user_type":null,"zid":null,"origin":null,"provider":null};
window.Rent.book_visit = {"book_visit_host":"http://bookavisit.prod.services.rentpath.com"}
window.Rent.tagging = {"tealium":{"env":"prod","profile":"tealium.rent.com","account":"rentpath"}};
window.Rent.realm   = "rent";
window.Rent.data    = {"floorplans":{"1159255":{"availability":"1 Unit Available","availability_class":"floorplan-available-now","unitstyle":"aa1- 1 Bed/1 Bath","deposit":"","floorplan_id":1159255,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"763 sqft","rent":"$1950 - $2322 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/52ad5930427b3e739676240c01b7d6cc/650-","fp3dfurnished":"http://image1.rent.com/imgr/07733fbd8c8a6a9134d5e0af77d52cb2/650-","floorplanimage":"http://image.rent.com/imgr/44c2395728fa733c2682506d96ec68f5/650-"},"1159257":{"availability":"2 Units Available","availability_class":"floorplan-available-now","unitstyle":"aa3- 1 Bed/1 Bath","deposit":"","floorplan_id":1159257,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"893 sqft","rent":"$1995 - $2531 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/187753b2e7e6beb5aaf8602514361d89/650-","fp3dfurnished":"http://image.rent.com/imgr/55673aa4253387f0d06aa02495ccf2bc/650-","floorplanimage":"http://image.rent.com/imgr/389adb5ac1fa61c56aa04c88fe97c02f/650-"},"1159259":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"aa5- 1 Bed/1 Bath","deposit":"","floorplan_id":1159259,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"899 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image.rent.com/imgr/24059a4611740bd58436236758d65e20/650-"},"1159256":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"aa2- 1 Bed/1 Bath","deposit":"","floorplan_id":1159256,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"880 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image1.rent.com/imgr/0854a95e69c0b75ee0b13c41db2f31f1/650-"},"1159258":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"aa4- 1 Bed/1 
Bath","deposit":"","floorplan_id":1159258,"bed":"1 bed","listing_id":"571535","bath":"1 bath","sqft":"897 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image1.rent.com/imgr/deb3efc9ee3933a0a1b4844d886b7a8a/650-"},"1159262":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc3- 2 Bed/2 Bath","deposit":"","floorplan_id":1159262,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1194 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","floorplanimage":"http://image1.rent.com/imgr/a1fff6050e86f98b7249b843cd6f0836/650-"},"1159263":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc4- 2 Bed/2 Bath","deposit":"","floorplan_id":1159263,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1201 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/33e2bb30c9aa1fcdbbf8ce4882a18fcd/650-","fp3dfurnished":"http://image.rent.com/imgr/c4d4df83e18f2b12c8cae6dab523769b/650-","floorplanimage":"http://image.rent.com/imgr/11ac88f52ca904e7646e03b6791f8455/650-"},"1159266":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc7- 2 Bed/2 Bath","deposit":"","floorplan_id":1159266,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1461 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/0a3887c07a7bc05670a826cd5562c49d/650-","fp3dfurnished":"http://image.rent.com/imgr/efa94735904b40ba463cbd26bc5504cf/650-","floorplanimage":"http://image1.rent.com/imgr/36413f72b93f0b0ed2f4f89337ef719d/650-"},"1159264":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc5- 2 Bed/2 Bath","deposit":"","floorplan_id":1159264,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1325 sqft","rent":"Contact for 
Pricing","propertyname":"Reading Commons","floorplanimage":"http://image.rent.com/imgr/ce1627742dbca97cc44d726b1d906fc3/650-"},"1159267":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bcl1-2 Bed/2 Bath","deposit":"","floorplan_id":1159267,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1500 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/a5888b34db510f6932af116e5197ce0c/650-","fp3dfurnished":"http://image1.rent.com/imgr/68f33736e29613562d9a5618eec1a4c6/650-","floorplanimage":"http://image1.rent.com/imgr/d7a833b56639b121178ddc86ac074754/650-"},"1159261":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc2- 2 Bed/2 Bath","deposit":"","floorplan_id":1159261,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1187 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/33e2bb30c9aa1fcdbbf8ce4882a18fcd/650-","fp3dfurnished":"http://image.rent.com/imgr/c4d4df83e18f2b12c8cae6dab523769b/650-","floorplanimage":"http://image1.rent.com/imgr/11ac88f52ca904e7646e03b6791f8455/650-"},"1159265":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc6- 2 Bed/2 Bath","deposit":"","floorplan_id":1159265,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1400 sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image.rent.com/imgr/3f80d6e4386db5f450a6750c1a537b84/650-","fp3dfurnished":"http://image1.rent.com/imgr/f54aefd699a9ed3f1d8b6fb8e4ce1500/650-","floorplanimage":"http://image1.rent.com/imgr/b78bda34547615be4973da38dbd9a10f/650-"},"1159260":{"availability":"UNAVAILABLE","availability_class":"floorplan-available-later","unitstyle":"bc1- 2 Bed/2 Bath","deposit":"","floorplan_id":1159260,"bed":"2 beds","listing_id":"571535","bath":"2 baths","sqft":"1121 
sqft","rent":"Contact for Pricing","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/3b4e4306d4cc2317bd271888532405a0/650-","fp3dfurnished":"http://image1.rent.com/imgr/8ca6a08b9c4eed76575520b4f1dcc03c/650-","floorplanimage":"http://image.rent.com/imgr/f25bcd28009d72a91f02d4e125340b65/650-"},"1159268":{"availability":"1 Unit Available","availability_class":"floorplan-available-now","unitstyle":"cdta1- 3 Bed/3 Bath Office TH","deposit":"","floorplan_id":1159268,"bed":"3 beds","listing_id":"571535","bath":"3 baths","sqft":"2100 sqft","rent":"$3798 - $5073 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/82ba57c2f1be5071c3d5f48a79c9d45e/650-","fp3dfurnished":"http://image.rent.com/imgr/bc7908ca722b6f9407a247ebf7af49bd/650-","floorplanimage":"http://image.rent.com/imgr/3c881fbe1aba5ba7be68ca6399e7daa3/650-"},"1159269":{"availability":"1 Unit Available","availability_class":"floorplan-available-now","unitstyle":"cdta2- 3 Bed/3 Bath Office TH","deposit":"","floorplan_id":1159269,"bed":"3 beds","listing_id":"571535","bath":"3 baths","sqft":"2310 sqft","rent":"$3908 - $4995 /mo","propertyname":"Reading Commons","fp3dunfurnished":"http://image1.rent.com/imgr/86b5248dfbaef2534218a8bdb724d93e/650-","fp3dfurnished":"http://image.rent.com/imgr/ee01414c664925a3463bad279f943363/650-","floorplanimage":"http://image.rent.com/imgr/ba58885223be2f4f8bfd1588d9ddca9e/650-"}},"reviews":{"startingrecordnumber":1,"totalnumberofmatchingrecords":18,"numberofrecordsreturned":10,"numberofpages":2,"endingrecordnumber":10,"pagenumber":1,"numberofrecordsperpage":10},"listing":{"id":"571535","name":"Reading Commons","address_full":"7 Archstone Circle, Reading, MA 01867","phone_desktop":"(781) 205-2341","visits_enabled":true}};
window.Rent.mapbox_api_key = "pk.eyJ1IjoibmhnbWFwYm94IiwiYSI6ImNpb2VrYW5uazAwbHp5OG0yYmp6bms5bjYifQ.4RylIPWDNDEie2NreUsbig";
window.Rent.asset_host = "rent.assets.rentpathcdn.com";

window.zutron_host = "http://zutron.primedia.com";
window.ONESEARCH_URL = "http://onesearch.svc.primedia.com";

window.Rent.pageType = "pdp";

// these two globals are used in onesearch.js, not sure where else
window.channel = "apartments";
window.APPLICATION = "rent";

window.googletag = window.googletag || {};
window.googletag.cmd = window.googletag.cmd || [];

// SID is used by the Moving Leads Service
window.Rent.MOVING_LEADS_SID = 96;</script>

      

I was able to pull the JS out with BeautifulSoup, and I am looking for the JSON string assigned to `window.Rent.data`. Is there a way to do this without resorting to `re`?

+3


source to share


1 answer


The idea is to use a regex pattern with a capturing group. Use this regex to find the `script` element by its text, then extract the matching substring from the script itself. Finally, use `json.loads()` to load the JSON string into a Python object:

import json
import re

from bs4 import BeautifulSoup

# Placeholder: paste the page's HTML here.
html = """
your HTML here"""

soup = BeautifulSoup(html, "html.parser")

# The dots are escaped so they match literally instead of "any character".
# The capturing group holds the object literal assigned to window.Rent.data,
# up to the "};" that ends its line (trailing whitespace is tolerated, and the
# statement may also be the last thing in the script).
pattern = re.compile(r"window\.Rent\.data\s*=\s*(\{.*?\});\s*$", re.MULTILINE)

# `string=` is the current name of the argument formerly spelled `text=`.
script = soup.find("script", string=pattern)
if script is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError("no <script> element assigns window.Rent.data")

# The search cannot fail here: find() returned this tag only because its
# string matched the same pattern.
raw_json = pattern.search(script.string).group(1)
data = json.loads(raw_json)
print(data)

      



Another option is to use a JavaScript parser; see fooobar.com/questions/2409375/... for details.

+2


source







All Articles