Combining URL pages as a single data frame
I am trying to load historical meteorological data for a given location. I modified the example given on FlowingData, but I am stuck at the last step: how to combine the pages downloaded for each day into a single DataFrame.
MWE:
# Minimal example from the question: read one CSV page per calendar day from
# wunderground.com and try to combine the per-day results into `frames`.
# NOTE(review): every line sits at column 0 although the `for`/`if` statements
# need indented bodies -- the nesting appears to have been lost in this paste.
import pandas as pd
# Empty frame listing the column names; `frames` is assigned again by the
# pd.concat(...) call on the last line of this snippet.
frames = pd.DataFrame(columns=['TimeEET', 'TemperatureC', 'Dew PointC', 'Humidity','Sea Level PressurehPa',
'VisibilityKm', 'Wind Direction', 'Wind SpeedKm/h','Gust SpeedKm/h','Precipitationmm',
'Events','Conditions', 'WindDirDegrees', 'DateUTC<br />'])
# Iterate through year, month, and day.
# range(2006, 2007) produces only the year 2006; days run 1..31 for every
# month and the checks below skip day numbers past the end of the month.
for y in range(2006, 2007):
for m in range(1, 13):
for d in range(1, 32):
# Check if leap year: divisible by 400 -> leap, otherwise divisible by
# 100 -> not leap, otherwise divisible by 4 -> leap.
if y%400 == 0:
leap = True
elif y%100 == 0:
leap = False
elif y%4 == 0:
leap = True
else:
leap = False
# Skip day numbers that do not exist in the current month.
# NOTE(review): the 30-day list contains 10 (October, which has 31 days)
# instead of 11 (November, which has 30) -- looks like a typo; confirm.
if (m == 2 and leap and d > 29):
continue
elif (m == 2 and d > 28):
continue
elif (m in [4, 6, 9, 10] and d > 30):
continue
# Build the wunderground.com daily-history URL for this date.
url = "http://www.wunderground.com/history/airport/EFHK/"+str(y)+ "/" + str(m) + "/" + str(d) + "/DailyHistory.html?req_city=Vantaa&req_state=&req_statename=Finlandia&reqdb.zip=00000&reqdb.magic=4&reqdb.wmo=02974&format=1"
# Parse the response as comma-separated text, skipping its first two rows.
df=pd.read_csv(url, sep=',',skiprows=2)
# pd.concat receives a single DataFrame here rather than a list of
# DataFrames; this is the call that raises the error quoted in the question.
frames=pd.concat(df)
This gives an error:
first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"
The desired result should be to have one data frame with all days, months and years.
+3
source to share
1 answer
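`pd.concat` expects an iterable (for example a list) of DataFrames, not a single DataFrame. Create an empty list before the loop, append each day's DataFrame to it inside the loop, and then call `pd.concat` once, after the loop, to combine all of them into one DataFrame: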
import calendar

import pandas as pd

# Daily-history page for station EFHK; the year, month and day are inserted
# into the path without zero padding, exactly as the original string
# concatenation did.
URL_TEMPLATE = (
    "http://www.wunderground.com/history/airport/EFHK/{year}/{month}/{day}"
    "/DailyHistory.html?req_city=Vantaa&req_state=&req_statename=Finlandia"
    "&reqdb.zip=00000&reqdb.magic=4&reqdb.wmo=02974&format=1"
)


def iter_dates(years):
    """Yield ``(year, month, day)`` for every real calendar date in *years*.

    ``calendar.monthrange`` supplies the number of days in each month, so
    leap years and 30/31-day months are handled by the standard library
    instead of a hand-written table (the earlier table listed October as a
    30-day month, which dropped October 31 and requested November 31).
    """
    for year in years:
        for month in range(1, 13):
            days_in_month = calendar.monthrange(year, month)[1]
            for day in range(1, days_in_month + 1):
                yield year, month, day


def build_url(year, month, day):
    """Return the daily-history URL for the given date."""
    return URL_TEMPLATE.format(year=year, month=month, day=day)


def load_history(years, reader=pd.read_csv):
    """Download one table per day and combine them into a single DataFrame.

    Args:
        years: Iterable of years to fetch, e.g. ``range(2006, 2007)``.
        reader: Callable used to fetch and parse one URL. It is called as
            ``reader(url, sep=',', skiprows=2)`` and must return a DataFrame.
            Defaults to ``pd.read_csv``; pass a stub to run without network
            access.

    Returns:
        A DataFrame holding the rows of every day, re-indexed from 0. An
        empty DataFrame is returned when *years* yields no dates.
    """
    # Collect the per-day frames in a list first: pd.concat needs an iterable
    # of DataFrames, and concatenating once at the end avoids re-copying the
    # accumulated data on every iteration.
    df_list = [
        reader(build_url(year, month, day), sep=',', skiprows=2)
        for year, month, day in iter_dates(years)
    ]
    if not df_list:
        # pd.concat raises ValueError when given nothing to concatenate.
        return pd.DataFrame()
    return pd.concat(df_list, ignore_index=True)


if __name__ == "__main__":
    frames = load_history(range(2006, 2007))
+3
source to share