Combining URL pages into a single data frame

I am trying to load historical meteorological data for a given location. I modified the example given on FlowingData, but I am stuck at the last step: how to combine the per-day DataFrames into a single DataFrame.

MWE:

import pandas as pd

# Empty frame declaring the expected column names.
# NOTE(review): presumably meant as the accumulator for all days, but it is
# overwritten (not extended) by the assignment at the bottom of the loop.
frames = pd.DataFrame(columns=['TimeEET', 'TemperatureC', 'Dew PointC', 'Humidity','Sea Level PressurehPa', 
       'VisibilityKm', 'Wind Direction', 'Wind SpeedKm/h','Gust SpeedKm/h','Precipitationmm', 
       'Events','Conditions', 'WindDirDegrees', 'DateUTC<br />'])

# Iterate through every candidate (year, month, day) combination.
# range(2006, 2007) covers the single year 2006; days run 1..31 and the
# checks below skip combinations that are not real dates.
for y in range(2006, 2007):
    for m in range(1, 13):
       for d in range(1, 32):

        # Gregorian leap-year rule: divisible by 400, or divisible by 4
        # but not by 100.
        if y%400 == 0:
            leap = True
        elif y%100 == 0:
            leap = False
        elif y%4 == 0:
            leap = True
        else:
            leap = False

        # Skip day numbers that run past the end of the month.
        # NOTE(review): the 30-day months are April, June, September and
        # November (4, 6, 9, 11); the 10 in the list below looks like a
        # typo for 11 -- as written, 31 October is skipped and 31 November
        # is requested.
        if (m == 2 and leap and d > 29):
            continue
        elif (m == 2 and d > 28):
            continue
        elif (m in [4, 6, 9, 10] and d > 30):
            continue

        # Build the wunderground.com daily-history URL for this date and
        # read it as CSV, skipping the first two lines of the response.
        # (presumably format=1 selects the CSV export -- TODO confirm)
        url = "http://www.wunderground.com/history/airport/EFHK/"+str(y)+ "/" + str(m) + "/" + str(d) + "/DailyHistory.html?req_city=Vantaa&req_state=&req_statename=Finlandia&reqdb.zip=00000&reqdb.magic=4&reqdb.wmo=02974&format=1"
        df=pd.read_csv(url, sep=',',skiprows=2)
        # This is the failing call: pd.concat is handed a single DataFrame
        # rather than an iterable (e.g. a list) of DataFrames, which
        # produces the error reported in the question.
        frames=pd.concat(df)

      

This gives an error:

 first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"

      

The desired result should be to have one data frame with all days, months and years.

+3


source to share


1 answer


Create the list before the loop, append each day's DataFrame to it inside the loop, and call pd.concat once after the loop to combine them all into a single DataFrame:



import calendar

import pandas as pd

# Daily-history endpoint on wunderground.com for airport EFHK (Vantaa).
# The year, month and day are substituted into the path; the query string
# is passed through unchanged.
URL_TEMPLATE = (
    "http://www.wunderground.com/history/airport/EFHK/{y}/{m}/{d}"
    "/DailyHistory.html?req_city=Vantaa&req_state=&req_statename=Finlandia"
    "&reqdb.zip=00000&reqdb.magic=4&reqdb.wmo=02974&format=1"
)

# Collect one DataFrame per day in a plain list. pd.concat needs an
# iterable of frames, and concatenating once at the end avoids re-copying
# the accumulated data on every iteration.
df_list = []

# Iterate through year, month, and day
for y in range(2006, 2007):
    for m in range(1, 13):
        # calendar.monthrange returns (weekday of the 1st, days in month)
        # and accounts for leap years, so only real dates are requested.
        # The previous hand-written check listed October instead of
        # November among the 30-day months.
        days_in_month = calendar.monthrange(y, m)[1]
        for d in range(1, days_in_month + 1):
            # Open wunderground.com url; the first two lines of the
            # response are skipped before the CSV header.
            url = URL_TEMPLATE.format(y=y, m=m, d=d)
            df = pd.read_csv(url, sep=',', skiprows=2)
            df_list.append(df)

# Single concatenation after the loop; ignore_index renumbers the rows
# 0..n-1 instead of repeating each day's own index.
frames = pd.concat(df_list, ignore_index=True)

      

+3


source







All Articles