
API requests using Python

Here are two ways to search and retrieve data from the DataStore using the datastore_search endpoint and Python: the requests library and the built-in urllib.request module.

Requests

To install: pip install requests

Example

import pandas as pd
import requests

#API endpoint
url = 'https://canwin-datahub.ad.umanitoba.ca/data/api/3/action/datastore_search?resource_id=c5c16064-e2b3-4618-9b27-0dbf5c1388c2'

#To make a GET request, use the requests.get() function, which takes one required argument: the URL we want to request.
response = requests.get(url)

#The get() function returns a response object. The response.status_code attribute gives the status code for our request.
#Code 200: everything went okay, and the result has been returned (if any).
print(response.status_code)
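
#Alternatively, response.raise_for_status() raises an exception for error (4xx/5xx) status codes,
#which is an easy way to stop early if the request failed.
#response.raise_for_status()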

#JSON (JavaScript Object Notation) is the language of APIs. JSON is a way to encode data structures that ensures that they are easily readable by machines. 
#JSON is the primary format in which data is passed back and forth to APIs, and most API servers will send their responses in JSON format.

#Use the response.json() method to see the data we received back from the API as a dictionary
response_data=response.json()
#print(response_data)

#Get the data records. 'data' is a list containing each row of data as a dictionary.
data=response_data['result']['records']

#To see a list of all the column headers
col_headers=data[0].keys()
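
#The result also includes a 'fields' list describing each column's name and type
#(standard in CKAN datastore_search responses)
#fields = response_data['result']['fields']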

#To filter the data, for example keeping only the rows where Cast=3
filtered_data =  list(filter(lambda row: row['Cast']=='3', data))

#Output data as a dataframe for easy manipulation
df1= pd.DataFrame(data)
df2= pd.DataFrame(filtered_data)

# Save to csv file
df1.to_csv('output.csv', index=False)


#To view the full response, including metadata (requires import json)
# dataDump = json.dumps(response_data, sort_keys=True, indent=4)
# print(dataDump)
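
Note: datastore_search returns a limited number of records per request (100 by default in CKAN), even if the resource contains more rows. The result also includes a 'total' count of matching rows, so a quick comparison, sketched below against the same response_data as above, shows whether everything came back.

# Compare the number of records returned with the total available in the resource
total = response_data['result'].get('total')
print(f'{len(data)} of {total} records returned')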


urllib.request

Example

import urllib.request
import json
import pandas as pd

url = 'https://canwin-datahub.ad.umanitoba.ca/data/api/3/action/datastore_search?resource_id=ea474f80-dcbe-4647-a28d-7fdce1293e09'  

# make a request and receive a response with urlopen()
http_response = urllib.request.urlopen(url)

# Read the response body
raw_data=http_response.read()

# raw_data is a bytes object and will need to be decoded.
# Get the encoding from the response.
encoding = http_response.info().get_content_charset()

# Decode the bytes and parse the JSON with json.loads
data_dict = json.loads(raw_data.decode(encoding))

# get the resource data
data=data_dict['result']['records']

#Output data as a dataframe for easy manipulation
df1= pd.DataFrame(data)

# Save to csv file
df1.to_csv('output.csv', index=False)


To add more parameters to the query, use & before each parameter pair (key=value) after the resource id. For example, to get the first two rows of data only, add &limit=2 after the resource id.

See the CKAN DataStore API documentation for a list of all datastore_search parameters.
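
Instead of building the query string by hand, the requests library can also assemble it from a dictionary passed to its params argument and will URL-encode the values for us. A minimal sketch, reusing the resource id from the first example and equivalent to adding &limit=2 to the URL:

import requests

base_url = 'https://canwin-datahub.ad.umanitoba.ca/data/api/3/action/datastore_search'

params = {
    'resource_id': 'c5c16064-e2b3-4618-9b27-0dbf5c1388c2',
    'limit': 2,
}
response = requests.get(base_url, params=params)
data = response.json()['result']['records']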


Example of filtering the data

Format: &filters={"key":"value"}

import pandas as pd
import requests

# filters (dictionary)

# Filtering only the data records where Cast=3 and sample_date=2016-06-09T00:00:00

url = 'https://canwin-datahub.ad.umanitoba.ca/data/api/3/action/datastore_search?resource_id=c5c16064-e2b3-4618-9b27-0dbf5c1388c2&filters={"Cast":"3","sample_date":"2016-06-09T00:00:00"}'

response = requests.get(url)
response_data=response.json()
data=response_data['result']['records']

df1= pd.DataFrame(data)
df1
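
Because the filters value must itself be a JSON object, it is easiest to build it as a Python dictionary and serialize it with json.dumps when passing it through the params argument. A minimal sketch using the same Cast and sample_date filter as above:

import json
import requests

base_url = 'https://canwin-datahub.ad.umanitoba.ca/data/api/3/action/datastore_search'

params = {
    'resource_id': 'c5c16064-e2b3-4618-9b27-0dbf5c1388c2',
    # filters must be passed as a JSON-encoded string
    'filters': json.dumps({'Cast': '3', 'sample_date': '2016-06-09T00:00:00'}),
}
response = requests.get(base_url, params=params)
data = response.json()['result']['records']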


Example of limiting the data returned

Format: &limit=3

import pandas as pd
import requests

# limit (integer)

# limit=2 , only the first two rows
url = 'https://canwin-datahub.ad.umanitoba.ca/data/api/3/action/datastore_search?resource_id=c5c16064-e2b3-4618-9b27-0dbf5c1388c2&limit=2'

response = requests.get(url)
response_data=response.json()
data=response_data['result']['records']

df1= pd.DataFrame(data)
df1
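

Example of paging through all records

Format: &limit=100&offset=0

To retrieve a resource that is larger than a single page of results, the limit parameter can be combined with datastore_search's offset parameter. A minimal sketch, assuming the standard offset behaviour and an arbitrary page size of 100, that fetches all records and combines them into one dataframe:

import requests
import pandas as pd

base_url = 'https://canwin-datahub.ad.umanitoba.ca/data/api/3/action/datastore_search'
resource_id = 'c5c16064-e2b3-4618-9b27-0dbf5c1388c2'

all_records = []
offset = 0
page_size = 100

while True:
    params = {'resource_id': resource_id, 'limit': page_size, 'offset': offset}
    result = requests.get(base_url, params=params).json()['result']
    records = result['records']
    all_records.extend(records)
    # Stop once a partial (or empty) page is returned
    if len(records) < page_size:
        break
    offset += page_size

df_all = pd.DataFrame(all_records)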