Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Print diagnostic counts of the packages a CKAN endpoint exposes at `url`,
# first via util.getPackageList and then via a single package_search call.
# NOTE(review): this excerpt has lost its indentation and stops at the
# `while True:` header; the handler for the `try:` below is not visible.
def analyseAPI(url):
#if url == "http://catalog.data.gov/":
# return
# Python 2 print statements are used throughout this function.
print url
try:
package_list, status = util.getPackageList(url)
print "\t",len(package_list),"getPackageList", status
# Names and ids collected from the package_search results.
ps_name=[]
ps_id=[]
api = ckanapi.RemoteCKAN(url, get_only=True)
# One search request with a very large row limit instead of paging.
response = api.action.package_search(rows=100000000)
if response:
datasets= response["results"]
for ds in datasets:
ps_name.append(ds['name'])
ps_id.append(ds['id'])
# Collapse to sets so the printed counts are of distinct names and ids.
ps_name=set(ps_name)
ps_id=set(ps_id)
print "\t",len(ps_name),len(ps_id),"package_search"
# State for the loop that follows; the step size is the number of distinct
# names found above.
start=0
steps=len(ps_name)
pss_name=[]
pss_id=[]
while True:
def test_remoteckan_validlocations(self, project_config_yaml):
    """Check that Configuration hands back the RemoteCKAN object it was given.

    The client is supplied once through ``setup_remoteckan`` and once through
    ``Configuration._create``. Afterwards the test confirms that
    ``remoteckan()`` raises ``ConfigurationError`` both when the stored client
    has been cleared and when the configuration has been deleted.
    """

    def base_kwargs():
        # A fresh dict (and a fresh empty base config) for every _create call.
        return dict(
            user_agent='test',
            hdx_site='prod',
            hdx_key='TEST_HDX_KEY',
            hdx_base_config_dict={},
            project_config_yaml=project_config_yaml,
        )

    # Client injected after the configuration has been created.
    Configuration._create(**base_kwargs())
    late_client = ckanapi.RemoteCKAN(
        'http://lalala',
        apikey='12345',
        user_agent='HDXPythonLibrary/1.0',
    )
    Configuration.read().setup_remoteckan(remoteckan=late_client)
    assert Configuration.read().remoteckan() == late_client

    # Client injected at creation time.
    early_client = ckanapi.RemoteCKAN(
        'http://hahaha',
        apikey='54321',
        user_agent='HDXPythonLibrary/0.5',
    )
    Configuration._create(remoteckan=early_client, **base_kwargs())
    assert Configuration.read().remoteckan() == early_client

    # With the stored client cleared, asking for it must fail.
    Configuration.read()._remoteckan = None
    with pytest.raises(ConfigurationError):
        Configuration.read().remoteckan()

    # The same must hold once the configuration itself is deleted.
    Configuration.delete()
    with pytest.raises(ConfigurationError):
        Configuration.read().remoteckan()
def scrape_ckan_instance(ckan_url='https://open.alberta.ca', formats=None, data_dir='data/ckan'):
    """Save metadata for each dataset of a CKAN instance that has a wanted resource.

    A dataset is kept when at least one of its resources passes
    ``is_valid_resource(resource, formats)``. For each kept dataset a folder
    named after ``get_dataset_name(dataset)`` is created under ``data_dir``
    and ``save_metadata`` is called with the dataset and that folder.

    Args:
        ckan_url: Base URL handed to ``RemoteCKAN``.
        formats: Formats passed to ``is_valid_resource``. When omitted,
            ``['xls', 'xlsx', 'csv']`` is used.
        data_dir: Directory under which the per-dataset folders are created.
            Missing parent directories are created as well.
    """
    if formats is None:
        # Build the default per call so no list object is shared between calls.
        formats = ['xls', 'xlsx', 'csv']

    print('scraping ckan instance', ckan_url)
    if not os.path.isdir(data_dir):
        # makedirs rather than mkdir: the default 'data/ckan' is nested, and
        # mkdir fails when the parent 'data' directory does not exist yet.
        os.makedirs(data_dir)

    instance = RemoteCKAN(ckan_url)
    print('retrieving list of instance datasets')
    # Called without limit/offset arguments, as in the original code.
    datasets = instance.action.current_package_list_with_resources()
    print('processing datasets')
    for dataset in datasets:
        # Keep only datasets with at least one resource in a desired format.
        valid_resources = [
            resource
            for resource in dataset['resources']
            if is_valid_resource(resource, formats)
        ]
        if not valid_resources:
            continue
        dataset_name = get_dataset_name(dataset)
        dataset_folder = '{0}/{1}'.format(data_dir, dataset_name)
        if not os.path.isdir(dataset_folder):
            os.mkdir(dataset_folder)
        save_metadata(dataset, dataset_folder)
# NOTE(review): this excerpt has lost its indentation and stops at the
# `except` header; the handler body and the return statement are not visible.
def getPackageList(apiurl):
""" Try api 3 and api 2 to get the full package list"""
# Initial values; nothing in the visible part reassigns ex or status.
ex =None
status=200
# Accumulates the distinct entries returned across all pages.
package_list=set([])
try:
api = ckanapi.RemoteCKAN(apiurl, get_only=True)
start=0
steps=1000
# Page through package_list until a page is empty or adds nothing new.
while True:
p_l = api.action.package_list(limit=steps, offset=start)
if p_l:
# Size of the accumulated set BEFORE this page is merged in.
c=len(package_list)
# NOTE(review): on the first page (start==0) the set is still empty, so c
# is 0 and steps becomes 0; start then does not advance. Confirm whether c
# was meant to be read after the update below.
steps= c if start==0 else steps
package_list.update(p_l)
if c == len(package_list):
#no new packages
break
start+=steps
else:
break
except Exception as e:
# Open the Metatab document described by `m` and look up the matching CKAN
# dataset by stored id or derived name.
# NOTE(review): this excerpt has lost its indentation and stops at the second
# `try:`; whatever creates or updates the dataset is not visible.
def send_to_ckan(m):
from ckanapi import RemoteCKAN, NotFound
try:
doc = MetatabDoc(m.mt_file, cache=m.cache)
except (IOError, MetatabError) as e:
# NOTE(review): if err() returns instead of exiting, `doc` is unbound in the
# lines below - confirm that err() terminates.
err("Failed to open metatab '{}': {}".format(m.mt_file, e))
c = RemoteCKAN(m.ckan_url, apikey=m.api_key)
ckanid = doc.find_first_value('Root.Ckanid')
# NOTE(review): 'Root.Identitfier' looks like a misspelling of 'Identifier' -
# verify the term name. `identifier` is not used in the visible part.
identifier = doc.find_first_value('Root.Identitfier')
name = doc.as_version(None).find_first('Root.Name')
# Dots in the name value are replaced with dashes to form the CKAN name.
ckan_name = name.value.replace('.','-')
# Use the stored CKAN id when it is truthy, otherwise the derived name.
id_name = ckanid or ckan_name
try:
pkg = c.action.package_show(name_or_id=id_name)
prt("Updating CKAN dataset for '{}'".format(id_name))
except NotFound as e:
# NOTE(review): __traceback__ is set to None before it is passed to
# clear_frames(), so clear_frames() receives None; the two lines may be in
# the wrong order.
e.__traceback__ = None
traceback.clear_frames(e.__traceback__)
try:
# Set up the counters used for fetching a portal's datasets and record the
# portal's total dataset count on PortalSnapshot.
# NOTE(review): this excerpt has lost its indentation and stops inside the
# `try:` block; the fetch loop and the handler are not visible.
def generateFetchDatasetIter(self, Portal, PortalSnapshot, sn, timeout_attempts=5, timeout=24*60*60):
starttime=time.time()
api = ckanapi.RemoteCKAN(Portal.apiuri, get_only=True)
# Counters and bookkeeping sets initialised for the code that follows.
start=0
rows=1000
p_count=0
p_steps=1
total=0
processed_ids=set([])
processed_names=set([])
tstart=time.time()
try:
# Query with rows=0; only the response's "count" field is read here.
response = api.action.package_search(rows=0)
total = response["count"]
PortalSnapshot.datasetcount = total
# A tenth of the total, replaced by 1 when that comes out as 0.
# NOTE(review): `/` yields a float under Python 3 - confirm the target version.
p_steps=total/10
if p_steps ==0:
p_steps=1
# Handle a POST for the publisher identified by publisher_id: validate the
# request, build a CKAN client and ask CKAN for the user's organisations.
# NOTE(review): this excerpt has lost its indentation and stops in the middle
# of the final raise expression.
def post(self, request, publisher_id):
user = request.user.organisationuser
# iati_user_id is not used in the visible part of the method.
iati_user_id = user.iati_user_id
publisher = Publisher.objects.get(pk=publisher_id)
source_url = request.data.get('source_url', None)
# A missing or empty source_url is rejected.
if not source_url:
raise exceptions.APIException(detail="no source_url provided")
# `user` is rebound here from the organisation user (above) to request.user.
user = request.user
organisationuser = user.organisationuser
api_key = organisationuser.iati_api_key
# CKAN client created with the organisation user's iati_api_key.
client = RemoteCKAN(settings.CKAN_URL, apikey=api_key)
# TODO: should this be the name? - 2017-02-20
source_name = '{}-activities'.format(publisher.name)
# get all published activities, except for the ones that are just
# modified
activities = Activity.objects.filter(
ready_to_publish=True,
publisher=publisher
)
try:
orgList = client.call_action('organization_list_for_user', {})
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit;
# confirm that converting those into an APIException is intended.
except BaseException:
raise exceptions.APIException(
# NOTE(review): the message has no {} placeholder, so arguments passed to
# .format() would not appear in it.
detail="Can't get organisation list for user".format(
import requests
import ckanapi
from dateutil.parser import parse as date_parser
from .queue import get_queue
from werkzeug.http import http_date
from iatilib import db, parse
from iatilib.model import Dataset, Resource, Activity, Log, DeletedActivity
from iatilib.loghandlers import DatasetMessage as _
# Logger registered under the name "crawler".
log = logging.getLogger("crawler")
# URL template with one %s placeholder.
CKAN_WEB_BASE = 'http://www.iatiregistry.org/dataset/%s'
# Base URL of the CKAN API; note that it points at localhost.
CKAN_API = 'http://localhost:8000'
# Shared CKAN client, created once at import time.
registry = ckanapi.RemoteCKAN(CKAN_API)
class CouldNotFetchPackageList(Exception):
    """Exception type for a package list that could not be fetched."""
# Combine the Dataset rows already stored with new Dataset objects for
# package names reported by the registry client.
# NOTE(review): this excerpt has lost its indentation and stops at the
# `for dataset in all_datasets:` header; the loop body is not visible.
def fetch_dataset_list():
existing_datasets = Dataset.query.all()
existing_ds_names = set(ds.name for ds in existing_datasets)
package_list = registry.action.package_list()
# NOTE(review): this treats the return value as a mapping with 'success' and
# 'result' keys; ckanapi action shortcuts appear to return the unwrapped
# result instead - confirm against the ckanapi version in use.
if package_list.get('success', False):
incoming_ds_names = set(package_list['result'])
# Dataset objects for names the registry reports that are not stored yet.
new_datasets = [Dataset(name=n) for n
in incoming_ds_names - existing_ds_names]
all_datasets = existing_datasets + new_datasets
for dataset in all_datasets:
def generateFetchDatasetIter(self, Portal, sn, timeout_attempts=5, timeout=24*60*60):
starttime=time.time()
api = ckanapi.RemoteCKAN(Portal.apiurl, get_only=True)
start=0
rows=1000
p_count=0
p_steps=1
total=0
processed_ids=set([])
processed_names=set([])
try:
response = api.action.package_search(rows=0)
total = response["count"]
p_steps=total/10
if p_steps ==0:
p_steps=1
while True: