How to use the asreview.datasets.BaseDataSet class in asreview

To help you get started, we’ve selected a few asreview examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github msdslab / automated-systematic-review / asreview / datasets.py View on Github external
Dataset_ids should not contain colons (:).
            Return None if the dataset could not be found.

        Returns
        -------
        BaseDataSet, VersionedDataSet:
            If the dataset with that name is found, return it
            (or a list thereof).
        """
        # If dataset_name is a non-string iterable, return a list.
        if is_iterable(dataset_name):
            return [self.find(x) for x in dataset_name]

        # If dataset_name is a valid path, create a dataset from it.
        if Path(dataset_name).is_file():
            return BaseDataSet(dataset_name)

        dataset_name = str(dataset_name)

        # Split into group/dataset if possible.
        split_dataset_id = dataset_name.split(":")
        if len(split_dataset_id) == 2:
            data_group = split_dataset_id[0]
            split_dataset_name = split_dataset_id[1]
            if data_group in self.all_datasets:
                return self.all_datasets[data_group].find(split_dataset_name)

        # Look through all available/installed groups for the name.
        all_results = {}
        for group_name, dataset in self.all_datasets.items():
            result = dataset.find(dataset_name)
            if result is not None:
github msdslab / automated-systematic-review / asreview / datasets.py View on Github external
link = ("https://dmice.ohsu.edu/cohenaa/"
            "systematic-drug-class-review-data.html")
    authors = ["A.M. Cohen", "W.R. Hersh", "K. Peterson", "Po-Yin Yen"]
    year = 2006
    license = None
    topic = "ACEInhibitors"
    sha512 = ("bde84809236e554abd982c724193777c1b904adb2326cb0a0ccb350b02d4246"
              "e8db5e9b36d0cb4b23e9aab521441764cdb0e31d6cb90fdc9e6c907ae1650d6"
              "1a")
    img_url = ("https://raw.githubusercontent.com/asreview/asreview/master/"
               "images/ace.png")
    last_update = "2020-03-23"
    date = "2006-03-01"


class HallDataSet(BaseDataSet):
    """Metadata for the Hall software fault prediction dataset."""

    # Identifiers under which this dataset can be looked up.
    dataset_id = "hall"
    aliases = ["hall", "example_hall", "example_software"]

    # Human readable description.
    title = "Fault prediction - Hall"
    description = (
        "A systematic literature review on fault prediction "
        "performance in software engineering"
    )

    # Location of the full data file and of a smaller demo/debug file.
    url = (
        "https://raw.githubusercontent.com/asreview/asreview/master/"
        "datasets/Software_Engineering_Hall.csv"
    )
    url_demo = (
        "https://raw.githubusercontent.com/asreview/asreview/master/"
        "tests/test_datasets/Software_Engineering_Hall_debug.csv"
    )

    # Provenance of the dataset.
    link = "https://zenodo.org/record/1162952#.XIVBE_ZFyVR"
    authors = [
        "Tracy Hall",
        "Sarah Beecham",
        "David Bowes",
        "David Gray",
        "Steve Counsell",
    ]
    year = 2012
    license = "CC-BY Attribution 4.0 International"
    topic = "Software Fault Prediction"

    # SHA-512 hex digest, split over several literals for line length.
    sha512 = (
        "0d5cc86586d7e6f28e5c52c78cf4647556cdf41a73c9188b6424ca007f38ea9"
        "55230e297d7c4a96a41ae46ec716a21c2d5dc432a77dd4d81886aa60ad9b771"
        "00"
    )
github msdslab / automated-systematic-review / asreview / datasets.py View on Github external
meta_data = json.loads(f.read().decode())
    dataset_type = meta_data["type"]
    if dataset_type == "versioned":
        file_list = meta_data["filenames"]
        base_dataset_id = meta_data["base_id"]
        title = meta_data["title"]
        if meta_data["type"] != "versioned":
            raise ValueError("BaseVersionedDataSet: wrong datatype: "
                             f"{meta_data['type']}")
        datasets = []
        for config_file in [url+"/"+f for f in file_list]:
            datasets.append(BaseDataSet.from_config(config_file))
        return BaseVersionedDataSet(base_dataset_id, datasets, title)
    elif dataset_type == "base":
        config_file = url + "/" + meta_data["filenames"][0]
        return BaseDataSet.from_config(config_file)
    raise ValueError(f"Dataset type {dataset_type} unknown.")
github msdslab / automated-systematic-review / asreview / datasets.py View on Github external
if len(results) > 1:
            raise ValueError(
                f"Broken dataset group '{self.group_id}' containing multiple"
                f" datasets with the same name/alias '{dataset_name}'.")
        elif len(results) == 1:
            return results[0]
        return None

    def list(self, latest_only=True):
        """Collect the listings of every member of this group.

        Parameters
        ----------
        latest_only: bool
            Passed on unchanged to the ``list`` method of each member.

        Returns
        -------
        list:
            The items produced by each member's ``list`` call,
            concatenated in member order.
        """
        return [
            entry
            for member in self._data_sets
            for entry in member.list(latest_only=latest_only)
        ]


class PTSDDataSet(BaseDataSet):
    """Metadata for the PTSD (van de Schoot) dataset."""

    # Identifiers under which this dataset can be looked up.
    dataset_id = "ptsd"
    aliases = ["ptsd", "example_ptsd", "schoot"]

    # Human readable description.
    title = "PTSD - Schoot"
    description = "Bayesian PTSD-Trajectory Analysis with Informed Priors"

    # Location of the full data file and of a smaller demo/debug file.
    url = (
        "https://raw.githubusercontent.com/asreview/asreview/master/"
        "datasets/PTSD_VandeSchoot_18.csv"
    )
    url_demo = (
        "https://raw.githubusercontent.com/asreview/asreview/master/"
        "tests/test_datasets/PTSD_VandeSchoot_18_debug.csv"
    )

    # SHA-512 hex digest, split over several literals for line length.
    sha512 = (
        "e2b62c93e4e9ddebf786e2cc8a0effb7fd8bf2ada986d53e6e5133092e7de88"
        "6b311286fa459144576ed3ac0dfff1bca1ba9c198d0235d8280a40b2533d0c0"
        "a7"
    )

    # Provenance of the dataset.
    authors = [
        "Rens van de Schoot",
        "Marit Sijbrandij",
        "Sarah Depaoli",
        "Sonja D. Winter",
        "Miranda Olff",
        "Nancy E. van Loey",
    ]
    topic = "PTSD"
    license = "CC-BY Attribution 4.0 International"
    link = "https://osf.io/h5k2q/"
    last_update = "2020-03-23"
    year = 2018
github msdslab / automated-systematic-review / asreview / datasets.py View on Github external
def to_dict(self):
    """Convert self to a python dictionary.

    Every name reported by ``dir(self)`` is copied into the result,
    except names starting with a double underscore and names whose
    class-level value is callable (methods and the like).

    Callability is checked on ``type(self)`` rather than on one fixed
    base class, so that methods defined only on a subclass are left out
    as well instead of ending up in the dictionary as bound methods.

    Returns
    -------
    dict:
        Mapping of attribute name to the value found on this instance.
    """
    cls = type(self)
    mydict = {}
    for attr in dir(self):
        if attr.startswith("__"):
            continue
        # Attributes that only exist on the instance are not found on the
        # class; the ``None`` default marks them as not callable.
        if callable(getattr(cls, attr, None)):
            continue
        try:
            mydict[attr] = getattr(self, attr)
        except AttributeError:
            # dir() may report names that cannot actually be retrieved;
            # those are left out of the result.
            pass
    return mydict
github msdslab / automated-systematic-review / asreview / datasets.py View on Github external
sha512 = ("e2b62c93e4e9ddebf786e2cc8a0effb7fd8bf2ada986d53e6e5133092e7de88"
              "6b311286fa459144576ed3ac0dfff1bca1ba9c198d0235d8280a40b2533d0c0"
              "a7")
    authors = ['Rens van de Schoot', 'Marit Sijbrandij', 'Sarah Depaoli',
               'Sonja D. Winter', 'Miranda Olff', 'Nancy E. van Loey']
    topic = "PTSD"
    license = "CC-BY Attribution 4.0 International"
    link = "https://osf.io/h5k2q/"
    last_update = "2020-03-23"
    year = 2018
    img_url = ("https://raw.githubusercontent.com/asreview/asreview/master/"
               "images/ptsd.png")
    date = "2018-01-11"


class AceDataSet(BaseDataSet):
    dataset_id = "ace"
    aliases = ["ace", "example_cohen", "example_ace"]
    title = "ACEInhibitors - Cohen"
    description = "Systematic Drug Class Review Gold Standard Data"
    url = "https://raw.githubusercontent.com/asreview/asreview/master/datasets/ACEInhibitors.csv"  # noqa
    url_demo = "https://raw.githubusercontent.com/asreview/asreview/master/tests/test_datasets/ACEInhibitors_debug.csv"  # noqa
    link = ("https://dmice.ohsu.edu/cohenaa/"
            "systematic-drug-class-review-data.html")
    authors = ["A.M. Cohen", "W.R. Hersh", "K. Peterson", "Po-Yin Yen"]
    year = 2006
    license = None
    topic = "ACEInhibitors"
    sha512 = ("bde84809236e554abd982c724193777c1b904adb2326cb0a0ccb350b02d4246"
              "e8db5e9b36d0cb4b23e9aab521441764cdb0e31d6cb90fdc9e6c907ae1650d6"
              "1a")
    img_url = ("https://raw.githubusercontent.com/asreview/asreview/master/"
github msdslab / automated-systematic-review / asreview / datasets.py View on Github external
def __init__(self, fp=None):
    """Create the metadata holder, optionally tied to a file.

    Parameters
    ----------
    fp: str
        Path to the data file. When given, it is stored as ``self.fp``
        and its final path component becomes ``self.id``. When None,
        neither attribute is set here, so an url/fp has to be set
        manually.
    """
    path_given = fp is not None
    if path_given:
        # Store the path first, then derive the identifier from it.
        self.fp = fp
        self.id = Path(fp).name

    # Run the initializer of the class following BaseDataSet in the MRO.
    super(BaseDataSet, self).__init__()