How to use the asreview.config.LABEL_NA constant in asreview

To help you get started, we’ve selected a few asreview examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github msdslab / automated-systematic-review / asreview / review / factory.py View on Github external
if isinstance(included_dataset, (str, PurePath)):
        included_dataset = [included_dataset]

    if isinstance(excluded_dataset, (str, PurePath)):
        excluded_dataset = [excluded_dataset]

    if isinstance(prior_dataset, (str, PurePath)):
        prior_dataset = [prior_dataset]

    as_data = ASReviewData()
    # Find the URL of the datasets if the dataset is an example dataset.
    for data in dataset:
        as_data.append(ASReviewData.from_file(find_data(data)))

    if new:
        as_data.labels = np.full((len(as_data),), LABEL_NA, dtype=int)
    for data in included_dataset:
        as_data.append(ASReviewData.from_file(
            find_data(data), data_type="included"))
    for data in excluded_dataset:
        as_data.append(ASReviewData.from_file(
            find_data(data), data_type="excluded"))
    for data in prior_dataset:
        as_data.append(ASReviewData.from_file(
            find_data(data), data_type="prior"))
    return as_data
github msdslab / automated-systematic-review / asreview / io / utils.py View on Github external
logging.warning("Unable to detect abstracts in dataset.")
    if "title" not in col_names:
        logging.warning("Unable to detect titles in dataset.")

    # Replace NA values with empty strings.
    for col in ["title", "abstract", "authors", "keywords"]:
        try:
            df[all_column_spec[col]].fillna("", inplace=True)
        except KeyError:
            pass

    # Convert labels to integers.
    if "final_included" in col_names:
        try:
            col = all_column_spec["final_included"]
            df[col].fillna(LABEL_NA, inplace=True)
            df[col] = pd.to_numeric(df[col])
        except KeyError:
            pass

    # If we have a record_id (for example from an ASReview export), use it.
    if "record_id" in list(df):
        df.set_index('record_id', inplace=True)
    if df.index.name != "record_id":
        df["record_id"] = np.arange(len(df.index))
        df.set_index('record_id', inplace=True)
    df.sort_index(inplace=True)
    return df, all_column_spec
github msdslab / automated-systematic-review / asreview / review / base.py View on Github external
super(BaseReview, self).__init__()

        # Default to Naive Bayes model
        if model is None:
            model = NBModel()
        if query_model is None:
            query_model = MaxQuery()
        if balance_model is None:
            balance_model = SimpleBalance()
        if feature_model is None:
            feature_model = Tfidf()

        self.as_data = as_data
        self.y = as_data.labels
        if self.y is None:
            self.y = np.full(len(as_data), LABEL_NA)
        self.model = model
        self.balance_model = balance_model
        self.query_model = query_model
        self.feature_model = feature_model

        self.shared = {"query_src": {}, "current_queries": {}}
        self.model.shared = self.shared
        self.query_model.shared = self.shared
        self.balance_model.shared = self.shared

        self.n_papers = n_papers
        self.n_instances = n_instances
        self.n_queries = n_queries
        self.start_idx = start_idx

        if log_file is not None:
github msdslab / automated-systematic-review / asreview / io / paper_record.py View on Github external
def todict(self):
        """Create dictionary from the record.

        Returns
        -------
        dict:
            The title, abstract, authors, keywords, record_id and label of
            the record, updated with every entry of ``self.extra_fields``.
            The label is None when it equals LABEL_NA.
        """
        # Compare by value, not identity: `is` only matches when the label is
        # the very same object as LABEL_NA, which is not the case for e.g. a
        # numpy integer holding an equal value.
        label = None if self.label == LABEL_NA else self.label
        paper_dict = {
            "title": self.title,
            "abstract": self.abstract,
            "authors": self.authors,
            "keywords": self.keywords,
            "record_id": self.record_id,
            "label": label,
        }
        # Extra fields are merged last, so they override the keys above on
        # a name clash.
        paper_dict.update(self.extra_fields)
        return paper_dict
github msdslab / automated-systematic-review / asreview / webapp / utils / project.py View on Github external
with SQLiteLock(fp_lock, blocking=True, lock_name="active"):
        # open the projects file
        with open(project_file_path, "r") as f_read:
            project_dict = json.load(f_read)

        # add path to dict (overwrite if already exists)
        project_dict["dataset_path"] = file_name

        with open(project_file_path, "w") as f_write:
            json.dump(project_dict, f_write)

        # fill the pool of the first iteration
        as_data = read_data(project_id)
        if as_data.labels is not None:
            unlabeled = np.where(as_data.labels == LABEL_NA)[0]
            pool_indices = as_data.record_ids[unlabeled]
        else:
            pool_indices = as_data.record_ids
        np.random.shuffle(pool_indices)

        write_pool(project_id, pool_indices.tolist())

        # make an empty queue for the items to label
        write_label_history(project_id, [])
github msdslab / automated-systematic-review / asreview / review / oracle.py View on Github external
def __init__(self, as_data, *args, use_cli_colors=True,
                 new_review=False,
                 **kwargs):
        """Initialize the oracle review.

        Arguments
        ---------
        as_data:
            Data object to review; its `labels` attribute is read and, for a
            new review, overwritten.
        use_cli_colors: bool
            Stored on the instance as `use_cli_colors`.
        new_review: bool
            If True, all labels are reset to LABEL_NA and no start indices
            are passed on. If False, the indices of the records whose label
            differs from LABEL_NA are used as start indices.
        *args, **kwargs:
            Forwarded unchanged to the parent initializer.
        """
        self.as_data = as_data
        current_labels = as_data.labels
        if new_review:
            if current_labels is None:
                # np.full_like(None, ...) would yield a 0-d object array, so
                # build a properly sized label vector instead.
                as_data.labels = np.full(len(as_data), LABEL_NA, dtype=int)
            else:
                as_data.labels = np.full_like(current_labels, LABEL_NA)
            start_idx = []
        elif current_labels is None:
            # Without labels nothing has been reviewed yet. Comparing None
            # against LABEL_NA would evaluate to a scalar True and wrongly
            # mark index 0 as already labeled.
            start_idx = np.array([], dtype=int)
        else:
            start_idx = np.where(current_labels != LABEL_NA)[0]
        super(ReviewOracle, self).__init__(
            as_data, *args, **kwargs, start_idx=start_idx)

        self.use_cli_colors = use_cli_colors
github msdslab / automated-systematic-review / asreview / data.py View on Github external
pd.DataFrame:
            Dataframe of all available record data.
        """
        new_df = pd.DataFrame.copy(self.df)
        col = self.column_spec["final_included"]
        if labels is not None:
            new_df[col] = labels
        if ranking is not None:
            # sort the datasets based on the ranking
            new_df = new_df.iloc[ranking]
            # append a column with 1 to n
            new_df["asreview_ranking"] = np.arange(1, len(new_df) + 1)

        if col in list(new_df):
            new_df[col] = new_df[col].astype(object)
            new_df.loc[new_df[col] == LABEL_NA, col] = np.nan
        return new_df
github msdslab / automated-systematic-review / asreview / data.py View on Github external
self.data_name = as_data.data_name
            self.prior_idx = as_data.prior_idx
            self.max_idx = as_data.max_idx
            self.column_spec = as_data.column_spec
            return

        reindex_val = max(self.max_idx - min(as_data.df.index.values), 0)
        new_index = np.append(self.df.index.values,
                              as_data.df.index.values + reindex_val)
        new_priors = np.append(self.prior_idx, as_data.prior_idx + reindex_val)
        new_df = self.df.append(as_data.df, sort=False)
        new_df.index = new_index

        new_labels = None
        if self.labels is None and as_data.labels is not None:
            new_labels = np.append(np.full(len(self), LABEL_NA, dtype=int),
                                   as_data.labels)
        elif self.labels is not None and as_data.labels is None:
            new_labels = np.append(self.labels,
                                   np.full(len(as_data), LABEL_NA, dtype=int))
        self.max_idx = max(self.max_idx, as_data.max_idx, max(new_index))
        self.df = new_df
        if new_labels is not None:
            self.labels = new_labels
        self.prior_idx = new_priors
        self.data_name += "_" + as_data.data_name
        for data_type, col in as_data.column_spec.items():
            if data_type in self.column_spec:
                if self.column_spec[data_type] != col:
                    raise ValueError(
                        "Error merging dataframes: column specifications "
                        f"differ: {self.column_spec} vs {as_data.column_spec}")
github msdslab / automated-systematic-review / asreview / webapp / utils / io.py View on Github external
def read_current_labels(project_id, as_data=None, label_history=None):
    """Get the labels of a project with the label history applied.

    Arguments
    ---------
    project_id:
        Project identifier, passed to `read_data` / `read_label_history`
        when the corresponding argument is not supplied.
    as_data:
        Data object exposing a `labels` attribute. Read with
        `read_data(project_id)` if None. It is not modified.
    label_history:
        Iterable of (index, inclusion) pairs. Read with
        `read_label_history(project_id)` if None.

    Returns
    -------
    np.ndarray:
        Integer array of labels: the labels of `as_data` (or LABEL_NA for
        every record if it has none) with each history entry written at its
        index.
    """
    if as_data is None:
        as_data = read_data(project_id)

    if label_history is None:
        label_history = read_label_history(project_id)

    labels = as_data.labels
    if labels is None:
        labels = np.full(len(as_data), LABEL_NA, dtype=int)
    else:
        # Work on an integer copy so that applying the history does not
        # write into the label array owned by `as_data`.
        labels = np.array(labels, dtype=int)

    for idx, inclusion in label_history:
        labels[idx] = inclusion

    return labels
github msdslab / automated-systematic-review / asreview / io / paper_record.py View on Github external
def __init__(self, record_id, column_spec=None, **kwargs):
        """Initialize a paper record.

        Arguments
        ---------
        record_id:
            Identifier stored as `self.record_id`.
        column_spec: dict
            Maps the attribute names "title", "abstract", "authors",
            "keywords" and "final_included" to the keyword under which the
            value is supplied. Attributes not listed are looked up under
            their own name. Defaults to an empty mapping.
        **kwargs:
            Values for the attributes above; everything left over is stored
            in `self.extra_fields`.
        """
        # A None default avoids sharing a single dict between all calls.
        if column_spec is None:
            column_spec = {}

        for attr in ["title", "abstract", "authors", "keywords",
                     "final_included"]:
            if attr in column_spec:
                col = column_spec[attr]
            elif attr in kwargs:
                col = attr
            else:
                col = None
            # Popping removes the consumed entry so it does not end up in
            # the extra fields; a missing entry yields None.
            setattr(self, attr, kwargs.pop(col, None))

        self.record_id = record_id
        if self.final_included is None:
            self.final_included = LABEL_NA
        else:
            self.final_included = int(self.final_included)

        # Build a fresh dict rather than rewriting the mapping while
        # iterating over it; missing values (per pd.isna) become None.
        self.extra_fields = {
            attr: None if pd.isna(val) else val
            for attr, val in kwargs.items()
        }