ProblemsAnnotator

Bases: Annotator

Annotator class for identifying and processing problems in medical notes.

This class extends the base Annotator class and provides specific functionality for identifying and processing problems in medical notes. It implements methods for loading problem lookup data, processing meta annotations, filtering concepts, and post-processing the annotated concepts.

Attributes:

    cat (CAT): The CAT (Concept Annotation Tool) instance used for annotation.
    config (AnnotatorConfig): The configuration object for the annotator.

Properties:

    concept_types (list): A list of concept types supported by this annotator.
    pipeline (list): The list of processing steps in the annotation pipeline.

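A minimal usage sketch is shown below. The model pack path, the Note import path, and the assumption that omitting config falls back to a default AnnotatorConfig with the prepackaged lookup data are illustrative rather than taken from this page:

from medcat.cat import CAT

from miade.annotators import ProblemsAnnotator
from miade.note import Note  # assumed import path for Note

# Load a MedCAT model pack trained for problem extraction (path is illustrative).
cat = CAT.load_model_pack("path/to/problems_modelpack.zip")

# With no config argument, defaults and the prepackaged lookup data are assumed to apply.
annotator = ProblemsAnnotator(cat)

note = Note(text="Impression:\n1. Type 2 diabetes mellitus\n2. No evidence of heart failure\n")
concepts = annotator.run_pipeline(note)

for concept in concepts:
    print(concept.id, concept.name, concept.category)
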
Source code in miade/annotators.py
class ProblemsAnnotator(Annotator):
    """
    Annotator class for identifying and processing problems in medical notes.

    This class extends the base `Annotator` class and provides specific functionality
    for identifying and processing problems in medical notes. It implements methods
    for loading problem lookup data, processing meta annotations, filtering concepts,
    and post-processing the annotated concepts.

    Attributes:
        cat (CAT): The CAT (Concept Annotation Tool) instance used for annotation.
        config (AnnotatorConfig): The configuration object for the annotator.

    Properties:
        concept_types (list): A list of concept types supported by this annotator.
        pipeline (list): The list of processing steps in the annotation pipeline.
    """

    def __init__(self, cat: CAT, config: AnnotatorConfig = None):
        super().__init__(cat, config)
        self._load_problems_lookup_data()

    @property
    def concept_types(self) -> List[Category]:
        """
        Get the list of concept types supported by this annotator.

        Returns:
            [Category.PROBLEM]
        """
        return [Category.PROBLEM]

    @property
    def pipeline(self) -> List[str]:
        """
        Get the list of processing steps in the annotation pipeline.

        Returns:
            ["preprocessor", "medcat", "list_cleaner", "paragrapher", "postprocessor", "deduplicator"]
        """
        return ["preprocessor", "medcat", "list_cleaner", "paragrapher", "postprocessor", "deduplicator"]

    def run_pipeline(self, note: Note, record_concepts: Optional[List[Concept]] = None) -> List[Concept]:
        """
        Runs the annotation pipeline on a given note and returns the extracted problem concepts.

        Args:
            note (Note): The input note to process.
            record_concepts (Optional[List[Concept]]): The list of concepts from existing EHR records.

        Returns:
            List[Concept]: The extracted concepts from the note.
        """
        # TODO: not the best way to do this - make this more extensible!!
        concepts: List[Concept] = []

        for pipe in self.pipeline:
            if pipe not in self.config.disable:
                if pipe == "preprocessor":
                    note = self.preprocess(note=note, refine=self.config.refine_paragraphs)
                elif pipe == "medcat":
                    concepts = self.get_concepts(note=note)
                elif pipe == "list_cleaner":
                    concepts = self.filter_concepts_in_numbered_list(concepts=concepts, note=note)
                elif pipe == "paragrapher":
                    concepts = self.process_paragraphs(note=note, concepts=concepts)
                elif pipe == "postprocessor":
                    concepts = self.postprocess(concepts=concepts)
                elif pipe == "deduplicator":
                    concepts = self.deduplicate(concepts=concepts, record_concepts=record_concepts)

        return concepts

    def _load_problems_lookup_data(self) -> None:
        """
        Load the problem lookup data. Load prepackaged lookups if lookup_data_path is None.

        Raises:
            RuntimeError: If the lookup data directory does not exist.
        """
        self.negated_lookup = load_lookup_data(
            self.lookup_data_path + "negated.csv", is_package_data=self.use_package_data, as_dict=True
        )
        self.historic_lookup = load_lookup_data(
            self.lookup_data_path + "historic.csv", is_package_data=self.use_package_data, as_dict=True
        )
        self.suspected_lookup = load_lookup_data(
            self.lookup_data_path + "suspected.csv", is_package_data=self.use_package_data, as_dict=True
        )
        self.filtering_blacklist = load_lookup_data(
            self.lookup_data_path + "problem_blacklist.csv", is_package_data=self.use_package_data, no_header=True
        )

    def _process_meta_annotations(self, concept: Concept) -> Optional[Concept]:
        """
        Process the meta annotations for a concept.

        Args:
            concept (Concept): The concept to process.

        Returns:
           The processed concept, or None if it should be removed.

        Raises:
            ValueError: If the concept has an invalid negex value.
        """
        # Add, convert, or ignore concepts
        meta_ann_values = [meta_ann.value for meta_ann in concept.meta] if concept.meta is not None else []

        convert = False
        tag = ""
        # only get meta model results if negex is false
        if concept.negex is not None:
            if concept.negex:
                convert = self.negated_lookup.get(int(concept.id), False)
                tag = " (negated)"
            elif Presence.SUSPECTED in meta_ann_values:
                convert = self.suspected_lookup.get(int(concept.id), False)
                tag = " (suspected)"
            elif Relevance.HISTORIC in meta_ann_values:
                convert = self.historic_lookup.get(int(concept.id), False)
                tag = " (historic)"
        else:
            if Presence.NEGATED in meta_ann_values:
                convert = self.negated_lookup.get(int(concept.id), False)
                tag = " (negated)"
            elif Presence.SUSPECTED in meta_ann_values:
                convert = self.suspected_lookup.get(int(concept.id), False)
                tag = " (suspected)"
            elif Relevance.HISTORIC in meta_ann_values:
                convert = self.historic_lookup.get(int(concept.id), False)
                tag = " (historic)"

        if convert:
            if tag == " (negated)" and concept.negex:
                log.debug(
                    f"Converted concept ({concept.id} | {concept.name}) to ({str(convert)} | {concept.name + tag}): "
                    f"negation detected by negex"
                )
            else:
                log.debug(
                    f"Converted concept ({concept.id} | {concept.name}) to ({str(convert)} | {concept.name + tag}):"
                    f"detected by meta model"
                )
            concept.id = str(convert)
            concept.name += tag
        else:
            if concept.negex:
                log.debug(f"Removed concept ({concept.id} | {concept.name}): negation (negex) with no conversion match")
                return None
            if concept.negex is None and Presence.NEGATED in meta_ann_values:
                log.debug(
                    f"Removed concept ({concept.id} | {concept.name}): negation (meta model) with no conversion match"
                )
                return None
            if Presence.SUSPECTED in meta_ann_values:
                log.debug(f"Removed concept ({concept.id} | {concept.name}): suspected with no conversion match")
                return None
            if Relevance.IRRELEVANT in meta_ann_values:
                log.debug(f"Removed concept ({concept.id} | {concept.name}): irrelevant concept")
                return None
            if Relevance.HISTORIC in meta_ann_values:
                log.debug(f"No change to concept ({concept.id} | {concept.name}): historic with no conversion match")

        concept.category = Category.PROBLEM

        return concept

    def _is_blacklist(self, concept):
        """
        Check if a concept is in the filtering blacklist.

        Args:
            concept: The concept to check.

        Returns:
            True if the concept is in the blacklist, False otherwise.
        """
        # filtering blacklist
        if int(concept.id) in self.filtering_blacklist.values:
            log.debug(f"Removed concept ({concept.id} | {concept.name}): concept in problems blacklist")
            return True
        return False

    def _process_meta_ann_by_paragraph(
        self, concept: Concept, paragraph: Paragraph, prob_concepts_in_structured_sections: List[Concept]
    ):
        """
        Process the meta annotations for a concept based on the paragraph type.

        Args:
            concept (Concept): The concept to process.
            paragraph (Paragraph): The paragraph containing the concept.
            prob_concepts_in_structured_sections (List[Concept]): The list of problem concepts in structured sections.
        """
        # if paragraph is structured problems section, add to prob list and convert to corresponding relevance
        if paragraph.type in self.structured_prob_lists:
            prob_concepts_in_structured_sections.append(concept)
            for meta in concept.meta:
                if meta.name == "relevance" and meta.value == Relevance.IRRELEVANT:
                    new_relevance = self.structured_prob_lists[paragraph.type]
                    log.debug(
                        f"Converted {meta.value} to "
                        f"{new_relevance} for concept ({concept.id} | {concept.name}): "
                        f"paragraph is {paragraph.type}"
                    )
                    meta.value = new_relevance
        # if paragraph is meds or irrelevant section, convert problems to irrelevant
        elif paragraph.type in self.structured_med_lists or paragraph.type in self.irrelevant_paragraphs:
            for meta in concept.meta:
                if meta.name == "relevance" and meta.value != Relevance.IRRELEVANT:
                    log.debug(
                        f"Converted {meta.value} to "
                        f"{Relevance.IRRELEVANT} for concept ({concept.id} | {concept.name}): "
                        f"paragraph is {paragraph.type}"
                    )
                    meta.value = Relevance.IRRELEVANT

    def process_paragraphs(self, note: Note, concepts: List[Concept]) -> List[Concept]:
        """
        Process the paragraphs in a note and filter the concepts.

        Args:
            note (Note): The note to process.
            concepts (List[Concept]): The list of concepts to filter.

        Returns:
            The filtered list of concepts.
        """
        prob_concepts_in_structured_sections: List[Concept] = []
        if note.paragraphs:
            # Use a list comprehension to flatten the loop and conditionals
            concepts_in_paragraphs = [
                (concept, paragraph)
                for paragraph in note.paragraphs
                for concept in concepts
                if concept.start >= paragraph.start and concept.end <= paragraph.end and concept.meta
            ]
            # Process each concept and paragraph pair
            for concept, paragraph in concepts_in_paragraphs:
                self._process_meta_ann_by_paragraph(concept, paragraph, prob_concepts_in_structured_sections)
        else:
            log.warn("Unable to run paragrapher pipeline: did you add preprocessor to the pipeline?")

        # if more than set no. concepts in prob or imp or pmh sections, return only those and ignore all other concepts
        if len(prob_concepts_in_structured_sections) > self.config.structured_list_limit:
            log.debug(
                f"Ignoring concepts elsewhere in the document because "
                f"more than {self.config.structured_list_limit} concepts exist "
                f"in prob, imp, pmh structured sections: {len(prob_concepts_in_structured_sections)}"
            )
            return prob_concepts_in_structured_sections

        return concepts

    def postprocess(self, concepts: List[Concept]) -> List[Concept]:
        """
        Post-process the concepts and filter out irrelevant concepts.

        Args:
            concepts (List[Concept]): The list of concepts to post-process.

        Returns:
            The filtered list of concepts.
        """
        # deepcopy so we still have reference to original list of concepts
        all_concepts = deepcopy(concepts)
        filtered_concepts = []
        for concept in all_concepts:
            if self._is_blacklist(concept):
                continue
            # meta annotations
            concept = self._process_meta_annotations(concept)
            # ignore concepts filtered by meta-annotations
            if concept is None:
                continue
            filtered_concepts.append(concept)

        return filtered_concepts

concept_types: List[Category] property

Get the list of concept types supported by this annotator.

Returns:

    List[Category]: [Category.PROBLEM]

pipeline: List[str] property

Get the list of processing steps in the annotation pipeline.

Returns:

    List[str]: ["preprocessor", "medcat", "list_cleaner", "paragrapher", "postprocessor", "deduplicator"]

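Steps can be switched off by name, since run_pipeline skips any step listed in config.disable. A sketch continuing the earlier example; the AnnotatorConfig import path and the exact shape of the disable field are assumptions:

from miade.utils.annotatorconfig import AnnotatorConfig  # assumed import path

config = AnnotatorConfig()
config.disable = ["deduplicator"]  # skip deduplication against the existing record

annotator = ProblemsAnnotator(cat, config)  # cat as in the earlier sketch
concepts = annotator.run_pipeline(note)     # note as in the earlier sketch
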
postprocess(concepts)

Post-process the concepts and filter out irrelevant concepts.

Parameters:

    concepts (List[Concept]): The list of concepts to post-process. Required.

Returns:

    List[Concept]: The filtered list of concepts.

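The method can also be called directly on concepts produced by an earlier step; a sketch continuing the example above, using get_concepts as the medcat step shown in run_pipeline:

raw_concepts = annotator.get_concepts(note=note)        # medcat step
problem_concepts = annotator.postprocess(raw_concepts)  # blacklist + meta-annotation handling
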
Source code in miade/annotators.py
def postprocess(self, concepts: List[Concept]) -> List[Concept]:
    """
    Post-process the concepts and filter out irrelevant concepts.

    Args:
        concepts (List[Concept]): The list of concepts to post-process.

    Returns:
        The filtered list of concepts.
    """
    # deepcopy so we still have reference to original list of concepts
    all_concepts = deepcopy(concepts)
    filtered_concepts = []
    for concept in all_concepts:
        if self._is_blacklist(concept):
            continue
        # meta annotations
        concept = self._process_meta_annotations(concept)
        # ignore concepts filtered by meta-annotations
        if concept is None:
            continue
        filtered_concepts.append(concept)

    return filtered_concepts

process_paragraphs(note, concepts)

Process the paragraphs in a note and filter the concepts.

Parameters:

    note (Note): The note to process. Required.
    concepts (List[Concept]): The list of concepts to filter. Required.

Returns:

    List[Concept]: The filtered list of concepts.

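This step relies on note.paragraphs and logs a warning if they are missing, so the preprocessor should run first. A sketch mirroring the ordering in run_pipeline and continuing the earlier example; the refine flag value here is illustrative:

note = annotator.preprocess(note=note, refine=True)   # populates note.paragraphs
concepts = annotator.get_concepts(note=note)          # medcat step
concepts = annotator.process_paragraphs(note=note, concepts=concepts)
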
Source code in miade/annotators.py
def process_paragraphs(self, note: Note, concepts: List[Concept]) -> List[Concept]:
    """
    Process the paragraphs in a note and filter the concepts.

    Args:
        note (Note): The note to process.
        concepts (List[Concept]): The list of concepts to filter.

    Returns:
        The filtered list of concepts.
    """
    prob_concepts_in_structured_sections: List[Concept] = []
    if note.paragraphs:
        # Use a list comprehension to flatten the loop and conditionals
        concepts_in_paragraphs = [
            (concept, paragraph)
            for paragraph in note.paragraphs
            for concept in concepts
            if concept.start >= paragraph.start and concept.end <= paragraph.end and concept.meta
        ]
        # Process each concept and paragraph pair
        for concept, paragraph in concepts_in_paragraphs:
            self._process_meta_ann_by_paragraph(concept, paragraph, prob_concepts_in_structured_sections)
    else:
        log.warn("Unable to run paragrapher pipeline: did you add preprocessor to the pipeline?")

    # if more than set no. concepts in prob or imp or pmh sections, return only those and ignore all other concepts
    if len(prob_concepts_in_structured_sections) > self.config.structured_list_limit:
        log.debug(
            f"Ignoring concepts elsewhere in the document because "
            f"more than {self.config.structured_list_limit} concepts exist "
            f"in prob, imp, pmh structured sections: {len(prob_concepts_in_structured_sections)}"
        )
        return prob_concepts_in_structured_sections

    return concepts

run_pipeline(note, record_concepts=None)

Runs the annotation pipeline on a given note and returns the extracted problem concepts.

Parameters:

    note (Note): The input note to process. Required.
    record_concepts (Optional[List[Concept]]): The list of concepts from existing EHR records. Defaults to None.

Returns:

    List[Concept]: The extracted concepts from the note.

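A sketch of supplying existing record concepts so the deduplicator step can drop repeats, continuing the earlier example; the Concept import path and constructor arguments are assumptions:

from miade.concept import Concept, Category  # assumed import path

existing = [
    Concept(id="44054006", name="Type 2 diabetes mellitus", category=Category.PROBLEM),
]
concepts = annotator.run_pipeline(note, record_concepts=existing)
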
Source code in miade/annotators.py
def run_pipeline(self, note: Note, record_concepts: Optional[List[Concept]] = None) -> List[Concept]:
    """
    Runs the annotation pipeline on a given note and returns the extracted problem concepts.

    Args:
        note (Note): The input note to process.
        record_concepts (Optional[List[Concept]]): The list of concepts from existing EHR records.

    Returns:
        List[Concept]: The extracted concepts from the note.
    """
    # TODO: not the best way to do this - make this more extensible!!
    concepts: List[Concept] = []

    for pipe in self.pipeline:
        if pipe not in self.config.disable:
            if pipe == "preprocessor":
                note = self.preprocess(note=note, refine=self.config.refine_paragraphs)
            elif pipe == "medcat":
                concepts = self.get_concepts(note=note)
            elif pipe == "list_cleaner":
                concepts = self.filter_concepts_in_numbered_list(concepts=concepts, note=note)
            elif pipe == "paragrapher":
                concepts = self.process_paragraphs(note=note, concepts=concepts)
            elif pipe == "postprocessor":
                concepts = self.postprocess(concepts=concepts)
            elif pipe == "deduplicator":
                concepts = self.deduplicate(concepts=concepts, record_concepts=record_concepts)

    return concepts