Source code for openprotein.align.schemas

"""Schemas for OpenProtein align system."""

from enum import Enum
from typing import Literal

from pydantic import BaseModel, Field

from openprotein.jobs import Job, JobType


class AlignType(str, Enum):
    """
    Enumeration of alignment types.

    Attributes
    ----------
    INPUT : str
        Raw input alignment.
    MSA : str
        Generated multiple sequence alignment.
    PROMPT : str
        Prompt-based alignment.
    """

    INPUT = "RAW"
    MSA = "GENERATED"
    PROMPT = "PROMPT"


class MSASamplingMethod(str, Enum):
    """
    Enumeration of MSA sampling methods.

    Attributes
    ----------
    RANDOM : str
        Random sampling.
    NEIGHBORS : str
        Sampling based on neighbors.
    NEIGHBORS_NO_LIMIT : str
        Neighbor sampling without limit.
    NEIGHBORS_NONGAP_NORM_NO_LIMIT : str
        Neighbor sampling without gap normalization and without limit.
    TOP : str
        Top scoring sampling.
    """

    RANDOM = "RANDOM"
    NEIGHBORS = "NEIGHBORS"
    NEIGHBORS_NO_LIMIT = "NEIGHBORS_NO_LIMIT"
    NEIGHBORS_NONGAP_NORM_NO_LIMIT = "NEIGHBORS_NONGAP_NORM_NO_LIMIT"
    TOP = "TOP"


class PromptPostParams(BaseModel):
    """
    Parameters for posting a prompt to generate an MSA.

    Attributes
    ----------
    msa_id : str
        Identifier for the MSA.
    num_sequences : int or None, optional
        Number of sequences to sample (default is None, must be >=0 and <100).
    num_residues : int or None, optional
        Number of residues to sample (default is None, must be >=0 and <24577).
    method : MSASamplingMethod, optional
        Sampling method to use (default is NEIGHBORS_NONGAP_NORM_NO_LIMIT).
    homology_level : float, optional
        Homology level threshold (default is 0.8, must be between 0 and 1).
    max_similarity : float, optional
        Maximum similarity threshold (default is 1.0, must be between 0 and 1).
    min_similarity : float, optional
        Minimum similarity threshold (default is 0.0, must be between 0 and 1).
    always_include_seed_sequence : bool, optional
        Whether to always include the seed sequence (default is False).
    num_ensemble_prompts : int, optional
        Number of ensemble prompts to generate (default is 1).
    random_seed : int or None, optional
        Random seed for reproducibility (default is None).
    """

    msa_id: str
    num_sequences: int | None = Field(None, ge=0, lt=100)
    num_residues: int | None = Field(None, ge=0, lt=24577)
    method: MSASamplingMethod = MSASamplingMethod.NEIGHBORS_NONGAP_NORM_NO_LIMIT
    homology_level: float = Field(0.8, ge=0, le=1)
    max_similarity: float = Field(1.0, ge=0, le=1)
    min_similarity: float = Field(0.0, ge=0, le=1)
    always_include_seed_sequence: bool = False
    num_ensemble_prompts: int = 1
    random_seed: int | None = None


class MSAJob(Job):
    """
    Base class for MSA-related jobs.

    Attributes
    ----------
    job_type : Literal[JobType.align_align]
        The type of job (must be JobType.align_align).
    """

    job_type: Literal[JobType.align_align]

    @property
    def msa_id(self):
        """
        Returns the MSA identifier for this job.

        Returns
        -------
        str
            The MSA identifier.
        """
        return self.msa_id


class MafftJob(MSAJob, Job):
    """
    Job for running MAFFT alignment.

    Attributes
    ----------
    job_type : Literal[JobType.mafft]
        The type of job (must be JobType.mafft).
    """

    job_type: Literal[JobType.mafft]


class ClustalOJob(MSAJob, Job):
    """
    Job for running Clustal Omega alignment.

    Attributes
    ----------
    job_type : Literal[JobType.clustalo]
        The type of job (must be JobType.clustalo).
    """

    job_type: Literal[JobType.clustalo]


class AbNumberJob(MSAJob, Job):
    """
    Job for running AbNumber alignment.

    Attributes
    ----------
    job_type : Literal[JobType.abnumber]
        The type of job (must be JobType.abnumber).
    """

    job_type: Literal[JobType.abnumber]


[docs] class AbNumberScheme(str, Enum): """Antibody numbering scheme.""" IMGT = "imgt" CHOTHIA = "chothia" KABAT = "kabat" AHO = "aho"