"""Pydantic v2 schema for BoltzGen design specification."""
from typing import Literal
from pydantic import BaseModel, Field, field_validator, model_validator
# ============================================================================
# Entity Definitions
# ============================================================================
class ProteinEntity(BaseModel):
    """
    Specification of a protein entity in a design.

    Attributes
    ----------
    id : str or list[str]
        A single chain identifier, or a list of chain identifiers.
    sequence : str
        Sequence pattern for the protein. It may combine:

        - amino acid letters (A-Z),
        - a number, meaning that many design residues (e.g. ``"10"``),
        - a range, meaning a random number of design residues within it
          (e.g. ``"15..20"`` for between 15 and 20),
        - mixtures of the above (e.g. ``"3..5C6C3"`` for variable design
          segments interleaved with fixed residues).
    secondary_structure : str | None
        Optional secondary structure specification. Defaults to None.
    binding_types : str | dict | None
        Optional binding type specification, either a string made of the
        characters ``'u'`` (unspecified), ``'B'`` (binding) and ``'N'``
        (not binding), or a dict with ``'binding'`` and/or
        ``'not_binding'`` keys. Defaults to None.
    cyclic : bool
        Whether the protein is cyclic. Defaults to False.

    Notes
    -----
    This model declares no validators of its own: only the field types are
    enforced, not the pattern syntax of ``sequence`` or the contents of
    ``binding_types``.
    """

    id: str | list[str]
    sequence: str
    secondary_structure: str | None = None
    binding_types: str | dict | None = None
    cyclic: bool = False
class LigandEntity(BaseModel):
    """
    Specification of a ligand entity in a design.

    Attributes
    ----------
    id : str or list[str]
        A single chain identifier, or a list of chain identifiers.
    ccd : str | None
        Chemical Component Dictionary identifier. Defaults to None.
    smiles : str | None
        SMILES string describing the ligand. Defaults to None.
    binding_types : str | dict | None
        Optional binding type specification. Defaults to None.

    Notes
    -----
    At least one of ``ccd`` and ``smiles`` must be set; supplying both is
    accepted by this model.
    """

    id: str | list[str]
    ccd: str | None = None
    smiles: str | None = None
    binding_types: str | dict | None = None

    @model_validator(mode="after")
    def check_ccd_or_smiles(self):
        """Reject a ligand that has neither a CCD code nor a SMILES string.

        Returns
        -------
        LigandEntity
            The validated instance, unchanged.

        Raises
        ------
        ValueError
            If both ``ccd`` and ``smiles`` are None.
        """
        has_chemistry = self.ccd is not None or self.smiles is not None
        if not has_chemistry:
            raise ValueError("Either 'ccd' or 'smiles' must be provided for ligand")
        return self
class ChainInclude(BaseModel):
    """
    Selection of a chain, or part of a chain, to include.

    Attributes
    ----------
    id : str
        Chain identifier.
    res_index : str | None
        Optional residue index range, e.g. ``"10..16"``, ``"..5"`` or
        ``"20.."``. Defaults to None. The range syntax is not checked by
        this model.
    """

    id: str
    res_index: str | None = None
class ChainIncludeProximity(BaseModel):
    """
    Selection of residues to include based on proximity.

    All three fields are required.

    Attributes
    ----------
    id : str
        Chain identifier.
    res_index : str
        Residue index range.
    radius : float
        Radius, in angstroms, used for the proximity inclusion.
    """

    id: str
    res_index: str
    radius: float
class ChainBindingType(BaseModel):
    """
    Binding / non-binding residue annotation for one chain.

    Attributes
    ----------
    id : str
        Chain identifier.
    binding : str | None
        Residue indices marked as binding, e.g. ``"5..7,13"``.
        Defaults to None.
    not_binding : str | None
        Residue indices marked as not binding, e.g. ``"9..11"`` or
        ``"all"``. Defaults to None.
    """

    id: str
    binding: str | None = None
    not_binding: str | None = None
class StructureGroup(BaseModel):
    """
    Group of residues sharing a structure visibility level.

    Attributes
    ----------
    visibility : int
        Visibility level (0, 1, 2, etc.).
    id : str
        Chain identifier, or ``"all"``.
    res_index : str | None
        Optional residue index range. Defaults to None.
    """

    visibility: int
    id: str
    res_index: str | None = None
class ChainDesign(BaseModel):
    """
    Residues of a chain that are to be designed.

    Both fields are required.

    Attributes
    ----------
    id : str
        Chain identifier.
    res_index : str
        Residue indices to design, e.g. ``"..4,20..27"``.
    """

    id: str
    res_index: str
class ChainSecondaryStructure(BaseModel):
    """
    Secondary structure assignment for regions of one chain.

    Attributes
    ----------
    id : str
        Chain identifier.
    loop : str | None
        Residue indices of loop regions. Defaults to None.
    helix : str | None
        Residue indices of helix regions. Defaults to None.
    sheet : str | None
        Residue indices of sheet regions. Defaults to None.
    """

    id: str
    loop: str | None = None
    helix: str | None = None
    sheet: str | None = None
class DesignInsertion(BaseModel):
    """
    Insertion of designed residues into a chain.

    Attributes
    ----------
    id : str
        Chain identifier.
    res_index : int
        Residue index (1-based) at which the insertion occurs.
    num_residues : str | int
        How many residues to insert: a fixed number, or a range such as
        ``"2..9"``.
    secondary_structure : Literal["UNSPECIFIED", "LOOP", "HELIX", "SHEET"]
        Secondary structure type of the inserted residues.
        Defaults to ``"UNSPECIFIED"``.
    """

    id: str
    res_index: int
    num_residues: str | int
    # Parenthesised only to keep the line within the length limit.
    secondary_structure: Literal["UNSPECIFIED", "LOOP", "HELIX", "SHEET"] = (
        "UNSPECIFIED"
    )
class FileEntity(BaseModel):
    """
    Entity backed by a structure file (e.g. PDB/CIF).

    Note
    ----
    The `path` field exists for compatibility with the BoltzGen YAML format.
    When using the `generate()` method, `path` is overwritten by the
    `structure_file` argument: the OpenProtein platform backend currently
    only accepts structure files via the `structure_file` parameter, not as
    paths inside the design spec.

    Attributes
    ----------
    path : str
        Path to the structure file. Acts as a placeholder that is replaced
        by the `structure_file` argument when calling `generate()`; the
        actual structure content must be supplied through that parameter.
    fuse : str | None
        Chain ID to fuse with. Defaults to None.
    include : str | list[dict] | None
        Chains or regions to include: either ``"all"`` or a list of chain
        specifications. Defaults to None.
    exclude : list[dict] | None
        Chains or regions to exclude. Defaults to None.
    include_proximity : list[dict] | None
        Proximity-based inclusion specifications. Defaults to None.
    binding_types : list[dict] | None
        Binding type specifications for chains. Defaults to None.
    structure_groups : list[dict] | None
        Structure group specifications. Defaults to None.
    design : list[dict] | None
        Design specifications for chains. Defaults to None.
    secondary_structure : list[dict] | None
        Secondary structure specifications for chains. Defaults to None.
    design_insertions : list[dict] | None
        Design insertion specifications. Defaults to None.

    Notes
    -----
    The list-valued fields are typed as plain ``list[dict]``, so the keys
    and values inside each dict are not validated by this model.
    """

    path: str
    fuse: str | None = None
    include: str | list[dict] | None = None
    exclude: list[dict] | None = None
    include_proximity: list[dict] | None = None
    binding_types: list[dict] | None = None
    structure_groups: list[dict] | None = None
    design: list[dict] | None = None
    secondary_structure: list[dict] | None = None
    design_insertions: list[dict] | None = None
class Entity(BaseModel):
    """
    Wrapper holding exactly one concrete entity.

    Attributes
    ----------
    protein : ProteinEntity | None
        Protein entity specification. Defaults to None.
    ligand : LigandEntity | None
        Ligand entity specification. Defaults to None.
    file : FileEntity | None
        File-based entity specification. Defaults to None.

    Notes
    -----
    Exactly one of the three fields must be set; see
    ``check_exactly_one_entity``.
    """

    protein: ProteinEntity | None = None
    ligand: LigandEntity | None = None
    file: FileEntity | None = None

    @model_validator(mode="after")
    def check_exactly_one_entity(self):
        """Validate that one, and only one, entity type is populated.

        Returns
        -------
        Entity
            The validated instance, unchanged.

        Raises
        ------
        ValueError
            If none, or more than one, of ``protein``, ``ligand`` and
            ``file`` is set.
        """
        populated = [
            candidate
            for candidate in (self.protein, self.ligand, self.file)
            if candidate is not None
        ]
        if len(populated) == 1:
            return self
        raise ValueError(
            "Exactly one of 'protein', 'ligand', or 'file' must be specified"
        )
# ============================================================================
# Constraint Definitions
# ============================================================================
class BondConstraint(BaseModel):
    """
    Covalent bond constraint between two atoms.

    Attributes
    ----------
    atom1 : list[str | int]
        First atom, given as ``[CHAIN_ID, RES_IDX, ATOM_NAME]``.
    atom2 : list[str | int]
        Second atom, given as ``[CHAIN_ID, RES_IDX, ATOM_NAME]``.

    Notes
    -----
    Each atom list is required and must contain exactly three items. The
    model enforces the length and that every item is a ``str`` or ``int``;
    it does not enforce which position holds which type.
    """

    atom1: list[str | int] = Field(..., min_length=3, max_length=3)
    atom2: list[str | int] = Field(..., min_length=3, max_length=3)
class TotalLengthConstraint(BaseModel):
    """
    Total length constraint for the design.

    Attributes
    ----------
    min : int | None
        Minimum total length. Defaults to None (no lower bound given).
    max : int | None
        Maximum total length. Defaults to None (no upper bound given).

    Notes
    -----
    When both bounds are given, ``min`` must not exceed ``max``; such a pair
    could never be satisfied and is rejected at validation time. Either
    bound, or both, may still be omitted.
    """

    min: int | None = None
    max: int | None = None

    @model_validator(mode="after")
    def check_min_not_above_max(self):
        """Reject a pair of bounds that no total length can satisfy.

        Returns
        -------
        TotalLengthConstraint
            The validated instance, unchanged.

        Raises
        ------
        ValueError
            If both bounds are set and ``min`` is greater than ``max``.
        """
        # Only compare when both bounds are present; a single bound (or
        # none) remains valid, as before.
        if self.min is not None and self.max is not None and self.min > self.max:
            raise ValueError(
                f"'min' ({self.min}) must not be greater than 'max' ({self.max})"
            )
        return self
class Constraint(BaseModel):
    """
    Wrapper holding one or more concrete constraints.

    Attributes
    ----------
    bond : BondConstraint | None
        Bond constraint specification. Defaults to None.
    total_len : TotalLengthConstraint | None
        Total length constraint specification. Defaults to None.

    Notes
    -----
    At least one of the two fields must be set; setting both is accepted
    by this model.
    """

    bond: BondConstraint | None = None
    total_len: TotalLengthConstraint | None = None

    @model_validator(mode="after")
    def check_at_least_one_constraint(self):
        """Validate that the wrapper is not empty.

        Returns
        -------
        Constraint
            The validated instance, unchanged.

        Raises
        ------
        ValueError
            If both ``bond`` and ``total_len`` are None.
        """
        if self.bond is None and self.total_len is None:
            raise ValueError("At least one constraint type must be specified")
        return self
# ============================================================================
# Top-Level Design Spec
# ============================================================================
class BoltzGenDesignSpec(BaseModel):
    """
    Top-level BoltzGen design specification.

    Bundles the entities of a design (proteins, ligands, files) together
    with any constraints that apply to it.

    Attributes
    ----------
    entities : list[Entity]
        Entities in the design. Must contain at least one item.
    constraints : list[Constraint] | None
        Constraints for the design. Defaults to None.

    Examples
    --------
    >>> spec = BoltzGenDesignSpec(
    ...     entities=[
    ...         Entity(
    ...             protein=ProteinEntity(id="A", sequence="ACDEFGHIKLMNPQRSTVWY")
    ...         ),
    ...         Entity(ligand=LigandEntity(id="B", ccd="ATP")),
    ...     ],
    ...     constraints=[
    ...         Constraint(
    ...             bond=BondConstraint(
    ...                 atom1=["A", 10, "CA"], atom2=["B", 1, "O"]
    ...             )
    ...         )
    ...     ],
    ... )
    """

    entities: list[Entity]
    constraints: list[Constraint] | None = None

    @field_validator("entities")
    @classmethod
    def check_entities_not_empty(cls, v):
        """Validate that the entity list is non-empty.

        Parameters
        ----------
        v : list[Entity]
            The value supplied for ``entities``.

        Returns
        -------
        list[Entity]
            ``v``, unchanged.

        Raises
        ------
        ValueError
            If ``v`` is empty.
        """
        if v:
            return v
        raise ValueError("At least one entity must be specified")