"""Classes for scoring Recipes"""
import mrich
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
DATA_COLUMNS = [
"score",
"price",
"compound_ids",
"pose_ids",
"interaction_ids",
"pose_metadata",
]
[docs]
class Scorer:
"""Create a scorer object to score sets of recipes
:param db: :class:`.Database`
:param directory: path to directory containing recipe JSONs
:param pattern: glob pattern for :class:`.Recipe` JSON, default: "*.json"
:param attributes: attributes of :class:`.Recipe` objects to use for scoring
:param populate: Pre-populate query caches and child objects in memory (don't disable unless you have a good reason)
:param load_cache: Load cache from existing JSON
:param allowed_pose_ids: Restrict interaction and subsite calculations to these :class:`.Pose` IDs
"""
def __init__(
self,
db: "Database",
directory: "Path | str",
pattern: str = "*.json",
attributes: list[str] = None,
populate: bool = True,
load_cache: bool = True,
allowed_poses: "PoseSet | list[int] | None" = None,
out_key: str = "scorer",
) -> None:
"""Scorer initialisation"""
from .pset import PoseSet
from .recipe import RecipeSet
self._db = db
self._out_key = out_key
if allowed_poses is None:
self._allowed_pose_ids = None
elif isinstance(allowed_poses, PoseSet):
self._allowed_pose_ids = set(allowed_poses.ids)
else:
self._allowed_pose_ids = set(allowed_poses)
attributes = attributes or []
recipes = RecipeSet(db, directory, pattern=pattern)
self._recipes = recipes
self._attributes = {}
for key in attributes:
attribute = Attribute(self, key)
self._attributes[key] = attribute
self._data = pd.DataFrame(
index=recipes.keys(),
columns=DATA_COLUMNS + self.attribute_keys,
)
self._data.replace({np.nan: None}, inplace=True),
if populate:
if load_cache and self.json_path.exists():
self._load_json()
else:
self._populate_query_cache()
self._populate_recipe_child_sets()
self.weights = 1.0
### FACTORIES
[docs]
@classmethod
def default(
cls,
db: "Database",
directory: "Path | str",
pattern: str = "*.json",
skip: list[str] | None = None,
load_cache: bool = True,
subsites: bool = True,
allowed_poses: "PoseSet | list[int] | None" = None,
out_key: str = "scorer",
) -> "Scorer":
"""Create a Scorer instance with Default attributes"""
from .recipe import RecipeSet
self = cls.__new__(cls)
attributes = [
k for k, v in DEFAULT_ATTRIBUTES.items() if v["type"] == "standard"
]
self.__init__(
db=db,
directory=directory,
pattern=pattern,
attributes=attributes,
populate=False,
allowed_poses=allowed_poses,
out_key=out_key,
)
skip = skip or []
if not db.count("interaction"):
mrich.warning("No interactions in DB, skipping related metrics")
skip.append("interaction_count")
skip.append("interaction_balance")
if not db.count("pose"):
mrich.warning("No poses in DB, skipping related metrics")
skip.append("num_inspirations")
skip.append("num_inspiration_sets")
skip.append("avg_energy_score")
skip.append("avg_distance_score")
if not db.count("scaffold"):
mrich.warning("No scaffold entries in DB, skipping related metrics")
skip.append("num_scaffolds")
skip.append("num_scaffolds_elaborated")
skip.append("elaboration_balance")
# custom attributes
for key, attribute in [
(k, v) for k, v in DEFAULT_ATTRIBUTES.items() if v["type"] == "custom"
]:
if skip and key in skip:
continue
if not subsites and "subsite" in key:
continue
self.add_custom_attribute(
key, attribute["function"], weight_reset_warning=False
)
if load_cache and self.json_path.exists():
self._load_json()
else:
self._populate_query_cache()
self._populate_recipe_child_sets()
# weights
wsum = sum(abs(d["weight"]) for d in DEFAULT_ATTRIBUTES.values())
for attribute in self.attributes:
d = DEFAULT_ATTRIBUTES[attribute.key]
attribute.weight = d["weight"] / wsum
return self
### PROPERTIES
@property
def num_recipes(self) -> int:
"""Number of recipes being evaluated"""
return len(self._recipes)
@property
def attributes(self) -> "list[Attribute | CustomAttribute]":
"""Return list of :class:`.Attribute` / :class:`.CustomAttribute` objects"""
return list(self._attributes.values())
@property
def attribute_keys(self) -> list[str]:
"""Return list of :class:`.Attribute` / :class:`.CustomAttribute` names/keys"""
return list(self._attributes.keys())
@property
def recipes(self) -> "RecipeSet":
"""Return :class:`.RecipeSet` of recipes being scored"""
return self._recipes
@property
def num_attributes(self) -> int:
"""Count of attributes"""
return len(self.attributes)
@property
def weights(self) -> list[float]:
"""List of attribute weights"""
return [a.weight for a in self.attributes]
@weights.setter
def weights(self, ws) -> None:
"""Setter for weights list"""
self._flag_weight_modification()
if isinstance(ws, float) or isinstance(ws, int):
ws = [ws] * self.num_attributes
ws = [w for w in ws]
wsum = sum([abs(w) for w in ws])
for a, w in zip(self.attributes, ws):
a.weight = w / wsum
@property
def score_dict(self) -> dict[str, float]:
"""Dictionary of scores keyed by :meth:`.Recipe.hash`"""
col = self._data["score"]
null = col.isnull()
if null.sum():
mrich.debug("Calculating scores...")
for key in col[null].index.values:
recipe = self.recipes[key]
score = self.score(recipe)
self._data.at[key, "score"] = score
self._dump_json()
return col.to_dict()
@property
def scores(self) -> list[float]:
"""List of :class:`.Recipe` scores"""
return list(self.score_dict.values())
@property
def best(self) -> "Recipe":
"""Return highest scoring :class:`.Recipe`"""
return self.top(1)
@property
def db(self) -> "Database":
""":class:`.Database`"""
return self._db
@property
def json_path(self) -> "Path":
"""Path where cache will be written"""
from pathlib import Path
return Path(self.db.path.name.replace(".sqlite", f"_{self._out_key}.json"))
@property
def poses(self) -> "PoseSet":
"""Return all associated poses as :class:`.PoseSet`"""
from .pset import PoseSet
ids = set().union(*self._data["pose_ids"])
return PoseSet(self.db, ids)
### METHODS
[docs]
def add_custom_attribute(
self,
key: str,
function: "Callable",
weight_reset_warning: bool = True,
) -> "CustomAttribute":
"""Add a custom scoring attribute
:param key: name/key for the attribute
:param function: function call to get the attribute alue, will be passed :class:`.Recipe` object
:param weight_reset_warning: write a warning to indicate weights have been reset
"""
ca = CustomAttribute(self, key, function)
if key not in self._attributes:
# self._flag_weight_modification()
self._attributes[key] = ca
if weight_reset_warning:
mrich.warning("Attribute weights have been reset")
self.weights = 1.0
self._data[key] = None
else:
mrich.warning("Existing attribute with {key=}")
return self._attributes[key]
[docs]
def add_recipes(self, json_paths: "list", debug: bool = False) -> None:
"""Add more serialised :class:`.Recipe` objects to be scored
:param json_paths: list of JSON paths
:param debug: increase verbosity for debugging
"""
from pathlib import Path
from .recipe import Recipe
for json_path in json_paths:
path = Path(json_path)
key = path.name.removeprefix("Recipe_").removesuffix(".json")
if key in self.recipes:
mrich.warning(f"Skipping duplicate {path}")
continue
recipe = Recipe.from_json(self._db, path, allow_db_mismatch=True)
recipe._hash = key
if debug:
mrich.debug(recipe)
if debug:
mrich.debug("Updating Scorer.recipes._json_paths")
self.recipes._json_paths[key] = path.resolve()
if debug:
mrich.debug("Updating Scorer.recipes._recipes")
self.recipes._recipes[key] = recipe
self._data.loc[key] = None
self._data.replace({np.nan: None}, inplace=True),
self._populate_query_cache()
self._populate_recipe_child_sets()
self._flag_weight_modification()
[docs]
def score(
self,
recipe: "Recipe",
*,
debug: bool = False,
) -> float:
"""Score a :class:`.Recipe` object
:param recipe: :class:`.Recipe` to be scored
:param debug: increase verbosity for debugging
:returns: float score from 0 to 1
"""
score = 0.0
for attribute in self.attributes:
recipe_score = attribute(recipe)
score += recipe_score
if debug:
print_data = []
for attribute in self.attributes:
print_data.append(
dict(
key=attribute.key,
weight=f"{attribute.weight:.2f}",
value=f"{attribute.get_value(recipe):.2f}",
unweighted=f"{attribute.unweighted(recipe):.2%}",
weighted=f"{attribute(recipe):.2%}",
)
)
df = pd.DataFrame(print_data).set_index("key")
mrich.print(df)
mrich.var("score", score)
recipe._score = score
return score
[docs]
def compare(self, recipes: "list[Recipe] | list[str]") -> None:
"""Compare attribute values and scores for recipes
:param recipes: list of :class:`.Recipe` objects or hashes
"""
recipes = [
self.recipes[recipe] if isinstance(recipe, str) else recipe
for recipe in recipes
]
print_data = []
for attribute in self.attributes:
d = {"attribute (weight)": f"{attribute.key} ({attribute.weight:.2%})"}
for recipe in recipes:
# d = dict(hash=recipe.hash)
d[recipe.hash] = (
f"{attribute.get_value(recipe):.2f} ({attribute.unweighted(recipe):.2%})"
)
print_data.append(d)
df = pd.DataFrame(print_data).set_index("attribute (weight)")
mrich.print(df)
[docs]
def get_sorted_df(self) -> "pd.DataFrame":
"""Get DataFrame sorted by descending score"""
# compute scores
self.scores
return self._data.sort_values(by="score", ascending=False)
[docs]
def plot(
self,
keys: list[str],
budget: float | None = None,
) -> "plotly.graph_objects.Figure":
"""Plot any two attributes as a scatter plot
:param keys: list two attribute keys to plot
:param budget: limit :class:`.Recipe` objects to below this budget value
:returns: plotly Figure object containing a scatter trace
"""
import plotly.express as px
if len(keys) != 2:
mrich.error("Only two keys supported")
return None
# calculate scores
self.scores
df = self._data.drop(
columns=[
"compound_ids",
"pose_ids",
"interaction_ids",
]
)
df["score"] = pd.to_numeric(df["score"])
if isinstance(keys, str):
assert keys in df.columns
return px.histogram(df, x=keys)
if not all(key in df.columns for key in keys):
for key in keys:
if key not in df.columns:
raise KeyError(f'no attribute/column named "{key}"')
if budget:
df = df[df["price"] < budget]
df["hash"] = df.index.values
hover_data = [
"hash",
]
hover_data += [c for c in df.columns]
return px.scatter(
df, x=keys[0], y=keys[1], color="score", hover_data=hover_data
)
[docs]
def top_keys(self, n: int, budget: float | None = None) -> list[str]:
"""Return keys of top `n` scoring :class:`.Recipe`
:param n: number of keys to return
:param budget: limit :class:`.Recipe` objects to below this budget value
:returns: list of :class:`.Recipe` hashes
"""
keys = self.get_sorted_df(budget=budget).index[:n]
return list(keys)
[docs]
def top(self, n: int, budget: float | None = None) -> "list[Recipe]":
"""Return top `n` scoring :class:`.Recipe`
:param n: number of :class:`.Recipe` objects to return
:param budget: limit :class:`.Recipe` objects to below this budget value
:returns: list of :class:`.Recipe` objects
"""
keys = self.top_keys(n=n, budget=budget)
if n == 1:
return [self.recipes[key] for key in keys][0]
else:
return [self.recipes[key] for key in keys]
### INTERNALS
def _flag_weight_modification(self):
"""Reset scores due to weight modification"""
self._data["score"] = None
[docs]
def summary(self) -> None:
"""Print some summary statistics of the scorer's attributes"""
mrich.header(self)
for attribute in self.attributes:
mrich.print(
attribute,
f"min={attribute.min:.3g}, mean={attribute.mean:.3g}, std={attribute.std:.3g}, max={attribute.max:.3g}",
)
def __check_integrity(self) -> bool:
"""Check integrity of data"""
n_recipes = len(self.recipes)
for attribute in self.attributes:
assert len(attribute._value_dict) == n_recipes
assert len(self._scores) == n_recipes
assert len(self._data) == n_recipes
assert len(self._data.columns) == len(attributes) + len(DATA_COLUMNS)
return True
def _populate_query_cache(self) -> None:
"""Update internal data with pre-fetched related database IDs"""
from .cset import CompoundSet
from .pset import PoseSet
df = self._data
### Recipe prices
for recipe in self.recipes:
self._data.at[recipe.hash, "price"] = recipe.price.amount
### Compound IDs
col = "compound_ids"
null = df[col].isnull()
# populate missing product compound ids
if null.sum():
mrich.debug(f'Populating _data["{col}"]...')
assert len(df[null]) == null.sum()
for key in df[null].index.values:
recipe = self.recipes[key]
df.at[key, col] = recipe.combined_compound_ids
### Pose IDs
col = "pose_ids"
null = df[col].isnull()
# populate missing product pose ids
if null.sum():
compound_ids = set()
for ids in df[null]["compound_ids"]:
for id in ids:
compound_ids.add(id)
cset = CompoundSet(self.db, compound_ids, sort=False)
mrich.debug(f"Getting poses for {len(cset)} compounds")
pose_map = self.db.get_compound_id_pose_ids_dict(cset)
mrich.debug(f'Populating _data["{col}"]...')
for key in df[null].index.values:
assert len(df[null]) == null.sum()
recipe = self.recipes[key]
comp_ids = df["compound_ids"][key]
all_pose_ids = set()
for comp_id in comp_ids:
pose_ids = pose_map.get(comp_id, set())
if self._allowed_pose_ids:
pose_ids = set(
i for i in pose_ids if i in self._allowed_pose_ids
)
all_pose_ids |= pose_ids
df.at[key, col] = all_pose_ids
### Interaction IDs
col = "interaction_ids"
null = df[col].isnull()
# populate missing product interaction ids
if null.sum():
pose_ids = set()
for ids in df[null]["pose_ids"]:
for id in ids:
pose_ids.add(id)
pset = PoseSet(self.db, pose_ids, sort=False)
mrich.debug(f"Getting interactions for {len(pset)} poses")
interaction_map = self.db.get_pose_id_interaction_ids_dict(pset)
mrich.debug(f'Populating _data["{col}"]...')
for key in df[null].index.values:
assert len(df[null]) == null.sum()
recipe = self.recipes[key]
pose_ids = df["pose_ids"][key]
all_interaction_ids = set()
for pose_id in pose_ids:
interaction_ids = interaction_map.get(pose_id, set())
all_interaction_ids |= interaction_ids
df.at[key, col] = all_interaction_ids
### Metadata Dictionaries
col = "pose_metadata"
null = df[col].isnull()
# populate missing product interaction ids
if null.sum():
pose_ids = set()
for ids in df[null]["pose_ids"]:
for id in ids:
pose_ids.add(id)
# pset = PoseSet(self.db, pose_ids, sort=False)
mrich.debug(f"Getting metadata for {len(pose_ids)} poses")
metadata_lookup = self.db.get_id_metadata_dict(table="pose", ids=pose_ids)
mrich.debug(f'Populating _data["{col}"]...')
for key in df[null].index.values:
assert len(df[null]) == null.sum()
recipe = self.recipes[key]
pose_ids = df["pose_ids"][key]
row = df.loc[key]
metadata = {}
for pose_id in pose_ids:
metadata[pose_id] = metadata_lookup[pose_id]
df.at[key, col] = metadata
def _populate_recipe_child_sets(self) -> None:
"""Populate internal cache of recipe child compound/pose/interaction sets"""
from .cset import CompoundSet
from .pset import PoseSet
from .iset import InteractionSet
mrich.debug("Populating recipe caches")
for key, recipe in self.recipes.items():
row = self._data.loc[key]
if recipe._combined_compounds is None:
ids = row["compound_ids"]
cache = CompoundSet(self.db, ids)
cache._name = f"Recipe_{key} products"
recipe._combined_compounds = cache
if recipe._poses is None:
ids = row["pose_ids"]
cache = PoseSet(self.db, ids)
cache._name = f"Recipe_{key} poses"
recipe._poses = cache
if recipe._interactions is None:
ids = row["interaction_ids"]
cache = InteractionSet(self.db, ids)
cache._name = f"Recipe_{key} product interactions"
recipe._interactions = cache
if recipe._poses._metadata_dict is None:
cache = row["pose_metadata"]
recipe._poses._metadata_dict = cache
def _dump_json(self) -> None:
"""Write JSON cache to file"""
path = self.json_path
mrich.writing(path)
self._data.to_json(path)
def _load_json(self):
"""Load JSON cache from file"""
path = self.json_path
mrich.reading(path)
cached = pd.read_json(path, orient="columns")
if (cached_columns := set(cached.columns)) != (
self_columns := set(self._data.columns)
):
for col in cached_columns - self_columns:
mrich.error(f"JSON has unexpected {col}")
for col in self_columns - cached_columns:
mrich.error(f"JSON is missing {col}")
display(cached.head())
display(self._data.head())
raise ValueError("JSON columns don't match expectation")
cached_keys = set(cached.index.values)
self_keys = set(self._data.index.values)
if difference := cached_keys - self_keys:
mrich.warning("JSON has extra Recipes:")
mrich.warning(difference)
if difference := self_keys - cached_keys:
mrich.error("JSON is missing Recipes:")
mrich.error(difference)
raise ValueError("JSON is missing Recipes")
cached.replace({np.nan: None}, inplace=True),
self._data = cached
### DUNDERS
[docs]
def __str__(self) -> str:
"""Unformatted string representation"""
return f"Scorer(#recipes={self.num_recipes})"
[docs]
def __repr__(self) -> str:
"""ANSI Formatted string representation"""
import mcol
return f"{mcol.bold}{mcol.underline}{self}{mcol.unbold}{mcol.ununderline}"
def __rich__(self) -> str:
"""Rich Formatted string representation"""
return f"[bold underline]{self}"
[docs]
class Attribute:
"""Scoring Attribute to be used with a :class:`.Scorer` object
:param scorer: associated :class:`.Scorer`
:param key: key/name for the attribute
:param inverse: if true, lower values score higher
:param weight: adjust scores by this weight
:param bins: number of scoring bins
"""
_type = "Attribute"
### DUNDERS
def __init__(
self,
scorer: "Scorer",
key: str,
*,
inverse: bool = False,
weight: float = 1.0,
bins: int = 100,
) -> None:
"""Attribute initialisation"""
self._scorer = scorer
self._key = key
self._inverse = inverse
self._weight = weight
self._value_dict = {}
self._bins = bins
self._percentile_interpolator = None
### PROPERTIES
@property
def scorer(self) -> "Scorer":
"""Get associated :class:`.Scorer`"""
return self._scorer
@property
def key(self) -> str:
"""Get name/key"""
return self._key
@property
def inverse(self) -> bool:
"""Is this attribute inverted, lower values will score higher if true"""
return self._inverse
@property
def bins(self) -> int:
"""Number of bins"""
return self._bins
@property
def value_dict(self) -> dict[str, float]:
"""Dictionary of attribute values keyed by :class:`.Recipe` hash"""
df = self.scorer._data[self.key]
null = df.isnull()
if null.sum():
with mrich.loading(f"Constructing value dictionary for {self}"):
for key in df[null].index.values:
recipe = self.scorer.recipes[key]
self.get_value(recipe, force=True)
self.scorer._dump_json()
return df.to_dict()
@property
def values(self) -> list[float]:
"""Return list of values"""
return list(self.value_dict.values())
@property
def mean(self) -> float:
"""Return mean of value"""
return np.mean(self.values)
@property
def std(self) -> float:
"""Return standard deviation of values"""
return np.std(self.values)
@property
def max(self) -> float:
"""Return maximum of values"""
return max(self.values)
@property
def min(self) -> float:
"""Return minimum of values"""
return min(self.values)
@property
def weight(self) -> float:
"""Return weight"""
return self._weight
@weight.setter
def weight(self, w):
"""Set attribute weight"""
self.scorer._flag_weight_modification()
self._weight = abs(w)
self._reverse = w < 0
@property
def percentile_interpolator(self):
"""Interpolator function"""
if self._percentile_interpolator is None:
count, bins_count = np.histogram(self.values, bins=self.bins)
pdf = count / sum(count)
cdf = np.cumsum(pdf)
self._percentile_interpolator = interp1d(
bins_count[1:], cdf, kind="linear", fill_value="extrapolate"
)
return self._percentile_interpolator
### METHODS
[docs]
def get_value(
self,
recipe: "Recipe",
serialise_price: bool = True,
force: bool = False,
) -> float:
"""Get value for a :class:`.Recipe`
:param serialise_price: serialise :class:`.Price` objects to their amount
:param force: force calculation? (don't use cache)
"""
if not force:
cached = self.scorer._data[self.key][recipe.hash]
if force or cached is None:
value = getattr(recipe, self.key)
if serialise_price and self.key == "price":
value = value.amount
self.scorer._data.at[recipe.hash, self.key] = value
else:
return cached
return value
[docs]
def histogram(self) -> "plotly.graph_objects.Figure":
"""Plot histogram of attribute values"""
import plotly.graph_objects as go
fig = go.Figure(go.Histogram(x=self.values))
fig.update_layout(xaxis_title=self.key, yaxis_title="count")
return fig
[docs]
def unweighted(
self,
recipe: "Recipe",
) -> float:
"""Return unweighted percentile score for a given :class:`.Recipe`"""
value = self.get_value(recipe)
score = float(self.percentile_interpolator(value))
if self.inverse:
score = 1 - score
return score
### DUNDERS
[docs]
def __call__(
self,
recipe: "Recipe",
) -> float:
"""return the weighted score of a given :class:`.Recipe`"""
if not self.weight:
return 0.0
value = self.unweighted(recipe)
return self.weight * value
[docs]
def __str__(self) -> str:
"""Unformatted string representation"""
if self.weight is None:
return f'{self._type}("{self.key}", inverse={self.inverse})'
else:
return f'{self._type}("{self.key}", weight={self.weight:.2f}, inverse={self.inverse})'
[docs]
def __repr__(self) -> str:
"""ANSI Formatted string representation"""
import mcol
return f"{mcol.bold}{mcol.underline}{self}{mcol.unbold}{mcol.ununderline}"
def __rich__(self) -> str:
"""Rich Formatted string representation"""
return f"[bold underline]{self}"
[docs]
class CustomAttribute(Attribute):
"""Scoring attribute with a custom function"""
_type = "CustomAttribute"
def __init__(self, scorer: "Scorer", key: str, function: "Callable") -> None:
"""CustomAttribute initialisation"""
self._function = function
super(CustomAttribute, self).__init__(scorer=scorer, key=key)
### METHODS
[docs]
def get_value(
self,
recipe: "Recipe",
serialise_price: bool = True,
force: bool = False,
) -> float:
"""Compute custom attribute value for provided :class:`.Recipe`
:param serialise_price: serialise :class:`.Price` objects to their amount
:param force: force calculation? (don't use cache)
"""
if not force:
cached = self.scorer._data[self.key][recipe.hash]
if force or cached is None:
value = self._function(recipe)
if serialise_price and self.key == "price":
value = value.amount
self.scorer._data.at[recipe.hash, self.key] = value
else:
return cached
return value
DEFAULT_ATTRIBUTES = {
"num_scaffolds": dict(
type="custom",
weight=1.0,
function=lambda r: r.combined_compounds.count_by_tag(tag="Syndirella scaffold"),
description="The number of Syndirella scaffold compounds in this selection. Higher is better.",
),
"num_compounds": dict(
type="standard",
weight=1.0,
description="The number of product compounds in this selection. Higher is better.",
),
"num_scaffolds_elaborated": dict(
type="custom",
weight=1.0,
function=lambda r: r.combined_compounds.num_scaffolds_elaborated,
description="The number of Syndirella scaffold compounds that have at least one elaboration in this selection. Higher is better.",
),
"elaboration_balance": dict(
type="custom",
weight=1.0,
function=lambda r: r.combined_compounds.elaboration_balance,
description="A measure for how evenly scaffold compounds have been elaborated using an h-index. Higher is better.",
), ### REALLY UNPERFORMANT?
"num_inspirations": dict(
type="custom",
weight=1.0,
function=lambda r: r.poses.num_inspirations,
description="The number of unique fragment compounds that inspired poses for product compounds in this selection. Higher is better.",
),
"num_inspiration_sets": dict(
type="custom",
weight=1.0,
function=lambda r: r.poses.num_inspiration_sets,
description="The number of unique fragment combinations that inspired poses for product compounds in this selection. Higher is better.",
),
# "risk_diversity": dict(
# type="custom",
# weight=0.0,
# function=lambda r: r.combined_compounds.risk_diversity,
# description="A measure of how evenly spread the risk of elaborations are for each scaffold compound. Risk in this case refers to the number of atoms added. Higher is better",
# ), # REMOVED BECAUSE IT DOES NOT NECESSARILY IMPROVE AS PRODUCTS ARE ADDED
"interaction_count": dict(
type="custom",
weight=1.0,
function=lambda r: r.interactions.num_features,
description="The number of protein features that are being interecated with in this selection. Higher is better.",
),
"interaction_balance": dict(
type="custom",
weight=0.0,
function=lambda r: r.interactions.per_feature_count_hirsch,
description="A measure for how evenly protein features are being interacted with in this selection using an h-index. Higher is better",
),
"num_subsites": dict(
type="custom",
weight=1.0,
function=lambda r: r.poses.num_subsites,
description="Count the number of subsites that poses in this set come into contact with. Higher is better.",
),
"subsite_balance": dict(
type="custom",
weight=0.0,
function=lambda r: r.poses.subsite_balance,
description="Count the number of subsites that poses in this set come into contact with",
),
"avg_distance_score": dict(
type="custom",
weight=-0.0,
function=lambda r: r.poses.avg_distance_score,
description="Average distance score (e.g. RMSD to fragment inspirations) for poses in this set. Lower is better.",
),
"avg_energy_score": dict(
type="custom",
weight=-0.0,
function=lambda r: r.poses.avg_energy_score,
description="Average energy score (e.g. binding ddG) for poses in this set. Lower is better.",
),
# "reaction_risk": dict(type='custom', weight=1.0, function=None),
# "pockets?": dict(type='custom', weight=1.0, function=None),
# "chemical_diversity": dict(type='custom', weight=1.0, function=None),
# "DMS/sequence_variability": dict(type='custom', weight=1.0, function=None),
}