Source code for hippo.tags
"""Classes for managing compound/pose tags"""
import mcol
import mrich
from collections.abc import MutableSet
[docs]
class TagTable:
    """Object representing the 'tag' table in the :class:`.Database`.

    .. attention::

        :class:`.TagTable` objects should not be created directly. Instead use the :meth:`.HIPPO.tags` property.

    """

    _table = "tag"

    def __init__(
        self,
        db: "Database",
    ) -> None:
        """TagTable initialisation

        :param db: parent database object, stored and exposed via :attr:`db`
        """
        self._db = db

    ### PROPERTIES

    @property
    def db(self) -> "Database":
        """Returns a pointer to the parent database"""
        return self._db

    @property
    def table(self) -> str:
        """Returns the name of the :class:`.Database` table"""
        return self._table

    @property
    def unique(self) -> list[str]:
        """Returns a sorted list of the unique tag names contained in the table"""
        values = self.db.select(
            table=self.table, query="DISTINCT tag_name", multiple=True
        )
        # each record is a 1-tuple; the set guards against repeated names
        return sorted({v for v, in values})

    ### METHODS

    def summary(self, return_df: bool = False) -> "pd.DataFrame | None":
        """Print a summary table of tags with compound and pose counts

        The table is indexed by tag name and has three integer columns:
        ``num_compounds`` (distinct ``tag_compound`` values), ``num_poses``
        (distinct ``tag_pose`` values) and ``num_posed_compounds`` (distinct
        ``pose_compound`` values of the poses carrying the tag).

        :param return_df: return the DataFrame instead of printing it (Default value = False)
        :returns: the summary DataFrame if ``return_df`` is set, otherwise ``None``
        """
        from pandas import DataFrame

        sql = """
        SELECT tag_name,
        COUNT(DISTINCT tag_compound),
        COUNT(DISTINCT tag_pose)
        FROM tag
        GROUP BY tag_name
        ORDER BY tag_name;
        """

        cursor = self.db.execute(sql)

        data = [
            dict(tag=a, num_compounds=b, num_poses=c) for a, b, c in cursor.fetchall()
        ]

        # explicit columns keep the frame well-formed when the tag table is empty
        # (otherwise set_index would fail on a column-less frame)
        df = DataFrame(data, columns=["tag", "num_compounds", "num_poses"])
        df = df.set_index("tag")

        # compounds with poses
        sql = """
        SELECT tag_name, COUNT(DISTINCT pose_compound) FROM tag
        INNER JOIN pose
        ON tag_pose = pose_id
        GROUP BY tag_name
        ORDER BY tag_name;
        """

        cursor = self.db.execute(sql)
        posed = {tag: count for tag, count in cursor.fetchall()}

        # tags without any tagged pose are absent from the join and count as zero;
        # the column is always created so the result has a stable layout
        df["num_posed_compounds"] = [posed.get(tag, 0) for tag in df.index]

        df = df.astype(int)

        if return_df:
            return df

        mrich.print(df)
        return None

    def rename(self, old: str, new: str) -> None:
        """Rename all instances of a tag across the database

        Rows whose rename would violate a table constraint are skipped by the
        ``OR IGNORE`` clause and then removed by the clean-up delete of ``old``.

        :param old: current tag name
        :param new: replacement tag name
        """
        old, new = str(old), str(new)

        if old == new:
            # nothing to rename; without this guard the clean-up delete below
            # would wipe every assignment of the tag
            return

        sql = """
        UPDATE OR IGNORE tag
        SET tag_name = ?2
        WHERE tag_name = ?1;
        """

        self.db.execute(sql, (old, new))

        # drop whatever is still stored under the old name
        self.delete(old)

        self.db.commit()

    def delete(self, tag: str) -> None:
        """Delete all assignments for the given tag

        :param tag: tag name passed to the database's ``delete_where`` as the value to match
        """
        self.db.delete_where(table="tag", key="name", value=tag)

    ### DUNDERS

    def __str__(self) -> str:
        """Unformatted representation of this object"""
        return f"Tags {self.unique}"

    def __repr__(self) -> str:
        """ANSI Formatted string representation"""
        return f"{mcol.bold}{mcol.underline}{self}{mcol.unbold}{mcol.ununderline}"

    def __rich__(self) -> str:
        """Rich Formatted string representation"""
        return f"[bold underline]{self}"
[docs]
class TagSet(MutableSet):
"""Object representing a subset of the 'tag' table in the :class:`.Database` belonging to a certain :class:`.Compound` or :class:`.Pose`.
.. attention::
:class:`.TagSet` objects should not be created directly. Instead use the :meth:`.Compound.tags` or :meth:`.Pose.tags` property.
"""
def __init__(
self,
parent: "Compound | Pose",
tags: list | tuple | None = None,
immutable: bool = False,
commit: bool = True,
):
"""TagSet initialisation"""
self._elements = []
self._immutable = immutable
self._parent = parent
tags = tags or []
for tag in tags:
if tag not in self._elements:
self._elements.append(tag)
### PROPERTIES
@property
def tags(self) -> list:
    """The tags in this set, as the underlying list object (not a copy)"""
    elements = self._elements
    return elements
@property
def immutable(self) -> bool:
    """Current value of the immutability flag"""
    flag = self._immutable
    return flag

@immutable.setter
def immutable(self, b: bool) -> None:
    """Store a new value for the immutability flag"""
    self._immutable = b
@property
def parent(self):
    """The :class:`.Compound` or :class:`.Pose` that this set of tags belongs to"""
    owner = self._parent
    return owner
@property
def db(self) -> "Database":
    """The database object held by this set's parent"""
    owner = self.parent
    return owner.db
### DATABASE
def _remove_tag_from_db(
    self,
    tag: str,
) -> None:
    """Delete a specific tag assignment for the parent :class:`.Compound`/:class:`.Pose`

    The tag name and the parent ID are passed as bound parameters, so tags
    containing quote characters, or tags that happen to be spelled like a
    column name, are matched literally.

    :param tag: tag to delete
    """
    # a column name cannot be bound as a parameter, so only the parent's
    # table name is interpolated into the statement
    sql = f"DELETE FROM tag WHERE tag_name = ? AND tag_{self.parent.table} = ?"
    self.db.execute(sql, (tag, self.parent.id))
def _clear_tags_from_db(
self,
tag: str,
) -> None:
"""Delete all tag assignments for the parent :class:`.Compound`/:class:`.Pose`
:param tag: tag to delete
"""
sql = f"DELETE FROM tag WHERE tag_{self.parent.table} = {self.parent.id}"
self.db.execute(sql)
def _add_tag_to_db(
    self,
    tag: str,
    commit: bool = True,
) -> None:
    """Record a tag for the parent via the database's ``insert_tag``

    The call receives ``name=<tag>``, a keyword named after the parent's
    table holding the parent's ID, and the ``commit`` flag.

    :param tag: tag to add
    :param commit: forwarded to ``insert_tag`` (Default value = True)
    """
    fields = dict(name=tag)
    fields[self.parent.table] = self.parent.id
    self.db.insert_tag(**fields, commit=commit)
### METHODS
[docs]
def pop(self) -> str:
    """Pop the last element

    The popped tag is also deleted from the database, in the same way as
    :meth:`remove`, so the in-memory set and the stored assignments stay
    in step.

    :returns: the tag that was removed
    :raises IndexError: if the set is empty
    """
    assert not self.immutable
    tag = self._elements.pop()
    self._remove_tag_from_db(tag)
    return tag
[docs]
def discard(
    self,
    tag: str,
) -> None:
    """Discard an element

    Unlike :meth:`remove`, a tag that is not in the set is silently ignored.

    :param tag: tag to discard
    """
    assert not self.immutable
    # delegate to remove() only when there is something to remove; the
    # previous body called discard() on itself and never terminated
    if tag in self._elements:
        self.remove(tag)
[docs]
def remove(self, tag: str) -> None:
    """Take a tag out of the set and delete its database assignment

    :param tag: tag to remove
    :raises ValueError: if tag is not in set
    """
    assert not self.immutable

    # guard clause: refuse tags that are not present
    if tag not in self:
        raise ValueError(f"{tag} not in {self}")

    self._elements.remove(tag)
    self._remove_tag_from_db(tag)
[docs]
def add(
    self,
    tag: str,
    commit: bool = True,
) -> None:
    """Insert a tag into the set unless it is already there

    :param tag: tag to add
    :param commit: forwarded to the database insertion (Default value = True)
    """
    assert not self.immutable

    # already present: nothing to store, nothing to write
    if tag in self._elements:
        return

    self._elements.append(tag)
    self._add_tag_to_db(tag, commit=commit)
[docs]
def glob(self, pattern: str) -> list[str]:
    """Select the tags in this set whose names match a UNIX-style pattern

    :param pattern: unix style pattern with shell-style wildcards
    :returns: list of tags
    """
    from fnmatch import filter as filter_names

    candidates = self.tags
    return filter_names(candidates, pattern)
### DUNDERS
[docs]
def __contains__(self, tag: str) -> bool:
    """Report whether the given tag is among this set's tags"""
    known = self.tags
    return tag in known
[docs]
def __str__(self) -> str:
    """Plain-text form: the string rendering of the underlying tag list"""
    return f"{self._elements!s}"
[docs]
def __repr__(self) -> str:
    """String form of this object wrapped in the ``mcol`` bold/underline markers"""
    opening = f"{mcol.bold}{mcol.underline}"
    body = f"{self}"
    closing = f"{mcol.unbold}{mcol.ununderline}"
    return f"{opening}{body}{closing}"
def __rich__(self) -> str:
    """String form of this object prefixed with bold/underline rich markup"""
    body = f"{self}"
    return "[bold underline]" + body
[docs]
def __add__(self, other):
    """Reject the ``+`` operator

    .. attention::

        Adding sets together is not supported

    :raises NotImplementedError: always
    """
    raise NotImplementedError()
[docs]
def __getitem__(self, key: int):
    """Look up a tag by its position in the underlying list"""
    elements = self._elements
    return elements[key]