# Source code for hippo.pycule

import pycule
from .quote import Quote
import requests

import logging

# logging.getLogger("requests").setLevel(logging.WARNING)
# logging.getLogger("mcule:core").setLevel(logging.WARNING)
# Only let WARNING-and-above records from urllib3 through.
logging.getLogger("urllib3").setLevel(logging.WARNING)

# Catalogue codes assigned to ``Quoter.catalogues`` for the Enamine supplier
# and iterated, in this order, by ``Quoter.get_enamine_quote``.
ENAMINE_CATALOGUES = [
    "BB",
    "SCR",
    "MADE",
    "REAL",
]

# Maps the catalogue name used by the v2 API to the short catalogue code.
# The keys are interpolated into the search URL by
# ``Quoter.get_enamine_batch_quote`` and are looked up from
# ``product["catalog"]`` in ``Quoter.parse_enamine_bulk_products``; the values
# are stored as the quote's ``catalogue`` and index ENAMINE_V2_LEAD_TIME.
ENAMINE_V2_CATALOGUES = {
    "building-blocks": "BB",
    "screening-compounds": "SCR",
    "made-building-blocks": "MADE",
    "real": "REAL",
    "bioactive-compounds": "BIO",
}

# Prefix for every v2 request URL built in ``Quoter.get_enamine_batch_quote``.
ENAMINE_V2_BASE_URL = "https://new.enaminestore.com/api/v2"

# Lead time stored on a quote, indexed as [catalogue code][availability],
# where availability is the pack's ``["weight"]["available"]`` string.
# A combination missing from this table makes
# ``Quoter.parse_enamine_bulk_products`` log an error and skip that pack.
# NOTE(review): the unit is presumably days -- confirm against insert_quote.
ENAMINE_V2_LEAD_TIME = {
    "BB": {
        "IN_STOCK": 5,
        "SYNTHESIS": 7,
    },
    "SCR": {
        "IN_STOCK": 5,
        "SYNTHESIS": 20,
    },
    "MADE": {
        "SYNTHESIS": 20,
    },
    "REAL": {
        "SYNTHESIS": 20,
    },
    "BIO": {
        "IN_STOCK": 5,
    },
}

import mrich


[docs] class Quoter: """Class to scrape catalogue data""" def __init__( self, supplier: str, username: str = None, password: str = None, token: str = None, ): """Currently 'Enamine' and 'MCule' are supported. Once initialised""" if supplier == "Enamine": self.supplier = "Enamine" self.batch_size = 2000 self.catalogues = ENAMINE_CATALOGUES self._get_quote = self.get_enamine_quote self._get_batch_quote = self.get_enamine_batch_quote assert username assert password self.wrapper = pycule.core.EnamineWrapper( username=username, password=password ) elif supplier == "MCule": self.supplier = "MCule" self.batch_size = 1000 self._get_quote = self.get_mcule_quote self._get_batch_quote = self.get_mcule_batch_quote assert token self.wrapper = pycule.core.MCuleWrapper(authorisation_token=token) else: mrich.error(f'Unsupported supplier: "{supplier}"') raise NotImplementedError
[docs] def get_quote(self, compound, **kwargs): """Get quotes for a compound :param compound: """ return self._get_quote(compound, **kwargs)
[docs] def get_batch_quote(self, compounds, **kwargs): """Get quotes for a compound set :param compounds: """ return self._get_batch_quote(compounds, **kwargs)
[docs] def __call__(self, compound): """Get quotes for a compound""" return self.get_quote(compound)
### ENAMINE
[docs] def get_enamine_batch_quote( self, compounds, currency="USD", catalogues=None, exact=False, forms=False, analogues=False, equivalents=False, ): """ :param compounds: :param currency: (Default value = "USD") :param catalogues: (Default value = None) :param exact: (Default value = False) :param forms: (Default value = False) :param analogues: (Default value = False) :param equivalents: (Default value = False) """ import time from .cset import CompoundSet n_comps = len(compounds) mrich.var("batch size", n_comps) assert n_comps <= 2000 unmatched = compounds.copy() matched = CompoundSet(compounds.db) errors = [] # perform an exact search on the batch of smiles (for each catalog) payload = dict(compounds=compounds.smiles, currency=currency) include = [] if forms: include.append("FORMS") if analogues: include.append("ANALOGS") if equivalents: include.append("EQUIVALENTS") if include: payload["include"] = include catalogues = catalogues or ENAMINE_V2_CATALOGUES t_start = time.perf_counter() # return payload for catalogue in catalogues: if exact: mrich.debug( f"Exact search in Enamine:{ENAMINE_V2_CATALOGUES[catalogue]}..." ) else: mrich.debug( f"Similarity==1.0 search in Enamine:{ENAMINE_V2_CATALOGUES[catalogue]}..." 
) # send the request t_catalogue_start = time.perf_counter() if exact: url = ( ENAMINE_V2_BASE_URL + f"/catalog/search/in/{catalogue}/by/SMILEs/EXACT" ) else: url = ( ENAMINE_V2_BASE_URL + f"/catalog/search/in/{catalogue}/by/SMILEs/SIMILARITY/1.00" ) response = requests.post(url=url, json=payload) mrich.var( f"{catalogue} request time = ", time.perf_counter() - t_catalogue_start ) # process the request if (status := response.status_code) != 200: mrich.error(f"Request error: status={status}") errors.append( dict(catalogue=catalogue, status=status, response=response) ) continue data = response.json() results = data["results"] products = results["products"] # register the quotes in the database this_matched = self.parse_enamine_bulk_products(compounds.db, products) if this_matched: mrich.success(f"Added quotes for {len(this_matched)} compounds") matched += this_matched unmatched -= this_matched return data # break mrich.var("Total time = ", time.perf_counter() - t_start) if unmatched: mrich.error(f"Did not find quotes for {len(unmatched)} compounds") if errors: mrich.error(f"{len(errors)} failed requests") return dict(matched=matched, unmatched=unmatched, errors=errors)
[docs] def parse_enamine_bulk_products(self, db, products): """ :param db: :param products: """ from .tools import flat_inchikey from .cset import CompoundSet matched = CompoundSet(db) for product in products: prices = product["prices"] product = product["product"] # quote-level data from product entry = product["code"] smiles = product["smile"] currency = prices["currency"] catalogue = ENAMINE_V2_CATALOGUES[product["catalog"]] # check if the product has a name if not entry: mrich.warning(f"Product in {catalogue} but no entry-name. {smiles=}") if p := product["purity"]: purity = product["purity"] / 100 else: purity = None assert catalogue, product assert currency, product # identify the compound compound_id = db.get_compound_id(inchikey=flat_inchikey(smiles)) matched.add(compound_id) # loop through prices for pack in prices["all"]: assert pack["weight"]["measure"] == "mg" amount = pack["weight"]["amount"] price = pack["price"] availability_str = pack["weight"]["available"] try: lead_time = ENAMINE_V2_LEAD_TIME[catalogue][availability_str] except KeyError: mrich.error( f"Unsupported {availability_str=} [{compound_id=}, {entry=}]" ) print(pack) continue # register the quote quote_id = db.insert_quote( compound=compound_id, supplier="Enamine", catalogue=catalogue, entry=entry, amount=amount, price=price, currency=currency, purity=purity, lead_time=lead_time, smiles=smiles, commit=False, ) # break mrich.debug("Committing changes to the database...") db.commit() return matched
[docs] def get_enamine_quote(self, compound, currency="USD"): """ :param compound: :param currency: (Default value = "USD") """ assert compound._table == "compound" try: smiles = compound.smiles mrich.header(f"Exact search: {smiles=}") for catalogue in self.catalogues: mrich.header(f"Searching in {self.supplier} {catalogue}...") try: result = self.wrapper.exactsearch( smiles=compound.smiles, currency=currency, catalogue=catalogue ) except requests.ConnectionError as e: mrich.error(f"ConnectionError: {e}") return None # try again but with a similarity search if result["response"]["result"]["code"] != 0: try: result = self.wrapper.similaritysearch( smiles, similarity_value=1.0, catalogue=catalogue ) except requests.ConnectionError as e: mrich.error(f"ConnectionError: {e}") return None if result["response"]["result"]["code"] != 0: continue data = result["response"]["data"] if len(data) < 1: print(result["response"]) raise NotImplementedError data = data[0] name = data["Id"] mrich.success(f"Found in {catalogue} w/ id={name}") break else: mrich.error("No match found") return None ### get the information mrich.header(f"Searching by ID in {self.supplier} {catalogue}...") for catalogue in self.catalogues: result = self.wrapper.compoundidsearch(name, catalogue=catalogue) if result["response"]["result"]["code"] == 0: try: mrich.success(f"Found in {catalogue}") return ( self.parse_enamine_response(result, compound, catalogue), result, ) except NoDataInReponse: pass else: mrich.error("No catalog entry") return None except KeyboardInterrupt: mrich.warning("Interrupted quoting")
[docs] def pick_enamine_exact_data(self, smiles, data): """ :param smiles: :param data: """ if len(data) == 1: return data[0]["Id"] if len(data) == 0: raise NoDataInResponse(smiles) # sort the results by increasing lead_time, and increasing stereo-complexity data = sorted( data, key=lambda x: ( self.parse_enamine_delivery_string(x["deliveryDays"]), stereo_complexity(x["smile"]), ), ) mout.warning( f'Picking quickest and least stereo complex result from: {[x["Id"] for x in data]}' ) return data[0]["Id"]
[docs] def parse_enamine_response(self, result, compound, catalogue): """ :param result: :param compound: :param catalogue: """ quotes = [] for i, data in enumerate(result["response"]["data"]): if i == 0: # update compound metadata metadata = dict() metadata["enamine_id"] = data["Id"] metadata["alias"] = data["name"] metadata["cas"] = data["cas"] metadata["mfcd"] = data["mfcd"] metadata["formula"] = data["formula"] metadata["molecular_weight"] = data["mw"] compound.metadata.update(metadata) """ do something with: * availability * storageCond * productUrl * lastUpdate """ lead_time = self.parse_enamine_delivery_string( delivery := data["deliveryDays"] ) entry = data["Id"] smiles = data["smile"] if purity := data["purity"]: purity /= 100 # if not (packs := data['packs']): if not (packs := data["packs"]) and "synthesis" in delivery: mrich.warning("Hardcoded REAL quote") compound.db.insert_quote( compound=compound, supplier="Enamine", catalogue=catalogue, entry=entry, amount=20, price=330, currency="USD", purity=None, lead_time=15, smiles=smiles, ) else: for pack in packs: self.parse_enamine_pack( compound, entry, purity, catalogue, pack, lead_time, smiles )
[docs] def pick_enamine_data(self, comp_id, data): """ :param comp_id: :param data: """ data = [d for d in data if d["Id"] == comp_id] if len(data) == 0: raise NoDataInReponse elif len(data) == 1: return data[0] data = [d for d in data if not any([p["price"] == 0.0 for p in d["packs"]])] if len(data) > 1: mrich.warning( f"Taking first data entry after stripping non-priced ({comp_id})" ) return data[0]
[docs] def parse_enamine_delivery_string(self, string): """ :param string: """ if string.startswith("regular delivery, ") and string.endswith("days"): return int(string.removeprefix("regular delivery, ").removesuffix(" days")) if string.startswith("backorder, ") and string.endswith("days"): return int(string.removeprefix("backorder, ").removesuffix(" days")) if string == "3 weeks synthesis time": return 15 raise Exception(f"Unexpected delivery string: {string}")
[docs] def parse_enamine_pack( self, compound, entry, purity, catalogue, pack, lead_time, smiles ): """ :param compound: :param entry: :param purity: :param catalogue: :param pack: :param lead_time: :param smiles: """ supplier = "Enamine" match pack["measure"]: case "g": amount = pack["amount"] * 1000 case "mg": amount = pack["amount"] case _: raise NotImplementedError( f'Not supported pack measure: {pack["measure"]}' ) price = pack["price"] currency = pack["currencyName"] if pack["status"] not in ["Normal", "Ice pack", ""]: print(pack) mrich.warning(f"{pack['status']=}") if not price: mrich.warning(f"Skipping price-less Enamine pack ({entry})") return None compound.db.insert_quote( compound=compound, supplier=supplier, catalogue=catalogue, entry=entry, amount=amount, price=price, currency=currency, purity=purity, lead_time=lead_time, smiles=smiles, )
### MCULE
[docs] def get_mcule_quote(self, compound, exact=False): """ :param compound: :param exact: (Default value = False) """ try: # single exact query smiles = compound.smiles if exact: mrich.header(f"Exact search: {smiles=}") result = self.wrapper.singlequerysearch(smiles) else: mrich.header(f"Similarity==1.0 search: {smiles=}") result = self.wrapper.similaritysearch(smiles, threshold=1.0) if result["response"]["results"]: results = result["response"]["results"] mrich.header(results) mcule_ids = [data["mcule_id"] for data in results] smiles = [data["smiles"] for data in results] mrich.success(f"Found w/ ids={mcule_ids}") for mcule_id, smile in zip(mcule_ids, smiles): # mrich.header(f'{self.wrapper.compoundavailability(mcule_id)=}') # mrich.header(f'{self.wrapper.compounddetails(mcule_id)=}') result = self.wrapper.compoundprices(mcule_id) for pack in result["response"]["best_prices"]: self.parse_mcule_pack( compound.db, compound, mcule_id, pack, smile ) # mrich.header(f'{self.wrapper.compoundpricesamount(mcule_id)=}') return result except KeyboardInterrupt: mrich.warning("Interrupted quoting")
[docs] def get_mcule_batch_quote(self, compounds): """ :param compounds: """ import time from .cset import CompoundSet from .tools import flat_inchikey db = compounds.db n_comps = len(compounds) matched = CompoundSet(db) mrich.var("batch size", n_comps) assert n_comps <= 1000 # bulk query compound availability t_start = time.perf_counter() mrich.title("MCule bulk availability query...") data = self.wrapper.multiplequerieswithavailability(compounds.smiles) mrich.var("availability query time", time.perf_counter() - t_start) results = data["response"]["results"] mrich.var("#results", len(results)) mrich.title("MCule single price queries...") for result in mrich.track(results, prefix="Querying MCule"): entry = result["mcule_id"] smiles = result["smiles"] # match to compound compound_id = db.get_compound_id(inchikey=flat_inchikey(smiles)) if not compound_id: mrich.error(f"Could not find compound matching {smiles=}") continue matched.add(compound_id) prices = self.wrapper.compoundprices(entry) try: best_prices = prices["response"]["best_prices"] except KeyError as e: print(prices) mrich.error(f"Unsupported prices {e}") continue for pack in prices["response"]["best_prices"]: try: self.parse_mcule_pack( db, compound_id, entry, pack, smiles, commit=False ) except KeyError as e: print(pack) mrich.error(f"Unsupported pack {e}") continue mrich.var("Total time = ", time.perf_counter() - t_start) unmatched = compounds - matched if unmatched: mrich.error(f"Did not find quotes for {len(unmatched)} compounds") mrich.success("Committing changes to the database...") db.commit() return dict(matched=matched, unmatched=unmatched)
[docs] def parse_mcule_pack(self, db, compound, entry, pack, smiles, commit=True): """ :param db: :param compound: :param entry: :param pack: :param smiles: :param commit: (Default value = True) """ supplier = "MCule" match pack["unit"]: case "g": amount = pack["amount"] * 1000 case "mg": amount = pack["amount"] case _: raise NotImplementedError(f'Not supported pack measure: {pack["unit"]}') price = pack["price"] currency = pack["currency"] if "purity" in pack: purity = pack["purity"] / 100 else: purity = None lead_time = pack["delivery_time_working_days"] db.insert_quote( compound=compound, supplier=supplier, catalogue=None, entry=entry, amount=amount, price=price, currency=currency, purity=purity, lead_time=lead_time, smiles=smiles, commit=commit, )
class NoDataInReponse(Exception):
    """Raised when a supplier response holds no usable data entry.

    The misspelt name is kept because code in this module raises and
    catches it under this spelling.
    """


# Correctly spelt alias: ``Quoter.pick_enamine_exact_data`` refers to the
# exception under this name.
NoDataInResponse = NoDataInReponse


class NotInCatalogues(Exception):
    """Exception type for a compound that is in none of the catalogues.

    NOTE(review): nothing in this module raises it -- confirm it is still used.
    """