RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
SubstructMatch.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SUBSTRUCTMATCH_H
12#define RD_SUBSTRUCTMATCH_H
13
14// std bits
15#include <vector>
16
17#include <unordered_set>
18#include <functional>
19#include <unordered_map>
20#include <cstdint>
21#include <string>
22
23#include <boost/dynamic_bitset.hpp>
24#if BOOST_VERSION >= 107100
25#define RDK_INTERNAL_BITSET_HAS_HASH
26#endif
27
29
30namespace RDKit {
31class ROMol;
32class Atom;
33class Bond;
34class ResonanceMolSupplier;
35class MolBundle;
36
37//! \brief used to return matches from substructure searching.
38//! The format is (queryAtomIdx, molAtomIdx)
39typedef std::vector<std::pair<int, int>> MatchVectType;
40
// NOTE(review): the line that opens this member list (listing line 41) is not
// present in this extract; the members below are presumably those of
// SubstructMatchParameters -- confirm against the original header.
42 bool useChirality = false; //!< Use chirality in determining whether or not
43 //!< atoms/bonds match
44 bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
45 //!< determining whether atoms/bonds match
46 bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
47 //!< match each other
48 bool useQueryQueryMatches = false; //!< Consider query-query matches, not
49 //!< just simple matches
50 bool useGenericMatchers = false; //!< Looks for generic atoms in the query
51 //!< and uses them as part of the matching
52 bool recursionPossible = true; //!< Allow recursive queries
53 bool uniquify = true; //!< uniquify (by atom index) match results
54 unsigned int maxMatches = 1000; //!< maximum number of matches to return
55 int numThreads = 1; //!< number of threads to use when multi-threading
56 //!< is possible. 0 selects the number of
57 //!< concurrent threads supported by the hardware;
58 //!< negative values are added to the number of
59 //!< concurrent threads supported by the hardware.
60 std::function<bool(const ROMol &mol,
61 const std::vector<unsigned int> &match)>
62 extraFinalCheck; //!< a function to be called at the end to validate a
63 //!< match
64
// NOTE(review): listing line 65 is also absent from this extract.
66};
67
69 SubstructMatchParameters &params, const std::string &json);
71 const SubstructMatchParameters &params);
72
73//! Find a substructure match for a query in a molecule
74/*!
75 \param mol The ROMol to be searched
76 \param query The query ROMol
77 \param matchParams Parameters controlling the matching
78
79 \return The matches, if any
80
81*/
83 const ROMol &mol, const ROMol &query,
85
86//! Find all substructure matches for a query in a ResonanceMolSupplier object
87/*!
88 \param resMolSuppl The ResonanceMolSupplier object to be searched
89 \param query The query ROMol
90 \param matchParams Parameters controlling the matching
91
92 \return The matches, if any
93
94*/
98
100 const MolBundle &bundle, const ROMol &query,
103 const ROMol &mol, const MolBundle &query,
106 const MolBundle &bundle, const MolBundle &query,
108
109//! Find a substructure match for a query
110/*!
111 \param mol The object to be searched
112 \param query The query
113 \param matchVect Used to return the match
114 (pre-existing contents will be deleted)
115 \param recursionPossible flags whether or not recursive matches are allowed
116 \param useChirality use atomic CIP codes as part of the comparison
117 \param useQueryQueryMatches if set, the contents of atom and bond queries
118 will be used as part of the matching
119
120 \return whether or not a match was found
121
 \note Convenience wrapper: copies the flags into a parameter object, sets
 its maxMatches to 1, and delegates to the params-based
 SubstructMatch(mol, query, params) overload. Only the first match
 returned by that call is copied into matchVect.
122*/
123template <typename T1, typename T2>
124bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
125 bool recursionPossible = true, bool useChirality = false,
126 bool useQueryQueryMatches = false) {
// NOTE(review): `params` is not declared anywhere in this extract (listing
// line 127 is absent); presumably a default-constructed
// SubstructMatchParameters -- confirm against the original header.
128 params.recursionPossible = recursionPossible;
129 params.useChirality = useChirality;
130 params.useQueryQueryMatches = useQueryQueryMatches;
// Only one match is reported to the caller, so cap the search at one result.
131 params.maxMatches = 1;
132 std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
// Copy the first match out, or leave matchVect empty when nothing matched.
133 if (matchVects.size()) {
134 matchVect = matchVects.front();
135 } else {
136 matchVect.clear();
137 }
// The result is derived from matchVect itself, so an empty first match is
// reported as "no match".
138 return matchVect.size() != 0;
// NOTE(review): the ';' after the closing brace is an empty declaration and
// is redundant.
139};
140
141//! Find all substructure matches for a query
142/*!
143 \param mol The object to be searched
144 \param query The query
145 \param matchVect Used to return the matches
146 (pre-existing contents will be deleted)
147 \param uniquify Toggles uniquification (by atom index) of the results
148 \param recursionPossible flags whether or not recursive matches are allowed
149 \param useChirality use atomic CIP codes as part of the comparison
150 \param useQueryQueryMatches if set, the contents of atom and bond queries
151 will be used as part of the matching
152 \param maxMatches The maximum number of matches that will be returned.
153 In high-symmetry cases with medium-sized molecules, it is
154 very
155 easy to end up with a combinatorial explosion in the
156 number of
157 possible matches. This argument prevents that from having
158 unintended consequences
 \param numThreads The number of threads to use when multi-threading is
 possible; the value is copied unchanged into
 params.numThreads for the underlying search.
159
160 \return the number of matches found
161
 \note Convenience wrapper: copies every argument into a parameter object
 and delegates to the params-based SubstructMatch(mol, query, params)
 overload; matchVect is overwritten with that call's result.
162*/
163template <typename T1, typename T2>
164unsigned int SubstructMatch(T1 &mol, const T2 &query,
165 std::vector<MatchVectType> &matchVect,
166 bool uniquify = true, bool recursionPossible = true,
167 bool useChirality = false,
168 bool useQueryQueryMatches = false,
169 unsigned int maxMatches = 1000,
170 int numThreads = 1) {
// NOTE(review): `params` is not declared anywhere in this extract (listing
// line 171 is absent); presumably a default-constructed
// SubstructMatchParameters -- confirm against the original header.
172 params.uniquify = uniquify;
173 params.recursionPossible = recursionPossible;
174 params.useChirality = useChirality;
175 params.useQueryQueryMatches = useQueryQueryMatches;
176 params.maxMatches = maxMatches;
177 params.numThreads = numThreads;
// Assignment replaces whatever matchVect held before the call.
178 matchVect = SubstructMatch(mol, query, params);
// size() is narrowed explicitly to the unsigned int return type.
179 return static_cast<unsigned int>(matchVect.size());
// NOTE(review): the ';' after the closing brace is an empty declaration and
// is redundant.
180};
181
182// ----------------------------------------------
183//
184// find one match in ResonanceMolSupplier object
185//
// Explicit specialization of a SubstructMatch template; the body mirrors the
// generic single-match overload (flags copied into `params`, maxMatches
// forced to 1, first match copied into matchVect).
// NOTE(review): listing line 187 (return type, function name and the first
// parameter -- presumably `resMolSupplier`, which the body uses) and listing
// line 191 (presumably the declaration of `params`) are absent from this
// extract; confirm both against the original header.
186template <>
188 const ROMol &query, MatchVectType &matchVect,
189 bool recursionPossible, bool useChirality,
190 bool useQueryQueryMatches) {
192 params.recursionPossible = recursionPossible;
193 params.useChirality = useChirality;
194 params.useQueryQueryMatches = useQueryQueryMatches;
// Only one match is reported to the caller, so cap the search at one result.
195 params.maxMatches = 1;
196 std::vector<MatchVectType> matchVects =
197 SubstructMatch(resMolSupplier, query, params);
// Copy the first match out, or leave matchVect empty when nothing matched.
198 if (matchVects.size()) {
199 matchVect = matchVects.front();
200 } else {
201 matchVect.clear();
202 }
203 return matchVect.size() != 0;
204}
205
// Explicit specialization of a SubstructMatch template; the body mirrors the
// generic all-matches overload (every argument copied into `params`, then
// matchVect overwritten with the result of the params-based call).
// NOTE(review): listing line 207 (return type, function name and the first
// parameter -- presumably `resMolSupplier`, which the body uses) and listing
// line 213 (presumably the declaration of `params`) are absent from this
// extract; confirm both against the original header.
206template <>
208 const ROMol &query,
209 std::vector<MatchVectType> &matchVect,
210 bool uniquify, bool recursionPossible,
211 bool useChirality, bool useQueryQueryMatches,
212 unsigned int maxMatches, int numThreads) {
214 params.uniquify = uniquify;
215 params.recursionPossible = recursionPossible;
216 params.useChirality = useChirality;
217 params.useQueryQueryMatches = useQueryQueryMatches;
218 params.maxMatches = maxMatches;
219 params.numThreads = numThreads;
// Assignment replaces whatever matchVect held before the call.
220 matchVect = SubstructMatch(resMolSupplier, query, params);
// size() is narrowed explicitly to the unsigned int return type.
221 return static_cast<unsigned int>(matchVect.size());
// NOTE(review): the ';' after the closing brace is an empty declaration and
// is redundant.
222};
223
224//! Class used as a final step to confirm whether or not a given atom->atom
225//! mapping is a valid substructure match.
// NOTE(review): the line that opens this class (listing line 226) is absent
// from this extract; the constructor below names it MolMatchFinalCheckFunctor.
227 public:
// Takes the query, the molecule and the match parameters by const reference.
228 MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
229 const SubstructMatchParameters &ps);
230
// Call operator taking two arrays of 32-bit unsigned values.
// NOTE(review): presumably q_c / m_c are the query-side and molecule-side
// indices of a candidate mapping; array lengths are not visible here --
// confirm against the implementation.
231 bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]);
232
233 private:
// Reference members: nothing is copied, so the referenced objects must remain
// alive for as long as these members are used.
234 const ROMol &d_query;
235 const ROMol &d_mol;
236 const SubstructMatchParameters &d_params;
// Maps an unsigned int key to a non-owning StereoGroup pointer.
// NOTE(review): presumably keyed by atom index in d_mol -- confirm.
237 std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
// HashedStorageType must be usable as an unordered_set key, so the bitset
// type is chosen only when the macro signals that hashing is available.
238#ifdef RDK_INTERNAL_BITSET_HAS_HASH
239 // Boost 1.71 added support for std::hash with dynamic_bitset.
240 using HashedStorageType = boost::dynamic_bitset<>;
241#else
242 // otherwise we use a less elegant solution
243 using HashedStorageType = std::string;
244#endif
// NOTE(review): presumably records the matches already encountered so that
// duplicates can be rejected -- confirm against the implementation.
245 std::unordered_set<HashedStorageType> matchesSeen;
246};
247
248} // namespace RDKit
249
250#endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition MolBundle.h:39
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[])
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition export.h:489
Std stuff.
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
RDKIT_SUBSTRUCTMATCH_EXPORT std::string substructMatchParamsToJSON(const SubstructMatchParameters &params)
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
bool recursionPossible
Allow recursive queries.
std::function< bool(const ROMol &mol, const std::vector< unsigned int > &match)> extraFinalCheck