跳到内容

原子特征化器

芳香性特征化器

基类:BaseAtomFeaturizer

用于基于原子芳香性进行特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
class AromaticityFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom based on its aromaticity."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 1

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes the aromaticity flag of all atoms or of the selected atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of dtype torch.int holding 1 for aromatic atoms and 0 otherwise, in the
            order the indices were visited.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([int(mol.GetAtomWithIdx(a).GetIsAromatic()) for a in _atom_indices], dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

get_atom_features(mol, atom_indices=None)

计算所有或选定原子的原子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个 torch.tensor,以整数表示各原子是否具有芳香性。

源代码位于 bionemo/geometric/atom_featurizers.py
197
198
199
200
201
202
203
204
205
206
207
208
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes the aromaticity flag of all atoms or of the selected atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of dtype torch.int holding 1 for aromatic atoms and 0 otherwise, in the
        order the indices were visited.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([int(mol.GetAtomWithIdx(a).GetIsAromatic()) for a in _atom_indices], dtype=torch.int)

原子序数特征化器

基类:BaseAtomFeaturizer

用于通过原子序数进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class AtomicNumberFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its atomic number."""

    def __init__(self, dim_atomic_num: Optional[int] = None) -> None:
        """Initializes AtomicNumberFeaturizer class.

        Args:
            dim_atomic_num: Dimensionality reported by `n_dim`. Any falsy value (None or 0) selects
                the default of 118.
        """
        DIM_ATOMIC_NUM = 118
        self.dim_atomic_num = dim_atomic_num if dim_atomic_num else DIM_ATOMIC_NUM

    # NOTE(review): the sibling featurizers expose `n_dim` as a @property, whereas here it is a plain
    # method that must be called. Left unchanged so existing `n_dim()` callers keep working.
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_atomic_num

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes the atomic number of all atoms or of the selected atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of dtype torch.int with the atomic number of each visited atom.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([mol.GetAtomWithIdx(a).GetAtomicNum() for a in _atom_indices], dtype=torch.int)

__init__(dim_atomic_num=None)

初始化 AtomicNumberFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
44
45
46
47
def __init__(self, dim_atomic_num: Optional[int] = None) -> None:
    """Stores the feature dimensionality, falling back to 118 when none is supplied.

    Args:
        dim_atomic_num: Dimensionality to report for the atomic-number feature. Any falsy value
            (None or 0) selects the default of 118.
    """
    default_dim = 118
    if dim_atomic_num:
        self.dim_atomic_num = dim_atomic_num
    else:
        self.dim_atomic_num = default_dim

get_atom_features(mol, atom_indices=None)

计算所有或选定原子的原子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个由整数组成的 torch.tensor,表示各原子的原子序数。

源代码位于 bionemo/geometric/atom_featurizers.py
53
54
55
56
57
58
59
60
61
62
63
64
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes the atomic number of all atoms or of the selected atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of dtype torch.int with the atomic number of each visited atom.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([mol.GetAtomWithIdx(a).GetAtomicNum() for a in _atom_indices], dtype=torch.int)

n_dim()

返回计算出的特征的维度。

源代码位于 bionemo/geometric/atom_featurizers.py
49
50
51
def n_dim(self) -> int:
    """Reports the feature dimensionality that was configured at construction time."""
    # NOTE(review): a plain method here, while the sibling featurizers expose `n_dim` as a @property.
    feature_dim = self.dim_atomic_num
    return feature_dim

原子半径特征化器

基类:BaseAtomFeaturizer

用于通过其键半径、共价半径和 vdW 半径进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
class AtomicRadiusFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its bond, covalent, and vdW radii."""

    def __init__(self) -> None:
        """Initializes AtomicRadiusFeaturizer class.

        Caches the RDKit periodic table and the per-feature lower/upper bounds exposed through
        `min_val` and `max_val`.
        """
        self.pt = Chem.GetPeriodicTable()
        self._min_val = torch.Tensor(
            [
                0.0,  # Bond radius
                0.28,  # Covalent radius
                1.2,  # van der Waals radius
            ]
        )

        self._max_val = torch.Tensor(
            [
                2.4,  # Bond radius
                2.6,  # Covalent radius
                3.0,  # van der Waals radius
            ]
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 3

    @property
    def min_val(self) -> torch.tensor:
        """Returns minimum values for features: bond, covalent, and vdW radius."""
        return self._min_val

    @property
    def max_val(self) -> torch.tensor:
        """Returns maximum values for features: bond, covalent, and vdW radius."""
        return self._max_val

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Computes bond radius, covalent radius, and van der Waals radius without normalization.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.Tensor with one row per visited atom; the columns are the periodic table's
            `GetRb0`, `GetRcovalent`, and `GetRvdw` values for that atom's atomic number.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

        feats = []
        for aidx in _atom_indices:
            atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
            feats.append([self.pt.GetRb0(atomic_num), self.pt.GetRcovalent(atomic_num), self.pt.GetRvdw(atomic_num)])

        return torch.Tensor(feats)

max_val: torch.tensor property

返回特征的最大值:键半径、共价半径和 vdW 半径。

min_val: torch.tensor property

返回特征的最小值:键半径、共价半径和 vdW 半径。

n_dim: int property

返回计算出的特征的维度。

__init__()

初始化 AtomicRadiusFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
def __init__(self) -> None:
    """Caches the RDKit periodic table and the lower/upper bounds of the three radius features."""
    self.pt = Chem.GetPeriodicTable()

    # Column order for both bounds: bond radius, covalent radius, van der Waals radius.
    lower_bounds = [0.0, 0.28, 1.2]
    upper_bounds = [2.4, 2.6, 3.0]

    self._min_val = torch.Tensor(lower_bounds)
    self._max_val = torch.Tensor(upper_bounds)

get_atom_features(mol, atom_indices=None)

计算未归一化的键半径、共价半径和范德华半径。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
Tensor

不同原子半径的 torch.Tensor。每个原子都通过键半径、共价半径和范德华半径进行特征化。

源代码位于 bionemo/geometric/atom_featurizers.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Computes bond radius, covalent radius, and van der Waals radius without normalization.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.Tensor with one row per visited atom; the columns are the periodic table's
        `GetRb0`, `GetRcovalent`, and `GetRvdw` values for that atom's atomic number.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

    feats = []
    for aidx in _atom_indices:
        atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
        feats.append([self.pt.GetRb0(atomic_num), self.pt.GetRcovalent(atomic_num), self.pt.GetRvdw(atomic_num)])

    return torch.Tensor(feats)

手性类型特征化器

基类:BaseAtomFeaturizer

用于通过其手性类型进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
class ChiralTypeFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its chirality type."""

    def __init__(self) -> None:
        """Initializes ChiralTypeFeaturizer class."""
        # The reported dimensionality is the number of entries in ChiralType.values.
        self.dim_chiral_types = len(ChiralType.values)

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_chiral_types

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes the chirality type of all atoms or of the selected atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of dtype torch.int holding `int(atom.GetChiralTag())` for each visited atom.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([int(mol.GetAtomWithIdx(a).GetChiralTag()) for a in _atom_indices], dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

__init__()

初始化 ChiralTypeFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
114
115
116
def __init__(self) -> None:
    """Records how many chirality types exist so `n_dim` can report it."""
    chiral_types = ChiralType.values
    self.dim_chiral_types = len(chiral_types)

get_atom_features(mol, atom_indices=None)

计算所有或选定原子的原子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个 torch.tensor,以整数表示各原子的手性类型。

源代码位于 bionemo/geometric/atom_featurizers.py
123
124
125
126
127
128
129
130
131
132
133
134
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes the chirality type of all atoms or of the selected atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of dtype torch.int holding `int(atom.GetChiralTag())` for each visited atom.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([int(mol.GetAtomWithIdx(a).GetChiralTag()) for a in _atom_indices], dtype=torch.int)

Crippen 特征化器

基类:BaseAtomFeaturizer

用于通过 Crippen logP 和摩尔折射率进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
class CrippenFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by Crippen logP and molar refractivity."""

    def __init__(self):
        """Initializes CrippenFeaturizer class.

        Stores the per-feature bounds that `get_atom_features` clamps the contributions to.
        """
        self._min_val = torch.Tensor(
            [
                -2.996,  # logP
                0.0,  # MR
            ]
        )

        self._max_val = torch.Tensor(
            [
                0.8857,  # logP
                6.0,  # MR
            ]
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 2

    @property
    def min_val(self) -> torch.tensor:
        """Returns minimum values for features: logP and molar refractivity."""
        return self._min_val

    @property
    def max_val(self) -> torch.tensor:
        """Returns maximum values for features: logP and molar refractivity."""
        return self._max_val

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Compute atomic contributions to Crippen logP and molar refractivity.

        Contributions are computed for the whole molecule, clamped to `[min_val, max_val]`, and then
        the rows of the requested atoms are selected.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are returned for every atom of `mol`. An empty iterable selects no rows.

        Returns:
            A torch.Tensor with one row per selected atom; the columns are the clamped logP and molar
            refractivity contributions.
        """
        logp_mr_list = torch.Tensor(rdMolDescriptors._CalcCrippenContribs(mol))
        logp_mr_list = torch.clamp(logp_mr_list, min=self.min_val, max=self.max_val)
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        # An explicit long index tensor keeps row selection well-defined for any iterable,
        # including empty ones and generators.
        row_index = torch.tensor(list(_atom_indices), dtype=torch.long)
        return logp_mr_list[row_index, :]

max_val: torch.tensor property

返回特征的最大值:logP 和摩尔折射率。

min_val: torch.tensor property

返回特征的最小值:logP 和摩尔折射率。

n_dim: int property

返回计算出的特征的维度。

__init__()

初始化 CrippenFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
def __init__(self):
    """Stores the lower and upper bounds of the two Crippen features."""
    # Column order for both bounds: logP, MR.
    lower_bounds = [-2.996, 0.0]
    upper_bounds = [0.8857, 6.0]

    self._min_val = torch.Tensor(lower_bounds)
    self._max_val = torch.Tensor(upper_bounds)

get_atom_features(mol, atom_indices=None)

计算原子对 Crippen logP 和摩尔折射率的贡献。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
Tensor

一个 torch.Tensor,通过原子对 logP 和摩尔折射率的原子贡献进行特征化。

源代码位于 bionemo/geometric/atom_featurizers.py
497
498
499
500
501
502
503
504
505
506
507
508
509
510
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Compute atomic contributions to Crippen logP and molar refractivity.

    Contributions are computed for the whole molecule, clamped to `[min_val, max_val]`, and then
    the rows of the requested atoms are selected.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are returned for every atom of `mol`. An empty iterable selects no rows.

    Returns:
        A torch.Tensor with one row per selected atom; the columns are the clamped logP and molar
        refractivity contributions.
    """
    logp_mr_list = torch.Tensor(rdMolDescriptors._CalcCrippenContribs(mol))
    logp_mr_list = torch.clamp(logp_mr_list, min=self.min_val, max=self.max_val)
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    # An explicit long index tensor keeps row selection well-defined for any iterable,
    # including empty ones and generators.
    row_index = torch.tensor(list(_atom_indices), dtype=torch.long)
    return logp_mr_list[row_index, :]

度特征化器

基类:BaseAtomFeaturizer

用于通过其连接度(不包括氢)进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class DegreeFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its degree (excluding hydrogens) of connectivity."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 6

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes the degree of all atoms or of the selected atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of dtype torch.int holding `atom.GetDegree()` for each visited atom.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([mol.GetAtomWithIdx(a).GetDegree() for a in _atom_indices], dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

get_atom_features(mol, atom_indices=None)

计算所有或选定原子的原子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个由整数组成的 torch.tensor,表示各原子的连接度。

源代码位于 bionemo/geometric/atom_featurizers.py
75
76
77
78
79
80
81
82
83
84
85
86
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes the degree of all atoms or of the selected atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of dtype torch.int holding `atom.GetDegree()` for each visited atom.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([mol.GetAtomWithIdx(a).GetDegree() for a in _atom_indices], dtype=torch.int)

电子性质特征化器

基类:BaseAtomFeaturizer

用于通过其电子性质进行原子特征化的类。

此类计算电子性质,如电负性、电离能和电子亲和力。

源代码位于 bionemo/geometric/atom_featurizers.py
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
class ElectronicPropertyFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its electronic properties.

    This class computes electronic properties like electronegativity, ionization energy, and electron affinity.
    """

    def __init__(self, data_file=None) -> None:
        """Initializes ElectronicPropertyFeaturizer class.

        Args:
            data_file: Path to the data file. When None, `data/electronic_data.csv` located next to
                this module is used. The file is read with `pandas.read_csv` and must provide the
                columns `AtomicNumber`, `Electronegativity`, `IonizationEnergy`, and `ElectronAffinity`.
        """
        if data_file is None:
            # Use default
            root_path = Path(__file__).resolve().parent
            data_file = root_path / "data" / "electronic_data.csv"
        self.data_df = pd.read_csv(data_file).set_index("AtomicNumber")

        # Per-property lookup tables keyed by atomic number.
        self.pauling_en_dict = self.data_df["Electronegativity"].to_dict()
        self.ie_dict = self.data_df["IonizationEnergy"].to_dict()
        self.ea_dict = self.data_df["ElectronAffinity"].to_dict()

        # Feature bounds are taken from the data itself, in the same column order as the features.
        self._min_val = torch.Tensor(
            [
                self.data_df["Electronegativity"].min(),
                self.data_df["IonizationEnergy"].min(),
                self.data_df["ElectronAffinity"].min(),
            ]
        )

        self._max_val = torch.Tensor(
            [
                self.data_df["Electronegativity"].max(),
                self.data_df["IonizationEnergy"].max(),
                self.data_df["ElectronAffinity"].max(),
            ]
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 3

    @property
    def min_val(self) -> torch.Tensor:
        """Returns minimum values for features: electronegativity, ionization energy, electron affinity."""
        return self._min_val

    @property
    def max_val(self) -> torch.Tensor:
        """Returns maximum values for features: electronegativity, ionization energy, electron affinity."""
        return self._max_val

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
        """Returns electronic features of the atom.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.Tensor consisting of Pauling scale electronegativity, ionization energy, and electron
            affinity for each visited atom.

        Raises:
            KeyError: If an atom's atomic number is missing from the loaded data.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

        feats = []
        for aidx in _atom_indices:
            atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
            feats.append([self.pauling_en_dict[atomic_num], self.ie_dict[atomic_num], self.ea_dict[atomic_num]])
        return torch.Tensor(feats)

max_val: torch.Tensor property

返回特征的最大值:电负性、电离能、电子亲和力。

min_val: torch.Tensor property

返回特征的最小值:电负性、电离能、电子亲和力。

n_dim: int property

返回计算出的特征的维度。

__init__(data_file=None)

初始化 ElectronicPropertyFeaturizer 类。

参数

名称 类型 描述 默认值
data_file

数据文件的路径。

源代码位于 bionemo/geometric/atom_featurizers.py
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def __init__(self, data_file=None) -> None:
    """Initializes ElectronicPropertyFeaturizer class.

    Loads the per-element electronic data, builds lookup tables keyed by atomic number, and derives
    the feature bounds from the data.

    Args:
        data_file: Path to the data file. When None, `data/electronic_data.csv` located next to
            this module is used. The file is read with `pandas.read_csv` and must provide the
            columns `AtomicNumber`, `Electronegativity`, `IonizationEnergy`, and `ElectronAffinity`.
    """
    if data_file is None:
        # Use default
        data_file = Path(__file__).resolve().parent / "data" / "electronic_data.csv"
    table = pd.read_csv(data_file).set_index("AtomicNumber")
    self.data_df = table

    electronegativity = table["Electronegativity"]
    ionization_energy = table["IonizationEnergy"]
    electron_affinity = table["ElectronAffinity"]

    self.pauling_en_dict = electronegativity.to_dict()
    self.ie_dict = ionization_energy.to_dict()
    self.ea_dict = electron_affinity.to_dict()

    # Bounds follow the same column order as the features themselves.
    feature_columns = (electronegativity, ionization_energy, electron_affinity)
    self._min_val = torch.Tensor([column.min() for column in feature_columns])
    self._max_val = torch.Tensor([column.max() for column in feature_columns])

get_atom_features(mol, atom_indices=None)

返回原子的电子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
Tensor

一个 torch.Tensor,由每个原子的鲍林标度电负性、电离能和电子亲和力组成。

源代码位于 bionemo/geometric/atom_featurizers.py
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.Tensor:
    """Returns electronic features of the atom.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.Tensor consisting of Pauling scale electronegativity, ionization energy, and electron
        affinity for each visited atom.

    Raises:
        KeyError: If an atom's atomic number is missing from the lookup tables.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

    feats = []
    for aidx in _atom_indices:
        atomic_num = mol.GetAtomWithIdx(aidx).GetAtomicNum()
        feats.append([self.pauling_en_dict[atomic_num], self.ie_dict[atomic_num], self.ea_dict[atomic_num]])
    return torch.Tensor(feats)

杂化特征化器

基类:BaseAtomFeaturizer

用于通过其杂化类型进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
class HybridizationFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its hybridization type."""

    def __init__(self) -> None:
        """Initializes HybridizationFeaturizer class."""
        # The reported dimensionality is the number of entries in HybridizationType.values.
        self.dim_hybridization_types = len(HybridizationType.values)

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_hybridization_types

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes the hybridization type of all atoms or of the selected atoms.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of dtype torch.int holding `int(atom.GetHybridization())` for each visited atom.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([int(mol.GetAtomWithIdx(a).GetHybridization()) for a in _atom_indices], dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

__init__()

初始化 HybridizationFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
166
167
168
def __init__(self) -> None:
    """Records how many hybridization types exist so `n_dim` can report it."""
    hybridization_types = HybridizationType.values
    self.dim_hybridization_types = len(hybridization_types)

get_atom_features(mol, atom_indices=None)

计算所有或选定原子的原子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个 torch.tensor,以整数表示各原子的杂化类型。

源代码位于 bionemo/geometric/atom_featurizers.py
175
176
177
178
179
180
181
182
183
184
185
186
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes the hybridization type of all atoms or of the selected atoms.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of dtype torch.int holding `int(atom.GetHybridization())` for each visited atom.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([int(mol.GetAtomWithIdx(a).GetHybridization()) for a in _atom_indices], dtype=torch.int)

元素周期表特征化器

基类:BaseAtomFeaturizer

用于通过其在元素周期表中的位置(周期和族)进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
class PeriodicTableFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its position (period and group) in the periodic table."""

    def __init__(self) -> None:
        """Initializes PeriodicTableFeaturizer class."""
        self.pt = Chem.GetPeriodicTable()
        # Largest atomic number in each period (cumulative element counts), used by `get_period`.
        self.period_limits = [2, 10, 18, 36, 54, 86, 118]

    # NOTE(review): `get_atom_features` returns two columns (period, group) while `n_dim` is 25;
    # presumably 25 is the size of a downstream encoding -- confirm against the consumer.
    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 25

    def get_period(self, atom: Chem.Atom) -> int:
        """Returns periodic table period of atom.

        The period is the 1-based position of the first entry in `period_limits` that is greater
        than or equal to the atomic number. None is returned when the atomic number exceeds every limit.
        """
        atomic_number = atom.GetAtomicNum()

        # Determine the period based on atomic number.
        for period, limit in enumerate(self.period_limits, start=1):
            if atomic_number <= limit:
                return period
        return None

    def get_group(self, atom: Chem.Atom) -> int:
        """Returns periodic table group of atom.

        The value is the periodic table's `GetNOuterElecs` result for the atom's atomic number.
        """
        group = self.pt.GetNOuterElecs(atom.GetAtomicNum())
        return group

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes periodic table position of atoms of all or select atoms specific in `atom_indices`.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of dtype torch.int with one row per visited atom. First column is the
            period and second column is the group.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

        positions = []
        for a in _atom_indices:
            atom = mol.GetAtomWithIdx(a)  # fetch once, reuse for both lookups
            positions.append((self.get_period(atom), self.get_group(atom)))
        return torch.tensor(positions, dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

__init__()

初始化 PeriodicTableFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
214
215
216
217
218
def __init__(self) -> None:
    """Caches the RDKit periodic table and the atomic-number bound of each period."""
    # Largest atomic number in each period (cumulative element counts), consumed by `get_period`.
    period_upper_bounds = [2, 10, 18, 36, 54, 86, 118]
    self.pt = Chem.GetPeriodicTable()
    self.period_limits = period_upper_bounds

get_atom_features(mol, atom_indices=None)

计算所有或选定原子(在 atom_indices 中指定)的元素周期表位置。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个 torch.tensor,表示原子在元素周期表中的位置。第一个索引表示周期,第二个索引表示族。

源代码位于 bionemo/geometric/atom_featurizers.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes periodic table position of atoms of all or select atoms specific in `atom_indices`.

    Args:
        mol: An RDKit Chem.Mol object.
        atom_indices: Indices of atoms for feature computation. When None (the default), features
            are computed for every atom of `mol`. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of dtype torch.int with one row per visited atom. First column is the
        period and second column is the group.
    """
    # Compare against None instead of relying on truthiness: an explicitly empty selection must not
    # fall back to "all atoms", and array-like selections have no unambiguous truth value.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

    positions = []
    for a in _atom_indices:
        atom = mol.GetAtomWithIdx(a)  # fetch once, reuse for both lookups
        positions.append((self.get_period(atom), self.get_group(atom)))
    return torch.tensor(positions, dtype=torch.int)

get_group(atom)

返回原子的元素周期表族。

源代码位于 bionemo/geometric/atom_featurizers.py
235
236
237
238
def get_group(self, atom: Chem.Atom) -> int:
    """Returns periodic table group of atom.

    The value is the periodic table's `GetNOuterElecs` result for the atom's atomic number.
    """
    atomic_number = atom.GetAtomicNum()
    return self.pt.GetNOuterElecs(atomic_number)

get_period(atom)

返回原子的元素周期表周期。

源代码位于 bionemo/geometric/atom_featurizers.py
225
226
227
228
229
230
231
232
233
def get_period(self, atom: Chem.Atom) -> int:
    """Returns periodic table period of atom.

    The period is the 1-based position of the first entry in `period_limits` that is greater than
    or equal to the atomic number. None is returned when the atomic number exceeds every limit.
    """
    z = atom.GetAtomicNum()
    # First bound that is >= z wins; fall through to None when no bound qualifies.
    return next(
        (row for row, upper in enumerate(self.period_limits, start=1) if z <= upper),
        None,
    )

骨架特征化器

基类:BaseAtomFeaturizer

用于基于原子是否出现在 Bemis-Murcko 骨架中进行特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
class ScaffoldFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom based on whether it is present in Bemis-Murcko scaffold."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 1

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Returns position of the atoms with respect to Bemis-Murcko scaffold.

        Args:
            mol: An RDKit Chem.Mol object.
            atom_indices: Indices of atoms for feature computation. When None (the default), features
                are computed for every atom of `mol`. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of dtype torch.int holding 1 for atoms that are part of the substructure
            match of the molecule's Bemis-Murcko scaffold and 0 otherwise.
        """
        # Compare against None instead of relying on truthiness: an explicitly empty selection must not
        # fall back to "all atoms", and array-like selections have no unambiguous truth value.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        # A set makes the per-atom membership test constant time.
        scaffold_atom_idx = set(mol.GetSubstructMatch(scaffold))

        feats = [int(aidx in scaffold_atom_idx) for aidx in _atom_indices]
        return torch.tensor(feats, dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

get_atom_features(mol, atom_indices=None)

返回原子相对于 Bemis-Murcko 骨架的位置。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个 torch.tensor,指示原子是否出现在分子的 Bemis-Murcko 骨架中。

源代码位于 bionemo/geometric/atom_featurizers.py
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Flags atoms that belong to the Bemis-Murcko scaffold of the molecule.

    Args:
        mol: An RDkit Chem.Mol object
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for all atoms. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of integers with one entry per requested atom: 1 if the atom is part of the
        Bemis-Murcko scaffold match in ``mol``, 0 otherwise.
    """
    # Only None means "all atoms"; testing truthiness would turn an empty selection into a full one
    # and fails outright for array-like inputs whose truth value is ambiguous.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices

    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    # Match the scaffold back onto the parent molecule to obtain indices valid in ``mol``.
    scaffold_atom_idx = set(mol.GetSubstructMatch(scaffold))

    feats = [int(aidx in scaffold_atom_idx) for aidx in _atom_indices]
    return torch.tensor(feats, dtype=torch.int)

Smarts 特征化器

基类:BaseAtomFeaturizer

用于根据氢键供体/受体以及酸性/碱性对原子进行特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
class SmartsFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by hydrogen donor/acceptor and acidity/basicity."""

    def __init__(self):
        """Initializes SmartsFeaturizer class.

        Compiles one SMARTS query per feature (hydrogen donor, hydrogen acceptor, acidic, basic) so that
        the queries are built once and reused for every molecule.
        """
        self.hydrogen_donor = Chem.MolFromSmarts("[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0]),n&H1&+0]")
        self.hydrogen_acceptor = Chem.MolFromSmarts(
            "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),"
            "n&H0&+0,$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"
        )
        self.acidic = Chem.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
        self.basic = Chem.MolFromSmarts(
            "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))]),$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);"
            "!$([C,a](=O))]),$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"
        )

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 4

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes matches against the SMARTS patterns compiled in ``__init__``.

        Args:
            mol: An RDkit Chem.Mol object
            atom_indices: Indices of atoms for feature computation. When None (the default), features are
                computed for all atoms. An empty iterable yields a tensor with zero rows.

        Returns:
            A torch.tensor of integers with one row per requested atom and four columns indicating, in
            order, if the atom is a hydrogen bond donor, a hydrogen bond acceptor, acidic, or basic.
        """
        patterns = (self.hydrogen_donor, self.hydrogen_acceptor, self.acidic, self.basic)
        # Flatten every match of a pattern into one set of atom indices: membership tests become O(1)
        # and the repeated tuple concatenation of ``sum(matches, ())`` is avoided.
        match_sets = [{idx for match in mol.GetSubstructMatches(pattern) for idx in match} for pattern in patterns]

        # Only None means "all atoms"; testing truthiness would turn an empty selection into a full one
        # and fails outright for array-like inputs whose truth value is ambiguous.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        feats = [[aidx in matched for matched in match_sets] for aidx in _atom_indices]

        # The explicit shape keeps the (n_atoms, 4) layout even when no atom was requested.
        return torch.tensor(feats, dtype=torch.int).reshape(len(feats), len(match_sets))

n_dim: int property

返回计算出的特征的维度。

__init__()

初始化 SmartsFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
416
417
418
419
420
421
422
423
424
425
426
427
def __init__(self):
    """Compiles the SMARTS queries used by the featurizer.

    Four query molecules are built with Chem.MolFromSmarts and stored on the instance as
    ``hydrogen_donor``, ``hydrogen_acceptor``, ``acidic`` and ``basic``.
    """
    donor_smarts = "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0]),n&H1&+0]"
    acceptor_smarts = (
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),"
        "n&H0&+0,$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]"
    )
    acid_smarts = "[$([C,S](=[O,S,P])-[O;H1,-1])]"
    base_smarts = (
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))]),$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);"
        "!$([C,a](=O))]),$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]"
    )

    # Queries are compiled in the same order as the feature columns they feed.
    self.hydrogen_donor = Chem.MolFromSmarts(donor_smarts)
    self.hydrogen_acceptor = Chem.MolFromSmarts(acceptor_smarts)
    self.acidic = Chem.MolFromSmarts(acid_smarts)
    self.basic = Chem.MolFromSmarts(base_smarts)

get_atom_features(mol, atom_indices=None)

根据预定义的 SMARTS 模式计算匹配项。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个 torch.tensor,指示原子是否为氢键供体、氢键受体、酸性或碱性。

源代码位于 bionemo/geometric/atom_featurizers.py
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes matches against the SMARTS patterns stored on the featurizer.

    Args:
        mol: An RDkit Chem.Mol object
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for all atoms. An empty iterable yields a tensor with zero rows.

    Returns:
        A torch.tensor of integers with one row per requested atom and four columns indicating, in
        order, if the atom is a hydrogen bond donor, a hydrogen bond acceptor, acidic, or basic.
    """
    patterns = (self.hydrogen_donor, self.hydrogen_acceptor, self.acidic, self.basic)
    # Flatten every match of a pattern into one set of atom indices: membership tests become O(1)
    # and the repeated tuple concatenation of ``sum(matches, ())`` is avoided.
    match_sets = [{idx for match in mol.GetSubstructMatches(pattern) for idx in match} for pattern in patterns]

    # Only None means "all atoms"; testing truthiness would turn an empty selection into a full one
    # and fails outright for array-like inputs whose truth value is ambiguous.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    feats = [[aidx in matched for matched in match_sets] for aidx in _atom_indices]

    # The explicit shape keeps the (n_atoms, 4) layout even when no atom was requested.
    return torch.tensor(feats, dtype=torch.int).reshape(len(feats), len(match_sets))

总度特征化器

基类:BaseAtomFeaturizer

用于根据原子的总连接度(包括氢原子)进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class TotalDegreeFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by its total degree (including hydrogens) of connectivity."""

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return 6

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes features for all atoms or for the selected atoms.

        Args:
            mol: An RDkit Chem.Mol object
            atom_indices: Indices of atoms for feature computation. When None (the default), features are
                computed for all atoms. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of integers representing total connectivity (including hydrogens) of atoms.
        """
        # Only None means "all atoms"; testing truthiness would turn an empty selection into a full one
        # and fails outright for array-like inputs whose truth value is ambiguous.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([mol.GetAtomWithIdx(a).GetTotalDegree() for a in _atom_indices], dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

get_atom_features(mol, atom_indices=None)

计算所有或选定原子的原子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个整数类型的 torch.tensor,表示各原子的总连接度(包括氢原子)。

源代码位于 bionemo/geometric/atom_featurizers.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes features for all atoms or for the selected atoms.

    Args:
        mol: An RDkit Chem.Mol object
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for all atoms. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of integers representing total connectivity (including hydrogens) of atoms.
    """
    # Only None means "all atoms"; testing truthiness would turn an empty selection into a full one
    # and fails outright for array-like inputs whose truth value is ambiguous.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([mol.GetAtomWithIdx(a).GetTotalDegree() for a in _atom_indices], dtype=torch.int)

总氢数特征化器

基类:BaseAtomFeaturizer

用于通过氢原子总数进行原子特征化的类。

源代码位于 bionemo/geometric/atom_featurizers.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
class TotalNumHFeaturizer(BaseAtomFeaturizer):
    """Class for featurizing atom by total number of hydrogens."""

    def __init__(self) -> None:
        """Initializes TotalNumHFeaturizer class."""
        self.dim_total_num_hydrogen = 5  # 4 + 1 (no hydrogens)

    @property
    def n_dim(self) -> int:
        """Returns dimensionality of the computed features."""
        return self.dim_total_num_hydrogen

    def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
        """Computes features for all atoms or for the selected atoms.

        Args:
            mol: An RDkit Chem.Mol object
            atom_indices: Indices of atoms for feature computation. When None (the default), features are
                computed for all atoms. An empty iterable yields an empty tensor.

        Returns:
            A torch.tensor of integers representing total number of hydrogens on atoms.
        """
        # Only None means "all atoms"; testing truthiness would turn an empty selection into a full one
        # and fails outright for array-like inputs whose truth value is ambiguous.
        _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
        return torch.tensor([mol.GetAtomWithIdx(a).GetTotalNumHs() for a in _atom_indices], dtype=torch.int)

n_dim: int property

返回计算出的特征的维度。

__init__()

初始化 TotalNumHFeaturizer 类。

源代码位于 bionemo/geometric/atom_featurizers.py
140
141
142
def __init__(self) -> None:
    """Sets up the featurizer with its fixed feature dimensionality."""
    # 5 = 4 + 1, the extra one accounting for atoms with no hydrogens.
    self.dim_total_num_hydrogen = 5

get_atom_features(mol, atom_indices=None)

计算所有或选定原子的原子特征。

参数

名称 类型 描述 默认值
mol Mol

RDkit Chem.Mol 对象

必需
atom_indices Optional[Iterable]

用于特征计算的原子索引。默认情况下,计算所有原子的特征。

返回值

类型 描述
tensor

一个整数类型的 torch.tensor,表示各原子上的氢原子总数。

源代码位于 bionemo/geometric/atom_featurizers.py
149
150
151
152
153
154
155
156
157
158
159
160
def get_atom_features(self, mol: Mol, atom_indices: Optional[Iterable] = None) -> torch.tensor:
    """Computes features for all atoms or for the selected atoms.

    Args:
        mol: An RDkit Chem.Mol object
        atom_indices: Indices of atoms for feature computation. When None (the default), features are
            computed for all atoms. An empty iterable yields an empty tensor.

    Returns:
        A torch.tensor of integers representing total number of hydrogens on atoms.
    """
    # Only None means "all atoms"; testing truthiness would turn an empty selection into a full one
    # and fails outright for array-like inputs whose truth value is ambiguous.
    _atom_indices = range(mol.GetNumAtoms()) if atom_indices is None else atom_indices
    return torch.tensor([mol.GetAtomWithIdx(a).GetTotalNumHs() for a in _atom_indices], dtype=torch.int)