Stepwise Polymer Construction¶
This guide examines polymer construction at the reaction level. It shows which atoms are selected, which bond is formed, which atoms are removed, and why topology and force-field types must be regenerated after each coupling step. The workflow is first carried out explicitly with Atomistic operations — merge, def_bond, del_atom, get_topo, typify — and then repeated through Reacter to show the same logic in packaged form.
!!! note "Prerequisites"
This guide requires RDKit (for generate_3d) and the oplsaa.xml force field file.
The monomer is ethylene oxide (EO), represented in BigSMILES as {[][<]OCCOCCOCCO[>][]}. Each EO unit carries two reactive port markers: < on the left oxygen and > on the right oxygen. They are not separate atoms — they annotate existing atoms to signal where coupling occurs.
import molpy as mp
import numpy as np
from pathlib import Path
from molpy.reacter import find_neighbors, find_port
from molpy.typifier import OplsAtomisticTypifier
output_dir = Path("02_output")
output_dir.mkdir(exist_ok=True)
ff = mp.io.read_xml_forcefield("oplsaa.xml")
typifier = OplsAtomisticTypifier(ff, strict_typing=False)
MONOMER_BIGSMILES = "{[][<]OCCOCCOCCO[>][]}"
def make_eo_monomer():
m = mp.parser.parse_monomer(MONOMER_BIGSMILES)
m = mp.adapter.generate_3d(m, add_hydrogens=True, optimize=True)
m = m.get_topo(gen_angle=True, gen_dihe=True)
m = typifier.typify(m)
return m
mon_a = make_eo_monomer()
mon_b = make_eo_monomer()
mon_b.move([10.0, 0.0, 0.0]) # displace so coordinates don't overlap after merging
print(f"monomer: {len(mon_a.atoms)} atoms, {len(mon_a.bonds)} bonds")
print(f" {len(mon_a.angles)} angles, {len(mon_a.dihedrals)} dihedrals")
print(f"untyped: {sum(1 for a in mon_a.atoms if a.get('type') is None)} atoms")
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[1], line 1 ----> 1 import molpy as mp 2 import numpy as np 3 from pathlib import Path 4 from molpy.reacter import find_neighbors, find_port File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/__init__.py:4 1 """MolPy — Composable molecular modeling in Python.""" 3 # Submodules ----> 4 from . import adapter, data, engine, io, legacy, parser, potential, typifier 6 # Core 7 from .core.atomistic import Angle, Atom, Atomistic, Bond, Dihedral, Improper File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/adapter/__init__.py:12 10 # Optional RDKit adapter 11 try: # pragma: no cover ---> 12 from .rdkit import MP_ID, Generate3D, OptimizeGeometry, RDKitAdapter, generate_3d 14 _HAS_RDKIT = True 15 except ModuleNotFoundError: # rdkit missing File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/adapter/rdkit.py:18 15 from rdkit import Chem 16 from rdkit.Chem import AllChem ---> 18 from molpy.core.atomistic import Atomistic 20 from .base import Adapter 22 MP_ID = "mp_id" File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/core/__init__.py:1 ----> 1 from .atomistic import Angle, Atom, Atomistic, Bond, Dihedral, Improper 2 from .box import Box 3 from .cg import Bead, CGBond, CoarseGrain File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/core/atomistic.py:232 228 """Third substituent atom.""" 229 return self.endpoints[3] --> 232 class Atomistic(Struct, MembershipMixin, SpatialMixin, ConnectivityMixin, molrs.Graph): 233 """All-atom molecular structure with full topological information. 234 235 Atomistic is the primary container for molecular systems in MolPy. It (...) 245 Atom, Bond, Angle, Dihedral, Struct, Frame 246 """ 248 def __init__(self, **props) -> None: AttributeError: module 'molrs' has no attribute 'Graph'
Export the monomer as the first checkpoint:
def save_step(name: str, chain, ff, output_dir: Path) -> None:
frame = chain.to_frame()
atoms = frame["atoms"]
atoms["mol_id"] = np.ones(atoms.nrows, dtype=int)
coords = np.column_stack([atoms["x"], atoms["y"], atoms["z"]])
lo = coords.min(axis=0) - 5.0
hi = coords.max(axis=0) + 5.0
frame.box = mp.Box(matrix=hi - lo, origin=lo)
mp.io.write_lammps_system(output_dir / name, frame, ff)
print(f" saved: {output_dir / name / (name + '.data')}")
save_step("peo1", mon_a, ff, output_dir)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[2], line 1 ----> 1 def save_step(name: str, chain, ff, output_dir: Path) -> None: 2 frame = chain.to_frame() 3 atoms = frame["atoms"] 4 atoms["mol_id"] = np.ones(atoms.nrows, dtype=int) NameError: name 'Path' is not defined
Identify the four actors in the coupling reaction¶
Dehydration condensation removes one water molecule per bond formed. Before touching any topology, locate the four atoms involved:
- Anchor carbon — the carbon on
mon_a's right end that will form the new C–O bond - Port oxygen of mon_a (
>) — this is the hydroxyl oxygen; it leaves as part of –OH - Hydroxyl hydrogen — the H attached to that port oxygen; leaves with it
- Leaving hydrogen of mon_b — one H on
mon_b's left port oxygen (<); leaves as the second half of water
# Right port of mon_a and left port of mon_b
port_r = find_port(mon_a, ">")
port_l = find_port(mon_b, "<")
# The anchor carbon is the C neighbour of the right port oxygen
anchor_C = [a for a in find_neighbors(mon_a, port_r) if a.get("element") == "C"][0]
# The leaving –OH: the port oxygen and its single hydrogen
leaving_OH_oxygen = port_r
leaving_OH_hydrogen = find_neighbors(mon_a, leaving_OH_oxygen, element="H")[0]
# The leaving H: one hydrogen on mon_b's left port
leaving_H_b = find_neighbors(mon_b, port_l, element="H")[0]
print(f"new bond will form: {anchor_C.get('element')} — {port_l.get('element')}")
print(
f"leaving from mon_a: {leaving_OH_oxygen.get('element')} + {leaving_OH_hydrogen.get('element')} (= OH)"
)
print(f"leaving from mon_b: {leaving_H_b.get('element')} (= H)")
print(f"net removal: H₂O")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[3], line 2 1 # Right port of mon_a and left port of mon_b ----> 2 port_r = find_port(mon_a, ">") 3 port_l = find_port(mon_b, "<") 4 5 # The anchor carbon is the C neighbour of the right port oxygen NameError: name 'find_port' is not defined
Merge, form the bond, remove leaving atoms¶
Three Atomistic operations execute the coupling:
# 1. Merge both monomers into a single Atomistic (atoms and bonds from both are combined)
dimer = mon_a.merge(mon_b)
print(
f"after merge: {len(dimer.atoms)} atoms (= {len(mon_a.atoms)} + {len(mon_b.atoms)})"
)
# 2. Form the new C–O bond between the anchor carbon and mon_b's port oxygen
new_bond = dimer.def_bond(anchor_C, port_l)
print(
f"new bond: {new_bond.endpoints[0].get('element')}—{new_bond.endpoints[1].get('element')}"
)
# 3. Remove the three leaving atoms; their incident bonds are dropped automatically
dimer.del_atom(leaving_OH_oxygen, leaving_OH_hydrogen, leaving_H_b)
print(
f"after removal: {len(dimer.atoms)} atoms (= {len(dimer.atoms)} = {len(mon_a.atoms) + len(mon_b.atoms)} − 3)"
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[4], line 2 1 # 1. Merge both monomers into a single Atomistic (atoms and bonds from both are combined) ----> 2 dimer = mon_a.merge(mon_b) 3 print( 4 f"after merge: {len(dimer.atoms)} atoms (= {len(mon_a.atoms)} + {len(mon_b.atoms)})" 5 ) NameError: name 'mon_a' is not defined
del_atom drops all bonds, angles, and dihedrals that reference the removed atoms. The new C–O bond exists, but the topology around it is incomplete.
Regenerate topology and re-type the junction¶
The new bond creates three-body and four-body paths that did not exist in either monomer. get_topo enumerates them; typify assigns force field types to everything it finds.
# Check what is unresolved at the junction before fixing
print(
f"untyped bonds before get_topo: {sum(1 for b in dimer.bonds if b.get('type') is None)}"
)
print(
f"untyped angles before get_topo: {sum(1 for a in dimer.angles if a.get('type') is None)}"
)
print(
f"untyped dihedrals before get_topo: {sum(1 for d in dimer.dihedrals if d.get('type') is None)}"
)
angles_before = len(dimer.angles)
dihe_before = len(dimer.dihedrals)
dimer = dimer.get_topo(gen_angle=True, gen_dihe=True)
print(
f"\nangles: {angles_before} → {len(dimer.angles)} (+{len(dimer.angles) - angles_before} cross-junction)"
)
print(
f"dihedrals: {dihe_before} → {len(dimer.dihedrals)} (+{len(dimer.dihedrals) - dihe_before} cross-junction)"
)
dimer = typifier.typify(dimer)
print(f"\nuntyped bonds: {sum(1 for b in dimer.bonds if b.get('type') is None)}")
print(f"untyped angles: {sum(1 for a in dimer.angles if a.get('type') is None)}")
print(f"untyped dihedrals: {sum(1 for d in dimer.dihedrals if d.get('type') is None)}")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[5], line 3 1 # Check what is unresolved at the junction before fixing 2 print( ----> 3 f"untyped bonds before get_topo: {sum(1 for b in dimer.bonds if b.get('type') is None)}" 4 ) 5 print( 6 f"untyped angles before get_topo: {sum(1 for a in dimer.angles if a.get('type') is None)}" NameError: name 'dimer' is not defined
Every interaction is now typed. Export the dimer:
save_step("peo2", dimer, ff, output_dir)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[6], line 1 ----> 1 save_step("peo2", dimer, ff, output_dir) NameError: name 'save_step' is not defined
The same five operations build a chain of any length¶
The pattern — merge → def_bond → del_atom → get_topo → typify — can be repeated identically for every subsequent unit. A loop drives it from the dimer to a pentamer, writing a snapshot after each step.
chain = make_eo_monomer()
save_step("peo1", chain, ff, output_dir)
for i in range(1, 5):
unit = make_eo_monomer()
unit.move([10.0 * i, 0.0, 0.0])
# Identify reactive atoms on the current chain end and the incoming unit
p_r = find_port(chain, ">")
p_l = find_port(unit, "<")
anc = [a for a in find_neighbors(chain, p_r) if a.get("element") == "C"][0]
l_O = p_r
l_H1 = find_neighbors(chain, l_O, element="H")[0]
l_H2 = find_neighbors(unit, p_l, element="H")[0]
# Couple
chain = chain.merge(unit)
chain.def_bond(anc, p_l)
chain.del_atom(l_O, l_H1, l_H2)
chain = chain.get_topo(gen_angle=True, gen_dihe=True)
chain = typifier.typify(chain)
name = f"peo{i + 1}"
save_step(name, chain, ff, output_dir)
print(f"{name}: {len(chain.atoms)} atoms, {len(chain.bonds)} bonds")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[7], line 1 ----> 1 chain = make_eo_monomer() 2 save_step("peo1", chain, ff, output_dir) 3 4 for i in range(1, 5): NameError: name 'make_eo_monomer' is not defined
After this loop 02_output/ contains five data files — peo1.data through peo5.data — each a valid LAMMPS input at a different chain length.
Part 2 — The same coupling, automated by Reacter¶
Part 1 made every decision explicit. The five-step sequence it used — identify actors, merge, def_bond, del_atom, get_topo, typify — is exactly what Reacter encodes as a reusable rule. You write the selection logic once; rxn.run handles the rest.
Define the reaction rule¶
A Reacter needs four selector functions: one to find the anchor atom on each side, and one to find the leaving atoms on each side.
from molpy.core.atomistic import Atom, Atomistic
from molpy.reacter import (
Reacter,
form_single_bond,
select_hydrogens,
select_neighbor,
select_self,
)
def select_hydroxyl_group(struct: Atomistic, reaction_site: Atom) -> list[Atom]:
"""Return [O, H] — the hydroxyl leaving group on the anchor carbon."""
for neighbor in find_neighbors(struct, reaction_site):
if neighbor.get("element") != "O":
continue
h_neighbors = find_neighbors(struct, neighbor, element="H")
if h_neighbors:
return [neighbor, h_neighbors[0]]
raise ValueError("No hydroxyl group found near reaction site")
rxn = Reacter(
name="dehydration",
anchor_selector_left=select_neighbor("C"), # same as: find the C next to port >
anchor_selector_right=select_self, # same as: the port < O itself
leaving_selector_left=select_hydroxyl_group, # same as: l_O + l_H1
leaving_selector_right=select_hydrogens(1), # same as: l_H2
bond_former=form_single_bond, # same as: def_bond(anc, p_l)
)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[8], line 1 ----> 1 from molpy.core.atomistic import Atom, Atomistic 2 from molpy.reacter import ( 3 Reacter, 4 form_single_bond, File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/__init__.py:4 1 """MolPy — Composable molecular modeling in Python.""" 3 # Submodules ----> 4 from . import adapter, data, engine, io, legacy, parser, potential, typifier 6 # Core 7 from .core.atomistic import Angle, Atom, Atomistic, Bond, Dihedral, Improper File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/adapter/__init__.py:12 10 # Optional RDKit adapter 11 try: # pragma: no cover ---> 12 from .rdkit import MP_ID, Generate3D, OptimizeGeometry, RDKitAdapter, generate_3d 14 _HAS_RDKIT = True 15 except ModuleNotFoundError: # rdkit missing File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/adapter/rdkit.py:18 15 from rdkit import Chem 16 from rdkit.Chem import AllChem ---> 18 from molpy.core.atomistic import Atomistic 20 from .base import Adapter 22 MP_ID = "mp_id" File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/core/__init__.py:1 ----> 1 from .atomistic import Angle, Atom, Atomistic, Bond, Dihedral, Improper 2 from .box import Box 3 from .cg import Bead, CGBond, CoarseGrain File ~/.asdf/installs/python/3.13.3/lib/python3.13/site-packages/molpy/core/atomistic.py:232 228 """Third substituent atom.""" 229 return self.endpoints[3] --> 232 class Atomistic(Struct, MembershipMixin, SpatialMixin, ConnectivityMixin, molrs.Graph): 233 """All-atom molecular structure with full topological information. 234 235 Atomistic is the primary container for molecular systems in MolPy. It (...) 245 Atom, Bond, Angle, Dihedral, Struct, Frame 246 """ 248 def __init__(self, **props) -> None: AttributeError: module 'molrs' has no attribute 'Graph'
Every selector maps directly to an operation that Part 1 performed by hand.
Build the same pentamer with one call per step¶
chain = make_eo_monomer()
save_step("peo1_rxn", chain, ff, output_dir)
for i in range(1, 5):
unit = make_eo_monomer()
unit.move([10.0 * i, 0.0, 0.0])
result = rxn.run(
left=chain,
right=unit,
port_atom_L=find_port(chain, ">"),
port_atom_R=find_port(unit, "<"),
compute_topology=True,
)
chain = result.product
chain = chain.get_topo(gen_angle=True, gen_dihe=True)
chain = typifier.typify(chain)
name = f"peo{i + 1}_rxn"
save_step(name, chain, ff, output_dir)
print(f"{name}: {len(chain.atoms)} atoms")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[9], line 1 ----> 1 chain = make_eo_monomer() 2 save_step("peo1_rxn", chain, ff, output_dir) 3 4 for i in range(1, 5): NameError: name 'make_eo_monomer' is not defined
rxn.run performs the merge, def_bond, and del_atom internally — the same three operations from Part 1, in the same order. The caller is still responsible for get_topo and typify after each step, because those depend on force field context that the reaction rule itself does not carry.
for a in mon_a.atoms:
if a.get("port"):
print(f" element={a.get('element')} port={a.get('port')}")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[10], line 1 ----> 1 for a in mon_a.atoms: 2 if a.get("port"): 3 print(f" element={a.get('element')} port={a.get('port')}") NameError: name 'mon_a' is not defined
"No hydroxyl group found" (Part 2 only)
Print the neighbours of the anchor carbon to see what the selector is searching through:
p_r = find_port(mon_a, ">")
anc = [a for a in find_neighbors(mon_a, p_r) if a.get("element") == "C"][0]
for nb in find_neighbors(mon_a, anc):
print(f" element={nb.get('element')}")
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[11], line 1 ----> 1 p_r = find_port(mon_a, ">") 2 anc = [a for a in find_neighbors(mon_a, p_r) if a.get("element") == "C"][0] 3 for nb in find_neighbors(mon_a, anc): 4 print(f" element={nb.get('element')}") NameError: name 'find_port' is not defined
Untyped interactions after coupling (both parts)
get_topo must come before typify, and both must come after the merge + bond formation + removal:
chain = chain.merge(unit)
chain.def_bond(anc, p_l)
chain.del_atom(l_O, l_H1, l_H2)
chain.get_topo(gen_angle=True, gen_dihe=True) # first
chain = typifier.typify(chain) # second
Export fails: mol_id missing
LAMMPS full atom style requires mol_id. The save_step helper sets it, but if you export manually:
frame["atoms"]["mol_id"] = np.ones(frame["atoms"].nrows, dtype=int)
See also: Topology-Driven Assembly, Crosslinked Networks.