diff --git a/train.py b/train.py
index 996e48d..f967b67 100644
--- a/train.py
+++ b/train.py
@@ -123,7 +123,8 @@ def train():
         for p in optimizer.param_groups:
             # lower param count allows more aggressive training settings: i.e. SGD ~0.1 lr0, ~0.9 momentum
             p['lr'] *= 100
-            p['momentum'] *= 0.9
+            if p.get('momentum') is not None:  # for SGD but not Adam
+                p['momentum'] *= 0.9
 
         for p in model.parameters():
             if opt.prebias and p.numel() == nf:  # train (yolo biases)