# Flat mapping of six fields. The key names suggest a status snapshot of a
# Q-learning agent, but the producer/consumer of this file is not visible
# here — TODO confirm before relying on any field's meaning.
# Five-element numeric sequence (one float followed by four integers).
# NOTE(review): the meaning of each position is not documented — confirm.
current state: [ 1.429, 0, 3, 10, 10 ]

# NOTE(review): 0 presumably means no experience has been stored yet — confirm.
experience replay size: 0
# NOTE(review): presumably the epsilon-greedy exploration rate — confirm.
exploration epsilon: 1
# NOTE(review): unit is not stated (steps? ticks? episodes?) — confirm.
age: 1
# NOTE(review): -1 looks like a "not yet computed" placeholder rather than a
# measured value — confirm against the producer.
average Q-learning loss: -1
# NOTE(review): -1 here also looks like a placeholder; the smoothing method is
# not shown — confirm.
smooth-ish reward: -1