First of all, thanks for the great job - a very convenient OS for mining with a wide variety of user-friendly options. I am quite a small-scale miner (only 2 rigs), and I am thinking about fully migrating to HiveOS from my previous Linux-based mining distro, but for me there is a lack of support for hardware watchdog customization.
One of my rigs is based on a cheap and not very reliable ASRock motherboard with low-priced power supplies, which leads to the system often hanging and, what is more serious, to it booting with some of the GPUs fully offline (even the fans are not spinning).
I solved it by writing a custom script which:
- Checks the number of GPUs in the system
- Checks their hashrate
- Writes to the logs before sending the reboot pulse
Is it possible to implement something like this in HiveOS? Or is it possible to put a custom script in HiveOS for bumping the watchdog?
Summary
# Hardware-watchdog feeder: configuration and one-time start-up.
#
# Positional arguments:
#   $1 - number of GPUs that must be listed in GPU_LIST_FILE
#   $2 - hashrate below which the rig is treated as under-performing
#   $3 - maximum age, in seconds, of RATE_FILE before it counts as stale
GPUS_QUANTITY_REQUIRED=$1
RATE_REQUIRED=$2
MODIFICATION_DELAY=$3

# Log files written by this script.
OUTPUT_FILE=/home/someos/log.txt
MINING_LOG_FILE=/home/someos/mining_log.txt
# Holds the last hashrate for which a pulse was sent while below RATE_REQUIRED.
STATE_FILE=/home/someos/state.txt

# Files this script only reads.
GPU_LIST_FILE=/var/run/someos/gpulist.file
RATE_FILE=/var/run/someos/status.file
HASHES_FILE=/var/run/someos/miner_hashes.file

# Serial device the watchdog commands are written to.
PORT=/dev/ttyACM0

# Configure the serial link through ${PORT} (previously the device path was
# repeated here, so changing PORT alone left the new device unconfigured).
stty -F "${PORT}" 9600 raw -echo

# Mark the start of a new run in both logs.
echo "___________________________________________________________________________________________________________________________________" >> "${OUTPUT_FILE}"
START_MESSAGE="$(date '+%Y-%m-%d %H:%M:%S') Starting with: \"${GPUS_QUANTITY_REQUIRED}\" and \"${RATE_REQUIRED}\" and \"${MODIFICATION_DELAY}\""
echo -e "${START_MESSAGE}" >> "${OUTPUT_FILE}"
echo -e "${START_MESSAGE}" >> "${MINING_LOG_FILE}"

# -1 is the "no previous hashrate recorded" sentinel.
echo "-1" > "${STATE_FILE}"

# "~U" is the pulse written to the watchdog (the main loop logs it as
# "Sending pulse"); send one immediately at start-up.
echo -n "~U" > "${PORT}"

# Loop state: passes seen with too few GPUs, and whether `allow` has run yet.
GPU_INCORRECT_QUANTITY=0
MINING_ALLOWED=0

# NOTE(review): `disallow` is not defined in this part of the file; presumably
# it blocks mining until the GPU count has been verified - confirm.
disallow
# Main watchdog loop: every 10 seconds decide whether to write the "~U" pulse
# to ${PORT}. A pulse is withheld (and the reason logged to ${OUTPUT_FILE})
# when:
#   - ${GPU_LIST_FILE} is missing or empty,
#   - it lists fewer than ${GPUS_QUANTITY_REQUIRED} lines,
#   - ${RATE_FILE} is missing, empty, unparsable or older than
#     ${MODIFICATION_DELAY} seconds,
#   - the hashrate is below ${RATE_REQUIRED} and has not risen above the value
#     stored in ${STATE_FILE}.
# Reads and updates GPU_INCORRECT_QUANTITY and MINING_ALLOWED, which are
# initialised before the loop.
while true; do
  # Count the GPU list once per pass; empty means "file missing or empty".
  GPU_COUNT=
  if [ -s "${GPU_LIST_FILE}" ]; then
    GPU_COUNT=$(wc -l < "${GPU_LIST_FILE}")
  fi

  if [ -z "${GPU_COUNT}" ]; then
    echo -e "$(date '+%Y-%m-%d %H:%M:%S') No or empty file ${GPU_LIST_FILE}. Non sending pulse" >> "${OUTPUT_FILE}"
  elif [ "${GPU_COUNT}" -lt "${GPUS_QUANTITY_REQUIRED}" ]; then
    if [ "${GPU_INCORRECT_QUANTITY}" -eq 0 ]; then
      # First detection: log the list that was found.
      echo -e "$(date '+%Y-%m-%d %H:%M:%S') Wrong number of gpus: \n$(cat "${GPU_LIST_FILE}") \nNon sending pulse" >> "${OUTPUT_FILE}"
    elif [ "${GPU_INCORRECT_QUANTITY}" -ge 2 ]; then
      echo -e "$(date '+%Y-%m-%d %H:%M:%S') Wrong number of gpus: rebooting" >> "${OUTPUT_FILE}"
      # Flush the log to disk before asking for the reboot.
      sync
      # NOTE(review): "~T2" is sent right after logging "rebooting"; presumably
      # it is the watchdog's reboot/timeout command - confirm against the device.
      echo -n "~T2" > "${PORT}"
    fi
    GPU_INCORRECT_QUANTITY=$((GPU_INCORRECT_QUANTITY + 1))
  elif [ "${GPU_COUNT}" -eq "${GPUS_QUANTITY_REQUIRED}" ] && [ "${MINING_ALLOWED}" -eq 0 ]; then
    echo -e "$(date '+%Y-%m-%d %H:%M:%S') Gpu number is corret: allowing mining" >> "${OUTPUT_FILE}"
    echo -n "~U" > "${PORT}"
    MINING_ALLOWED=1
    # NOTE(review): `allow` is not defined in this part of the file; presumably
    # it lets the miner start - confirm.
    allow
  elif [ ! -s "${RATE_FILE}" ]; then
    echo -e "$(date '+%Y-%m-%d %H:%M:%S') No or emty file ${RATE_FILE}. Non sending pulse" >> "${OUTPUT_FILE}"
  elif [ "$(($(date +%s) - $(date +%s -r "${RATE_FILE}")))" -gt "${MODIFICATION_DELAY}" ]; then
    echo -e "$(date '+%Y-%m-%d %H:%M:%S') Too long since last modification of ${RATE_FILE}. Non sending pulse" >> "${OUTPUT_FILE}"
  else
    # First run of digits in the rate file is taken as the current hashrate.
    CURRENT_RATE=$(grep -o '[[:digit:]]*' "${RATE_FILE}" | head -n1)
    # Keep an optional leading minus so the "-1" start-up sentinel is read back
    # as -1 (a digits-only pattern turned it into 1), and fall back to -1 when
    # the state file holds no number, instead of feeding an empty operand to `[`.
    PREVIOUS_RATE=$(grep -oE -e '-?[[:digit:]]+' "${STATE_FILE}" | head -n1)
    PREVIOUS_RATE=${PREVIOUS_RATE:--1}

    if [ -z "${CURRENT_RATE}" ]; then
      echo -e "$(date '+%Y-%m-%d %H:%M:%S') File exists but no info inside it. Non sending pulse" >> "${OUTPUT_FILE}"
    elif [ "${CURRENT_RATE}" -lt "${RATE_REQUIRED}" ]; then
      if [ "${CURRENT_RATE}" -le "${PREVIOUS_RATE}" ]; then
        # Low and not improving: withhold the pulse and capture diagnostics.
        echo -e "$(date '+%Y-%m-%d %H:%M:%S') Low hashrate: ${CURRENT_RATE} which not grows. Non sending pulse" >> "${OUTPUT_FILE}"
        echo -e "Rates: \"$(tail -n 1 "${HASHES_FILE}")\"" >> "${OUTPUT_FILE}"
        echo -e "\n $(date '+%Y-%m-%d %H:%M:%S') output:\n" >> "${MINING_LOG_FILE}"
        tail -n 65 /var/run/miner.output >> "${MINING_LOG_FILE}"
      else
        # Low but still climbing: remember the new value and keep feeding.
        echo -e "$(date '+%Y-%m-%d %H:%M:%S') Hashrate low but increased: ${CURRENT_RATE}. Sending pulse" >> "${OUTPUT_FILE}"
        echo "${CURRENT_RATE}" > "${STATE_FILE}"
        echo -n "~U" > "${PORT}"
      fi
    else
      #echo -e "$(date '+%Y-%m-%d %H:%M:%S') All is OK" >> ${OUTPUT_FILE}
      echo -n "~U" > "${PORT}"
    fi
  fi

  # Flush pending log writes on every pass.
  sync
  sleep 10
done
# Not reached while the loop above runs forever.
exit 0