Hardware watchdog customization improvement

Firstly, thanks for the great job - a very convenient OS for mining with a wide variety of user-friendly options. I am quite a small-scale miner (only 2 rigs), and I am thinking about fully migrating to HiveOS from my previous Linux-based mining distro, but there is a lack of support for hardware watchdog customization.

One of my rigs is based on a cheap and not very reliable ASRock motherboard with low-price power supplies, which leads to the system often hanging and, more seriously, to it booting with some of the GPUs fully offline (even the fans are not spinning).

I have solved it by writing custom script which checked:

  1. Number of GPUs in system
  2. Their hashrate
  3. Writing to logs before sending pulse to reboot

Is it possible to implement something like this in HiveOS? Or is it possible to put a custom script in HiveOS for bumping the watchdog?

Summary
# Watchdog feeder: configuration and one-time start-up.
#
# Positional arguments (all must be non-negative integers):
#   $1  GPUS_QUANTITY_REQUIRED - number of lines expected in GPU_LIST_FILE
#   $2  RATE_REQUIRED          - hashrate below which the rate counts as low
#   $3  MODIFICATION_DELAY     - maximum accepted age of RATE_FILE, in seconds
GPUS_QUANTITY_REQUIRED=${1-}
RATE_REQUIRED=${2-}
MODIFICATION_DELAY=${3-}

OUTPUT_FILE=/home/someos/log.txt
MINING_LOG_FILE=/home/someos/mining_log.txt
STATE_FILE=/home/someos/state.txt
GPU_LIST_FILE=/var/run/someos/gpulist.file
PORT=/dev/ttyACM0
RATE_FILE=/var/run/someos/status.file
HASHES_FILE=/var/run/someos/miner_hashes.file

# Refuse to start with missing or non-numeric arguments: the numeric
# comparisons made while monitoring need integers, and a `[` error there
# would not be treated as a failed health check.
for numeric_arg in "${GPUS_QUANTITY_REQUIRED}" "${RATE_REQUIRED}" "${MODIFICATION_DELAY}"; do
  case "${numeric_arg}" in
    '' | *[!0-9]*)
      printf 'Usage: %s GPUS_QUANTITY_REQUIRED RATE_REQUIRED MODIFICATION_DELAY\n' "${0##*/}" >&2
      printf 'All three arguments must be non-negative integers.\n' >&2
      exit 2
      ;;
  esac
done

# Configure the serial line (9600 baud, raw, no echo). PORT is used instead of
# a second hard-coded path so the configured device is always the one written
# to. A failure is logged but, as before, does not stop the script.
if ! stty -F "${PORT}" 9600 raw -echo; then
  printf '%s Could not configure %s with stty\n' \
    "$(date '+%Y-%m-%d %H:%M:%S')" "${PORT}" >> "${OUTPUT_FILE}"
fi

# Mark the start of a new run in both logs.
echo "___________________________________________________________________________________________________________________________________" >> "${OUTPUT_FILE}"
start_stamp=$(date '+%Y-%m-%d %H:%M:%S')
for start_log in "${OUTPUT_FILE}" "${MINING_LOG_FILE}"; do
  printf '%s Starting with: "%s" and "%s" and "%s"\n' \
    "${start_stamp}" \
    "${GPUS_QUANTITY_REQUIRED}" "${RATE_REQUIRED}" "${MODIFICATION_DELAY}" \
    >> "${start_log}"
done

# -1 is the "no previous hashrate recorded" sentinel for STATE_FILE.
printf '%s\n' '-1' > "${STATE_FILE}"

# Initial "~U" to the watchdog device (the same string the monitoring code
# writes when it logs "Sending pulse").
printf '%s' '~U' > "${PORT}"

# Number of checks so far that found too few GPUs.
GPU_INCORRECT_QUANTITY=0
# Becomes 1 once the GPU count has been confirmed and mining was allowed.
MINING_ALLOWED=0

# Monitoring loop of the watchdog feeder.
#
# Every 10 seconds one health check is run. "~U" is written to PORT (the
# watchdog pulse, per the log messages) only when the check passes; "~T2" is
# written when the GPU count has been wrong on three or more checks (logged
# as "rebooting").
#
# Globals read:    GPUS_QUANTITY_REQUIRED RATE_REQUIRED MODIFICATION_DELAY
#                  OUTPUT_FILE MINING_LOG_FILE STATE_FILE GPU_LIST_FILE
#                  RATE_FILE HASHES_FILE PORT
# Globals written: GPU_INCORRECT_QUANTITY MINING_ALLOWED

# Append a timestamped line to OUTPUT_FILE. $1 is used verbatim (no escape
# processing), so file content can be logged safely.
wd_log() {
  printf '%s %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" >> "${OUTPUT_FILE}"
}

# Send one watchdog pulse.
wd_pulse() {
  printf '%s' '~U' > "${PORT}"
}

# Run a single health check and pulse the watchdog if it passes.
watchdog_tick() {
  local gpu_count gpu_listing message
  local now rate_mtime current_rate previous_rate

  if [ ! -s "${GPU_LIST_FILE}" ]; then
    wd_log "No or empty file ${GPU_LIST_FILE}. Non sending pulse"
    return 0
  fi

  # Count once per tick; fall back to 0 if the file vanished meanwhile.
  gpu_count=$(wc -l < "${GPU_LIST_FILE}")
  gpu_count=${gpu_count:-0}

  if [ "${gpu_count}" -lt "${GPUS_QUANTITY_REQUIRED}" ]; then
    if [ "${GPU_INCORRECT_QUANTITY}" -eq 0 ]; then
      gpu_listing=$(cat -- "${GPU_LIST_FILE}")
      printf -v message 'Wrong number of gpus: \n%s \nNon sending pulse' "${gpu_listing}"
      wd_log "${message}"
    elif [ "${GPU_INCORRECT_QUANTITY}" -ge 2 ]; then
      wd_log "Wrong number of gpus: rebooting"
      # Flush the log to disk before asking the watchdog to reset the board.
      sync
      printf '%s' '~T2' > "${PORT}"
    fi
    GPU_INCORRECT_QUANTITY=$((GPU_INCORRECT_QUANTITY + 1))
    return 0
  fi

  # NOTE(review): with -eq, a rig reporting MORE GPUs than required never
  # reaches `allow`; confirm whether -ge was intended.
  if [ "${gpu_count}" -eq "${GPUS_QUANTITY_REQUIRED}" ] && [ "${MINING_ALLOWED}" -eq 0 ]; then
    wd_log "Gpu number is corret: allowing mining"
    wd_pulse
    MINING_ALLOWED=1
    allow
    return 0
  fi

  if [ ! -s "${RATE_FILE}" ]; then
    wd_log "No or emty file ${RATE_FILE}. Non sending pulse"
    return 0
  fi

  now=$(date +%s)
  rate_mtime=$(date -r "${RATE_FILE}" +%s)
  # If the mtime cannot be read, treat the file as infinitely old (no pulse).
  rate_mtime=${rate_mtime:-0}
  if [ "$((now - rate_mtime))" -gt "${MODIFICATION_DELAY}" ]; then
    wd_log "Too long since last modification of ${RATE_FILE}. Non sending pulse"
    return 0
  fi

  # First run of digits in the rate file.
  current_rate=$(grep -Eo '[[:digit:]]+' "${RATE_FILE}" | head -n1)
  # Keep an optional leading minus so the initial "-1" sentinel in STATE_FILE
  # is not misread as 1; default to the sentinel if nothing can be parsed.
  previous_rate=$(grep -Eo -- '-?[[:digit:]]+' "${STATE_FILE}" | head -n1)
  previous_rate=${previous_rate:--1}

  if [ -z "${current_rate}" ]; then
    wd_log "File exists but no info inside it. Non sending pulse"
  elif [ "${current_rate}" -lt "${RATE_REQUIRED}" ]; then
    if [ "${current_rate}" -le "${previous_rate}" ]; then
      wd_log "Low hashrate: ${current_rate} which not grows. Non sending pulse"
      printf 'Rates: "%s"\n' "$(tail -n 1 -- "${HASHES_FILE}")" >> "${OUTPUT_FILE}"

      # Keep the tail of the miner output for post-mortem analysis.
      printf '\n %s output:\n\n' "$(date '+%Y-%m-%d %H:%M:%S')" >> "${MINING_LOG_FILE}"
      tail -n 65 /var/run/miner.output >> "${MINING_LOG_FILE}"
    else
      # Still below the threshold but climbing: remember the new value and
      # keep the watchdog fed.
      wd_log "Hashrate low but increased: ${current_rate}. Sending pulse"
      printf '%s\n' "${current_rate}" > "${STATE_FILE}"
      wd_pulse
    fi
  else
    # Hashrate is at or above the threshold: pulse without logging.
    wd_pulse
  fi
  return 0
}

disallow
while true; do
  watchdog_tick
  sync
  sleep 10
done
# Not reached while the loop above runs forever.
exit 0

This topic was automatically closed 416 days after the last reply. New replies are no longer allowed.