xymon-ext-scripts

Check-in [59939734bb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fix monitoring logic for smart.sh. Introduce yellow for when we cannot get valid data from SMART for some reason.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | master | trunk
Files: files | file ages | folders
SHA3-256:59939734bb442b642439a8a168242fd994434d35f782783af1de04b387eab35d
User & Date: feld@FreeBSd.org 2017-05-25 21:58:46
Context
2017-05-25
22:10
Add logic to skip a disk that is in kern.disks but is missing a device node. This happens if you have a powered-off USB drive (usbconfig -d ugenX.Y power_off). check-in: 7b23404a7e user: feld@FreeBSd.org tags: master, trunk
21:58
Fix monitoring logic for smart.sh. Introduce yellow for when we cannot get valid data from SMART for some reason. check-in: 59939734bb user: feld@FreeBSd.org tags: master, trunk
2017-03-14
15:15
Remove some needless greps in favor of awk. Add checking and reporting of disk temperature. Filter out nvd devices, which aren't actually disks, but I may want to add logic to convert these to nvme equivalents... check-in: b701d7442e user: feld@feld.me tags: master, trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to smart.sh.

46
47
48
49
50
51
52

53

54

55


56
57
58
59
60
61
62
63
64
65
66
67



68
69

70
71
72
73
74
75

76
77





78
79


80
81
82
83
84
85
86
# Make tools installed under /usr/local reachable.
PATH=${PATH}:/usr/local/bin:/usr/local/sbin

# Column part of the "${MACHINE}.${COLUMN}" name used in the status message
# that is sent at the end of the script.
COLUMN=smart

# Build one report line per disk and collect them all in MSG.
# Disk names come from the kern.disks sysctl, split one per line, sorted,
# with names starting in "cd" or "nvd" filtered out.
# NOTE(review): REALLOCATED and TEMP are expanded unquoted in the "[" tests
# below. When the smartctl output lacks those attributes they expand to
# nothing, "[" reports an error, and the branch is treated as false.
MSG=$(for i in $(sysctl -n kern.disks | tr ' ' '\n' | sort | egrep -v '^(cd|nvd)'); do
	# Full smartctl report for this disk; every field below is parsed from it.
	OUTPUT=$(sudo smartctl -a /dev/${i});
	# Third field of every line containing Serial.
	SERIAL=$(echo "${OUTPUT}" | awk '/Serial/ {print $3}')

	# Third and fourth fields of the Device Model line.
	MODEL=$(echo "${OUTPUT}" | awk '/Device Model/ {print $3,$4}')

	# Tenth column of the Reallocated_Sector attribute row.
	REALLOCATED=$(echo "${OUTPUT}" | awk '/Reallocated_Sector/ {print $10}')

	# Tenth column of the Temperature_Celsius attribute row.
	TEMP=$(echo "${OUTPUT}" | awk '/Temperature_Celsius/ {print $10}')


	# Any reallocated sector or a temperature above 45 forces FAILED.
	# Otherwise HEALTH is the overall-health line printed by smartctl.
	if [ ${REALLOCATED} -gt 0 ] ; then
		HEALTH="FAILED"
	elif [ ${TEMP} -gt 45 ] ; then
                HEALTH="FAILED"
	else
		HEALTH=$(echo "${OUTPUT}" | grep "overall-health")
	fi

        case "${HEALTH}" in
                # Green only when HEALTH ends in PASSED.
                *PASSED)
			echo "&green ${i} PASSED [ Serial: ${SERIAL} Model: ${MODEL} Reallocated: ${REALLOCATED} Temp: ${TEMP} ]"
                        ;;



		# Anything else, including an empty HEALTH, is reported as failed.
		*)
			echo "&red ${i} FAILED [ Serial: ${SERIAL} Model: ${MODEL} Reallocated: ${REALLOCATED} Temp: ${TEMP} ]"

	esac
done)

# Heading line of the report: this host's name followed by a fixed title.
STATUS="$(hostname) SMART health status"

# Overall color: red as soon as any collected line contains the word FAILED,
# green otherwise. The match is on the whole MSG text, not only on the
# status word of each line.
if (echo "${MSG}" | grep -q FAILED); then

	COLOR=red
else





	COLOR=green
fi



# Send the report as a single multi-line "status" message: a header line with
# the test name, color and current date, then the heading and the per-disk
# lines, each separated by a blank line.
# NOTE(review): XYMON, XYMSRV and MACHINE are not set in the visible part of
# the script; presumably they are provided by the Xymon client environment.
${XYMON} ${XYMSRV} "status ${MACHINE}.${COLUMN} ${COLOR} $(date)

${STATUS}

${MSG}
"







>

>

>

>
>
|

|









>
>
>


>





|
>
|
<
>
>
>
>
>
|
<
>
>







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

87
88
89
90
91
92

93
94
95
96
97
98
99
100
101
# Make tools installed under /usr/local reachable.
PATH=${PATH}:/usr/local/bin:/usr/local/sbin

# Column part of the "${MACHINE}.${COLUMN}" name used in the status message
# that is sent at the end of the script.
COLUMN=smart

# smart_uint VALUE
# Print VALUE when it consists only of decimal digits, otherwise print 0.
# Keeps empty or non-numeric awk output away from the integer comparisons
# in smart_disk_line, where it would make "[" fail with an error.
smart_uint() {
	case "$1" in
		''|*[!0-9]*) echo 0 ;;
		*) echo "$1" ;;
	esac
}

# smart_disk_line DISK SMARTCTL_OUTPUT
# Print the one-line status entry for DISK, derived from the text that
# "smartctl -a" printed for it:
#   &red    DISK FAILED   reallocated sectors > 0, temperature > 45, or an
#                         overall-health line that does not end in PASSED
#   &green  DISK PASSED   overall-health line ends in PASSED
#   &yellow DISK UNKNOWN  no overall-health line, i.e. no usable SMART data
# Missing serial/model are shown as "null"; missing or non-numeric
# reallocated/temperature values are shown and evaluated as 0.
# The body runs in a subshell so its working variables never leak.
smart_disk_line() (
	disk=$1
	output=$2

	# Only the first matching line is used for each field, so a report that
	# mentions a keyword twice cannot produce a multi-line value.
	serial=$(printf '%s\n' "${output}" | awk '/Serial/ && !seen {print $3; seen=1}')
	model=$(printf '%s\n' "${output}" | awk '/Device Model/ && !seen {print $3,$4; seen=1}')
	reallocated=$(printf '%s\n' "${output}" | awk '/Reallocated_Sector/ && !seen {print $10; seen=1}')
	temp=$(printf '%s\n' "${output}" | awk '/Temperature_Celsius/ && !seen {print $10; seen=1}')

	serial=${serial:-null}
	model=${model:-null}
	reallocated=$(smart_uint "${reallocated}")
	temp=$(smart_uint "${temp}")

	# Attribute thresholds override whatever the drive says about itself.
	if [ "${reallocated}" -gt 0 ] || [ "${temp}" -gt 45 ]; then
		health="FAILED"
	else
		health=$(printf '%s\n' "${output}" | grep "overall-health")
	fi

	detail="[ Serial: ${serial} Model: ${model} Reallocated: ${reallocated} Temp: ${temp} ]"

	case "${health}" in
		*PASSED)
			echo "&green ${disk} PASSED ${detail}"
			;;
		"")
			# No verdict from the drive: warn, but do not claim it passed.
			echo "&yellow ${disk} UNKNOWN ${detail}"
			;;
		*)
			echo "&red ${disk} FAILED ${detail}"
			;;
	esac
)

# One report line per disk listed in kern.disks, sorted by name, skipping
# names that start with "cd" or "nvd".
MSG=$(for i in $(sysctl -n kern.disks | tr ' ' '\n' | sort | grep -E -v '^(cd|nvd)'); do
	smart_disk_line "${i}" "$(sudo smartctl -a "/dev/${i}")"
done)

# Heading line of the report: this host's name followed by a fixed title.
STATUS=$(hostname)
STATUS="${STATUS} SMART health status"

# Overall color is the worst marker found among the per-disk lines in MSG.
# Start from green and escalate; red is checked last so it wins over yellow.
COLOR=green
case "${MSG}" in *'&yellow'*) COLOR=yellow ;; esac
case "${MSG}" in *'&red'*) COLOR=red ;; esac

# First line of the status message: test name, overall color and timestamp.
status_header="status ${MACHINE}.${COLUMN} ${COLOR} $(date)"

# Send header, heading and per-disk lines as one multi-line argument, each
# part separated by a blank line and the whole message ending in a newline.
# XYMON and XYMSRV are deliberately left unquoted, as in the original call.
${XYMON} ${XYMSRV} "${status_header}

${STATUS}

${MSG}
"