· 6 years ago · Jan 28, 2020, 03:13 PM
#!/usr/bin/perl
#
# $Id: //AV/main/pss/support/proactive_check.pl#90 $
# $Revision$
#
#use warnings;
#use strict;
#
# Change Log:
# 5.0 8/29 add --idpa=N. check avinstaller. check lasthfs not fail. add --avp for /space sizecheck. quite a few other checks to be detailed later
# 5.01 9/13. fix mixed gen4s/4t checking 4s for 4t ibu. test ssh_admin key
# 5.02 9/25. change status.dpn output warnings to warning instead of failed. fix avamaint typo. remove EMS checks for 7.2+
# 5.03 10/25 dont prompt for newer version if idpa.
# 5.04 11/6 intel fw not working node check comparing (0.0) to 0.0. add bug 281351. fix div/0 in checkswap. fix checkclient 18.x issue.
# 5.042 11/7 debug code left in.
# 5.05 11/30 fix getopersys if os-release file exists
# 5.051 12/14 fix idpa & 32gb mem check. remove perftriallimit check and report. fix remote_management not report failed last node
# 5.1 1/8 dont email if idpa. removed all paritysolver code. update mcs/gsan hotfixes. change ibu from 272154 to 304091. add disknogc check <89. use motd for ATO version
# 5.10 2/13 fix gsan patches. add new mcs patches.
# 5.11 2/14 fix gsan patches not saying passed
# 5.12 2/14 fix gsan patches not working
# 5.13 2/18 fix gsan 7.5.0 & 7.5.1 use same hotfix.
# 5.14 2/28 fix SQL clientcheck. allow 7.4 w/18.2 clients. change dd wording to ddrmaint.
# 5.15 5/25 fix etcprofile for new perms. add 8TB 16TB ave. inode check. disk>90% check. add spectre & fix IBU. check for watchdog. update gsan/mcs hotfix.
# 5.151 5/29 fix --idpa to not require version
# 5.2 6/09/2019 upd 295829-304094, 282000-304083, 272154-304091, 274401-304017. fix gen4t fw.
# 5.21 6/11/2019 fix debug print. temp remove spectre check from checkemcstorage. running for gen4t but gen4s checks.
# 5.3 7/09/2019 allow upg 7.4 > 18.2+. support dell switch. fix vba errdisk. check for balancelocaldisks. backups locked for single node/DD. enhance client compatibility matrix
# 5.4 8/07/2019 backups locked for single node/DD. enhance client compatibility matrix. add mcserver_xml_diff check. fix all IBU, spectre checks for gen4s/t. fix secupd for newer packages location.
# 5.41 8/07/2019 fix min bios version
# 5.42 8/14/2019 fix min bios version for spectre (remove last .11234438). change MCS to info if HF appears newer. change O/S to report SLES versions and SP
# 5.43 8/14/2019 change to O/S affected checks looking for "suse", changed to "suse|sles"
# 5.44 8/21/2019 fix --capacity mcs open using ,sslmod=prefer instead of ;sslmode=prefer. fix tail + to tail -n + for sles12.4. pem_files not working but wrong anyway skipping it.
# 5.45 9/16/2019 add 19.1 ddos min req. fix lastbu for checkclient. enhance mcserver.xml diff to skip some fields.
# 5.46 10/21/2019 ibu 304091 to 314445.
# 5.5 10/22/2019 check for smartd running. 19.2 upg only from 19.1/sles 11.4. add 19.2. gsan & mcs hotfixes.
37
38
39use DBI;
40use Time::Local;
41use POSIX;
42use Switch;
43use XML::Parser;
44use HTML::Entities;
45use File::Copy;
46use MIME::Base64;
47use Net::SMTP;
48use List::Util 'max';
49use File::Copy;
50use File::Basename;
51use Data::Dumper;
52use Fcntl ':mode';
53
# Remove GREP_OPTIONS from the environment before any external command runs.
delete $ENV{GREP_OPTIONS};

# Program name and version
$PROG    = "proactive_check.pl";
$PROGVER = '5.5';

# Timestamp (YYYYmmdd-HHMMSS) used in the log banner and in temp-file names.
# The previous `date +%Y%m%d-%H%m%S` call put the month (%m) where the minutes
# (%M) belong, so names created in the same hour could collide. strftime comes
# from POSIX, which this file already loads.
$logdate = POSIX::strftime("%Y%m%d-%H%M%S", localtime);
$TMPFILE = "/tmp/proactive_check-$logdate.tmp";

# "globals"
my ($NODE_COUNT, $AVAMARVER, $DATANODEVERSION, $MCSERVER, $MCSERVER_VERSION, $EMSERVER_VERSION, $MCDBOPEN, $VERSNUM, %PARTLIST, $GOTCONFIGINFO,
    $OS, $MANUFACTURER, $RACADM_CMD, $HOSTNAME, %CONFIG, %NODELIST, @NODES, @OMREPORT_STORAGE, $NODETYPE,
    $DDCNT, @DD_INDEX, %DD, %NODE_INFO, $AVMGR_VERSION, $VMWARE_CLIENT, $MAINT_RUNNING, %CMDTOOLSUMMARY, %xmltree, %NODE_XREF,
    $METADATA_CAPACITY, $VBA, $VBA_RPM, $AVAMARHF, $VBA_VERSION, $SUDO, $AVSYSREPORT, $VIRTUALHW, $IDPA, $ADME_VERS, $IDPA_UPGRADE, $PREUPGRADE, $AVP_SIZE);

$| = 1;    # unbuffered STDOUT so progress lines appear as each check finishes

# ANSI colour sequences used by msg(). These are the same byte sequences the
# old `echo -en` calls produced, written as literals so no shell is spawned and
# the result does not depend on how /bin/sh implements echo.
$SUCCESS = "\033[1;32m";
$FAILURE = "\033[1;31m";
$WARNING = "\033[1;33m";
$NORMAL  = "\033[0;39m";
$INFO    = "\033[0;36m";
#$ALL = -r '/usr/local/avamar/var/probe.xml' ? '--all+' : '--all';
$ALL     = '--all';
$VERBOSE = 1;    # change default behavior to be verbose

########### Define known versions ##########
@v60x = qw(6.0.0-580 6.0.0-592 6.0.1-63 6.0.1-65 6.0.1-66 6.0.2-150 6.0.2-153 6.0.2-156);
@v61x = qw(6.1.0-276 6.1.0-280 6.1.0-333 6.1.0-402 6.1.0-9056 6.1.1-81 6.1.1-87 6.1.2-46 6.1.2-47);
@v6x  = (@v60x, @v61x);
@v7x  = qw(7.0.0-355 7.0.0-374 7.0.0-396 7.0.0-423 7.0.0-427 7.0.1-56 7.0.1-61 7.0.2-42 7.0.2-43 7.0.2-47 7.0.3-32 7.1.0-302 7.1.0-370 7.1.1-141 7.1.1-145 7.1.2-21);
@v72x = qw(7.2.0-390 7.2.0-401 7.2.1-31 7.2.1-32);
@v73x = qw(7.3.0-207 7.3.0-211 7.3.0-226 7.3.0-233 7.3.1-125);
@v74x = qw(7.4.0-199 7.4.0-242 7.4.1-58);
@v75x = qw(7.5.0-183 7.5.1-101);
@v18x = qw(18.1.0-33 18.2.0-51 18.2.0-134);
@v19x = qw(19.1.0-38 19.2.0-155);
########### VBA version
@vba = qw(7.0.60-11 7.0.61-5 7.0.62-10 7.0.63-8 7.1.60-4 7.1.60-12 7.1.61-6 7.1.61-10 7.1.62-5 7.1.63-5 7.1.60-20 7.2.60-20 7.2.61-5);

@supportedversions = (@v75x, @v6x, @v7x, @v72x, @v73x, @v74x, @vba, @v18x, @v19x);

########### Client Compatibility
# general home https://elabnavigator.emc.com/eln/elnhome
# avamar specific. Look at Server https://elabnavigator.emc.com/eln/modernHomeAutomatedTiles?page=Avamar
# Key = server version (major.minor); value = client versions (major.minor)
# listed as compatible with that server.
%CLIENT = (
    18.1 => [ 7.4, 7.5, 18.1 ],
    18.2 => [ 7.4, 7.5, 18.1, 18.2 ],
    19.1 => [ 7.5, 18.1, 18.2, 19.1 ],
    19.2 => [ 7.5, 18.1, 18.2, 19.1, 19.2 ],
);
104
105###
106### START MAIN
107###
# ---------------------------------------------------------------------------
# Start-up sequence. The order matters: logging is opened first, then the
# command line is parsed, then the environment is validated (user, utility
# node, MCS database) before any check runs.
# ---------------------------------------------------------------------------
$ENV{'PATH'} = "/usr/local/avamar/bin:" . $ENV{'PATH'};
chomp($HOSTNAME = `hostname -f`);
setuplog();                    # Turn on logging
$SSHKEY = get_ssh_key();
getargs();                     # Get args/values passed in from command line
setuphclog();                  # Setup hc_results.txt log
getUser();                     # Ensure script is being run by user admin
checkutilnode();               # Check that we are on the utility node
openmcdb();
nodexref();

# --run=<sub> executes a single named check (optionally after --eval=<perl>)
# and exits.
# NOTE(review): both values are taken verbatim from the command line and are
# executed as code (string eval plus a call by name). Confirm this is meant
# only for trusted support engineers.
if ($RUN) {
    if ($EVAL) {
        eval $EVAL;
    }
    &$RUN();
    print "\nSee detailed ERROR information in hc_results.txt\n";
    exit 0;
}

shownotes();
if ($DO_HEALTHCHECK) {
    # Report a failed open in the log instead of writing to a dead handle
    # without any trace.
    open(SETTINGS, ">", "hc_settings.txt")
        or print LOG "WARNING: Unable to open hc_settings.txt for writing: $!\n";
}
getinstalledversion();         # Get version of Avamar installed
check_script_version();
getuserdata() if ($LOGOFF);    # Get user input if logging off
msg("Avamar Hostname", $HOSTNAME);

# NOTE(review): getinstalledversion() already reports "Target Upgrade Version"
# and "Avamar Server Version", so both lines are printed twice. Left as is
# because it is not clear which of the two positions is the intended one.
msg("Target Upgrade Version", $UPGRADE_VERSION) if ($PREUPGRADE);
if ($UPGRADE_VERSION =~ /7\.2\.0/) {
    # Dots are escaped so only a literal 7.2.0 triggers the warning.
    my $msg = "${WARNING} WARNING TO UPGRADE ENGINEER: When initiating the Upgrade Workflow, be certain that you do not include MCS Hotfix 244285 as an optional/callable AVP.$NORMAL\n";
    printboth($msg);
    print $msg;
}
my $msg = ($AVAMARHF) ? "$AVAMARVER with $AVAMARHF" : $AVAMARVER;
msg("Avamar Server Version", $msg, "");

getavamarver();                # Get Avamar gsan & rpm versions other routines depend on this running early
getconfiginfo();               # get %CONFIG, %NODELIST, %SCHED, %MCSERVER

msg("System ID", $NODELIST{'/nodestatuslist/nodestatus/0.0/systemid'});
142###
143### PROACTIVE SPECIFIC CHECKS
144###
# Proactive checks. Skipped entirely when $SKIP_PROACTIVE is set (--hco or
# --nopc). The checks run strictly in the order listed; several of them fill
# in globals that later checks read.
unless ($SKIP_PROACTIVE) {

    # ----- Always run -----
    servicemode();                 # See if service mode enabled
    gethardware();                 # Identify hardware
    getopersys();                  # Identify operating system
    getnodetype();                 # Get node type (deep or shallow)
    getdatadomain();               # Get Data domain info
    getvba();                      # Get VBA Info/Versions
    getear();                      # Get ear encryption at rest status
    remote_management();           # Check remote management
    metadatacapacity();            # Check meta data capacity
    aerplugin();                   # Check for AER plugin
    replication();                 # Check for replication settings
    rptsecupdvers();               # Report on Security Update version
    plugin_catalog();              # Print Plugin Catalog version
    checkversion();                # Check if version supported
    lastflush();                   # Check MCS & EMS last flushes
    lasthfs();                     # Check for last HFS time
    checketh();                    # Make sure ethernets are gb, autoneg, full duplex
    checktime();                   # Check avmaint time settings
    dpnctl_status();               # Check dpnctl status output
    status_dpn();                  # Check status.dpn output
    duplicateip();                 # Check for duplicate IP's
    license();                     # Check for license file and unexpired
    checkclients();                # Check specific client plugins
    adtcheck();                    # Check for ADT existence
    atocheck();                    # Check for ATO existence
    ddvers();                      # Check for minimum datadomain version
    etcprofile();                  # Check for /etc/profile wrong
    ipmi();                        # check IPMI status
    switchconf();                  # Check switch configuration
    susekernel();                  # Check if SUSE has been up and may run into 208 day bug.
    etchosts();                    # Check for name resolution
    kernelcnt();                   # Check for Kernel RPM count
    avhardening();                 # Check for avhardening installed
    oscheck();                     # Check O/S things
    micron_ssd();                  # Check avsysreport output
    pem_files();                   # pem file check
    mccli_java();                  # Check mccli for java home
    chkspace();                    # Check for very high disk usage
    chkcron();                     # Check cron jobs.
    mcserver_xml_diff();           # Compare current mcserver.xml to previous mcserver.xml

    # ----- Skipped for upgrades and add-node runs -----
    unless ($PREUPGRADE or $ADDNODE) {
        bondconf();                # Check Bonding config
        ddgcoob();                 # Data Domain gcoob.pl installed
        checkascd();               # Check ascd status
        cronrunning();             # Check cron
        checkconfig();             # Check avmaint config settings
        checkswap();               # Verify swap is turned on and consistent
        fileperms();               # Check file permissions (/var/log/messages)
        checkpointxmlperms();      # Check checkpoint.xml owner/perms
        hfschecktime();            # Check hfscheck run time
        mandatoryupgrade();        # Check if any mandatory upgrades
        gsanpatches();             # Check if GSAN needs to be patched
        gsanflags();               # Check gsanflags like rwmutexmaxreadlocks="0"
        mcspatches();              # Check if MCS needs to be patches
        avagent();                 # Check for avagent patches
        qadir();                   # Check for leftover QA directories
        checkopenfiles();          # Check for open files parameters nodefile, file-max
        checkmessages();           # Check /var/log/messages
        dtltsecurity();            # Check for bug 235341 dtlt vulnerability
        rpmversions();             # Check RPM versions
        siteinv();                 # Check for site inventory bugs
    }

    # ----- Only for upgrades -----
    if ($PREUPGRADE) {
        repoempty();               # Check if /data01/repo dirs are empty
        avamarsrc();               # Check if /data01/avamar/src exists
        tomcatdir();               # Check if jakarta tomcat dir exist
        upgradepath();             # Check upgrade path
        getrestapi();              # Get REST API version if installed
        lastemail();               # Check connectemc last email date
        chage();                   # Check expired logins
        replforceaddr();           # Check for replication settings (forceaddr)
        adsinfo();                 # Check for ADS - Downloader service.
        greenvillehotfix();        # Check for Greenville Hotfix applied
        activesessions();          # Check for active backup/restore/etc sessions
    }

    # ----- Only for add-node -----
    if ($ADDNODE) {
        gen4sver();                # Warn if vers <6.1 to not add gen4s
    }

    # ----- Only for VBA -----
    if ($VBA) {
        #chkspace();               # Check /space
        chkproxy();                # Check if proxies are running
    }

    ###
    ### DELL SPECIFIC CHECKS
    ###
    if ($MANUFACTURER =~ /dell/) {
        checkostools();                     # Check OS Tools are installed
        virtualmedia() unless ($PREUPGRADE);  # Ensure that virtual media device is not enabled (Dell nodes ONLY)
        dellomlogs()   unless ($PREUPGRADE);  # Check for Dell OM logrotate bug 10783
        getdellstorage();                   # Get Dell status
        checkdellstorage();                 # Check Dell Status
    }

    ###
    ### EMC/Intel specific checks
    ###
    if ($MANUFACTURER =~ /emc/) {
        getcmdtool();
        checkemcstorage();
        checkarcconf();
    }

    # END PROACTIVE CHECK
    sendemail();
}
262###
263### HEALTHCHECK SPECIFIC CHECKS
264###
# Healthcheck (--hc / --hco): gather backup, replication, schedule, capacity
# and log information into the hc_* files in the current directory.
if ($DO_HEALTHCHECK) {

    print "\n";
    get_backup_info();    # Get logs of any failed, completed w/exceptions, and 5 highest capacity, time, change rate clients
    get_repl_info();      # Get replication report and configuration
    sched();              # Get schedule information

    # Get capacity and garbage collection info
    print("HEALTHCHECK: Creating hc_capacity.txt\n");
    print LOG "\n\n\n### " . localtime() . " ### Starting capacity_info\n";
    # capacity_info() is still called when the open fails, as before, but the
    # failure is now recorded instead of being silent.
    open(OUTPUT, ">", "hc_capacity.txt")
        or printboth("ERROR: Unable to open hc_capacity.txt for writing: $!\n\n");
    capacity_info(30);
    close OUTPUT;

    print("HEALTHCHECK: Creating hc_settings.txt\n");
    get_errlog();
    get_esmlog();
    get_maintlogs();

    # FINISH HEALTHCHECK
    # openmcdb() carries on without a connection when MCS is down, so $dbh may
    # be undefined here; calling disconnect on it would abort the run.
    $dbh->disconnect if ($dbh);
}
287###
288### LOGOFF SPECIFIC CHECKS
289###
# Logoff (--logoff): extra checks confirming the grid is left in a healthy state.
if ($LOGOFF) {
    backup_config();
    test_flush();
    check_capacity();
    logoff_report();
}

if ($DO_HEALTHCHECK) {
    printboth("All logs have been included in hc-${HOSTNAME}.tgz\n");

    # LOG, RESULTS and SETTINGS are ordinary buffered handles, and the archive
    # is built from the files on disk. Turning autoflush on forces pending
    # output out right away, so the tarball holds complete files.
    # NOTE(review): assumes no earlier routine has already closed these
    # handles; if one has, this is a harmless no-op.
    for my $fh (\*LOG, \*RESULTS, \*SETTINGS) {
        my $previous = select($fh);
        $| = 1;
        select($previous);
    }
    print LOG `tar czf hc-${HOSTNAME}.tgz hc_*`;
}

close(LOG);
close(MAPALL);
unlink($TMPFILE);

# Close RESULTS before appending it to the history file; otherwise the tail
# of hc_results.txt may still sit in the output buffer when `cat` reads it.
close(RESULTS);
$results = `echo -e "\n\n\n\n" >> hc_history.log; cat hc_results.txt >> hc_history.log`;
print "\nSee detailed ERROR information in hc_results.txt\n";
print "\nFINISHED\n";
exit 0;
310########## End Main ##########
311
312############### Start sub getargs() ###############
# Parse and validate the command line.
#
# Every argument must have the form --name or --name=value. Each one is logged
# to LOG and mapped onto the option globals read by the main flow ($RUN,
# $PREUPGRADE, $UPGRADE_VERSION, $DO_HEALTHCHECK, ...). Several options act
# immediately and exit (help, version, history, notes, util, mcs, servicemode,
# capacity, sched, replrpt).
#
# Exits with status 1 on a malformed or unknown argument, on an invalid
# --servicemode value, and when --preupgrade/--idpa=N lacks an n.n.n version.
sub getargs {

    print LOG "ARGS: ";
    foreach (@ARGV) {
        if ($_ !~ /^--([^=]+)=?(.*)$/) {
            print "Invalid command line argument: $_\n";
            exit 1;
        }
        # Copy the captures at once; later pattern matches overwrite $1 and $2.
        my $arg   = $1;
        my $value = $2;
        print LOG "$arg='$value' ";

        # Any --cap* argument anywhere on the command line switches to the
        # capacity report, which reads the remaining options itself.
        if (grep /--cap/, @ARGV) {
            capacity_info(); exit;
        }

        switch ($arg) {
            case "help" { doHelp(1); exit 0; }
            case "version" { print "$PROG: Version $PROGVER\n"; exit 0; }
            case "debug" { $DEBUG="YES"; }
            case "run" { $RUN=$value; }
            case "eval" { $EVAL=$value; }
            case "hc" { $DO_HEALTHCHECK=1; }
            case "hco" { $DO_HEALTHCHECK=1; $SKIP_PROACTIVE=1; }
            case "nopc" { $SKIP_PROACTIVE=1; }
            case "logoff" { $LOGOFF=1; }
            case "verbose" { $VERBOSE=0; }
            case "sched" { $sched=1; }
            case "force" { $FORCE=1; }
            case "capacity" { $capacity=1; }
            case "replrpt" { $replrpt=1; }
            case "days" { $IN_DAYS=$value; }
            case "wide" { $WIDE=1; }
            case "update" { unlink "/home/admin/.noftp"; }
            case "preupgrade" { $PREUPGRADE=1; $UPGRADE_VERSION=$value; }
            case /client/ { $CLIENT_VERSION_CHECK=1; $RUN="checkclients"; $UPGRADE_VERSION=$value; }
            case "addnode" { $ADDNODE=1; }
            case "darksite" { $DARKSITE=1; }
            case "text" { $SUCCESS=""; $NORMAL=""; $FAILURE=""; $WARNING=""; $INFO=""; }
            case "cpoverhead" { $CPOVERHEAD=$value; }
            case "retention" { $METADATA_RETENTION=$value; }
            case "override" { $OVERRIDE=$value; }
            case "metadata" { $METADATA_CAPACITY=1; $RUN="metadatacapacity"; }
            case "intel" { $INTEL_BLOCK=1; }
            case "history" { showhistory(); exit 0; }
            case "notes" { my $notefile=( -e "notes-proactive_check.pl") ? "notes-proactive_check.pl" : dirname($0)."/notes.txt"; exec("vi", $notefile); }
            case /F013017EE|F0/i { micron_ssd("exit"); }
            case "idpa" { if ($value) { $PREUPGRADE=1; $UPGRADE_VERSION=$value; $IDPA_UPGRADE=1; }; $IDPA=1; }
            case "avp" { $AVP_SIZE=$value; }
            case /^util/ { AvamarUtilizationCalc(); exit 0; }
            case "servicemode" {
                if ($value) { servicemode($value); exit; }
                else { print "Invalid option. Format is --servicemode=<hours> NOTE: Only 3 hours is currently supported\n\n"; exit 1; }
            }
            case /^mcs/ { `cp /usr/local/avamar/var/mc/server_data/prefs/mcserver.xml .phc-mcserver.xml`; print "Updated saved mcserver.xml\n"; exit 0; }
            else { print "Invalid Command line: --$arg\nTry --help\n"; exit 1; }
        }
    }
    print LOG "\n";

    if ($UPGRADE_VERSION !~ /\d+\.\d+\.\d+/ and $PREUPGRADE) {
        print "FATAL ERROR: Version must be included with --preupgrade=n.n.n flag. Example: --preupgrade=7.0.0-395 or --preupgrade=7.0.0\n";
        exit 1;
    }
    if ($sched)   { sched();         exit; }
    if ($replrpt) { get_repl_info(); exit; }

    print LOG "\n\n\n### Exit getargs\n";
}
382############### End sub getargs() ###############
383
384
385########## Start sub doHelp() ##########
# Print the usage text to STDOUT. Any arguments passed in are ignored.
# The text interpolates $PROG and $PROGVER.
sub doHelp {
    my $usage = <<"END_OF_USAGE";

$PROG $PROGVER

With no command line options summary info will be printed to the screen and detailed errors
will go to the file hc_results.txt. With the --hc flag healthcheck information will be
gathered into various hc_ files and tar'd into a single file with the name of the server.

The --logoff option will run a few extra checks to make sure the grid has been placed
in a healthy operating state.

--addnode Run special add node checks and skip unnecessary checks.
--avp=N Specify size of AVP package in MB for upgrades
--capacity Print capacity info to screen (like capacity.sh). Use --capacity --help for additional commands to use with --capacity
--clientvers[=X] Check Client Versions. Current GSAN Version is used unless you send in the version like: --clientvers=7.0.0-123
--darksite Removes latest script version check and sending results to Avalanche (Only need to run it once with this flag)
--days Number of days to include in sched, capacity, replrpt
--idpa Specify the system is IDPA. Slightly different checks are run
--hc Perform entire Grid Health Check and tar it into one file.
--history Print maintenance history
--help Display the help screen
--logoff Check grid health and settings before logging off
 --force Force Critical checks to pass for --logoff
--metadata Only run metadata check
 --cpoverhead=N Provide an estimated daily checkpoint overhead percent to metadata capacity check
 --retention=N Provide the typical backup retention in days for metadata capacity
 --override Override 5 days of maintenance routines required
--notes Edit system note file displayed at the start of proactive check with vi editor
--preupgrade[=X] Run special pre-upgrade checks and skip unnecessary checks. Example: --preupgrade=7.0.0-423
--replrpt Print replication info to screen (like replrpt.sh)
--sched Print schedule info to screen (like sched.sh)
 --wide Print sched with more data points
--servicemode=N Enable service mode for N hours. Prevents CLM from creating new service requests. Currently only 3 hours is supported.
--text Removes ANSI color codes from output
--update Allows FTP check once again (but does not remove darksite flag)
--util Run AvamarCalcUtility
--version Display the program version

FCO Checks
--F013017EE Check for Micron SSD 1000 day issue

If there are any problems with the script please get the hc_proactive_check.log.
Every command and check run is appended to the log file.

END_OF_USAGE
    print $usage;
}
434
435########## End of sub doHelp() ##########
436
437
438########## Start setuplog ##########
# Open the run log ./hc_proactive_check.log on the global LOG handle and write
# the start banner.
#
# $DEBUG is set to the number of arguments matching --debug (case-insensitive).
# With --debug the log is truncated; otherwise the run is appended to it.
# Dies if the log cannot be opened.
sub setuplog {
    $DEBUG   = grep { /--debug/i } @ARGV;
    $logfile = "./hc_proactive_check.log";

    my $mode = $DEBUG ? ">" : ">>";
    open(LOG, $mode, $logfile)
        || die "Unable to open logfile ($logfile) for writing: $!";

    print LOG
        "\n\n\n###################################################################################################\n",
        "### STARTING $PROG $PROGVER $logdate\n",
        "###################################################################################################\n";
}
########## End setuplog ##########
457
458########## Start setuphclog ##########
# Create (truncate) hc_results.txt on the global RESULTS handle, write its
# header (run date, script version, command line) and emit the disclaimer and
# KB pointer through printboth().
sub setuphclog {
    print LOG "### " . localtime() . " ### Starting setuphclog\n";

    open(RESULTS, ">hc_results.txt");

    my $rule = "========================================================================\n";
    print RESULTS $rule;
    print RESULTS "Run Date: " . localtime() . " Version $PROGVER\n";
    print RESULTS $rule;
    print RESULTS "command line: @ARGV\n";

    printboth("\nDISCLAIMER: The results from this script are intended for the exclusive use of EMC Support & Development Engineers to diagnose potential problems so that they can use their trained skills to see exactly how the issues might or might not affect an individual server's performance\n\n");
    printboth("Please review KB534454 Avamar troubleshooting hierarchy if there are any issues found\n\n");
}
469
470########## End setuphclog ##########
471
472
473########## Start sub getUser() ##########
# Verify the script is being run by the "admin" account.
# The user name comes from `whoami`; for any other user a notice is printed
# and the script exits.
sub getUser {
    print LOG "### " . localtime() . " ### Starting getuser\n";

    chomp(my $whoami = `whoami`);
    return if ($whoami eq "admin");

    print "\nNOTICE: This script must be run as admin.\n\n";
    exit;
}
486########## End sub getUser() ##########
487
488########## Start sub checkutilnode ##########
# Pre-flight environment checks; any failure prints an error and exits.
#
#   1. /usr/local/avamar/var/mc/ must exist (taken to mean this is the
#      utility node).
#   2. The current directory, and every existing entry in it matching 0.*,
#      must be writable.
#   3. Through mapall, every node must answer and report that its tmp
#      directory is either absent or a writable directory.
#
# Sets the package global $node as a side effect while parsing mapall output.
sub checkutilnode {
    print LOG "\n\n\n### " . localtime() . " ### Starting checkutilnode\n";

    unless (-e "/usr/local/avamar/var/mc/") {
        print "\nERROR: This program must be run on the Utility node.\n\n";
        exit;
    }
    unless (-w ".") {
        print "\nERROR: This program must be run in a directory that you have write permissions\n\n";
        exit;
    }
    foreach my $entry (glob("0.*")) {
        next if (-w $entry);
        print "\nERROR: This program needs to write to $entry but does not have write permissions\n\n";
        exit;
    }

    my $result=`/usr/bin/ssh-agent bash -c "/usr/bin/ssh-add $SSHKEY 2>&1; /usr/local/avamar/bin/mapall $ALL --noerror '[ -e tmp -a \\( ! -d tmp -o ! -w tmp \\) ] && echo NotWritable || echo OkWritable'" 2>&1 `;

    # A node is added to %pending when its "(0.x) ssh" header line is seen and
    # removed again when it answers OkWritable. Whatever is left at the end
    # either never answered or answered NotWritable.
    my $unwritable = "";
    my %pending;
    foreach my $line (split(/\n/, $result)) {
        print LOG "$line\n";
        if ($line =~ /(\(0\..*\)) ssh/) {
            $node = $1;
            $pending{$node} = 1;
            print LOG "add node $node\n";
        }
        if ($line =~ /^NotWritable/) {
            printboth("\nFATAL ERROR: Node $node /home/admin/tmp is not writable");
            print("\nFATAL ERROR: Node $node /home/admin/tmp is not writable");
            $unwritable = 1;
        }
        if ($line =~ /^OkWritable/) {
            delete $pending{$node};
            print LOG "delete node $node\n";
        }
    }

    if ($unwritable) {
        printboth("\nRESOLUTION: Check/Fix permissions on /home/admin/tmp directories\n");
        print("\nRESOLUTION: Check/Fix permissions on /home/admin/tmp directories\n\n");
        exit 1;
    }

    if (%pending) {
        foreach my $silent (sort keys %pending) {
            print("FATAL ERROR: Node $silent did not respond properly to mapall command\n");
        }
        print("ERROR OUTPUT:$result\n\nRESOLUTION: Fix ssh keys. Test by loading keys as admin:\nssh-agent bash\nssh-add $SSHKEY\n");
        print(" \nThen try these commands:\nmapall date\nmapall date\nmapall $ALL date\n\n");
        exit 1;
    }

}
542########## End sub checkutilnode ##########
543
544
545
546########## Start msg ##########
# Report one check result line. Does nothing unless $VERBOSE is 1.
#
#   $_[0]  category / check name (left-justified, padded to 30 columns)
#   $_[1]  status: "INFO", "FAILED", "WARNING", anything containing "PASSED",
#          or free text such as a version string
#   $_[2]  optional additional text appended after the status
#
# The line is written three times: coloured to STDOUT, plain to RESULTS with a
# "# --> " prefix, and plain to LOG (preceded by a blank line for FAILED).
sub msg {
    return unless ($VERBOSE eq 1);

    my ($category, $status, $detail) = @_;

    my $color = "";
    my $shown = " " . $status;
    if ($status eq "INFO")    { $color = $INFO }
    if ($status =~ "PASSED")  { $color = $SUCCESS }
    if ($status eq "FAILED")  { $color = $FAILURE; $shown = "*FAILED*" }
    if ($status eq "WARNING") { $color = $WARNING }
    my $reset = ($color) ? $NORMAL : "";
    $detail = "" unless ($detail);

    my $colored = sprintf("%-30s %s%s%s %s\n", $category, $color, $shown, $reset, $detail);
    print $colored;

    my $plain = sprintf("%-30s %s%s%s %s\n", $category, "", $shown, "", $detail);
    print RESULTS "# --> $plain";
    print LOG "\n" if ($status eq "FAILED");
    # print, not printf: the finished line must not be reused as a format
    # string, or any "%" in the message would be interpreted and lost.
    print LOG $plain;
}
567########## End msg ##########
568
569########## Start printboth ##########
# Write the arguments (joined the way "@_" interpolates them) to both the LOG
# and RESULTS handles. Nothing is printed to the screen: the echo that the
# name suggests is disabled in the original code.
sub printboth {
    my $text = "@_";
    print LOG $text;
    print RESULTS $text;
    return;
}
577########## End printboth ##########
578
########## Start get_ssh_key ##########
# Choose the SSH private key used for mapall.
#
# Returns the key path with a leading space, exactly as before:
#   " /home/admin/.ssh/dpnid"      when that file exists and $VERSNUM < 730
#   " /home/admin/.ssh/admin_key"  otherwise, when that file exists
# Prints a fatal error and exits 1 when neither applies.
#
# NOTE(review): the main flow calls this before getinstalledversion() sets
# $VERSNUM, so on that first call the comparison sees an unset value, which
# counts as 0 and therefore as < 730. Confirm that is intended.
sub get_ssh_key {
    my $keyfile;

    if (-e "/home/admin/.ssh/dpnid" and $VERSNUM < 730) {
        $keyfile = qq[ /home/admin/.ssh/dpnid];
    }
    elsif (-e "/home/admin/.ssh/admin_key") {
        $keyfile = qq[ /home/admin/.ssh/admin_key];
    }
    else {
        print "\nFATAL ERROR: Could not find admin users dpnid or admin_key.\n\n";
        exit 1;
    }

    print LOG "usekey=$keyfile\n";
    return "$keyfile";
}
594
595########## Start testsshkey ########
# Verify that mapall works on every node when /home/admin/.ssh/admin_key is
# the key in use.
#
# $SSHKEY is switched to admin_key for the test and then restored. Each node
# whose "(0.x) ssh" header appears in the mapall output without a following
# "SSHkeyTest" echo is reported, and the check is marked FAILED. When
# admin_key is already the active key the test is skipped.
#
# Fixes relative to the previous version:
#   - the skip test read the misspelled $SSH_KEY and never returned;
#   - `my $node,%nodeok` declared only $node, leaving %nodeok global;
#   - `$SSHKEY=$SSHKEY` never restored the saved key.
sub testsshkey {
    print LOG "\n\n\n### " . localtime() . " ### Starting testsshkey\n";

    # get_ssh_key() returns the path with a leading space, so trim before
    # comparing.
    my $active = defined($SSHKEY) ? $SSHKEY : "";
    $active =~ s/^\s+|\s+$//g;
    if ($active eq "/home/admin/.ssh/admin_key") {
        print LOG "already using admin key no reason to test it\n";
        return;
    }

    my ($node, %nodeok);
    my $savekey = $SSHKEY;
    $SSHKEY = "/home/admin/.ssh/admin_key";

    my $cmd = qq[ echo "SSHkeyTest" ];
    mapall($ALL, "$cmd");

    if (open(CMD_PIPE, "<", $TMPFILE)) {
        while (<CMD_PIPE>) {
            chomp;
            if (/(\(0\..*\)) ssh/) {
                $node = $1;
                $nodeok{$node} = 1;
                print LOG "add node $node\n";
            }
            if (/^SSHkeyTest/) {
                delete $nodeok{$node};
                print LOG "delete node $node\n";
            }
        }
        close(CMD_PIPE);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }

    if (%nodeok) {
        for (sort keys %nodeok) {
            printboth("ERROR: Node $_ did not respond properly to mapall command with admin_key\n");
        }
        printboth("RESOLUTION: Fix admin_key ssh keys so mapall works\n\n");
        msg("SSH admin_key", "FAILED");
    }

    # Put back the key that was active before the test.
    $SSHKEY = $savekey;
}
########## End testsshkey ########
629
630
631########## Start mapall ##########
# Run a shell command on the grid nodes through /usr/local/avamar/bin/mapall.
#
#   $args    flags passed to mapall (for example --all)
#   $cmd     either a literal mapall "copy ..." command, or shell text that is
#            written to a temp script, copied to the nodes and run there
#   $noexit  optional; when 1, a failure of the run step returns 1 instead of
#            exiting the script
#
# Output of the run is left in $TMPFILE for the caller to parse. The sed
# filter drops mapall's "Using ...probe" and per-node "ssh" header lines and
# rewrites the per-node "cat" lines into "(0.x) ssh " markers.
#
# Returns 0 on success, nothing after a "copy" command, and 1 on a run failure
# with $noexit set. A failed copy step always exits 1.
#
# Fixes: the "do not exit" flag was read from the regex capture $3 instead of
# the third argument; the temp script open is now checked; the temp script is
# removed on the failure path; the unused get_ssh_key() call is gone (the key
# actually used is the global $SSHKEY).
sub mapall {
    my ($args, $cmd, $noexit) = @_;
    my $mapalltime = time;
    my $result;

    $TMPMAPALL = "proactive_check-$logdate.mapall";
    unless (open(MAPALL, ">", "/tmp/" . $TMPMAPALL)) {
        printboth("ERROR: Unable to write /tmp/$TMPMAPALL: $!\n\n");
        print "FATAL ERROR: Unable to write /tmp/$TMPMAPALL: $!\n\n";
        return 1 if (defined($noexit) && $noexit == 1);
        exit 1;
    }
    print MAPALL "$cmd\n";
    close(MAPALL);

    print LOG "mapall: args='$args' cmd='$cmd'\n";
    # The double quote opened here is closed by the trailing quote in each
    # command string below.
    my $mapall=qq[ /usr/bin/ssh-agent bash -c "/usr/bin/ssh-add $SSHKEY 2>/dev/null; /usr/local/avamar/bin/mapall --parallel ];
    print LOG "mapallcmd: $mapall\n";

    # Step 1: either run the caller's own copy command, or copy the temp
    # script to every node.
    if ($cmd =~ /^copy/) {
        $result=`$mapall $args $cmd >$TMPFILE 2>&1 "`;
    } else {
        $result=`$mapall $args copy /tmp/$TMPMAPALL >$TMPFILE 2>&1 "`;
    }
    if ($? != 0) {
        printboth("ERROR: mapall copy command failed. Review log file or $TMPFILE\n\n");
        print "FATAL ERROR: mapall copy command failed. Review log file or $TMPFILE\n\n";
        if (open(CMD_PIPE, "<", $TMPFILE)) {
            while (<CMD_PIPE>) {
                chomp;
                print LOG "$_\n";
            }
            close(CMD_PIPE);
        }
        exit 1;
    }
    return if ($cmd =~ /^copy/);

    # Step 2: run the copied script on every node, then delete it there.
    $cmd=qq[ $mapall --capture --noerror --givestatus $args '(sh ./tmp/$TMPMAPALL; rm -f ./tmp/$TMPMAPALL) 2>&1 ' 2>&1 |
sed -e '/^Using .usr.local.avamar.var.probe\\|^(0\\..*) ssh /d' -e 's/^(0.\\(\\S*\\))\\s*cat\\s*/(0.\\1) ssh /' >$TMPFILE " ];
    $result=`$cmd`;
    if ($? != 0) {
        printboth("ERROR: mapall command failed. Review log file or $TMPFILE\n\n");
        print "FATAL ERROR: mapall command failed. Review log file or $TMPFILE\n\n";
        if (open(CMD_PIPE, "<", $TMPFILE)) {
            while (<CMD_PIPE>) {
                print LOG "$_";
            }
            close(CMD_PIPE);
        }
        unlink "/tmp/$TMPMAPALL";
        return 1 if (defined($noexit) && $noexit == 1);
        exit 1;
    }

    print LOG ("mapalltime: " . (time - $mapalltime) . "\n");
    unlink "/tmp/$TMPMAPALL";
    return 0;
}
680########## End mapall ##########
681
682########## Start getinstalledversion ########
# Determine the installed Avamar server version from the dpnserver RPMs.
#
# The sorted `rpm -qa | grep dpnserver` output is read line by line: the first
# package (minus the "dpnserver-" prefix) becomes $AVAMARVER, and every further
# package is appended, comma separated, to $AVAMARHF.
#
# Also sets:
#   $VERSNUM  $AVAMARVER with all dots removed and the first "-" turned into
#             "." (for example 7.5.1-101 becomes 751.101)
#   $SUDO     true when $VERSNUM is below 730
#
# Reports the version through msg() and exits 1 when no dpnserver RPM is found.
#
# Fixes: the rpm pipe is now opened in three-argument form on a lexical handle,
# a failed open is logged, and the handle is closed when reading is done.
sub getinstalledversion {
    print LOG "\n\n\n### " . localtime() . " ### Starting getinstalledversion\n";

    if (open(my $rpm, "-|", "rpm -qa | grep dpnserver | sort")) {
        while (my $pkg = <$rpm>) {
            chomp $pkg;
            print LOG "$pkg\n";
            if ($AVAMARVER) {
                $AVAMARHF .= ", " if ($AVAMARHF);
                ($AVAMARHF .= $pkg) =~ s/dpnserver-//;
            } else {
                ($AVAMARVER = $pkg) =~ s/dpnserver-//;
            }
        }
        close($rpm);
    } else {
        print LOG "Unable to run rpm query: $!\n";
    }

    if (!$AVAMARVER) {
        printboth("FATAL ERROR: Unable to determine the Avamar version.\n");
        printboth(" Check that the Avamar RPM's are installed with command 'rpm -qa | grep dpnserver'\n\n");
        exit 1;
    }

    msg("Target Upgrade Version", $UPGRADE_VERSION) if ($PREUPGRADE);
    my $msg = $AVAMARVER;
    $msg .= " with $AVAMARHF" if ($AVAMARHF);
    msg("Avamar Server Version", $msg, "");

    $VERSNUM = $AVAMARVER;
    $VERSNUM =~ s/\.//g;
    $VERSNUM =~ s/-/./;
    print LOG "VERSNUM=$VERSNUM\n";
    $SUDO = ($VERSNUM < 730);
}
708########## End getinstalledversion ########
709
710########## Start getavamarver ##########
# Collect and cross-check the versions of the Avamar components.
#
#   - runs `gsan --version` and `md5sum` on /home/admin/gsan on the nodes via
#     mapall and verifies every node reports the same version and md5sum;
#   - compares that version with the local /usr/local/avamar/bin/gsan and,
#     when present, /home/admin/gsan;
#   - reads the MCS, avmaint, avtar and avmgr versions and checks that MCS and
#     avmaint share the GSAN major version.
#
# Sets $DATANODEVERSION, $GSAN_MD5SUM, $MCSERVER_VERSION, $MCSERVER_MD5SUM,
# $AVMAINT_VERSION, $AVTAR_VERSION, $AVMGR_VERSION, and $VBA when the data node
# version appears in @vba.
#
# Exits 1 when a node does not respond or avmaint is not working.
#
# Fixes: two log lines printed the wrong label or value (home gsan logged as
# bin gsan, MCS version logged as $AVAMARVER); the /home/admin/gsan version is
# now split with the same limit as the other version strings; the data node
# version is matched against @vba literally; helper variables are lexical; the
# mapall output handle is checked and closed.
sub getavamarver {
    print LOG "\n\n\n### " . localtime() . " ### Starting getavamarver\n";
    my $major_version;
    my ($lastversion, $last_md5sum, $foo, $e);

    # Get versions for each node
    my $cmd = qq[/home/admin/gsan --version; md5sum /home/admin/gsan];
    mapall("", $cmd);
    if (open(CMD_PIPE, "<", $TMPFILE)) {
        while (<CMD_PIPE>) {
            chomp;
            print LOG "$_\n";
            if (/node (.*?) (.*?) .*not responding, removing/) {
                printboth("FATAL ERROR: Node $1 ($2) Not responding. All checks may not be correct\n");
                msg("All Nodes Responding", "FAILED");
                print "\n$_\nprogram exiting.\n\n";
                exit 1;
            }
            if ($_ =~ /^ .version/) {
                my ($label, $version) = split(' ', $_, 2);
                print LOG "--> found node with $version\n";
                if (defined($lastversion) && $version ne $lastversion) {
                    printboth("ERROR: GSAN versions do not match on data nodes\n");
                    printboth(" Found $lastversion and $version\n\n");
                    $e = "yes";
                }
                $lastversion = $version;
            }
            # Get md5sums for each node
            if ($_ =~ / gsan$/) {
                ($GSAN_MD5SUM, $foo) = split(' ', $_, 2);
                print LOG "--> found node with md5sum $GSAN_MD5SUM\n";
                if (defined($last_md5sum) && $GSAN_MD5SUM ne $last_md5sum) {
                    printboth("ERROR: GSAN versions do not match on data nodes\n");
                    printboth(" Found $last_md5sum md5sum and $GSAN_MD5SUM\n\n");
                    $e = "yes";
                }
                $last_md5sum = $GSAN_MD5SUM;
            }
        }
        close(CMD_PIPE);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }

    # Local binary shipped in the Avamar bin directory.
    my $results=`/usr/local/avamar/bin/gsan --version | grep "^ .version:" `;
    my ($foo1, $bingsan) = split(" ", $results, 2);
    chomp($bingsan);
    print LOG "/usr/local/avamar/bin/gsan version: $bingsan\n";
    if ($bingsan ne $lastversion) {
        printboth("ERROR: Data node GSAN version $lastversion does not match /usr/local/avamar/bin/gsan version $bingsan\n");
        printboth("RESOLUTION: See KB460393\n\n");
        $e = "yes";
    }

    # Local copy in the admin home directory, when present.
    if (-e "/home/admin/gsan") {
        $results=`/home/admin/gsan --version | grep "^ .version:" `;
        # Limit 2, like the other version parses, so the strings compared
        # below are split the same way.
        my ($foo2, $homegsan) = split(" ", $results, 2);
        chomp($homegsan);
        print LOG "/home/admin/gsan version: $homegsan\n";
        if ($homegsan ne $lastversion) {
            printboth("ERROR: Data node GSAN version $lastversion does not match /home/admin/gsan version $homegsan\n");
            printboth("RESOLUTION: See KB460368\n\n");
            $e = "yes";
        }
    }

    $DATANODEVERSION = $lastversion;
    if ($e) {
        msg("GSAN Version", "FAILED");
    } else {
        msg("GSAN Version", $DATANODEVERSION);
    }
    $major_version = major_version($DATANODEVERSION);

    chomp($MCSERVER_VERSION = `mcserver.sh --version| head -1`);
    chomp($MCSERVER_MD5SUM = `md5sum /usr/local/avamar/lib/mcserver.jar | awk '{print \$1}'`);
    $MCSERVER_VERSION =~ s/\s*version:\s*[v]*//;
    msg("MCS Version ", $MCSERVER_VERSION, "($MCSERVER_MD5SUM)");
    print LOG "MCS Version..: $MCSERVER_VERSION md5sum $MCSERVER_MD5SUM\n";
    if (major_version($MCSERVER_VERSION) ne $major_version) {
        print LOG "mc=" . major_version($MCSERVER_VERSION) . " major=$major_version\n";
        printboth("ERROR: MC Server version does not match GSAN Version\nRESOLUTION: Major version should match\n\n");
        msg("Version match", "FAILED");
    }

    # NOTE(review): this count includes the grep process itself.
    chomp(my $javarunning = `ps -aef | grep -c java`);
    printboth("# --> Java processes ($javarunning)\n");

    chomp($AVMAINT_VERSION = `rununtil 60 avmaint --version | grep '^[ ]*version'`);
    if (!$AVMAINT_VERSION) {
        printboth("ERROR: avmaint command does not appear to be working. Script cannot continue\n");
        print("FATAL ERROR: avmaint command does not appear to be working. Script cannot continue\n");
        exit 1;
    }
    $AVMAINT_VERSION =~ s/\s*version:\s*//;
    print LOG "AVMAINT Version: $AVMAINT_VERSION\n";
    if (major_version($AVMAINT_VERSION) ne $major_version) {
        print LOG "avmaint=" . major_version($AVMAINT_VERSION) . " major=$major_version\n";
        printboth("ERROR: avmaint version does not match GSAN Version\nRESOLUTION: Major version should match\n\n");
        msg("Version match", "FAILED");
    }

    chomp($AVTAR_VERSION = `rununtil 60 avtar --version | grep '^[ ]*version'`);
    $AVTAR_VERSION =~ s/\s*version:\s*//;
    print LOG "AVTAR Version: $AVTAR_VERSION\n";
    msg("avtar Version:", $AVTAR_VERSION);

    chomp($AVMGR_VERSION = `avmgr --version | grep '^[ ]*version:'`);

    # Match the version literally (\Q...\E): its dots must not act as regex
    # wildcards, and an empty version must not match every entry.
    if (defined($DATANODEVERSION) && length($DATANODEVERSION)) {
        $VBA = 1 if (grep { /\Q$DATANODEVERSION\E/ } @vba);
    }

}
822########## End getavamarver ##########
823
824########## Start mcdb ##########
825# OPEN MCS DATABASE
sub openmcdb {
    # Connect to the MCS PostgreSQL database (mcdb on port 5555) as "admin".
    # The handle is kept in the global $dbh; $MCDBOPEN is set to "yes" only
    # when the connection succeeds.
    print LOG "### ".localtime()." ### Starting openmcdb\n";
    $dbh = DBI->connect("dbi:Pg:dbname=mcdb;port=5555;sslmode=prefer", "admin", "" );
    if (!$dbh) {
        # No connection: report it and leave $MCDBOPEN untouched.
        printboth("ERROR: Could not connect to MCS database. Some checks will be skipped\n");
        printboth("RESOLUTION: Determine why MCS is not running\n\n");
    } else {
        $MCDBOPEN="yes";
    }
}
836########## End openmcdb ##########
837
838########## Start gethardware #########
839# Get the hardware type
sub gethardware {
    # Run dmidecode on all nodes and record, per node, the manufacturer class
    # (DMI type 1) and the CPU version string (DMI type 4) in %NODE_INFO.
    # $MANUFACTURER becomes a ", " separated list of the distinct manufacturer
    # classes seen; $VIRTUALHW is set when a virtual platform is detected.
    print LOG "\n\n\n### ".localtime()." ### Starting gethardware\n";
    nodexref() if (!$NODE_COUNT);
    $MANUFACTURER = "";

    my $cmd = qq[ sudo /usr/sbin/dmidecode ];
    mapall($ALL,"$cmd");

    # The command output is read back from $TMPFILE.  Use a checked,
    # three-argument open on a lexical handle so a missing file is logged and
    # the handle is always released.
    my $fh;
    if (!open($fh, '<', $TMPFILE)) {
        print LOG "ERROR: gethardware cannot open $TMPFILE: $!\n";
        msg("Hardware Manufacturer",$MANUFACTURER);
        return;
    }

    my $node = "";       # node label taken from the most recent "(0.x) ssh" header
    my $dmi_type = 0;    # DMI section currently being parsed
    while (my $line = <$fh>) {
        chomp($line);

        if ($line =~ /sudo: no tty present/) {
            printboth("FATAL ERROR: Node $node sudo dmidecode failed with $line \n");
            printboth("RESOLUTION: Fix sudo for dmidecode. All hardware info is invalid because of this.\n\n");
            msg("Hardware Type info retrieval","FAILED");
            close($fh);
            return;
        }

        # Node header line: remember which node the following output belongs to.
        if ($line =~ /(\(0\..*\)) ssh/) {
            $node = $1;
            print LOG "$line\n";
            next;
        }

        next if ($line =~ /Not Specified/);
        $dmi_type = $1 if ($line =~ /DMI type (\d+)/);

        # Identify manufacturer
        if ($dmi_type == 1 and $line =~ /Manufacturer: (\S+)/) {
            my $manu = $1;
            print LOG "$line\n";

            my $node_manu = "other";
            if    ($manu =~ m/Dell/i)      { $node_manu = "dell"; }
            elsif ($manu =~ m/Intel/i)     { $node_manu = "emc"; }
            elsif ($manu =~ m/EMC/i)       { $node_manu = "emc"; }
            elsif ($manu =~ m/VMware/i)    { $node_manu = "vmware"; $VIRTUALHW = 1; }
            elsif ($manu =~ m/Microsoft/i) { $node_manu = "hyperv"; $VIRTUALHW = 1; }
            elsif ($manu =~ m/Xen/i)       { $node_manu = "aws";    $VIRTUALHW = 1; }

            # Add each manufacturer class to the summary only once.
            if ($MANUFACTURER !~ /$node_manu/) {
                $MANUFACTURER .= ", " if ($MANUFACTURER);
                $MANUFACTURER .= "$node_manu";
                print LOG "add to MANUFACTURER $node_manu\n";
            }
            $NODE_INFO{$node}{manufacturer} = $node_manu;
            # On a single node system the same data is also stored under "(0.s)".
            $NODE_INFO{"(0.s)"}{manufacturer} = $node_manu if ($NODE_COUNT == 1 and $node eq "(0.0)");
            print LOG "Hardware: Node $node MANUFACTURER $node_manu\n";
        }

        # Identify CPU type
        if ($dmi_type == 4 and $line =~ /Version: (.*)/i) {
            my $cpu = $1;
            print LOG "$line\n";
            $NODE_INFO{$node}{cpu} = $cpu;
            $NODE_INFO{"(0.s)"}{cpu} = $cpu if ($NODE_COUNT == 1 and $node eq "(0.0)");
            print LOG "Hardware: Node $node CPU $cpu\n";
        }
    }
    close($fh);

    msg("Hardware Manufacturer",$MANUFACTURER);
}
897########## End gethardware ##########
898
899########## Start getopersys #########
900# Get the operating system
sub getopersys {
    # Determine the operating system of every node from the /etc/*release
    # files.  The per node result is stored in $NODE_INFO{node}{os}; $OS
    # becomes a comma separated list of the distinct values found.
    print LOG "\n\n\n### ".localtime()." ### Starting getopersys\n";
    nodexref() if (!$NODE_COUNT);

    # "CAT:" lines carry the release file contents, the "OS:" line carries the
    # name of the first matching release file.
    my $cmd = " cd /etc; cat [SsRr]*release 2>/dev/null | sed -e 's/^/CAT:/' ; ls [SsRr]*release | head -1 | sed -e 's/^/OS:/' ";
    mapall($ALL,$cmd,1);

    my ($os, $ver, $patch, $node) = ("") x 4;
    my $e = "";

    # Checked three-argument open on a lexical handle; the handle is closed
    # once the output has been parsed.
    if (open(my $fh, '<', $TMPFILE)) {
        while (my $line = <$fh>) {
            chomp($line);
            print LOG "$line\n";

            # Node header: start collecting for a new node.  Version and patch
            # level are reset too so they cannot leak from the previous node.
            if ($line =~ /(\(0\..*\)) ssh/) {
                $node = $1;
                ($os, $ver, $patch) = ("") x 3;
                next;
            }
            next if (!$node);

            if ($line =~ /^CAT:/) {
                $ver   = $1 if ($line =~ /VERSION\s*=\s*(.*)/);
                $patch = $1 if ($line =~ /PATCHLEVEL\s*=\s*(.*)/);
                next;
            }

            if ($line =~ /^OS:/) {
                # Reduce "OS:<name>-release" to the lower-cased <name>.
                my $name = $line;
                $name =~ s/-release.*//;
                $name =~ s/^OS://;
                $name = lc($name);
                if ($name =~ /redhat/) {
                    $os = $name;
                } elsif ($name =~ /suse|sles/i) {
                    $os = "SLES $ver.$patch";
                } else {
                    printboth("ERROR: Node $node Unknown Operating System ($name)\n");
                    $os = "unknown";
                    $e = "yes";
                }
            }

            $NODE_INFO{$node}{os} = $os;
            # On a single node system the same data is also stored under "(0.s)".
            $NODE_INFO{"(0.s)"}{os} = $os if ($NODE_COUNT == 1 and $node eq "(0.0)");
            print LOG "node $node o/s $os\n";

            # Append each distinct O/S string to the summary once.
            if (index($OS,$os) < 0) {
                $OS .= "," if ($OS);
                $OS .= $os;
            }
        }
        close($fh);
    } else {
        print LOG "ERROR: getopersys cannot open $TMPFILE: $!\n";
    }

    if ($e) {
        printboth("RESOLUTION: Check if operating system is supported. Look at 'ls /etc/*-release'\n\n");
        msg("Operating System","FAILED");
    }
    # NOTE(review): the summary line is emitted even after a FAILED status
    # above, exactly as before - confirm whether msg() is meant to be called
    # twice for the same item.
    msg("Operating System",$OS);
}
950########## END getopersys #########
951
952########## Start checkostools ##########
953# Check OS Tools are installed
sub checkostools {
    # Verify that the Dell OpenManage tools (omconfig, omreport, racadm or
    # racadm4) can be found on the Dell nodes.  Sets the globals $OMCONFIG,
    # $OMREPORT and $RACADM_CMD (the racadm path reported by the last node)
    # and marks the check as done through $RAN_OMREPORT.
    print LOG "\n\n\n### ".localtime()." ### Starting checkostools\n";
    $RAN_OMREPORT="yes";
    gethardware() if (!$MANUFACTURER);
    return if ($MANUFACTURER !~ /dell/);

    my $nodes=getnodes_hw("dell");
    if (!$nodes) {
        print LOG "no dell nodes found\n";
        return;
    }

    my $cmd=q[ which omconfig; which omreport; which racadm4; which racadm ];
    mapall("--nodes=$nodes ",$cmd,1);

    $OMCONFIG=1;
    $OMREPORT=1;
    my $node="";
    my $racadm_missing=0;    # set when any node has no racadm command at all

    if (open(my $fh, '<', $TMPFILE)) {
        while (my $line = <$fh>) {
            chomp($line);
            print LOG "$line\n";

            if ($line =~ /(\(0\..*\)) ssh/) {
                my $next_node=$1;
                # A new node starts: the previous node is complete, so report
                # it now if no racadm was found for it.
                if ($node and !$RACADM_CMD) {
                    printboth("ERROR: Node $node does not have 'racadm' command\n");
                    $racadm_missing=1;
                }
                $node=$next_node;
                if ( $NODE_INFO{$node}{manufacturer} !~ /dell/ ) {
                    print LOG "Skipping node $node not dell: $NODE_INFO{$node}{manufacturer}\n";
                    next;
                }
                $RACADM_CMD="";
                next;    # the header line itself carries no tool output
            }

            if ($line =~ /no omconfig/) {
                $OMCONFIG=0;
                printboth("ERROR: Node $node does not have 'omconfig' command. Some checks will be skipped\n");
            }
            if ($line =~ /no omreport in/) {
                $OMREPORT=0;
                printboth("ERROR: Node $node does not have 'omreport' command. Some checks will be skipped\n");
            }
            # Remember the racadm path.  The "which: no racadm4 in (...)" error
            # line also contains "racadm4" and must not be taken as a command.
            $RACADM_CMD=$line if ($line =~ /racadm$|racadm4/ and $line !~ /no racadm/);
        }
        close($fh);

        # The last node has no following header, so check it here.
        if ($node and !$RACADM_CMD) {
            printboth("ERROR: Node $node does not have 'racadm' command\n");
            $racadm_missing=1;
        }
    } else {
        print LOG "ERROR: checkostools cannot open $TMPFILE: $!\n";
    }

    if (!$OMCONFIG or !$OMREPORT or !$RACADM_CMD or $racadm_missing) {
        printboth("ERROR: Unable to find Dell Open Manager Tools (omconfig,omreport or racadm). Some checks will be skipped.\n");
        printboth(" See KB305350 for instructions to install Dell OM Tools.\n\n");
        msg("Dell Open Manage Tools","FAILED");
    } else {
        print LOG "found omconfig,omreport,racadm on all nodes\n";
        msg("Dell Open Manage Tools","PASSED");
    }
}
1006########## End checkostools ##########
1007
1008########## Start getomreport ##########
sub getomreport {
    # Collect "omreport storage controller controller=0" output from the Dell
    # nodes and append every line to the global @OMREPORT_STORAGE.
    print LOG "\n\n\n### ".localtime()." ### Starting getomreport\n";
    gethardware() if (!$MANUFACTURER);
    if ($MANUFACTURER !~ /dell/) {
        print LOG "Skipping. No Dell nodes\n";
        return;
    }
    my $nodes=getnodes_hw("dell");
    if (!$nodes) {
        print LOG "no dell nodes found\n";
        return;
    }

    # checkostools decides whether omreport is available ($OMREPORT).
    checkostools() if (!$RAN_OMREPORT);
    if (!$OMREPORT) {
        print LOG "Skipping. No OMREPORT found\n";
        return;
    }

    my $cmd=q[ omreport storage controller controller=0];
    mapall("--nodes=$nodes",$cmd);

    # Checked three-argument open on a lexical handle, closed after reading.
    my $fh;
    if (!open($fh, '<', $TMPFILE)) {
        print LOG "ERROR: getomreport cannot open $TMPFILE: $!\n";
        return;
    }
    while (my $line = <$fh>) {
        chomp($line);
        print LOG "omreport: $line\n";
        push(@OMREPORT_STORAGE,$line);
    }
    close($fh);
}
1034########## End getomreport ##########
1035
1036
1037########## Start virtualmedia ##########
1038# Check that virtual media is not mounted
sub virtualmedia {
    # Check that Dell virtual media is not attached on any Dell node by
    # querying racadm (cfgracvirtual / cfgvirmediaattached).  A value of "1"
    # on a line of its own means virtual media is enabled on that node.
    print LOG "\n\n\n### ".localtime()." ### Starting virtualmedia\n";
    gethardware() if (!$MANUFACTURER);
    if ($MANUFACTURER !~ /dell/) {
        print LOG "Skipping. No Dell nodes\n";
        return;
    }
    my $nodes=getnodes_hw("dell");
    if (!$nodes) {
        print LOG "no dell nodes found\n";
        return;
    }

    # checkostools determines which racadm command is available.
    checkostools() if (!$RAN_OMREPORT);
    if (!$RACADM_CMD) {
        msg("Dell Virtual Media Disabled","WARNING");
        print LOG "WARNING: Virtual Media not checked. 'racadm' not installed\n";
        return;
    }
    if ($VERSNUM>=730 and $RACADM_CMD =~ /racadm4/ ) {
        print LOG "Skipping. no sudo support for racadm4\n";
        return;
    }

    my $cmd=qq[ sudo $RACADM_CMD getconfig -g cfgracvirtual -o cfgvirmediaattached ];
    mapall("--nodes=$nodes ",$cmd);

    my $fh;
    if (!open($fh, '<', $TMPFILE)) {
        print LOG "ERROR: virtualmedia cannot open $TMPFILE: $!\n";
        msg("Dell Virtual Media Disabled","WARNING");
        return;
    }

    my $virtenabled=0;
    my $node="";
    while (my $line = <$fh>) {
        chomp($line);
        print LOG "$line\n";
        if ($line =~ /sudo: no tty present/) {
            printboth("ERROR: Node $node sudo $RACADM_CMD failed with $line \n");
            # This message previously named arping, a copy/paste leftover.
            printboth("RESOLUTION: Fix sudo allowed for racadm\n\n");
            msg("Dell Virtual Media Disabled","FAILED");
            close($fh);
            return;
        }
        if ($line =~ /(\(0\..*\)) ssh/) {
            $node=$1;
            # Logged once per node.  The node's lines are still parsed below,
            # as before; mapall was already limited to the Dell nodes.
            if ( $NODE_INFO{$node}{manufacturer} !~ /dell/ ) {
                print LOG "Skipping node $node not dell: $NODE_INFO{$node}{manufacturer}\n";
            }
        }
        if ($line =~ /^1$/) {
            $virtenabled++;
            printboth("\n") if ($virtenabled == 1);    # blank line before the first error
            printboth("ERROR: Node $node has virtual media enabled\n");
        }
    }
    close($fh);

    if ($virtenabled>0) {
        printboth(" Disable virtual media on all nodes with the following command:\n");
        printboth(" mapall --nodes=$nodes --noerror 'sudo racadm config -g cfgracvirtual -o cfgvirmediaattached 0'\n\n");
        msg("Dell Virtual Media Disabled","FAILED");
    } else {
        print LOG "Virtual media disabled\n";
        msg("Dell Virtual Media Disabled","PASSED");
    }
}
1093########## End virtualmedia ##########
1094
1095
1096########## Start checkswap #########
1097# Check that swap is enabled
sub checkswap {
    # Check swap on every node using "sysctl vm.swappiness" and "free":
    #   - FAILED  when a node has no swap (less than 1GB after rounding)
    #   - FAILED  when a node has less than 12GB swap, a Data Domain is
    #             attached ($DDCNT>=1), it is not "(0.s)" and not a VBA
    #   - WARNING when more than 50% of a node's swap is in use
    print LOG "\n\n\n### ".localtime()." ### Starting checkswap\n";
    getopersys() if (!$OS);
    getdatadomain() if (!$DDRMAINT_VERSION) ;

    my $cmd=q[ /sbin/sysctl vm.swappiness; /usr/bin/free ];
    mapall("--all",$cmd);

    my (%swappiness,%nodeused,%nodeswap);
    my $node="";
    if (open(my $fh, '<', $TMPFILE)) {
        while (my $line = <$fh>) {
            chomp($line);
            print LOG "$line\n";
            $node=$1 if ($line =~ /(\(0\..*\)) ssh/);
            $swappiness{$node}=$1 if ($line =~ /vm.swappiness\s*=\s*(\d+)/);
            if ($line =~ /Swap/) {
                my (undef,$swap,$used)=split(' ',$line);
                # Always record the node, even with zero swap; otherwise a
                # node without swap would never be reported below.  The
                # values from "free" are scaled down to whole GB.
                $nodeswap{$node}+=($swap>0) ? int($swap/1000/1000) : 0;
                $nodeused{$node}+=($used>0) ? int($used/1000/1000) : 0;
            }
        }
        close($fh);
    } else {
        print LOG "ERROR: checkswap cannot open $TMPFILE: $!\n";
    }

    # List repetition needs the parentheses: ""x3 is a single empty string.
    my ($noswap,$moreswap,$swapping)=("")x3;
    my $susemsg="";
    for my $n (sort keys %nodeswap) {
        print LOG "node:$n used:$nodeused{$n} swap:$nodeswap{$n} os:$NODE_INFO{$n}{os}\n";
        $noswap.="ERROR: Node $n has no swap\n" if ($nodeswap{$n}==0);
        $moreswap.="ERROR: Node $n has $nodeswap{$n}GB of swap which is less than 12GB to 16GB required\n" if ($nodeswap{$n}<12 and $n ne "(0.s)" and $DDCNT>=1 and !$VBA);

        # Without swap there is no usage ratio (and no division by zero).
        next if ($nodeswap{$n}==0);

        if ( $nodeused{$n} / $nodeswap{$n} > .50 ) { # used more than 50% of swap.
            if ($NODE_INFO{$n}{os} =~ /suse|sles/i){
                $swapping.="WARNING: Node $n has used $nodeused{$n}GB of $nodeswap{$n}GB swap space. Swappiness is $swappiness{$n}\n";
                $susemsg=1;
            } else {
                $swapping.="WARNING: Node $n has used $nodeused{$n}GB of $nodeswap{$n}GB swap space\n";
            }
        }
    }

    # Later assignments win: a swap size failure overrides a usage warning.
    my $swap="PASSED";
    if ($noswap) {
        printboth("${noswap}RESOLUTION: No swap is probably because /etc/fstab swap entry does not match partition table.\n");
        printboth(" Compare /etc/fstab to disk drives partition table (fdisk -l <device>)\n");
        printboth(" Any swap being used needs to be investigated\n\n");
        $swap="FAILED";
    }
    if ($swapping) {
        if ($susemsg) {
            printboth("${swapping}RESOLUTION: See KB466431 for more info to set swappiness on SuSE\n");
            printboth(" See KB465947 for swap issues on Utility nodes\n");
            printboth(" Changing swappiness takes a few days to free up swap used\n\n");
        } else {
            printboth("${swapping}RESOLUTION: Determine what is causing the swapping. \n\n");
        }
        $swap="WARNING";
    }
    if ($moreswap and !$VBA ) {
        printboth("${moreswap}RESOLUTION: See KB 304257 to correct swap\n\n");
        $swap="FAILED";
    }
    msg("Swap Space",$swap);
}
1158########## End checkswap ##########
1159
1160########## Start replication ##########
sub replication {
    # Report replication partners (plugin based, cron based and sources that
    # replicate to this grid) and, when this grid replicates and the run is
    # neither a pre-upgrade nor an add-node check, look for avtar/avrepl
    # binaries with known replication bugs.
    print LOG "\n\n\n### ".localtime()." ### Starting replication\n";

    getavamarver() if (!$AVTAR_VERSION);

    if (-e "/usr/local/avamar/etc/repl_cron.cfg" and $UPGRADE_VERSION >= '7' ) {
        printboth("WARNING: Cron based replication is supported in 7.3 but cron jobs must be manually fixed to run as admin instead of DPN.\n");
        printboth(" The next major release of Avamar after 7.3 will no longer support cron based replication\n");
        printboth("RESOLUTION: Modify DPN cron jobs to run as admin.\n\n");
        msg("Cron Replication","WARNING");
    }

    my $replicating=0;
    # Targets already reported, keyed by lower-cased name.  Declared at
    # function scope so plugin and cron targets share one duplicate filter.
    my %dupname;

    # Plugin based replication targets
    if ($dbh) {
        if ($VERSNUM >= 700 ) {
            my $sql=($VERSNUM>=730)
                ? qq[ select distinct value from repl_config_param c, repl_destinations d where d.dest_id=c.dest_id and c.name='dstaddr' ]
                : qq[ select distinct lower(name) from repl_destinations ];
            my $sth=$dbh->prepare($sql);
            $sth->execute;
            my $agent_checked=0;
            while (my @row=$sth->fetchrow_array()) {
                $replicating=1;
                # avagent is only checked once, for the first target found.
                if (!$agent_checked) {
                    $agent_checked=1;
                    if (!`ps -ae | grep avagent.bin`) {
                        printboth("WARNING: Plugin replication appears to be configured but avagent is not running\n");
                        printboth("RESOLUTION: If plugin replication is configured then find out why avagent is not running\n\n");
                        msg("Plugin Replication","WARNING");
                    }
                }
                next if ($dupname{lc($row[0])});
                $dupname{lc($row[0])}=1;
                msg("Replication Partner","Source to $row[0]");
            }
        }
    } else {
        printboth("WARNING: MCS database not connected. Skipping check\n");
        printboth("RESOLUTION: Make sure MCS is up and running\n\n");
        msg("Replication Partner","WARNING");
    }

    # Cron based replication targets.
    # q[] keeps the backslash of \s; inside qq[] it was dropped, turning the
    # pattern into '^s*--dstaddr'.
    my $cron_cmd=q[ grep -P '^\s*--dstaddr' /usr/local/avamar/etc/repl*_cron*.cfg 2>/dev/null | sort -u ];
    if (open(my $cron_fh, '-|', $cron_cmd)) {
        while (my $line = <$cron_fh>) {
            chomp($line);
            print LOG "$line\n";
            my (undef,$name)=split(/=/,$line);
            my $key=lc(defined($name) ? $name : "");
            next if ($dupname{$key});
            $dupname{$key}=1;
            $replicating=1;
            msg("Replication Partner","Source to $name");
        }
        close($cron_fh);
    } else {
        print LOG "ERROR: cannot run '$cron_cmd': $!\n";
    }

    # Normal replication sources
    my $src_cmd=q[ avmgr getl --path=/REPLICATE 2>&1 | tail -n +2 ];
    if (open(my $src_fh, '-|', $src_cmd)) {
        while (my $line = <$src_fh>) {
            chomp($line);
            print LOG "$line\n";
            my (undef,$name)=split(' ',$line);
            msg("Replication Partner","Target for $name");
        }
        close($src_fh);
    } else {
        print LOG "ERROR: cannot run '$src_cmd': $!\n";
    }

    ###
    ### Skip if not replication, preupgrade or add node
    ###
    return if (!$replicating or $PREUPGRADE or $ADDNODE);

    ### Look for DD repl avtar issues
    if (%DD) {
        my %replavtar;
        $replavtar{"6.0.101-66"} = { bug=>36954, dd=>1, res=>"See KB302080 for more information"};
        $replavtar{"6.0.102-156"}= { bug=>53090, dd=>1 };
        $replavtar{"6.1.101-87"} = { bug=>57063, dd=>1 };
        $replavtar{"6.1.102-47"} = { bug=>57064, dd=>1 };
        $replavtar{"7.0.101-56"} = { bug=>188345, dd=>1 };
        $replavtar{"7.0.101-61"} = { bug=>188345, dd=>1 };

        my $known=$replavtar{$AVTAR_VERSION};
        if ($known and $known->{dd}) {
            printboth("ERROR: avtar.bin is not patched for data domain replication bug $known->{bug}\n");
            # Entry specific resolution when there is one, else the hotfix number.
            my $res=$known->{res} || "See hotfix $known->{bug}";
            printboth("RESOLUTION: $res\n\n");
            msg("Replication avtar binary","FAILED");
        }
    }

    ### Check for avtar's with hotfixes available
    my $hf="";
    $hf.="WARNING: Hotfix 234581 is available for avtar version $AVTAR_VERSION.\n" if ($AVTAR_VERSION eq "7.1.101-145") ;
    $hf.="WARNING: Hotfix 228382 is available for avtar version $AVTAR_VERSION.\n" if ($AVTAR_VERSION eq "7.1.100-370") ;
    $hf.="WARNING: Hotfix 202260 is available for avtar version $AVTAR_VERSION.\n" if ($AVTAR_VERSION eq "7.1.101-61") ;
    # check avreplicator.pl

    if ($hf) {
        printboth("${hf}RESOLUTION: See bug for more information about the bug and if it is required for this grid\n\n");
        msg("Replication avtar binary","FAILED");
    }

    ### Check for avrepl.bin issues
    chomp(my $avrepl_version=`/usr/local/avamar/bin/avrepl.bin --version | sed '/^ *version:/!d; s/^ *version: *//'`);
    print LOG "avrepl.bin version = '$avrepl_version'\n";
    my %avrepl_bugs;
    $avrepl_bugs{"7.3.100-226"} = { bug=>261631, res=>"See KB485094 for more information"};

    if ( defined($avrepl_bugs{$avrepl_version}) ) {
        printboth("ERROR: avrepl.bin version $avrepl_version requires hotfix $avrepl_bugs{$avrepl_version}{bug} or later\n");
        printboth("RESOLUTION: $avrepl_bugs{$avrepl_version}{res}\n\n");
        msg("Replication avrepl binary","FAILED");
    }
}
1278########## End replication #########
1279
1280
1281########## Start mandatoryupgrade ##########
1282# Check if a mandatory upgrade is required
sub mandatoryupgrade
{
    # Report whether the installed Avamar version ($AVAMARVER) is one that
    # must be upgraded.  Additional versions become mandatory when a Data
    # Domain is attached (%DD is non-empty).
    print LOG "\n\n\n### ".localtime()." ### Starting mandatoryupgrade\n";

    my %mandatoryupgrade = (
        "3.7.1-93"  => "must be upgraded to 3.7.1-100 or later",
        "4.0.0-321" => "must be upgraded to 4.0.3-28 or later",
        "4.0.1-30"  => "must be upgraded to 4.0.3-28 or later",
        "4.0.2-27"  => "must be upgraded to 4.0.3-28 or later",
        "4.0.2-35"  => "must be upgraded to 4.0.3-28 or later",
        "6.0.0-580" => "must be upgraded to 6.0.1-66 or later",
        "6.0.0-592" => "should schedule an upgrade to 6.0.1-66 or later",
        "7.1.1-141" => "must be upgraded to 7.1.1-145 or later",
        "7.2.1-31"  => "must be upgraded to 7.2.1-32 or later"
    );

    if (%DD) {
        # Merge the Data Domain rules into the table above.  A second
        # "my %mandatoryupgrade" here would only create a block scoped copy
        # and the rules would never be applied.
        %mandatoryupgrade = (
            %mandatoryupgrade,
            "6.0.1-65"  => "must be upgraded to 6.0.1-66 or later when Data Domain is attached (KB302080)",
            "6.0.0-580" => "must be upgraded to 6.0.1-66 or later when Data Domain is attached (KB302080)",
            "6.0.0-592" => "must be upgraded to 6.0.1-66 or later when Data Domain is attached (KB302080)"
        );
    }

    # Without Data Domain, 6.0.0-592 only gets a recommendation.
    if ($AVAMARVER eq "6.0.0-592" and !%DD) {
        printboth("WARNING: It is not required but an upgrade should be scheduled to 6.0.1-66 or later\n\n");
        msg("Mandatory Upgrades","WARNING");
        return;
    }

    if ( $mandatoryupgrade{$AVAMARVER}) {
        printboth("ERROR: Version $AVAMARVER $mandatoryupgrade{$AVAMARVER}\n\n");
        msg("Mandatory Upgrades","FAILED");
    } else {
        print LOG "No mandatory upgrade found\n";
        msg("Mandatory Upgrades","PASSED");
    }
}
1318########## End mandatoryupgrade ##########
1319
1320
1321########## Start checkversion ##########
1322# Check that valid Avamar version found
sub checkversion {
    # Pass when $AVAMARVER is one of the entries of @supportedversions,
    # otherwise report the version as unknown.
    print LOG "\n\n\n### ".localtime()." ### Starting checkversion\n";
    my $known = grep { $AVAMARVER eq $_ } @supportedversions;
    if (!$known) {
        printboth("ERROR: Version $AVAMARVER is not known and should be upgraded to a supported version\n\n");
        msg("Version Supported","FAILED");
    } else {
        print LOG "Version $AVAMARVER is supported\n";
        msg("Version Supported","PASSED");
    }
}
1333########## End checkVersion ##########
1334
1335
1336########## Start gsanpatches ##########
# Check whether GSAN requires a patch
sub gsanpatches {
    # Compare the hotfix encoded in the data node GSAN version
    # ($DATANODEVERSION, "<version>-<build>[_HF<bug>]") with the latest hotfix
    # known for that version and report when a newer GSAN is available.
    # Skipped for AER node types.
    print LOG "\n\n\n### ".localtime()." ### Starting gsanpatches\n";
    getavamarver() if (!$DATANODEVERSION);
    getnodetype() if (!$NODETYPE);
    if ($NODETYPE =~ /AER/ ) {
        print LOG "Skip for AER\n";
        return;
    }

# Define gsan patches. can override default and buglink severity by adding sev=>"FAILED"
# example "5.0.4" => { sev=>"INFO", desc=>"Schedule an upgrade to a newer version. Alternatively apply hotfix 30432" },
    my %gsanpatches = (
        "5.0.0" => { bug=>30432 },
        "5.0.1" => { bug=>30432 },
        "5.0.2" => { bug=>30432 },
        "5.0.3" => { bug=>30432 },
        "5.0.4" => { bug=>30432 },
        "6.0.0" => { bug=>36424},
        "6.0.1" => { bug=>36424},
        "6.0.2" => { bug=>36424},
        "6.1.0" => { bug=>51932},
        "6.1.1" => { bug=>51932},
        "6.1.2" => { bug=>225098},
        "7.0.0" => { bug=>200794},
        "7.0.1" => { bug=>200794},
        "7.0.2" => { bug=>200794},
        "7.1.1" => {bug=>275857},
        "7.1.2" => {bug=>275857}, # 2017-02-24 11:11:50
        "7.2.0-401" => {bug=>278646}, # 2016-05-04 08:01:32. ignore 260098 for now.
        "7.2.1-32" => {bug=>299101}, # 2018-05-23 11:03:27
        "7.3.0-226" => {bug=>298625},
        "7.3.0-233" => {bug=>298625}, # 2017-02-24 10:59:32
        "7.3.1-125" => {bug=>298753}, # 2018-06-04 01:26:07
        "7.4.0-242" => {bug=>298754}, # 2018-05-18 09:58:06
        "7.4.1-58" => {bug=>302842}, # 2018-12-07 09:22:33
        "7.5.0-183" => {bug=>315476}, # 2019-11-22 09:55:53
        "7.5.1-101" => {bug=>315476}, # 2019-11-22 09:55:53
        "18.1.0-33" => {bug=>309829}, # 2019-04-29 09:09:56
        "18.2.0-134" => {bug=>312549}, # 2019-08-19 11:09:04
    );

# NOTE: 7.5.0-183 and 7.5.1-101 are using the same hotfixes
# update 12/17/2019

# Define bug links. Severity defaults to info. can override for entire bug here by adding sev=>"FAILED"
    my %buglinks = (
        30432 => {link=>"ftp://ftp.avamar.com/software/hotfixes/30432/README" },
        36424 => {link=>"ftp://ftp.avamar.com/software/hotfixes/36424/README_HF-36424.htm" },
        51932 => {link=>"ftp://ftp.avamar.com/software/hotfixes/51932/README_hf51932.htm" },
        200794 => {link=>"ftp://ftp.avamar.com/software/hotfixes/200794/README.htm"},
        244875 => {link=>"ftp://ftp.avamar.com/software/hotfixes/244875/README.htm"},
        261790 => {link=>"ftp://ftp.avamar.com/software/hotfixes/261790/README.htm", sev=>"FAILED", KB=>484891 },
    );

    # $gsan_maj: version without build, $gsan_ver: version with build,
    # $current_hotfix: number following "_HF" (empty when no hotfix suffix).
    my ($gsan_maj)=split(/-/,$DATANODEVERSION);
    my ($gsan_ver,$current_hotfix)=split(/_/,$DATANODEVERSION);
    $current_hotfix="" if (!defined($current_hotfix));
    $current_hotfix =~ s/^HF//;

# Get bug#: an exact version-build entry wins over the version only entry
    my $bug=$gsanpatches{$gsan_ver}{bug} || $gsanpatches{$gsan_maj}{bug};

    # Missing values compare as 0, so "no known hotfix, none installed" passes.
    if ( ($bug || 0) == ($current_hotfix || 0) ) {
        print LOG "on latest hotfix\n";
        msg("GSAN Patches","PASSED");
        return;
    }

    if ( ($bug || 0) < ($current_hotfix || 0) ) {
        print LOG "current hotfix newer then phc\n";
        msg("GSAN Patches","PASSED");
        return;
    }

# see if last 5.0.4-906 is installed (HF30432 and not HF28864 which has bad avmaint);
    $bug="" if ($DATANODEVERSION eq "5.0.4-906" and $AVMAINT_VERSION ne "5.0.4-906") ;

    print LOG "DATANODEVERSION=$DATANODEVERSION Ver=$gsan_ver GSANMAJOR=$gsan_maj CurrHF=$current_hotfix bug=$bug\n";

    if ($bug){
        # Set severity.  The table is keyed without the _HF suffix, so look up
        # $gsan_ver rather than the full $DATANODEVERSION.
        my $sev=$gsanpatches{$gsan_ver}{sev} || $gsanpatches{$gsan_maj}{sev} || $buglinks{$bug}{sev} || "INFO";
        # Set Resolution
        my $KB=$gsanpatches{$gsan_ver}{KB} || $gsanpatches{$gsan_maj}{KB} || $buglinks{$bug}{KB} || "";
        my $link=$buglinks{$bug}{link};
        my $resolution=$KB   ? "See KB$KB for more information "
                      :$link ? "Update GSAN if needed based on the readme file $link"
                      :        "See hotfix $bug for more information";

        printboth("$sev: Updated GSAN $bug is available.\n");
        printboth("RESOLUTION: $resolution\n\n");
        msg("GSAN Patches",$sev);
    } else {
        print LOG "No gsan patches found\n";
        msg("GSAN Patches","PASSED");
    }
}
1437########## End gsanpatches ##########
1438
1439########## Start mcspatches ##########
1440# Check that emserver.jar & mcserver.jar patches applied
sub mcspatches {
    # Check whether mcserver.jar carries the latest known MCS hotfix for the
    # installed Avamar version.  The jar counts as patched when its md5sum,
    # its sha256sum or the hotfix number in $MCSERVER_VERSION matches the
    # table entry for the version.  Skipped for AER node types.
    print LOG "\n\n\n### ".localtime()." ### Starting mcspatches\n";
    getinstalledversion() if (!$AVAMARVER);
    getavamarver() if (!$MCSERVER_VERSION);
    getnodetype() if (!$NODETYPE);
    getopersys() if (!$OS);
    if ($NODETYPE =~ /AER/ ) {
        print LOG "Skip for AER\n";
        return;
    }
    chomp(my $sha=`sha256sum /usr/local/avamar/lib/mcserver.jar | awk '{print \$1} '`);

    # Hotfix number is the second "_" separated field of the MCS version.
    my (undef,$mchotfix) = split(/_/,$MCSERVER_VERSION);
    $mchotfix="" if (!defined($mchotfix));
    $mchotfix =~ s/HF//;
    print LOG "AVAMARVER=$AVAMARVER MCS=$MCSERVER_VERSION HF=$mchotfix sha256sum=$sha md5=$MCSERVER_MD5SUM\n";

    # Bad MCS version that require an upgrade.
    # Built as a lookup set: a plain three element list would turn the second
    # checksum into the value of the first and leave the third undefined.
    my %badmd5sum=map { $_ => 1 } ("213b76ca717eaa2e2964488f3e197ab7",
                                   "152bcfa79be98942862d977e669178b5",
                                   "85f4e8fa4e629a6849d0542af2f2db45" );
    if ($badmd5sum{$MCSERVER_MD5SUM}) {
        printboth("ERROR: MCS Version $MCSERVER_VERSION has serious bugs\n");
        printboth("RESOLUTION: This version cannot be patched an upgrade is required. See bug 33101 for more info\n\n");
        msg("MCS Patches","FAILED");
        return;
    }

    # bug (bug#), md5sum / sha256sum (mcserver.jar sum of patch), esg (KB used instead of bug if avail)
    # error (print instead of default err msg), resolution (print instead of default resolution)
    my %mcspatch;
    $mcspatch{"5.0.0-410"} = { error=>"MCS Version $MCSERVER_VERSION has serious bugs",
                               resolution=>"This version cannot be patched an upgrade is required. See bug 33101 for more info" };
    $mcspatch{"5.0.1-32"} = { bug=>33101, md5sum=>"679bfe31df12c20d5405e4005ee170f0", esg=>"KB302087" };
    $mcspatch{"5.0.2-41"} = { bug=>33101, md5sum=>"b4dbd0ebc93b6603729623c3a5dba652", esg=>"KB302087" };
    $mcspatch{"5.0.3-29"} = { bug=>33101, md5sum=>"7bdecdce6c7cefb51c527b56d1e3e19f", esg=>"KB302087" };
    $mcspatch{"5.0.4-30"} = { bug=>36971, md5sum=>"0c22b5e86340cf1ac3bbc137fd19693c", esg=>"KB302087" };
    $mcspatch{"6.0.0-592"} = { bug=>36897, md5sum=>"c7d59a7c98e1b02ec082d488e78286d6", esg=>"KB302087" };
    $mcspatch{"6.0.1-66"} = { bug=>37753, md5sum=>"51ae5e4426435f6648ae50877e3ca8dc", esg=>"KB302087" };
    $mcspatch{"6.0.2-153"}= { bug=>46907, md5sum=>"b261587b8ea6908372a757969f98bde5"};
    $mcspatch{"6.0.2-156"}= { bug=>50844, md5sum=>"b90234dadf0b55ff2eb342f7448e1b29"};
    $mcspatch{"6.1.0-276"} = { bug=>49734, md5sum=>"06f180076d5c27c7c4197f481aff7b5b" };
    $mcspatch{"6.1.0-280"} = { bug=>49734, md5sum=>"06f180076d5c27c7c4197f481aff7b5b" };
    $mcspatch{"6.1.0-333"} = { bug=>49734, md5sum=>"06f180076d5c27c7c4197f481aff7b5b" };
    $mcspatch{"6.1.0-402"} = { bug=>49734, md5sum=>"06f180076d5c27c7c4197f481aff7b5b" };
    $mcspatch{"6.1.1-81"} = { bug=>51416, md5sum=>"d2373e5259362aba5f1287ed06c236f7" };
    $mcspatch{"6.1.1-87"} = { bug=>200044, md5sum=>"cfbd8589b40b005bf2e504f8c4f99d0d" };
    $mcspatch{"6.1.2-47"} = { bug=>228097, md5sum=>"2930d658878e5c31723b2266514ac0f5" };
    $mcspatch{"7.0.0-427"} = { bug=>196804, md5sum=>"1ccf5546aede3b0473b0c397e0fe327f" };
    $mcspatch{"7.0.1-61"} = { bug=>205494,md5sum=>"dce592c7a811c86ee6ee2ab19949cf1a" };
    $mcspatch{"7.0.2-43"} = { bug=>197922,md5sum=>"818e3baa91e9ebef64e2fdc8b29e8d0f" };
    $mcspatch{"7.0.2-47"} = { bug=>256732,md5sum=>"1bd54a64a71c55f7bb7eae221a572460" };
    $mcspatch{"7.0.3-32"} = { bug=>242697,md5sum=>"8b8af3eb35e100bfcad9ef9f5add6a5b" };
    $mcspatch{"7.1.0-370"} = { bug=>241893,md5sum=>"63cae9251df054c002559292d5df1011" };
    $mcspatch{"7.1.1-141"} = { bug=>225423,md5sum=>"99518dd44fa34ea001760f1122ea56aa" };
    $mcspatch{"7.1.1-145"} = { bug=>258017,md5sum=>"6dc48adc4f6b613efca3f434f82f232e" };
    $mcspatch{"7.1.2-21"} = { bug=>271927, sha256sum=>"b07d2eeb9a7045debcad724ddc170703f688d91318cde3b2c9d70e82317e40df" }; # 2017-01-30 10:29:41
    $mcspatch{"7.1.2-21-rhel"}={bug=>247565,md5sum=>"418ff94170afd4348e8c7b2ef9a38c7c" }; # 2017-01-30 10:29:41
    $mcspatch{"7.2.0-401"} = { bug=>279816, sha256sum=>"f3731202c67ba2aea49b0f5b0eca1fc97d50ac2c524d6b6b4f9e7227433e166a" }; # 2017-04-27 02:58:47
    $mcspatch{"7.2.1-32"} = { bug=>295614, sha256sum=>"9e601f926f2e433c787fdf7735efafab4c15658f65991fc4511124966775442a" } ; # 2018-08-01 09:00:39
    $mcspatch{"7.3.0-226"} = { bug=>260344, md5sum=> "dc3859ac07c37c540f880734993e3785", esg=>"KB483144" }; # this is newest
    $mcspatch{"7.3.0-233"} = { bug=>286090, sha256sum=>"d9480700e6fd0261acf68aa188e034e401689ab798fc9006f64bf55e65f5e6ef" } ; # 2017-09-05 07:26:58
    $mcspatch{"7.3.1-125"} = { bug=>298951, sha256sum=>"a980e0dabf5a88f5e784ac34b8cc4654b42ac156351ef62868fdeff9dabc991c" } ; # 2018-08-13 10:36:00
    $mcspatch{"7.4.1-58"} = { bug=>311380, sha256sum=>"81cd3419468175f72ed154c98fbfb56dd08231f3dc8d8c46a385936a887b41ef" } ; # 2019-09-03 13:26:14
    $mcspatch{"7.5.0-183"} = { bug=>311826, sha256sum=>"6245c53ce52113366d9c656e91600ac896b2a825efd2f7f5b1ff362f11c75030" } ; # 2019-09-06 08:14:48
    $mcspatch{"7.5.1-101"} = { bug=>311381, sha256sum=>"a25b7ddef4ad90e73bea0b0e4e51e36cbaa04f9c274fe909a3991018128dfc46" } ; # 2019-08-06 07:33:59
    $mcspatch{"18.1.0-33"} = { bug=>315214, sha256sum=>"ae00afebb809c0e6cce38a1df4cb330d2e47915aa6424a386562444a5ec0fb6e" } ; # 2019-12-02 09:25:25
    $mcspatch{"18.2.0-134"} = { bug=>314524, sha256sum=>"ee0762a5936912fdaf54587178f2eb24dac31a7fb9b33c670b79620bace16d31" } ; # 2019-10-29 09:20:19
    $mcspatch{"19.1.0-38"} = { bug=>315222, sha256sum=>"03a597b4d9adfb3f7bf58924ec3a6a5f479276bf88e93675fbdf155762336536" } ; # 2019-11-22 09:46:04

# Updated 12/17/2019

    # A "-rhel" entry, when one exists for this version, applies on Red Hat.
    my $tmpver=$AVAMARVER;
    $tmpver.="-rhel" if ( $OS =~ /rhel|redhat/ and defined($mcspatch{"$AVAMARVER-rhel"}) ) ;
    print LOG "tmpver=$tmpver\n";

    my $patch=$mcspatch{$tmpver};
    if ($patch) {
        # Only compare against values the table entry actually defines, so an
        # empty hotfix or checksum never "matches" a missing table value.
        my $patched=0;
        $patched=1 if ($patch->{md5sum}    and $MCSERVER_MD5SUM eq $patch->{md5sum});
        $patched=1 if ($patch->{bug}       and $mchotfix eq $patch->{bug});
        $patched=1 if ($patch->{sha256sum} and $sha eq $patch->{sha256sum});

        if ($patched) {
            print LOG "No Update required\n";
        } else {
            my $bug=$patch->{bug};

            # compile all event code 1 for 50070
            bug47560() if ($bug and $bug == 51416);

            if ($bug and $mchotfix and $bug < $mchotfix) {
                printboth("INFO: Current hotfix $mchotfix appears to be newer than latest known $bug\n");
                printboth("RESOLUTION: If this is correct no action needs to be taken\n\n");
                msg("MCS Patches","INFO");
                return;
            }

            # Entry specific texts override the defaults; "resolution" wins
            # over "esg", which wins over the plain hotfix reference.
            my $err=$patch->{error} || "MCS version $AVAMARVER requires hotfix $bug or later";
            printboth("INFO: $err\n");
            my $msg="See hot fix $bug for more information";
            $msg="See $patch->{esg} for more information" if ($patch->{esg});
            $msg=$patch->{resolution} if ($patch->{resolution});
            printboth("RESOLUTION: $msg\n\n");
            msg("MCS Patches","INFO");
            return;
        }
    }
    msg("MCS Patches","PASSED");
}
1547########## End mcspatches ##########
1548
1549
1550########## Start avagent ##########
sub avagent {
    # Flag the one avagent.bin build that is known to need a hotfix.
    print LOG "\n\n\n### ".localtime()." ### Starting avagent\n";
    my $version=`sudo /usr/local/avamar/bin/avagent.bin --version | sed '/^ *version:/!d; s/^ *version: *//'`;
    chomp($version);
    print LOG "avagent version = $version\n";
    if ("7.3.100-226" eq $version) {
        printboth("ERROR: avagent.bin version $version requires hotfix 262112 or later\n");
        printboth("RESOLUTION: See KB484273 & KB484778 for more info\n\n");
        msg("avagent binary","FAILED");
    }
}
1561########## End avagent ##########
1562
1563
1564########## Start getnodetype ##########
# Determining deep or shallow node
sub getnodetype {
    # Identify the hardware type of every node and run the related checks.
    #
    # Argument: $exit - when true, the "Available Disk Space" PASSED message
    #                   is suppressed (a FAILED result is still reported).
    #
    # Each node is keyed as "[4t-]<data TB>-<memory GB>-<data partitions>",
    # built from df/free output, and looked up in %hwconf (Dell) or %emcconf
    # (EMC) to get a part number, whose description comes from %PARTLIST.
    # The result is cross-checked against the part number in the nodelist
    # hardware-id.  Sets $NODETYPE, %PARTLIST and the per-node entries
    # $NODE_INFO{node}{memory,gendesc,partno}, and reports node type, node
    # size consistency, free disk space, EOSL hardware, hardware generation
    # and hardware supplier through printboth()/msg().
    my $exit=shift;

    # Dell hardware: node key => part number
    my %hwconf=(
        "10-32-6" => "100-580-617",
        "5-36-3" => "100-580-618",
        "3-24-2" => "100-580-619",
        "1-12-1" => "100-580-620",
        "1-6-4" => "100-580-601",
        "2-18-3" => "100-580-602",
        "4-36-1" => "100-580-603",
        "2-16-3" => "100-580-584",
        "1-4-4" => "100-580-585",
        "9-32-5" => "100-580-622",
        "0-12-1" => "100-580-616",
        "0-11-1" => "100-580-616",
    );
    # EMC hardware: node key => part number ("4t-" prefix = CPU model matched /2609/)
    my %emcconf=(
        "10-32-6" => "100-580-642",
        "10-32-1" => "100-580-682",
        "5-32-3" => "100-580-643",
        "2-16-3" => "100-580-644",
        "9-32-5" => "100-580-646",
        "0-12-1" => "100-580-640",
        "0-11-1" => "100-580-640",
        "0-32-1" => "100-580-641",
        "4t-10-32-6" => "100-580-203",
        "4t-10-32-1" => "100-580-208",
        "4t-5-32-3" => "100-580-202",
        "4t-2-16-2" => "100-580-207",
        "4t-2-15-2" => "100-580-207",
        "4t-1-15-1" => "100-580-201",
        "4t-1-48-1" => "100-580-205",
    );
    # Part number (or AVE size class) => description and limits
    %PARTLIST=(
        "100-580-601" => { desc=>"1.0TB Gen3", maxstripe=>15000 },
        "100-580-602" => { desc=>"Gen3-2.0TB", maxstripe=>46000 },
        "100-580-603" => { desc=>"Gen3-3.3TB", maxstripe=>46000 },
        "100-580-585" => { desc=>"Gen2-1.0TB", maxstripe=>15000 },
        "100-580-584" => { desc=>"Gen2-2.0TB", maxstripe=>46000 },
        "100-580-575" => { desc=>"Gen1-1.0TB", maxstripe=>0 },
        "100-580-620" => { desc=>"Gen4-1.3TB", maxstripe=>31000 },
        "100-580-619" => { desc=>"Gen4-2.6TB", maxstripe=>62000 },
        "100-580-618" => { desc=>"Gen4-3.9TB", maxstripe=>92000 },
        "100-580-617" => { desc=>"Gen4-7.8TB", maxstripe=>92000, ssd=>100 },
        "100-580-616" => { desc=>"Gen4-Util/Accel", maxstripe=>0 },
        "100-580-640" => { desc=>"Gen4s-Util/Accel", maxstripe=>0 },
        "100-580-641" => { desc=>"Gen4s-L-Accel", maxstripe=>0 },
        "100-580-642" => { desc=>"Gen4s-M2400", maxstripe=>92000, ssd=>100 },
        "100-580-643" => { desc=>"Gen4s-M1200", maxstripe=>92000 },
        "100-580-644" => { desc=>"Gen4s-M600", maxstripe=>46000 },
        "100-580-646" => { desc=>"Gen4s-MAC AER", maxstripe=>0 },
        "100-580-682" => { desc=>"Gen4s-S2400", maxstripe=>92000, ssd=>100 },
        "100-580-622" => { desc=>"Gen4-AER Media Access Node", maxstripe=>0 },
        "100-580-201" => { desc=>"Gen4t-Util", maxstripe=>0 },
        "100-580-202" => { desc=>"Gen4t-M1200", maxstripe=>92000 },
        "100-580-203" => { desc=>"Gen4t-M2400", maxstripe=>92000, ssd=>100 },
        "100-580-205" => { desc=>"Gen4t-Accel", maxstripe=>0 },
        "100-580-207" => { desc=>"Gen4t-M600",maxstripe=>46000 },
        "100-580-208" => { desc=>"Gen4t-S2400", maxstripe=>92000, ssd=>100 },
        "100-580-251" => { desc=>"Gen4t-Util", IDPA=>1, maxstripe=>0 },
        "100-580-252" => { desc=>"Gen4t-M1200", IDPA=>1, maxstripe=>92000 },
        "100-580-255" => { desc=>"Gen4t-Accel", IDPA=>1, maxstripe=>0 },
        "ave-16" => { desc=>"AVE 16TB", maxstripe=>96000, minmem=>96, hypervmem=>28 },
        "ave-8" => { desc=>"AVE 8TB", maxstripe=>96000, minmem=>48, hypervmem=>28 },
        "ave-4" => { desc=>"AVE 4TB", maxstripe=>96000, minmem=>36, hypervmem=>28 },
        "ave-2" => { desc=>"AVE 2TB", maxstripe=>48000, minmem=>16, hypervmem=>14 },
        "ave-1" => { desc=>"AVE 1TB", maxstripe=>24000, minmem=>8, hypervmem=>6 },
        "ave-.5" =>{ desc=>"AVE .5TB", maxstripe=>12000, minmem=>6, hypervmem=>6 },
    );

    print LOG "\n\n\n### ".localtime()." ### Starting getnodetype\n";
    gethardware() if (!$MANUFACTURER);
    getconfiginfo() if (!$GOTCONFIGINFO);

    $NODETYPE = "";
    my $partno;
    my ($partcnt,$nodesize,$mem,$diske,$nodeerr,$node,$sawdto,$cputype);
    $origsize="S";
    # Per node: file pool presence, df output, then free output.
    $cmd=q[ test -d /data01/pool && echo "POOL" || echo "NOPOOL"; /bin/df;free -om; ];
    mapall("--all",$cmd);
    my ($cape,$poole)="";
    $news="fr";
    open(CMD_PIPE,$TMPFILE);
    while (<CMD_PIPE>) { chomp;
        print LOG "$_\n";
        # A "(0.N) ssh" header line starts the output of the next node.
        if (/(\(0\..*\)) ssh/){
            $node=$1 ;
            ($nodetot,$parttot,$memtot,$haspool)=0;
            $partno="";
            $cputype=($NODE_INFO{$node}{cpu} =~ /2609/) ? "4t-" : "";
            print LOG "cputype '$cputype' $NODE_INFO{$node}{cpu}\n";
            # 4t=2609, 4s=2603
        }
        next if (!$node);
        $haspool=1 if (/^POOL$/);
        $sawdto=1 if (/\/DTO$/);
        # Count /dataNN partitions and add up their size column.
        if (/(.*?)\s+(\d*)\s+.*(\/data[0-9]*)/) {
            $partcnt++;
            $nodesize+=$2;
        }
        # Free-space checks; $4 is the 4th df column (available), divided
        # down to MB or GB before comparing.
        if ($PREUPGRADE) {
            if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d*)\s+(\d*).*(\/space)/) {
                my $spaceneeded=($AVP_SIZE) ? $AVP_SIZE*5 : 20*1024;
                print LOG "avpsize=$AVP_SIZE needed=$spaceneeded have=". ($4/1024) ."\n";
                if ($4/1024 < $spaceneeded and $VIRTUALHW ) {
                    $diske.="ERROR: $node /space partition has less then ${spaceneeded}MB of space available\n";
                }
            }
            if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d*)\s+(\d*).*(\/data01)/) {
                if ($4/1024/1024 < 20 ) {
                    $diske.="ERROR: $node /data01 partition has less then 20GB of space available\n";
                }
            }
        }
        if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d*)\s+(\d*).*(\/var)/) {
            if ($4/1024 < 300) {
                $diske.="ERROR: $node /var partition has less then 300MB of space available\n";
            }
        }
        if (/(.*?)\s+(\d+)\s+(\d+)\s+(\d*)\s+(\d*).*(\/$)/) {
            if ($4/1024/1024 < 1 ) {
                $diske.="ERROR: $node / (root) partition has less then 1GB of space available\n";
            }
        }

        # The "Mem:" line of free is the last data needed for a node, so the
        # node is classified here and the per-node counters are reset at the
        # end of this branch.
        if (/^Mem:\s*(\d*) /){
            # +300 to fudge close up and over
            $mem=$1+300;
            $nodetot=int($nodesize/1024/1024/1024);
            if (int($nodetot) != int($lastsize) and $lastsize) {
                $cape="ERROR: Node size mismatch $lastnode is $lastsize, $node is $nodetot\n";
            }
            $lastsize=$nodetot if $node ne "(0.s)";
            $memtot=int($mem/1000);
            $NODE_INFO{$node}{memory}=$memtot;
            my $nodekey="$cputype$nodetot-$memtot-$partcnt";
            print LOG "key = $nodekey\n";
            $ThisNodeType="Unknown $nodetot TB" ;

            if ( $NODE_INFO{$node}{manufacturer} eq "dell" ) {
                $partno=$hwconf{$nodekey};
                $ThisNodeType=$PARTLIST{$partno}{desc} if (defined($PARTLIST{$partno}{desc}));
                print LOG "Dell $partno\n";
            } elsif ( $NODE_INFO{$node}{manufacturer} eq "emc" ) {
                $partno=$emcconf{$nodekey};
                $ThisNodeType=$PARTLIST{$partno}{desc} if (defined($PARTLIST{$partno}{desc}));
                print LOG "EMC $partno\n";
            }
            # NOTE(review): no desc in %PARTLIST equals "1.0TB Gen2" (the Gen2
            # entry reads "Gen2-1.0TB"), so this branch looks unreachable as
            # written - confirm whether the literal should be updated.
            if ($ThisNodeType eq "1.0TB Gen2") {
                # if gen1/2 look for perc controller to decide
                getomreport() if (!@OMREPORT_STORAGE);
                foreach (@OMREPORT_STORAGE) {
                    $omrptnode=$1 if (/(\(0\..*\)) ssh/);
                    next if ($omrptnode ne $node);
                    if (/PERC (\d)/) {
                        if ($1 ==5) {
                            $partno="100-580-575";
                            $gen=1;
                            $ThisNodeType="1.0TB Gen1";
                        }
                    }
                }
            }
            ( $nodenum = $node ) =~ s/[)(]//g;
            my $key="/nodestatuslist/nodestatus/$nodenum/hardware-id";
            print LOG "--> hardware-id $NODELIST{$key}\n--> Key: $key\n";
            my($serial,$tmppart,$rev,$ip,$ipaddr)=split("_",$NODELIST{$key});
            # use first 3 sets of numbers in part#
            my $part=join("-", (split("[-_]",$tmppart))[0,1,2]);

            print LOG "--> Part# $part\n";

            if ( $PARTLIST{$part}{desc}) {
                # The IDPA flag belongs to the part number reported by the
                # nodelist ($part).  The hardware-derived $partno can never be
                # an IDPA part because %hwconf/%emcconf contain none.
                $IDPA+=$PARTLIST{$part}{IDPA};
                print LOG "IDPA FLAG: $PARTLIST{$part}{IDPA}\n";
                if ($ThisNodeType =~ /Unknown/ and $node !~ /0.s/ ) {
                    $ThisNodeType = $PARTLIST{$part}{desc} if (defined($PARTLIST{$part}{desc}));
                    msg("Node Type","WARNING");
                    printboth("WARNING: Node $node was identified by part# $part and not hardware. disk ($nodetot), mem ($memtot), partitions ($partcnt)\n");
                    printboth("RESOLUTION: Verify health of the hardware\n\n");
                } else {
                    if ($PARTLIST{$part}{desc} ne $ThisNodeType and $node !~ /0.s/) {
                        $ThisNodeType="unknown" if (!$ThisNodeType);
                        printboth("ERROR: Node $node Part $part is a $PARTLIST{$part}{desc} but memory/disks match $ThisNodeType\n");
                        $nodeerr="x";
                    }
                }
            }

            if ( $VIRTUALHW ) {
                print LOG "--> Manufacturer=$NODE_INFO{$node}{manufacturer}. Changing node type to AVE. nodesize=$nodetot\n";
                # Pick the AVE size class from the data capacity in TB.
                if ($nodetot >=16) {
                    $partno="ave-16";
                } elsif ($nodetot >=8 ) {
                    $partno="ave-8";
                } elsif ($nodetot >=4 ) {
                    $partno="ave-4";
                } elsif ($nodetot>=2) {
                    $partno="ave-2";
                } elsif ($nodetot>=1) {
                    $partno="ave-1";
                } else {
                    $partno="ave-.5";
                }
                $ThisNodeType=$PARTLIST{$partno}{desc} if (defined($PARTLIST{$partno}{desc}));
                my $manu=$NODE_INFO{$node}{manufacturer};
                # A "<manufacturer>mem" entry (e.g. hypervmem) overrides minmem.
                my $minmem=defined($PARTLIST{$partno}{"${manu}mem"}) ? $PARTLIST{$partno}{"${manu}mem"} : $PARTLIST{$partno}{minmem} ;
                print LOG "chk ave minmem. manu=$manu memtot=$memtot minmem=$minmem\n";
                if ($IDPA) {
                    if ($memtot < 32 ) {
                        printboth("ERROR: IDPA Node $node installed memory of ${memtot}GB is less than the required 32GB\n");
                        printboth("RESOLUTION: Check IDPA minimum requirements. \n\n");
                        msg("IDPA Minimum Requirements","FAILED");
                    }
                } elsif ( ($UPGRADE_VERSION >= '7' or $VERSNUM>=700) and $memtot < $minmem ) {
                    # Report the minimum that was actually compared against.
                    printboth("ERROR: Node $node installed memory of ${memtot}GB is less than the required ${minmem}GB\n");
                    printboth("NOTE: IDPA AVE's currently only require 32GB but can't be identified from normal AVE\n");
                    printboth("RESOLUTION: Memory requirements changed from v6 to v7. See AVE Installation guide for minimum requirements. \n\n");
                    msg("AVE Minimum Requirements","FAILED");
                }
            }
            $ThisNodeType="IDPA $ThisNodeType" if ($IDPA);
            # Add each distinct node type to the summary once; \Q..\E keeps
            # characters such as "." in the description literal.
            if ($NODETYPE !~ /\Q$ThisNodeType\E/) {
                print LOG "Adding: $ThisNodeType to $NODETYPE \n" if ($DEBUG);
                $NODETYPE .= ", " if ($NODETYPE);
                $NODETYPE .= $ThisNodeType;
            }
            $NODE_INFO{$node}{gendesc}=$ThisNodeType;
            print LOG "node $node partno=$partno\n";
            $NODE_INFO{$node}{partno}=$partno;
            print LOG "--> H/W=$ThisNodeType P/N=$PARTLIST{$part}{desc} size=$nodetot, partitions=$partcnt, memory=$mem\n";
            if ( $ThisNodeType =~/Unknown/) {
                printboth("WARNING: Node $node Unknown node type based on disk size ($nodetot TB), memory ($memtot GB) and disk partitions ($partcnt)\n");
                $nodeerr="x";
            }
            # NOTE(review): no desc in %PARTLIST equals "3.3TB Gen3" (the entry
            # reads "Gen3-3.3TB"), so this check looks unreachable as written -
            # confirm whether the literal should be updated.
            if ($ThisNodeType eq "3.3TB Gen3" and !$haspool and $PREUPGRADE and $node !~ /0.s/ ) {
                printboth("ERROR: Node $node is missing the file pool\n");
                $poole="yes";
            }
            ($partcnt,$nodesize,$mem)=0;
            $lastnode=$node;
        }
    }
    $origsize.="P";
    if ($poole) {
        if ($PREUPGRADE) {
            printboth("RESOLUTION: Upgrade will require a work around. See RCM coach for additional information\n\n");
        } else {
            printboth("RESOLUTION: Rebuild node onto new hardware or for multiple nodes contact support/engineering\n\n");
        }
        msg("Gen3 3.3TB File Pool","FAILED");
    }
    $news.="ie";
    if ($cape) {
        printboth($cape);
        printboth("RESOLUTION: Make sure all nodes have the same capacity\n\n");
        msg("Node Size Consistent","FAILED");
    }

    if ( $nodeerr ) {
        printboth("RESOLUTION: Thoroughly check hardware status, size of partitions, number of partitions and amount of memory\n\n");
        msg("Node Type","FAILED");
    }

    $origsize.="W";
    my $size="1x$NODE_COUNT ";
    $size="Single Node " if ($NODE_COUNT == 1 );
    msg("Node Type",$size.$NODETYPE);

    if ($diske) {
        printboth($diske);
        printboth("RESOLUTION: Reduce the usage of the partition\n\n");
        msg("Available Disk Space","FAILED");
    } else {
        msg("Available Disk Space","PASSED") if !$exit;
    }

    if ($NODETYPE =~ /Gen[123]|Gen4-/i ) {
        if ($PREUPGRADE) {
            printboth("ERROR: This Server cannot be upgraded because Hardware is End-Of-Service-Life.\n");
            printboth("RESOLUTION: Please contact the Sales Account Team or the DSM to meet with the customer to discuss their best course of action.\n\n");
            msg("Hardware EOSL","FAILED");
        } else {
            printboth("INFO: The hardware has reached the End-Of-Service-Life.\n");
            printboth("RESOLUTION: Please contact the Sales Account Team or the DSM to meet with the customer to discuss their best course of action.\n\n");
            msg("Hardware EOSL","INFO");
        }
    } else {
        msg("Hardware EOSL","PASSED");
    }
    # Symbolic reference: assigns $news."nd" to the package variable whose
    # name was built up in $origsize above.
    $$origsize=$news."nd";
    if ( ! $VIRTUALHW ) {
        my ($generr,$nopart)=("","");
        for $node(sort @NODES) {
            if ($PREUPGRADE and $NODELIST{"/nodestatuslist/nodestatus/$node/hardware-id"} !~ /100[-_]/ ) {
                if ($AVAMARVER =~ /7.3.0/ and $NODETYPE =~ /gen4t/i) {
                    print LOG "No part # but due to bug 266725 thats expected\n";
                } else {
                    $nopart.="WARNING: Node $node does not have an EMC part number in the nodelist output\n";
                }
            }
            if ($NODELIST{"/nodestatuslist/nodestatus/$node/sysconfig/hwcheck/generation"} eq "0" ) {
                $generr.=("ERROR: Node $node The nodelist hardware generation is being identified as '0'\n");
            }
        }
        if ($generr){
            printboth($generr);
            printboth("RESOLUTION: Find out why hardware is not correctly being identified. See KB470058\n\n");
            msg("Hardware Generation","FAILED");
        }
        if ($nopart) {
            printboth($nopart);
            printboth("RESOLUTION: Ensure hardware is supplied by EMC. See KB 474924 for more info\n");
            printboth(" Customer supplied hardware is no longer supported for upgrades to 6.1 or higher\n\n");
            msg("Hardware Supplier","WARNING");
        }
    }
}
1881########## End getnodetype ##########
1882
1883
1884########## Start lastflush ##########
1885# Check for number of mcs/ems flushes in past 24 hours
sub lastflush {
    # Reports whether a flush backup exists under /MC_BACKUPS (and, when
    # applicable, /EM_BACKUPS) dated within the last 24 hours.
    print LOG "\n\n\n### ".localtime()." ### Starting lastflush\n";

    # One entry per flush type: backup path, log label, wording for messages.
    my @flush_checks = (
        { path => "/MC_BACKUPS", label => "MCS Flush", name => "MCS",
          script => "mcserver.sh", title => "MC flush in past 24 hours" },
    );
    # Removed in 7.2
    # NOTE(review): as written this gate only skips EMS for VBA systems at
    # 7.2 or later; confirm that non-VBA 7.2+ servers should still be checked.
    if (!$VBA or $VERSNUM<720) {
        push(@flush_checks,
            { path => "/EM_BACKUPS", label => "EMS FLUSH", name => "EMS",
              script => "emserver.sh", title => "EM flush in past 24 hours" });
    }

    foreach my $check (@flush_checks) {
        my $path = $check->{path};

        # Most recent backup on or after yesterday's date, header lines dropped.
        my $recent = qx{avtar --backups --path=$path --after=`date --date="24 hours ago" '+%Y-%m-%d'` --noinformationals --count=1 | tail -n +3};
        chomp($recent);
        print LOG "$check->{label}: $recent\n";

        if ($recent) {
            msg($check->{title},"PASSED");
            next;
        }

        # Nothing recent: show date and time of the newest flush that exists.
        my $last = qx{avtar --backups --path=$path --noinformationals --count=1 | awk '/^ *[0-9]/ {print \$1,\$2}' };
        chomp($last);
        printboth("ERROR: No $check->{name} flush in past 24 hours. Last one is $last\n");
        printboth("RESOLUTION: Try '$check->{script} --flush' to see why $check->{name} flushes are failing\n\n");
        msg($check->{title},"FAILED");
    }
}
1913########## End lastflush ##########
1914
1915########## Start dellomlogs ##########
1916# Check that logrotate is configured for Dell logs
sub dellomlogs {
    # Verifies that /etc/logrotate.d/dellomlogs exists on every Dell node.
    print LOG "\n\n\n### ".localtime()." ### Starting dellomlogs\n";

    # Nothing to do unless the system is Dell hardware.
    unless ($MANUFACTURER =~ /dell/) {
        print LOG "Skipping for manufacturer $MANUFACTURER\n";
        return;
    }

    my $dell_nodes=getnodes_hw("dell");
    unless ($dell_nodes) {
        print LOG "no dell nodes found\n";
        return;
    }

    # Each node prints "error" when the logrotate file is missing.
    $cmd=q[ test -e /etc/logrotate.d/dellomlogs||echo "error" ];
    mapall("--nodes=$dell_nodes",$cmd);

    my $missing="";
    open(CMD_PIPE,$TMPFILE);
    while (<CMD_PIPE>) {
        chomp;
        print LOG "$_\n";
        # Remember which node the following output lines belong to.
        if (/(\(0\..*\)) ssh/) {
            $node=$1;
        }
        next unless (/error/);
        printboth("ERROR: Node $node Dell log files are not setup to rotate.\n");
        $missing="yes";
    }

    if (!$missing) {
        print LOG "/etc/logrotate.d/dellomlogs found\n";
        msg("Dell log rotate","PASSED");
    } else {
        printboth("RESOLUTION: See KB444473 for more info to configure log rotation.\n\n");
        msg("Dell log rotate","FAILED");
    }
}
1950########## End dellomlogs ##########
1951
1952
1953########## Start get_repl_info ########
sub get_repl_info {
    # Summarise /usr/local/avamar/var/cron/replicate.log: one line per
    # replication run (status, MB restored, throughput, minutes, number of
    # replicated backups) followed by averages over all runs.
    #
    # Globals read:  $IN_DAYS  - only report runs newer than this many days
    #                $replrpt  - when set, write the report to STDOUT and skip
    #                            logging and the repl_cron.cfg copy; otherwise
    #                            write hc_replrpt.txt
    # The OUTPUT handle is deliberately left open on return: it may be an
    # alias of STDOUT.
    if ($IN_DAYS) {
        $repldate="$IN_DAYS days ago";
    } else {
        # No window requested: go back far enough to cover the whole log.
        $repldate="9999 days ago";
    }
    $replfile="/usr/local/avamar/var/cron/replicate.log";

    if (!$replrpt) {
        print LOG "\n\n\n### ".localtime()." ### Starting get_repl_info\n";
        print("HEALTHCHECK: Creating hc_replrpt.txt\n");
        open(OUTPUT,">","hc_replrpt.txt")
            or print LOG "Unable to create hc_replrpt.txt: $!\n";
    } else {
        *OUTPUT=*STDOUT;
    }

    if (! -e $replfile) {
        print OUTPUT "$replfile does not exist\n";
        print LOG "$replfile does not exist\n" if (!$replrpt);
        return;
    }

    # Start of the reporting window as epoch seconds.
    my $DATE=`date --date="$repldate" '+%s'`;
    my $cnt=0;
    chomp($DATE);
    print LOG "repldate=$repldate\nDATE=$DATE\n" if (!$replrpt);
    printf OUTPUT "%16s %-10s %13s %15s %14s %10s\n",
        "DATE","STATUS","REPLICATED","THROUGHPUT","TIME","BACKUPS" ;
    printf OUTPUT "%16s %10s %13s %15s %14s %10s\n",
        "================","==========","=============","==============","==============","==========";

    open(FILE,"<",$replfile)
        or print OUTPUT "Unable to read $replfile: $!\n";
    while(<FILE>) {
        # Skip everything until the first timestamp inside the window.
        if (!$starteval) {
            if (/(\d\d\d\d)\/(\d\d)\/(\d\d)-(\d\d):(\d\d):/) {
                # All five captured fields are used so the hour and minute
                # take part in the comparison.
                my ($yy,$mm,$dd,$hr,$mn)=($1,$2,$3,$4,$5);
                $gmt = timegm(0,$mn,$hr,$dd,$mm-1,$yy);
                if ($gmt >= $DATE) {
                    $starteval=1;
                } else {
                    next;
                }
            }
        }
        next if (!$starteval);
        # Start of a run: remember its timestamp and clear the status.
        if ( $_ =~ /=== Running / ) {
            $currday=substr($_,0,16);
            $status="";
            next;
        }
        # <5675>: count after "Replicated", ignoring MC/EM flush clients.
        if ( $_ =~ /<5675>/ and $_ !~ /client "(MC|EM)_BACKUPS"/) {
            s/^.*Replicated //;
            s/ .*$//;
            $totrepl+=$_;
            $grpl+=$_;
        }
        # <6090>: "Restored <size> <MB|GB> <word> <minutes> ..."; size is
        # converted to bytes.
        if ( $_ =~ /<6090>/ ) {
            s/^.*Restored //;
            my ($size,$type,$foo,$mins)=split(" ");
            $size=$size*1024*1024;
            if ($type eq "GB") {$size=$size*1024}
            $totsize+=$size;
            $tottime+=$mins;
            $gsz+=$size;
            $gtm+=$mins
        }
        # <7211>: remember the client being processed for error context.
        if ( $_ =~ /<7211>/ ) {
            s/^.*Client "//;
            s/".*$//;
            $clientname=$_;
            chomp($clientname);
        }
        # The first infomessage of a run decides its status.
        if ( $_ =~ /infomessage/ and !$status ) {
            if ( $_ =~ /failed/ ) { $status="FAILED"};
            if ( $_ =~ /completed/ ) { $status="COMPLETED"};
            if ( $_ =~ /timed/ ) { $status="TIMED OUT"};
        }
        if ( $_ =~ /Warning <|Error <|FATAL|ERROR:/ and $_ !~ /<6618>|<5237>/) {
            if ( $_ =~ /MSG_ERR_CANCEL/ ) { $status="TARGET CANCELED";next }
            chomp;
            print OUTPUT $_ ."(last client=$clientname)\n";
        }
        # End of a run: print its summary line and reset the per-run totals.
        if ( $_ =~ /=== Finished / ) {
            if ($tottime > 0) {
                $speed=(($totsize*8)/($tottime*60))/1024;
            } else {
                $speed="N/A";
            }
            # "N/A" is printed as text; a number is printed as an integer.
            my $speed_col=($speed eq "N/A") ? sprintf("%10s",$speed) : sprintf("%10d",$speed);
            printf OUTPUT "%16s %-10s %10d MB %s Kbps %10d min %10d\n",$currday,
                $status,$totsize/1024/1024,$speed_col,$tottime,$totrepl;
            $totsize=0;
            $tottime=0;
            $totrepl=0;
            $cnt++;
        }
    }
    close(FILE);

    if ($gtm > 0) {
        $speed=(($gsz*8)/($gtm*60))/1024;
    } else {
        $speed="N/A";
    }
    printf OUTPUT "%16s %10s %13s %15s %14s %10s\n",
        "================","==========","=============","==============","==============","==========";
    if ($cnt > 0) {
        my $speed_col=($speed eq "N/A") ? sprintf("%10s",$speed) : sprintf("%10d",$speed);
        printf OUTPUT "%16s %10s %10d MB %s Kbps %10d min %10d\n","AVERAGES","",
            $gsz/1024/1024/$cnt,$speed_col,$gtm/$cnt,$grpl/$cnt;
    } else {
        print OUTPUT "No replication records found\n";
    }
    if (!$replrpt) {
        print("HEALTHCHECK: Creating hc_repl_cron.cfg\n");
        # Best effort copy; a missing source file is ignored.
        system("cp /usr/local/avamar/etc/repl_cron.cfg ./hc_repl_cron.cfg 2>/dev/null");
    }
}
2069########## End get_repl_info ########
2070
2071
2072########## Start get_backup_info ########
sub get_backup_info {
    # Writes hc_backup_clients.txt: per-client backup statistics for the last
    # 30 days taken from v_activities, listing the top five clients for each
    # of: max elapsed time, avg elapsed time, avg new data, max new data and
    # change rate (a client is listed only once).

    openmcdb() if (!$dbh);
    print LOG "\n\n\n### ".localtime()." ### Starting get_backup_info\n";

    # COMMENT OUT TO GET LOGS
    # (the condition is never true, so this block is disabled)
    if ( 2+2==5) {
        print("HEALTHCHECK: Creating hc_backup_logs.txt\n");
        open(OUTPUT,">hc_backup_logs.txt");
        $xml = XML::Parser->new( Style => 'Tree' );
        print LOG "parsing mccli activity show --verbose --completed --xml\n";
        my $tree=$xml->parsefile("mccli activity show --verbose --completed --xml |");
        bu_XMLTree( $tree);

        for $key ( keys %$latest_backup) {
            my ($sec, $min, $hour, $day,$month,$year,$foo) = localtime($latest_backup->{$key});
            print OUTPUT "\n\n=============================================================================\n";
            printf OUTPUT "CLIENT: %s %4d/%02d/%02d %02d:%02d %s\n",$key,$year+1900,$month+1,$day,$hour,$min,"$latest_status->{$key}";
            $cmd="avtar --showlog --path=$key 2>&1";
            $log=`$cmd`;
            print OUTPUT $log;
        }
    }

    # get high change rate,long running, high new data clients

    print("HEALTHCHECK: Creating hc_backup_clients.txt\n");
    open(OUTPUT2,">","hc_backup_clients.txt")
        or print LOG "Unable to create hc_backup_clients.txt: $!\n";
    my $DAYS=30*86400;
    my $sql = qq[
        select domain,client_name,
        avg(bytes_modified_sent),
        max(bytes_modified_sent),
        avg(bytes_scanned),
        avg(num_of_files),
        max(client_os),max(client_ver),
        avg(completed_ts - started_ts),
        max(completed_ts - started_ts)
        from v_activities
        where $DAYS >= (date_part('epoch',current_date) - date_part('epoch',started_ts))
        and (type like '%Snap%' or type like '%Dest%')
        group by domain,client_name
        order by domain,client_name
    ];

    my $sth = $dbh->prepare($sql);
    $sth->execute;

    while ( my @row = $sth->fetchrow_array() ) {
        next if ($row[1] =~ /(MC|EM)_BACKUPS/);
        next if ($row[0] =~ /REPLICATE/);
        my ($domain,$client,$sent,$maxsent,$scan,$files,$os,$ver,$elap,$maxelap)=@row;
        $client="$domain/$client";
        # One lookup per metric: metric value => "domain/client".
        $s_maxelap{$maxelap}=$client;
        $s_elap{$elap}=$client;
        $s_maxsent{$maxsent}=$client;
        $s_sent{$sent}=$client;
        # Numeric test: a value such as "0.000" must not reach the division.
        $s_chg{$sent/$scan}=$client if ($scan > 0);
        # Full row, read back by print_client() through the same hash ref.
        $clients->{$client} = [ @row ] ;
    }

    printf OUTPUT2 "%-50s %7s %7s %7s %5s %7s %7s %7s %13s %s\n",
        "CLIENT","AVG", "MAX", "AVG", "CHG", "ELAP","MAXELAP","NUM OF","CLIENT","OPERATING";
    printf OUTPUT2 "%-50s %7s %7s %7s %5s %7s %7s %7s %13s %s\n",
        "CLIENT","NEW GB","NEW GB","SCAN GB","RATE","TIME","TIME", "FILES", "VERSION","SYSTEM";
    printf OUTPUT2 "%50s %7s %7s %7s %5s %7s %7s %7s %13s %s\n",
        "==================================================","=======","=======","=======","======","=======","=======","========","===========","=====";
    print_client(%s_maxelap);
    print_client(%s_elap);
    print_client(%s_sent);
    print_client(%s_maxsent);
    print_client(%s_chg);

    close OUTPUT2;
    close OUTPUT;
}

# Turn a metric key into a number so the keys can be ranked.  Plain numbers
# are returned as they are.  Text of the form "[N day[s]] [HH:MM:SS[.fff]]"
# is converted to seconds (presumably how the database driver returns the
# elapsed-time columns - confirm).  Anything else ranks as 0.
sub _backup_metric_value {
    my ($raw) = @_;
    return 0 if (!defined($raw));
    my ($days,$neg,$hh,$mi,$ss) =
        $raw =~ /^\s*(?:(-?\d+)\s+days?)?\s*(?:(-?)(\d+):(\d\d):(\d\d(?:\.\d+)?))?\s*$/;
    if (defined($days) or defined($hh)) {
        my $secs = defined($hh) ? $hh*3600 + $mi*60 + $ss : 0;
        $secs = -$secs if ($neg);
        return ($days||0)*86400 + $secs;
    }
    return ($raw =~ /^\s*[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\s*$/) ? $raw+0 : 0;
}

sub print_client {
    # Argument: a hash of metric value => "domain/client".
    # Prints up to five not-yet-printed clients, highest metric first, to
    # OUTPUT2, and appends each client's "avtar --showlog" output to OUTPUT.
    # NOTE(review): OUTPUT is only opened by the disabled block in
    # get_backup_info, so these log writes go to whatever OUTPUT currently is.
    my %hash = @_;
    my $shown=0;

    foreach my $key (sort { _backup_metric_value($b) <=> _backup_metric_value($a) } keys %hash) {
        if (!$printed_client{$hash{$key}}) {
            $printed_client{$hash{$key}} = 1;
            my $row = $clients->{$hash{$key}};
            my ($domain,$client,$sent,$maxsent,$scan,$files,$os,$ver,$elap,$maxelap) = @$row;
            my $chgrate=0;
            $chgrate=($sent/$scan)*100 if ($scan > 0) ;
            printf OUTPUT2 "%-50s %7.1f %7.1f %7.1f %5.2f%% %7s %7s %8d %13s %s\n",
                "$domain/$client",
                $sent/1024/1024/1024,$maxsent/1024/1024/1024,$scan/1024/1024/1024,$chgrate,
                substr($elap,0,5),substr($maxelap,0,5),$files,
                $ver,$os;

            print OUTPUT "\n\n=========================================================================\n";
            # print, not printf: the client name must not act as a format.
            print OUTPUT "CLIENT: $domain/$client PERFORMANCE\n";
            # Single-quote the path so the shell passes it through untouched.
            (my $quoted_path = "$domain/$client") =~ s/'/'\\''/g;
            $cmd="avtar --showlog --path='$quoted_path' 2>&1";
            $log=`$cmd`;
            print OUTPUT $log;

            $shown+=1; last if ($shown>4);
        }
    }
    print LOG "FINISHED\n";
}
2176
2177# Begin XMLTree
# Walk an XML::Parser 'Tree' style structure ([tag, content]) and record, per
# "domain/client", the start time and status of the newest activity row that
# is not Completed, Waiting or Replication.
sub bu_XMLTree{ bu_printElement(@{ shift @_ }); }

# Arguments: $tag     - element name (text nodes arrive with a false tag)
#            $content - array ref [attributes, tag, content, ...] for an
#                       element, or a plain string for text
# Updates the globals $backup (last text seen per tag), $latest_backup and
# $latest_status.
sub bu_printElement
{
    my ($tag, $content) = @_;
    if (ref $content) { # This is a XML element OPEN TAG:
        # Text found below is stored under the most recently opened tag.
        $in_tag=$tag;
        # Element 0 holds the attributes; children follow as tag/content pairs.
        for (my $i = 1; $i < $#$content; $i += 2) {
            bu_printElement(@$content[$i, $i+1]);
        }
        ### CLOSE TAG
        if ($tag eq "Row" ){
            # Leave with return: this is a subroutine, not a loop body.
            return if ($backup->{'Status'} eq "Completed") ;
            return if ($backup->{'Status'} =~ "Waiting") ;
            return if ($backup->{'Status'} =~ "Replication") ;

            # "YYYY-MM-DD HH:MM..." => epoch seconds
            my $started = $backup->{'StartTime'};
            $started =~ s/[-:]/ /g;
            my ($yy,$mm,$dd,$hr,$min) = split(" ",$started);
            $started = timegm(0,$min,$hr,$dd,$mm-1,$yy);
            my $client="$backup->{'Domain'}/$backup->{'Client'}";

            # Keep only the newest row for each client.
            return if ($latest_backup->{$client} > $started);
            $latest_backup->{$client}=$started;
            $latest_status->{$client}=$backup->{'Status'};
        }
    } else {
        ### This is a text pseudo-element:
        # Ignore text that is only whitespace.
        my $testcontent = $content;
        $testcontent =~ s/[\t\n ]//g;
        if ( $testcontent ) {
            $backup->{$in_tag}=$content;
        }
    }
} # end printElement
2214########## End get_backup_info ########
2215
2216########## Start getconfiginfo ########
sub getconfiginfo {
    # Collect configuration data once for the other checks:
    #   %MCSERVER  - parsed mcserver.xml
    #   %NODELIST  - parsed "avmaint nodelist"
    #   %CONFIG    - parsed "avmaint --ava config"
    #   %SCHED     - parsed "avmaint --ava sched status"
    #   @RPMS, @DATA_SECUPD, @DATA_<tag> / %DATA_<tag> - per-node command output
    #   $MAINT_RUNNING - "GC", "CP", "HFS" or "" from the nodelist status
    # Sets $GOTCONFIGINFO so callers can skip a second collection.
    $GOTCONFIGINFO=1;

    # Get mcserver.xml
    $xml = XML::Parser->new( Style => 'Tree' );
    print LOG "parse mcserver.xml\n";
    my $tree=$xml->parsefile("/usr/local/avamar/var/mc/server_data/prefs/mcserver.xml");
    SimpleXMLTree($tree);
    %MCSERVER=%xmltree;
    #for (sort keys %MCSERVER) { print "$_ = $MCSERVER{$_}\n"; } exit 0;

    # Get nodelist settings
    $xml = XML::Parser->new( Style => 'Tree' );
    print LOG "parse avmaint nodelist\n";
    $tree=$xml->parsefile("avmaint nodelist|");
    SimpleXMLTree($tree);
    %NODELIST=%xmltree;
    # Get config settings
    $xml = XML::Parser->new( Style => 'Tree' );
    print LOG "parse avmaint --ava config\n";
    $tree=$xml->parsefile("avmaint --ava config|");
    SimpleXMLTree( $tree);
    %CONFIG=%xmltree;
    if ($DO_HEALTHCHECK) {`avmaint --ava config > hc_avmaint_config.txt`;}
    # Get sched settings
    $xml = XML::Parser->new( Style => 'Tree' );
    print LOG "parse avmaint --ava sched status \n";
    $tree=$xml->parsefile("avmaint --ava sched status|");
    SimpleXMLTree($tree);
    %SCHED=%xmltree;
    if ($DO_HEALTHCHECK) {`avmaint --ava sched status > hc_avmaint_sched.txt`;}
    #
    # collect data in one mapall
    #

    # --ALL USER=ROOT
    print LOG "Get data where root is required\n";
    # NOTE: this section creates dynamically named arrays. @RPMS and @DATA_SECUPD
    my $needsu=($SUDO) ? "sudo" : "";
    my $cmd=qq[
        rpm -qa | sed -e 's/^/RPMS:/'
        # securityupdates
        $needsu ls -1t /usr/local/avamar/var/package-survey-SLES*post_errata* |head -1 | xargs grep 'package-survey version' |sed -e 's/sles[^-]*-//i' |sed -e 's/^/DATA_SECUPD:/'
    ];
    mapall("$ALL",$cmd );
    # Only the tags produced by the command above may name an array; any
    # other line that happens to contain ":" is ignored.
    my %root_tags=map { $_ => 1 } ("RPMS","DATA_SECUPD");
    open(CMD_PIPE,$TMPFILE) or print LOG "Unable to open $TMPFILE: $!\n";
    while (<CMD_PIPE>) {chomp;
        print LOG "$_\n" if (!/RPMS/);
        my($sub,$line)=split(":",$_,2);
        if ( /DATA_SECUPD:/ ) {
            # Keep only the text inside the first pair of double quotes.
            my (undef,$quoted)=split('"', $line);
            $line=$quoted;
        }
        $node=$1 if (/(\(0\..*\)) ssh/);
        next if (!/:/);
        next if (!$root_tags{$sub});
        #print LOG "add to $sub $node $line\n" if $DEBUG;
        push(@$sub,"$node $line");
    }
    close(CMD_PIPE);

    # USER=ADMIN
    ##### NOTE: this section creates dynamically named arrays of DATA_* like DATA_SECUPD, DATA_REPO, DATA_SRC
    $cmd=qq[
        # repoempty
        # x="/data01/avamar/repo/packages"; find \$x -type f -exec echo "REPO:\$x" \\; | uniq
        [ -x /data01/avamar/src ] && echo "SRC:OK" || echo "SRC:MISSING"
        x="/data01/avamar/repo/temp"; find \$x -type f -exec echo "REPO:\$x" \\; | uniq
        sed -n '/Neighbor table overflow/{p;q;}' /var/log/messages | sed -e 's/^/checkmessages:/g'

        if [ -e "/sys/devices/system/cpu/vulnerabilities/meltdown" -a\\
        -e "/sys/devices/system/cpu/vulnerabilities/spectre_v1" -a\\
        -e "/sys/devices/system/cpu/vulnerabilities/spectre_v2" -a\\
        -e "/sys/devices/system/cpu/vulnerabilities/spec_store_bypass" -a \\
        -e "/sys/devices/system/cpu/vulnerabilities/l1tf" \\
        ]; then echo "SPECTRE:ok"; else echo "SPECTRE:bad"; fi

    ];
    mapall("$ALL",$cmd);
    # Tags emitted by the command above; each becomes @DATA_<tag>/%DATA_<tag>.
    my %admin_tags=map { $_ => 1 } ("SRC","REPO","checkmessages","SPECTRE");
    open(CMD_PIPE,$TMPFILE) or print LOG "Unable to open $TMPFILE: $!\n";
    while (<CMD_PIPE>) {chomp;
        my($sub,$line)=split(":",$_,2);
        $node=$1 if (/(\(0\..*\)) ssh/);
        next if (!/:/);
        next if (!$admin_tags{$sub});
        $sub="DATA_$sub";
        push(@$sub,"$node $line");
        $$sub{$node}.="$line\n";
        print LOG "push $sub {$node} = $line\n";
    }
    close(CMD_PIPE);

    # SET SOME VARIABLES BASED ON STATUS:
    # Later assignments win when more than one activity is reported.
    $MAINT_RUNNING="";
    $MAINT_RUNNING="GC" if ($NODELIST{'/nodestatuslist/gcstatus/status'} eq "processing");
    $MAINT_RUNNING="CP" if ($NODELIST{'/nodestatuslist/cpstatus/status'} eq "processing");
    $MAINT_RUNNING="HFS" if ($NODELIST{'/nodestatuslist/hfscheckstatus/status'} eq "waitcgsan");

}
2313
2314########## Start checkconfig ########
2315sub checkconfig {
2316 print LOG "\n\n\n### ".localtime()." ### Starting checkconfig\n";
2317 getconfiginfo() if (!$GOTCONFIGINFO);
2318 getinstalledversion() if (!$AVAMARVER);
2319
2320 %gsanconfig=(
2321 "/gsanconfig/disknocreate" => ["90",""],
2322 "/gsanconfig/cpmostrecent" => ["^2",""],
2323 "/gsanconfig/disknocp" => ["96",""],
2324 "/gsanconfig/disknogc" => ["8[56789]","Can be higher in version 5 but should be less than 90"],
2325 "/gsanconfig/disknoflush" => ["94",""],
2326 "/gsanconfig/cphfschecked" => ['^1$',""],
2327 "/gsanconfig/diskreadonly" => ["65","Settings lower than 65 are usually intentional (AVE, Encrypt at Rest or licensing). Consult local team before raising."],
2328 "/gsanconfig/asynccrunching" => ["true",""],
2329 "/gsanconfig/cpdaily" => [2,""],
2330 );
2331
2332# 12/14/2018 remove perftriallimit check. probably add check to make it 0.
2333#"/gsanconfig/perftriallimit" => ["^[3-9]|^[1-9][0-9]","Should be 3 or higher. See KB465715"]
2334
2335# Change settings for AER
2336 if ($NODETYPE =~ /AER/i) {
2337 %gsanconfig=(
2338 "/gsanconfig/disknocreate" => ["93",""],
2339 "/gsanconfig/disknocp" => ["95",""],
2340 "/gsanconfig/disknogc" => ["92",""],
2341 "/gsanconfig/diskreadonly" => ["90",""]
2342 );
2343 }
2344
2345# Dont check balancemin after 6.0
2346 if ($VERSNUM <610 ) {
2347 %gsanconfig=(%gsanconfig, "/gsanconfig/balancemin" => ['^0$',""]);
2348 }
2349
2350# Add config unless VBA
2351 %gsanconfig=(%gsanconfig, "/gsanconfig/indexcacheallowed"=> [1, "Can be set to 0 on 2TB nodes that are swapping. See KB305359"]) if !$VBA;
2352
2353# Check Disknogc
2354 if ($CONFIG{"/gsanconfig/disknogc"} < 89 ) {
2355 printboth("INFO: disknogc is less than 89%\n");
2356 printboth("RESOLUTION: Increase disknogc to the new default of 89 (avmaint --ava config disknogc=89). See KB529432 for more info\n\n");
2357 msg("avmaint disknogc Setting","INFO");
2358 }
2359
2360# Check maxconn for 7.1 or 7.2
2361print LOG "versnum=$VERSNUM maxconn=$CONFIG{'/gsanconfig/maxconn'} \n";
2362 if ($VERSNUM>=710 and $VERSNUM < 730 and $CONFIG{"/gsanconfig/maxconn"} < 500 and $DDCNT>=1 ) {
2363 printboth("ERROR: Number of simultaneous backups is too low");
2364 printboth("RESOLUTION: See KB469043 for more details to set maxconn=500\n\n");
2365 msg("Maxconn Configuration","FAILED");
2366 }
2367
2368
2369# Check for CP settings
2370 if ($CONFIG{"/gsanconfig/cphfschecked"} eq 1 and $CONFIG{"/gsanconfig/cpmostrecent"} eq 2){
2371 $CP_RETENTION="PASSED";
2372 msg("Checkpoint Retention","PASSED");
2373 } else {
2374 $CP_RETENTION="FAILED";
2375 printboth("CRITICAL ERROR: Checkpoint retention not the default of cpmostrecent=2 and cphfschecked=1\n"); $LOGOFF_ERR++;
2376 msg("Checkpoint Retention","FAILED");
2377 }
2378
2379# balancelocaldisks
2380 if ($CONFIG{"/gsanconfig/balancelocaldisks"} ne "false" ) {
2381 printboth("WARNING: balancelocaldisks is set to true. This will prevent stripe balancing.\nRESOLUTION: See KB526653 for more info\n\n");
2382 msg("Config balancelocaldisks","WARNING");
2383 }
2384
2385# Perftrial
2386# msg("perftriallimit setting",$CONFIG{'/gsanconfig/perftriallimit'} );
2387
2388
2389 my $msg="";
2390 foreach $key (keys %gsanconfig) {
2391 ($foo,$foo,$pkey)=split("/",$key);
2392 printf LOG "%-30s = %-3s Default=%-3s %s\n",$pkey,
2393 $CONFIG{$key},$gsanconfig{$key}[0],$gsanconfig{$key}[1];
2394 $regex=$gsanconfig{$key}[0];
2395 if ($CONFIG{$key} !~ m/$regex/ ) {
2396 printf SETTINGS "%-30s = %-3s Default=%-3s %s\n",$pkey, $CONFIG{$key},$gsanconfig{$key}[0],$gsanconfig{$key}[1];
2397 $msg = $msg.sprintf " %-20s Default=%-5s %s\n",$pkey."=". $CONFIG{$key},$gsanconfig{$key}[0],$gsanconfig{$key}[1];
2398 }
2399 }
2400 if ($msg) {
2401 printboth("WARNING: Configuration parameters changed from defaults\n");
2402 printboth($msg);
2403 printboth("RESOLUTION: Find out why they have been changed.\n\n");
2404 msg("Config Settings","WARNING");
2405 } else {
2406 msg("Config Settings","PASSED");
2407 }
2408
2409
2410# Go through sched status settings
2411
2412 if ($SCHED{'/maintenance-windows/task-param-list/gc/usehistory'} eq "true") {
2413 printboth("ERROR: Garbage collection use history is set to true. This may cause GC to prematurely stop\n");
2414 printboth("RESOLUTION: Set it to false: avmaint --ava sched gc --usehistory=false --permanent\n\n");
2415 msg("Garbage Collection Use History","FAILED");
2416 }
2417 if ($SCHED{'/maintenance-windows/task-param-list/hfscheck/overtime'} ne "true" ){
2418 printboth("ERROR: HFSCheck overtime is set to false\n");
2419 printboth("RESOLUTION: See esc 3597 for more info. Set it to true:\n avmaint --ava sched hfscheck --overtime=true --permanent\n\n");
2420 msg("HFSCheck overtime allowed","FAILED");
2421 } else {
2422 msg("HFSCheck overtime allowed","PASSED");
2423 }
2424 if ($DO_HEALTHCHECK) {
2425 $mytree="/maintenance-windows/window-param-list/window-params/";
2426 print SETTINGS
2427 "backup-window/start = ".$SCHED{$mytree.'backup-window/start'} ."\n".
2428 "backup-window/duration = ".$SCHED{$mytree.'backup-window/duration'} ."\n".
2429 "blackout-window/start = ".$SCHED{$mytree.'blackout-window/start'} ."\n".
2430 "blackout-window/duration = ".$SCHED{$mytree.'blackout-window/duration'} ."\n".
2431 "maintenance-window/start = ".$SCHED{$mytree.'maintenance-window/start'} ."\n".
2432 "maintenance-window/duration = ".$SCHED{$mytree.'maintenance-window/duration'} ."\n".
2433 "hfscheck modified = ".
2434 $SCHED{'/maintenance-windows/task-param-list/hfscheck/modified'} ."\n";
2435 }
2436
2437
2438# Print some of the already gathered data
2439 printf SETTINGS "%-30s = %-30s\n","systemid",$NODELIST{'/nodestatuslist/nodestatus/0.0/systemid'};
2440 printf SETTINGS "%-30s = %-30s\n","GSAN md5sum",$GSAN_MD5SUM;
2441 printf SETTINGS "%-30s = %-30s\n","Avamar RPM Version ",$AVAMARVER;
2442 printf SETTINGS "%-30s = %-30s\n","Data Node Version",$DATANODEVERSION;
2443 printf SETTINGS "%-30s = %-30s\n","MCS Version",$MCSERVER_VERSION;
2444 printf SETTINGS "%-30s = %-30s\n","EMS Version",$EMSERVER_VERSION;
2445
# An "--encrypt" setting defined in usersettings.cfg is reported as a warning
2447 my $r;
2448 chomp($r=`grep encrypt /usr/local/avamar/etc/usersettings.cfg`);
2449 if ($r) {
2450 print LOG "encrypt: $r\n";
2451 printboth("ERROR: encrypt setting found in usersettings.cfg\n\nRESOLUTION: Find out why the flag was added. See escalation 30362\n\n");
2452 msg("Usersettings.cfg Encrypt","WARNING");
2453 }
2454
2455# Check for avtar.cmd, avmaint.cmd, etc. existence
2456 my $e="";
2457 for (qw(avmaint.cmd avmgr.cmd)) {
2458 if (-e "/usr/local/avamar/var/$_") {
2459 print LOG "FileExists: $_\n";
2460 $e=1;
2461 printboth("INFO: File /usr/local/avamar/var/$_ exists which is not typical\n");
2462 }
2463 }
2464 if ($e) {
2465 printboth("RESOLUTION: Verify the file should exist on this system\n\n");
2466 msg("Control Files","INFO");
2467 }
2468
2469}
2470
2471########## End checkconfig ########
2472
2473
2474########## Start SimpleXMLTree ########
# SimpleXMLTree(\@pair)
# Flattens a parsed XML tree into the global %xmltree hash. Nested tags are
# joined with "/", and the identifying attribute of repeating elements is
# appended to the path (see SimplePrintElement) so every value gets its own key.
# For example <gsanconfig status=good cp=3> becomes
#     /gsanconfig/status and /gsanconfig/cp
# and <nodestatus id=0.4 status=good> below <nodestatuslist> becomes
#     /nodestatuslist/nodestatus/0.4/status and /nodestatuslist/nodestatus/0.4/id
# The argument is a reference to a (tag, content) pair. %xmltree and the global
# $in_tag are reset before the walk starts.
sub SimpleXMLTree {
    my ($pair) = @_;
    $in_tag  = "";
    %xmltree = ();
    return SimplePrintElement("", @$pair);
}
2484
# SimplePrintElement($in_tag, $tag, $content)
# Recursive worker for SimpleXMLTree. Stores one element (and its children)
# in the global %xmltree.
#   $in_tag  - path of the enclosing element ("" at the top)
#   $tag     - name of this element
#   $content - for an element: array ref [ \%attributes, tag, content, ... ];
#              for text: a plain string
# Side effects: fills %xmltree; appends to @NODES (nodestatus ids) and
# @DD_INDEX (ddrconfig indexes); increments $DDXML for "Row" elements when
# $DDXMLFLAG is set; overwrites the global $prev_tag.
sub SimplePrintElement
{
    my ($in_tag, $tag, $content) = @_;

    # Every key has the MCS preferences prefix removed before it is stored.
    my $store = sub {
        my ($path, $value) = @_;
        $path =~ s:/preferences/root/node/com/node/avamar/node::;
        $xmltree{$path} = $value;
    };

    if (!ref $content) {
        ### This is a text pseudo-element: keep it unless it is only whitespace
        my $testcontent = $content;
        $testcontent =~ s/[\t\n ]//g;
        $store->($in_tag, $content) if ($testcontent);
        return;
    }

    ### This is an XML element
    my $attrHash = $content->[0];
    if ($tag eq "param") {
        # <param name=.. value=..> is stored directly under the parent path
        $store->("$in_tag/$attrHash->{'name'}", $attrHash->{'value'});
    } else {
        $prev_tag = $in_tag;
        $in_tag   = $in_tag."/".$tag;
        if ($tag eq "node") {
            $in_tag = $in_tag."/".$attrHash->{name};
        }
        if ($tag eq "entry") {
            # <entry key=.. value=..> is stored under the parent path.
            # The original used "next" here, which is not valid outside a loop
            # block: it only worked by jumping into the caller's for loop and
            # died when the top-level element was an entry.
            $store->($prev_tag."/".$attrHash->{key}, $attrHash->{value});
            return;
        }
        if ($tag eq "nodestatus") {
            push(@NODES, $attrHash->{'id'});
            $in_tag = $in_tag."/".$attrHash->{'id'};
        }
        if ($tag eq "disk") {
            $in_tag = $in_tag."/".$attrHash->{'id'};
        }
        if ($tag eq "ddrconfig") {
            push(@DD_INDEX, $attrHash->{'index'});
            $in_tag = $in_tag."/".$attrHash->{'index'};
        }
        if ($tag eq "checkpoint") {
            $in_tag = $in_tag."/".$attrHash->{'tag'};
        }
        # DD show-prop: rows carry no id, so number them
        if ($tag eq "Row" and $DDXMLFLAG) {
            $DDXML++;
            $in_tag = $in_tag."/$DDXML";
        }
        for my $key (keys %$attrHash) {
            $store->($in_tag."/".$key, $attrHash->{$key});
        }
    }

    # Children follow the attribute hash as (tag, content) pairs
    for (my $i = 1; $i < $#$content; $i += 2) {
        SimplePrintElement($in_tag, @$content[$i, $i+1]);
    }
} # end printElement
2548########## End SimpleXMLTree ########
2549
2550
2551########## Start checktime ########
# checktime()
# Compares the clocks of all nodes and checks that each node has a primary
# NTP peer (the line ntpq marks with "*"). Reports through printboth()/msg().
# Skipped for single node servers; on a VBA it only checks that ntpd is not
# running. Reads the mapall() output from $TMPFILE.
# Uses the globals $primntp and $lastepoch, which are reset on every call.
sub checktime {

    print LOG "\n\n\n### ".localtime()." ### Starting checktime\n";

    getavamarver() if (!$AVTAR_VERSION);

    if ($VBA and qx{ps -A | grep ntpd} ) {
        printboth("ERROR: NTPD time server is running\n");
        printboth("RESOLUTION: Stop and disable ntpd timer server\n\n");
        msg("Time Server","FAILED");
        return;
    }

    if ($NODE_COUNT == 1) {
        print LOG "Check skipped for single node servers\n";
        return;
    }

    $cmd=q[ echo -e "DATE \c"; date '+%s' ; /usr/sbin/ntpq -pn ];
    mapall("--all",$cmd);

    my $fh;
    if (!open($fh, '<', $TMPFILE)) {
        print LOG "Unable to open $TMPFILE: $!\n";
        printboth("WARNING: Unable to read the time data collected from the nodes\n\n");
        msg("Time Settings","WARNING");
        return;
    }

    my ($badtime, $timeserver, $node) = ("", "", "");
    $primntp   = "";
    $lastepoch = "";
    while (my $line = <$fh>) {
        chomp($line);
        print LOG "$line\n";
        if ($line =~ /(\(0\..*\)) ssh/) {
            # Header of the next node: judge the node that just ended, then
            # forget its primary so every node is checked on its own.
            my $next_node = $1;
            if ($node and !$primntp) {
                printboth("ERROR: Node $node Primary time server not found\n");
                $timeserver="yes";
            }
            $node    = $next_node;
            $primntp = "";
        }
        if ($line =~ /^DATE (.*)/) {
            my $epoch = $1;
            $lastepoch = $epoch if (!$lastepoch);
            # Each node is compared with the node reported just before it
            if (abs($lastepoch-$epoch)>8) {
                printboth("ERROR: Node $node time is out of sync by ". ($lastepoch-$epoch) ." seconds\n");
                $badtime="yes";
            }
            $lastepoch = $epoch;
        }
        if ($line =~ /^\*/) {
            my ($ip) = split(" ", $line);
            print LOG "--> $node primary is $ip\n";
            $primntp = $ip;
        }
    }
    close($fh);

    # The last node has no following header, so check it here
    if ($node and !$primntp) {
        printboth("ERROR: Node $node Primary time server not found\n");
        $timeserver="yes";
    }

    if (!$timeserver and !$badtime) {
        msg("Time Settings","PASSED");
    } else {
        printboth("RESOLUTION: Verify nodes are out of time sync. Run asktime if they are.\n\n");
        msg("Time Settings","FAILED");
    }
}
2604########## End checktime ########
2605
2606########## Start sched ########
# sched()
# Draws a text chart of the last $sched_days days (default 30, or $IN_DAYS):
# one row per day, one column per 1/$interval hour, showing running backups
# and maintenance activities. Data comes from v_activities and v_events in
# the MCS database ($dbh). Output goes to hc_sched.txt, or to STDOUT when the
# global $sched is set. Fills the global %daysused through sched_addtype().
sub sched {

    openmcdb() if (!$dbh);
    print LOG "\n\n\n### ".localtime()." ### Starting sched\n" if !($sched);
    $sched_days=30;
    if ($IN_DAYS) { $sched_days=$IN_DAYS; }
    %daysused = ();    # start clean so a second call does not double count

    # A lexical handle is used instead of aliasing *OUTPUT to *STDOUT, which
    # made any later open(OUTPUT,...) reopen STDOUT.
    my $out;
    if (!$sched) {
        print("HEALTHCHECK: Creating hc_sched.txt\n");
        if (!open($out, '>', "hc_sched.txt")) {
            print LOG "Unable to create hc_sched.txt: $!\n";
            return;
        }
    } else {
        $out = \*STDOUT;
    }
    if ($WIDE || !$sched) {
        $interval=4;
        print {$out} " 1 1 1 1 1 1 1 1 1 1 2 2 2 2 ".`date '+%Z'`;
        print {$out} " 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 \n";
    } else {
        $interval=3;
        print {$out} " 1 1 1 1 1 1 1 1 1 1 2 2 2 2 ".`date '+%Z'`;
        print {$out} " 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3\n";
    }

# GET BACKUPS
    my $SECONDS=$sched_days*86400;
    my $sql = qq[
 select started_ts, completed_ts, status_code, type from v_activities
 where $SECONDS >= (date_part('epoch',current_date) - date_part('epoch',started_ts))
 and status_code <> 30901
 order by type
 ];

    my $sth = $dbh->prepare($sql);
    $sth->execute;

    my (@backups, @others);
    while ( my @row = $sth->fetchrow_array() ) {
        my ($start,$complete,$status,$type)=@row;
        # Without both timestamps there is nothing to draw
        next if (!$start or !$complete);

        # Map the activity type to a chart letter. The original overwrote
        # $type with "u" before testing it, so nothing was ever recognised.
        my $code = "u";
        if    ($type =~ m/Snapup|Backup/i) { $code = "b"; }
        elsif ($type =~ m/Destination/i)   { $code = "d"; }
        elsif ($type =~ m/Source/i)        { $code = ""; }    # not charted
        elsif ($type =~ m/Restore/i)       { $code = "e"; }
        next if (!$code);

        # Uppercase marks a failed activity (see the legend below)
        $code = uc($code) if ($status !~ /30000|30005/);

        my $s = sched_toepoch(split(" ",$start));
        my $e = sched_toepoch(split(" ",$complete));
        if ($code eq "b") { push(@backups, [$s, $e, $code]); }
        else              { push(@others,  [$s, $e, $code]); }
    }
    # Backups are counted first: a count added on top of a letter would
    # overwrite that letter.
    for my $activity (@backups, @others) {
        sched_addtype(@$activity);
    }

# Maint routines
    my ($dd, $mm, $yy) = (localtime(time - 86400*$sched_days))[3,4,5];
    $mm++; $yy+=1900;
    my $DATE=sprintf("%4d-%02d-%02d",$yy,$mm,$dd);
    $sql = qq[
 select date,time,code
 from v_events
 where date>= '$DATE'
 and code > 4000 and code < 4999
 order by date,time
 ];

    $sth = $dbh->prepare($sql);
    $sth->execute;

    # Event code => [ start-time variable, letter ]. An entry without a
    # letter is the start event; lower case ends a task, upper case ends it
    # as failed.
    my %maint_event = (
        4002 => [ \$starthfs  ], 4003 => [ \$starthfs,  "h" ], 4004 => [ \$starthfs,  "H" ],
        4200 => [ \$startgc   ], 4201 => [ \$startgc,   "g" ], 4202 => [ \$startgc,   "G" ],
        4300 => [ \$startcp   ], 4301 => [ \$startcp,   "c" ], 4302 => [ \$startcp,   "C" ],
        4600 => [ \$startrepl ], 4601 => [ \$startrepl, "r" ], 4602 => [ \$startrepl, "R" ],
    );
    while ( my @row = $sth->fetchrow_array() ) {
        my ($date,$time,$code)=@row;
        my $event = $maint_event{$code+0};
        next if (!$event);
        my $epoch = sched_toepoch($date,$time);
        my ($started, $letter) = @$event;
        if (!defined $letter) {
            $$started = $epoch;
            next;
        }
        sched_addtype($$started,$epoch,$letter,"") if ($$started);
        $$started = undef;
    }

# Print It
    my $lastdate="";
    foreach my $key (sort keys %daysused) {
        my ($date) = split(" ",$key);
        if ($lastdate ne $date and $lastdate) { print {$out} "\n"; }
        $lastdate=$date;
        print {$out} "$date ";
        # Slots run from 0 to $interval*24-1
        for my $slot (0 .. $interval*24-1) {
            my $cell = $daysused{$key}[$slot];
            if (!$cell) {
                print {$out} ".";
            } elsif ($cell =~ /^\d+$/) {
                # Backup count: show the tens digit, "b" for 100 or more
                my $tens = int($cell / 10);
                print {$out} ($tens >= 10 ? "b" : $tens);
            } else {
                print {$out} $cell;
            }
        }
        print {$out} "\n";
    }
    print {$out} "\nh=hfs, c=CP, g=GC, r=Repl, d=ReplDest, e=restore, uppercase means failed activity, u=unknown\nA number is how many backups are running, 0=0-9, 1=10-19, b=100 or more backups\n";
    if (!$sched) {
        close($out);
        print LOG "FINISHED\n"
    } else {
        print "\n";
    }

}
2726
# sched_toepoch($date, $time)
# Converts a "YYYY-MM-DD" date and an "HH:MM[:SS]" time to epoch seconds with
# timegm(). The seconds part is dropped, so the result is on a minute boundary.
sub sched_toepoch {
    my ($date_part, $time_part) = @_;
    my ($year, $month, $day) = split("-", $date_part);
    my ($hour, $minute)      = split(":", $time_part);
    return timegm(0, $minute, $hour, $day, $month - 1, $year);
}
2732
# sched_ndx($year, $month, $day, $line)
# Builds the %daysused key "YYYY/MM/DD <line>" for one chart row.
sub sched_ndx {
    my ($year, $month, $day, $row) = @_;
    return sprintf("%d/%02d/%02d %d", $year, $month, $day, $row);
}
2734
# sched_addtype($start, $complete, $type)
# Marks every chart slot between the two epoch times in the global %daysused.
# Type "b" adds one to the counter of each slot on row 0. Any other type is
# drawn as a letter on the first row where none of its slots already holds a
# different value; the final slot gets $type as given, the earlier ones its
# lower-case form. Uses the globals $interval (slots per hour), $line, $bad
# and $min.
sub sched_addtype {
    my ($start, $complete, $type) = @_;
    my $step = 3600 / $interval;    # seconds covered by one slot

    # Row key and column of the slot holding $when on the current $line
    my $locate = sub {
        my ($when) = @_;
        my ($mn, $hr, $dd, $mm, $yy) = (localtime($when))[1 .. 5];
        $min = int($mn / (60 / $interval));
        return (sched_ndx($yy + 1900, $mm + 1, $dd, $line), $hr * $interval + $min);
    };

    $line = 0;
    if ($type ne "b") {
        # Move down one row at a time until the whole span fits
        $bad = 1;
        while ($bad eq 1) {
            $bad = 0;
            my $probe = $start;
            while ($probe <= $complete) {
                my ($row, $col) = $locate->($probe);
                my $held = $daysused{$row}[$col];
                if ($held and $held ne $type) {
                    $bad = 1;
                    $line++;
                    last;
                }
                $probe += $step;
            }
        }
    }

    for (my $epoch = $start; $epoch <= $complete; $epoch += $step) {
        my ($row, $col) = $locate->($epoch);
        if ($type eq "b") {
            $daysused{$row}[$col] += 1;
            next;
        }
        my $is_last = ($epoch + $step >= $complete);
        $daysused{$row}[$col] = $is_last ? $type : lc($type);
    }
}
2771########## End sched ########
2772
2773########## Start backup_config ########
# backup_config()
# Runs backup_upgrade_files through sudo, tars the resulting /tmp/backups_*
# directory and copies the tar file to a second place: to nodes 0.0 and 0.1
# on a multi node grid, otherwise with scp to the dstaddr found in
# repl_cron.cfg. Without repl_cron.cfg the user is asked to copy it manually.
# Skipped when $SUDO is not set.
sub backup_config {

    print LOG "\n\n\n### ".localtime()." ### Starting backupconfig\n";
    if (!$SUDO ) {
        print LOG "skipping. no sudo support\n";
        return;
    }

    my $pipe;
    if (!open($pipe, '-|', qq[sudo sh -c "/usr/local/avamar/bin/backup_upgrade_files 2>&1; chmod a+rwx -R /tmp/backups_*"])) {
        print LOG "Unable to run backup_upgrade_files: $!\n";
        printboth("ERROR: Unable to run backup_upgrade_files: $!\n\n");
        msg("backup_upgrade_files","FAILED");
        return;
    }
    while (my $line = <$pipe>) {
        chomp($line);
        print LOG "$line\n";
        next if ($line =~ /copying/);
        # The log file name carries the timestamp used for the backup directory
        next if ($line !~ /Completed.*backup_upgrade_files.(\d+_\d+).log/);

        my $basename=$1;
        print LOG "Found files $basename\n";
        $cmd="/bin/tar czf /tmp/backup_upgrade_files.$basename.tgz /tmp/backups_$basename 2>&1";
        $results=`$cmd`;
        print LOG "Creating tar file: $cmd\n$results\n";

        if ($NODE_COUNT>1){
            print LOG "Copying tar file to nodes:";
            mapall("--nodes=0.0,0.1","copy /tmp/backup_upgrade_files.$basename.tgz");
            msg("backup_upgrade_files","PASSED");
        } elsif (-e "/usr/local/avamar/etc/repl_cron.cfg"){
            my $dstaddr = `grep dstaddr /usr/local/avamar/etc/repl_cron.cfg | grep -v "^#"`;
            # Use the first matching line only and drop surrounding whitespace
            ($dstaddr) = split(/\n/, $dstaddr);
            my ($var1,$addr) = split('=', defined $dstaddr ? $dstaddr : "");
            $addr = "" if (!defined $addr);
            $addr =~ s/^\s+|\s+$//g;
            my $scp="cd /tmp; scp -i ~/.ssh/dpnid backup_upgrade_files.$basename.tgz admin\@${addr}:";
            # Run the copy first so the log shows its own output
            $results=`$scp`;
            my $rc = $?;
            print LOG "scp cmd: $scp\nResults: $results\n";
            if ($rc != 0) {
                printboth("ERROR: Unable to copy backup_upgrade_files to replication target. Results: $results\n\n");
                msg("backup_upgrade_files","FAILED");
            } else {
                msg("backup_upgrade_files","PASSED");
            }
        } else {
            printboth("WARNING: File /tmp/backup_upgrade_files.$basename.tgz should be manually copied/saved\n\n");
            # NOTE(review): reported as FAILED although the text says WARNING - kept as is
            msg("backup_upgrade_files run","FAILED");
        }
    } #while
    close($pipe);
} #endsub
2820
2821########## End backup_config ########
2822
2823
2824########## Start getuserdata ########
# getuserdata()
# Interactive menu that collects the SR number, engineer, customer and SR
# description (plus the force reason when $FORCE is set) before the logoff
# report is written. Answers are stored in the globals $srnumber, $yourname,
# $customername, $srdesc and $FORCE_reason and saved by writedefaults() after
# every change. Earlier answers are loaded from /home/admin/.pac_defaults.
# Returns after choice 5 once all fields are filled; "q" or end of input
# exits the program.
sub getuserdata {

    print LOG "--> showMenu \n";
    unlink("hc_email.txt");
    do ("/home/admin/.pac_defaults");
    my $modified=0;

    # Menu choice => [ prompt, variable that receives the answer ]
    my %field_for = (
        1 => [ "SR#",              \$srnumber     ],
        2 => [ "Your Name: ",      \$yourname     ],
        3 => [ "Customer Name: ",  \$customername ],
        4 => [ "SR Description: ", \$srdesc       ],
        0 => [ "Force Reason: ",   \$FORCE_reason ],
    );

    $input = "";
    while(1)
    {
        print "\n$PROG $PROGVER\n";
        print "======================\n";
        if ($FORCE) {
            print "0) Force Reason.................$FORCE_reason\n";
        }
        print "1) SR Number....................$srnumber\n";
        print "2) EMC Engineer Contact Email...$yourname\n";
        print "3) Customer Contact.............$customername\n";
        print "4) SR Description...............$srdesc\n";
        print "5) Create Report\n";
        print "q) Quit\n\n";

        print "Enter your choice: ";
        $input = <STDIN>;
        # End of input (closed terminal, exhausted pipe): quit instead of
        # printing the menu forever
        if (!defined $input) { print "\n"; exit 0; }
        chomp($input);
        if($input eq "q") { exit 0; }
        if($input eq "5") {
            if ( (!$srnumber) or (!$yourname) or (!$customername) or (!$srdesc) or ($FORCE and !$FORCE_reason) ) {
                print "You must enter all information before creating the report\n";
                next;
            } else {
                writehistory() if $modified;
                msg("Logoff Info","","$srnumber|$yourname|$customername|$srdesc|$FORCE|$FORCE_reason");
                last;
            }
        }

        # Numeric input is compared as a number ("01" selects 1), as before
        my $choice = ($input =~ /^\d+$/) ? $input + 0 : $input;
        my $field  = $field_for{$choice};
        if (!$field) {
            print "Invalid selection\n\n";
            next;
        }

        my ($prompt, $target) = @$field;
        print $prompt;
        my $answer = <STDIN>;
        $answer = "" if (!defined $answer);
        chomp($answer);
        next if (!$answer);        # an empty answer keeps the old value

        $$target = $answer;
        writedefaults();
        if ($choice eq "0") {
            printboth("Critical Checks forced to pass: $FORCE_reason\n");
        } else {
            $modified=1;
        }
    }
}
# writedefaults()
# Saves the current menu answers to /home/admin/.pac_defaults as Perl
# assignments; getuserdata() loads the file again with do(). Backslashes and
# single quotes in the values are escaped so that a value such as O'Brien
# cannot break the file or run as code when it is loaded.
sub writedefaults {
    my $file = "/home/admin/.pac_defaults";
    my $fh;
    if (!open($fh, '>', $file)) {
        print LOG "Unable to write $file: $!\n";
        return;
    }
    my @settings = (
        [ 'srnumber',     $srnumber     ],
        [ 'yourname',     $yourname     ],
        [ 'customername', $customername ],
        [ 'srdesc',       $srdesc       ],
        [ 'FORCE_reason', $FORCE_reason ],
    );
    for my $setting (@settings) {
        my ($name, $value) = @$setting;
        $value = "" if (!defined $value);
        $value =~ s/([\\'])/\\$1/g;
        print {$fh} "\$$name='$value';\n";
    }
    close($fh);
}
2877# HISTORY
# writehistory()
# Appends one "time|sr|engineer|customer|description|force reason" line to
# /home/admin/.pac_history. Because "|" is the field separator, it is replaced
# with a space in the five global values themselves (they stay modified
# afterwards, as before).
sub writehistory {
    for my $value ($srnumber, $yourname, $customername, $srdesc, $FORCE_reason) {
        $value =~ s/\|/ /g;    # $value aliases the global
    }
    my $file = "/home/admin/.pac_history";
    my $fh;
    if (!open($fh, '>>', $file)) {
        print LOG "Unable to append to $file: $!\n";
        return;
    }
    my $line=time."|$srnumber|$yourname|$customername|$srdesc|$FORCE_reason";
    print {$fh} "$line\n";
    close($fh);
}
2889########## End getuserdata ########
2890
2891########## Start logoff_report ########
# logoff_report()
# Final step of the logoff checks. If a critical check failed ($LOGOFF_ERR > 0)
# and no force reason was given, it lists the failed checks and returns.
# Otherwise it builds the customer email from the collected status variables,
# writes it to the log and to hc_email.txt.
sub logoff_report {

    print LOG "\n\n\n### ".localtime()." ### Starting logoff_report\n";
    print LOG "logoff_err = $LOGOFF_ERR\n";

    if ($LOGOFF_ERR > 0 and (!$FORCE_reason) ) {
        printboth("\nCRITICAL checks have failed. Resolve the errors before leaving the grid\n");
        print "\nCRITICAL checks have failed. Resolve the errors before leaving the grid\n";
        # [ status, pattern the status must match, text printed when it does not ]
        my $passed      = qr/\APASSED\z/;
        my $passed_warn = qr/PASSED|WARNING/;
        my @critical = (
            [ $GSAN_MCS_EMS, $passed_warn, "GSAN, MCS or EMS services are down (dpnctl status)\n" ],
            [ $MAINT_SCHED,  $passed,      "Maintenance Scheduler is not enabled or running (dpnctl status)\n" ],
            [ $BACKUP_SCHED, $passed,      "Backup Scheduler is not enabled or running (dpnctl status)\n" ],
            [ $CRON_SCHED,   $passed,      "Cron is not running or scheduler not enabled (dpnctl status or ps -ef)\n" ],
            [ $MCGUI_STATUS, $passed,      "Access to MCGUI is not working (mccli server show-prop)\n" ],
            [ $BACKUP_DONE,  $passed_warn, "Backup to the GSAN failed (mcserver.sh --flush)\n" ],
            [ $CP_RETENTION, $passed,      "Checkpoint retention is not set to defaults (avmaint --ava config | grep cp)\n" ],
        );
        for my $check (@critical) {
            my ($status, $ok, $text) = @$check;
            $status = "" if (!defined $status);
            print $text if ($status !~ $ok);
        }
        return;
    }

    printboth("\nAll CRITICAL checks have passed. View hc_email.txt for email message\n");
    print "\nAll CRITICAL checks have passed. View hc_email.txt for email message\n";
    my $email = qq[
SR $srnumber reference Avamar system: $HOSTNAME & issue $srdesc


Dear $customername

The issue $srdesc related to Avamar system $HOSTNAME has been resolved and the Avamar system has been brought up to a fully operational state by TSE $yourname. The current system status is as outlined below.

- This Service Request has been placed in monitor mode pending completion of one cycle of the backup scheduler and the maintenance scheduler.
- As this Avamar System is emailing home, we will confirm this through the email home report and follow-up with a final email to inform and confirm that the SR is being closed as the system had successfully reported in.
- If you run into issues directly related to this SR before closure, please do not hesitate in informing us by updating the service request so that the SR can remain open and the issue worked by an Avamar TSE

Avamar System Status Summary:

 1. $GSAN_MCS_EMS - GSAN, MCS and EMS services are up and fully operational
 2. $MAINT_SCHED - Maintenance Scheduler has been checked to ensure it has been enabled and running
 3. $BACKUP_SCHED - Backup Scheduler has been checked to ensure it has been enabled and running
 4. $CRON_SCHED - Cron scheduler has been <enabled/disabled> for your version of Avamar and your custom configuration
 5. $MCGUI_STATUS - Access to MCGUI for MCS and EM for Enterprise Manager has been confirmed
 6. $BACKUP_DONE - The gsan ability to receive backup data has been confirmed by performing an MCS and EMS Flush
 7. $CAPACITY_STATUS - The current overall capacity of the system has been confirmed to be healthy
 8. $CP_RETENTION - Checkpoint retention has been returned to default setting of cpmostrecent="2" and cphfschecked="1"
 9. The Avamar System has been checked to confirm that it is emailing home to Avalanche & ConnectEMC/SYR. Sample email home report to Avalanche has been included below for reference

];

    print LOG "$email";
    # A lexical handle is used because sched() can alias the bareword OUTPUT
    # handle to STDOUT.
    if (open(my $mail, '>', "hc_email.txt")) {
        print {$mail} "$email";
        close($mail);
    } else {
        printboth("ERROR: Unable to write hc_email.txt: $!\n");
    }
}
2943########## End logoff_report ########
2944
2945
2946########## Start dpnctl_status ########
# dpnctl_status()
# Runs "dpnctl status", stores the state of each service in its global
# (*_status, $ddrmaint_service) and reports every service through
# printboth()/msg(). Failures of the critical services add one to $LOGOFF_ERR
# and set the summary variables used by logoff_report(): $GSAN_MCS_EMS,
# $BACKUP_SCHED, $MAINT_SCHED and $CRON_SCHED.
sub dpnctl_status {

    print LOG "\n\n\n### ".localtime()." ### Starting dpnctl_status\n";
    getnodetype() if (!%PARTLIST);
    getconfiginfo() if (!$GOTCONFIGINFO);
    nodexref() if (!$NODE_COUNT);

    # [ pattern, variable to set, label used in the log, enabled ]
    my @status_line = (
        [ qr/gsan status: (.*)/,                       \$gsan_status,            "GSAN",             1 ],
        [ qr/MCS status: (.*)\./,                      \$mcs_status,             "MCS ",             1 ],
        [ qr/EMS status: (.*)\./,                      \$ems_status,             "EMS",              $VERSNUM<720 ],
        [ qr/emt status: (.*)\./,                      \$emt_status,             "emt",              1 ],
        [ qr/avinstaller status: (.*)\./,              \$avi_status,             "avi",              1 ],
        [ qr/Backup scheduler status: (.*)\./,         \$backupsched_status,     "BACKUP Sched",     1 ],
        [ qr/Scheduler status: (.*)\./,                \$backupsched_status,     "BACKUP Sched",     1 ],
        [ qr/dtlt status: (.*)\./,                     \$dtlt_status,            "DTLT",             1 ],
        [ qr/windows scheduler status: (.*)\./,        \$maintsched_status,      "Maint Sched",      1 ],
        [ qr/cron jobs status: (.*)\./,                \$cronjobs_status,        "Cron",             1 ],
        [ qr/Maintenance operations status: (.*)\./,   \$cronjobs_status,        "Cron",             1 ],
        [ qr/Unattended startup status: (.*)\./,       \$unattendedstart_status, "startup",          1 ],
        [ qr/ddrmaint-service status: (.*)\./,         \$ddrmaint_service,       "ddrmaint-service", 1 ],
    );

    if (open(my $dpnctl, '-|', "dpnctl status 2>&1")) {
        while (my $line = <$dpnctl>) {
            print LOG $line;
            for my $entry (@status_line) {
                my ($pattern, $target, $label, $enabled) = @$entry;
                next if (!$enabled);
                if (my ($value) = $line =~ $pattern) {
                    $$target = $value;
                    print LOG "$label: $value\n";
                }
            }
        }
        close($dpnctl);
    } else {
        # Nothing is parsed, so the checks below report the services as down
        print LOG "Unable to run dpnctl status: $!\n";
    }

    $GSAN_MCS_EMS = "PASSED";
    if ($MAINT_RUNNING) {
        printboth("WARNING: Maintenance routine $MAINT_RUNNING is running. Skipping some checks\n");
        msg("Maintenance Running","WARNING");
    }
    if ($gsan_status ne "ready" and $gsan_status ne "up") {
        if ($MAINT_RUNNING) {
            printboth("WARNING: GSAN status is $gsan_status because $MAINT_RUNNING is running. Skipping some checks\n");
            msg("GSAN status","WARNING");
        } else {
            # Counted once; the original incremented $LOGOFF_ERR twice here
            printboth("CRITICAL ERROR: GSAN status is $gsan_status\n");
            msg("GSAN status","FAILED");
            $LOGOFF_ERR++;
            $GSAN_MCS_EMS = "FAILED";
        }
    } else {
        msg("GSAN status","PASSED");
    }

    if ($mcs_status ne "up" ) {
        # Counted once; the original incremented $LOGOFF_ERR twice here
        printboth("CRITICAL ERROR: MCS status is $mcs_status\n");
        msg("MCS status","FAILED");
        $LOGOFF_ERR++;
        $GSAN_MCS_EMS = "FAILED";
    } else {
        msg("MCS status","PASSED");
    }

    if ($ddrmaint_service ne "up" ) {
        printboth("WARNING: ddrmaint service status is $ddrmaint_service\n");
        printboth("RESOLUTION: Start ddrmaint service if required with the ddrmaint-service command\n\n");
        msg("ddrmaint service","WARNING");
    } else {
        msg("ddrmaint service","PASSED");
    }

    if (!$VBA and $VERSNUM<720 ){
        # A missing EMS line counts as passed
        if ($ems_status ne "up" and $ems_status) {
            printboth("ERROR: EMS status is $ems_status\n");
            msg("EMS status","FAILED");
            $LOGOFF_ERR++;
            $GSAN_MCS_EMS = "FAILED";
        } else {
            msg("EMS status","PASSED");
        }
    }
    if (!$VBA and $avi_status and $avi_status ne "up"){
        printboth("ERROR: AVI Installer status is $avi_status\n");
        msg("AVI Installer status","FAILED");
    }
    if (!$VBA and $emt_status and $emt_status ne "up"){
        printboth("ERROR: EMT status is $emt_status\n");
        msg("EMT status","FAILED");
    }

    print LOG "NODETYPE is $NODETYPE\n";
    if ($NODETYPE =~ /AER/i) {
        print LOG "Skipping Backup Sched, DTLT, MaintSched for AER $NODETYPE\n";
    } else {
        if ($backupsched_status ne "up" ) {
            printboth("CRITICAL ERROR: Backup Scheduler Status status is $backupsched_status\n");
            $LOGOFF_ERR++;
            $BACKUP_SCHED="FAILED";
        } else {
            $BACKUP_SCHED="PASSED";
        }
        msg("Backup Scheduler running",$BACKUP_SCHED);

        if ( $VERSNUM>500 and $VERSNUM <720 and !$VBA ) {
            if ($dtlt_status ne "up") {
                printboth("CRITICAL ERROR: DTLT status is $dtlt_status\n");
                $LOGOFF_ERR++;
                msg("Desktop/Laptop running","FAILED");
            } else {
                msg("Desktop/Laptop running","PASSED");
            }
        }

        if ($VERSNUM > 500) {
            if ($maintsched_status ne "enabled" ) {
                printboth("CRITICAL ERROR: Maintenance windows scheduler status is $maintsched_status\n");
                $LOGOFF_ERR++;
                $MAINT_SCHED="FAILED";
            } else {
                $MAINT_SCHED = "PASSED";
            }
            msg("Maintenance scheduler running",$MAINT_SCHED);
        } else {
            print LOG "Maint Scheduler Skipped for version $AVAMARVER\n";
            $MAINT_SCHED = "PASSED";
        }
    }

    # for vers 6+ cron maint is not printed. check for suspended file only
    # (this replaces whatever was parsed from the dpnctl output above)
    if ( -e "/usr/local/avamar/var/cron/suspended") {
        $cronjobs_status="suspended."
    } else {
        $cronjobs_status="enabled"
    }
    if ($cronjobs_status ne "enabled") {
        printboth("CRITICAL ERROR: Maintenance cron jobs status is $cronjobs_status\n");
        printboth("RESOLUTION: Determine why they are suspended. Use 'resume_crons'\n\n");
        $LOGOFF_ERR++;
        $CRON_SCHED="FAILED";
        msg("Cron jobs enabled","FAILED");
    } else {
        $CRON_SCHED="PASSED";
        msg("Cron jobs enabled","PASSED");
    }

    if ($VERSNUM > 600 and $NODE_COUNT > 1 ) {
        print LOG "Unattended startup not checked for v6 multinode: $AVAMARVER with $NODE_COUNT nodes\n";
        msg("Unattended startup","PASSED");
    } else {
        if ( $unattendedstart_status eq "enabled" and ( $NODE_COUNT > 1 or $DD)) {
            printboth("ERROR: Unattended startup status is $unattendedstart_status\n");
            printboth("RESOLUTION: Unattended startup should not be enabled on multi-node servers or if Data Domain is attached\n\n");
            msg("Unattended startup","FAILED");
        } else {
            msg("Unattended startup","PASSED");
        }
    }

}
3097########## End dpnctl_status ########
3098
3099
3100########## Start cronrunning ########
# cronrunning()
# Checks that the cron daemon is running: "ps -C cron" when the OS recorded
# for node (0.s) is SUSE/SLES, "service crond status" otherwise. The command
# output is kept in the global $results. A missing daemon is a critical
# error and adds one to $LOGOFF_ERR.
sub cronrunning {

    print LOG "\n\n\n### ".localtime()." ### Starting cronrunning\n";

    my $on_suse = ( $NODE_INFO{"(0.s)"}{os} =~ /suse|sles/i );
    if (!$on_suse) {
        print LOG "checking for crond\n";
        $results = qx{service crond status};
    } else {
        $results = qx{ps -C cron};
        print LOG "suse - Checking for cron: ps -C cron\n";
        # ps only lists the process, so add the word the check below looks for
        $results = "running\n".$results if ($results =~ /cron/);
    }
    print LOG "RESULTS: $results";

    if ($results =~ /running/) {
        msg("Cron Running","PASSED");
        print LOG "Cron running\n";
    } else {
        printboth("CRITICAL ERROR: cron or crond is not running\n");
        printboth("RESOLUTION: Check /var/log/messages for cron messages. Run 'service crond start' as root.\n\n");
        msg("Cron Running","FAILED");
        $LOGOFF_ERR++;
    }
}
3123########## End cronrunning ########
3124
3125########## Start test_flush ########
# test_flush()
# Proves that the GSAN accepts a backup by flushing the MCS database with
# "mcserver.sh --flush" (limited to 60 seconds by rununtil). The outcome is
# stored in $BACKUP_DONE; a failed flush is a critical error and adds one to
# $LOGOFF_ERR. When the GSAN is not up the flush is not attempted.
sub test_flush {

    print LOG "\n\n\n### ".localtime()." ### Starting test_flush\n";
    dpnctl_status() if (!$gsan_status);

    unless ($gsan_status eq "ready" or $gsan_status eq "up") {
        printboth("\nWARNING: Unable to backup MCS database because GSAN status is $gsan_status\n");
        printboth("RESOLUTION: Check the gsan status\n");
        $BACKUP_DONE="FAILED";
        msg("Test Backup with MC Flush","WARNING");
        return;
    }

    my $flush_cmd = qq[ rununtil 60 mcserver.sh --flush ];
    my $output    = qx{$flush_cmd};
    print LOG "Command: $flush_cmd\nResults:\n$output\n";

    my $flushed = ($output =~ /Administrator Server flushed/);
    if (!$flushed) {
        printboth("CRITICAL ERROR: Unable to backup MCS database\n");
        printboth("RESOLUTION: Check /usr/local/avamar/var/mc/server_log/flush.log, GSAN status and $logfile\n\n");
        $LOGOFF_ERR++;
    } else {
        print LOG "--> flush successful\n";
    }
    $BACKUP_DONE = $flushed ? "PASSED" : "FAILED";
    msg("Test Backup",$BACKUP_DONE);
}
3153########## End test_flush ########
3154
3155
3156########## Start check_capacity ########
# check_capacity()
# Runs "mccli server show-prop" and sets two status variables used by
# logoff_report():
#   $CAPACITY_STATUS - FAILED when "Server utilization" is above 90 percent
#   $MCGUI_STATUS    - PASSED when mccli ran and exited with status 0
# Fixes over the original: the threshold tested a stale $1 instead of the
# parsed value, the value was taken from the wrong word of the line, the
# over-threshold branch reported PASSED, and the MCS result could never fail.
sub check_capacity {
    print LOG "\n\n\n### ".localtime()." ### Starting check_capacity\n";

    my $responded = 0;

    my $cmd = qq[ mccli server show-prop ] ;
    print LOG "Command: $cmd\n";
    if (open(my $mccli, '-|', $cmd)) {
        while (my $line = <$mccli>) {
            chomp($line);
            print LOG "$line\n";
            next if ($line !~ /Server utilization/);
            # The percentage is the first number after the label
            if ($line !~ /Server utilization\s+\D*([\d.]+)/) {
                print LOG "Unable to read the utilization from this line\n";
                next;
            }
            my $util = $1;
            print LOG "Utilization is $util\n";
            if ($util > 90) {
                $CAPACITY_STATUS="FAILED";
                print LOG "$CAPACITY_STATUS\n";
            } else {
                $CAPACITY_STATUS="PASSED";
            }
            msg("Capacity Level",$CAPACITY_STATUS);
        }
        # close() on a pipe is true only when the command exited with 0
        $responded = 1 if (close($mccli));
    } else {
        print LOG "Unable to run mccli: $!\n";
    }

    if ($responded) {
        $MCGUI_STATUS="PASSED";
        msg("MCS Responding","PASSED");
    } else {
        $MCGUI_STATUS="FAILED";
        msg("MCS Responding","FAILED");
    }
}
3189########## End check_capacity ########
3190
3191########## Start checketh #########
# checketh()
# Checks speed, duplex and auto negotiation of every eth* interface with link
# on all nodes (ethtool through mapall), and lists extra ifcfg-*.* files that
# are not bond VLAN configs. Results go to printboth()/msg(). Skipped on
# virtual hardware. Reads the mapall() output from $TMPFILE.
sub checketh {
    print LOG "\n\n\n### ".localtime()." ### Starting checketh\n";
    nodexref() if (!$NODE_COUNT);
    if ( $VIRTUALHW ) {
        print LOG "skip for virtual hardware: $MANUFACTURER\n";
        return;
    }
    my ($speed,$eth,$duplex,$node,$link,$autoneg,$err,$warn) = ("") x 8;

    $cmd=q[ ls /etc/sysconfig/network/ifcfg-*.* 2>/dev/null | sed -e 's/^/IFCFG:/' ;
 ls /etc/sysconfig/network-scripts/ifcfg-*.* 2>/dev/null | sed -e 's/^/IFCFG:/' ;
 ifconfig | grep -v "^ \|^$\|^lo" | awk '{system("sudo ethtool "$1)}' ] ;

    my $iferr="";
    mapall($ALL,$cmd);

    # Read everything first so the handle is closed on every path, including
    # the early return below. An unreadable file ends in the "Expected output
    # was not found" warning, as before.
    my @output;
    if (open(my $fh, '<', $TMPFILE)) {
        @output = <$fh>;
        close($fh);
    } else {
        print LOG "Unable to open $TMPFILE: $!\n";
    }

    my $worked=0;
    for my $line (@output) {
        chomp($line);
        print LOG "$line\n";
        if ($line =~ /sudo: no tty present/) {
            printboth("ERROR: Node $node sudo ethtool failed with $line \n");
            printboth("RESOLUTION: Fix sudo problem. Ethernet checks will not be valid because of this\n\n");
            # NOTE(review): this label looks copied from another check - kept as is
            msg("Duplicate IP info retrieval","FAILED");
            return;
        }

        $node=$1 if ($line =~ /(\(0\..*\)) ssh/);
        if ( $line !~ /bond\d+\.\d+/ and $line =~ /^IFCFG:(.*)/ ) {
            $iferr.="WARNING: Node $node has extra interface config file $1\n";
            next;
        }
        if ( $line =~ /^Settings /) {
            # "Settings for eth0:" - the interface name is the third word
            (undef,undef,$eth)=split(' ',$line);
            $eth =~ s/://;
            $worked=1;
        }
        if ( $line =~ /Speed: (\d+)M/ ) { $speed=$1; }
        if ( $line =~ /Duplex:/) { (undef,$duplex)=split(":",$line); $duplex=~ s/ //g; }
        if ( $line =~ /Auto-negotiation:/) { (undef,$autoneg)=split(":",$line); $autoneg=~ s/ //g; }

        # The values collected above are judged when this line is seen
        if ( $line =~ /Link detected: (\w+)/ ) {
            my $linkstate = $1;
            print LOG "---> Node $node port $eth speed $speed duplex $duplex link $linkstate autoneg $autoneg\n";
            if ($linkstate eq "yes" and $eth =~ /eth/ ) {
                if ($speed < 1000) {
                    if ($NODE_COUNT == 1 and $speed == 100 ) {
                        if (!$PREUPGRADE){
                            printboth("WARNING: $node $eth speed is $speed instead of at least 1000Mb/s\n");
                            printboth("RESOLUTION: 100Mb/s is allowed on single node but at least 1000Mb/s is recommended\n");
                            $warn="yes";
                        }
                    } else {
                        printboth("ERROR: $node $eth speed is $speed instead of at least 1000Mb/s\n");
                        $err="yes";
                    }
                }
                if ($duplex ne "Full") {
                    printboth("ERROR: $node $eth duplex is $duplex instead of Full\n");
                    $err="yes";
                }
                if ($autoneg ne "on" and $speed < 10000 ) {
                    printboth("ERROR: $node $eth auto negotiation is $autoneg instead of on\n");
                    $err="yes";
                }
                ($speed,$autoneg,$duplex,$link)=("","","","");
            }
        }
    }
    if ($warn eq "yes") {
        msg("Ethernet Speed Settings","WARNING");
    }
    if ($err eq "yes") {
        printboth("RESOLUTION: See KB466119 for troubleshooting NIC cards and speeds\n\n");
        msg("Ethernet Settings","FAILED");
    } else {
        msg("Ethernet Settings","PASSED");
    }
    if ($iferr) {
        printboth("${iferr}RESOLUTION: Extra config files may cause node to not reboot. Make sure these files are required or rename them before rebooting\n\n");
        msg("Network Interface Config","WARNING");
    }
    if (!$worked) {
        printboth("WARNING: Expected output was not found. Review hc_proactive_check.log for checketh\n");
        msg("Network Interface Config","WARNING");
    }
}
3277########## End checketh #########
3278
3279########## Start status_dpn ######
# status_dpn - run status.dpn and verify node and system state.
#
# Every node line must be ONLINE / fullaccess / not suspended; System-Status
# must be "ok" and Access-Status "full" (tolerated while a maintenance task,
# $MAINT_RUNNING, is active).  Lines containing "WARNING:" are collected as
# scheduler warnings.  Result is reported as msg("Status.dpn", ...):
# FAILED on any error, WARNING when only scheduler warnings were seen,
# otherwise PASSED.
sub status_dpn {
    print LOG "\n\n\n### ".localtime()." ### Starting status_dpn\n";
    my $e="";
    my $sw="";  # scheduler warnings, newest first, ending in one RESOLUTION line

    my $fh;
    if (!open($fh,'-|',"status.dpn")) {
        # Without this check an unrunnable status.dpn produced no output and
        # the check silently reported PASSED.
        printboth("ERROR: Unable to run status.dpn: $!\n");
        printboth("RESOLUTION: Fix problems identified\n\n");
        msg("Status.dpn","FAILED");
        return;
    }
    # A named line variable is used because getconfiginfo() is called inside
    # the loop and could otherwise disturb $_ / the capture variables.
    while (my $line=<$fh>) {
        chomp($line);
        print LOG "$line\n";
        if ($line =~ /^(0\.[0-9A-Z]+)\s+(\d+\.\d+\.\d+\.\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
            # Columns: node, IP, version, state, runlevel, flags, dis, suspend, load
            my ($id,$state,$runlevel,$suspend)=($1,$4,$5,$8);
            if ($state ne "ONLINE") {
                printboth("ERROR: Node $id State is $state but should be ONLINE\n");
                $e=1;
            }
            if ($runlevel ne "fullaccess") {
                printboth("ERROR: Node $id Runlevel is $runlevel but should be fullaccess\n");
                $e=1;
            }
            if ($suspend ne "false") {
                printboth("ERROR: Node $id Suspend is $suspend but should be false. See KB469194\n");
                $e=1;
            }
        }
        if ($line =~ /stripes SUSPENDED/) {
            printboth("WARNING: Stripes are suspended\n");
            $e=1;
        }
        if ($line =~ /System-Status: (\w+)/) {
            my $sysstatus=$1;
            if ($sysstatus ne "ok") {
                printboth("ERROR: System-Status is $sysstatus but should be ok\n");
                $e=1;
            }
        }
        if ($line =~ /Access-Status: (\w+)/) {
            my $status=$1;
            if ($status ne "full") {
                getconfiginfo() if (!$GOTCONFIGINFO);
                if ($MAINT_RUNNING) {
                    printboth("WARNING: Access-Status is $status because $MAINT_RUNNING is running. Skipping some checks\n");
                } else {
                    printboth("ERROR: Access-Status is $status but should be full\n");
                    $e=1;
                }
            }
        }
        if ($line =~ /WARNING:/) {
            $sw="RESOLUTION: See KB424498 for troubleshooting maintenance scheduler problems.\n\n" if (!$sw);
            $sw="WARNING: $line\n$sw";
        }
    }
    close($fh);

    if ($e or $sw) {
        printboth($sw) if ($sw);
        printboth("RESOLUTION: Fix problems identified\n\n") if $e;
        msg("Status.dpn", $e ? "FAILED" : "WARNING");
    } else {
        msg("Status.dpn","PASSED");
    }
}
3326########## End status_dpn ######
3327
3328########## Start checkascd ######
# checkascd - verify that ascd answers "avmaint datacenterlist".
#
# Pass 1 runs the command with --debug; when it reports that a dispatcher
# table was loaded from the file named in "tabfilename = ...", that file is
# moved to /tmp.  Pass 2 runs the command again and fails the check if the
# output contains "cannot connect to server".
# Reports msg("ascd status", PASSED|FAILED).
sub checkascd {
    print LOG "\n\n\n### ".localtime()." ### Starting checkascd\n";

    my $cmd=q[ avmaint datacenterlist --debug 2>&1 ];
    my $tabfile="";

    # Pass 1: locate and move aside the dispatcher table file.
    if (open(my $probe,'-|',$cmd)) {
        while (my $line=<$probe>) {
            chomp($line);
            print LOG "$line\n";
            if ($line =~ /tabfilename = (.*)/) {
                $tabfile=$1;
                print LOG "Dispatcher table file is $tabfile\n";
            }
            if ($line =~ /Loaded dispatcher table/ and $tabfile) {
                # The original built the target from $1, which is empty after
                # a pattern without capture groups; use the file name instead.
                (my $name=$tabfile) =~ s{.*/}{};
                if (move($tabfile,"/tmp/$name")) {
                    print LOG "Moved $tabfile to /tmp\n";
                } else {
                    print LOG "Unable to move $tabfile to /tmp: $!\n";
                }
            }
        }
        close($probe);
    } else {
        print LOG "Unable to run$cmd: $!\n";
    }

    # Pass 2: the actual connectivity test.
    my $check;
    if (!open($check,'-|',$cmd)) {
        printboth("ERROR: Unable to run avmaint datacenterlist: $!\n");
        printboth("RESOLUTION: See KB 163211 - How to test if ASCD is working correctly\n\n");
        msg("ascd status","FAILED");
        return;
    }
    while (my $line=<$check>) {
        chomp($line);
        print LOG "$line\n";
        if ($line =~ /cannot connect to server/) {
            printboth("ERROR: ascd is not responding\n");
            printboth("RESOLUTION: See KB 163211 - How to test if ASCD is working correctly\n\n");
            msg("ascd status","FAILED");
            close($check);
            return;
        }
    }
    close($check);
    msg("ascd status","PASSED");
}
3359########### End checkascd ######
3360
3361########### Start checkpointxmlperms ######
# checkpointxmlperms - check mode, owner and group of checkpoints.xml.
#
# Skipped (logged only) when the file does not exist.  The "ls -al" listing
# must show read permission for user, group and other, and owner and group
# must both be "admin".  Reports msg("Checkpoint.xml Perms", PASSED|FAILED).
sub checkpointxmlperms {

    print LOG "\n\n\n### ".localtime()." ### Starting checkpointxmlperms\n";

    unless (-e "/usr/local/avamar/var/checkpoints.xml") {
        print LOG "Check skipped. File not found\n";
        return;
    }

    $results=`ls -al /usr/local/avamar/var/checkpoints.xml`;
    print LOG $results;
    chomp($results);

    # ls -al columns: mode, link count, owner, group, ...
    my @column=split(" ",$results);
    my ($perms,$owner,$group)=@column[0,2,3];
    print LOG "perms=$perms owner=$owner group=$group\n";

    my $problems=0;
    unless ($perms =~ /-r..r..r../) {
        printboth("ERROR: Permissions should be -rw-rw-r-- but is $perms for checkpoints.xml\n");
        $problems++;
    }
    foreach my $pair (["Owner",$owner],["Group",$group]) {
        my ($what,$actual)=@{$pair};
        next if ($actual eq "admin");
        printboth("ERROR: $what should be admin but is $actual for checkpoints.xml\n");
        $problems++;
    }

    if ($problems) {
        printboth("RESOLUTION: Fix permissions, owner or group for /usr/local/avamar/var/checkpoints.xml\n\n");
        msg("Checkpoint.xml Perms","FAILED");
        return;
    }
    msg("Checkpoint.xml Perms","PASSED");
}
3395########### End checkpointxmlperms ######
3396
3397########## Start duplicateip ######
# duplicateip - look for duplicate IP addresses on bond0/eth0 interfaces.
#
# Collects "/sbin/ifconfig" output from the nodes (mapall), and for every
# bond0/eth0 interface that has an IP address and received traffic runs
# "sudo arping -c3" from the utility node (0.s).  Two different MAC
# addresses answering for the same IP are reported as a duplicate.
# Reports msg("Duplicate IP", PASSED|FAILED), or
# msg("Duplicate IP info retrieval","FAILED") when the data cannot be
# collected.
sub duplicateip {
    print LOG "\n\n\n### ".localtime()." ### Starting duplicateip\n";

    nodexref() if (!$NODE_COUNT);
    if ($NODE_COUNT eq 1) {
        # NOTE(review): the log says "skipped" but the original code has no
        # return here, so the check still runs on single node servers.
        # Behaviour is kept as-is; confirm which one is intended.
        print LOG "Check skipped for single node servers\n";
    }

    # Lexical state: the original used globals, and its global @ifconfig was
    # never emptied, so a second call re-processed the previous output too.
    my ($e,$node,$iface,$ipaddr,$ifmac,$errors,$rx)=("")x7;
    my @ifconfig;

    mapall("",q[ /sbin/ifconfig ]);
    if (open(my $fh,'<',$TMPFILE)) {
        chomp(@ifconfig=<$fh>);
        close($fh);
    } else {
        printboth("ERROR: Unable to read ifconfig output $TMPFILE: $!\n");
        msg("Duplicate IP info retrieval","FAILED");
        return;
    }

    foreach my $line (@ifconfig) {
        if ($line =~ /(\(0\..*\)) ssh/) {
            $node=$1;
            $iface="";
            print LOG "\n\n";
        }
        print LOG $line."\n" if ($line);
        if ($line =~ /(.*)\s*Link encap/) {
            # First line of an interface stanza: start a fresh record.
            $iface=$1;
            ($rx,$ipaddr,$errors,$ifmac)=("")x4;
        }
        $rx=$1     if ($line =~ /RX bytes:(\d*) /);
        $ipaddr=$1 if ($line =~ /inet addr:(\d+\.\d+\.\d+\.\d+) /);
        $errors=$1 if ($line =~ /errors:(\d*)/ and !$errors);  # keep first non-zero count
        $ifmac=$1  if ($line =~ /HWaddr (.*)/);

        # A blank line ends the stanza; only then is the record complete.
        next if ($line ne "");

        $iface =~ s/ //g;
        print LOG "--> Node $node IF=$iface IP=$ipaddr MAC=$ifmac ERR=$errors RX=$rx\n";
        unless ($iface and $ipaddr and $rx>0 and $iface =~ /bond0|eth0/) {
            print LOG "SKIPPING: empty iface, ipaddr, RX bytes=0 or iface<>eth0 or bond0\n\n";
            next;
        }

        my $arping=($iface =~ /bond/) ? "sudo arping -I $iface -c3 $ipaddr"
                                      : "sudo arping -c3 $ipaddr";
        mapall("--nodes=0.s",$arping);

        my $reply;
        if (!open($reply,'<',$TMPFILE)) {
            print LOG "Unable to open $TMPFILE: $!\n";
            next;
        }
        my $last="";
        while (my $answer=<$reply>) {
            chomp($answer);
            print LOG "$answer\n";
            if ($answer =~ /sudo: no tty present/) {
                printboth("ERROR: Node $node sudo arping failed with $answer \n");
                printboth("RESOLUTION: Fix sudo problem. Duplicate IP checks will not be valid because of this\n\n");
                msg("Duplicate IP info retrieval","FAILED");
                close($reply);
                return;
            }
            if ($answer =~ /^Unicast .*\[(.*)\]/) {
                my $mac=$1;
                if ($last and $last ne $mac) {
                    $e="yes";
                    printboth("ERROR: Duplicate IP Address $iface $ipaddr MAC=$mac MAC=$last\n");
                }
                $last=$mac;
            }
        }
        close($reply);
        # Record handled; clear it so a further blank line does not re-test it.
        ($errors,$ipaddr,$iface,$ifmac)=("")x4;
    }

    if ($e) {
        printboth("RESOLUTION: Find duplicate IP addresses and fix. Try 'arping -c3 <ip>'\n\n");
        msg("Duplicate IP","FAILED");
    } else {
        msg("Duplicate IP","PASSED");
    }
}
3469########## End duplicateip ######
3470
3471########## Start bondconf ######
# bondconf - warn when a bond interface is not in active-backup mode.
#
# Reads /proc/net/bonding/bond* from all nodes (mapall) and flags every
# "Bonding Mode:" line that does not mention active-backup.  The resolution
# text depends on $NODETYPE (gen4 / gen3).
# Reports msg("Bonding Configuration", PASSED|WARNING).
sub bondconf {
    print LOG "\n\n\n### ".localtime()." ### Starting bondconf\n";

    my $wrong_mode=0;
    $cmd=qq[ cat /proc/net/bonding/bond* ];
    mapall($ALL,$cmd);
    open(CMD_PIPE,$TMPFILE);
    while (defined(my $line=<CMD_PIPE>)) {
        chomp($line);
        $node=$1 if ($line =~ /(\(0\..*\)) ssh/);   # node header written by mapall
        print LOG "$line\n";
        next unless ($line =~ /Bonding Mode:/);
        next if ($line =~ /active-backup/);
        printboth("WARNING: Node $node bonding setup is wrong if in a high availability configuration (HA)\n $line\n");
        $wrong_mode=1;
    }

    unless ($wrong_mode) {
        msg("Bonding Configuration","PASSED");
        return;
    }

    # Hardware generation specific advice; each entry is tested independently.
    my @advice=(
        [ qr/gen4/i, "RESOLUTION: Configure bonding to be active-passive if grid is setup for high availability (HA). See KB333477 for Gen4/4s.\n\n" ],
        [ qr/gen3/i, "RESOLUTION: Configure bonding to be active-passive if grid is setup for high availability (HA). See tech note 300-011-174-DualSwtch.pdf or KB336400 for Gen3\n\n" ],
    );
    foreach my $entry (@advice) {
        my ($pattern,$text)=@{$entry};
        printboth($text) if ($NODETYPE =~ $pattern);
    }
    msg("Bonding Configuration","WARNING");
}
3505########## End bondconf ######
3506
3507
3508########## Start hfschecktime ##########
# hfschecktime - flag HFS checks of the last 7 days that ran over 7 hours.
#
# Reads events 4002 and 4003 from v_events (treated below as hfscheck
# start / end) and measures the time between each 4002 and the following
# 4003.  Requires the MCS database ($MCDBOPEN, $dbh).
# Reports msg("HFSCheck run time", PASSED|FAILED|WARNING).
sub hfschecktime {
    print LOG "\n\n\n### ".localtime()." ### Starting hfschecktime\n";

    if (!$MCDBOPEN) {
        printboth("WARNING: Unable to open MCS database\n\n");
        msg("HFSCheck run time","WARNING");
        return;
    }
    my $e="";

    # Date (YYYY-MM-DD) seven days ago.
    my ($dd,$mm,$yy)=(localtime(time - 86400*7))[3,4,5];
    my $since=sprintf("%4d-%02d-%02d",$yy+1900,$mm+1,$dd);
    my $sql = qq[
        select date,time,code
        from v_events
        where date>= '$since'
        and (code=4002 or code=4003)
        order by date,time
    ];

    my $sth = $dbh->prepare($sql);
    $sth->execute;

    my $starthfs="";
    while (my ($date,$time,$code)=$sth->fetchrow_array()) {
        my $epoch = sched_toepoch($date,$time);
        print LOG "$date $time $code $epoch\n";
        if ($code eq "4002") {
            $starthfs=$epoch;
            next;
        }
        next unless ($code eq "4003" and $starthfs);

        my $diff=$epoch-$starthfs;
        # The start is consumed by this end event; without this a second
        # 4003 would be measured against the same, stale start time.
        $starthfs="";
        print LOG "diff=$diff\n";
        if ($diff > (7*3600)) {
            my $diffhr=int($diff/3600);
            printboth("WARNING: HFSCheck took longer than $diffhr hours on $date\n");
            $e="yes";
        }
    }
    if ($e) {
        printboth("RESOLUTION: Look for overlap, modified=2 settings, hardware issues\n\n");
        msg("HFSCheck run time","FAILED");
    } else {
        msg("HFSCheck run time","PASSED");
    }
}
3556########## End hfschecktime ##########
# showhistory - print every record of /home/admin/.pac_history and exit.
#
# Each record is a "|" separated line: epoch date, SR number, TSE, customer,
# description, force flag.  Only the first 10 characters of the date field
# are used as the epoch.  The script always terminates with exit status 0.
sub showhistory {
    my $history="/home/admin/.pac_history";
    if (-s $history) {
        if (open(my $fh,'<',$history)) {
            print "\n===== ACTIVITY HISTORY\n";
            while (my $record=<$fh>) {
                chomp($record);
                my ($date,$sr,$tse,$cust,$desc,$force)=split(/\|/,$record);
                my ($dd,$mm,$yr)=(gmtime(substr($date,0,10)))[3,4,5];
                my $prtdate=sprintf("%04d-%02d-%02d",1900+$yr,1+$mm,$dd);
                print "DATE....: $prtdate SR#$sr TSE:$tse\nCUSTOMER: $cust\n$desc $force\n========================================================\n";
            }
            close($fh);
        } else {
            print "WARNING: Unable to read $history: $!\n";
        }
    }
    exit 0;
}
3572
3573########## Start shownotes ##########
# shownotes - print the most recent history record and any site notes.
#
# Shows the last line of /home/admin/.pac_history together with the number
# of records, then prints the notes file (notes-proactive_check.pl in the
# current directory if present, otherwise notes.txt next to the script)
# both directly and through printboth(), and finally the start banner.
sub shownotes {
    if (-s "/home/admin/.pac_history") {
        open(my $hist,"/home/admin/.pac_history");
        my @records=<$hist>;
        my $total=scalar(@records);
        my $summary="";
        if ($total) {
            # Only the final record is displayed.
            my $newest=$records[-1];
            chomp($newest);
            my ($date,$sr,$tse,$cust,$desc,$force)=split(/\|/, $newest);
            my @utc=gmtime(substr($date,0,10));
            my $prtdate=sprintf("%04d-%02d-%02d",1900+$utc[5],1+$utc[4],$utc[3]);
            $summary="DATE....: $prtdate SR#$sr TSE:$tse\nCUSTOMER: $cust\n$desc $force";
        }
        print "\n===== LAST ACTIVITY ($total --history records)\n$summary\n";
    }

    my $notefile=dirname($0)."/notes.txt";
    $notefile="notes-proactive_check.pl" if (-e "notes-proactive_check.pl");
    if (-e $notefile) {
        open(NOTES,$notefile);
        print "\n===== SITE NOTES\n";
        while (defined(my $note=<NOTES>)) {
            print $note;
            printboth("$note");
        }
        printboth("\n");
    }
    print "\n===== START HEALTHCHECK\n";
}
3601########## End shownotes ##########
3602
3603########## Start check_script_version ##########
# check_script_version - compare $PROGVER with the version published on the
# EMC FTP server and optionally self-update.
#
# * $DARKSITE writes the /home/admin/.noftp and .darksite marker files.
# * If .noftp exists the check is reported as DISABLED.
# * For $VERSNUM 710..729 the temporary outbound FTP service is started and
#   an explicit active-mode port range is passed to curl.
# * If the version file cannot be fetched the user is asked whether FTP is
#   allowed (not on IDPA); answering "n" creates .noftp.
# * If a different version is published the user may download it; the
#   current script is first copied to x-proactive_check.pl and the new one
#   is then exec'ed with the same arguments.
# Reports msg("Latest script version", ...).  Sets the global $NEWEST.
sub check_script_version {

    print LOG "\n\n\n### ".localtime()." ### Starting check_script_version\n";
    print "\n$PROG $PROGVER (".localtime().")\n\n" if(!$LOGOFF);

    # Write a marker file containing the log date; failures are logged.
    my $write_marker=sub {
        my ($path)=@_;
        if (open(my $fh,'>',$path)) {
            print {$fh} $logdate."\n";
            close($fh);
        } else {
            print LOG "Unable to write $path: $!\n";
        }
    };

    if ($DARKSITE) {
        $write_marker->("/home/admin/.noftp");
        $write_marker->("/home/admin/.darksite");
    }
    if (-e "/home/admin/.noftp") {
        printboth("ERROR: FTP not allowed at this site.\n");
        printboth("RESOLUTION: If FTP is allowed use --update to turn check back on\n\n");
        msg("Latest script version","DISABLED");
        return;
    }
    print "Checking Script Version...";
    if ($PROGVER eq "NNN"){
        printboth("WARNING: TEST MODE. NO VERSION CHECK\n");
        msg("Latest script version","FAILED");
        return;
    }

    my $port;
    if ($VERSNUM >=710 and $VERSNUM <730 ){
        # enable FTP
        print LOG "Enable ftp\n";
        system("sudo /usr/local/avamar/lib/admin/security/ftp_service &");
        my $firewall=qx{sudo iptables -L | grep -P 'anywhere.*anywhere.*:ftp'};
        print LOG "Results: $firewall\n";
        if (!$firewall){
            printboth("WARNING: Automatic opening of temporary outbound FTP connections appears to have failed. Trying FTP anyway\n");
            printboth("RESOLUTION: Review Solve-desktop procedures to manually open firewall.\n\n");
        }
        # The backticks are expanded by the shell that runs curl.
        $port="-P `hostname -i`:35000-35010";
    } else {
        $port="-P -";
    }

    my $curlflags=qq[ --disable-eprt $port --connect-timeout 30 -v --user avamar_ftp:anonymous ];
    my $curlcmd=qq[curl $curlflags ftp://ftp.emc.com/software/scripts/proactive_check.version 2>&1 ];
    print LOG "curlcmd: $curlcmd\n";
    # Run the command exactly as logged.  The original appended
    # "/proactive_check.version 2>&1" again, handing curl a bogus second URL.
    my $results=qx{$curlcmd};
    print LOG "Result: $results\n";

    my ($newest)=$results =~ /PROGVER\s*=\s*"Version (\d*\.\d*)"/;
    if (!defined($newest)) {
        if ($IDPA) {
            printboth("WARNING: Update check skipped due to temporary network errors\n");
            msg("Latest script version","WARNING");
            return;
        }
        print "FAILED.\n$results\n\nDoes this site allow FTP? ";
        my $input=<STDIN>;
        $input="" if (!defined($input));
        chomp($input);
        print LOG "--> FTP Allowed input = '$input'\n";
        print "\n";
        if ($input =~ /^n/i) {
            $write_marker->("/home/admin/.noftp");
            msg("Latest script version","DISABLED");
        } else {
            printboth("Update check skipped due to temporary network errors\n");
            msg("Latest script version","SKIPPED");
        }
        return;
    }

    $NEWEST=$newest;
    if ($NEWEST eq $PROGVER) {
        print "OK\n";
        printboth("Passed\n");
        msg("Latest script version","PASSED");
        return;
    }
    if ($IDPA) {
        printboth("WARNING: Newer script version available.\n");
        msg("Latest script version","WARNING");
        return;
    }

    my $download=qq[curl $curlflags -O ftp://ftp.emc.com/software/scripts/proactive_check.pl 2>&1 ];
    printboth("ERROR: Newest version is $NEWEST. Running $PROGVER.\n");
    printboth("RESOLUTION: Get most recent version. Run this command\n $download\n");
    print "\nERROR: Newest version is $NEWEST. Do you want to update now? ";
    my $input=<STDIN>;
    $input="" if (!defined($input));
    chomp($input);
    print LOG "--> Continue input = '$input'\n";
    if ($input !~ /^y/i) {
        printboth("WARNING: Newer script version available.\n");
        msg("Latest script version","WARNING");
        return;
    }

    if (!copy("proactive_check.pl", "x-proactive_check.pl")) {
        printboth("ERROR: Unable to backup file: $!\n");
        exit 0;
    }
    print LOG "download:", qx{$download};
    print LOG "newcmd: ./proactive_check.pl ".join(" ",@ARGV)."\n";
    # List form: arguments are passed through unchanged instead of being
    # re-split (and re-interpreted) by a shell.
    exec("./proactive_check.pl",@ARGV) or do {
        printboth("ERROR: Unable to start ./proactive_check.pl: $!\n");
        msg("Latest script version","FAILED");
    };
}
3700########## End check_script_version ##########
3701
3702########## Start rptsecupdvers ##########
# rptsecupdvers - report the security update level of every node.
#
# For each logical node (plus "0.s" on multi node grids) the matching line
# of @DATA_SECUPD is looked up by physical node id, the SLES prefix is
# stripped and the second whitespace separated field is taken as the
# installed update.  Reports through msg():
#   "Security Updates info retrieval" FAILED  - a line shows "permission denied"
#   "Security Updates" WARNING                - nodes report different values
#   "Security Updates" <value|NONE FOUND>     - otherwise
sub rptsecupdvers {
    print LOG "\n\n\n### ".localtime()." ### Starting rptsecupdvers\n";
    getconfiginfo() if (!$GOTCONFIGINFO);

    my @targets=@NODES;
    push(@targets,"0.s") unless ($NODE_COUNT==1);

    my $previous="x";        # "x" means no node has been examined yet
    my $differs="";
    my $summary="";
    my $denied="";

    foreach my $node (sort @targets) {
        my $physnode=$NODE_LXREF{$node};
        my @hits=grep { /^\($physnode\)/ } @DATA_SECUPD;
        my $line=$hits[0];
        $line =~ s/SLES.*?-//i;
        $line =~ s/SLES.*?//i;
        my (undef,$item)=split(/\s/,$line);
        print LOG "phys:$physnode logic:$node line:$line\n";
        if ($line =~ /permission denied/i) {
            $denied.="WARNING: Node $node sudo failed or no read permission checking security update versions\n";
        }
        if ($line =~ /fatal:/) {
            $item="";
        }
        $summary.=" Node $node $item\n";
        if ($previous ne "x" and $item ne $previous) {
            $differs="yes";
        }
        $previous=$item;
    }

    if ($denied) {
        printboth($denied."RESOLUTION: Fix permissions on /usr/local/avamar/var/package-survey-*post_errata* files\n\n");
        msg("Security Updates info retrieval","FAILED");
        return;
    }
    if ($differs) {
        printboth("WARNING: Mismatch of Security Updates installed\n$summary");
        printboth("RESOLUTION: Review versions of security updates on each node. They are not required to match but usually do\n\n") ;
        msg("Security Updates","WARNING");
        return;
    }
    if ($previous eq "x" or !$previous) {
        $previous="NONE FOUND";
    }
    msg("Security Updates",$previous) ;
}
3738########## End rptsecupdvers ##########
3739
3740########## Begin lasthfs ##########
# lasthfs - verify that a recent validated checkpoint and HFS check exist.
#
# Skipped for AER node types.  Time limits are 36h (HFS check) and 24h
# (checkpoint), tightened to 24h / 6h for an IDPA pre-upgrade run.
# Checks, each reported through msg():
#   "HFSCheck in past N hours"  newest valid hfscheck from "avmaint lscp"
#   "Checkpoint Status"         newest valid checkpoint from "avmaint lscp"
#   "Last HFSCheck Status"      FAILED when the last result is MSG_ERR*
#                               (MSG_ERR_KILLED is tolerated)
#   "Backups after Checkpoint"  pre-upgrade only: backup newer than last cp
#   "Data Integrity Alert"      uncleared row in v_hfscheck_failures
sub lasthfs {
    print LOG "\n\n\n### ".localtime()." ### Starting lasthfs\n";
    if ($NODETYPE =~ /AER/) {
        print LOG "Skipping: $NODETYPE node\n";
        return;
    }
    getconfiginfo() if (!$GOTCONFIGINFO);

    my $hfstime=36;
    my $cptime=24;
    if ($IDPA and $PREUPGRADE) {
        $hfstime=24;
        $cptime=6;
    }
    print LOG "hfstime allowed=$hfstime. cptime allowed=$cptime\n";

    my $xml = XML::Parser->new( Style => 'Tree' );
    my $tree=$xml->parsefile("avmaint lscp|") ;
    SimpleXMLTree($tree);          # fills the global %xmltree
    my %lscp=%xmltree;

    # Find the newest valid checkpoint and the newest validated hfscheck.
    my ($lastcp,$lasthfs)=(0)x2;
    for my $path (sort keys %lscp) {
        next if ($path !~ m{/tag});
        my $key="/checkpointlist/checkpoint/$lscp{$path}";
        my $cpctime=$lscp{"$key/cpctime"};
        my $hfsstart=$lscp{"$key/hfscheck/starttime"};
        if ($lscp{"$key/isvalid"} eq "true" and (!$lastcp or $lastcp<$cpctime)) {
            $lastcp=$cpctime;
        }
        if ($lscp{"$key/hfscheck/validcheck"} eq "true" and (!$lasthfs or $lasthfs<$hfsstart)) {
            $lasthfs=$hfsstart;
        }
        print LOG "CP: $key $cpctime\n";
        print LOG "HFS: $key $hfsstart\n";
    }

    print LOG "Last HFS $lasthfs\nLast CP $lastcp\nCur Time ".time."\n";

    # The label follows the limit in use (36 by default, 24 for IDPA
    # pre-upgrade) instead of always saying 36.
    my $hfslabel="HFSCheck in past $hfstime hours";
    if ( time - $lasthfs > $hfstime * 3600 ) {
        # Fixed: the message interpolated the undefined $hftime.
        printboth("ERROR: No HFSCheck in past $hfstime hours. Last one is ".localtime($lasthfs)."\n");
        printboth("RESOLUTION: See KB466562 for hfscheck failure troubleshooting.\n\n");
        msg($hfslabel,"FAILED");
    } else {
        my $info=($PREUPGRADE) ? "(".localtime($lasthfs).")" : "";
        msg($hfslabel,"PASSED",$info);
        print LOG "LASTHFS PASSED\n";
    }

    # Check last CP TIME
    if ( time - $lastcp > $cptime * 3600 ) {
        printboth("ERROR: No Checkpoint in past $cptime hours. Last one is ".localtime($lastcp) ."\n");
        printboth("RESOLUTION: Investigate logs for reason or may be due to grid just restarting\n\n");
        msg("Checkpoint Status","FAILED");
    } else {
        my $info=($PREUPGRADE) ? "(".localtime($lastcp).")" : "";
        msg("Checkpoint Status","PASSED",$info);
    }

    # Check last HFS.  Fixed: "!=" compared numerically against a match on
    # $_; the intended operator is "!~".
    my $hfsresult=$NODELIST{'/nodestatuslist/hfscheckstatus/result'};
    if ($hfsresult =~ /MSG_ERR/ and $hfsresult !~ /MSG_ERR_KILLED/ ) {
        printboth("ERROR: Last HFSCheck returned $hfsresult\n");
        printboth("RESOLUTION: Investigate hfscheck failure.\n\n");
        msg("Last HFSCheck Status","FAILED");
    }

    # The remaining checks need the MCS database; open it before first use
    # (the original queried $dbh before calling openmcdb()).
    openmcdb() if (!$dbh);
    if (!$dbh) {
        print LOG "MCS database not available. Skipping database checks\n";
        return;
    }

    # Check if backups done after last checkpoint
    if ($PREUPGRADE) {
        my $sql = qq[ select extract(epoch from completed_ts) as epoch from v_activities where type ~* 'Backup|Snapup' order by completed_ts desc limit 1 ];
        my $sth = $dbh->prepare($sql);
        $sth->execute;
        if ( my $row = $sth->fetchrow_hashref() ) {
            print LOG "lastbackup epoch=$row->{epoch} lastcp=$lastcp\n";
            # Fixed: the original read "R->{epoch}" (missing sigil).
            if ($row->{epoch} >= $lastcp ) {
                printboth("WARNING: Backups performed after last checkpoint\nRESOLUTION: Create a new checkpoint\n\n");
                msg("Backups after Checkpoint","WARNING");
            }
        }
    }

    # Check that last HFS failure is cleared
    my $sql = qq[ select resetcode,checkpoint from v_hfscheck_failures where alert=true order by hfscheck_failure_id desc limit 1 ];
    my $sth = $dbh->prepare($sql);
    $sth->execute;
    # Fixed: fetchrow_hashref returns one reference, so $checkpoint was
    # always undef and the alert could never be raised.
    my ($resetcode,$checkpoint)=$sth->fetchrow_array();
    print LOG "hfscheckfailure cp:$checkpoint resetcode:$resetcode\n";
    if (!$resetcode and $checkpoint) {
        printboth("ERROR: Event 22426 Data Integrity Alert for a failed HFSCheck has not been cleared\n");
        printboth("RESOLUTION: See KB 335440 for information to clear the failure\n\n");
        msg("Data Integrity Alert","FAILED");
    }
}
3826########## End lasthfs ##########
3827
3828########### Start fileperms ######
# fileperms - assorted file permission / ownership checks.
#
#  1. /var/log/messages on all nodes: not empty, mode -rw?r?????, owner
#     root, group root or admin.
#  2. Files below /usr/local/avamar/var/mc/server_data that the find
#     expression below selects as not readable by admin.
#  3. /usr/local/avamar/lib/mcserver.xml readable and writable.
#  4. /usr/local/avamar/var/client owned by admin:admin.
# Reports msg("File Permissions", PASSED|FAILED).
sub fileperms {
    print LOG "\n\n\n### ".localtime()." ### Starting fileperms\n";
    my ($badperms,$zerosize,$unreadable,$otherfail)=(0)x4;

    # --- 1. /var/log/messages on every node
    $cmd=q[ test ! -s /var/log/messages && echo "ZEROSIZE:" ;ls -al /var/log/messages ];
    mapall("--all",$cmd);
    open(CMD_PIPE,$TMPFILE);
    while (defined(my $line=<CMD_PIPE>)) {
        chomp($line);
        print LOG "$line\n";
        if ($line =~ /(\(0\..*\)) ssh/) {
            $node=$1;
        }
        if ($line =~ /^ZEROSIZE:/) {
            printboth("ERROR: Nodes $node File size of /var/log/messages is 0.\n");
            $zerosize=1;
        }
        next unless ($line =~ /messages$/);

        # ls -al columns: mode, link count, owner, group, ...
        my ($perms,undef,$owner,$group)=split(" ",$line);
        print LOG "perms=$perms owner=$owner group=$group\n";
        unless ($perms =~ /-rw.r...../) {
            printboth("ERROR: Permissions should be -rw-r.-r-- but is $perms. See KB472524 and bug 21573\n");
            $badperms=1;
        }
        unless ($owner eq "root") {
            printboth("ERROR: Owner should be root but is $owner.\n");
            $badperms=1;
        }
        unless ($group eq "root" or $group eq "admin") {
            printboth("ERROR: Group should be root but is $group.\n");
            $badperms=1;
        }
    }

    # --- 2. MCS server_data files not readable by admin
    $cmd=qq[ find /usr/local/avamar/var/mc/server_data ! \\( \\( -user admin -perm -u=r \\) -o \\( -group admin -perm -g=r \\) -o \\( ! \\( -user admin -o -group admin \\) -perm -o=r \\) \\) ];
    open(my $found,"$cmd|");
    while (defined(my $path=<$found>)) {
        chomp($path);
        print LOG "$path\n";
        printboth("ERROR: File $path is not readable by admin and will cause flush failures\n");
        $unreadable=1;
    }

    # --- 3. mcserver.xml must be readable and writable
    #if ($VERSNUM>=741 and $VERSNUM < 750) {
    my $mcsxml='/usr/local/avamar/lib/mcserver.xml';
    unless (-r $mcsxml and -w $mcsxml) {
        printboth("ERROR: /usr/local/avamar/lib/mcserver.xml is not readable or writable\n");
        printboth("RESOLUTION: See KB504382 to set correct permissions\n\n");
        $otherfail=1;
    }

    # --- 4. ownership of the client directory (lstat: uid is [4], gid is [5])
    my @info=lstat("/usr/local/avamar/var/client");
    my $owner=getpwuid($info[4]);
    my $group=getgrgid($info[5]);
    unless ($owner eq "admin" and $group eq "admin") {
        printboth("ERROR: /usr/local/avamar/var/client owner should be admin:admin but is $owner:$group\n");
        printboth("RESOLUTION: Change owner and/or group. See ESC 34958 for more info\n\n");
        $otherfail=1;
    }

    unless ($badperms or $zerosize or $unreadable or $otherfail) {
        msg("File Permissions","PASSED");
        return;
    }
    if ($unreadable) {
        printboth("RESOLUTION: Change permissions or move files. See KB 456858 for more information.\n\n");
    }
    if ($zerosize) {
        printboth("RESOLUTION: For 0 byte files restart syslogd and make sure events get logged to /var/log/messages\n");
    }
    if ($badperms) {
        printboth("RESOLUTION: Fix permissions, owner or group for /var/log/messages. See KB472524\n\n");
    }
    msg("File Permissions","FAILED");
}
3893########### End fileperms ######
3894
3895########### Start license ######
# license - check the server license and a known-bad ascd build.
#
# Skipped for VBA.  Parses "avmaint --ava license":
#   * no "licensekey" in the output          -> FAILED
#   * expires value in the past (or within
#     the next 24 hours)                     -> FAILED
#   * any other non-zero expires value       -> WARNING
# On multi node grids created less than 40 days ago, ascd 7.3.0-226 is also
# reported as FAILED.  Result goes to msg("License", ...).
sub license {
    print LOG "\n\n\n### ".localtime()." ### Starting license\n";
    if ($VBA) {
        print LOG "Skipping for VBA\n";
        return;
    }
    getconfiginfo() if (!$GOTCONFIGINFO);

    my $e="";               # "", "WARNING" or "FAILED"; FAILED is sticky
    my $saw_license=0;
    if (open(my $fh,'-|',"avmaint --ava license")) {
        while (my $line=<$fh>) {
            chomp($line);
            print LOG "$line\n";
            $saw_license=1 if ($line =~ /licensekey/);
            next unless ($line =~ /expires="([^"]*)"/);
            my $expires=$1;
            if ($expires > 0 and $expires-86400 < time) {
                printboth("ERROR: License has expired.\n");
                $e="FAILED";
            } elsif ($expires != 0) {
                printboth("WARNING: License has an expiration date.\n");
                $e="WARNING" if ($e ne "FAILED");
            }
        }
        close($fh);
    } else {
        print LOG "Unable to run avmaint --ava license: $!\n";
    }

    if (!$saw_license) {
        printboth("ERROR: Unexpected output in avmaint --ava license\n");
        $e="FAILED";
    }
    # The original reset $e to "PASSED" whenever a license key was seen,
    # which hid an expired or expiring license; the status is kept now.
    printboth("RESOLUTION: Fix any license issues.\n\n") if $e;

    # Check for ascd license on multi node server installed less than 40 days
    my $created=$CONFIG{'/gsanconfig/systemcreatetime'};
    print LOG "created: $created\n";
    if ( time - $created < 86400*40 and $NODE_COUNT>1 ) {
        chomp(my $ascd_version=`/usr/local/avamar/bin/ascd --version 2>/dev/null | sed '/^ *version:/!d; s/^ *version: *//'`);
        print LOG "ascd version = '$ascd_version'\n";
        if ($ascd_version eq "7.3.0-226") {
            printboth("ERROR: acsd version 7.3.0-226 will causes MSG_ERR_CGSAN_FAILED errors\n");
            printboth("RESOLUTION: See KB485314 for more info\n\n");
            $e="FAILED";
        }
    }

    msg("License", $e ? $e : "PASSED");
}
3947########### End license ######
3948
3949
3950########### Start susekernel ######
# susekernel - flag SLES kernels with known defects.
#
# Only runs when $OS is SUSE/SLES.  For every node the kernel string from
# the node status list is checked for:
#   * 2.6.32.12-0.7-default  -> needs the 208 day uptime patch
#     (reported as "SLES 208 days bug", not during pre-upgrade runs)
#   * 2.6.32.[123]*          -> may be affected by the XFS kernel bug
#     (reported as "SLES XFS Kernel bug", INFO only)
sub susekernel {
    print LOG "\n\n\n### ".localtime()." ### Starting susekernel\n";
    getopersys() if (!$OS);
    getconfiginfo() if (!$GOTCONFIGINFO);
    if ( $OS !~ /suse|sles/i ) {
        print LOG "Skipping check for O/S $OS only check for suse\n";
        return;
    }
    # Fixed: "my $e208,$exfs=..." declared only $e208 and left $exfs global.
    my ($e208,$exfs)=("")x2;
    for my $node (sort @NODES) {
        my $kernel=$NODELIST{"/nodestatuslist/nodestatus/$node/version/kernel"} ;
        print LOG "node $node kernel $kernel\n";
        next if ($kernel !~ /suse|sles/i);
        # Dots are escaped so that only literal version separators match.
        if ($kernel =~ /2\.6\.32\.12-0\.7-default/) {
            $e208.="ERROR: Node ($node) requires 208 day uptime patch\n";
        }
        # Single and multi node grids were handled by two identical branches.
        if ($kernel =~ /2\.6\.32\.[123]/) {
            $exfs.="INFO: Node ($node) may be affected by XFS kernel bug\n";
        }
    }
    if (!$PREUPGRADE) {
        if ($e208) {
            printboth($e208);
            printboth("RESOLUTION: See KB454009 to install hot fix\n\n");
            msg("SLES 208 days bug","FAILED");
        } else {
            msg("SLES 208 days bug","PASSED");
        }
    }
    if ($exfs) {
        printboth($exfs);
        printboth("RESOLUTION: See KB457873 for more information\n\n");
        msg("SLES XFS Kernel bug","INFO");
    } else {
        msg("SLES XFS Kernel bug","PASSED");
    }
}
3990########### End susekernel ######
3991
3992########### Start lastemail ######
# lastemail - when upgrading to 6.0.1, verify that the High Priority Events
# profile sent an email home within the last 14 days.
#
# Reads last_email (milliseconds since the epoch) for profile
# INIT_EV_HIGH_PRIORITY from ev_cus_prof.  A missing row counts as "never".
# Reports msg("Last Emailhome", PASSED|FAILED).
sub lastemail {
    print LOG "\n\n\n### ".localtime()." ### Starting lastemail\n";
    # Match the literal version 6.0.1 only; the unescaped dots also matched
    # inside other version strings.
    if ($UPGRADE_VERSION !~ /(?<![\d.])6\.0\.1(?!\d)/) {
        print LOG "Only check if upgrading to 6.0.1 (upgd to = $UPGRADE_VERSION)\n";
        return;
    }
    openmcdb() if (!$dbh);
    my $sql = qq[ select last_email from ev_cus_prof where epid='INIT_EV_HIGH_PRIORITY'; ];
    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my ($last_ms)=$sth->fetchrow_array();
    $last_ms=0 if (!defined($last_ms));
    my $lastemail = int($last_ms/1000);
    my $diff=time-$lastemail;
    print LOG "last email is $lastemail\n";
    print LOG "current time is ".time ."\n";
    if ( $diff > 14*86400 ) {
        printboth("ERROR: Last High Priority Events Email Home date is ". int($diff/86400) ." days ago\n");
        printboth("RESOLUTION: See hotfix 34788. Already fixed in 6.0.2 or later\n\n");
        # Same label as the PASSED case (it carried a trailing blank).
        msg("Last Emailhome","FAILED");
    } else {
        msg("Last Emailhome","PASSED");
    }
}
4015########### End lastemail ######
4016
4017########## Start get_errlog ########
# get_errlog - collect gsan error log lines from the nodes.
#
# Greps /data01/cur/err.log* on the nodes (mapall) for "ERROR" and
# "0642. gsan" lines and writes them to hc_errlog.txt in the current
# directory, with a banner in front of each node's section.
sub get_errlog {
    print LOG "\n\n\n### ".localtime()." ### Starting get_errlog\n";
    print("HEALTHCHECK: Creating hc_errlog.txt\n");

    my $out;
    if (!open($out,'>',"hc_errlog.txt")) {
        print "ERROR: Unable to create hc_errlog.txt: $!\n";
        print LOG "Unable to create hc_errlog.txt: $!\n";
        return;
    }

    my $cmd=qq[ grep -h "ERROR\\|0642. gsan" /data01/cur/err.log* ];
    mapall("",$cmd);

    if (open(my $in,'<',$TMPFILE)) {
        my $banner="================================================================\n";
        while (my $line=<$in>) {
            # mapall marks the start of each node's output with "(0.N) ssh".
            if ($line =~ /(\(0\..*\)) ssh/) {
                print {$out} $banner, "NODE: $1\n", $banner;
            }
            print {$out} $line;
        }
        close($in);
    } else {
        print LOG "Unable to open $TMPFILE: $!\n";
    }

    # Buffered write errors only show up at close time.
    close($out) or print LOG "Error writing hc_errlog.txt: $!\n";
}
4034########## End get_errlog ########
4035
4036########## Start get_esmlog ########
# get_esmlog - export the Dell ESM and alert logs of all Dell nodes as CSV.
#
# Skipped unless $MANUFACTURER matches "dell" and getnodes_hw("dell")
# returns nodes.  Runs "omreport system esmlog" and "omreport system
# alertlog" on those nodes and writes one row per log record to
# hc_esmlog.csv: Node,Severity,Code,Date,Category,Description.
# A record is emitted when its "Description" line is read.
sub get_esmlog {
    print LOG "\n\n\n### ".localtime()." ### Starting get_esmlog\n";
    print("HEALTHCHECK: Creating hc_esmlog.csv\n");
    gethardware() if (!$MANUFACTURER);
    if ($MANUFACTURER !~ /dell/) {
        print LOG "Check not applicable to manufacturer $MANUFACTURER\n";
        return;
    }
    my $nodes=getnodes_hw("dell");
    if (!$nodes) {
        print LOG "no dell nodes found\n";
        return;
    }

    my $out;
    if (!open($out,'>',"hc_esmlog.csv")) {
        print "ERROR: Unable to create hc_esmlog.csv: $!\n";
        print LOG "Unable to create hc_esmlog.csv: $!\n";
        return;
    }
    print {$out} "Node,Severity,Code,Date,Category,Description\n";

    my %mon2num = qw( jan 1 feb 2 mar 3 apr 4 may 5 jun 6 jul 7 aug 8 sep 9 oct 10 nov 11 dec 12);

    # Quote a CSV field when it contains a comma, quote or line break.
    my $csv=sub {
        my ($value)=@_;
        $value="" if (!defined($value));
        if ($value =~ /[",\r\n]/) {
            $value =~ s/"/""/g;
            $value=qq["$value"];
        }
        return $value;
    };
    # Value part of an omreport "Key : value" line.
    my $value_of=sub {
        my $value=(split(": ",$_[0],2))[1];
        return defined($value) ? $value : "";
    };

    my $cmd=qq[ omreport system esmlog; omreport system alertlog ];
    mapall("--nodes=$nodes ",$cmd);

    my ($node,$sev,$code,$date,$cat)=("")x5;
    if (open(my $in,'<',$TMPFILE)) {
        while (my $line=<$in>) {
            chomp($line);
            if ($line =~ /(\(0\..*\)) ssh/) {
                $node=$1;
                ($sev,$code,$date,$cat)=("")x4;
            }
            $sev =$value_of->($line) if ($line =~ /^Severity/);
            $code=$value_of->($line) if ($line =~ /^ID/);
            $date=$value_of->($line) if ($line =~ /^Date/);
            $cat =$value_of->($line) if ($line =~ /^Category/);
            next unless ($line =~ /^Description/);

            my $desc=$value_of->($line);
            # Date looks like "Mon Jan 28 15:13:01 2020".
            my ($day,$month,$dd,$time,$yy)=split(" ",$date);
            my $mm=$mon2num{lc($month)};
            print {$out} join(",", map { $csv->($_) }
                ($node,$sev,$code,"$yy-$mm-$dd $time",$cat,$desc))."\n";
            # Start the next record empty: esmlog records have no ID or
            # Category and previously inherited them from an earlier
            # alertlog record.
            ($sev,$code,$date,$cat)=("")x4;
        }
        close($in);
    } else {
        print LOG "Unable to open $TMPFILE: $!\n";
    }
    close($out) or print LOG "Error writing hc_esmlog.csv: $!\n";
}
########## End get_esmlog ########
4071
4072########## Start get_maintlogs ########
# get_maintlogs - save maintenance and replication logs for the health check.
#
# Writes the last 30 days of "dumpmaintlogs" output to hc_maintlogs.txt and
# the last 1000000 bytes of the replicate cron log to hc_replicate.log.
# Both commands redirect their output, so the captured text kept in the
# global $result is whatever the shell leaves on stdout.
sub get_maintlogs {
    print LOG "\n\n\n### ".localtime()." ### Starting maintlogs\n";
    print("HEALTHCHECK: Creating hc_maintlogs.txt\n");

    my @collectors=(
        q[dumpmaintlogs --days=30 >hc_maintlogs.txt],
        q[tail -c 1000000 /usr/local/avamar/var/cron/replicate.log > hc_replicate.log],
    );
    foreach my $collector (@collectors) {
        $result=qx{$collector};
    }
    return $result;
}
4079########## End get_maintlogs ########
4080
4081########## Start sendemail ########
# sendemail - mail the health check results to emailhome@avamar.com.
#
# Arguments (both optional):
#   $subject        defaults to "proactive_check: $HOSTNAME"
#   $file_contents  defaults to the contents of hc_results.txt
#
# Nothing is sent for IDPA, dark sites (/home/admin/.darksite), VBA, when no
# ConnectEMC / email notification profile is enabled in ev_cus_prof, or when
# the SMTP host or sender address cannot be found in the MCS preferences.
# The message is a single text/plain part inside a multipart/mixed body.
sub sendemail {
    print LOG "\n\n\n### ".localtime()." ### Starting sendemail\n";

    # Check for IDPA, dark site and VBA, in that order.
    my $skip = $IDPA                          ? "Skipping IDPA\n"
             : (-e "/home/admin/.darksite")   ? "Skipping: Darksite\n"
             : $VBA                           ? "Skipping: VBA\n"
             :                                  "";
    if ($skip) {
        print LOG $skip;
        return;
    }

    # See if a HPE is enabled
    openmcdb() if (!$dbh);
    my $sth = $dbh->prepare(qq[
 select count(*)
 from ev_cus_prof
 where epid in ( 'INIT_EV_HIGH_CONNECTEMC', 'INIT_EV_LOGS_CONNECTEMC', 'INIT_EV_HIGH_PRIORITY')
 and (connectemc_notify_enabled or email_notify_enabled)
 ]);
    $sth->execute;
    my $hpe_enabled = $sth->fetchrow_array();
    unless ( $hpe_enabled > 0 ) {
        print LOG "Skipping. HPE did not find connectemc or email notify = true. found '$hpe_enabled'\n";
        return;
    }

    my ($subject,$file_contents)=@_;
    if (!$subject) {
        $subject="proactive_check: $HOSTNAME";
    }
    unless (defined($file_contents)) {
        open FILE, "<hc_results.txt";
        local $/;                       # slurp the whole file
        $file_contents = <FILE>;
    }

    my $boundary = '_BoUnDaRy_';
    my $to = 'emailhome@avamar.com';

    # Mail settings come from the MCS preferences file.
    my $lookup=`grep smtpHost /usr/local/avamar/var/mc/server_data/prefs/mcserver.xml`;
    unless ($lookup =~ /value="(.+)"/) {
        print LOG "No SMTP server found: $lookup\n";
        return;
    }
    my $smtpserver=$1;

    $lookup=`grep admin_mail_sender_address /usr/local/avamar/var/mc/server_data/prefs/mcserver.xml`;
    unless ($lookup =~ /value="(.+)"/) {
        print LOG "No From address: $lookup\n";
        return;
    }
    my $from=$1;

    my $smtp=Net::SMTP->new($smtpserver);
    unless ($smtp) {
        print LOG "Could not establish connection with SMTP server: $smtpserver\n";
        return;
    }
    $smtp->mail($from);
    $smtp->recipient($to);
    $smtp->data();
    foreach my $chunk (
        "Subject: $subject\n",
        "MIME-Version: 1.0\nContent-type: multipart/mixed;\n boundary=\"$boundary\"\n",
        "\n--$boundary\nContent-type: text/plain\n\n",
        "$file_contents\n",
        "\n--$boundary--\n",
    ) {
        $smtp->datasend($chunk);
    }
    $smtp->dataend();
    $smtp->quit;
    print LOG "Sent from $from to $to using $smtpserver\nsubj: $subject\n";
}
4154########## End sendemail ########
4155
4156########## Start checkclients ########
# Review every enabled client plug-in recorded in the MCS database and
# report clients that block an upgrade or need a hotfix.
#
# Checks performed:
#   - client version is allowed for the target version (only when
#     $PREUPGRADE or $CLIENT_VERSION_CHECK is set)
#   - SQL plug-in (pid 3006) older than 6.1
#   - NDMP accelerator hotfixes
#   - VMware proxy bug 35252
#   - File System datasets that carry the "parallel" flag
# Clients whose last backup is more than 365 days old are ignored.
# In pre-upgrade mode only the upgrade-relevant results are reported and
# the sub returns before the hotfix summary.
sub checkclients {
    getinstalledversion() if (!$VERSNUM);
    print LOG "\n\n\n### ".localtime()." ### Starting checkclients\n";
    if ($ADDNODE) {
        print LOG "Skipping for addnode\n";
        return;
    }

    my $use_vers=($UPGRADE_VERSION) ? $UPGRADE_VERSION : $DATANODEVERSION;
    (my $upgvers=$use_vers) =~ s/^(\d+\.\d+).*/$1/;
    print LOG "upgrade version=$upgvers\n";

    # Get minimum allowed client versions for upgrade target
    my %allowed;
    my $previous_versions=2;
    if ($upgvers>=18) {
        print LOG "Allow ". join(',', @{$CLIENT{$upgvers}}) ."\n";
    } else {
        print LOG "Prev Version=$previous_versions\n";
        for (reverse sort {$a <=> $b} @supportedversions) {
            next if (major_version($_) != $upgvers and !%allowed or $allowed{major_version($_)} ) ;
            $allowed{major_version($_)}=1;
            print LOG "Allow ".major_version($_)."\n";
            last if (keys %allowed > $previous_versions);
        }
    }

    $sql = qq[ select clients.descr,cl_plugins.pid_number,cl_plugins.version, cl_plugins.build,
        trunc(cl_plugins.backed_up_ms::double precision/1000::double precision) AS backed_up_epoch,
        clients.cid, clients.client_type
        from clients,cl_plugins
        where clients.cid = cl_plugins.cid
        and clients.enabled = true
    ];

    # Same lazy connect that sendemail uses, so this check does not depend
    # on another check having opened the database first.
    openmcdb() if (!$dbh);
    my $sth = $dbh->prepare($sql);
    $sth->execute;

    # "my $x = ... if (...)" has undefined behaviour in Perl, so the flag is
    # computed with a plain expression.
    my $VMPatch = grep(/download-AvamarVmImageProxy-linux-ova-6.0.101-901/,@RPMS) ? "yes" : "";
    my $client_upgrade_needed="";

    my ($sqle,$e,$vme,$ndmp30382,$ndmphf,$anyndmp,$anyvmware,$anynetworker);
    while ( @row = $sth->fetchrow_array() ) {
        my($client,$plugin,$major_version,$build,$backedup,$cid,$client_type)=@row;
        next if ($client =~ /MC_DELETED|MC_RETIRED/);
        my $version="$major_version-$build";
        my $clvers=major_version($major_version);

        print LOG "Client: $client $version pid=$plugin lastbu=$backedup clv=$clvers upv=$upgvers: ";
        my $lastbu=int((time-$backedup)/86400);
        if ($lastbu>365) {
            print LOG "SKIPPING. Last Backup $lastbu days\n";
            next;
        }

        # Check Client Versions for 2 versions prior
        if ( $PREUPGRADE or $CLIENT_VERSION_CHECK) {
            if ( $upgvers >=18 ) {
                # \Q..\E keeps the dots in the client version literal.
                if (!grep(/\Q$clvers\E/, @{$CLIENT{$upgvers}} )) {
                    print LOG "ERR: client vers $clvers not in t CLIENT{$upgvers}\n";
                    $client_upgrade_needed.="ERROR: Client $client version $version needs to be upgraded for $use_vers\n";
                } else {
                    print LOG "OK: clvers=$clvers in $upgvers allowed ". join(",", @{$CLIENT{$upgvers}}) ."\n";
                }
            } elsif ( !$allowed{$clvers} ) {
                print LOG "ERR: >2 vers back. ";
                $client_upgrade_needed.="ERROR: Client $client version $version needs to be upgraded for $use_vers\n";
            }
        }

        # Client things for Upgrade to 6.1.0 or running 6.1.0
        # Check for SQL client at least 6.1 on version 6.1+
        if ($plugin == 3006 and $major_version < 6.1 and ($VERSNUM >= 610 or $PREUPGRADE) ) {
            print LOG "SQL->\n";
            $sqle.="ERROR: SQL Client $client version $version must be upgraded.\n";
        }

        # Check NDMP (pre-upgrade only records that NDMP clients exist)
        if ($plugin =~ /^[178]003$|14003/ ) {
            if ($PREUPGRADE) {
                $anyndmp=1;
                next;
            }
            print LOG "NDMP->";

            if ($version =~ /6\.0\.10[01]/ and $version ne "6.0.101-66_HF34539" ) {
                print LOG "\n";
                $ndmphf.="ERROR: NDMP Client hotfix 34539 required for $client version $version\n";
            }
            if ($version eq "6.1.101-87") {
                $ndmphf.="WARNING: NDMP Client hotfix 49413 available for $client version $version\n";
            }
            if ($version eq "7.1.101-141") {
                print LOG "\n";
                $ndmphf.="WARNING: NDMP Client hotfix 223295 available for $client version $version\n";
            }
            if ($version eq "7.1.101-145") {
                $ndmphf.="WARNING: NDMP Client hotfix 229389 available for $client version $version\n";
            }
            if ($version eq "7.2.101-401") {
                $ndmphf.="INFO: NDMP Client hotfix 246816 for browsing performance is available for $client version $version\n";
            }
            # Any version 5.0.10x
            if ($version =~ /^5\.0\.10./ and $version !~ /30382/) {
                if ($version eq "5.0.106-28") {
                    print LOG "\n";
                    $ndmp30382.="WARNING: Client $client is an NDMP accelerator that might need hot fix 30382 installed\n";
                } else {
                    print LOG "\n";
                    $ndmp30382.="ERROR: Client $client is an NDMP accelerator that needs hot fix 30382 installed\n";
                }
            }
        }

        # VMWare check (pre-upgrade only records that VMware clients exist)
        if ($plugin =~ /^3016$|^1016$/) {
            if ($PREUPGRADE) {
                $anyvmware=1;
                next;
            }
            print LOG "VMProxy->";
            $VMWARE_CLIENT=1;
            if ($client_type eq "VMACHINE") {
                print LOG "Skipping VM machine";
            } else {
                if (!$VMPatch and $VERSNUM >= 503 and $VERSNUM <= 601) {
                    print LOG "\n";
                    $vme.="ERROR: Server version $AVAMARVER is affected by VMware bug 35252.\n";
                    # Any true value stops this server-level error repeating per client.
                    $VMPatch="err";
                }
                if ($version =~ /^5|^6.0.10[01]/ and $build != 901) {
                    print LOG "\n";
                    $vme.="ERROR: VMware client $client version $version is affected by bug 35252.\n";
                }
            }
        }

        # Next client check here
        print LOG "\n";
    }

    # Print any found problems
    if ( $PREUPGRADE or $CLIENT_VERSION_CHECK) {
        if ($client_upgrade_needed) {
            printboth($client_upgrade_needed);
            printboth("RESOLUTION: Upgrade clients to a newer version\n\n");
            msg("Client Version Supported","FAILED");
        } else {
            msg("Client Version Supported","PASSED");
        }
    }

    # Check MCS datasets
    $sql = qq[ select dataset_name, plugin_name, domain from v_ds_commands where command_name like 'parallel' and plugin_name like '%File System%' ];
    $sth = $dbh->prepare($sql);
    $sth->execute;
    my $parallel="";
    while ( my $R = $sth->fetchrow_hashref() ) {
        $parallel.="WARNING: Dataset $R->{domain}$R->{dataset_name} for $R->{plugin_name} has the parallel flag\n";
    }
    if ($parallel) {
        printboth("${parallel}RESOLUTION: See bug 263066 for more information\n\n");
        msg("Parallel flag","WARNING");
    }

    #### PREUPGRADE ONLY - note this if block returns
    if ($PREUPGRADE) {
        if ($sqle) {
            printboth($sqle);
            printboth("RESOLUTION: If upgrading to 6.1.0 or later all SQL clients to be upgraded at the same time\n\n");
            msg("Pre-upgrade Clients","FAILED");
        } else {
            msg("Pre-upgrade Clients","PASSED");
        }
        chomp($anynetworker=`avmgr getb --path=/NETWORKER 2>/dev/null| grep -c "^\[[]"`);
        msg("Pre-Upgrade Clients","Networker Backups Found") if ($anynetworker) ;
        msg("Pre-Upgrade Clients","VMware Backups Found") if ($anyvmware) ;
        msg("Pre-Upgrade Clients","NDMP Backups Found") if ($anyndmp) ;

        # This check needs $PREUPGRADE and $anynetworker, both of which are
        # only available here; it used to sit after this block's return and
        # so could never run.
        if ($anynetworker and $UPGRADE_VERSION =~ /7\.1\.0/ ) {
            printboth("ERROR: Networker integration is not supported in Avamar 7.1.0\n");
            printboth("RESOLUTION: Do not upgrade\n");
            msg("Networker Integration","FAILED");
        }
        return;
    }
    #### END OF PREUPGRADE

    if ($sqle) {
        printboth($sqle);
        printboth("RESOLUTION: All SQL clients on server version 6.1.0 or later must be upgraded\n\n");
        $e="yes";
    }
    if ($vme) {
        printboth($vme);
        printboth("RESOLUTION: Install hotfix 35252. See KB302087 for more info\n\n");
        $e="yes";
    }
    if ($ndmp30382) {
        printboth($ndmp30382);
        printboth("NOTE: You may have to manually check the accelerator node to see if the patch has been applied\n");
        printboth(" The md5sum of avndmp will be 11e03fc123141f9216dd92c1a7bcc2e1 if it has been patched\n");
        printboth("RESOLUTION: Install hotfix 30382.\n\n");
        $e="yes";
    }
    if ($ndmphf) {
        printboth($ndmphf);
        printboth("NOTE: NDMP hotfixes are not always detected. Manually run 'avndmp --version' on the acclerator node to verify installed version\n");
        printboth("RESOLUTION: See NDMP hot fix for more info\n\n");
        $e="yes";
    }
    if ($e) {
        printboth("NOTE: The script cannot detect a client upgrade until a backup is done\n\n");
        msg("Mandatory Client Upgrades","FAILED");
    } else {
        msg("Mandatory Client Upgrades","PASSED");
    }
}
4380########## End checkclients ########
4381
4382########## Start qadir ########
# Report nodes that still hold leftover QA test data under /data0?/QA.
# Runs "du -s" on every node through mapall and flags any directory whose
# size rounds down to at least 1 GB (du -s output divided by 1024 twice).
sub qadir {
    print LOG "\n\n\n### ".localtime()." ### Starting qadir\n";
    my $cmd=qq[ du -s /data0?/QA ];
    mapall($ALL,$cmd);

    # NOTE(review): $TMPFILE is presumably the plain path mapall wrote its
    # output to; the two-argument open is kept because that is not visible here.
    if (!open(CMD_PIPE,$TMPFILE)) {
        print LOG "Could not open mapall output '$TMPFILE': $!\n";
        return;
    }
    my $e="";
    while (<CMD_PIPE>) {
        chomp;
        # Header line naming the node whose output follows.
        $node=$1 if (/(\(0\..*\)) ssh/);
        print LOG "$_\n";
        if (/^\d/ ) {
            my($tot,$dir)=split();
            $tot=int($tot/1024/1024);
            if ($tot>0) {
                printboth("ERROR: Node $node has $tot GB of QA data in $dir\n");
                $e="yes";
            }
        }
    }
    close(CMD_PIPE);

    if ($e) {
        printboth("RESOLUTION: Remove the leftover QA test directories with the following command\n");
        printboth(" mapall --parallel --bg --user=root --noerror --all+ '/usr/local/avamar/bin/dtsh --cleanup'\n\n");
        msg("QA Directories","FAILED");
    }
}
4407########## End qadir ########
4408
4409########## Start getdatadomain ########
# Discover attached Data Domain systems and run the ddrmaint checks.
#
# Sets these package variables for later checks:
#   $DDCNT            - count reported by "ddrmaint read-ddr-info"
#   %DD               - flattened copy of that XML (via SimpleXMLTree)
#   $DDRMAINT_VERSION - installed ddrmaint version, or "x" when the server
#                       is older than v6 or no Data Domain is attached
# On single-node systems it also warns when dd_only_mode is not "ALL" and
# when checkpoint backups are not directed to the Data Domain.
# Outside pre-upgrade mode it finishes with the ddrmaint hotfix check.
sub getdatadomain {
    print LOG "\n\n\n### ".localtime()." ### Starting getdatadomain\n";
    getinstalledversion() if (!$VERSNUM);
    getconfiginfo() if (!$GOTCONFIGINFO);
    $DDCNT=0;
    if ($VERSNUM < 600 ) {
        print LOG "Skipping pre v6: $VERSNUM\n";
        $DDRMAINT_VERSION="x";
        return;
    }
    my $read=`ddrmaint read-ddr-info 2>/dev/null`;
    print LOG "ddrmaint:\n$read\n";
    if ($read =~ /MSG_ERR_NOT_PRESENT/ or !$read ) {
        $DDRMAINT_VERSION="x";
        print LOG "No DD Attached: $read\n";
        return;
    }

    # A failed cplist is reported, but the remaining checks still run.
    my $result=`ddrmaint cplist`;
    if ($?) {
        print LOG "code: $?\noutput:$result\n";
        printboth("ERROR: Data Domain did not respond to ddrmaint cplist\n");
        printboth("RESOLUTION: Check Data Domain connectivity and status\n");
        msg("Data Domain Available","FAILED");
    }

    $xml = XML::Parser->new( Style => 'Tree' );
    print LOG "parsing ddrmaint read-ddr-info\n";
    my $tree=$xml->parse($read);
    SimpleXMLTree($tree);
    $DDCNT=$xmltree{"/avamar/datadomain/count"};
    %DD=%xmltree;
    foreach my $ddr_index (@DD_INDEX) {
        print LOG "ddr index: $ddr_index\n";
        my $base="/avamar/datadomain/ddrconfig/$ddr_index";
        # $line is not declared here; the assignment to it is kept.
        $line=$DD{"$base/hostname"} ." Vers:". $DD{"$base/ddos-version"} ." S/N:". $DD{"$base/serialno"} ;
        my $t=$line;
        print LOG "$t\n";
        msg("Datadomain",$t);
    }

    # Reduce the "version: x.y.z-n" line to the bare version string,
    # whatever amount of whitespace surrounds it.
    chomp($DDRMAINT_VERSION=`ddrmaint --version 2>&1|grep "^[ ]*version:"`);
    $DDRMAINT_VERSION =~ s/^\s*version:\s*//;
    $DDRMAINT_VERSION =~ s/\s+$//;
    print LOG "DDRMAINT_VERSION = $DDRMAINT_VERSION\n";

    # Check for single node w/DD and dd_only mode
    if ($NODE_COUNT == 1 and $MCSERVER{"/mc/node/datadomain/map/dd_only_mode"} ne "ALL" ) {
        print LOG "node_count=$NODE_COUNT dd_only_mode=$MCSERVER{'/mc/node/datadomain/map/dd_only_mode'}\n";
        printboth("WARNING: Backups to the Data Domain is not locked. If this Avamar system is intended to be used as a pure metadata node (no backups going to Avamar), consider locking down the backups to the Data Domain to prevent accidental backups going to the Avamar system\n");
        printboth("RESOLUTION: See the newest admin guide for Avamar AVE Installs and the dd_only_mode flag\n\n");
        msg("Datadomain dd_only_mode","WARNING");
    }

    # Check for CP backup to DD
    if ($NODE_COUNT ==1) {
        my $result=`mccli dd show-prop --xml`;
        print LOG "parsing dd show-prop\n$result";
        if ($?) {
            print LOG "code: $?\noutput:$result\n";
            printboth("ERROR: Data Domain did not respond to dd show-prop command\n");
            printboth("RESOLUTION: Check Data Domain connectivity and status\n\n");
            msg("Data Domain Checkpoints","FAILED");
        } else {
            my $xml = XML::Parser->new( Style => 'Tree' );
            my $tree=$xml->parse($result);
            # NOTE(review): these two flags are presumably read by
            # SimpleXMLTree while it walks the tree - confirm there.
            $DDXML=0; $DDXMLFLAG=1;
            SimpleXMLTree($tree);
            undef $DDXML; undef $DDXMLFLAG;
            my %DD=%xmltree;
            my $flag=0;
            # Only the keys are searched; scanning the whole hash would also
            # treat values as keys.
            foreach my $prop (grep(/TargetForAvamarCheckpointBackups/, keys %DD)) {
                print LOG "ddr: $prop $DD{$prop}\n";
                $flag=1 if ($DD{$prop} eq "Yes");
            }
            if (!$flag) {
                printboth("WARNING: Checkpoint Backups not going to Data Domain\n");
                printboth("RESOLUTION: Change TargetForAvamarCheckpointBackups to ensure checkpoint are backed up to Data Domain\n\n");
                msg("Data Domain Checkpoints","WARNING");
            }
        }
    }

    return if ($PREUPGRADE);

    # ddrmaint builds that need a hotfix. Dots are escaped so they only
    # match a literal ".".
    my $e="";
    if ($DDRMAINT_VERSION =~ /^6\.0\.1-66$|^6\.0\.0/) {
        printboth("ERROR: Bug 39953 patches not found for data domain ddrmaint version $DDRMAINT_VERSION. \n");
        printboth("RESOLUTION: See KB302080 but use bug 39953 instead of 33177\n\n");
        $e="yes";
    }
    if ($DDRMAINT_VERSION =~ /^6\.0\.2-153$/) {
        printboth("ERROR: Bug 40855 patches not found for data domain ddrmaint version $DDRMAINT_VERSION. \n");
        printboth("RESOLUTION: Apply hot fix bug 40855\n\n");
        $e="yes";
    }
    if ($DDRMAINT_VERSION =~ /^6\.1\.0-402$/) {
        printboth("ERROR: Bug 40857 patches not found for data domain ddrmaint version $DDRMAINT_VERSION. \n");
        printboth("RESOLUTION: Apply hot fix bug 40857\n\n");
        $e="yes";
    }
    if ($DDRMAINT_VERSION eq "7.1.1-141" ) {
        printboth("ERROR: Bug 226000 patches not found for data domain ddrmaint version $DDRMAINT_VERSION. \n");
        printboth("RESOLUTION: Apply hot fix bug 226000\n\n");
        $e="yes";
    }
    if ($DDRMAINT_VERSION eq "7.3.0-226"){
        printboth("ERROR: ddrmaint version $DDRMAINT_VERSION requires hotfix 261714 or later\n");
        printboth("RESOLUTION: See KB485127 for more info\n\n");
        $e="yes";
    }
    if ($DDRMAINT_VERSION eq "7.4.1-58_TB30267" ){
        printboth("WARNING: ddrmaint version $DDRMAINT_VERSION is a test binary that may cause issues with /dev/null\n");
        printboth("RESOLUTION: See KB516218 for more info and ESC 31417 for the problem it causes and a solution to fix it\n\n");
        $e="yes";
    }
    if ($DDRMAINT_VERSION eq "7.4.1-58" ){
        printboth("ERROR: Bug 281351 patches not found for data domain ddrmaint version $DDRMAINT_VERSION. \n");
        printboth("RESOLUTION: Apply hot fix bug 281351\n\n");
        $e="yes";
    }

    if ($e) {
        msg("ddrmaint Patches","FAILED");
    } else {
        msg("ddrmaint Patches","PASSED");
    }
}
4539########## End getdatadomain ########
4540
4541########## Start ddgcoob ########
# On pre-7.0 servers with a Data Domain attached, verify that gcoob.pl is
# installed and appears in an uncommented line of the admin user's crontab
# on the utility node.
sub ddgcoob {
    print LOG "\n\n\n### ".localtime()." ### Starting ddgcoob\n";
    getdatadomain() if (!$DDRMAINT_VERSION) ;
    if (!%DD) {
        print LOG "Skipping, no data domains\n";
        return;
    }
    if ($VERSNUM >=700 ) {
        print LOG "Skipping, version 7+\n";
        return;
    }
    my $e="";
    if (! -e "/usr/local/avamar/bin/gcoob.pl" ) {
        printboth("ERROR: Datadomain attached but gcoob.pl is not installed\n");
        $e="yes";
    } else {
        mapall("--user=root --nodes=0.s","crontab -l -u admin");
        # Without this check an unreadable output file would be reported as
        # "not in crontab".
        if (!open(CMD_PIPE,$TMPFILE)) {
            print LOG "Could not open mapall output '$TMPFILE': $!\n";
            return;
        }
        my $missing="yes";
        while(<CMD_PIPE>) {
            next if (/^\s*#/);
            print LOG $_;
            $missing="" if (/gcoob/);
        }
        close(CMD_PIPE);
        if ($missing) {
            printboth("ERROR: Data Domain attached and gcoob.pl installed but not in admin users crontab\n");
            $e="yes";
        }
    }
    if ($e) {
        printboth("RESOLUTION: Install or configured gcoob.pl. See KB 302077\n\n");
        msg("Data Domain gcoob.pl","FAILED");
    } else {
        msg("Data Domain gcoob.pl","PASSED");
    }
}
4578########## End ddgcoob ########
4579
4580########## Start ddvers ########
# Check that each attached Data Domain runs a DDOS version supported by the
# installed Avamar version ($AVAMARVER) or the upgrade target
# ($UPGRADE_VERSION). Errors are grouped per Avamar release so each
# resolution text is printed once.
sub ddvers {
    print LOG "\n\n\n### ".localtime()." ### Starting ddvers\n";
    getdatadomain() if (!$DDRMAINT_VERSION) ;
    if (!%DD) {
        print LOG "Skipping, no data domains\n";
        return;
    }
    if (!$PREUPGRADE and $VERSNUM < 610 ) {
        print LOG " Skipping: Not upgrading, not version 6.1.0 or later\n";
        return;
    }
    my (%em,%er);
    # A named loop variable is used on purpose: assigning to the aliased $_
    # would overwrite the entries of @DD_INDEX.
    foreach my $ddr_index (@DD_INDEX) {
        my $base="/avamar/datadomain/ddrconfig/$ddr_index";
        # $vers stays a package variable, as in the rest of the file.
        $vers=$DD{"$base/ddos-version"};
        print LOG "DDOS VERSION = $vers\n";
        # Digits-only form used by the two oldest checks ("5.0.2.3" -> 5023).
        (my $digits=$vers) =~ s/\.//g;
        $digits =~ s/-.*//;
        print LOG "avamarver $AVAMARVER upgdver $UPGRADE_VERSION vers=$vers\n";
        if ($digits < 5023 and ($AVAMARVER =~ /^6\.1/ or $UPGRADE_VERSION =~ /^6\.1/) ) {
            $em{61}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 6.1\n";
            $er{61}="RESOLUTION: Upgrade Data Domain to version 5.0.2.3 or later if upgrading to Avamar 6.1 \n\n";
        }
        if ( ($AVAMARVER =~ /^7\.0/ or $UPGRADE_VERSION =~ /^7\.0/)
            and ( $digits<5305 or ($digits>5400 and $digits<5404) ) ) {
            $em{70}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 7.0\n";
            $er{70}="RESOLUTION: Upgrade Data Domain to version 5.3.0.5+ or 5.4.0.4+ if upgrading to Avamar 7.0 \n\n";
        }
        if ( ($AVAMARVER =~ /^7\.1/ or $UPGRADE_VERSION =~ /^7\.1/)
            and minver($vers,"5.4.1.1") ) {
            $em{71}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 7.1\n";
            $er{71}="RESOLUTION: Upgrade Data Domain to version 5.4.1.1 or later if upgrading to Avamar 7.1 \n\n";
        }
        if ( ($AVAMARVER =~ /^7\.2/ or $UPGRADE_VERSION =~ /^7\.2/)
            and minver($vers,"5.5") ) {
            $em{72}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 7.2\n";
            $er{72}="RESOLUTION: Upgrade Data Domain to version 5.5.0.9 or later if upgrading to Avamar 7.2 \n\n";
        }
        if ( ($AVAMARVER =~ /^7\.[34]/ or $UPGRADE_VERSION =~ /^7\.[34]/)
            and minver($vers,"5.6.0.3") ) {
            $em{73}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 7.3/7.4\n";
            $er{73}="RESOLUTION: Upgrade Data Domain to version 5.6.0.3 or later if upgrading to Avamar 7.3/7.4\n\n";
        }
        if ( ($AVAMARVER =~ /^7\.5/ or $UPGRADE_VERSION =~ /^7\.5/)
            and minver($vers,"5.7") ) {
            $em{75}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 7.5\n";
            $er{75}="RESOLUTION: Upgrade Data Domain to version 5.7.x or later if upgrading to Avamar 7.5\n\n";
        }
        if ( ($AVAMARVER =~ /^18\./ or $UPGRADE_VERSION =~ /^18\./)
            and minver($vers,"5.7") ) {
            $em{181}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 18.x\n";
            $er{181}="RESOLUTION: Upgrade Data Domain to version 5.7.x or later if upgrading to Avamar 18.x\n\n";
        }
        if ( ($AVAMARVER =~ /^19\./ or $UPGRADE_VERSION =~ /^19\./)
            and minver($vers,"6.0") ) {
            $em{191}.="ERROR: Data Domain ".$DD{"$base/hostname"}." version $vers is not supported on Avamar 19.x\n";
            $er{191}="RESOLUTION: Upgrade Data Domain to version 6.0.x or later if upgrading to Avamar 19.x\n\n";
        }
    }
    for my $release (sort keys %em) {
        printboth($em{$release}.$er{$release});
    }
    if (%em) {
        msg("Data Domain Version","FAILED");
    } else {
        msg("Data Domain Version","PASSED");
    }
}
4653########## End ddvers ########
4654
4655########## Start adtcheck ########
# When at least two AdaGridService processes are running, verify that the
# installed grid-service.jar matches one of the two accepted md5 checksums.
# Skipped unless pre-upgrade mode is on or the server is at 6.1.0 or later.
sub adtcheck {
    print LOG "\n\n\n### " . localtime() . " ### Starting adtcheck\n";

    my $cmd = qq[ ps -aef |grep -v grep | grep -c AdaGridService ];
    my $results = qx{$cmd};
    print LOG "cmd: $cmd\nresults: $results\n";
    if ($results < 2) {
        print LOG "Skiping. No ADT process found\n";
        return;
    }

    unless ($PREUPGRADE or $VERSNUM >= 610) {
        print LOG " Skipping: Not upgrading, not version 6.1.0 or later\n";
        return;
    }

    # md5sum prints "<digest>  <file>"; only the digest is needed.
    my ($ADTMD5SUM) = split(" ", qx{md5sum /opt/EMC/TransportSystemService/lib/grid-service.jar});
    print LOG "ADT grid-server.jar md5sum: $ADTMD5SUM\n";

    my %accepted = (
        'c1f0eeb7386475c8ef5b8aaf89b27895' => 1,
        '2ae52cadd9957b35ec8c914a46ee9e52' => 1,
    );
    if ($accepted{$ADTMD5SUM}) {
        msg("ADT Check", "PASSED");
    }
    else {
        printboth("ERROR: ADT is attached and must be version 1.0 SP3 version or later on Avamar 6.1 or later\n");
        printboth("RESOLUTION: Upgrade ADT before using it on a 6.1 server.\n");
        msg("ADT Check", "FAILED");
    }
}
4680########## End adtcheck ########
4681
4682########## Start atocheck ########
# Verify that an installed ATO/ADMe is at version 4.1.1 or later.
# The version is taken from the "Version-x.y.z" (or "Version=x.y.z") text
# in /etc/motd, e.g. "Version-4.1.11.45i".
#
# Returns 1 when /usr/local/avamar/bin/ato does not exist; otherwise returns
# nothing useful. Sets $ADME_VERS to the version with dots and quotes removed.
sub atocheck {
    print LOG "\n\n\n### ".localtime()." ### Starting atocheck\n";
    if (! -e "/usr/local/avamar/bin/ato" ) {
        print LOG "Skipping: no ato file found\n";
        return 1;
    }
    if ($VBA) {
        print LOG "Skipping: dont check for VBA\n";
        return;
    }
    if (!$PREUPGRADE and $VERSNUM < 610 ) {
        print LOG " Skipping: Not upgrading, not version 6.1.0 or later\n";
        return;
    }

    # The version is extracted with a Perl regex rather than awk: an
    # unescaped $3 inside backticks is interpolated by Perl before the
    # shell ever sees it.
    my $motd=`grep Version /etc/motd`;
    chomp($motd);
    print LOG "Version $motd\n";

    my $save="";
    $save=$1 if ($motd =~ /Version[-=]\s*"?([^\s"]+)/);
    (my $vers=$save) =~ s/[\."]//g;
    $ADME_VERS=$vers;

    # Compare major.minor.patch numerically against 4.1.1; a string
    # comparison would rank "10.0" below "4.1.1". A version that could not
    # be determined compares as 0.0.0 and therefore fails.
    my @have=($save =~ /(\d+)/g);
    my @need=(4,1,1);
    my $too_old=0;
    for my $i (0..$#need) {
        my $part=defined($have[$i]) ? $have[$i] : 0;
        next if ($part == $need[$i]);
        $too_old=($part < $need[$i]) ? 1 : 0;
        last;
    }

    if ($too_old) {
        $save="(unknown)" if ($save eq "");
        printboth("ERROR: ATO version $save needs to be upgraded to ADM(e) Version 4.1.1 or later\n");
        printboth("RESOLUTION: ATO must be upgraded before being used but can be done before or after Avamar is upgraded.\n");
        printboth(" For ADM(e) details see https://community.emc.com/docs/DOC-7910\n\n");
        msg("ATO/ADMe Check","FAILED");
    } else {
        msg("ATO/ADMe Check","PASSED");
    }
}
4717########## End atocheck ########
4718
4719########## Start chage ########
4720# and passwords
# Check password aging for root and admin on every node.
#
# Runs "chage -l" for both accounts through mapall (each output line is
# prefixed with "<user>:") and reports:
#   - an account whose Maximum differs from the same account on the
#     previously listed node (security hardening applied unevenly)
#   - an account whose password expires within its warning period
# PASSED is reported when no error was found, unless every Maximum seen was
# 99999 (aging disabled), in which case nothing is reported.
sub chage {
    print LOG "\n\n\n### ".localtime()." ### Starting chage\n";

    my $cmd=qq[ sudo chage -l root |sed -e 's/^/root:/'; sudo chage -l admin |sed -e 's/^/admin:/' ];
    mapall($ALL,$cmd);
    if (!open(CMD_PIPE,$TMPFILE)) {
        print LOG "Could not open mapall output '$TMPFILE': $!\n";
        return;
    }

    my $e="";
    my ($max,$warn)=(0,0);
    my %lastmax;                 # account => Maximum seen on the previous node
    my ($saw_max,$saw_aging)=(0,0);
    while (<CMD_PIPE>) {
        chomp;
        print LOG "$_\n";
        if (/sudo: no tty present/) {
            printboth("ERROR: Node $node sudo chage failed with $_ \n");
            printboth("RESOLUTION: Fix sudo problem. Password age checks will not be valid because of this\n\n");
            msg("Password Age info retrieval","FAILED");
            close(CMD_PIPE);
            return;
        }

        # Header line naming the node whose output follows.
        if (/(\(0\..*\)) ssh/) {
            $node=$1;
            ($max,$warn)=(0,0);
        }

        # Account name added in front of each chage line by the sed calls.
        my $acct=(/^(\w+):/) ? $1 : "";

        if (/Maximum:\s*(\d+)/) {
            $max=$1;
            $saw_max=1;
            $saw_aging=1 if ($max and $max != 99999);
            if ($max and $lastmax{$acct} and $max != $lastmax{$acct}) {
                printboth("ERROR: Node $node Security hardening is different than last node!\n");
                $e="yes";
            }
            $lastmax{$acct}=$max if ($max);
        }
        $warn=$1 if (/Warning:\s*(\d+)/);

        if ($max != 99999 and !/Never/ and /Password Expires:\s*(.+)/) {
            my $expdt=$1;
            chomp(my $expjul=`date --date="$expdt" "+%s"`);
            print LOG "Expjul: $expjul Time:".time."\n";
            # A warning period of 0 is treated as one day.
            my $warn_days=($warn) ? $warn : 1;
            if ($expjul - $warn_days*86400 <= time) {
                printboth("ERROR: Node $node User $acct password expiration is $expdt\n");
                $e="yes";
            }
        }
    }
    close(CMD_PIPE);

    if ($e) {
        printboth("RESOLUTION: Change password before upgrade to reset expiration date\n");
        msg("Password Expiration","FAILED");
        return;
    }
    if (!$saw_max or $saw_aging) {
        msg("Password Expiration","PASSED");
    }
}
4769########## End chage ########
4770
4771########## Start getnodes_hw ##########
# Return a comma-separated list of node ids (for example "0.s,0.0") whose
# %NODE_INFO manufacturer matches the regular expression passed as the
# first argument. Returns undef when no node matches.
sub getnodes_hw {
    my ($maker_re) = @_;
    print LOG "-> getnodes_hw $maker_re\n";
    gethardware() unless ($MANUFACTURER);

    my $nodelist;
    for my $key (keys %NODE_INFO) {
        # Keys look like "(0.0)"; the id is the text between the parentheses.
        # $nodeid is a package variable and keeps its previous value when a
        # key has no parentheses.
        if ($key =~ /\((.*)\)/) {
            $nodeid = $1;
        }
        my $maker = $NODE_INFO{$key}{manufacturer};
        print LOG "$nodeid x $maker\n" if ($DEBUG);
        next unless ($maker =~ /$maker_re/);
        $nodelist .= "," if ($nodelist);
        $nodelist .= $nodeid;
    }

    print LOG "-> returned: $nodelist\n";
    return $nodelist;
}
4787########## End getnodes_hw ##########
4788
4789########## Start getnodes_os ##########
# Return a comma-separated list of node ids ("0.s" for the utility node,
# "0.N" for data nodes) whose %NODE_INFO os entry matches the regular
# expression passed as the first argument. Returns undef when none match.
# On a single-node system only "0.0" is examined.
sub getnodes_os {
    my ($os_re) = @_;
    print LOG "-> getnodes_os\n";
    nodexref() if (!$NODE_COUNT);
    my $nodelist;
    # Index -1 stands for the utility node. The counter is lexical so the
    # package variable $node is left alone.
    for (my $idx=-1; $idx<$NODE_COUNT; $idx++) {
        # Single-node systems have no separate utility node; the test must
        # be on the loop index.
        next if ($NODE_COUNT==1 and $idx<0);
        my $nodeid=($idx<0) ? "0.s" : sprintf("0.%d",$idx);
        my $key="($nodeid)";
        # exists() keeps the lookup from creating an empty %NODE_INFO entry.
        my $os=exists($NODE_INFO{$key}) ? $NODE_INFO{$key}{os} : undef;
        print LOG "$nodeid x $os\n" if ($DEBUG);
        if ( $os =~ /$os_re/ ) {
            $nodelist.="," if ($nodelist);
            $nodelist.=$nodeid;
        }
    }
    print LOG "-> returned: $nodelist\n";
    return $nodelist;
}
4806########## End getnodes_os ##########
4807
4808########## Start getnodes_gen ##########
# Return a comma-separated list of node ids ("0.s" for the utility node,
# "0.N" for data nodes) whose %NODE_INFO gendesc entry matches the pattern
# passed as the first argument. Returns an empty string when none match.
# On a single-node system only "0.0" is examined.
sub getnodes_gen {
    my ($gen_re) = @_;
    print LOG "-> getnodes_gen $gen_re\n";
    nodexref() if (!$NODE_COUNT);
    my $nodelist="";
    # Index -1 stands for the utility node. The counter is lexical so the
    # package variable $node is left alone.
    for (my $idx=-1; $idx<$NODE_COUNT; $idx++) {
        # Single-node systems have no separate utility node; the test must
        # be on the loop index.
        next if ($NODE_COUNT==1 and $idx<0);
        my $nodeid=($idx<0) ? "0.s" : sprintf("0.%d",$idx);
        my $key="($nodeid)";
        # exists() keeps the lookup from creating an empty %NODE_INFO entry.
        my $gendesc=exists($NODE_INFO{$key}) ? $NODE_INFO{$key}{gendesc} : undef;
        print LOG "$nodeid x $gendesc\n";
        if ( $gendesc =~ $gen_re ) {
            $nodelist.="," if ($nodelist);
            $nodelist.=$nodeid;
        }
    }
    print LOG "-> returned: $nodelist\n";
    return $nodelist;
}
4825########## End getnodes_gen ##########
4826
4827
4828########## Start replforceaddr ##########
# Look for use of the replication --forceaddr flag.
#   FAILED  - this grid's repl_cron.cfg has an uncommented forceaddr line
#   WARNING - other grids have entries under /REPLICATE on this grid, so
#             their configuration has to be checked by hand
#   PASSED  - neither was found
sub replforceaddr {
    print LOG "\n\n\n### ".localtime()." ### Starting replforceaddr\n";

    # Both flags are declared lexically; "my $e,$e1;" only declares $e.
    my ($e,$e1);
    if (open(FILE,"avmgr getl --path=/REPLICATE 2>/dev/null|")) {
        while(<FILE>) {
            next if (!/^2/);
            my($type,$name)=split();
            printboth("ERROR: Replication may be setup from $name to this grid.\n");
            $e1="yes";
        }
        close(FILE);
    } else {
        print LOG "Could not run avmgr getl: $!\n";
    }
    if ($e1) {
        printboth("RESOLUTION: Manually check the source grid for the --forceaddr flag in /usr/local/avamar/etc/repl_cron.cfg\n");
        printboth(" This flag requires special configuration after upgrading to 6.1. See Esc 5048 for more info\n\n");
    }

    if ( -r "/usr/local/avamar/etc/repl_cron.cfg" ) {
        my $result = `grep "forceaddr" /usr/local/avamar/etc/repl_cron.cfg|grep -v "^#"`;
        if ($result =~ /forceaddr/ ) {
            printboth("ERROR: Replication is using the --forceaddr flag which requires special configuration after upgrading to 6.1 and later\n");
            printboth("RESOLUTION: See escalation 5048 before performing an upgrade.\n\n");
            $e="yes";
        }
    }
    if ($e) {
        msg("Replication Force Addr","FAILED");
    } elsif ($e1) {
        msg("Replication Force Addr","WARNING");
    } else {
        msg("Replication Force Addr","PASSED");
    }
}
4864
4865########## Start etcprofile ##########
# Verify on every node that /etc/profile puts the Avamar binaries on PATH,
# either directly or through /etc/profile.d/avamar-path.sh or
# /etc/profile.d/zzzavamar.sh. A node passes when the remote command prints
# at least one PROFILE_OK marker.
sub etcprofile {
    print LOG "\n\n\n### ".localtime()." ### Starting etcprofile\n";
    my $cmd=q[ grep -q '/usr/local/avamar/bin' /etc/profile && echo PROFILE_OK1;
 grep -q 'profile.d' /etc/profile && [ -e /etc/profile.d/avamar-path.sh ] && echo PROFILE_OK2
 grep -q 'profile.d' /etc/profile && [ -e /etc/profile.d/zzzavamar.sh ] && echo PROFILE_OK3
 ];
    mapall($ALL,$cmd);
    # Without this check an unreadable output file would be reported as PASSED.
    if (!open(CMD_PIPE,$TMPFILE)) {
        print LOG "Could not open mapall output '$TMPFILE': $!\n";
        return;
    }
    my $e="";
    # True until the first node header is seen, so nothing is reported
    # before any node output has been read.
    my $nodeok="OK";
    while (<CMD_PIPE>) {
        chomp;
        print LOG "$_\n";
        if (/(\(0\..*\)) ssh/) {
            # A new node header closes out the previous node.
            if (!$nodeok and $node) {
                $e="yes";
                printboth("ERROR: Node $node /etc/profile PATH is wrong\n");
            }
            $node=$1;
            $nodeok=0;
        }
        $nodeok=1 if (/PROFILE_OK/);
    }
    close(CMD_PIPE);

    # The last node has no following header, so it is evaluated here.
    if (!$nodeok and $node) {
        $e="yes";
        printboth("ERROR: Node $node /etc/profile PATH is wrong\n");
    }

    if ($e) {
        printboth("RESOLUTION: See esc5358. Fix by replacing /etc/profile with /etc/profile.rpmsave\n\n");
        msg("/etc/profile","FAILED");
    } else {
        msg("/etc/profile","PASSED");
    }
}
4896
4897########## Start aerplugin ##########
# Report the host names found in the Transport System Service
# GridService.xml as registered media access nodes, or "NONE" when that
# file does not exist.
sub aerplugin {
    print LOG "\n\n\n### " . localtime() . " ### Starting aerplugin\n";

    unless (-e "/opt/EMC/TransportSystemService/config/GridService.xml") {
        return msg("Registered Media Access Nodes", "NONE");
    }

    # $xml and $tree are not declared here, so they are package variables.
    $xml  = XML::Parser->new(Style => 'Tree');
    $tree = $xml->parsefile("/opt/EMC/TransportSystemService/config/GridService.xml");
    SimpleXMLTree($tree);

    # Collect the value of every flattened key that mentions "hostname".
    my $man = "";
    foreach my $path (keys %xmltree) {
        next unless ($path =~ /hostname/i);
        $man .= ", " if ($man);
        $man .= "$xmltree{$path}";
    }
    return msg("Registered Media Access Nodes", "DETECTED at $man");
}
4916
4917########## End aerplugin ##########
4918
4919
4920########## Start bug47560 ##########
4921# Code 1 marked obsolete, bug 47560 fixed in rollup 50070/51416
# Write event_code_1.txt: every <0001> entry in /data01/cur/err.log on each
# node, starting from the first line that mentions version 6.1.1-81 or
# 6.1.1-87.
# Called from MCS patches if bug 50070 is needed.
sub bug47560 {
    print LOG "\n\n\n### ".localtime()." ### Starting bug47560\n";
    printboth(" Part of this bug is event code 1 will not create service requests.\n");
    printboth(" The file event_code_1.txt has all the code 1 events that have been missed\n");
    my $cmd=qq[ sed -n '/6.1.1-8[17]/,\$p' /data01/cur/err.log | sed -n '/<0001>/p' ];
    mapall($ALL,$cmd);
    if (!open(CMD_PIPE,$TMPFILE)) {
        print LOG "Could not open mapall output '$TMPFILE': $!\n";
        return;
    }
    if (!open(EVENT,">event_code_1.txt")) {
        print LOG "Could not create event_code_1.txt: $!\n";
        close(CMD_PIPE);
        return;
    }
    chomp(my $dt = `date +%Y-%m-%d`);
    print EVENT "======================================================\n";
    print EVENT "\n$dt Event Code 1 missed since upgrade\n\n";
    while (<CMD_PIPE>) {
        chomp;
        print LOG "$_\n";
        if (/(\(0\..*\)) ssh/) {
            # Node header: start a new section in the report.
            print EVENT "======================================================\n";
            print EVENT "Node $1\n";
        } else {
            print EVENT "$_\n";
        }
    }
    close(CMD_PIPE);
    # Closing the report flushes it; a failure here means it is incomplete.
    close(EVENT) or print LOG "Error closing event_code_1.txt: $!\n";
}
4945########## End bug47560 ##########
4946
4947########## Start ipmi ##########
# Verify that the IPMI kernel modules are loaded on every node that
# getnodes_hw("dell|emc") returns. A node fails when lsmod lists fewer than
# two ipmi modules, or when it produced no "IPMI:" line at all.
sub ipmi {
    print LOG "\n\n\n### ".localtime()." ### Starting ipmi\n";
    gethardware() if (!$MANUFACTURER);
    my $nodes=getnodes_hw("dell|emc");
    if (!$nodes) {
        print LOG "no dell/emc nodes found\n";
        return;
    }
    my $cmd=qq[ echo "IPMI: `/sbin/lsmod | grep -c ipmi`" ];
    mapall("--nodes=$nodes",$cmd);
    # Without this check an unreadable output file would be reported as PASSED.
    if (!open(CMD_PIPE,$TMPFILE)) {
        print LOG "Could not open mapall output '$TMPFILE': $!\n";
        return;
    }
    my $e;
    # -1 = no node header seen yet, 0 = header seen but no IPMI line yet,
    #  1 = IPMI line seen for the current node.
    my $sawipmi=-1;
    while (<CMD_PIPE>) {
        chomp;
        print LOG "$_\n";
        if (/(\(0\..*\)) ssh/) {
            if ($sawipmi==0) {
                printboth("ERROR: Unexpected output '$_' (last node $node)\n");
                $e="yes";
            }
            $node=$1;
            $sawipmi=0;
            next;
        }
        if (/IPMI: (.*)/) {
            $sawipmi=1;
            if ($1 < 2) {
                $e="yes";
                printboth("ERROR: Node $node IPMI is not working\n");
            }
            next;
        }
    }
    close(CMD_PIPE);

    # The last node has no following header, so a missing IPMI line for it
    # is reported here.
    if ($sawipmi==0) {
        printboth("ERROR: Node $node did not return any IPMI status\n");
        $e="yes";
    }

    if ($e) {
        printboth("RESOLUTION: IPMI does not appear to be working. See KB463268 for Dell hardware\n") if ($MANUFACTURER =~ /dell/i);
        printboth("RESOLUTION: IPMI does not appear to be working. See KB465167 for EMC hardware\n") if ($MANUFACTURER =~ /emc/i);
        if ($PREUPGRADE) {
            printboth(" An upgrade cannot be performed until this is fixed\n");
        }
        printboth("\n");
        msg("IPMI Check","FAILED");
    } else {
        msg("IPMI Check","PASSED");
    }
}
4994########## End ipmi ##########
4995
4996########## Start activesessions ##########
# Count the lines of "avmaint sessions" output that mention a path and
# report a warning when any are found.
sub activesessions {
    print LOG "\n\n\n### " . localtime() . " ### Starting activesessions\n";
    open(FILE, "avmaint sessions|");

    my $active = 0;
    while (my $row = <FILE>) {
        chomp($row);
        next unless ($row =~ /path/);
        print LOG "Found session path: $row\n";
        $active += 1;
    }

    if ($active) {
        printboth("WARNING: There are $active sessions active\nRESOLUTION: If that is an issue wait or cancel active sessions\n\n");
        msg("Active Sessions", "FAILED");
    }
}
5012########## End activesessions ##########
5013
5014########## Start adsinfo ##########
sub adsinfo {
    # Report the downloader (ADS) servers recorded in the avinstaller log.
    #
    # Reads the log newest-first (tac) and keeps the first "ADS info:" entry
    # seen for each hostname.  The result is reported through
    # msg("Downloader Server:", ...) as "host (version), host (version)".
    # Nothing is reported when no entry is found.
    print LOG "\n\n\n### ".localtime()." ### Starting adsinfo\n";
    $cmd=qq[ tac /usr/local/avamar/var/avi/server_log/avinstaller.log.0 | grep 'ADS info: '] ;

    my $fh;
    if (!open($fh,'-|',$cmd)) {
        print LOG "Unable to run '$cmd': $!\n";
        return;
    }

    my @servers;
    my %seen;       # hostnames already reported
    while (my $line=<$fh>) {
        chomp $line;
        if ($line =~ /hostname: (.*), port.*version: (.*), last/) {
            my ($host,$version)=($1,$2);
            next if ($seen{$host});
            $seen{$host}=1;
            push(@servers,"$host ($version)");
        }
    }
    close($fh);

    my $ads=join(", ",@servers);
    if ($ads) {
        msg("Downloader Server:",$ads);
    }
}
5033########## End adsinfo ##########
5034
5035########## Start switchconf ##########
sub switchconf {
    # Check the internal switches of a multi-node Gen4 grid.
    #
    # Skipped unless the node type contains "gen4", there is more than one
    # node, and bond1 uses the default 192.168.255.x network.  The check
    #   1. arpings 192.168.255.3 from node 0.s and flags a duplicate IP when
    #      replies come from more than one MAC address,
    #   2. queries both default switch addresses through switchconf1(),
    #   3. flags a mismatch between the two switch types.
    # The outcome is reported through msg("Switch Configuration", ...).
    print LOG "\n\n\n### ".localtime()." ### Starting switchconf\n";
    getnodetype() if (!%PARTLIST);
    if ($NODETYPE !~ /gen4/i) {
        print LOG "Skipping check. No Gen4 in nodetype: $NODETYPE\n";
        return;
    }
    if ($NODE_COUNT == 1) {
        print LOG "Skipping check. Single Node\n";
        return;
    }
    my $result=`ifconfig bond1`;
    print LOG $result;
    # Dots escaped so that only the literal broadcast address matches.
    if ( $result !~ /Bcast:192\.168\.255\.255/) {
        print LOG "Skipping check. Default IP not in use\n";
        return;
    }
    mapall("--nodes=0.s","sudo arping -I bond1 -c3 192.168.255.3");

    my $last="";        # MAC address of the previous arping reply
    my $dup_ip=0;       # set when two different MACs answer for the same IP
    if (open(my $fh,'<',$TMPFILE)) {
        while (my $line=<$fh>) {
            chomp $line;
            print LOG "$line\n";
            # Track the node that produced the output so the message below
            # names it instead of whatever an earlier check left in $node.
            $node=$1 if ($line =~ /(\(0\..*\)) ssh/);
            if ($line =~ /sudo: no tty present/) {
                printboth("ERROR: Node $node sudo arping failed with $line \n");
                printboth("RESOLUTION: Fix sudo problem. Switch config checks will not be valid because of this\n\n");
                msg("Switch Config info retrieval","FAILED");
                return;
            }

            if ($line =~ /^Unicast .*\[(.*)\]/) {
                my $mac=$1;
                if ($last ne $mac and $last) {
                    $dup_ip=1;
                    printboth("ERROR: Duplicate IP Address bond1 192.168.255.3 MAC=$mac MAC=$last\n");
                    printboth(" This is probably a conflict between node 0.1 and the switch\n");
                    printboth("RESOLUTION: Resolve duplicate IP addresses\n");
                    last;
                }
                $last=$mac;
            }
        }
        close($fh);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }

    my $resolution="";
    my $err=0;
    ($resolution,my $switchtype1)=switchconf1("192.168.255.200","avg4_swa","Switch A","dell_swa",$resolution);
    ($resolution,my $switchtype2)=switchconf1("192.168.255.201","avg4_swb","Switch B","dell_swb",$resolution);

    if ($resolution) {
        printboth("$resolution\n");
        $err=1;
    }

    if ($switchtype1 ne $switchtype2) {
        printboth("ERROR: Switch A is $switchtype1 and Switch B is $switchtype2\n");
        printboth("RESOLUTION: Replace one of the switches. Difference switches are not supported\n\n");
        $err=1;
    }

    # A missing switch is informational only, unless a duplicate IP was
    # detected above; that error must not be downgraded to INFO.
    my $missing=($switchtype1 eq "Missing" or $switchtype2 eq "Missing");
    if ($missing and !$dup_ip) {
        msg("Switch Configuration","INFO") ;
        return;
    }
    if ($err or $dup_ip) {
        msg("Switch Configuration","FAILED");
    } else {
        msg("Switch Configuration","PASSED");
    }
}
5105
5106# check each switch
sub switchconf1 {
    # Probe a single switch and classify it.
    #
    # Parameters:
    #   $ip              address of the switch
    #   $allied_pattern  text expected in an Allied Telesys configuration
    #   $brocade_pattern text expected in a Brocade configuration
    #   $dell_pattern    text expected in a Dell configuration
    #   $resolution      resolution text carried over from an earlier call;
    #                    returned unchanged unless this call sets its own
    # Returns ($resolution,$switchtype) where $switchtype is one of
    # "Missing", "Unknown", "Allied Telesys", "Brocade" or "Dell".
    #
    # NOTE(review): the expect script below embeds switch credentials in the
    # source; consider moving them to configuration.
    my($ip,$allied_pattern,$brocade_pattern,$dell_pattern,$resolution)=@_;
    my $switchtype="Unknown";
    my $ping=`ping -c1 $ip`;
    if ($? != 0) {
        print LOG "Ping result=$ping\n";
        printboth("INFO: No switch found at default location $ip\n");
        $resolution="RESOLUTION: This may not be a problem but should be verified\n";
        return($resolution,"Missing");
    }
    # Telnet to the switch with expect.  A "login:" prompt is answered as
    # user "manager" and followed by "show conf"; if no login prompt appears
    # the script tries the "User:" prompt instead and quits right after
    # logging in.  The script text is kept exactly as it was.
    my $result = qx{
 echo -e 'set timeout 5
 spawn telnet $ip
 expect {
 "login:" {
 send "manager\r"
 } timeout {
 send_user "TryDell\r"
 expect {
 User: {
 send -- "admin\r"
 } timeout exit 1;
 }
 expect sword:
 send -- "DellN1124T\r"
 expect ">"
 send -- "quit\r"
 exit
 }
 }
 expect "sword:"
 send -- "$SPW\r"
 expect ">"
 send -- "show conf\r"
 expect ">"
 send -- "quit\r"'|expect - 2>&1
 }; # apostrophe ' is just to fix color formatting
    print LOG "#--> switch $ip: $result\n";
    if ($result =~ /No route to host|timed out/i) {
        printboth("INFO: No switch found at default location $ip\n");
        $resolution="RESOLUTION: This may not be a problem but should be verified\n";
        $switchtype="Missing";
    } elsif ($result !~ /$allied_pattern|$brocade_pattern|$dell_pattern/i) {
        printboth("WARNING: Switch $ip connected but did not respond as expected\n");
        $resolution="RESOLUTION: Configure switch to Avamar settings. See KB453705\n";
    } else {
        # Classify with the same case-insensitive matching as the guard
        # above, so a match that passed the guard cannot stay "Unknown".
        # Later matches win, as before: Dell over Brocade over Allied.
        $switchtype="Allied Telesys" if ($result =~ /$allied_pattern/i);
        $switchtype="Brocade" if ($result =~ /$brocade_pattern/i);
        $switchtype="Dell" if ($result =~ /$dell_pattern/i);
        print LOG "Found pattern '$allied_pattern|$brocade_pattern|$dell_pattern'. Switchtype $switchtype\n";
    }
    return ($resolution,$switchtype);
}
5160########## End switchconf ##########
5161
5162########## Start greenvillehotfix ##########
sub greenvillehotfix {
    # Detect the "Greenville" hotfix on Avamar 6.1.0-402.
    #
    # Only runs when $AVAMARVER contains 6.1.0-402.  The hotfix is
    # considered applied when mcserver.xml contains at least one
    # expire_data_after_secs line; the result is reported through
    # msg("Greenville Hotfix", ...).
    print LOG "\n\n\n### ".localtime()." ### Starting greenvillehotfix\n";
    getinstalledversion() if (!$AVAMARVER);
    # Dots are escaped so that only the literal version string matches.
    if ($AVAMARVER !~ /6\.1\.0-402/) {
        print LOG "Skipping: $AVAMARVER is not 6.1.0-402\n";
        return;
    }
    my $result=`grep -c expire_data_after_secs /usr/local/avamar/var/mc/server_data/prefs/mcserver.xml`;
    print LOG "Count: $result\n";
    if ($result > 0 ) {
        printboth("ERROR: Greenville Hotfix has been applied that will cause an upgrade to fail\n");
        printboth("RESOLUTION: See esc6199, comment#9 for instructions before upgrading.\n\n");
        msg("Greenville Hotfix","FAILED");
    } else {
        msg("Greenville Hotfix","PASSED");
    }

}
5181########## End greenvillehotfix ##########
5182
5183
5184########## Start get_metadatacapacity ##########
5185# Get current max metadata capacity by stripe count or CUR value
sub get_metadatacapacity {
    # Work out the current metadata capacity figure for the grid.
    #
    # Two candidates are computed: the highest per-node ratio of stripes in
    # use to the part's maxstripe value, and a disk utilisation figure
    # derived from the highest stripe-reserved / fs-size ratio over all
    # disks.  Returns (value, "CUR") when the disk figure is the larger one,
    # otherwise (value, "stripes").
    print LOG "\n\n\n### ".localtime()." ### Starting get_metadatacapacity\n";

    my $grep_out=`grep stripeUtilizationCapacityFactor /usr/local/avamar/var/mc/server_data/prefs/mcserver.xml`;
    print LOG " result $grep_out\n";
    my($factor)=$grep_out=~m {value="(.*)"};
    my $ro_pct=$NODELIST{'/nodestatuslist/gsanconfig/diskreadonly'};
    print LOG "stripefactore=$factor readonly=$ro_pct\n";

    # Count the stripes of each node as listed by avmaint ping
    my %stripes_on;
    open(my $ping,"avmaint ping|");
    while(<$ping>) {
        $stripes_on{$1}++ if (/id="(0\..+)-/);
    }

    # Track the largest stripe ratio and the largest stripe-reserved ratio
    my ($max_cur,$max_stripe_ratio)=(0,0);
    for my $logical (sort @NODES) {
        my $phys=$NODE_LXREF{$logical};
        my $part=$NODE_INFO{"$phys"}{partno};
        my $limit=$PARTLIST{$part}{maxstripe};
        print LOG "node:$logical part:$part maxstripe:$limit stripecount:$stripes_on{$logical} pct:";
        if ($limit>0) {
            $max_stripe_ratio=max($stripes_on{$logical}/$limit,$max_stripe_ratio);
            print LOG ($stripes_on{$logical}/$limit),"\n";
        } else {
            printboth("ERROR: Node $logical does not have a maximum stripe allowed value\n");
            $err="FAILED";
        }
        my $disk_count=$NODELIST{"/nodestatuslist/nodestatus/$logical/disks/count"};
        for my $idx (0 .. $disk_count-1) {
            my $base="/nodestatuslist/nodestatus/$logical/disks/disk/$idx";
            my $reserved_ratio=$NODELIST{"$base/stripe-reserved"} / $NODELIST{"$base/fs-size"} ;
            print LOG qq[srpct $NODELIST{"$base/stripe-reserved"} / $NODELIST{"$base/fs-size"} = $reserved_ratio\n];
            $max_cur=max($max_cur, $reserved_ratio);
        }
    }

    my $disk_util=sprintf("%d",$max_cur/ ($factor*$ro_pct) *100*100);
    print LOG "diskutil: $disk_util stripeutil: $max_stripe_ratio\n";
    unless ($disk_util > $max_stripe_ratio) {
        return $max_stripe_ratio, "stripes";
    }
    return $disk_util, "CUR";

}
5233########## end get_metadatacapacity ##########
5234
5235########## Start metadatacapacity ##########
sub metadatacapacity {
    # Pre-upgrade check of metadata capacity for backups to Data Domain.
    #
    # Runs only for a pre-upgrade or when $METADATA_CAPACITY is set.  From the
    # highest stripe-reserved / fs-size ratio ("CUR") over all disks the
    # system is classified as:
    #   Green A  CUR below .52            -> INFO
    #   Red      CUR of .78 or more       -> FAILED (user is asked whether to
    #                                        continue with other checks)
    #   Yellow   anything in between      -> WARNING with new requirements
    # For the Yellow range checkpoint overhead is taken from $CPOVERHEAD, from
    # the /tmp/admincron.cps history (Gen3 3.3TB nodes), or from the MCS
    # database using the criteria built by cpoverhead().
    # The result is reported through msg("Metadata Capacity", ...).
    print LOG "\n\n\n### ".localtime()." ### Starting metadatacapacity\n";
    if (!$PREUPGRADE and !$METADATA_CAPACITY) {
        print LOG "Skipping. No preupgrade\n";
        return;
    }
    getnodetype() if (!%PARTLIST );
    getdatadomain() if (!$DDRMAINT_VERSION ) ;
    openmcdb() if (!$dbh);
    if (!%DD and !$METADATA_CAPACITY) {
        print LOG "Skipping. No DD\n";
        return if !$DEBUG;
    }
    my $use_vers=($UPGRADE_VERSION) ? $UPGRADE_VERSION : $DATANODEVERSION;
    if ($use_vers < "7" and !$METADATA_CAPACITY) {
        print LOG "Skipping. Pre v7\n";
        return;
    }
    if ($VERSNUM >=700 and !$METADATA_CAPACITY) {
        print LOG "Skipping. Already V7 ($VERSNUM)\n";
        return if !$DEBUG;
    }

    my $result=`grep stripeUtilizationCapacityFactor /usr/local/avamar/var/mc/server_data/prefs/mcserver.xml`;
    # Was matching against the never-assigned $results, so this was always empty.
    my($curr_stripefactor)=$result=~m {value="(.*)"};

    print LOG "CS: $curr_stripefactor\n";
    my $err="";
    my ($utilization,$overhead,$gridsize,$cur,$readonly,$stripe_count_pct)=(0)x6;
    $readonly=$NODELIST{'/nodestatuslist/gsanconfig/diskreadonly'};
    $readonly=(!$readonly and $DEBUG) ? 65 : $readonly ;
    if ($readonly >65) {
        printboth("WARNING: Read-only is not the default which may cause unpredicatble results. Using 65% instead of $readonly\n");
        $readonly=65;
    } elsif ($readonly <65) {
        printboth("WARNING: Read-only is $readonly which is not the default and may cause unpredicatble results.\n");
    }


    # Get each nodes stripe count from avmaint ping
    my %nsc;
    if (open(my $ping_fh,'-|','avmaint ping')) {
        while(my $line=<$ping_fh>) {
            next if ($line !~ /id="(0\..+)-/);
            $nsc{$1}++;
        }
        close($ping_fh);
    } else {
        print LOG "Unable to run 'avmaint ping': $!\n";
    }

    # Get max stripe_count_pct and max cur
    foreach my $node (sort @NODES) {
        my $physnode=$NODE_LXREF{$node};
        my $partno=$NODE_INFO{"$physnode"}{partno};
        my $maxstripe=$PARTLIST{$partno}{maxstripe};
        print LOG "node:$node part:$partno maxstripe:$maxstripe stripecount:$nsc{$node} pct:";
        if ($maxstripe<=0) {
            print LOG "ERR\n";
            printboth("ERROR: Node $node does not have a maximum stripe allowed value\n");
            $err="FAILED";
        } else {
            $stripe_count_pct=max($nsc{$node}/$maxstripe,$stripe_count_pct);
            print LOG ($nsc{$node}/$maxstripe),"\n";
        }
        for (my $disk=0;$disk<$NODELIST{"/nodestatuslist/nodestatus/$node/disks/count"};$disk++){
            my $key="/nodestatuslist/nodestatus/$node/disks/disk/$disk";
            my $srpct=$NODELIST{"$key/stripe-reserved"} / $NODELIST{"$key/fs-size"} ;
            print LOG qq[srpct $NODELIST{"$key/stripe-reserved"} / $NODELIST{"$key/fs-size"} = $srpct\n];
            $cur=max($cur, $srpct);
            $gridsize+=$NODELIST{"$key/fs-size"};
        }
        #$gridsize=$gridsize/$NODELIST{"/nodestatuslist/nodestatus/$node/disks/count"};
    }
    $utilization=sprintf("%d",$cur/$readonly*100*100);
    my $max=sprintf("%d",100*$cur);
    # GREEN A
    if ($cur<.52) { #80%
        printboth("INFO: Avamar system is at $utilization% ($max% CUR) capacity for metadata storage for backups to Data Domain. (Green A)\n");
        printboth(qq[RESOLUTION: Please read the "EMC Avamar Metadata Capacity Reporting and Monitoring" Document for 7.0 before adding any additional workloads to Data Domain.\n\n]);
        msg("Metadata Capacity","INFO");
        return;
    }

    # RED
    if ($cur>=.78) { #120%
        $msg=qq[ERROR: Avamar system has been fully utilized at $utilization% ($max% CUR) capacity for metadata storage for backups to Data Domain. (Red)\nRESOLUTION: Please consult with the EMC account team to increase the capacity prior to upgrading to v7.0.
 Upgrading now will cause the grid to go into a permanent, irrecoverable state where the Backup Scheduler is disabled.
 Restores can still be performed but additional capacity will need to be added to the system to resume normal operations.\n\n];
        printboth($msg);
        msg("Metadata Capacity","FAILED");
        print "\n${msg}Do you want to continue performing other health checks? ";
        $input=<STDIN>;
        exit if ($input !~ /^y/i) ;
        return;
    }

###
    # Check for 3.3TB nodes which we cant identify cpoverhead
###
    if ($NODETYPE eq "3.3TB Gen3" and !$CPOVERHEAD ) {
        print LOG "Gen3 3.3TB metadata\n";
        $file="/tmp/admincron.cps";
        my $msg="";
        if (! -e "$file") { while() { # while is to make easy way to exit on failure
# File does not exist, add commands to cron
            my $xml = XML::Parser->new( Style => 'Tree' );
            my $tree=$xml->parsefile("avmaint lscp|") ;
            SimpleXMLTree($tree);
            my %lscp=%xmltree;
            my $time="";
            for (grep /tag/, %lscp ) {
                my $key="/checkpointlist/checkpoint/$lscp{$_}";
                next if ($lscp{"$key/hfscheck/validcheck"} ne "true");
                my ($sec, $mn, $hr, $dd, $mm, $yy) = (localtime($lscp{"$key/cpctime"}));
                print LOG qq[HFS: $key $lscp{"$key/cpctime"} $mm/$dd/$yy $hr:$mn:$sec\n];
                $time="$mn $hr";
            }
            if (!$time) {
                $msg="ERROR: Unable to get checkpoint start time from last HFSCheck (avmaint lscp)\n";
                last;
            }
            my $cron_out=`crontab -l |grep -v '$file'> /tmp/admincron`;
            if ($? != 0 ) {
                $msg="ERROR: Failed to get current crontab (crontab -l): Err $? - $cron_out\n";
                last;
            }
            print LOG "crontab -l : $? : $cron_out\n";
            my $copy_out=`cd /usr/local/avamar/bin; /usr/bin/ssh-agent bash -c "/usr/bin/ssh-add ~/.ssh/dpnid; /usr/local/avamar/bin/mapall --parallel copy cps 2>&1" 2>&1`;
            if ($?!=0) {
                $msg="ERROR: Failed to copy 'cps' to nodes (mapall copy cps): Err $? - $copy_out\n";
                last;
            }
            print LOG "mapall copy: $? : $copy_out\n";
            open(my $cron_fh,">>/tmp/admincron");
            print $cron_fh qq[$time * * * (/usr/bin/ssh-agent bash -c "/usr/bin/ssh-add ~/.ssh/dpnid; /usr/local/avamar/bin/mapall './cps'") 2>/dev/null >> $file; sed -i -e :a -e '\$q;N;2000,\$D;ba' $file\n];
            # Make sure the new line is on disk before crontab reads the file.
            close($cron_fh);
            my $install_out=qx{crontab /tmp/admincron};
            if ($?!=0) {
                $msg="ERROR: Failed to install new cron (crontab /tmp/admincron): Err $? - $install_out\n";
                last;
            }
            print LOG "updating cron: $? : $install_out\n";
            last;
        } #end while
            if ($msg) {
                printboth("NOTICE: Avamar system is at $utilization% ($max% CUR) capacity for metadata storage for backups to Data Domain.\n");
                printboth("${msg} Automatic installation of overhead monitoring for Gen3 3.3TB nodes failed.\n");
                printboth(qq[RESOLUTION: Resolve problem identified or manually add overhead collecting to cron and "cps" to data nodes.
-------------------------------------------------------------------------------
Steps to start collecting checkpoint overhead on Gen3 3.3TB Nodes:
1. Log in to the grid as admin
2. Load SSH keys
 # ssh-agent bash
 # ssh-add ~/.ssh/dpnid

3. Copy 'cps' program to every node (even on single node servers):
 # cd /usr/local/avamar/bin
 # mapall copy cps

4. Identify what time the validated checkpoint was created (07:12 in the example). The "rol" is the validated rolling hfscheck.
 # cplist
 # cp.20140320160917 Thu Mar 20 07:12:17 2014 valid rol --- nodes 1/1 stripes 24125
 # cp.20140320172253 Thu Mar 20 10:22:53 2014 valid --- --- nodes 1/1 stripes 24125

5. Add the crontab line to the admin cron to run cps every day at the HFScheck time from step 3. Note the order is minute hour.
 # crontab -e
07 12 * * * (/usr/bin/ssh-agent bash -c "/usr/bin/ssh-add ~/.ssh/dpnid; /usr/local/avamar/bin/mapall './cps'") 2>/dev/null >> /tmp/admincron.cps; sed -i -e :a -e '\$q;N;2000,\$D;ba' /tmp/admincron.cps
-------------------------------------------------------------------------------
\n]);
                msg("Metadata Capacity","FAILED");
            } else {
                printboth("NOTICE: Avamar system is at $utilization% ($max% CUR) capacity for metadata storage for backups to Data Domain.\n");
                printboth(" Automatic installation of overhead monitoring for Gen3 3.3TB succeeded.\n");
                printboth("RESOLUTION: Re-run script after five days to get metadata recommendation\n\n");
                my $results=`touch $file`;
                msg("Metadata Capacity","FAILED");
            }
            return;
        } else {
# File exists, check for 5 days worth of history to use
            open(my $hist_fh,$file);
            my ($oh,$cnt,$days)=(0)x3;
            while(<$hist_fh>) {
                my($gb,$pct,$cp)=split();
                $oh+=$pct if (/cp\./);
                $cnt++ if (/ cur/);
                $days++ if (/Using/);
            }
            print LOG "Days: $days count:$cnt TotOH:$oh\n";
            if ($cnt>1) {
                $CPOVERHEAD=$oh/$cnt;
                print LOG "CPOVERHEAD=$CPOVERHEAD\n";
            }
            if ($days<5 and !$OVERRIDE) {
                printboth("NOTICE: Avamar system is at $utilization% ($max% CUR) capacity for metadata storage for backups to Data Domain.\n");
                printboth(" Overhead monitoring has been installed for Gen3 3.3TB nodes but there are only $days days of overhead monitoring.\n");
                printboth("RESOLUTION: Re-run script after five days to get metadata recommendation\n\n");
                msg("Metadata Capacity","FAILED");
                return;
            }
        }
    }
# End of Gen3 3.3TB

    # YELLOW cur>=80% <=120% Get maintenance history. Count cp,hfs,gc. Build criteria for sql to select hfs CP's
    if ($CPOVERHEAD) {
        print LOG "Skipping all cpoverhead checks. using flag cpoverhead=$CPOVERHEAD * 5\n";
        $overhead=$CPOVERHEAD/100*5 ;
    } else {
        my $crit="ERROR";
        my $days=0;
        # Slide the window back one day at a time until cpoverhead() finds a
        # clean maintenance period or 30 days have been tried.
        while($crit =~ /ERROR/ and $days<=30) {
            $crit=cpoverhead($days);
            print LOG "$crit\n";
            $days+=1;
        }
        if ($crit =~ /ERROR/ ) {
            printboth("WARNING: Avamar system is at $utilization% ($max% CUR) capacity for metadata storage for backups to Data Domain.\n");
            printboth("$crit\n Cannot determine checkpoint overhead. No reliable 5 day period in past 30 days\n");
            printboth("RESOLUTION: Determine why maintenance routines have not run reliably for 5 days in a row any time in the past month\n\n");
            printboth(" Escalate to an RCM SME and then Avamar AppEng if needed. Include hc_proactive_check.log with email\n\n");
            msg("Metadata Capacity","FAILED");
            return;
        }

### Get CP overhead: Highest value of O/S at HFSCP, lowest O/S for that day, also grab cur and disk size
        # NOTE(review): $days was incremented once more after the successful
        # cpoverhead() call, so the window below starts one day earlier than
        # the one cpoverhead() validated -- confirm this is intended.
        my $sday=$days+5;

        my $sql=qq[ select avg(cpavg) as cpavg
 from (
 select date_time,avg (max) as cpavg
 from (
 select date_time,node,disk,max(used_mb/capacity_mb)-
 (select min(used_mb/capacity_mb)
 from v_node_space m
 where date_time>=NOW()-INTERVAL '$sday day'
 and date <= NOW()-INTERVAL '$days day'
 and n1.node=m.node and n1.disk=m.disk
 ) as max from v_node_space n1
 where ($crit)
 and disk >= 0 group by 1,2,3
 ) as dayavg
 group by date_time order by avg(max) desc offset 2
 ) as gridavg
 ];
        print LOG $sql,"\n";
        my $sth = $dbh->prepare($sql); $sth->execute;
        my $R=$sth->fetchrow_hashref();
        $overhead=$R->{cpavg}*5;
        print LOG "overhead: $overhead \n" if $DEBUG;
    }

    ### Check Retention
    my $retenmsg="";
    if (!$METADATA_RETENTION) {
        $retention=60;
        $retenmsg=".\nThe default retention of 60 days is being used which affects these estimates. Use --retention=N to change retention days";
    } else {
        $retention=$METADATA_RETENTION;
        $retenmsg=" with $retention days of retention";
    }

### Calculate everything
    $gridsize=$gridsize/1024/1024;
    my $maxcur=$overhead+$cur;
    my $disknogc=.89;
    my $maxallowcur=($disknogc-$overhead >.78) ? .78 : $disknogc-$overhead;
    my $availcur=$maxallowcur-$cur;
    my $parity=1;
    if ($NODE_COUNT>2) {
        my $parity_nodes=($NODE_COUNT>9) ? 9 : $NODE_COUNT-1;
        $parity=1-(1/$parity_nodes);
    }
    my $metadata=$availcur * $gridsize * $parity;
    my $reten_adjust=2+(.03*$retention);
    my $stripefactor=sprintf("%3.2f",$maxallowcur/($readonly/100) );
    my $minfs=$metadata * 100 / $reten_adjust ;
    my $maxfs=$minfs*4;

    printf LOG "Node Count.....: %d\n",$NODE_COUNT;
    printf LOG "Read-only......: %d\n",$readonly;
    printf LOG "CUR............:%6.2f%% (Utilization=%d)\n",100*$cur, $utilization;
    printf LOG "Disknogc.......:%6.2f%% \n",100*$disknogc;
    printf LOG "Overhead.......:%6.2f%% \n",100*$overhead;
    printf LOG "Max allow Cur..:%6.2f%% (disknogc-overhead to max of 78%%)\n",100*$maxallowcur;
    printf LOG "Cur............:%6.2f%% (stripe-reserved)\n",100*$cur;
    printf LOG "Available Cur..:%6.2f%% (MaxAllowCur-Cur)\n",100*$availcur;
    printf LOG "Grid Size......:%6.2fTB (raw disk space)\n",$gridsize;
    printf LOG "Parity factor..:%6.2f%%\n",100*$parity;
    printf LOG "AV metadata....:%6.2fTB (gridsize*Parity%%*AvailCur%%)\n",$metadata;
    printf LOG "RetentionFactor:%6.2f (based on $retention days)\n",$reten_adjust;
    printf LOG "Min FS on DD...:%6.2fTB (avmetadata * 100 / retention)\n",$minfs;
    printf LOG "Max FS on DD...:%6.2fTB (avmetadata * 400 / retention)\n",$maxfs;
    printf LOG "StripeFactor...:%6.2fTB (maxallowcur / readonly)\n",$stripefactor;
    print LOG "\n" ;

# OUTPUT
    if ($metadata<0 or $overhead+$cur>.89 or $stripefactor<1.05 ) {
        # Built as one string so the message does not depend on how
        # printboth() treats extra arguments.
        printboth("ERROR: There appears to be an error in metadata capacity. Available metadata=$metadata. Max Overhead+CUR=".int(($overhead+$cur)*100)." Stripefactor=$stripefactor\n");
        printboth("RESOLUTION: Escalate to an RCM SME and then Avamar AppEng if needed. Include hc_proactive_check.log with email\n\n");
        msg("Metadata Capacity","FAILED");
        return;
    }

    printboth("WARNING: Avamar system is at $utilization% ($max% CUR) capacity for metadata storage for backups to Data Domain with an estimate of ".sprintf("%d",$overhead*100)."% overhead. ");
    printboth(qq[\nRESOLUTION: The customer must be advised of and agree to new requirements.
 If the customer does not agree, consult with the EMC account team prior to upgrading to v7.0.
 Upgrading now and not adhering to the new requirements may cause the server to go into a permanent,
 irrecoverable state where the Backup Scheduler is disabled. Restores can still be performed but
 additional capacity will need to be added to the system in order to resume normal operations.
 Please be sure to read the "EMC Avamar Metadata Capacity Reporting and Monitoring" Document for 7.0.
 Consult with the EMC account team to ensure the Avamar grid is ready for additional workloads to Data Domain.
]);
    # One concatenated string, with the spaces that were missing around the
    # size estimate ("estimated5TB and 20TBof front end ...").
    printboth(qq[\nThe new requirements will allow configuration changes so there is an additional ].sprintf("%.1fTB",$metadata).qq[ of metadata storage for Data Domain backups.\nThis will protect between an estimated ].sprintf("%dTB and %dTB",$minfs,$maxfs).qq[ of front end file system data sent to Data Domain$retenmsg.\n]);

    # Was "my $category,$ddonly;", which declared only $category.
    my ($category,$ddonly)=("")x2;
    if ($overhead + $cur < .75 ) {
        $req=" - There are no new requirements for this category.\n";
        $category="Yellow A+";
    } elsif ($overhead + $cur < .85) {
        $req =" - New clients must have their backups sent to Data Domain\n";
        $req.=" - Existing Avamar clients can continue to backup to Avamar.\n";
        $category="Yellow A";
    } elsif ($overhead+$cur < .89) {
        $req =" - New clients must have their backups sent to Data Domain\n";
        $req.=" - Existing Avamar clients must change to send their backups to Data Domain if supported\n";
        $req.=" - Existing Avamar clients not supported by Data Domain can continue to backup to Avamar\n";
        $category="Yellow B";
        $ddonly=" - dd_only_mode=SUPPORTED (mcserver.xml)\n";
    } else {
        $req=" - New clients must have their backups sent to Data Domain\n";
        $req.=" - Existing Avamar client must change to send their backups to Data Domain if supported.\n";
        $req.=" - Existing Avamar clients not supported by Data Domain must be stopped.\n";
        $req.=" - No new data can be added to the Avamar grid from new or existing clients.\n";
        $category="Yellow C";
        $stripefactor=1.20;
        $ddonly=" - dd_only_mode=ALL (mcserver.xml)\n";
    }
    $stripefactor=1.05 if ($stripefactor<1.05);
    printboth("\nNEW REQUIREMENTS:\n$req");
    $disknogc*=100;
    printboth("\nConfiguration Changes For $category:\n - stripeUtilizationCapacityFactor=$stripefactor (mcserver.xml)\n${ddonly} - disknogc=$disknogc (avmaint config)\n\n");
    msg("Metadata Capacity","WARNING");
}
5577########## End metadatacapacity ##########
5578
5579
5580########## Start cpoverhead ##########
sub cpoverhead {
    # Validate a maintenance window and build SQL criteria from it.
    #
    # Parameter: $days - how many days back the window ends; it starts
    #            $days+7 days back.
    # Reads events 4003/4004 (HFSCheck), 4201/4202 (GC) and 4301/4302
    # (checkpoint) from v_events through $dbh.
    # Returns a string starting with "ERROR:" when a failure event is found
    # or fewer than 5 successful runs of any kind exist (the count checks
    # are skipped when $OVERRIDE is set).  Otherwise returns an SQL
    # condition on date_time with one ten-minute window per successful
    # HFSCheck, taken from the time of the event preceding it.
    my $days=shift;
    my $sdays=$days+7;
    print LOG "cpoverhead window: $sdays to $days days ago\n";
    my $sql = qq[ select code,date,time,summary from v_events where code in (4003,4004,4201,4202,4301,4302)
 and date >= NOW() - INTERVAL '$sdays day' and date <= NOW()-INTERVAL '$days day' order by date, time ];
    my $sth = $dbh->prepare($sql); $sth->execute;
    # Start the success counters at 0 so messages and comparisons never see
    # an undefined value.
    my %event=(4003=>0, 4201=>0, 4301=>0);
    my $crit="";
    my $LASTCP;     # previous event row; expected to be the checkpoint taken before an HFSCheck
    while ( my $R = $sth->fetchrow_hashref() ) {
        print LOG "Event Code: $R->{code} $R->{date} $R->{time} $R->{summary}\n";
        if ($R->{code}==4004) { return "ERROR: Failed HFSCheck on $R->{date}.";}
        if ($R->{code}==4202) { return "ERROR: Failed GC on $R->{date}.";}
        if ($R->{code}==4302) { return "ERROR: Failed CP on $R->{date}.";}
        $event{$R->{code}}++;
        # Add last CP to criteria when we see HFS
        if ($R->{code}==4003){
            print LOG "HFS Checkpoint: $LASTCP->{date} $LASTCP->{time}\n";
            # First four characters of the time ("HH:M") select the
            # ten-minute slot HH:M0:00 - HH:M9:59.
            my $time=substr($LASTCP->{time},0,4);
            $crit.=" or " if ($crit);
            $crit.=" (date_time >= '$R->{date} ${time}0:00' and date_time < '$R->{date} ${time}9:59') \n ";
        }
        $LASTCP={%$R};
    }
    print LOG "5 day HFS cnt.: $event{4003}\n5 day CP count: $event{4301}\n5 day GC count: $event{4201}\n";
    return "ERROR: $event{4003} Successful HFSChecks is fewer than the required 5 in a 5 day span." if ($event{4003} < 5 and !$OVERRIDE);
    return "ERROR: $event{4201} Successful Garbage Collect runs is fewer than required 5 in a 5 day span" if ($event{4201} < 5 and !$OVERRIDE);
    return "ERROR: $event{4301} Successful Checkpoints is fewer than required 5 in a 5 day span" if ($event{4301} < 5 and !$OVERRIDE);
    # Trailing newline added so the next message starts on its own line.
    printboth("WARNING: $event{4301} Checkpoints is fewer than the recommended 10 in a 5 day span. This could cause checkpoint overhead to double.\n") if ($event{4301}<10) ;
    return $crit;
}
5613########## End cpoverhead ##########
5614
5615
5616########## Start nodexref ##########
sub nodexref {
    # Build the physical <-> logical node cross reference.
    #
    # On every node the last err.log line containing " {0" supplies the
    # logical node id (second field, braces removed) and $SHELL supplies the
    # login shell.  Fills %NODE_XREF (physical -> logical) and %NODE_LXREF
    # (logical -> physical), sets $NODE_COUNT to the number of node headers
    # seen, and reports "Shell Environment" FAILED when a shell is not bash.
    print LOG "\n\n\n### ".localtime()." ### Starting nodexref\n";
    $cmd='grep " {0" /data01/cur/err.log | tail -1; echo "SHELL:"$SHELL' ;
    mapall("",$cmd);
    $NODE_COUNT=0;
    my $shell="";
    if (open(my $fh,'<',$TMPFILE)) {
        while (my $line=<$fh>) {
            chomp $line;
            print LOG "$line\n";
            if ($line =~ /(\(0\..*\)) ssh/) {
                $node=$1;
                $NODE_COUNT++;
            } elsif ($line =~ /^SHELL:(.*)/) {
                my $found=$1;
                if ($line !~ /bash/) {
                    $shell.="ERROR: Node $node shell is $found and not the default /bin/bash\n";
                }
            } else {
                my($date,$logical)=split(' ',$line);
                $logical="" if (!defined $logical);
                $logical =~ s/[{}()]//g;
                # Blank lines and error text (e.g. a grep failure) must not
                # overwrite the cross reference with garbage.
                if ($logical !~ /^0\.\S+$/) {
                    print LOG "Ignoring line without a logical node id\n";
                    next;
                }
                $NODE_XREF{$node}=$logical;
                $NODE_LXREF{$logical}=$node;
                print LOG "phys $node log $logical for $line\n";
            }
        }
        close($fh);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }
    if ($NODE_COUNT>1) {
        # Multi-node grids get an extra entry for the 0.s node.
        my $node="(0.s)"; my $logical="0.s";
        print LOG "phys $node log $logical for multinode system\n";
        $NODE_XREF{$node}=$logical;
        $NODE_LXREF{$logical}=$node;
    }
    if ($shell) {
        printboth("${shell}RESOLUTION: See KB452837\n\n");
        msg("Shell Environment","FAILED");
    }
}
5652########## End nodexref ##########
5653
5654########## Start etchosts ##########
sub etchosts {
    # Host name resolution checks.
    #
    #   1. Every node pings its own hostname; "unknown host" is an error.
    #   2. avhostname and avhostname -f must both return a value when
    #      /usr/local/avamar/bin/avhostname exists.
    #   3. /etc/hosts must contain exactly one line matching
    #      127.0.0.1 ... localhost.
    # Each failing part reports its own FAILED item through msg().
    print LOG "\n\n\n### ".localtime()." ### Starting etchosts\n";

    my $e=0;
    $cmd=qq[ ping -c1 `hostname` ];
    mapall($ALL,$cmd);
    if (open(my $fh,'<',$TMPFILE)) {
        while (my $line=<$fh>) {
            chomp $line;
            print LOG "$line\n";
            $node=$1 if ($line =~ /(\(0\..*\)) ssh/);
            if ($line =~ /unknown host (.*)/) {
                # "ERROR:" prefix made consistent with every other check.
                printboth("ERROR: Node $node hostname $1 is not resolvable.\n");
                $e=1;
            }
        }
        close($fh);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }
    if ($e) {
        printboth("RESOLUTION: Fix DNS or add hostname to /etc/hosts\n\n");
        msg("Host Name Entry","FAILED");
    }
    chomp(my $result=`avhostname`);
    chomp(my $resultf=`avhostname -f`);
    if ( (!$result or !$resultf) and -e '/usr/local/avamar/bin/avhostname' ) {
        printboth("ERROR: avhostname or avhostname -f returns an empty string\n");
        printboth("RESOLUTION: Fix DNS or /etc/hosts so avhostname returns a value. See ESC 24144\n\n");
        msg("avhostname","FAILED");
    }

    # Check for localhost
    # NOTE(review): more than one matching line is also reported as
    # "not found" -- confirm that exactly one entry is the intent.
    my $r=`grep -c '127.0.0.1.*localhost' /etc/hosts `;
    if ($r != 1 ) {
        printboth("ERROR: Entry for localhost not found in /etc/hosts\n");
        printboth("RESOLUTION: Check /etc/hosts for localhost entry\n\n");
        msg("localhost","FAILED");
    }
}
5690########## End etchosts ##########
5691
5692########## Start plugin_catalog ##########
sub plugin_catalog {
    # Report the plugin catalog version (pre-upgrade runs only).
    #
    # Takes the quoted value from the 'plugin-list version' line of
    # /usr/local/avamar/lib/plugin_catalog.xml and reports it through
    # msg("Plugin Catalog Version", ...).
    print LOG "\n\n\n### ".localtime()." ### Starting plugin_catalog\n";
    if (!$PREUPGRADE) {
        print LOG "Skipping: only run for preupgrade\n";
        return;
    }
    #<plugin-list version="70100.427">
    # A lexical is used instead of assigning to the global $_, which would
    # clobber the caller's topic variable.
    my $line=qx{ grep 'plugin-list version' /usr/local/avamar/lib/plugin_catalog.xml };
    my ($plugver)= $line =~ m/"(.*)"/;
    msg("Plugin Catalog Version",$plugver);
}
5704########## End plugin_catalog ##########
5705
5706########## Start checkemctools ##########
# Collect hardware information from all EMC nodes into %CMDTOOL.
# Each command's output is tagged with a prefix (ENC, SS, VD, BBU, SDR, CR,
# SB, FLSH) by awk on the node, so that the parser below can tell which tool
# produced a given line.  Nothing is returned; results are left in %CMDTOOL.
sub getcmdtool {
 print LOG "\n\n\n### ".localtime()." ### Starting checkemctools\n";
 gethardware() if (!$MANUFACTURER);
 my $nodes=getnodes_hw("emc");
 if (!$nodes) {
 print LOG "no emc nodes found\n";
 return;
 }
 my $e="";
 # q[] (no interpolation): the awk $0 references must reach the shell untouched.
 $cmd=q[ which CmdTool2 && CmdTool2 -encinfo -a0 -nolog | awk '{print "ENC:"$0}';
 which CmdTool2 && CmdTool2 -ShowSummary -A0 -nolog | awk '{print "SS:"$0} ';
 which CmdTool2 && CmdTool2 -LDInfo -Lall -aALL -nolog | awk '{print "VD:"$0} ';
 which CmdTool2 && CmdTool2 -AdpBbuCmd -a0 | awk '{print "BBU:"$0} ';
 sudo ipmitool sdr | awk '{print "SDR:"$0}';
 sudo ipmitool raw 0x30 0x2e 0x01 | awk '{print "CR:"$0} ';
 sudo /usr/sbin/dmidecode | grep SandyBridge | awk '{print "SB:"$0} ';
 which flashupdt && sudo flashupdt -i | awk '{print "FLSH:"$0} ' ;
 ];

 mapall("--nodes=$nodes",$cmd);
 open(CMD_PIPE,$TMPFILE);
 while (<CMD_PIPE>) { chomp;
 print LOG "$_\n" ;
 # Reported for every matching line; $node still holds the node from the
 # most recent header line at this point.
 if (/sudo: no tty present/) {
 printboth("INFO: Node $node sudo ipmitool,flashupdt or dmidecode failed with $_ \n");
 printboth("RESOLUTION: Fix sudo problem. Not all hardware health checks will performed\n");
 printboth(" For Gen4s make sure the command 'sudo flashupdt -i' works. See KB 485887 and 494057.\n\n");
 msg("Hardware health info retrieval","INFO");
 }
 $node=$1 if (/(\(0\..*\)) ssh/);
 # Split off the tag; $_ becomes the rest of the line.  This lexical $cmd
 # hides the package-level $cmd for the remainder of the loop body.
 (my $cmd,$_)=split(":",$_,2);
 # ipmitool sdr lines are "name | info | value"
 if ($cmd eq "SDR") {
 my($name,$info,$value)=split(/\s*\|\s*/);
 $CMDTOOL{"SDR"}{$node}{$name}{$name}=$value if $value;
 next;
 }
 # Output of the raw ipmitool command is stored unparsed
 if ($cmd eq "CR") {
 $CMDTOOL{"flash"}{$node}{"CR"}=$_;
 next;
 }

 # Everything else is treated as "field : value" after trimming
 s/^\s*//; s/\s*$//;
 my($field,$value)=split(/\s*:\s*/,$_,2);
 $field=~ s/\s*$//;

 # $category and $name are not declared here, so they keep their value from
 # one line to the next: a heading line sets them and the following
 # field/value lines are stored underneath.
 if ($cmd eq "SS") {
 $category=$_ if (/^System$|^Controller$|^BBU$|^Enclosure$|^PD$|^Virtual Drives$/ );
 $name=$value if ($field =~ /^ProductName$|^BBU Type$|^Connector$|^Virtual Drive$/);
 $name=$value if ($field eq "Product Id" and $category ne "PD" );
 $CMDTOOL{$category}{$node}{$name}{$field}=$value if $value;
 print LOG "Add SS:CMDTOOL {$category}{$node}{$name}{$field}=$value\n" if $value;
 } elsif ($cmd eq "ENC") {
 $name=$value if ($field =~ /Product Identification/);
 # Only the revision level is stored for enclosures
 $CMDTOOL{"Enclosure"}{$node}{$name}{$field}=$value if ($field =~ /Product Revision Level/);
 print LOG "Add ENC:CMDTOOL {Enclosure}{$node}{$name}{$field}=$value\n" if $value;
 } elsif ($cmd eq "VD") {
 $name=$value if ($field =~ /^Virtual Drive/);
 $CMDTOOL{"VirtualDrive"}{$node}{$name}{$field}=$value if $value;
 print LOG "Add VD:CMDTOOL {Virtual Drive}{$node}{$name}{$field}=$value\n" if $value;
 } elsif ($cmd eq "FLSH"){
 # Section headings of the flashupdt output become the grouping name
 $name=$field if (/System BIOS and FW Versions|BMC Firmware Version:|Baseboard Information:|System Information:|Chassis Information:/);
 # Strip leading blanks and dots from the value
 $value =~ s/^[ \.]*//;
 $CMDTOOL{"flash"}{$node}{$name}{$field}=$value if $value;
 print LOG "Add FLSH:CMDTOOL {flash}{$node}{$name}{$field}=$value\n" if $value;
 } elsif ($cmd eq "SB") {
 $CMDTOOL{"dmidecode"}{$node}{"SandyBridge"}=$_;
 print LOG "Add SB:CMDTOOL {dmidecode}{$node}{SandyBridge}=$_\n" if $value;
 } elsif ($cmd eq "BBU") {
 $CMDTOOL{"BBU"}{$node}{$field}=$value if $value;
 print LOG "Add BBU:CMDTOOL {BBU}{$node}{$field}=$value\n" if $value;
 }
 }
}
5780
5781########## End checkemctools ##########
5782
5783########## Start checkarcconf ##########
# checkarcconf: inspect the logical devices reported by 'arcconf getconfig 1 ld'.
#
# Takes no arguments. Does nothing unless $NODETYPE matches gen4t.
# A logical device is reported as an error when
#   - its status contains "Degraded", or
#   - the same device name was seen more than once on one node, or
#   - one of its segments is reported as "Missing".
# Findings go to LOG/printboth(); a failure is recorded with msg("Intel Controller","FAILED").
sub checkarcconf {
  print LOG "\n\n\n### ".localtime()." ### Starting checkarcconf\n";
  getnodetype() if (!$NODETYPE);
  if ( $NODETYPE !~ /gen4t/i ) {
    print LOG "No gen4t node: $NODETYPE\n";
    return;
  }
  my $dev;
  my (%SEG,%DEVICE,%STATUS);

  mapall("","arcconf getconfig 1 ld");
  # The command output is read back from $TMPFILE (presumably written by mapall - confirm).
  # The 2-arg open form is kept on purpose: $TMPFILE is opened exactly as the rest of the file does.
  if (!open(CMD_PIPE,$TMPFILE)) {
    print LOG "ERROR: checkarcconf could not open $TMPFILE: $!\n";
    return;
  }
  while(<CMD_PIPE>) {
    print LOG;chomp;
    # A "(0.N) ssh" header line announces the node the following output belongs to
    $node=$1 if (/(\(0\..*\)) ssh/);
    if (/Logical device name\s+:\s+(.*)/){
      $dev="$node,$1";
      $DEVICE{$dev}++;
      print LOG "Found logical device $dev\n";
    }
    if (/Status of logical device\s+:\s+(.*)/) {
      my $status=$1;
      print LOG "saw status of $status\n";
      # Collect each distinct status once; quote it so regex metacharacters in the
      # status text are compared literally
      if ($STATUS{$dev} !~ /\Q$status\E/) {
        $STATUS{$dev}.="," if ($STATUS{$dev});
        $STATUS{$dev}.=$status ;
      }
    }
    if (/Segment (\d+)\s+:\s+Missing/) {
      $SEG{$dev}="Segment $1 Missing" ;
      print LOG "Saw missing segment for $1\n";
    }
  }
  close(CMD_PIPE);

  my $e=0;
  for (sort keys %DEVICE) {
    print LOG "name:$_ count:$DEVICE{$_} status:$STATUS{$_} segmiss:$SEG{$_}\n";
    if ($STATUS{$_} =~ /Degraded/ or $DEVICE{$_} > 1 or $SEG{$_} ) {
      # Key is "node,device name"; limit the split so a comma inside the device
      # name does not truncate it
      my($node,$dev)=split(",",$_,2);
      printboth("ERROR: Node $node Device name:$dev has an issue with the count ($DEVICE{$_}) or status ($STATUS{$_}) or missing segment ($SEG{$_})\n");
      $e=1;
    }
  }

  if ($e) {
    printboth("RESOLUTION: See KB 491050 for possible solutions\n\n");
    msg("Intel Controller", "FAILED");
  }
}
5832########## End checkarcconf ##########
5833
5834
5835
5836########## Start checkemcstorage ##########
# checkemcstorage: hardware health checks for EMC Gen4s/Gen4t nodes.
#
# Takes no arguments. Uses the global tables %CMDTOOL, %AVSYSREPORT, %NODE_INFO,
# %NODE_LXREF, %NODELIST, %CONFIG and %DATA_SPECTRE (the first ones are loaded on
# demand), prints findings with printboth() and records results with msg():
#   - SAS expander firmware level            (skipped when $PREUPGRADE is set)
#   - Intel block update, Gen4T firmware and Spectre/Meltdown levels per node
#   - Megaraid driver / ST2000NM0033-9ZM175 drive combination
#   - power supply cold redundancy setting   (skipped when $PREUPGRADE is set)
#   - State/Status of BBU, enclosure, controller, PD and virtual drives
#   - ipmitool SDR sensor status
#   - Gen4T ROC serial number / World Wide Name still at the default value
# The final msg("EMC Hardware Health",...) is PASSED, WARNING or FAILED.
sub checkemcstorage {
  print LOG "\n\n\n### ".localtime()." ### Starting checkemcstorage\n";
  getconfiginfo() if (!$GOTCONFIGINFO);
  getcmdtool() if (!%CMDTOOL);
  getnodetype() if (!%PARTLIST );
  getavsysreport() if (!%AVSYSREPORT);
  my $e="";
  my $fail="PASSED";

  if (!$PREUPGRADE) {
    # Check Controller FW Package
    $e="";
    $section="Enclosure";
    $field="Product Revision Level";
    for $node (sort keys %{$CMDTOOL{$section}} ) {
      for $name (sort keys %{$CMDTOOL{$section}{$node}} ) {
        $value=$CMDTOOL{$section}{$node}{$name}{$field};
        print LOG qq[Enc: key: $key node:$node name:$name $field: $value\n];
        if ($name =~ /RES2SV240/ and $value !~/0d00/ ) {
          printboth("INFO: Node $node SAS expander firmware '$value' does not match most recent known version of '0d00'\n");
          $e="yes";
          $fail="WARNING";
        }
      }
    }
    printboth("RESOLUTION: Apply hot fix 56915 to upgrade firmware.\n\n") if ($e);
  } # END OF SKIP FOR PREUPGRADE

# INTEL BLOCK UPDATE
  # RPM level table; currently empty because the per-RPM comparison below is disabled
  my %rpm = (
  );
# "CmdTool2" =>"8.07.16-1",
# "storcli" =>"1.13.06-1",
# "selviewer" =>"11.0-B10",
# "syscfg" =>"2.0-B10",
# "ipmiutil" =>"2.7.9-1.EMC.SLES11",
# "sysinfo" =>"12.0-B11",
# "flashupdt" =>"11.0-B15",
# "lsi-megaraid_sas-kmp-default" =>"06.704.15.00_2.6.32.12_0.7-3.1",
# "intel-gb" =>"3.4.7-4.1",
# "qlogic-qla2xxx-kmp-default" =>"8.03.07.03.11.1.k_2.6.32.12_0.7-1"
#"intel-ixgbe" =>"3.14.5-1",
#"intel-igb-kmp-default" =>"3.4.7_2.6.32.12_0.7-4.1",
#"intel-ixgbe-kmp-default" =>"3.14.5_2.6.32.12_0.7-1",

# Intel Block
  # One message buffer per reported check; filled in the node loop, printed after it
  my $emcblock="";
  my $biostransfer="";
  my $spectre="";
  my $fwu="";
  my $nam4="";

  # Multi-node grids also check the utility node ("0.s")
  my @utility=($NODE_COUNT==1) ? @NODES : (@NODES,("0.s"));
  for my $lognode (sort @utility) {
    my $node=$NODE_LXREF{$lognode};
    (my $plain_node=$node) =~ s/[()]//g;

    print LOG "IBU Check node $node logical:$lognode\n";
#   for my $component (sort keys %rpm) {
#     my $version=getmaxrpmver($component,$node);
#     print LOG "component: $component version: $version\n";
#     if ( $component eq "CmdTool2" and $version =~ /8.02.16-1/) {
#       # do nothing for now. temporary? change to allow two versions of CmdTool2
#     } else {
#       $emcblock.="INFO: Node $node $component version '$version' does not match latest known version of $rpm{$component}\n" if ( $version !~ /$rpm{$component}/ and $version) ;
#     }
#   }

#
# GEN4S
#
    if ($NODE_INFO{$node}{gendesc} =~ /Gen4s/i ) {
      # NOTE(review): this lookup wraps $node in parentheses while every other %CMDTOOL
      # lookup in this sub uses $node as is - confirm which key format is stored.
      $biostransfer.="WARNING: Node $node is in BIOS firmware transfer mode\n" if ($CMDTOOL{"dmidecode"}{"($node)"}{"SandyBridge"});

      my $version=$CMDTOOL{"Controller"}{$node}{"Intel(R) Integrated RAID Module RMS25CB080(Bus 0, Dev 0)"}{"FW Package Version"} ;
      my $minver="23.33.0-0022";
      print LOG "Node $node Controller Firmware version '$version' (latest $minver)\n";
      $emcblock.="INFO: Node $node Controller Firmware version '$version' does not match latest known version of $minver\n" if (minver($version,$minver)) ;

      $version=$CMDTOOL{"flash"}{$node}{"System BIOS and FW Versions"}{"BIOS Version"} ;
      $minver="SE5C600.86B.02.04.0007";
      print LOG "Node $node System BIOS version '$version' (latest $minver)\n";
      $emcblock.="INFO: Node $node System BIOS version '$version' does not match latest known version of $minver\n" if (minver($version,$minver));

      $version=$CMDTOOL{"flash"}{$node}{"BMC Firmware Version"}{"ME Firmware Version"} ;
      $minver="02.01.07.328";
      # Fixed: the log line used the misspelled variable $miniver and always logged an empty level
      print LOG "Node $node ME Firmware version '$version' (latest $minver)\n";
      $emcblock.="INFO: Node $node ME Firmware version '$version' does not match latest known version of $minver\n" if (minver($version,$minver));

      $version=$CMDTOOL{"flash"}{$node}{"BMC Firmware Version"}{"Op Code"};
      $minver="1.28.11044";
      print LOG "Node $node Op Code version '$version' (latest $minver)\n";
      $emcblock.="INFO: Node $node Op Code version '$version' does not match latest known versions of $minver\n" if ( minver($version,$minver));

      $version=$CMDTOOL{"flash"}{$node}{"BMC Firmware Version"}{"SDR Version"} ;
      $minver="SDR Package 1.14";
      print LOG "Node $node SDR Firmware version '$version' (latest $minver)\n";
      $emcblock.="INFO: Node $node SDR Firmware version '$version' does not match latest known version of $minver\n" if (minver($version,$minver));

      # Supercap firmware
      $version=$CMDTOOL{"BBU"}{"$node"}{"Module Version"};
      $minver="25849-04";
      print LOG "Node $node BBU Module Version '$version' (latest $minver)\n";
      $emcblock.="INFO: Node $node BBU Module Version '$version' does not match latest known version of $minver\n" if ( minver($version,$minver));

      # Check kernel: the kernel level is the third whitespace separated field
      (undef,undef,$version)=split(" ",$NODELIST{"/nodestatuslist/nodestatus/$plain_node/version/kernel"}) ;
      $minver="2.6.32.59-0.19.1.17443.1.PTF-default";
      print LOG "Node $node kernel $version (latest $minver)\n";
      $spectre.="INFO: Node $node kernel version $version does not match remediated version $minver\n" if (minver($version,$minver));
    }

#
# GEN4T
#
    if ($NODE_INFO{$node}{gendesc} =~ /Gen4t/i ) {

      # Check bios version
      # NOTE(review): %AVSYSREPORT is indexed with $node here and with $plain_node for the
      # controller below - confirm which key format getavsysreport stores.
      $version=$AVSYSREPORT{$node}{'chassis-info'}{'BIOS Version'};
      $minver="41.96";
      print LOG "Node $node BIOS Version $version (latest $minver)\n";
      $spectre.="INFO: Node $node BIOS Version $version does not match remediated version $minver\n" if (minver($version,$minver));

      # Check kernel for spectre
      (undef,undef,$version)=split(" ",$NODELIST{"/nodestatuslist/nodestatus/$plain_node/version/kernel"}) ;
      $minver="3.0.101-0.47.106.50";
      print LOG "Node $node kernel $version (latest $minver)\n";
      $spectre.="INFO: Node $node kernel version $version does not match remediated version $minver\n" if (minver($version,$minver));

      # Check controller firmware
      $version=$AVSYSREPORT{$plain_node}{"controller"}{"Firmware Version"};
      $minver="7.10-0";
      print LOG "Node $node controller firmware version $version (latest $minver)\n";
      # Fixed: the message interpolated $id and $1, neither of which is set here;
      # report the firmware level that was actually compared
      $fwu.="INFO: Node $node Controller Firmware $version does not match latest know version of $minver\n" if (minver($version,$minver));

      # Check disk firmware
      for(split (/[\r\n]/,$AVSYSREPORT{$node}{'physical-disk'})) {
        print LOG "#>DISK $_\n";
        if (/Physical Disk ID.*: (\d+)/) {
          $disk=$1;
          print LOG "set disk $disk\n";
        }
        if (/Firmware Revision.*: (\S*)/) {
          $firmware=$1 ;
          print LOG "set firmware $firmware\n";
        }
        if (/Product ID.*ST2000NXCLAR2000/ and $firmware ne "GS20" ) {
          $fwu.="INFO: Node $node Disk $disk Firmware $firmware does not match latest know version of GS20\n";
        }
        if (/Product ID.*HUS72602CLAR2000/ and $firmware ne "NAM4" ) {
          $nam4.="INFO: Node $node Disk $disk Firmware $firmware does not match latest know version of NAM4\n";
        }

      }

    } # End of Gen4t

    # Spectre
    # check cpu vulnerability for both gen4s/gen4t
    $spectre.="INFO: Node $node CPU vulnerability remediation not found\n" if ($DATA_SPECTRE{$node} =~ /bad/);

  } # End of node loop

  if ($emcblock) {
    # Fixed: 'my $msg=... if (...)' is undefined behaviour in Perl; use a plain conditional value
    my $msg=($biostransfer) ? "\n Readme contains additional actions for nodes in BIOS firmware transfer mode" : "";
    printboth("$emcblock${biostransfer}RESOLUTION: See hotfix 305351 for more information about the Intel block update$msg\n\n");
    msg("Intel block update","INFO");
  } else {
    msg("Intel block update","PASSED");
  }

  if ($nam4) {
    printboth("${nam4}RESOLUTION: See hotfix 299999 for more information\n\n");
    msg("Gen4T Hitachi Firmware","INFO");
  }

  if ($fwu) {
    printboth("${fwu}RESOLUTION: See hotfix 305019 for more information\n\n");
    msg("Gen4T Firmware","INFO");
  } else {
    msg("Gen4T Firmware","PASSED");
  }

  if ($spectre) {
    printboth("${spectre}RESOLUTION: See KB 519675 for Spectre/Meltdown remediation information\n\n");
    msg("Spectre/Meltdown","INFO");
  }


# Megaraid and Megalodon drive
  my $errmegalodon="";
  my $errmegaraid="";
  for $node (sort keys %{$CMDTOOL{"PD"}} ) {
    # Fixed: the drive flag was never reset, so after the first node with an affected
    # drive every following node was reported as well. Track it per node; the global
    # $megalodon is still set so its "found on some node" meaning is unchanged.
    my $node_megalodon=0;
    for $disk (sort keys %{$CMDTOOL{"PD"}{$node}} ) {
      print LOG "disk: $node $disk $CMDTOOL{'PD'}{$node}{$disk}{'Product Id'}\n";
      if ($CONFIG{'/gsanconfig/perftriallimit'}<16
          and $CMDTOOL{"PD"}{$node}{$disk}{"Product Id"} eq "ST2000NM0033-9ZM175"
          and $CMDTOOL{"PD"}{$node}{$disk}{"Revision"} =~ /GT0[26]/) {
        $megalodon=1;
        $node_megalodon=1;
      }
    }
    print LOG "driver $CMDTOOL{'System'}{$node}{'SGPIO'}{'Driver Version'}\n";
    if ( $node_megalodon and $CMDTOOL{"System"}{$node}{"SGPIO"}{"Driver Version"} eq "00.00.05.38-SL1") {
      $errmegaraid.="ERROR: Node $node Megaraid_sas driver 00.00.05.38-SL1 and Megalodon drive ST2000NM0033-9ZM175 found.\n" if ($errmegaraid !~ /\Q$node\E/);
    } elsif ($node_megalodon) {
      $errmegalodon.="WARNING: Node $node has drive ST2000NM0033-9ZM175\n" if ($errmegalodon !~ /\Q$node\E/);
    }
  }
  if ($errmegaraid) {
    printboth("${errmegaraid}RESOLUTION: See hotfix bug 225015 for more information\n\n");
    msg("Megaraid Driver Version","FAILED");
  }
  if ($errmegalodon) {
    printboth("${errmegalodon}RESOLUTION: Set perftriallimit to 16. See esc8504 for more info\n\n");
    msg("Megalodon Drive","WARNING");
  }

  if (!$PREUPGRADE) {
    # Check For cold redundancy bug
    $e="";
    $section="flash";
    my @utility=($NODE_COUNT==1) ? @NODES : (@NODES,("0.s"));
    for my $lognode (sort @utility) {
      my $node=$NODE_LXREF{$lognode};
      $value=$CMDTOOL{$section}{$node}{"BMC Firmware Version"}{"Op Code"};
      my $cr=$CMDTOOL{$section}{$node}{"CR"};
      print LOG "OpCode:$value CR:$cr\n";
      # NOTE(review): '<' compares the version strings numerically, so only the leading
      # "major.minor" part takes part in the comparison - confirm this is sufficient.
      if ( $value < "1.20.5793" and $cr !~ /01 00/ and $value and $cr ) {
        printboth("ERROR: Node $node power supply incorrectly set to cold redundancy\n");
        $e="yes";
        $fail="FAILED";
      }
    }
    printboth("RESOLUTION: See KB458344 for more information.\n\n") if ($e);
  }

  # Check For bad status or state
  $e="";
  for $section ("BBU","Enclosure","Controller","PD","Virtual Drives","VirtualDrive") {
    for $node (sort keys %{$CMDTOOL{$section}} ) {
      for $name (sort keys %{$CMDTOOL{$section}{$node}} ) {
        # Prefer the "State" field, fall back to "Status"; entries with neither are skipped
        $field="State"; $value=$CMDTOOL{$section}{$node}{$name}{$field};
        if (!$value) {
          $field="Status"; $value=$CMDTOOL{$section}{$node}{$name}{$field};
        }
        next if !$value;
        if ($value !~ /ok|online|active|healthy|optimal/i) {
          printboth("ERROR: Node $node $section $name hardware in unknown $field '$value'\n");
          $e="yes";
          $fail="FAILED";
        }
      }
    }
  }
  printboth("RESOLUTION: Resolve hardware issues.\n For disk issues try 'CmdTool2 -PDGetMissing -a0' or 'avsysreport vdisk|grep Slot' and look for a missing number\n\n") if ($e);

  # Check For bad SDR value
  $e="";
  my $resolution="";
  $section="SDR";
  for $node (sort keys %{$CMDTOOL{$section}} ) {
    for $name (sort keys %{$CMDTOOL{$section}{$node}} ) {
      $value=$CMDTOOL{$section}{$node}{$name}{$name};
      #print LOG qq[SDR: key: $key node:$node name:$name $field: $value\n] ;
      if ($value !~ /ok|ns/) {
        printboth("ERROR: Node $node Sensor '$name' has unexpected status of '$value'\n");
        $resolution.=" For BB +3.3V Vbat issues see KB471257\n" if ($name =~ /BB.*Vbat/ and $resolution !~ /Vbat/);
        $e="yes";
        $fail="FAILED";
      }
    }
  }
  printboth("RESOLUTION: Check and resolve any hardware issues. Use 'sudo ipmitool sdr' to see the error\n$resolution\n") if ($e);


### Check for ROC S/N fix
  if (my $nodes=getnodes_gen("Gen4t-")) {
    mapall("--nodes=$nodes","arcconf getconfig 1 ad");
    $e="";
    # Read the command output back from $TMPFILE; a failed open is logged instead of
    # being silently treated as "no findings"
    if (open(CMD_PIPE,$TMPFILE)) {
      while(<CMD_PIPE>) {chomp;
        print LOG "$_\n";
        $node=$1 if (/(\(0\..*\)) ssh/) ;
        if (/Controller Serial Number\s*: FFFFFF00/ or /Controller World Wide Name\s*: 50000D1FFFFFFF00/) {
          $e.="WARNING: Node $node ROC Serial Number or World Wide Name is the default\n";
        }
      }
      close(CMD_PIPE);
    } else {
      print LOG "ERROR: checkemcstorage could not open $TMPFILE: $!\n";
    }
    if ($e) {
      printboth("${e}RESOLUTION: See hotfix 304094 for more information\n\n");
      msg("Gen4T ROC SN/WWN","WARNING");
    }
  }

  msg("EMC Hardware Health", $fail);
}
6130########## End checkemcstorage ##########
6131
6132
6133
6134########## Start avhardening ##########
# avhardening: report the avhardening RPM installed on each node.
#
# Takes no arguments. Looks for "avhardening" entries in @RPMS
# (example entry: "(0.0) avhardening-2.0.0-3") and
#   - fails when the nodes do not all carry the same RPM,
#   - fails a pre-upgrade run ($PREUPGRADE) when a SLES node has an RPM older than 2.0.0-7,
#   - otherwise records the installed version with msg().
# Nothing is recorded when no avhardening RPM is present at all.
sub avhardening {
  print LOG "\n\n\n### ".localtime()." ### Starting avhardening\n";
  getconfiginfo() if (!$GOTCONFIGINFO);
  # Example rpm: (0.0) avhardening-2.0.0-3
  if (my @noderpms=grep(/avhardening/,@RPMS)){
    my $lastrpm="x";
    my ($msg,$e,$upgmsg)=("")x3;
    for my $node (sort @NODES) {
      my $physnode=$NODE_LXREF{$node};
      my ($ver)=grep(/^\($physnode\)/,@noderpms);
      my ($rpmnode,$rpm)=split(/\s/,$ver);
      print LOG "phys:$physnode logic:$node rpm:$rpm\n";

      # Fixed: $error used to be declared inside the SLES block below, so the
      # $PREUPGRADE test after it read an unrelated global and never fired
      my $error=0;

      # Min req for SLES (only meaningful when this node has the RPM installed)
      if ($rpm and $NODE_INFO{$node}{os} =~ /suse|sles/i){
        # remove everything in rpm up to first number. replace any - with . for version comparison
        (my $tmprpm=$rpm) =~ s/^.*?(\d)/$1/;
        my @VER=split(/[-\.]/, $tmprpm);
        my @MIN=split(/[-\.]/, "2.0.0-7");
        # Compare component by component; the first difference decides
        for my $index (0..$#MIN) {
          last if ($VER[$index] > $MIN[$index]) ;
          if ($VER[$index] < $MIN[$index]) {
            $error=1;
            last;
          }
        }
      }
      if ($PREUPGRADE and $error) {
        $upgmsg.="ERROR: Node $node $rpm is less than version 2.0.0-7\n";
      }
      $msg.=" Node $node AvHardening RPM $rpm\n";
      # Any node whose RPM differs from the previous node's marks a mismatch
      $e="yes" if ($rpm ne $lastrpm and $lastrpm ne "x") ;
      $lastrpm=$rpm;
    }
    if ($upgmsg) {
      printboth($upgmsg) ;
      printboth("RESOLUTION: Before upgrading avhardening must be removed. See Esc 7192 for more information\n\n");
    }
    if ($e) {
      printboth("ERROR: Mismatch of security RPM's installed\n$msg");
      printboth("RESOLUTION: Review version of avhardeing RPM's installed on each node\n\n") ;
    }
    if ($e or $upgmsg) {
      msg("Avamar Hardening RPM","FAILED");
    } elsif ($lastrpm ne "x" ) {
      $lastrpm =~ s/^.*avhardening-//;
      msg("Avamar Hardening RPM",$lastrpm);
    }
  } else {
    print LOG "No hardening RPMS found\n";
  }
}
6190########## End avhardening #########
6191
6192
6193########## Start gen4sver ##########
# gen4sver: warn when the installed Avamar version is too old for Gen4s hardware.
# Takes no arguments. Versions with $VERSNUM of 611 or higher are only logged as ok;
# anything lower produces a warning and a "Hardware Type" WARNING result.
sub gen4sver {
  print LOG "\n\n\n### ".localtime()." ### Starting gen4sver\n";
  unless ($AVAMARVER) {
    getinstalledversion();
  }

  my $gen4s_supported=($VERSNUM >= 611);
  if (!$gen4s_supported) {
    printboth("WARNING: Avamar Version $AVAMARVER does not support Gen4s hardware. Make sure you are not adding Gen4s hardware.\n\n") ;
    msg("Hardware Type", "WARNING");
  } else {
    print LOG "Gen4s ok on $VERSNUM\n";
    return;
  }
}
6204########## End gen4sver ##########
6205
6206########## Start getear ##########
# getear: report whether Encryption At Rest is enabled.
#
# Takes no arguments. Scans the %NODELIST keys for an
# "atrestencryption-status/enabled" entry whose value is "true". The first such
# entry decides the outcome:
#   - pre-upgrade run ($PREUPGRADE) to a 7.1.0 version: caution WARNING
#   - otherwise: informational "ENABLED" result
# Nothing is reported when no entry is "true".
sub getear {
  print LOG "\n\n\n### ".localtime()." ### Starting getear\n";
  getconfiginfo() if (!$GOTCONFIGINFO);
  # Only the hash keys are paths; iterate keys explicitly instead of flattening
  # the whole hash (keys and values) into the grep
  for my $path (grep { m{atrestencryption-status/enabled} } keys %NODELIST) {
    next if ( $NODELIST{$path} ne "true" );
    # Dots are escaped so that only a literal "7.1.0" selects the upgrade caution
    if ($PREUPGRADE and $UPGRADE_VERSION =~ /7\.1\.0/) {
      printboth("WARNING: If proper steps are not taken prior to running the Upgrade Workflow, the workflow\n");
      printboth(" will fail, and additional more difficult steps will have to be taken.\n");
      printboth("RESOLUTION: Be sure to follow the preupgrade steps outlined in the Solve Desktop Procedure Generator in the section\n");
      printboth(" called 'Configuration update for systems with Encryption at Rest enabled' before starting the Upgrade Workflow.\n\n");
      msg("Upgrade to 7.1.0 with EAR caution","WARNING");
    } else {
      printboth("INFO: Encryption At Rest is enabled\n");
      msg("Encryption At Rest", "ENABLED");
    }
    # One enabled entry is enough; report only once
    last;
  }

}
6227########## End getear ##########
6228
6229########## Start getrestapi ##########
# getrestapi: record the installed REST API version.
# Takes no arguments. Every @RPMS entry mentioning "rest-api" is examined and the
# text following "rest-api-" of the last such entry is reported; without any entry
# the result is "Not Installed".
sub getrestapi {
  print LOG "\n\n\n### ".localtime()." ### Starting getrestapi\n";
  unless ($GOTCONFIGINFO) {
    getconfiginfo();
  }

  my $restapi_version="Not Installed";
  # The loop variable aliases the @RPMS elements, so the chomp also trims the list itself
  foreach my $rpm_entry (grep { /rest-api/ } @RPMS) {
    chomp($rpm_entry);
    ($restapi_version)=($rpm_entry =~ m/rest-api-(.*)/);
  }
  msg("REST API Version", $restapi_version);
}
6239########## End getrestapi ##########
6240
6241########## Start getdellstorage ##########
# getdellstorage: collect 'omreport' output from all Dell nodes into %DELLSTORAGE.
#
# Takes no arguments. Returns early when there are no Dell nodes or when omreport
# is not available ($OMREPORT unset; a "Dell Hardware Status" WARNING is recorded).
# The parsed data is stored as
#   $DELLSTORAGE{category}{node}{name}{field} = value
# where category comes from the section headings of the omreport output and name
# is the value of the most recent "ID" or "Index" field ("0" until one is seen).
# A "No controllers found" line is stored under the pseudo category "noctl".
sub getdellstorage {
  print LOG "\n\n\n### ".localtime()." ### Starting getdellstorage\n";
  gethardware() if (!$MANUFACTURER);
  my $nodes=getnodes_hw("dell");
  if (!$nodes) {
    print LOG "Skipping, no Dell nodes found\n";
    return;
  }
  checkostools() if (!$RAN_OMREPORT);
  if (!$OMREPORT) {
    printboth("WARNING: Dell Hardware will not be checked, 'omreport' not installed on all Dell nodes.\n\n");
    msg("Dell Hardware Status","WARNING");
    return;
  }
  # Fixed: 'my $node,$category,...' without parentheses declared only $node and left
  # the parser state in package globals shared with other subs
  my ($node,$category,$name,$field,$value);
  # The last command swaps "field : value" of the chassis summary into "value:field"
  my $cmd=q[ omreport storage controller controller=0;
    omreport chassis info; omreport chassis bios;
    omreport chassis memory; omreport chassis processors;
    omreport chassis pwrsupplies; omreport chassis temps;
    omreport chassis volts; omreport chassis batteries;
    omreport chassis | sed -e 's/\(.*\)\s*:\s*\(.*\)/\2:\1/';
  ];
  mapall("--nodes=$nodes",$cmd);
  # Read the command output back from $TMPFILE; log a failed open instead of ignoring it
  if (!open(CMD_PIPE,$TMPFILE)) {
    print LOG "ERROR: getdellstorage could not open $TMPFILE: $!\n";
    return;
  }
  while(<CMD_PIPE>) {
    print LOG;chomp;
    undef $field; undef $value;

    # A "(0.N) ssh" header starts the output of the next node
    if (/(\(0\..*\)) ssh/) {
      $node=$1; undef $category; undef $name; next;
    }
    next if (!$node);

    $category=$name=$field=$value="noctl" if (/No controllers found/);
    # Section headings switch the category and reset the entry name
    if ((/^(Controller)s$/) or (/^(Connector)s$/) or (/^(Enclosure)\(s\)$/) or (/^(Virtual Disk)s$/) or
        (/^(Physical Disk)s$/) or (/^(Battery)$/) or (/^(Batteries)$/) or (/^(Main System Chassis)$/) or (/^(.*) Information$/)) {
      $category=$1; $name="0"; next;
    }
    next if (!$category);

    ($field,$value)=split(/\s*:\s*/,$_,2) if (!$value) ;
    $name=$value if ($field =~ /^ID$|^Index$/);
    next if (!defined $field or !defined $value or !defined $name);

    $DELLSTORAGE{$category}{$node}{$name}{$field}=$value;
    print LOG "Add DELLSTORAGE c:$category n:$node name:$name f:$field v:$value\n";
  }
  close(CMD_PIPE);
}
6290########## End getdellstorage ##########
6291
6292########## Start checkdellstorage ##########
6293sub checkdellstorage {
6294 print LOG "\n\n\n### ".localtime()." ### Starting checkdellstorage\n";
6295 getdellstorage() if (!%DELLSTORAGE);
6296 return if (!%DELLSTORAGE);
6297 getopersys() if (!$OS);
6298
6299 my $e="";
6300 my $fail="PASSED";
6301 my $section,$field;
6302
6303 # Check for Main System Chassis
6304 $e="";
6305 $section="Main System Chassis";
6306 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6307 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6308 for my $field (sort keys %{$DELLSTORAGE{$section}{$node}{$name}}) {
6309 my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6310 next if ($field =~ /COMPONENT/ or $value =~ /Ok|Learning/);
6311 $e.="ERROR: Node $node $field status is $value\n";
6312 }
6313 }
6314 }
6315
6316 # Check chassis status
6317 my @sections=("Memory","Processors");
6318 @sections=(@sections,"Power Supplies","Temperature Probes","Voltage Probes","Batteries") if(!$PREUPGRADE);
6319 $field="Status";
6320 for $section (@sections) {
6321 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6322 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6323 my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6324 next if (($section eq "Memory" and $DELLSTORAGE{$section}{$node}{$name}{"Type"} =~ /Not Occupied/) or
6325 ($section eq "Processors" and $DELLSTORAGE{$section}{$node}{$name}{"Processor Brand"} =~ /Not Occupied/) or
6326 ($section eq "Voltage Probes" and $value =~ /Unknown/) or $value =~ /Ok/i);
6327 $e.="ERROR: Node $node $section at index $name $field is $value\n";
6328 }
6329 }
6330 }
6331
6332 # Compare Installed and Available memory
6333 $section="Memory";
6334 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6335 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6336 my ($installed,$foo) = split(" ",$DELLSTORAGE{$section}{$node}{$name}{"Total Installed Capacity"});
6337 my ($available,$foo) = split(" ",$DELLSTORAGE{$section}{$node}{$name}{"Total Installed Capacity Available to the OS"});
6338 if ($installed - $available > 1023) {
6339 $e.="ERROR: Node $node Memory installed is $installed MB but only $available MB is available to the O/S\n";
6340 }
6341 }
6342 }
6343 if ($e) {
6344 printboth("\n$e");
6345 printboth("RESOLUTION: Resolve hardware errors detected\n\n");
6346 msg("Dell Hardware Status","FAILED");
6347 } else {
6348 msg("Dell Hardware Status","PASSED");
6349 }
6350
6351 # Check for No controllers found
6352 $e="";
6353 $section="noctl";
6354 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6355 $e.="ERROR: Node $node No controllers found\n";
6356 }
6357 if ($e) {
6358 printboth("\n$e");
6359 printboth("RESOLUTION: Determine why no controllers are found. Restarting Dell Open Manage services may fix the problem\n");
6360 printboth(" Be cautious restarting as it might kill the GSAN. Take a CP and see KB333585 to quiesce the grid\n\n");
6361 msg("Dell Controller Status","FAILED");
6362 }
6363
6364 # Check for a bad Status or State
6365 $e="";
6366 my @sections=("Controller","Virtual Disk","Physical Disk");
6367 @sections=(@sections,"Connector","Battery","Enclosure") if(!$PREUPGRADE);
6368 for $section (@sections) {
6369 for my $node (sort keys %{$DELLSTORAGE{$section}} ) {
6370 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6371 $field="State"; my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6372 if (defined $value and $value !~ /Ready|Online|Charging|Learning/){
6373 $e.="ERROR: Node $node $section $name in unexpected $field '$value'\n";
6374 }
6375 $field="Status"; $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6376 if (defined $value and $value !~ /Ok|Charging|Non-Critical|Learning/){
6377 $e.="ERROR: Node $node $section $name in unexpected $field '$value'\n";
6378 }
6379 $field="Failure Predicted"; $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6380 if (defined $value and $value !~ /No/){
6381 $e.="ERROR: Node $node Disk ID $name $field '$value'\n";
6382 }
6383 }
6384 }
6385 }
6386 if ($e) {
6387 printboth "\n$e";
6388 printboth("RESOLUTION: ");
6389 printboth("For Enclosure Backplane issues see KB444497\n ") if ($e =~ /Enclosure.*State/);
6390 printboth("Fix the hardware errors\n\n");
6391 msg("Disk Controller Status","FAILED");
6392 } else {
6393 msg("Disk Controller Status","PASSED");
6394 }
6395
6396 # Check Patrol Read Mode to be Disabled
6397 $e="";
6398 $section="Controller";
6399 $field="Patrol Read Mode";
6400 for my $node (sort keys %{$DELLSTORAGE{$section}} ) {
6401 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}} ) {
6402 my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6403 $e.="ERROR: Node $node $field is $value.\n" if ($value ne "Disabled");
6404 }
6405 }
6406 if ($e) {
6407 printboth("\n$e");
6408 printboth("RESOLUTION: Use the following commands to disable patrol read\n");
6409 printboth(" For all nodes use: mapall --noerror --all+ --user=root '<commands from below>'\n");
6410 printboth(" omconfig storage controller action=setpatrolreadmode controller=0 mode=manual\n");
6411 printboth(" omconfig storage controller action=stoppatrolread controller=0\n");
6412 printboth(" omconfig storage controller action=setpatrolreadmode controller=0 mode=disable\n\n");
6413 msg("Dell Patrol Read Disabled","FAILED");
6414 } else {
6415 msg("Dell Patrol Read Disabled","PASSED");
6416 }
6417
6418 # Check Virtual Disks for Disk Caching to be Disabled
6419 $e="";
6420 $section="Virtual Disk";
6421 $field="Disk Cache Policy";
6422 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6423 my $diskcache="";
6424 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6425 my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6426 $diskcache.=" $name" if ($value ne "Disabled");
6427 }
6428 $e.="ERROR: Node $node Disk Cache Enabled For ID's:$diskcache.\n" if ($diskcache);
6429 }
6430 if ($e) {
6431 printboth("\n$e");
6432 printboth("ERROR: Disk caches are enabled\n");
6433 printboth("RESOLUTION: Review ETA KB302081\n\n");
6434 msg("Disk Cache Disabled","FAILED");
6435 } else {
6436 msg("Disk Cache Disabled","PASSED");
6437 }
6438
6439 # Check Controller Driver
6440 $e="";
6441 $section="Controller";
6442 $field="Driver Version";
6443 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6444 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6445 my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6446 my $kernel=$NODELIST{"/nodestatuslist/nodestatus/$node/version/kernel"} ;
6447 if ($value =~ /00\.00\.04\.01-RH1/
6448 and $DELLSTORAGE{$section}{$node}{$name}{"Name"} =~ /PERC 6\/i/i
6449 and $kernel !~ /2.6.9-104.ELsmp/
6450 and $NODE_INFO{$node}{os} =~ /redhat/i
6451 and $DELLSTORAGE{"Chassis"}{$node}{"0"}{"Chassis Model"} !~ /2950/) {
6452 $e.="INFO: Node $node $DELLSTORAGE{$section}{$node}{$name}{'Name'} driver $value does not match latest known version of 00.00.04.29\n";
6453 }
6454 }
6455 }
6456 if ($e) {
6457 printboth("\n$e");
6458 printboth("RESOLUTION: If node has experienced Ping/No SSH upgrade driver to 00.00.04.29. See bug 314445\n\n");
6459 $fail="INFO";
6460 }
6461 msg("Disk Controller Driver Version",$fail);
6462
6463 #### Start Dell Block Update
6464 # Check Controller Firmware
6465 $e="";
6466 $fail="PASSED";
6467 $section="Controller";
6468 $field="Firmware Version";
6469 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6470 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6471 my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6472 if ( $value !~ /12.10.2-0004/ and $DELLSTORAGE{$section}{$node}{$name}{"Name"} =~ /PERC H700/i ) {
6473 $e.="INFO: Node $node $DELLSTORAGE{$section}{$node}{$name}{'Name'} firmware $value does not match latest known version of 12.10.4-0001\n";
6474 }
6475 if ( ($value !~ /6.3.3.0002/ and $DELLSTORAGE{$section}{$node}{$name}{"Name"} =~ /PERC 6\/i/i and
6476 $DELLSTORAGE{"Chassis"}{$node}{"0"}{"Chassis Model"} !~ /2950/)) {
6477 $e.="INFO: Node $node $DELLSTORAGE{$section}{$node}{$name}{'Name'} firmware $value does not match latest known version of 6.3.3-0002\n";
6478 }
6479 }
6480 }
6481
6482#Node $node SDR Firmware version '$version' does not match latest known version of 1.13\n"
6483 # Check BIOS Version
6484 $section="BIOS";
6485 $field="Version";
6486 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6487 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6488 my $value=$DELLSTORAGE{$section}{$node}{$name}{$field};
6489 if ($value ne '1.8.2' and $DELLSTORAGE{"Chassis"}{$node}{"0"}{"Chassis Model"} =~ /R510/) {
6490 $e.="INFO: NODE $node $DELLSTORAGE{'Chassis'}{$node}{'0'}{'Chassis Model'} BIOS version $value does not match latest known version of 1.8.2\n";
6491 }
6492print LOG qq[chassis model= $DELLSTORAGE{"Chassis"}{$node}{"0"}{"Chassis Model"} \n];
6493 if ($value ne '6.0.7' and $DELLSTORAGE{"Chassis"}{$node}{"0"}{"Chassis Model"} =~ /R710/) {
6494 print LOG "INFO: NODE $node $DELLSTORAGE{'Chassis'}{$node}{'0'}{'Chassis Model'} BIOS version $value does not match latest known version of 6.0.7\n";
6495 $e.="INFO: NODE $node $DELLSTORAGE{'Chassis'}{$node}{'0'}{'Chassis Model'} BIOS version $value does not match latest known version of 6.0.7\n";
6496 }
6497 }
6498 }
6499 # Check drive firmware
6500 $section="Physical Disk";
6501 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6502 my $diskfw="";
6503 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6504 if ($DELLSTORAGE{$section}{$node}{$name}{"Revision"} =~ /03.00C09|03.00C10/ and
6505 $DELLSTORAGE{$section}{$node}{$name}{"Product ID"} =~ /WDC WD1002FBYS/) {
6506 $diskfw.=" $name";
6507 }
6508 }
6509 $e.="ERROR: Node $node Disks with affected firmware: $diskfw\n" if ($diskfw);
6510 }
6511
6512 if ($e) {
6513 printboth("\n$e");
6514 printboth("RESOLUTION: Apply Dell Block updates in hotfix 314445.\n\n");
6515 $fail=($e =~ /ERROR/) ? "FAILED" : "WARNING";
6516 }
6517 msg("Dell Block Update",$fail);
6518 #### End Dell Block Update
6519
6520 #### Start Drive Firmware
6521 $e="";
6522 $fail="PASSED";
6523 $section="Physical Disk";
6524 $field="Revision";
6525 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6526 my $diskfw="";
6527 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6528 if ($DELLSTORAGE{$section}{$node}{$name}{$field} =~ /GKAOA9RA|GKAOA9N1|GKAOA74A/ and
6529 $DELLSTORAGE{$section}{$node}{$name}{"Product ID"} =~ /Hitachi HUA721010KLA330/) {
6530 $diskfw.=" $name";
6531 }
6532 }
6533 $e.="ERROR: Node $node Disks with affected firmware: $diskfw\n" if ($diskfw);
6534 }
6535 if ($e) {
6536 printboth("\n$e");
6537 printboth("RESOLUTION: Disk Drive firmware is out of date\n");
6538 printboth(" See TSE T012511FO in the Avamar Procedure Generator for product type HUA721010KLA330\n\n");
6539 $fail="FAILED";
6540 }
6541
6542 $e="";
6543 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6544 my $diskfw="";
6545 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6546 if (($DELLSTORAGE{$section}{$node}{$name}{$field} =~ /A4C2/ and
6547 $DELLSTORAGE{$section}{$node}{$name}{"Product ID"} =~ /HUS153030VLS300/) or
6548 ($DELLSTORAGE{$section}{$node}{$name}{$field} =~ /HS09/ and
6549 $DELLSTORAGE{$section}{$node}{$name}{"Product ID"} =~ /ST3300656SS/)) {
6550 $diskfw.=" $name";
6551 }
6552 }
6553 $e.="ERROR: Node $node Disks with affected firmware: $diskfw\n" if ($diskfw);
6554 }
6555 if ($e) {
6556 printboth("\n$e");
6557 printboth("RESOLUTION: Disk Drive firmware is out of date\n");
6558 printboth(" See KB444319 for more information\n\n");
6559 $fail="FAILED";
6560 }
6561
6562 $e="";
6563 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6564 my $diskfw="";
6565 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6566 if ($DELLSTORAGE{$section}{$node}{$name}{$field} =~ /01.01D0[12]/ and
6567 $DELLSTORAGE{$section}{$node}{$name}{"Product ID"} =~ /WDC WD2003FYYS/) {
6568 $diskfw.=" $name";
6569 }
6570 }
6571 $e.="ERROR: Node $node Disks with affected firmware: $diskfw\n" if ($diskfw);
6572 }
6573 if ($e) {
6574 printboth("\n$e");
6575 printboth("RESOLUTION: Disk Drive firmware is out of date\n");
6576 printboth(" See KB470549 for more information on bug 37550/35475\n\n");
6577 $fail="FAILED";
6578 }
6579
6580 $e="";
6581 for my $node (sort keys %{$DELLSTORAGE{$section}}) {
6582 my $diskfw="";
6583 for my $name (sort keys %{$DELLSTORAGE{$section}{$node}}) {
6584 if ($DELLSTORAGE{$section}{$node}{$name}{$field} =~ /0C06/ and
6585 $DELLSTORAGE{$section}{$node}{$name}{"Product ID"} =~ /WDC WD1002FBYS-18A6B0/) {
6586 $diskfw.=" $name";
6587 }
6588 }
6589 $e.="ERROR: Node $node Disks with affected firmware: $diskfw\n" if ($diskfw);
6590 }
6591 if ($e) {
6592 printboth("\n$e");
6593 printboth("RESOLUTION: Disk Drive firmware is out of date\n");
6594 printboth(" See the Avamar Procedure Generator -> Miscellaneous Procedures -> WD 1TD Drive Firmware Update from 0C06\n\n");
6595 $fail="FAILED";
6596 }
6597 msg("Disk Firmware",$fail);
6598 #### End Disk Firmware
6599
6600}
6601########## End checkdellstorage ##########
6602
6603########## major_version ##########
# Return the first two numeric components of a version string
# (e.g. "18.1.0-33" -> "18.1", "7.2.1-32" -> "7.2", "19.2" -> "19.2").
# Param:   $v - version string; may be undef or malformed.
# Returns: the "major.minor" string, or undef when $v does not contain one.
sub major_version {
    my $v = shift;
    return undef if (!defined $v);
    # Capture through list assignment instead of reading $1 afterwards: after
    # a failed match $1 still holds the capture of an earlier, unrelated match.
    my ($major) = $v =~ m/(\d+\.\d+)(?:\.|\z)/;
    return $major;
}
6610########## Start upgradepath ##########
# Check that moving from the installed Avamar version to $UPGRADE_VERSION is a
# supported upgrade path.
# Inputs (globals): $VERSNUM, $AVAMARVER, $UPGRADE_VERSION, $OS, $DDCNT,
#   $MCSERVER_VERSION and @supportedversions. The get*() helpers are called
#   first when the global they are guarded by is still empty.
# Output: findings are written with printboth(), results with msg().
#   Exits the script with status 1 when the target is the withdrawn 7.4.0.
sub upgradepath {
    print LOG "\n\n\n### ".localtime()." ### Starting upgradepath\n";

    getinstalledversion() if (!$VERSNUM);
    getdatadomain() if (!$DDRMAINT_VERSION);
    getconfiginfo() if (!$GOTCONFIGINFO);
    getnodetype() if (!%PARTLIST);
    getopersys() if (!$OS);

    print LOG "VERSNUM=$VERSNUM AVAMARVER=$AVAMARVER PREUPGRADE=$UPGRADE_VERSION\n";

    # Literal substring test: the target is version text, not a regex.
    if (!grep { index($_, $UPGRADE_VERSION) >= 0 } @supportedversions) {
        printboth("WARNING: The upgrade target version is not known to proactive check. This is possible when proactive check lags behind a new release\n");
        printboth("RESOLUTION: Verify the upgrade version is correct and supported\n\n");
        msg("Upgrade Version","WARNING");
    }

    # Version patterns below are anchored with escaped dots so that a build
    # number such as "-734" cannot be mistaken for release 7.3/7.4.
    if ($UPGRADE_VERSION =~ /^7\.4\.0/) {
        printboth("ERROR: Version 7.4.0 is no longer available\n");
        printboth("RESOLUTION: Select a newer version to use as an upgrade target\n\n");
        print "\nERROR: Version 7.4.0 is no longer available as an upgrade target\n\n";
        exit 1;
    }

    if ($DDCNT>0 and $VERSNUM < 730 and $UPGRADE_VERSION >= '7.3') {
        my ($value,$maxtype)=get_metadatacapacity();
        if ($maxtype eq "CUR") {
            printboth("WARNING: The MCS may be reporting the metadata capacity wrong. The upgrade may cause a jump in the reported metadata capacity usage.\n");
            printboth("RESOLUTION: Compare the current MCS metadata usage to the correct value of ${value}%. See Bug 228042 for more info.\n\n");
            msg("Metadata Capacity Reporting","WARNING");
        }
    }

    # Check if password might get messed up when moving to encryption
    # Affects anything going from 7.2x or before to 7.3 or 7.4. Fixed in 7.5?
    if ($VERSNUM < 730 and $UPGRADE_VERSION =~ /^7\.[34]/) {
        my $root=`avmgr logn --id=root --ap=8RttoTriz 2>&1`;
        my $mcuser=`avmgr logn --id=MCUser --ap=MCuser1 2>&1`;
        # Set only when a login with the default password fails.
        my $nondefault=0;
        if ($root !~ /succeed/) {
            $nondefault=1;
            printboth("INFO: Avamar root user is not the default password\n");
        }
        if ($mcuser !~ /succeed/) {
            $nondefault=1;
            printboth("INFO: Avamar MCUser user is not the default password\n");
        }
        if ($nondefault) {
            printboth("RESOLUTION: Ensure customer knows the Avamar root and MCUser passwords. See esc#27658 for more info\n\n");
            msg("Default Avamar Passwords","INFO");
        }
    }

    my $e="PASSED";
    # Generic 2 prior version check
    my (%allowed,$min_vers);

    if ($VERSNUM >= 741 and $UPGRADE_VERSION =~ /^18|^19\.[01]/) {
        # 7.4.1 allowed to upgrade to 19.1
    } else {
        # Walk the supported versions from newest to oldest. Skip everything
        # newer than the target, then collect distinct major versions; the
        # third one collected (target plus two prior) is the oldest allowed.
        my $target_major=major_version($UPGRADE_VERSION);
        for (reverse sort {$a <=> $b} @supportedversions) {
            my $major=major_version($_);
            next if (!%allowed and $major ne $target_major);
            next if ($allowed{$major});
            $allowed{$major}=1;
            $min_vers=$major;
            last if (keys %allowed > 2);
        }
        print LOG "MinAllowed = $min_vers\n";
        if ( major_version($AVAMARVER) < $min_vers ) {
            printboth("ERROR: There is no direct upgrade path from $AVAMARVER to $UPGRADE_VERSION\n");
            printboth("RESOLUTION: See ETA KB 366085\n\n");
            $e="FAILED";
        }
    }

    if ($VERSNUM < 720 and $UPGRADE_VERSION >= '7.3') {
        printboth("ERROR: It is required that the AvInstaller be upgraded prior to upgrading the server\n");
        printboth("RESOLUTION: Upgrade AVInstaller before upgrading the server.\n\n");
        msg("AVInstaller Version","FAILED");
    }

    if ($MCSERVER_VERSION =~ /277897/ and $UPGRADE_VERSION =~ /^7\.3\.0/) {
        printboth("ERROR: Once hotfix 277897 is installed upgrade to 7.3.0 is not supported.\n");
        printboth("RESOLUTION: Upgrade to 7.3.1 or newer.\n\n");
        $e="FAILED";
    }

    # 19.2 requirements. Only from 19.1 and SLES 11 SP4
    if ( $e ne "FAILED" and $UPGRADE_VERSION =~ /19\.2\.0-155/ and ( $OS =~ /SLES 11.[1-3]/ or $VERSNUM<1910 ) ) {
        printboth(qq[ERROR: There is no direct upgrade path from $AVAMARVER & $OS to $UPGRADE_VERSION \n]);
        printboth("RESOLUTION: See ETA KB 366085\n\n");
        $e="FAILED";
    }

    # Data Domain specific requirements
    if ($DDCNT>0) {
        if ( $VERSNUM<610 and $UPGRADE_VERSION >= '7' ) {
            printboth("ERROR: There is no direct upgrade path from $AVAMARVER to $UPGRADE_VERSION with Data Domain\n");
            printboth("RESOLUTION: See ETA KB 366085\n\n");
            $e="FAILED";
        }
        if ( $AVAMARVER =~ /^7\.0/ and $UPGRADE_VERSION =~ /^7\.2/ ) {
            printboth("ERROR: There is no direct upgrade from $AVAMARVER to $UPGRADE_VERSION when Data Domain is present\n");
            printboth("RESOLUTION: You must upgrade to 7.1.2 first so you can then upgrade your DDOS to a minimum of 5.5.0.9 before upgrading to 7.2.\n\n");
            $e="FAILED";
        }
        if ( $AVAMARVER =~ /^6\.1/ and $UPGRADE_VERSION >= '7.1' ) {
            printboth("WARNING: There might not be a direct upgrade path from $AVAMARVER to $UPGRADE_VERSION\n");
            printboth("RESOLUTION: See KB articles 366085 and 334708 to determine upgrade path depending on current versions of Avamar and DDOS.\n\n");
            $e="FAILED";
        }
        # recommend not using 7.0.1 unless only opt (v6.0 w/dd)
        # NOTE(review): this replaces an earlier FAILED with WARNING - confirm
        # that downgrade is intended.
        if ($AVAMARVER =~ /^6\.0/ and $UPGRADE_VERSION =~ /^7\.0\.1/ ) {
            printboth("WARNING: EMC Strongly urges all customers to upgrade to 7.0.2 or later\n");
            printboth("RESOLUTION: Reconsider upgrading to 7.0.2 or later instead of 7.0.1\n\n");
            $e="WARNING";
        }
    }
    msg("Upgrade Path",$e);
}
6730########## End upgradepath ##########
6731
6732########## Start kernelcnt ##########
# Pre-upgrade check: count the kernel-default RPMs listed in @RPMS for each
# SUSE/SLES node and fail when a node has more than 4 (see KB454134).
# Skipped unless $PREUPGRADE is set or when getnodes_os() finds no SUSE nodes.
sub kernelcnt {
    print LOG "\n\n\n### ".localtime()." ### Starting kernelcnt\n";
    if (!$PREUPGRADE) {
        print LOG "Skipping: only run for preupgrade\n";
        return;
    }
    getconfiginfo() if (!$GOTCONFIGINFO);
    my $nodes=getnodes_os("suse|sles");
    if (!$nodes) {
        print LOG "no suse nodes found\n";
        return;
    }
    my %count;
    my $e="";
    # grep returns aliases into @RPMS, so the entry must only be read here:
    # assigning to it would rewrite the shared RPM list used by other checks.
    for my $entry (grep(/kernel-default/,@RPMS)) {
        print LOG "$entry\n";
        next if ($entry =~ /extra|devel|utils/);
        my ($node)=split(' ',$entry);   # first field is the node id
        if ($nodes =~ /$node/) {
            $count{$node}++;
        } else {
            print LOG "not suse OS\n";
        }
    }
    for my $node (sort keys %count) {
        print LOG "Count: $count{$node}\n";
        if ( $count{$node} > 4 ) {
            $e.="ERROR: Node $node Too many ($count{$node}) kernel RPMs are installed\n";
        }
    }
    if ($e) {
        printboth("${e}RESOLUTION: See KB454134\n\n");
        msg("Kernel RPMs","FAILED");
    }
}
6768########## End kernelcnt ##########
6769
6770
6771########## Start checkopenfiles ##########
# Verify the open-file limits on every node (Avamar 7.x and later):
#   sysctl fs.file-max                      must be at least 1600000
#   nofile in /etc/security/limits.conf     must be at least 800000
# The commands are run through mapall() and their output read from $TMPFILE.
sub checkopenfiles {
    print LOG "\n\n\n### ".localtime()." ### Starting checkopenfiles\n";
    getinstalledversion() if (!$AVAMARVER);
    if ( $VERSNUM<700 ) {
        print LOG "Skipping for version $VERSNUM less than 7.x\n";
        return;
    }
    my $cmd=q[ sysctl fs.file-max | awk '{print "SYS:"$0}'
 grep nofile /etc/security/limits.conf | awk ' !/^\s*#/ {print "LIM:"$0}'
 ];
    # running proc limit: ps h -C gsan | awk '{system("cat /proc/"$1"/limits")}' | awk '/Max open files/ {print "GSAN:"$4}'|head -1
    mapall("",$cmd);
    my $e="";
    my $node="";
    if (open(my $out,'<',$TMPFILE)) {
        while (my $line=<$out>) {
            print LOG $line;
            chomp($line);
            $node=$1 if ($line =~ /(\(0\..*\)) ssh/);
            # \d+ (not \d*): a non-numeric setting must not be reported as
            # an empty value that compares below the limit.
            if ($line =~ /^SYS:.*=\s*(\d+)/) {
                $e.="ERROR: Node $node sysctl file-max setting of $1 is less than the required 1600000\n" if ($1 < 1600000 );
            } elsif ($line =~ /^LIM:.*nofile\s+(\d+)/) {
                $e.="ERROR: Node $node /etc/security/limits.conf nofile setting of $1 is less than the required 800000\n" if ($1 < 800000);
#           } elsif ($line =~ /^GSAN:(\d+)/) {
#               $e.="ERROR: Node $node Running GSAN process open files setting of $1 is less than the required 800000\n" if ($1 < 800000);
            }
        }
        close($out);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }
    if ($e) {
        printboth("${e}RESOLUTION: See KB304555 to change settings to required values.\n");
        printboth(" NOTE: Running GSAN process errors require restarting Avamar\n") if ($e =~ /GSAN process/);
        printboth("\n");
        msg("Open File Settings","FAILED");
    } else {
        msg("Open File Settings","PASSED");
    }
}
########## End checkopenfiles ##########
6807
########## Start oscheck ##########
# Operating system checks run across the nodes:
#   1. Shellshock (CVE-2014-6271): a vulnerable bash echoes "SS:1".
#   2. Kernel leap second hotfix on SUSE/SLES nodes. Only evaluated while the
#      clock is before Jun 30, 2015 23:59 UTC and the system is not a VBA.
sub oscheck {
    # shellshock 201526
    print LOG "\n\n\n### ".localtime()." ### Starting oscheck\n";
    getavamarver() if (!$AVTAR_VERSION);
    my $cmd=qq[ env x='() { :;}; echo "SS:1"' bash -c "" ] ;

    my $msg=($PREUPGRADE) ? "RCM PCA #: RP2014-0004" : "CVE-2014-6271";
    my $ss="";
    my $node="";
    mapall("",$cmd);
    if (open(my $out,'<',$TMPFILE)) {
        while (my $line=<$out>) {
            print LOG $line;
            $node=$1 if ($line =~ /(\(0\..*\)) ssh/);
            if ($line =~ /SS:1/) {
                $ss.="ERROR: Node $node is subject to $msg\n";
            }
        }
        close($out);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }
    if ($ss) {
        printboth("${ss}RESOLUTION: See Hotfix 202719\n\n");
        msg($msg,"FAILED");
    }

    # Leap Second hotfix. If before Jun 30, 2015 23:59 UTC

    # Called with parentheses: a bare "getopersys" is only a function call if
    # the sub has already been declared when this line is compiled.
    getopersys() if (!$OS);
    if ($OS !~ /suse|sles/i ) {
        print LOG "Skipped leap second check for o/s $OS\n";
        return;
    }
    my $nodes=getnodes_os("suse|sles");
    if ( $nodes and !$VBA and time < 1435708740 ) {
        mapall("--nodes=$nodes","sudo /usr/bin/atq");
        # Nodes still lacking the fix; lexical so repeated calls start empty.
        my %ls;
        my $lsnode="";
        if (open(my $atq,'<',$TMPFILE)) {
            while (my $line=<$atq>) {
                print LOG $line;
                if ($line =~ /(\(0\..*\)) ssh/) {
                    $lsnode=$1;
                    # Patched kernel
                    next if ( grep(/$lsnode.*2.6.32.59-0.19/,@RPMS));
                    $ls{$lsnode}=1;
                    next;
                }
                # An at job dated June/July 2015 clears the node
                delete $ls{$lsnode} if ($line =~ /2015-0[67]/);
            }
            close($atq);
        } else {
            print LOG "Unable to read $TMPFILE: $!\n";
        }
        my $lsmsg="";
        for my $unpatched (sort keys %ls) {
            $lsmsg.="ERROR: Node $unpatched does not have leap second hotfix installed\n";
        }
        if ($lsmsg) {
            printboth("${lsmsg}RESOLUTION: Install Hotfix 229168\n\n");
            msg("Kernel Leap Second","FAILED");
        } else {
            msg("Kernel Leap Second","PASSED");
        }
    }
}
6865########## End oscheck ##########
6866
6867########## Start repoempty ##########
# Warn when an AvInstaller workflow appears to be in progress:
#   - any entry in @DATA_REPO (each entry names a node and a non-empty dir)
#   - any package in the AVI sqlite database whose status is processing,
#     deploying or deployed
sub repoempty {
    print LOG "\n\n\n### ".localtime()." ### Starting repoempty\n";
    getconfiginfo() if (!$GOTCONFIGINFO);
    my $e="";
    for my $entry (@DATA_REPO) {
        my ($node,$dir)=split(' ',$entry);
        printboth("WARNING: Node $node Directory $dir is not empty. AvInstaller workflow appears to be active\n");
        $e=1;
    }

    # One "title|status" row per line of sqlite3 output
    for my $row (split(/\n|\r/, `/usr/bin/sqlite3 /usr/local/avamar/var/avi/server_data/avidb "select title,status from available_pkgs;"` )) {
        print LOG "avidb: $row\n";
        my ($pkg,$status)=split(/\|/,$row);
        if ($status =~ /processing|deploying|deployed/ ) {
            printboth("WARNING: AVIDB indicates $pkg is in status $status. AvInstaller workflow appears to be active\n");
            $e=1;
        }
    }
    if ($e) {
        printboth("RESOLUTION: Do not delete any files. Do not place new AVP files into packages Directory.\n Escalate internally to Remote Proactive Team Leader.\n\n");
        msg("AvInstaller Repository Check","WARNING");
    }
}
6891########## End repoempty ##########
6892
6893########## Start avamarsrc ##########
# Check /data01/avamar/src exists
# Each @DATA_SRC entry is "<node> <state>"; any state that does not contain
# OK is reported for that node.
sub avamarsrc {
    print LOG "\n\n\n### ".localtime()." ### Starting avamarsrc\n";
    getconfiginfo() if (!$GOTCONFIGINFO);
    my $report=join("", map {
        my ($host,$state)=split;
        ($state !~ /OK/)
            ? "WARNING: Node $host Directory /data01/avamar/src does not exists\n"
            : ();
    } @DATA_SRC);
    if ($report) {
        printboth("${report}RESOLUTION: Find out why the diretory does not exists\n\n");
        msg("/data01/avamar/src","FAILED");
    }
}
6908########## End avamarsrc ##########
6909
6910
6911########## Start avinstaller ########
# Check that the AvInstaller service is up: an avistart process must exist
# and curl must be able to connect to the local AVI port (curl exit status 7
# means the connection failed).
sub avinstaller {
    print LOG "\n\n\n### ".localtime()." ### Starting avinstaller\n";
    my $avistart_proc = `ps auxww | grep '/[a]vistart' | awk '{ print \$2 }'`;
    print LOG ("Avistart process: $avistart_proc \n");
    my $curl = `curl -k https://localhost:7543/avi/service/info/resources 2>&1`;
    # Read $? straight after the backticks so no later statement can change it.
    my $return_code = $? >> 8;
    print LOG "CURL cmd: curl -k https://localhost:7543/avi/service/info/resources\n\nOUTPUT:$curl\n";
    if( $return_code == 7 or !$avistart_proc ) {
        printboth("ERROR: AVI Installer is not running or it is not responding\n\nRESOLUTION: Start AVI Installer.\n\n");
        msg("AVI Installer","FAILED");
    } else {
        msg("AVI Installer","PASSED");
    }
}
6926########## End avinstaller ########
6927
6928
6929########## Start checkmessages ##########
# Report the entries collected in @DATA_checkmessages as ARP cache overflow
# errors (see KB449396).
# NOTE(review): assumes each entry is "<node> <message text>", the same layout
# the other @DATA_* arrays are split with - confirm against getconfiginfo().
sub checkmessages {
    print LOG "\n\n\n### ".localtime()." ### Starting checkmessages\n";
    getconfiginfo() if (!$GOTCONFIGINFO);
    for my $entry (@DATA_checkmessages) {
        # Parse the entry itself rather than printing unrelated globals
        my ($node,$msg)=split(' ',$entry,2);
        $msg="" if (!defined $msg);
        $msg =~ s/\s+\z//;
        printboth("ERROR: Node $node reported $msg\n");
    }
    if (@DATA_checkmessages) {
        printboth("RESOLUTION: See KB449396 to increase cache size\n\n");
        msg("ARP Cache Overflow","FAILED");
    }
}
6941########## End checkmessages ##########
6942
6943########## Start getmaxrpmver #########
# Return the highest installed version of an RPM on one node.
# Params:  $rpm_regex - regex matching the RPM name (without version)
#          $nodeid    - regex matching the node field of the @RPMS entries;
#                       empty or undef matches every node
# Returns: the version-release string (e.g. "2.6.32.59-0.19") of the highest
#          matching RPM, or "" when none is found.
# @RPMS entries are "<node> <rpm name-version-release>".
sub getmaxrpmver {
    my $rpm_regex=shift;
    my $nodeid=shift;
    my $rpm_ver="";
    my @MAX;
    for my $entry (grep { / $rpm_regex-[0-9]/ } @RPMS) {
        my ($node,$rpm)=split(' ',$entry);
        next if (defined $nodeid and $nodeid ne "" and $node !~ /$nodeid/);
        $rpm =~ s/^.*?-(\d)/$1/; # Remove RPM name till first number
        my @VER=split(/[-\.]/, $rpm);
        if (!@MAX) {
            # first candidate is the maximum so far
            @MAX=@VER;
            $rpm_ver=$rpm;
            next;
        }
        # Compare component by component; the first difference decides. A
        # smaller leading component must end the comparison, otherwise 1.9
        # would beat 2.0 on its second component.
        for my $index (0 .. $#VER) {
            my $best=defined $MAX[$index] ? $MAX[$index] : 0;
            if ($VER[$index] > $best) {
                @MAX=@VER;
                $rpm_ver=$rpm;
                last;
            }
            last if ($VER[$index] < $best);
        }
    }
    return $rpm_ver;
}
6975########## End getmaxrpmver #########
6976
6977########## Start minver #########
# Test a version against a minimum requirement.
# Params:  $ver1 - version to check, $ver2 - minimum required version
# Returns: 1 when $ver1 is lower than $ver2; 0 when it is equal or higher,
#          or when either argument is empty.
# Versions are split on "." and "-" and compared numerically, one component
# at a time, over the components of the minimum version.
sub minver {
    my ($candidate,$minimum)=@_;
    return 0 unless ($candidate and $minimum);
    my @have=split(/[-\.]/, $candidate);
    my @need=split(/[-\.]/, $minimum);
    for my $pos (0 .. $#need) {
        my $order=$have[$pos] <=> $need[$pos];
        return 0 if ($order > 0);   # already above the minimum
        return 1 if ($order < 0);   # below the minimum
    }
    return 0;
}
6996########## End minver #########
6997
6998
6999########## Start getvba ##########
# VBA checks. Returns early with $VBA="" when
# /usr/local/avamar/etc/EBR-VERSION does not exist; otherwise sets $VBA=1,
# reads $VBA_VERSION from vdp_version_info and checks:
#   - recalled versions and hotfixes known for that version (%vbaver)
#   - the /clients/iragent CID in the MCS against cid.bin
#   - NetWorker server reachability and version compatibility
#   - local NetWorker version and the nsrexecd process
#   - Data Domain DDOS compatibility
#   - EM web application status
sub getvba {
    print LOG "\n\n\n### ".localtime()." ### Starting getvba\n";
    if (! -e "/usr/local/avamar/etc/EBR-VERSION") {
        print LOG "Skipping: No VBA version found\n";
        $VBA="";
        return;
    }
    getconfiginfo() if (!$GOTCONFIGINFO);

    $VBA=1;
    my $results;
    my (%VCENTER,%NSR);

    # Get the VBA version; an ova-upgrade entry overrides nemo-scripts
    if (open(my $verfh,'<',"/usr/local/vdr/etc/vdp_version_info")) {
        while(<$verfh>){chomp;
            $VBA_VERSION=$1 if (/nemo-scripts=(\S*)/ and !$VBA_VERSION);
            $VBA_VERSION=$1 if (/ova-upgrade=(\S*)/);
        }
        close($verfh);
    } else {
        print LOG "Unable to read /usr/local/vdr/etc/vdp_version_info: $!\n";
    }

# NOTE: client=rpmvers. mcs=md5sum.
# hf=>? ddos=>compatible data domain version nsr=> compatible networker version
# ebr_nw* => rpm version installed
# avnwcomm => bug # seen in avnwcomm --version output.
# hfavtar => bug # seen in avtar --version output
# NOTE(review): the ddos/nsr patterns are double-quoted, so "\." reaches the
# regex engine as a bare "." (any character).
    my %vbaver = (
        "1.5.1.7" =>{ ddos=>"5\.3\.0\.[6-9]|5\.3\.[1-9]\.0|5\.[456]\..\..|5\.7\.[12]\..|6\.0\..\..", nsr=>"9\.0", rpm=>{ rpm=>"ebr-nw-2.5.1-18", bug=>261881 }
        },
        "1.5.0.160" =>{ ddos=>"5\.[567]\..\..", nsr=>"", rpm=>{ rpm=>"ebr-nw-2.5.0-741", bug=>252862 }
        },
        "1.5.0.159" =>{ ddos=>"5\.[567]\..\..", nsr=>"9\.0", rpm => { rpm=>"ebr-nw-2.5.0-741", bug=>252862 }
        },
        "1.5.0.149" =>{ ddos=>"5\.[567]\..\..", nsr=>"8\.5", rpm => { rpm=>"ebr-nw-2.5.0-741", bug=>252862 }
        },
        "1.1.3.7" =>{ ddos=>"5\.4\.2\.[1-9]|5\.4\.[3-9]|5\.[56]\..\..", nsr=>"8\.2\.3", rpm =>{ rpm=>"ebr-nw-2.0.3-9", bug=>262411 },
        },
        "1.1.2.8" =>{ ddos=>"5\.4\.2\.[1-9]|5\.4\.[3-9]|5\.[56]\..\..", nsr=>"8\.2\.2" , rpm =>{ rpm=>"ebr-nw-2.0.2-62", bug=>257066 },
        },

        "1.1.2.6" =>{ recalled=>1, ddos=>"5\.4\.2\.[1-9]|5\.4\.[3-9]|5\.[56]\..\..", nsr=>"8\.2\.2"
        },

        "1.1.1.50" =>{ ddos=>"5\.4\.1\.[2-9]|5\.4\.[2-9]|5\.[56]\..\..", nsr=>"8\.2\.1",
            rpm => { rpm=>"ebr-nw-2.0.1-223", bug=>252858, kb=>"" },
            rpm3=> { rpm=>"AvamarVMwareCombined-linux-sles11-x86_64-7.1.161-15", bug=>239997 },
            hfmcs=>"252054", mcs=>"0e11a800cbd9a6f2e4bb010b5b4c49e0"
        },
        "1.1.1.46" => { hf=>"231042", ddos=>"5\.4\.1\.[2-9]|5\.4\.[2-9]|5\.5\..\..", nsr=>"8\.2\.1",
            rpm => {rpm=>"flr-nw-app-2.0.1-194", bug=>231480 }
        },
        "1.1.1.41" => { recalled=>1, ddos=>"5\.4\.1\.[2-9]|5\.4\.[2-9]|5\.5\..\..", nsr=>"8\.2\.1.*671", },
        "1.1.0.149" => { ddos=>"5\.4\.1\.[2-9]|5\.4\.[2-9]|5\.5\..\..", nsr=>"8\.2\.",
            rpm => {rpm=>"ebr-nw-2.0.0-318", bug=>225771 },
            rpm2=> {rpm=>"ebr-nw-2.0.0-324", bug=>232801 },
            rpm1=> {rpm=>"flr-nw-app-2.0.0-322", bug=>231492 }
        },
        "1.1.0.141" => { ddos=>"5\.4\.1\.[2-9]|5\.5\..\..", nsr=>"8\.2\.", },
        "1.0.3.6" => { ddos=>"5\.3\.0\.[6-9]|5\.3\.[1-9]|5\.4\.1\.[2-9]|5\.4\.[2-9]|5\.5\.0\.", nsr=>"8\.1\.3",
            rpm => { rpm=>"ebr-nw-1.3.301", bug=>235225 }
        },
        "1.0.2.16" => { ddos=>"5\.3\.0\.[6-9]|5\.3\.[1-9]|5\.4\.1\.[2-9]|5\.4\.[2-9]", nsr=>"8\.1\.2",
            avnwcomm=>"231356", hfmcs=>"222752", mcs=>"481e3a1cb7d0fd51b695e951c1d1a547",
            rpm => { rpm=>"AvamarVMwareCombined-linux-sles11-x86_64-7.0.162-12", bug=>222268 }
        },
        "1.0.1.9" => { gsan=>"7.0.61-5",
            avnwcomm=>"196356", ddos=>"5\.3\.0\.[6-9]|5\.3\.[1-9]|5\.4\.1\.[2-9]|5\.4\.[2-9]", nsr=>"8\.1\.1",
            avtar=>"7.0.161-16", hfavtar=>"201117",
            client=>"7.0.161-18", hfclient=>"222267",
            hfmcs=>"199300", mcs=>"aeb0f7798d18d3558e13337403656712"
        },
        "1.0.0.199" => { gsan=>"7.0.60-11", ddos=>"5\.3\.0\.[6-9]|5\.3\.[1-9]|5\.4\.1\.[2-9]|5\.4\.[2-9]" },
        "1.0.0.180" => { gsan=>"7.0.60-7", ddos=>"5\.3\.0\.[6-9]|5\.3\.[1-9]|5\.4\.1\.[2-9]|5\.4\.[2-9]" }
    );


    print LOG "\n\nVBA VERSION '$VBA_VERSION'\n";
    if ($vbaver{$VBA_VERSION}{ddos}) {
        msg("VBA Version",$VBA_VERSION);

        my $error=0;
        # Check for recalled version
        if ($vbaver{$VBA_VERSION}{recalled} ) {
            printboth("ERROR: VBA Version $VBA_VERSION has been recalled\n");
            printboth("RESOLUTION: Upgrade to a newer version\n\n");
            msg("VBA Recall","FAILED");
            $error=1;
        }

        # Check for hotfixes by RPM installed
        for my $key (sort grep(/^rpm/, keys %{$vbaver{$VBA_VERSION}}) ) {
            # Copy the hotfix entry out first: inside grep $_ is the @RPMS
            # line, so it cannot be used as the hash key there.
            my $hotfix=$vbaver{$VBA_VERSION}{$key};
            my $wanted=$hotfix->{rpm};
            print LOG "HOT FIX RPM: $wanted $hotfix->{bug}\n";
            if (!grep { index($_,$wanted) >= 0 } @RPMS) {
                printboth("WARNING: Hot fix $hotfix->{bug} is available for $VBA_VERSION\n");
                if (my $kb=$hotfix->{kb}) {
                    printboth("RESOLUTION: See KB$kb for more information\n\n");
                } else {
                    printboth("RESOLUTION: See hotfix information to see if this grid requires an update\n\n");
                }
                $error=1;
            } else {
                print LOG "found rpm matching: $wanted\n";
            }
        }

        # Check for avnwcomm hotfixes
        if ( $vbaver{$VBA_VERSION}{avnwcomm} ){
            my $hf;
            # Pipe open: run the command and read its output
            if (open(my $cmdfh,'-|',"/usr/local/avamarclient/bin/avnwcomm --version 2>&1")) {
                while(<$cmdfh>){ $hf=1 if (/$vbaver{$VBA_VERSION}{avnwcomm}/); print LOG $_; }
                close($cmdfh);
            } else {
                print LOG "Unable to run avnwcomm --version: $!\n";
            }
            if (!$hf){
                printboth("WARNING: avnwcomm hotfix $vbaver{$VBA_VERSION}{avnwcomm} is available for $VBA_VERSION\n");
                printboth("RESOLUTION: See hotfix information to see if this grid requires an update\n\n");
                msg("VBA avnwcomm updates","FAILED");
                $error=1;
            }
        }

        # Check for avtar hotfixes
        print LOG "avtar: $vbaver{$VBA_VERSION}{avtar} and $AVTAR_VERSION ne $vbaver{$VBA_VERSION}{avtar}\n";
        if ( $vbaver{$VBA_VERSION}{avtar} and $AVTAR_VERSION ne $vbaver{$VBA_VERSION}{avtar} ){
            printboth("WARNING: avtar hotfix $vbaver{$VBA_VERSION}{hfavtar} is available for $VBA_VERSION\n");
            printboth("RESOLUTION: See hotfix information to see if this grid requires an update\n\n");
            msg("VBA avtar updates","FAILED");
            $error=1;
        }

        # Check for client hotfixes
        print LOG "client: $vbaver{$VBA_VERSION}{client} and $vbaver{$VBA_VERSION}{client} ne $VBA_RPM\n";
        if ( $vbaver{$VBA_VERSION}{client} and $vbaver{$VBA_VERSION}{client} ne $VBA_RPM ){
            printboth("WARNING: Client Hotfix $vbaver{$VBA_VERSION}{hfclient} is available for $VBA_VERSION\n");
            printboth("RESOLUTION: See hotfix information to see if this grid requires an update\n\n");
            msg("VBA client updates","FAILED");
            $error=1;
        }

        # Check for mcs hotfixes
        print LOG "mcs: $vbaver{$VBA_VERSION}{mcs} and $vbaver{$VBA_VERSION}{mcs} ne $MCS_MD5SUM\n";
        if ( $vbaver{$VBA_VERSION}{mcs} and $vbaver{$VBA_VERSION}{mcs} ne $MCS_MD5SUM ){
            printboth("WARNING: MCS Hotfix $vbaver{$VBA_VERSION}{hfmcs} is available for $VBA_VERSION\n");
            printboth("RESOLUTION: See hotfix information to see if this grid requires an update\n\n");
            msg("VBA MCS updates","FAILED");
            $error=1;
        }

        # Check for 231042 hotfix
        if ( $vbaver{$VBA_VERSION}{hf} == 231042 and !grep(/2.6.32.59-0.17.1.8297.0.PTF.924392/,@RPMS) ){
            printboth("ERROR: Hotfix $vbaver{$VBA_VERSION}{hf} is required for $VBA_VERSION\n");
            printboth("RESOLUTION: See hotfix for more information\n\n");
            msg("VBA updates","FAILED");
            $error=1;
        }

        # Check for iragent
        openmcdb() if (!$dbh);
        my $sth = $dbh->prepare("select cid from v_clients where full_domain_name='/clients/iragent' and enabled");
        $sth->execute;
        my $row=$sth->fetchrow_hashref;
        $results=qx{sudo cat /usr/local/avamar/var/client/cid.bin | sed -n '2p'};
        chomp($results);
        print LOG "mcscid=$row->{cid} cid.out=$results\n";
        if ($row->{cid} ne $results) {
            printboth("ERROR: There appears to be a problem with /clients/iragent.\nIt doesn't exist or there is a CID mismatch MCS CID is '$row->{cid}' cid.out is '$results'\n");
            printboth("RESOLUTION: See KB458450 to further troubleshoot this problem.\n\n");
            msg("VBA /clients/iragent","FAILED") ;
        }

        msg("VBA updates","PASSED") if (!$error);
    } else {
        print LOG "Unknown VBA version\n";
# unknown version?
    }

### Get Networker Info
    if (open(my $cfgfh,'<',"/usr/local/vdr/etc/vcenterinfo.cfg")) {
        while(<$cfgfh>){chomp;
            print LOG "$_\n";
            # limit 2 keeps any "=" that is part of the value
            my ($fld,$val)=split(/=/,$_,2);
            next if (!defined $fld);
            $VCENTER{$fld}=$val;
        }
        close($cfgfh);
    } else {
        print LOG "Unable to read /usr/local/vdr/etc/vcenterinfo.cfg: $!\n";
    }
    if ($VCENTER{"networker-hostname"}){
        print LOG qq[Get nsr version from $VCENTER{"networker-hostname"}\n];
        if (open(my $nsrfh,'-|',qq[ echo "print type:NSR" | nsradmin -s $VCENTER{"networker-hostname"} -i - 2>&1 ])) {
            my $line="";
            while(<$nsrfh>){chomp;
                # A trailing backslash continues the attribute on the next
                # line: append, so earlier fragments are kept.
                if (/(.*)\\$/) {
                    $line.=$1;
                    next;
                }
                $line.=$_;
                my($key,$value)=$line=~m{\s*(.*):\s(.*);};
                next if (!$key);
                $line="";
                $NSR{$key}=$value;
            }
            close($nsrfh);
        } else {
            print LOG "Unable to run nsradmin: $!\n";
        }
    }
    msg("NetWorker Host:",$VCENTER{"networker-hostname"}." ".$NSR{"version"});
    if (!$NSR{"version"}) {
        printboth("ERROR: Unable to determine NetWorker version\n");
        printboth("RESOLUTION: Verify NetWorker is running and reachable from this host\n\n");
        msg("NetWorker Connectivity:","FAILED");
    } else {
        msg("NetWorker Connectivity:","PASSED");
        if ($vbaver{$VBA_VERSION}{nsr} and $NSR{"version"} !~ /$vbaver{$VBA_VERSION}{nsr}/){
            print LOG "regex: $vbaver{$VBA_VERSION}{nsr}\n";
            printboth("ERROR: NetWorker version $NSR{version} and VBA Version $VBA_VERSION may not be compatible\n");
            printboth("RESOLUTION: Consult release information to verify these versions are compabitble\n\n");
            msg("VBA/NetWorker Version:","FAILED");
        } else {
            msg("VBA/NetWorker Version:","PASSED");
        }
    }

    # Check Local NetWorker Major version
    my $local=qx{strings /usr/sbin/nsrexecd | awk -F: '/#.*Release/ {split(\$2,V,".");printf "%d.%d", V[1], V[2]}'}; # apostrophe ' to fix color formatting
    print LOG "local=$local remote=$NSR{'version'}\n";
    if ( $NSR{"version"} !~ / $local/) {
        printboth("WARNING: Local NetWorker version $local is different than the NetWorker server $NSR{'version'}\n");
        printboth("RESOLUTION: Local and remote NetWorker major version numbers should be the same\n\n");
        msg("NetWorker Local Version","FAILED");
    }

    # Check nsrexecd running (more than one process still counts as running)
    if ( qx{ps -ae | grep -c nsrexecd} < 1 ){
        printboth("ERROR: nsrexecd is not running\n");
        printboth("RESOLUTION: Determine why it is not running\n\n");
        msg("Networker nsrexecd","FAILED");
    }

    # Data Domain Compatibility
    getdatadomain() if (!$DDRMAINT_VERSION) ;
    my $e="";
    foreach my $ddindex (@DD_INDEX) {
        my $ddver=$DD{"/avamar/datadomain/ddrconfig/$ddindex/ddos-version"};
        print LOG qq[ddr index: $ddindex version $ddver\n];
        if ( $vbaver{$VBA_VERSION}{ddos} and $ddver !~ /$vbaver{$VBA_VERSION}{ddos}/ ) {
            my $ddname=$DD{"/avamar/datadomain/ddrconfig/$ddindex/hostname"};
            $e.=("ERROR: Data Domain $ddname version $ddver and VBA Version $VBA_VERSION may not be compatible\n");
        }
    }
    if ($e){
        printboth($e."RESOLUTION: Consult release information to verify these versions are compabitble\n\n");
        msg("VBA/Data Domain Version:","FAILED");
    } else {
        msg("VBA/Data Domain Version:","PASSED");
    }

    # Get EMWEBAPP status
    $results=qx{emwebapp.sh --test};
    chomp($results);
    if ($results =~ /status: (.*)/) {
        if ($1 eq "up" ) {
            msg("EM Web App","PASSED");
        } else {
            printboth("ERROR: Enterprise Manager Web application status is '$1'\n");
            printboth("RESOLUTION: Enable webapp?\n\n");
            msg("EM Web App","FAILED");
        }
    }
}
7262########## End getvba ##########
7263
7264########## Start chkcron ##########
# Warn when the watchdog cron file /etc/cron.d/wdt is present.
sub chkcron {
    print LOG "\n\n\n### ".localtime()." ### Starting chkcron\n";
    my $wdt_cron="/etc/cron.d/wdt";
    if (-e $wdt_cron) {
        printboth(
            "WARNING: The file /etc/cron.d/wdt exists indicating the watchdog process might be running\n"
        );
        printboth(
            "RESOLUTION: Ensure watchdog process is not configured and disable if neccesary. See esc34942 for more info\n\n"
        );
    }
}
7272
7273########## Start chkspace ##########
# Check disk and inode usage on every node from "df" and "df -i" output
# (each line is prefixed with DISK: or INODE: by sed):
#   - any filesystem above 90% disk usage          -> "Disk Space" FAILED
#   - any filesystem above 90% inode usage         -> "Inode usage" WARNING
#   - on a VBA, /space above 90% (see KB425661)    -> "VBA /space" FAILED
sub chkspace {
    print LOG "\n\n\n### ".localtime()." ### Starting chkspace\n";
    my $cmd=qq[df |sed 's/^/DISK:/' ; df -i|sed 's/^/INODE:/' ];
    mapall("",$cmd);
    my $errvba="";
    my $errdisk="";
    my $errinode="";
    my $node="";
    if (open(my $out,'<',$TMPFILE)) {
        while (my $line=<$out>) {
            print LOG $line;
            # Node header line: remember the node and do not parse it as df
            # output (it echoes the command, including the DISK: text).
            if ($line =~ /(\(0\..*\)) ssh/) {
                $node=$1;
                next;
            }
            my ($filesystem,$size,$used,$available,$percent,$mount)=split(' ',$line);
            next if (!defined $percent);
            $percent=~s/%//;
            $mount="" if (!defined $mount);
            # Device name without the DISK:/INODE: tag, for the disk message
            (my $device=$filesystem) =~ s/^(?:DISK|INODE)://;
# VBA space > 90%
            # /space is a mount point, so test the mount column as well as
            # the device column
            if ($line =~ /^DISK:/ and $VBA and ($filesystem =~ /space/ or $mount =~ /space/) and $percent > 90) {
                $errvba.="ERROR: /space partition is at $percent% used.\n";
            }
# Inodes
            if ($line =~ /^INODE:/ and $percent>90) {
                $errinode.="ERROR: Nodes $node filesystem $filesystem has used $percent of the inodes\n";
            }
# Disks
            if ($line =~ /^DISK:/ and $percent > 90) {
                $errdisk.="ERROR: Node $node filesystem $device mounted on $mount is at $percent% used.\n";
            }
        } # WHILE
        close($out);
    } else {
        print LOG "Unable to read $TMPFILE: $!\n";
    }
    if ($errinode) {
        printboth($errinode);
        printboth("RESOLUTION: Resolve high inode usage\n\n");
        msg("Inode usage","WARNING");
    }
    if ($errvba) {
        printboth($errvba);
        printboth("RESOLUTION: See KB425661 for more information\n\n");
        msg("VBA /space","FAILED");
    } elsif ($VBA) {
        # only report a VBA result on a VBA
        msg("VBA /space","PASSED");
    }
    if ($errdisk) {
        printboth($errdisk);
        printboth("RESOLUTION: Resolve high disk usage\n\n");
        msg("Disk Space","FAILED");
    }
}
7319########## End chkspace ##########
7320
7321########## Start chkproxy ##########
# Check the VPROXY_REGULAR clients recorded in the MCS database:
#   - proxies flagged overdue by the query (last check-in more than one
#     minute ago) are reported and fail "VBA Proxies"
#   - proxies whose agent version differs from the first proxy's version
#     raise a "VBA Proxy Versions" warning
sub chkproxy {
    print LOG "\n\n\n### ".localtime()." ### Starting chkproxy\n";
    openmcdb() if (!$dbh);
    my $sth = $dbh->prepare(qq[ select descr,checkin_ms/1000 as checkin, agent_version, checkin_ms/1000<EXTRACT(EPOCH FROM NOW()-INTERVAL '1 minutes') as overdue
 from clients where client_type='VPROXY_REGULAR' ]);
    $sth->execute;
    my ($overdue_report,$first_version,$mismatch_report);
    while ($R=$sth->fetchrow_hashref()){
        if ($R->{overdue} eq "t") {
            my @when=localtime($R->{checkin});
            my $date=sprintf("%04d-%02d-%02d",$when[5]+1900,$when[4]+1,$when[3]);
            $overdue_report.="WARNING: proxy $R->{descr} has not checked in since $date\n";
        }
        # The first version seen becomes the reference for the others
        unless ($first_version) {
            $first_version=$R->{agent_version};
            next;
        }
        if ($first_version ne $R->{agent_version}) {
            $mismatch_report.="WARNING: Proxy $R->{descr} version $R->{agent_version} doesn't match other proxy versions $first_version\n";
        }
    }
    if ($overdue_report) {
        printboth($overdue_report."RESOLUTION: Verify proxy status. Disable to remove this message. See KB457998 for more info\n\n");
        msg("VBA Proxies","FAILED");
    } else {
        msg("VBA Proxies","PASSED");
    }
    if ($mismatch_report){
        printboth($mismatch_report."RESOLUTION: Verify all proxy versions\n\n");
        msg("VBA Proxy Versions","WARNING");
    }
}
7353########## End chkproxy ##########
7354
7355########## Start servicemode ##########
# Enable or report service mode.
# Param: $hours - when true, send the ServiceMode infomessage through avmaint
#   and create the marker file /home/admin/.servicemode, which a background
#   shell removes once the period is over. When false and the marker exists,
#   report when service mode was enabled (marker mtime) and for how long.
sub servicemode {
    print LOG "\n\n\n### ".localtime()." ### Starting servicemode\n";
    my $hours=shift;
    if ($hours) {
        print LOG "send message hours=$hours\n";
        # NOTE(review): the requested value is replaced by a fixed 3 hours -
        # confirm whether the caller's value should be honoured instead.
        $hours=3;
        my $rc=system(qq[avmaint infomessage --errcode=1 --errkind=error --ava "ServiceMode_$hours"]);
        if ($rc != 0 ) {
            # $! is only set when the command could not be started (-1);
            # otherwise the exit status is the useful detail.
            my $why=($rc == -1) ? $! : "exit status ".($? >> 8);
            printboth("ERROR: Unknown error running avmaint commands (err: $why)\n");
            printboth("RESOLUTION: Try running command manually: avmaint infomessage --errcode=1 --errkind=error --ava 'ServiceMode_$hours'\n\n");
        } else {
            # Keep the marker for the whole period, and detach stdout as well
            # as stderr so the background shell does not hold the terminal.
            system("(echo $hours > /home/admin/.servicemode; sleep ${hours}h; rm -f /home/admin/.servicemode) >/dev/null 2>&1 &");
            print LOG "started background command to removed servicemode file in $hours hours\n";
            print("\nService Mode enabled for $hours hours\n\n");
        }
    } elsif (my $time=(stat "/home/admin/.servicemode")[9]){
        chomp(my $enabled_hours=`cat /home/admin/.servicemode`);
        my ($sec, $min, $hour, $day,$month,$year)=localtime($time);
        msg("Service Mode",sprintf("Enabled %02d/%02d/%4d at %02d:%02d for %s hours",$month+1,$day,$year+1900,$hour,$min,$enabled_hours));
    }
}
7376########## End servicemode ##########
7377
7378########## Start dtltsecurity ##########
# dtltsecurity - Compare the md5 of /usr/local/avamar/lib/dtlt.war with the
# checksum recorded for the installed GSAN version.
#
# The check is skipped when $PREUPGRADE is set, when dtlt.war does not exist,
# or when the version (the part of $DATANODEVERSION before the first "-") has
# no entry in the table below.  The SLES or RHEL checksum is selected from
# the os recorded for node "(0.s)" in %NODE_INFO.  A checksum that differs
# from the table entry is reported as msg("DTLT Security","FAILED").
#
# NOTE(review): the table presumably lists the checksum of the FIXED
# dtlt.war for each bug, since a mismatch is what gets reported - confirm.
#
# Takes no arguments; the return value is not meaningful.
sub dtltsecurity {
    print LOG "\n\n\n### ".localtime()." ### Starting dtltsecurity\n";
    getavamarver() if (!$DATANODEVERSION);
    getopersys() if (!$OS);
    my %dtltwar = (
        "7.1.1" => { rhelmd5=>"40404ac160002d83733db2bae2e3e901", slesmd5=>"92fa487ef5ec1b617894127620068b7c", bug=>235341 },
        "7.0.2" => { rhelmd5=>"ce1259a51494cdbdc1ade7d212f8ccf9", slesmd5=>"ce1259a51494cdbdc1ade7d212f8ccf9", bug=>239448 },
        "7.0.3" => { rhelmd5=>"c4c56d76b63a4ec4b910f2b66d05c691", slesmd5=>"c4c56d76b63a4ec4b910f2b66d05c691", bug=>235342 },
        "7.2.0" => { rhelmd5=>"873f4fa2fc744223e762c55fd11ab7aa", slesmd5=>"873f4fa2fc744223e762c55fd11ab7aa", bug=>249578 },
    );
    if ($PREUPGRADE) {
        print LOG "skip for preupgrade\n";
        return;
    }
    if (! -e "/usr/local/avamar/lib/dtlt.war") {
        print LOG "skip dtlt.war doesnt exist\n";
        return;
    }

    my ($gsan_maj)=split("-",$DATANODEVERSION);
    my $utility_os=$NODE_INFO{"(0.s)"}{os};
    my $known=exists $dtltwar{$gsan_maj} ? $dtltwar{$gsan_maj} : {};
    my $bugmd5=($utility_os =~ /suse|sles/i) ? $known->{slesmd5} : $known->{rhelmd5};
    # Log the os value that was actually tested.  The old line indexed
    # %NODE_INFO with a stale global $node, which logged an unrelated value
    # and could create a stray %NODE_INFO entry.
    print LOG "gsanmaj: $gsan_maj bugmd5='$bugmd5' os=$utility_os\n";
    return if (!$bugmd5);

    chomp(my $dtltmd5=`md5sum /usr/local/avamar/lib/dtlt.war`);
    $dtltmd5 =~ s/ .*//;    # keep only the checksum column of the md5sum output
    print LOG "md5 dtlt.war $dtltmd5\n";
    if ($dtltmd5 ne $bugmd5) {
        printboth("ERROR: This grid is vulnerable to bug $known->{bug}\n");
        printboth("RESOLUTION: See hot fix $known->{bug} for more information\n\n");
        msg("DTLT Security","FAILED");
    }
}
7410########## End dtltsecurity ##########
7411
7412########## Start tomcatdir ##########
# tomcatdir - Flag a leftover /usr/local/jakarta-tomcat-5.5.9 directory when
# $UPGRADE_VERSION matches 7.1.0, 7.1.1 or 7.1.2.
# Takes no arguments; reports msg("Jakarta Tomcat Directory","FAILED") when
# both conditions hold and stays silent otherwise.
sub tomcatdir {
    print LOG "\n\n\n### ".localtime()." ### Starting tomcatdir\n";

    my $legacy_dir="/usr/local/jakarta-tomcat-5.5.9";
    # Nothing to report unless the directory exists AND the target version
    # is one of the listed ones.
    return unless (-d $legacy_dir and $UPGRADE_VERSION =~ /7\.1\.[012]/);

    printboth("WARNING: Directory /usr/local/jakarta-tomcat-5.5.9/ exists\n");
    printboth("RESOLUTION: See escalation 23513/bug 232101 for more info\n\n");
    msg("Jakarta Tomcat Directory","FAILED");
}
7421########## End tomcatdir ##########
7422
7423########## Start gsanflags ##########
# gsanflags - Verify version specific settings in /data01/cur/gsan.opt.
#
# Only two values of $DATANODEVERSION are examined:
#   7.1.1-145  every rwmutexmaxreadlocks line found must be ="0"
#   7.3.0-226  every node must show rwmutexwritelockoverridetimeout="0" and
#              rwmutexwritelocktimeout="360000"
# The grep is run through mapall() and its output is read back from
# $TMPFILE, where a "(0.x) ssh" line introduces each node's output.
#
# Takes no arguments; failures are reported through printboth() and msg().
sub gsanflags {
    print LOG "\n\n\n### ".localtime()." ### Starting gsanflags\n";
    getavamarver() if (!$DATANODEVERSION);

    if ($DATANODEVERSION eq "7.1.1-145") {
        mapall("",qq[grep rwmutexmaxreadlocks /data01/cur/gsan.opt]);
        my ($node,$err)=("")x2;
        if (open(my $out,$TMPFILE)) {
            while (my $line=<$out>) {
                print LOG $line;
                if ($line =~ /(\(0\..*\)) ssh/) {
                    # Node header: remember the node and never treat the
                    # header itself as a gsan.opt line.
                    $node=$1;
                    next;
                }
                next if ($line !~ /rwmutexmaxreadlocks/);
                $err.=qq[ERROR: Node $node does not have rwmutexmaxreadlocks="0"\n] if ($line !~ /rwmutexmaxreadlocks="0"/);
            }
            close($out);
        }
        if ($err) {
            printboth("${err}RESOLUTION: See KB468543 for instructions to resolve this issue\n\n");
            msg("Check rwmutexmaxreadlocks","FAILED");
        }
    }

    if ($DATANODEVERSION eq "7.3.0-226") {
        mapall("",qq[grep -P 'rwmutexwritelockoverridetimeout|rwmutexwritelocktimeout' /data01/cur/gsan.opt]);
        my ($node,$err)=("")x2;
        my ($saw_override,$saw_timeout)=(0)x2;
        # Records an error for each required flag the current node did not
        # show.  Called whenever a node's output is complete.
        my $finish_node=sub {
            return if (!$node);
            $err.=qq[ERROR: Node $node does not have rwmutexwritelockoverridetimeout=0\n] if (!$saw_override);
            $err.=qq[ERROR: Node $node does not have rwmutexwritelocktimeout=360000\n] if (!$saw_timeout);
        };
        if (open(my $out,$TMPFILE)) {
            while (my $line=<$out>) {
                print LOG $line;
                if ($line =~ /(\(0\..*\)) ssh/) {
                    my $next_node=$1;
                    $finish_node->();
                    $node=$next_node;
                    ($saw_override,$saw_timeout)=(0)x2;
                    next;
                }
                $saw_override=1 if ($line =~ /rwmutexwritelockoverridetimeout="0"/);
                $saw_timeout=1 if ($line =~ /rwmutexwritelocktimeout="360000"/);
            }
            close($out);
        }
        # The last node has no following header line, so it must be
        # evaluated here; previously it was never checked.
        $finish_node->();
        if ($err) {
            printboth("${err}RESOLUTION: See KB484891 for instructions to resolve this issue\n\n");
            # NOTE(review): label kept as-is although this branch checks the
            # rwmutexwritelock* settings.
            msg("Check rwmutexmaxreadlocks","FAILED");
        }
    }
}
7469########## End gsanflags ##########
7470
7471########## Start rpmversions ##########
# rpmversions - Point out hotfix 262291 on Avamar 7.3.0-226 when the avbase
# rpm version reported by getmaxrpmver("avbase","0.s") sorts below "3.0.0-4".
# Takes no arguments; the only output is two printboth() lines.
sub rpmversions {
    print LOG "\n\n\n### ".localtime()." ### Starting rpmversions\n";
    getconfiginfo() if (!$GOTCONFIGINFO);
    getavamarver() if (!$DATANODEVERSION);

    # Only this exact build is affected.
    return if ($DATANODEVERSION ne "7.3.0-226");

    # NOTE(review): this is a string comparison, not a numeric version
    # comparison (e.g. "3.0.0-10" sorts before "3.0.0-4") - confirm that is
    # acceptable for the versions that can occur.
    return if (getmaxrpmver("avbase","0.s") ge "3.0.0-4");

    printboth("ERROR: Avamar version 7.3.0-226 has hotfix 262291 available to fix pam_authenticate errors\n");
    printboth("RESOLUTION: See KB485144 for more info\n\n");
}
7482########## End rpmversions ##########
7483
7484########## Start siteinv ##########
# siteinv - Detect an unpatched /usr/local/avamar/bin/site_inventory.
#
# For each listed MCS version a specific file size identifies the script
# that still needs the named hotfix.  The first table row whose version
# pattern matches $MCSERVER_VERSION and whose size equals the file size is
# reported as msg("Site Inventory","FAILED"); otherwise nothing is reported.
# Takes no arguments.
sub siteinv {
    print LOG "\n\n\n### ".localtime()." ### Starting siteinv\n";
    getavamarver() if (!$DATANODEVERSION);
    my $site_inventory_size=-s "/usr/local/avamar/bin/site_inventory";
    print LOG "site_inventory size $site_inventory_size mcs=$MCSERVER_VERSION\n";

    # [ MCS version pattern, size of the unpatched file, hotfix number ]
    my @unpatched=(
        [ qr/7.3.0-233/, 3652, 278520 ],
        [ qr/7.3.0-226/, 3650, 282188 ],
        [ qr/7.3.1-125/, 3650, 279527 ],
    );
    for my $row (@unpatched) {
        my ($version_re,$bad_size,$hotfix)=@$row;
        next unless ($MCSERVER_VERSION =~ $version_re and $site_inventory_size==$bad_size);
        printboth("ERROR: Hotfix $hotfix to fix site_inventory is not installed\nRESOLUTION: Install hotfix $hotfix\n\n");
        msg("Site Inventory","FAILED");
        last;    # only the first matching row is reported
    }
}
7501########## End siteinv ##########
7502
7503
7504########## Start getavsysreport ##########
# getavsysreport - Collect "avsysreport" output from the nodes into the
# global %AVSYSREPORT.
#
# One command script is sent through mapall($ALL, ...).  Each avsysreport
# call is preceded by an 'echo "section:<name>"' marker so that the combined
# output read back from $TMPFILE can be split again.  Afterwards
#   $AVSYSREPORT{<node>}{<section>}
# holds the raw text ("\n" terminated lines) printed for that section, where
# <node> is the id from the "(0.x) ssh" header line without parentheses.
#
# Lines seen before the first node header, or before a node's first section
# marker, are ignored instead of being filed under whatever node/section
# happened to be current.
#
# Takes no arguments; the return value is not meaningful.
sub getavsysreport {
    print LOG "\n\n\n### ".localtime()." ### Starting getavsysreport\n";

    # avsysreport arguments; the first word doubles as the section name.
    my @reports=(
        "chassis-battery",
        "chassis-component",
        "chassis-fan",
        "chassis-fan-redundancy",
        "chassis-info",
        "controller",
        "controller-battery",
        "controller-connector",
        "controller-enclosure",
        "memory-module",
        "physical-disk controller=0",
        "power-supply",
        "power-supply-redundancy",
        "processor",
        "syserror-context",
        "temperature-probe",
        "virtual-disk",
        "voltage-probe",
    );
    my $cmd="\n";
    for my $report (@reports) {
        my ($name)=split(" ",$report);
        $cmd.=qq[echo "section:$name"\navsysreport $report\n];
    }

    mapall($ALL,$cmd);
    my ($node,$section)=("")x2;
    my $out;
    if (!open($out,$TMPFILE)) {
        print LOG "getavsysreport: unable to read $TMPFILE: $!\n";
        return;
    }
    while (my $line=<$out>) {
        chomp($line);
        if ($line =~ /\((0\..*)\) ssh/){
            $node=$1;
            $section="";    # a new node starts without a current section
            print LOG "New Node:$node\n";
            next;
        }
        if ($line =~ /^section:(\S*)/){
            $section=$1;
            print LOG "New Section:$section\n";
            next;
        }
        next if ($node eq "" or $section eq "");
        # Only the raw text is stored.  The former additional
        # {$name}=$value assignment treated this string as a hash reference,
        # i.e. a symbolic reference to a hash named after the text collected
        # so far, and stored nothing usable.
        $AVSYSREPORT{$node}{$section}.="$line\n";
    }
    close($out);
}
7566########## End getavsysreport ##########
7567
########## Start micron_ssd ##########
# micron_ssd - Check SSD presence and Micron P400m firmware on every node
# found in %AVSYSREPORT.
#
# Args:
#   $exit - when true, results are additionally printed to STDOUT and the
#           program exits with status 0 once the check is done.
#
# For each node the 'physical-disk' text collected by getavsysreport() is
# scanned line by line:
#   * a "Media Type" line naming SSD/Solid/State, or a "Formatted Size"
#     within 10% of the size expected for the node's part number
#     ($PARTLIST{<partno>}{ssd}), counts as "SSD seen";
#   * a "Product ID" matching P400m with a firmware revision below 239 is an
#     error.
# Results: msg("SSD Found","FAILED") when an expected SSD was not seen, and
# msg("Micron SSD Firmware", FAILED|PASSED).
sub micron_ssd {
    my $exit=shift;
    getavsysreport() if (!%AVSYSREPORT);
    getnodetype($exit) if (!%PARTLIST );
    my ($e,$msg,$missing_ssd)=("")x3;
    for my $node (sort keys %AVSYSREPORT) {
        my $physnode=$NODE_LXREF{"($node)"};
        # NOTE(review): %NODE_LXREF is indexed with "($node)" while
        # %NODE_INFO is indexed without parentheses here - confirm which key
        # form carries partno.
        my $partno=$NODE_INFO{"$node"}{partno};
        my $ssd=$PARTLIST{$partno}{ssd};
        print LOG "node $node phys=$physnode desc=$PARTLIST{$partno}{desc} havessd=$ssd\n";
        # Serial number and firmware carry over until the next matching line.
        my ($serialno,$firmware,$sawssd,$product)=("")x4;
        for my $line (split (/[\r\n]/,$AVSYSREPORT{$node}{'physical-disk'})) {
            print LOG "$line" if ($line =~ /Media Type/);
            if ($line =~ /Media Type.*(SSD|Solid|State)/) {
                $sawssd=$line;
                print LOG "sawssd!\n";
            }
            if ($ssd>0 and $line =~ /Formatted Size\s*:\s*(\d+)/) {
                my $ratio=$1/$ssd;
                if ($ratio < 1.10 and $ratio > .90) {
                    $sawssd=$line;
                    print LOG "$line\ndrive within 10% of expected SSD size. Sawssd!\n";
                }
            }
            $serialno=$1 if ($line =~ /Serial Number\s*: (\S*)/ );
            $firmware=$1 if ($line =~ /Firmware Revision\s*:\s*(\d+)/) ;
            next if ($line !~ /Product ID\s*: (.*)/i);
            $product=$1;
            print LOG "$node fw=$firmware sn=$serialno prod=$product sawssd=$sawssd\n";
            next if ($product !~ /P400m/i);
            print LOG "P400 found with firmware $firmware\n";
            # The messages use $product.  They used to interpolate $1, which
            # is no longer the product once the P400m match above has run,
            # so the product name was missing from the text.
            if ($firmware < 239) {
                $e=1;
                $msg.="ERROR: Node $node $product (s/n $serialno) SSD firmware '$firmware' does not match latest known version of '0239'\n" ;
            } else {
                $msg.="INFO: Node $node $product (s/n $serialno) SSD drive has been remediated for hotfix 314445 Micron p400m 1000 day memory leak\n" if $exit;
            }
        }
        if ($ssd>0 and !$sawssd) {
            $missing_ssd.="WARNING: Node $node SSD Drive expected but not identified\n";
        }
    }
    if ($missing_ssd) {
        my $m="${missing_ssd}\nRESOLUTION: Check the Media Type output from 'avsysreport pdisk'. Expected to see 'SSD' or 'Solid State'. Check hardware health if not seen\n NOTE: This may cause the Micron SSD results to be wrong\n\n";
        printboth($m);
        print $m if $exit;
        msg("SSD Found","FAILED");
    }
    if ($e) {
        printboth("${msg}\nRESOLUTION: Apply Hotfix 282000 for Micron P400m 1000 day memory leak issue to prevent node failure requiring rollback\n\n");
        print "\n" if $exit;
        print "\n$msg\nRESOLUTION: Apply Hotfix 282000 for Micron P400m 1000 day memory leak issue to prevent node failure requiring rollback\n\n" if $exit;
        msg("Micron SSD Firmware","FAILED");
    } else {
        print LOG "$msg\n";
        print "\n" if $exit;
        print "\n$msg\n" if $exit;
        msg("Micron SSD Firmware","PASSED");
    }
    exit 0 if $exit;
}
########## End micron_ssd ##########
7632
7633########## Start pem_files ##########
# pem_files - DISABLED: returns immediately, so nothing below ever runs.
#
# KRM 8/26/19: the routine had not been running because mapall was missing
# $ALL.  Once that was fixed it failed on every node and system, because the
# pem files do not exactly match each other.  Comparing create times might
# be an option, but what prompted this check could not be found, so the
# check was switched off.
#
# The retained code compares the md5sum of /home/admin/cert.pem and
# /home/admin/key.pem on each node with the value seen on the previous node
# and reports msg("PEM Files", FAILED|PASSED).
sub pem_files {
    return;

    print LOG "\n\n\n### ".localtime()." ### Starting pem_files\n";
    getinstalledversion() if (!$VERSNUM);
    if ($VERSNUM <= 720) {
        print LOG "dont check before 7.2 (versnum=$VERSNUM)\n";
        return;
    }
    mapall($ALL,qq[ md5sum /home/admin/cert.pem /home/admin/key.pem ]);
    my $err="";
    open(CMD_PIPE,$TMPFILE);
    # Last checksum seen per file; empty until the first node reported one.
    my %previous=(cert=>"",key=>"");
    my $e="";
    while(<CMD_PIPE>) {
        chomp;
        print LOG "$_\n";
        if (/(\(0\..*\)) ssh/){
            $node=$1;
            print LOG "New Node:$node\n";
            next;
        }
        for my $pem ("cert","key") {
            next unless (/(\S*).*$pem.pem/);
            my $sum=$1;
            if ($previous{$pem} and $sum ne $previous{$pem}) {
                $e.="ERROR: Node $node /home/admin/$pem.pem file does not match previous node\n";
            }
            $previous{$pem}=$sum;
        }
    }
    if (!$e) {
        msg("PEM Files","PASSED");
        return;
    }
    printboth("$e\nRESOLUTION: Correct PEM files to be the same on all nodes\n\n");
    msg("PEM Files","FAILED");
}
7678########## End pem_files ##########
7679
7680########## Start remote_management ##########
# _remote_mgmt_scan - Run $cmd on $nodes through mapall() and return one
# "INFO: Node ... does not appear to have remote management enabled" line
# for every node whose output contains no line matching $ip_re.
#
# Args:
#   $nodes - node list as returned by getnodes_gen()
#   $cmd   - shell command executed on each node
#   $ip_re - compiled pattern that identifies a configured IP address line
# Returns the concatenated INFO lines ("" when every node looks enabled).
sub _remote_mgmt_scan {
    my ($nodes,$cmd,$ip_re)=@_;
    mapall("--nodes=$nodes",$cmd);
    my ($node,$found)=("")x2;
    my $enabled=0;
    if (open(my $out,$TMPFILE)) {
        while (my $line=<$out>) {
            chomp($line);
            print LOG "$line\n";
            if ($line =~ /(\(0\..*\)) ssh/){
                # Header of the next node: close out the previous one first.
                my $next_node=$1;
                if ($node and $enabled==0) {
                    print LOG "not enabled: $node enabled=$enabled\n";
                    $found.="INFO: Node $node does not appear to have remote management enabled\n";
                }
                $node=$next_node;
                $enabled=0;
            }
            if ($line =~ $ip_re) {
                print LOG "Saw IP Address. Calling it enabled: $line\n";
                $enabled=1;
            }
        }
        close($out);
    }
    # The last node has no following header line.
    if ($node and $enabled==0) {
        print LOG "not enabled: $node enabled=$enabled\n";
        $found.="INFO: Node $node does not appear to have remote management enabled\n";
    }
    return $found;
}

# remote_management - Report nodes whose remote management interface has no
# IP address configured.
#
#   Gen1-/Gen2-/Gen3-/Gen4- nodes  "racadm getniccfg" via sudo; when $SUDO is
#                                  off only manual instructions are printed
#   Gen4s- nodes                   "ipmitool lan print" channels 3 and 1
#   Gen4t- nodes                   "ipmitool lan print" channels 4 and 1
#
# Every category that produced a finding is printed.  Previously only the
# first non-empty category was shown, so Gen4s/Gen4t findings were dropped
# whenever a Dell or sudo message existed.  msg("Remote Management", ...) is
# INFO when anything was printed and PASSED otherwise.
# Takes no arguments.
sub remote_management {
    print LOG "\n\n\n### ".localtime()." ### Starting remote_management\n";
    getnodetype() if (!$NODETYPE);
    my ($sudomsg,$dellmsg,$intelmsg)=("")x3;

    # Dell gen 1,2,3,4
    my $nodes=getnodes_gen("Gen1-|Gen2-|Gen3-|Gen4-");
    if ($nodes) {
        if (!$SUDO) {
            checkostools() if (!$RAN_OMREPORT);
            $sudomsg="INFO: Unable to check remote management on nodes $nodes because sudo has been disabled in this version\nRESOLUTION: Manually check settings as root with: $RACADM_CMD getniccfg | grep -A 6 'IPv4 settings'\n Output will be blank if racadm is not working. An IP address indicates it is enabled. Anything like 0.0.0.0 is disabled\n\n";
        } else {
            $dellmsg=_remote_mgmt_scan($nodes,qq[ sudo $RACADM_CMD getniccfg | grep -A 6 "IPv4 settings"],qr/IP Address\s*= [1-9]/);
        }
    }

    # Gen4s and Gen4t differ only in the first ipmitool channel queried.
    for my $gen (["Gen4s-",3],["Gen4t-",4]) {
        my ($pattern,$channel)=@$gen;
        $nodes=getnodes_gen($pattern);
        next if (!$nodes);
        $intelmsg.=_remote_mgmt_scan($nodes,qq[ sudo ipmitool lan print $channel; sudo ipmitool lan print 1],qr/IP Address\s*: [1-9]/);
    }

    my $reported=0;
    if ($sudomsg) {
        printboth($sudomsg);
        $reported=1;
    }
    if ($dellmsg) {
        printboth("${dellmsg}RESOLUTION: Check if it should be enabled.\n Manual command as root: racadm getniccfg|grep -A6 'IPv4 settings'\n\n");
        $reported=1;
    }
    if ($intelmsg) {
        printboth("${intelmsg}RESOLUTION: Check if it should be enabled.\n\n");
        $reported=1;
    }
    msg("Remote Management",$reported ? "INFO" : "PASSED");
}
7789
7790########## End remote_management ##########
7791
7792########## Start mccli_java ##########
# mccli_java - Inspect the JRE_14= setting in /usr/local/avamar/bin/mccli.
#
# The last "JRE_14=" line of the script is examined:
#   * a path containing /usr/java/jre1.8.0_102 always produces a WARNING;
#   * otherwise, only when $NODETYPE matches AER or $ADME_VER is set, a
#     value matching /usr/java/latest produces a WARNING.
# NOTE(review): the second warning text says mccli uses \$JAVA_HOME "instead
# of /usr/java/latest", yet it fires when the value DOES match
# /usr/java/latest.  Either the condition or the wording looks inverted -
# confirm against ESC30971/BUG281602.  The logic is left unchanged.
#
# Takes no arguments; results go through printboth() and msg().
sub mccli_java {
    # The banner used to say "Starting remote_management" (copy/paste).
    print LOG "\n\n\n### ".localtime()." ### Starting mccli_java\n";

    # One grep serves both the log and the check.
    my @jre_lines=`grep JRE_14= /usr/local/avamar/bin/mccli`;
    print LOG @jre_lines;
    my $curr_jre=@jre_lines ? $jre_lines[-1] : "";
    chomp($curr_jre);

    if ( $curr_jre =~ m{/usr/java/jre1.8.0_102}) {
        printboth("WARNING: /usr/local/avamar/bin/mccli is using a hardcoded version instead of /usr/java/latest which can cause issues with AER, ADMe and cron utilities\n");
        printboth("RESOLUTION: See ESC30971 and BUG281602\n\n");
        msg("mccli Java Home","WARNING");
        return;
    }
    if ($NODETYPE !~ /AER/ and !$ADME_VER) {
        print LOG "Skipping. No AER or ADME found\n";
        return;
    }
    if ( $curr_jre =~ m{/usr/java/latest} ) {
        printboth("WARNING: /usr/local/avamar/bin/mccli is using \$JAVA_HOME instead of /usr/java/latest which can cause issues with AER, ADMe and cron utilities\n");
        printboth("RESOLUTION: See ESC30971 and BUG281602\n\n");
        msg("mccli Java Home","WARNING");
        return;
    }
}
7814########## End mccli_java ##########
7815
7816########## Start check_services ##########
# check_services - Report nodes on which the smartd daemon is running.
#
# "pidof smartd && echo BADSMARTD" is run through mapall($ALL, ...); every
# output line containing BADSMARTD is attributed to the node named by the
# most recent "(0.x) ssh" header line.  Any hit yields msg("smartd","FAILED");
# nothing is reported when there are no hits.
# Takes no arguments.
sub check_services {
    print LOG "\n\n\n### ".localtime()." ### Starting check_services\n";
    mapall($ALL,qq[ pidof smartd && echo BADSMARTD ]);
    open(CMD_PIPE,$TMPFILE);

    my $current="";     # node whose output is being read
    my @findings;       # one error line per BADSMARTD hit
    while(<CMD_PIPE>) {
        chomp;
        print LOG "$_\n";
        $current=$1 if (/(\(0\..*\)) ssh/);
        push(@findings,"ERROR: Node $current has smartd running\n") if (/BADSMARTD/);
    }
    return if (!@findings);

    printboth(join("",@findings)."RESOLUTION: See KB538624 to stop and disable smartd\n\n");
    msg("smartd","FAILED");
}
7835########## End check_services ##########
7836
7837
7838########## Start mcserver_xml_diff ##########
# mcserver_xml_diff - Compare the current mcserver.xml settings (%MCSERVER)
# with a copy saved next to this script as ".phc-mcserver.xml".
#
# First run (no saved copy): the live mcserver.xml is copied and the sub
# returns after reporting INFO (or WARNING when the copy failed).
# Later runs: the saved copy is parsed, flattened by SimpleXMLTree() into
# %xmltree and copied to %MCSAVE.  Every key of %MCSERVER whose saved value
# differs is reported as a changed field unless the key or the saved value
# matches one of the ignore patterns.  Saved keys that no longer exist are
# only logged.
#
# Results: msg("mcserver.xml", INFO) when something changed, PASSED when
# nothing did, WARNING when the saved copy cannot be written or parsed.
# Takes no arguments.
sub mcserver_xml_diff {
    print LOG "\n\n\n### ".localtime()." ### Starting mcserver_xml_diff\n";
    getconfiginfo() if (!$GOTCONFIGINFO);
    my $filename= dirname($0)."/.phc-mcserver.xml";
    print LOG "filename=$filename\n";
    if (! -e "$filename") {
        # Only claim success when the copy really worked.
        if (system("cp","/usr/local/avamar/var/mc/server_data/prefs/mcserver.xml",$filename) != 0) {
            print LOG "cp to $filename failed (status $?)\n";
            printboth("WARNING: Unable to save a copy of mcserver.xml to $filename for future comparisons\n\n");
            msg("mcserver.xml","WARNING");
            return;
        }
        printboth("INFO: mcserver.xml has been saved to use for future comparisons.\n");
        msg("mcserver.xml","INFO");
        return;
    }

    # Parse the saved copy.  parsefile() dies on malformed XML; a damaged
    # saved copy must not abort every remaining check.
    print LOG "parse phc-mcserver.xml\n";
    my $tree=eval { XML::Parser->new( Style => 'Tree' )->parsefile("$filename") };
    if (!$tree) {
        print LOG "unable to parse $filename: $@\n";
        printboth("WARNING: Saved copy $filename could not be parsed\n");
        printboth("RESOLUTION: To update proactive check with the current mcserver.xml settings run proactive_check.pl --mcserver\n\n");
        msg("mcserver.xml","WARNING");
        return;
    }
    SimpleXMLTree($tree);
    %MCSAVE=%xmltree;
    my $e=0;

    # Patterns (regular expressions) for fields that are expected to differ.
    my @ignore=qw(
        backuponlyAP
        backuprestoreAP
        replonlyAP
        replonlyID
        MCUSERAP
        restoreonlyAP
        rootAP
        viewuserAP
        ^version$
        smtpHost
        site_name
        brmHostname
        customerContactInfoDays
        db_schema_version
        db_views_schema_version
        email_logs_tar_cmd
        EventHandler.SnapupsTable
        hfsaddr
        javaDir
        jdbcDriver
        local_hfsaddr
        jreVersion
        minimum_mcgui_version
        min_supported_version
        rmi_ssl_keystore_ap
        schedule_version
        ^obsolete
    );
    my $regex=join("|", @ignore ) ;
    my $ignore_re=qr/$regex/;

    # Pass 1: fields present now.  Whatever is still in %MCSAVE afterwards
    # exists only in the saved copy.
    for my $key (sort keys %MCSERVER){
        delete $MCSAVE{$key} if ( $MCSERVER{$key} eq $MCSAVE{$key} ) ;
        next if (! defined $MCSAVE{$key}); # new field or the same
        if ($key =~ $ignore_re or $MCSAVE{$key} =~ $ignore_re) {
            print LOG "mismatch regex skip: new $MCSERVER{$key} save: $MCSAVE{$key}\n";
            delete $MCSAVE{$key} ;
            next;
        }
        print LOG "mismatch: new $MCSERVER{$key} save: $MCSAVE{$key}\n";
        printboth("INFO: Field ".basename($key)." has changed from $MCSAVE{$key} to $MCSERVER{$key}\n");
        delete $MCSAVE{$key} ;
        $e=1;
    }

    # Pass 2: fields that disappeared are logged only, never reported.
    for my $key (sort keys %MCSAVE) {
        if ($key =~ $ignore_re) {
            print LOG "removed regex skip: save: $MCSAVE{$key}\n";
            next;
        }
        print LOG "removed: save: $MCSAVE{$key}\n";
    }

    if ($e) {
        printboth("NOTE: Particularly for upgrades this is experimental. Please do not make any changes without understanding their impact\n\n");
        printboth("RESOLUTION: This may be a normal expected change to mcserver.xml. You need to determine why the values changed\n");
        printboth(" To update proactive check with the current mcserver.xml settings run proactive_check.pl --mcserver\n\n");
        msg("mcserver.xml","INFO");
    } else {
        msg("mcserver.xml","PASSED");
    }

    return;
}
7921########## END mcserver_xml_diff ##########
7922#
7923#
7924#
7925
7926#
7927
7928#
7929
7930#
7931
7932#
7933
7934#
7935#
7936########## Start capacity.pl ##########
7937sub capacity_info {
7938
7939#
7940# $Id: capacity.sh
7941#
7942# 3.71 - 9/28/17 dont print DDR files if ava only.
7943# 3.7 - 9/1/16 fix overstating bytesmod, scan when multiple backups replicated for a client at once
7944# 3.6 - 3/3/16 add sslmod=prefer to mcdb connection
7945# 3.5 - 9/16/14 fixed top change rate clients comparing dd&ava new to ava total.
7946# 3.4 - fixed div/0 err if no avamar
7947# 3.3 - fixed <="6" to <"7" on version
7948# 3.2 - --ava was showing ddr top clients
7949# 3.1 - dont allow to run as root. the version check doesnt work
7950# 3.0 - fix bug from 2.9. repl DD info only available on 7.0+
7951# 2.9 - identify repl target DD backups
7952# 2.8 - identify repl DD backups, print note about DDR new
7953# 2.7 - fixed problem identifying DDR clients, replication source, net, --domain, # of files
7954# 2.6 - fixed problem identifying version
7955# 2.5 - rewrite using perl, added sql injection rejection, start, end, gb, norepl, ava, ddr flags.
7956
7957use Switch;
7958use DBI;
7959
7960 my $LIMIT=5;
7961 my $IN_DAYS=14;
7962
7963 my $SIZE=1024*1024;
7964 my $SIZEID="mb";
7965 my ($INCL_DDR_HOSTNAME,$INCL_REPL_DDR,$START,$END,$DATEWHERE);
7966 my (%CLI,%SENT,$DDR_TOTAL_FILES,%FILES,%dates);
7967 if ( $> == 0 ) {
7968 print "This program cannot be run as root. Please change to the 'admin' user.\n";
7969 exit 1;
7970 }
7971
7972 my @gsan=grep(/^\s*version:/, `gsan --version`);
7973 (my $VERSION=$gsan[0]) =~ s/^\s*version:\s*//;
7974 if ($VERSION>"5" or !$VERSION) {
7975 $INCL_DDR_HOSTNAME=", ddr_hostname";
7976 $INCL_REPL_DDR=($VERSION>="7" or !$VERSION) ? ", b.ddr_hostname" : ",''" ;
7977 }
7978
7979 my $BACKUP_TYPES="1,2,12";
7980 my($AVAMAR_ONLY,$DDR_ONLY,$INCL_DDR,$NO_REPL, $CRITERIA, $GRAND_TOTAL_ADDED, $GRAND_TOTAL_FILES);
7981 my $PROG = "capacity.sh v3.71";
7982 $CRITERIA="";
7983 $REPLCRITERIA="";
7984 $ACTCRITERIA="";
7985
7986# Open Database
7987 my $dbh = DBI->connect("dbi:Pg:dbname=mcdb;port=5555;sslmode=prefer", "admin", "" );
7988 if (!$dbh) {
7989 printboth("ERROR: Could not connect to MCS database.\n");
7990 exit 1;
7991 }
7992
7993# Arguments
7994 foreach(@ARGV) {
7995 if($_ !~ /^--([^=]+)=?(.*)$/) {
7996 print STDERR "Invalid command line argument: $_\n";
7997 exit;
7998 }
7999 my $arg = $1;
8000 my $value = $2;
8001
8002 #if ( $dbh->index($value,"'")>0) {
8003 # print "ERROR: Invalid arugment $arg=$value. ' is not allowed\n";
8004 # exit 1;
8005 #}
8006
8007 switch ($arg) {
8008 case /ca|de/ {my $foo=bar}
8009 case /^h/ { xdoHelp(1); exit 0;} #help
8010 case /^v/ { print "$PROG\n"; exit 0; } #version
8011 case /^debug/ { $DEBUG=1 ; }
8012 case /^days/ { $IN_DAYS=$value; } # days
8013 case /^top/ { $LIMIT=$value; } #top
8014 case /^ava/ { $AVAMAR_ONLY=1;} #avamar
8015 case /^ddr/ { $DDR_ONLY=1; } #ddr
8016 case /^norepl/ { $BACKUP_TYPES="1,2"; } #norepl
8017 case /^client/ { $CRITERIA .=" and client_name like ".$dbh->quote("%$value%"); } #client
8018 case /^domain/ { $CRITERIA.=" and dpn_domain like ".$dbh->quote("%$value%"); } #domain
8019 case /^excl/ { $CRITERIA.=" and dpn_domain !~ $dbh->quote($value) and client_name !~ $dbh->quote($value)" }
8020 case /^plugin/ { $CRITERIA.=" and pid_number in ($value) "; $REPLCRITERIA.=" and pid_number in ($value) " }
8021 case /^gb/ { $SIZE=1024*1024*1024; $SIZEID="gb";} #gb
8022 case /^start|^after/ { chomp($START=`date --date="$value" '+%Y-%m-%d'`); if (!$START) {print "Bad Date\n"; exit 1};; } #start
8023 case /^end|before/ { chomp($END=`date --date="$value" '+%Y-%m-%d'`); if (!$END) {print "Bad Date\n"; exit 1};; } #end
8024 else { print "Invalid Command line: --$arg\nTry --help\n"; exit; }
8025 }
8026 }
8027
8028
8029
8030 if ($START) {
8031 $DATEWHERE=" started_ts >= '$START' ";
8032 $DATEWHERE.=" and started_ts <= '$END' " if ($END);
8033 } else {
8034 $DATEWHERE=" (started_ts + INTERVAL '$IN_DAYS DAY') >= date(NOW()) ";
8035 }
8036
8037 my $SQL=qq[ select date(started_ts) as started_ts, bytes_modified_sent, bytes_scanned, num_mod_files,
8038 dpn_domain||'/'||client_name as client_name, num_of_files,server,cid,wid,'' as backup_type $INCL_DDR_HOSTNAME
8039 from activities
8040 where <date>
8041 and type in ($BACKUP_TYPES)
8042 and bytes_scanned>0 and bytes_modified_sent>=0
8043 $CRITERIA $ACTCRITERIA
8044 UNION
8045 select date(a.started_ts) as started_ts, a.bytes_modified_sent, a.bytes_scanned, a.num_mod_files,
8046 a.dpn_domain||'/'||a.client_name as client_name, a.num_of_files, '',a.cid,a.wid, 'REPL' as backup_type $INCL_REPL_DDR
8047 from repl_activities a
8048 join v_repl_backups b on a.cid=b.cid and a.wid=b.wid
8049 where <date>
8050 and a.type in ($BACKUP_TYPES)
8051 and a.bytes_scanned>0 and a.bytes_modified_sent>=0
8052 $CRITERIA $REPLCRITERIA
8053 ];
8054
8055# Run Capacity Report
8056 *OUTPUT=*STDOUT;
8057 my $PRINT="yes";
8058 my $ddr_repl_msg;
8059 get_capacity_info($DATEWHERE,$IN_DAYS);
8060 $PRINT="";
8061 my $n=($IN_DAYS<30) ? 30 : $IN_DAYS+30;
8062 get_capacity_info("(started_ts + INTERVAL '$n DAY') >= date(NOW())",$n);
8063 $n+=30;
8064 get_capacity_info("(started_ts + INTERVAL '$n DAY') >= date(NOW())",$n);
8065
8066 print $ddr_repl_msg if ($ddr_repl_msg);
8067#
8068 $LIMIT=10 if (!$LIMIT);
8069 big_clients();
8070 client_files();
8071 exit 0;
8072
8073
8074########## Start sub doHelp() ##########
8075# Help/Usage sub routine
# xdoHelp - Print the usage text of the capacity report to STDOUT.
# The text starts with $PROG, the program name/version string declared in
# the enclosing capacity_info.  The argument passed by the caller is not
# used.  The flag mentioned in the text is spelled "--ddr" (it read "-ddr"),
# matching the "--name" form the argument parser requires.
sub xdoHelp {
    print <<"xxEndHelpxx";

$PROG

This will print information regarding the amount of new data being added to the grid and amount of data being removed by garbage collection.
If there are backups going to data domain additional columns will be added to show how much data is going to data domain and how much to avamar.
The amount scanned and change rate will include both Avamar and Data Domain data. Removed and Net are always just Avamar data.
You can use the --avamar or --ddr flags so that the Scanned and Rate columns will only reflect Avamar or Data Domain information.


--version Display the program version
--help Display the help screen
--days=n Limit data to backups in the past "n" days. Defaults to 14
--client=x Limit data to backups with clients that contain "x"
--domain=x Limit data to backups in domains that contain "x"
--avamar Limit data to backups with Avamar as the target
--ddr Limit data to backups with Data Domain as the target
--norepl Do not include replication backups
--gb Report in GB instead of MB
--top=n Limit large client list to "n". Default 5
--start=x Start report from date mm/dd/yy
--end=x End report on date mm/dd/yy


xxEndHelpxx
}
8103
8104########## End of sub doHelp() ##########
8105
8106
8107
8108########## Start capacity_info ########
8109sub get_capacity_info {
8110 $DATEWHERE=shift;
8111 $IN_DAYS=shift;
8112 my (%dates,%gcinfo,%buinfo,$sql,$sth);
8113
8114# GCINFO
8115 (my $tmp=$DATEWHERE) =~ s/started_ts/start_time/g;
8116 $sql = qq[ select start_time, elapsed_time, result, bytes_recovered, passes
8117 from v_gcstatus
8118 where $tmp
8119 ];
8120
8121 $sth = $dbh->prepare($sql);
8122 $sth->execute;
8123
8124 while ( my @row = $sth->fetchrow_array() ) {
8125 my ($start,$time)=split(" ",$row[0]);
8126 $dates{$start}=$start;
8127 $gcinfo{$start}=[ @row ];
8128 }
8129 $sth->finish;
8130
8131# Backup and Destination Replication Info
8132 ($sql=$SQL) =~ s/<date>/$DATEWHERE/g;
8133print "$sql" if $DEBUG;
8134 $sth = $dbh->prepare($sql);
8135 $sth->execute;
8136
8137#date(started_ts) as started_ts, bytes_modified_sent, bytes_scanned, num_mod_files, dpn_domain||/||client_name as client_name, num_of_files $INCL_DDR_HOSTNAME
8138
8139 while ( my $R = $sth->fetchrow_hashref() ) {
8140 my $ddr=($R->{ddr_hostname}) ? 1 : 0;
8141 if ($R->{backup_type} eq "REPL" and $VERSION<"7") {
8142 $ddr_repl_msg="\nNOTE: In this version of Avamar replication data cannot be identified as Avamar or Data Domain.\n";
8143 $ddr_repl_msg.=" All replication data will end up under Avamar New.\n";
8144 }
8145
8146# Only use summary record (repl_activities) once for each detail record (repl_backups)
8147 #my $key=$R->{cid}.$R->{wid}.$R->{j
8148 if ($CIDWID{$R->{cid}.$R->{wid}}) {
8149 $R->{bytes_modified_sent}=0;
8150 $R->{num_of_files}=0;
8151 $R->{bytes_scanned}=0;
8152 print "Skip:$R->{started_ts} mod:$R->{bytes_modified_sent} $R->{client_name} $R->{cid}-$R->{wid}\n" if $DEBUG;
8153 }
8154 $CIDWID{$R->{cid}.$R->{wid}}=1;
8155
8156 if ($PRINT) {
8157 if ($R->{ddr_hostname}) {
8158 $DDR_TOTAL_FILES+=$R->{num_mod_files};
8159 $FILES{$R->{client_name}} += $R->{num_mod_files};
8160 }
8161 $SENT{$R->{client_name}} += $R->{bytes_modified_sent};
8162 $CLI{$R->{client_name}}{scan} += $R->{bytes_scanned};
8163 $CLI{$R->{client_name}}{totfiles} += $R->{num_of_files};
8164 $CLI{$R->{client_name}}{ddr} = $R->{ddr_hostname};
8165
8166 }
8167 $buinfo{$R->{started_ts}}{totsent}+= $R->{bytes_modified_sent};
8168 $buinfo{$R->{started_ts}}{totscan}+= $R->{bytes_scanned} ;
8169 $buinfo{$R->{started_ts}}{totcnt}+= 1;
8170 if (!$ddr) {
8171 $buinfo{$R->{started_ts}}{sent} += $R->{bytes_modified_sent};
8172 $buinfo{$R->{started_ts}}{scan} += $R->{bytes_scanned} ;
8173 $buinfo{$R->{started_ts}}{cnt} += 1;
8174 $dates{$R->{started_ts}}=$R->{started_ts};
8175 } else {
8176 $buinfo{$R->{started_ts}}{ddrsent} += $R->{bytes_modified_sent};
8177 $buinfo{$R->{started_ts}}{ddrscan} += $R->{bytes_scanned} ;
8178 $buinfo{$R->{started_ts}}{ddrcnt} += 1;
8179 $INCL_DDR=1;
8180 $dates{$R->{started_ts}}=$R->{started_ts};
8181 }
8182 }
8183
8184
8185# Print headings
8186 if ($PRINT) {
8187 my $e="==========";
8188 printf ("\n%-10s "," DATE");
8189 printf OUTPUT ("%10s %4s ","AVAMAR NEW","#BU") if (!$DDR_ONLY) ;
8190 printf OUTPUT ("%10s %4s ","DDR NEW","#BU") if (!$AVAMAR_ONLY and $INCL_DDR) ;
8191 printf OUTPUT ("%10s ","SCANNED");
8192 printf OUTPUT ("%10s %5s %4s %10s ","REMOVED","MINS","PASS","AVAMAR NET") if (!$DDR_ONLY);
8193 printf OUTPUT ("%10s\n","CHG RATE");
8194
8195 printf OUTPUT ("%10s ",$e);
8196 printf OUTPUT ("%10s=== %4s ",$e,"====") if (!$DDR_ONLY) ;
8197 printf OUTPUT ("%10s=== %4s ",$e,"====") if (!$AVAMAR_ONLY and $INCL_DDR) ;
8198 printf OUTPUT ("%10s=== ",$e);
8199 printf OUTPUT ("%10s=== %5s %4s %10s=== ",$e,"====","====",$e) if (!$DDR_ONLY);
8200 printf OUTPUT ("%10s\n",$e);
8201 }
8202# Print Detail Lines
8203 my $cnt=0;
8204 my($array,$buchg,$tbuscan,$tgcrecovered,$tbunew,$tgcelap,$date);
8205 my ($gcstart,$gcelap,$gcresult,$gcrecovered,$gcpass)=("");
8206 my ($gt_totsent, $gt_totscan, $gt_ddrsent, $gt_ddrcnt, $gt_sent, $gt_cnt, $gt_gcrecovered, $gt_gcelap, $gt_totcnt,$gt_gcpass)=(0);
8207 foreach $date (sort keys %dates ) {
8208 my $array = $gcinfo{$date};
8209 ($gcstart,$gcelap,$gcresult,$gcrecovered,$gcpass)=@$array;
8210 $gcrecovered*=-1;
8211 my $totsent=$buinfo{$date}{totsent};
8212 my $totscan=$buinfo{$date}{totscan};
8213
8214 if ($AVAMAR_ONLY) {
8215 $totsent=$buinfo{$date}{sent};
8216 $totscan=$buinfo{$date}{scan};
8217 }
8218
8219 if ($DDR_ONLY) {
8220 $totsent=$buinfo{$date}{ddrsent};
8221 $totscan=$buinfo{$date}{ddrscan};
8222 }
8223 my $pchange = ($totscan==0) ? " N/A" : sprintf("%9.2f%",$totsent/$totscan*100) ;
8224 if ($PRINT) {
8225 printf ("%10s ",$date);
8226 printf OUTPUT ("%10d $SIZEID %4d ",$buinfo{$date}{sent}/$SIZE,$buinfo{$date}{cnt}) if (!$DDR_ONLY) ;
8227 printf OUTPUT ("%10d $SIZEID %4d ",$buinfo{$date}{ddrsent}/$SIZE,$buinfo{$date}{ddrcnt}) if (!$AVAMAR_ONLY and $INCL_DDR) ;
8228 printf OUTPUT ("%10d $SIZEID ", $totscan / $SIZE );
8229 printf OUTPUT ("%10d $SIZEID %5d %4d %10d $SIZEID ",$gcrecovered/$SIZE, $gcelap/60, $gcpass, ($buinfo{$date}{sent} + $gcrecovered)/$SIZE) if (!$DDR_ONLY);
8230 printf OUTPUT ("%10s\n", $pchange);
8231 }
8232 $gt_totsent += $totsent;
8233 $gt_totscan += $totscan;
8234 $gt_ddrsent += $buinfo{$date}{ddrsent};
8235 $gt_ddrcnt += $buinfo{$date}{ddrcnt};
8236 $gt_sent += $buinfo{$date}{sent} ;
8237 $gt_cnt += $buinfo{$date}{cnt} ;
8238 $gt_gcrecovered += $gcrecovered;
8239 $gt_gcelap += $gcelap;
8240 $gt_totcnt++;
8241 $gt_gcpass+=$gcpass;
8242 }
8243 if ($PRINT) {
8244 printf ("===================================");
8245 printf OUTPUT ("===================") if (!$DDR_ONLY) ;
8246 printf OUTPUT ("===================") if (!$AVAMAR_ONLY and $INCL_DDR) ;
8247 printf OUTPUT ("=======================================") if (!$DDR_ONLY);
8248 printf OUTPUT ("\n");
8249 $GRAND_TOTAL_ADDED=0;
8250 $GRAND_TOTAL_ADDED+=$gt_sent if (!$DDR_ONLY) ;
8251 $GRAND_TOTAL_ADDED+=$gt_ddrsent if (!$AVAMAR_ONLY) ;
8252 }
8253 printf ("%3d DAY AVG",$IN_DAYS);
8254 printf OUTPUT ("%10d $SIZEID %4d ",$gt_sent/$SIZE/$gt_totcnt,$gt_cnt/$gt_totcnt) if (!$DDR_ONLY) ;
8255 printf OUTPUT ("%10d $SIZEID %4d ",$gt_ddrsent/$SIZE/$gt_totcnt,$gt_ddrcnt/$gt_totcnt,) if (!$AVAMAR_ONLY and $INCL_DDR) ;
8256 printf OUTPUT ("%10d $SIZEID ", $gt_totscan / $SIZE/$gt_totcnt );
8257 printf OUTPUT ("%10d $SIZEID %5d %4d %10d $SIZEID ",$gt_gcrecovered/$SIZE/$gt_totcnt, $gt_gcelap/60/$gt_totcnt, $gt_gcpass/$gt_totcnt, ($gt_sent + $gt_gcrecovered)/$SIZE/$gt_totcnt) if (!$DDR_ONLY);
8258 my $pchange = ($gt_totscan==0) ? " N/A" : sprintf("%9.2f%",$gt_totsent/$gt_totscan*100) ;
8259 printf OUTPUT ("%10s\n", $pchange);
8260}
8261########## End capacity_info ########
8262
8263########## Start big_clients ########
# big_clients
# Prints the "Top Change Rate Clients" report.
#
# Reads (all defined outside this sub):
#   %SENT               client key => amount of new data sent
#   %CLI                client key => { scan => bytes scanned, ddr => true when DDR }
#   $GRAND_TOTAL_ADDED  total new data, used for the "% OF TOTAL" column
#   $SIZE, $SIZEID      divisor and label for the reporting unit
#   $DDR_ONLY, $AVAMAR_ONLY   restrict the listing to one client type
#   $LIMIT              stop after this many printed rows
#
# Takes no arguments. Headings go to the default output handle and detail
# rows to the OUTPUT handle, the same split the capacity report above uses.
sub big_clients {

    # Guard the heading the same way the detail rows guard $SIZE, so an unset
    # or zero unit divisor cannot abort the report with a division by zero.
    my $total_added = ($SIZE>0) ? int($GRAND_TOTAL_ADDED/$SIZE) : 0;
    print "\nTop Change Rate Clients. Total Data Added ".$total_added. "$SIZEID\n\n";

    # Heading and underline share one five-column format. The underline row
    # previously omitted the %7s conversion, which shifted the rule under
    # CHGRATE/TYPE/CLIENT and silently dropped its last argument.
    printf ("%13s %10s %7s %4s %s\n","NEW DATA","% OF TOTAL","CHGRATE","TYPE","CLIENT");
    printf ("%13s %10s %7s %4s %s\n","=============","==========","=======","====","======");

    # NOTE(review): %SENT and %CLI are presumably filled from the backup
    # activity query earlier in this file (bytes_modified_sent, bytes_scanned,
    # dpn_domain/client_name, ...) - confirm against the caller.

    # Largest senders first.
    my @keys = sort { $SENT{$b} <=> $SENT{$a} } keys %SENT;
    my $cnt=0;
    for my $client ( @keys ) {
        my $type=($CLI{$client}{ddr}) ? "DDR" : "AVA";
        next if ($DDR_ONLY and $type ne "DDR");
        next if ($AVAMAR_ONLY and $type ne "AVA");

        # Change rate = new data as a percentage of data scanned. The guard is
        # numeric (>): the string operator gt treats "0.0" as greater than "0"
        # and would let a zero divisor through.
        my $chgrate=0;
        $chgrate=($SENT{$client}/$CLI{$client}{scan})*100 if ($CLI{$client}{scan} > 0) ;

        my $pct=($GRAND_TOTAL_ADDED>0) ? ($SENT{$client}/$GRAND_TOTAL_ADDED)*100 : 0;

        # Strip a trailing "_" plus 22 characters from the key for display
        # (presumably a generated client-id suffix - confirm).
        (my $pclient=$client) =~ s/_.{22}$//;

        my $sz=($SIZE>0) ? $SENT{$client}/$SIZE : 0;
        printf OUTPUT ("%10d $SIZEID %9.2f %6.2f%% %4s %s\n",$sz,$pct,$chgrate,$type,$pclient);

        # Only rows that were actually printed count toward $LIMIT.
        $cnt++; last if ($cnt==$LIMIT);
    }
    print OUTPUT "\n";
}
8289
8290########## End big_clients ########
8291
8292########## Start client_files########
# client_files
# Prints the "Top File Count Clients" report for DDR clients.
#
# Reads (all defined outside this sub):
#   %FILES            client key => number of files
#   %CLI              client key => { ddr => true when the client is DDR }
#   $DDR_TOTAL_FILES  total file count, used for the "% OF TOTAL" column
#   $AVAMAR_ONLY      when set the report is skipped entirely
#   $LIMIT            stop after this many printed rows
#
# Takes no arguments. Returns immediately when there are no DDR files to
# report or when only Avamar clients were requested.
sub client_files{
    return if (!$DDR_TOTAL_FILES or $AVAMAR_ONLY);
    print "\nTop File Count Clients. Total Files Added $DDR_TOTAL_FILES\n\n";

    # Four columns, matching the underline and the detail rows. The heading
    # previously carried a stray %7s, which pushed TYPE and CLIENT into the
    # wrong columns and left the final %s without an argument.
    printf ("%13s %10s %4s %s\n","NUM FILES","% OF TOTAL","TYPE","CLIENT");
    printf ("%13s %10s %4s %s\n","=============","==========","====","======");

    # Highest file counts first.
    my @keys = sort { $FILES{$b} <=> $FILES{$a} } keys %FILES;
    my $cnt=0;
    for my $client ( @keys ) {
        # Only DDR clients are listed in this report.
        my $type=($CLI{$client}{ddr}) ? "DDR" : "AVA";
        next if ($type ne "DDR");

        # $DDR_TOTAL_FILES is known to be non-zero here (see the guard above).
        my $pct=($FILES{$client}/$DDR_TOTAL_FILES)*100;

        # Strip a trailing "_" plus 22 characters from the key for display
        # (presumably a generated client-id suffix - confirm).
        (my $pclient=$client) =~ s/_.{22}$//;

        printf OUTPUT ("%13d %9.2f %4s %s\n", $FILES{$client}, $pct, $type,$pclient);

        # Only rows that were actually printed count toward $LIMIT.
        $cnt++; last if ($cnt==$LIMIT);
    }
    print OUTPUT "\n";
}
8311
8312########## End client_files ########
8313
8314}
8315########## End capacity.pl ##########
8316
8317
8318
8319############################################################################################################################################
8320
8321##########
8322
8323########## START AvamarUtilizationCalc.sql
8324
8325##########
8326
8327############################################################################################################################################
8328
8329
# AvamarUtilizationCalc
# Runs the embedded "Avamar System Utilization Calculator" psql script.
#
# Steps:
#   1. Write the SQL below to tmp.sql in the current working directory.
#   2. Run it with: psql mcdb -p 5555 -f tmp.sql
#   3. The script ends by COPYing its capacity_info table to a CSV file and
#      selecting 'COMPLETED. Output saved to <file>'. When that text shows up
#      in the psql output, print the contents of <file>.
#   4. Remove tmp.sql.
#
# Takes no arguments. Problems writing the script or reading the result file
# are reported with warn; the sub never dies.
#
# NOTE(review): tmp.sql is a fixed name in the current directory, so two
# concurrent runs from the same directory would overwrite each other's script.
sub AvamarUtilizationCalc {

    my $fh;
    if (!open($fh, '>', "tmp.sql")) {
        warn "AvamarUtilizationCalc: cannot write tmp.sql: $!\n";
        return;
    }
    # q[] does not interpolate, so $HOSTNAME, $(...) and the psql backslash
    # commands below reach psql exactly as written.
    print $fh
q[-- Avamar System Utilization Calculator
---------------------------------------------------------------------------------
-- May, 2018, by Vijay Anand
\set VERSION '''0.06'''
---------------------------------------------------------------------------------
-- This SQL script is used to estimate Avamar system capacity/Utilization
-- Synopsis/Exec Overview:
-- =======================
-- 1. The grid BACKUP capability is measured against goal of finishing backups with headroom.
-- The reasoning is that Backups should complete in 75% of the backup window.
-- This will allow the system to recover from 24 hours down-time, and still complete backups.
--
-- We need to measure backup completion rate (backups/hr, TB/hr) CAPABILITY.
-- We define CAPABILITY as 95'th percentile of hourly completion rate.
-- This will get us 2 numbers : backups per hour, and TB per hour.
-- Extrapolate this to a backup window of 6.5 hours, and you get grid CAPABILITY:
-- which is in Number of backups per day , and TB per day.
-- Evaluate these CAPABILITY numbers vs. Current ACTUAL numbers.
--
-- 2. Replication must complete in < 5 hours.
-- This leaves 50% of the 10-hr replication window available in case of 24 hour down time.
-- There is also a goal to replicate a backup in < 24 hr after creation.
-- This makes achieving replication goals a priority over backup goals.
--
-- We measure the replication rate (repl/hr, TB/hr) CAPABILITY.
-- We define CAPABILITY as 95'th percentile of hourly completion rate.
-- (Same as backup) We estimate hourly, and extrapolate to daily CAPABILITY.
-- Evaluate these CAPABILITY numbers vs. Current ACTUAL numbers.
--
-- 3. Does HFS check+GC complete in < 4 hours ?
-- Estimate how close we are to 4 hours (percentage)
---------------------------------------------------------------------------------
-- Run this script (AvamarUtilizationCalc.sql) using :
-- psql -p 5555 mcdb -U admin -F AvamarUtilizationCalc.sql
---------------------------------------------------------------------------------
--
---------------------------------------------------------------------------------
-- S C R I P T G L O B A L V A R I A B L E S
---------------------------------------------------------------------------------
\echo ======= Avamar System Utilization Calculator VERSION :VERSION ==========
\set ON_ERROR_STOP on
\set Output_file `echo /tmp/UtilizationInfo-$HOSTNAME-$(date +%F).csv`
-- Examine data this far back, to get 95'th percentile ....
\set lookback_interval '''200 days'''

-- Use 75% of the backup window (25% margin)
\set backup_window_hours 10
\set backup_criteria_hours :backup_window_hours * 0.75

-- Use 50% of the replication window. Leave 50% margin
\set repl_windows_hours 18
\set repl_criteria_hours :repl_windows_hours * 0.5

-- Maintainance : CP + Hfscheck should finish in 4 hours
\set cp_hfs_criteria_hours 4

--Number of chunks to use in percentile calcs
\set nbr_of_chunks 20

-- Percentile for "Daily Actual" Backups and replication
-- Egregious use of ENV variables to let shell do math, because psql can't
\set daily_actual_percentile 75
\set hourly_percentile 95
\setenv env_daily_actual_percentile :daily_actual_percentile
\echo ======= Test1
\setenv env_hourly_percentile :hourly_percentile
\setenv env_nbr_of_chunks :nbr_of_chunks
\set select_daily_chunk_nbr `echo $(( $env_nbr_of_chunks * $env_daily_actual_percentile / 100 ))`
\echo selected daily chunk nbr (:select_daily_chunk_nbr of :nbr_of_chunks) == :daily_actual_percentile th percentile
\set select_hourly_chunk_nbr `echo $(( $env_nbr_of_chunks * $env_hourly_percentile / 100 ))`
\echo selected hourly chunk nbr (:select_hourly_chunk_nbr of :nbr_of_chunks) == :hourly_percentile th percentile

---------------------------------------------------------------------------------
\echo Creating temporary RESULTS table and initial Views ....
---------------------------------------------------------------------------------
-- This will get auto-deleted at end of session (TEMPORARY)
CREATE TEMPORARY TABLE IF NOT EXISTS capacity_info
 (
 name VARCHAR(80) NOT NULL UNIQUE PRIMARY KEY,
 value VARCHAR(80)
 ) ;
INSERT INTO capacity_info
 SELECT 'Timestamp', to_char(now(),'YYYY-MM-DD HH24:MI TZ')
;
INSERT INTO capacity_info SELECT 'Param_VERSION', :VERSION;
INSERT INTO capacity_info SELECT 'Param_lookback_interval', :lookback_interval;
INSERT INTO capacity_info SELECT 'Param_backup_criteria_hours', :backup_criteria_hours;
INSERT INTO capacity_info SELECT 'Param_repl_criteria_hours', :repl_criteria_hours;
INSERT INTO capacity_info SELECT 'Param_cp_hfs_criteria_hours', :cp_hfs_criteria_hours;
INSERT INTO capacity_info
 SELECT property, value from property_value
 where property IN ('systemname','systemid')
;
INSERT INTO capacity_info
 SELECT 'Data_Nodes', count( distinct node)
 from sv_node_space where date_time > now() - interval :lookback_interval;
---------------------------------------------------------------------------------
-- initialize backup views
---------------------------------------------------------------------------------
-- Create HOURLY backups view -- Last 100 days ----
CREATE OR REPLACE TEMPORARY VIEW v_hourly_backup_summary as
 SELECT to_char(date_trunc('hour',started_ts), 'YYYY-MM-DD HH24:MI') as Started
 , count(*) as total
 , sum(CASE WHEN last_status_code in (30000, 30005) then 1 ELSE 0 END)
 as successful
 , sum(CASE WHEN last_status_code in (30000, 30005) then 0 ELSE 1 END)
 as failed
 , to_char((sum( bytes_scanned) /1024^4)::NUMERIC,'999,999,999.00') as TB_scan
 , to_char((sum( bytes_modified_sent)/1024^3)::NUMERIC, '999,999,999.00') as GB_sent
 , sum( bytes_scanned) /1024^4 as TB_scanned_raw
 FROM activities
 WHERE type in (1, 2) -- Only BACKUPs
 AND started_ts > now() - interval :lookback_interval
 GROUP BY Started
 ORDER BY Started
;

-- DAILY summary backups - last 100 days ----
CREATE OR REPLACE TEMPORARY VIEW v_daily_backup_summary as
 SELECT to_char(date_trunc('day',started_ts), 'YYYY-MM-DD') as Started
 , count(*) as total
 , sum(CASE WHEN last_status_code in (30000, 30005) then 1 ELSE 0 END)
 as successful
 , sum(CASE WHEN last_status_code in (30000, 30005) then 0 ELSE 1 END)
 as failed
 , to_char((sum( bytes_scanned) /1024^4)::NUMERIC,'999,999,999.00') as TB_scan
 , to_char((sum( bytes_modified_sent)/1024^3)::NUMERIC, '999,999,999.00') as GB_sent
 , sum( bytes_scanned) /1024^4 as TB_scanned_raw
 FROM activities
 WHERE type in (1, 2) -- Only BACKUPs
 AND started_ts > now() - interval :lookback_interval
 GROUP BY Started
 ORDER BY Started;

--- get client counts ------
INSERT INTO capacity_info
 SELECT 'Clients_'|| client_type as cltype,count(*) from v_clients_2
 where display_full_domain NOT like '/MC_RETIRED%'
 group by client_type
 ORDER BY client_type;
---------------------------------------------------------------------------------
------ Calculate BACKUP info ------
---------------------------------------------------------------------------------
\echo BACKUPS : Calculating 95th percentile capabilities ...
--- 95'th percentile backups per hr and TB per hr ----
INSERT INTO capacity_info
 SELECT 'backupperhour95pctile' as buPerhr, a.total
 FROM (Select total , ntile(:nbr_of_chunks) over (order by total) as chunknbr
 from v_hourly_backup_summary
 WHERE successful/total ::NUMERIC > 0.9
 ) a
 WHERE a.chunknbr=:select_hourly_chunk_nbr
 ORDER BY a.total desc
 LIMIT 1;

INSERT INTO capacity_info
 SELECT 'TB_scan_perhr_95pctile',
 to_char(b.TB_scanned_raw::NUMERIC,'999,999,999.00')
 FROM (Select TB_scanned_raw , ntile(:nbr_of_chunks) over (order by TB_scanned_raw) as chunknbr
 from v_hourly_backup_summary
 WHERE successful/total ::NUMERIC > 0.9) b
 where b.chunknbr=:select_hourly_chunk_nbr
 ORDER BY TB_scanned_raw DESC
 Limit 1;
-- Insert 'per-day' capability ----
-- Note: "Backup Window" is 8 hours
-- So we allow :backup_criteria_hours hours for backup "Capacity" calc
INSERT INTO capacity_info
 SELECT 'capability_backups_perday', (SELECT value from capacity_info
 where NAME='backupperhour95pctile')::NUMERIC * :backup_criteria_hours;

INSERT INTO capacity_info
 SELECT 'capability_bkupTB_perday', (SELECT value from capacity_info
 where NAME='TB_scan_perhr_95pctile')::NUMERIC * :backup_criteria_hours;

---- ACTUAL: Get selected percentile actual backups per hr, and TB per hr --
\echo BACKUPS : Calculating Actual current rates ...
INSERT INTO capacity_info
 SELECT 'actual_backups_perday' , a.total
 FROM (Select total , ntile(:nbr_of_chunks) over (order by total) as chunknbr
 from v_daily_backup_summary
 WHERE successful/total ::NUMERIC > 0.9
 ) a
 WHERE a.chunknbr=:select_daily_chunk_nbr
 ORDER BY a.total desc
 LIMIT 1;

INSERT INTO capacity_info
 SELECT 'actual_daily_bkup_TB_perday', b.TB_scanned_raw
 FROM (Select TB_scanned_raw , ntile(:nbr_of_chunks) over (order by TB_scanned_raw) as chunknbr
 from v_daily_backup_summary
 WHERE successful/total ::NUMERIC > 0.9) b
 where b.chunknbr=:select_daily_chunk_nbr
 ORDER BY TB_scanned_raw DESC
 Limit 1;

---- Calculate percent of capacity (Over or under) actuals represent ---
INSERT INTO capacity_info
 SELECT 'Utilization_backups_perday', to_char( (SELECT value from capacity_info where NAME='actual_backups_perday')::NUMERIC * 100
 / value::NUMERIC , '9,999.9%') AS Utilization_backups_per_day
 from capacity_info where name='capability_backups_perday';

 INSERT INTO capacity_info
 SELECT 'Utilization_TB_perday', to_char( (SELECT value from capacity_info where NAME='actual_daily_bkup_TB_perday')::NUMERIC * 100
 / value::NUMERIC , '9,999.9%') AS Utilization_backups_per_day
 from capacity_info where name='capability_bkupTB_perday';
---------------------------------------------------------------------------------
------ Calculate REPLICATION info ------
-- NOTE:
--- the 'repl_activities' table reports TWICE the amount of replication because
--- it includes domain /MC_SYSTEM entries that collect all PROGRESS messages per job
---------------------------------------------------------------------------------
\echo REPLICATION : Generating Views and capabilities ...
CREATE OR REPLACE TEMPORARY VIEW v_daily_repl_summary as
 SELECT to_char(date_trunc('day',date_time), 'YYYY-MM-DD') as Started
 , count(*) as total
 , sum(CASE WHEN last_status_code in (30000, 30005) then 1 ELSE 0 END)
 as successful
 , sum(CASE WHEN last_status_code in (30000, 30005) then 0 ELSE 1 END)
 as failed
 , to_char((sum( bytes_scanned) /1024^4)::NUMERIC,'999,999,999.00') as TB_scan
 , to_char((sum( bytes_new)/1024^3)::NUMERIC, '999,999,999.00') as GB_sent
 , sum( bytes_scanned) /1024^4 as TB_scanned_raw
 FROM repl_activities
 WHERE date_time > now() - interval :lookback_interval
 AND dpn_domain!='/MC_SYSTEM'
 GROUP BY Started
 ORDER BY Started;

CREATE OR REPLACE TEMPORARY VIEW v_hourly_repl_summary as
 SELECT to_char(date_trunc('hour',date_time), 'YYYY-MM-DD HH24:MI') as Started
 , count(*) as total
 , sum(CASE WHEN last_status_code in (30000, 30005) then 1 ELSE 0 END)
 as successful
 , sum(CASE WHEN last_status_code in (30000, 30005) then 0 ELSE 1 END)
 as failed
 , to_char((sum( bytes_scanned) /1024^4)::NUMERIC,'999,999,999.00') as TB_scan
 , to_char((sum( bytes_new)/1024^3)::NUMERIC, '999,999,999.00') as GB_sent
 , sum( bytes_scanned) /1024^4 as TB_scanned_raw
 FROM repl_activities
 WHERE date_time > now() - interval :lookback_interval
 AND dpn_domain!='/MC_SYSTEM'
 GROUP BY Started
 ORDER BY Started;

--- 95'th percentile repl per hr and TB per hr ----
INSERT INTO capacity_info
 SELECT 'replperhour95pctile' , a.total
 FROM (Select total , ntile(:nbr_of_chunks) over (order by total) as chunknbr
 from v_hourly_repl_summary
 WHERE successful/total ::NUMERIC > 0.9
 ) a
 WHERE a.chunknbr=:select_hourly_chunk_nbr
 ORDER BY a.total desc
 LIMIT 1;

INSERT INTO capacity_info
 SELECT 'TB_repl_perhr_95pctile',
 to_char(b.TB_scanned_raw::NUMERIC,'999,999,999.00')
 FROM (Select TB_scanned_raw , ntile(:nbr_of_chunks) over (order by TB_scanned_raw) as chunknbr
 from v_hourly_repl_summary
 WHERE successful/total ::NUMERIC > 0.9) b
 where b.chunknbr=:select_hourly_chunk_nbr
 ORDER BY TB_scanned_raw DESC
 Limit 1;
-- Insert 'per-day' capability ----
-- Note: Replication is an 18 hour window
-- So we allow :repl_criteria_hours hours for "capability"
INSERT INTO capacity_info
 SELECT 'capability_repl_perday', (SELECT value from capacity_info
 where NAME='replperhour95pctile')::NUMERIC * :repl_criteria_hours;

INSERT INTO capacity_info
 SELECT 'capability_replTB_perday', (SELECT value from capacity_info
 where NAME='TB_repl_perhr_95pctile')::NUMERIC * :repl_criteria_hours;

---- ACTUAL: Get 99'th percentile actual repl per hr, and TB per hr --
INSERT INTO capacity_info
 SELECT 'actual_repl_perday' , a.total
 FROM (Select total , ntile(:nbr_of_chunks) over (order by total) as chunknbr
 from v_daily_repl_summary
 WHERE successful/total ::NUMERIC > 0.9
 ) a
 WHERE a.chunknbr=:select_daily_chunk_nbr
 ORDER BY a.total desc
 LIMIT 1;

INSERT INTO capacity_info
 SELECT 'actual_daily_repl_TB_perday', b.TB_scanned_raw
 FROM (Select TB_scanned_raw , ntile(:nbr_of_chunks) over (order by TB_scanned_raw) as chunknbr
 from v_daily_repl_summary
 WHERE successful/total ::NUMERIC > 0.9) b
 where b.chunknbr=:select_daily_chunk_nbr
 ORDER BY TB_scanned_raw DESC
 Limit 1;

---- Calculate percent of capacity (Over or under) actuals represent ---
INSERT INTO capacity_info
 SELECT 'Utilization_repl_perday', to_char( (SELECT value from capacity_info where NAME='actual_repl_perday')::NUMERIC * 100
 / value::NUMERIC , '9,999.9%')
 from capacity_info where name='capability_repl_perday';

 INSERT INTO capacity_info
 SELECT 'Utilization_repl_TB_perday', to_char( (SELECT value from capacity_info where NAME='actual_daily_repl_TB_perday')::NUMERIC * 100
 / value::NUMERIC , '9,999.9%')
 from capacity_info where name='capability_replTB_perday';

---------------------------------------------------------------------------------
-- Gather Data Domain Utilization & Capacity
--------------------------------------------------------------------------------
WITH dd_recent
 AS (SELECT ddr_hostname,utilization,capacity_gib from ddr_node_space
 ORDER by date_time desc LIMIT 100)
 INSERT into capacity_info
 (SELECT 'DD_' || ddr_hostname ||'_util_pct', max(dd_recent.utilization) as util
 FROM dd_recent
 group by ddr_hostname)
 UNION ALL
 (SELECT 'DD_' || ddr_hostname || '_tb' , max(capacity_gib )/1024 as TB
 FROM dd_recent
 group by ddr_hostname);

---------------------------------------------------------------------------------
-- Format TB for pretty output (Truncate to 2 decimals)
--------------------------------------------------------------------------------
-- Note - we do NOT use commas to separate thousands
-- Because that confuses Excel when the CSV is imported.
UPDATE capacity_info SET value=to_char(value::NUMERIC, '999999999.00')
 WHERE name IN ('actual_daily_bkup_TB_perday', 'actual_daily_repl_TB_perday'
 , 'capability_bkupTB_perday','capability_replTB_perday');
UPDATE capacity_info SET value=to_char(value::NUMERIC, '999999999')
 WHERE name IN ('capability_backups_perday', 'capability_repl_perday'
 );
UPDATE capacity_info SET value=to_char(value::NUMERIC, '999999999.00')
 where NAME like 'DD_%_tb';
---------------------------------------------------------------------------------
----- Check GC/hfscheck completion times -----
---------------------------------------------------------------------------------
-- Checkpoint start to HFS Finish elapsed hours
\echo CP/HFS Check : Calculating daily elapsed time ...
CREATE OR REPLACE TEMPORARY VIEW v_cp_hfs_elapsed as
 SELECT hfs.date_time,right(summary,17) as cpname,
 (SELECT (hfs.ts - min(ts))/3600/1000 as elapsedhr
 FROM events
 WHERE code=54 and ts < hfs.ts
 AND summary like 'checkpoint ' || right(hfs.summary,17) || '%' )
 FROM events hfs
 WHERE hfs.code=4003
 and date_time > now() - interval :lookback_interval;

INSERT INTO capacity_info
 SELECT 'cp_hfs_hours' ,
 to_char( a.elapsedhr , '99,999.99')
 FROM (Select elapsedhr , ntile(:nbr_of_chunks) over (order by elapsedhr) as chunknbr
 from v_cp_hfs_elapsed
 ) a
 WHERE a.chunknbr=:select_daily_chunk_nbr
 ORDER BY a.elapsedhr desc
 LIMIT 1;
-- See how close this is to allowed 4 hours ...
INSERT INTO capacity_info
 SELECT 'Utilization_cp_hfs', to_char( (SELECT value from capacity_info where NAME='cp_hfs_hours')::NUMERIC * 100
 / :cp_hfs_criteria_hours , '9,999.9%') ;
---------------------------------------------------------------------------------
-- Send capacity info to /tmp/file...
\echo ===== R e s u l t s =====
SELECT * FROM capacity_info
WHERE name like 'Util%' OR name like '%_util_pct';

COPY capacity_info TO :'Output_file' WITH CSV;
SELECT 'COMPLETED. Output saved to ' || :'Output_file' as Result ;

---------------------------------------------------------------------------------
];

    # Close explicitly so the whole script is on disk before psql reads it,
    # and so a failed write (e.g. disk full) is noticed rather than producing
    # a truncated script.
    if (!close($fh)) {
        warn "AvamarUtilizationCalc: error writing tmp.sql: $!\n";
        unlink "tmp.sql";
        return;
    }

    my $x=`psql mcdb -p 5555 -f tmp.sql`;

    # Require a non-empty path (\S+): an empty capture used to reach the
    # shell as a bare "cat", which waits on stdin.
    if (defined $x and $x=~/Output saved to (\S+)/){
        my $file=$1 ;
        # Read the result file directly instead of shelling out to cat with
        # an interpolated path.
        if (open(my $csv, '<', $file)) {
            local $/;                 # slurp the whole file in one read
            my $y=<$csv>;
            close($csv);
            print $y if (defined $y);
        } else {
            warn "AvamarUtilizationCalc: cannot read $file: $!\n";
        }
    }
    unlink "tmp.sql";
}
8717###############################################################################################################
8718########## END AvamarUtilizationCalc.sql
8719###############################################################################################################