Appendix A
# Script: hadoop-forensics.sh
# This script is designed for first responders to collect essential preliminary information required to initiate a digital forensics investigation on a Linux OS with a Hadoop Cluster setup.
# Author: Cephas Mpungu
# Print a short usage synopsis and an example invocation to stdout.
# Arguments: none (reads $0 for the script name)
usage() {
  printf 'Usage: %s <output_directory>\n' "$0"
  printf 'Example: %s /path/to/output_directory\n' "$0"
}
# Report a fatal error on stderr and terminate the script with status 1.
# Arguments: $1 - message text printed after the "Error: " prefix
handle_error() {
  local message="$1"
  printf 'Error: %s\n' "$message" >&2
  exit 1
}
# Tell whether a command name resolves in the current shell.
# Arguments: $1 - command name to look up
# Outputs:   nothing (lookup output is discarded)
# Returns:   the exit status of `command -v` (0 when the name resolves)
command_exists() {
  local candidate="$1"
  command -v "$candidate" > /dev/null 2>&1
  return "$?"
}
# Exactly one positional argument (the output directory) is required.
if (( $# != 1 )); then
  usage
  exit 1
fi

# The output directory must already exist; it is not created here.
[[ -d "$1" ]] || handle_error "Output directory does not exist."

# Destination paths for the collected reports and the run log.
OUTPUT_DIR="$1"
OS_INFO_FILE="${OUTPUT_DIR}/OS_Info.txt"
HADOOP_INFO_FILE="${OUTPUT_DIR}/Hadoop_Info.txt"
LOG_FILE="${OUTPUT_DIR}/script_log.txt"

# From here on, stdout and stderr both flow through tee: they are still
# written to the original stdout and are also appended to the log file.
exec > >(tee -a "$LOG_FILE") 2>&1
# Append a section title followed by a separator rule to a report file.
# Arguments:
#   $1 - section title, written verbatim
#   $2 - path of the report file to append to
add_section_header() {
  local title="$1"
  local report_file="$2"
  # printf '%s' rather than echo: a title that happens to look like an echo
  # option (for example "-n" or "-e") must still be written literally.
  printf '%s\n===================================\n' "$title" >> "$report_file"
}
# Append a block of text followed by one blank line to a report file.
# Arguments:
#   $1 - text to record (typically captured command output), written verbatim
#   $2 - path of the report file to append to
add_command_output() {
  local payload="$1"
  local report_file="$2"
  # printf '%s' rather than echo: captured output that consists solely of
  # something echo would treat as an option ("-n", "-e", "-E") must not be
  # swallowed, and backslashes must never be reinterpreted.
  printf '%s\n\n' "$payload" >> "$report_file"
}
# Start of script
echo "Starting Hadoop cluster information gathering script"
echo "$(date) - Starting Hadoop cluster information gathering script" >> "$LOG_FILE"

# Print the operating system name on stdout.
#
# The release file is parsed as plain text instead of being sourced, so
# nothing stored in it is ever executed and none of its other keys leak into
# this script's variables. Falls back to `uname -s` when the file is missing,
# unreadable, or has no NAME= line.
# Arguments: $1 - release file to read (optional, default /etc/os-release)
detect_os_name() {
  local release_file="${1:-/etc/os-release}"
  local line value

  if [ -f "$release_file" ] && [ -r "$release_file" ]; then
    while IFS= read -r line || [ -n "$line" ]; do
      case "$line" in
        NAME=*)
          value=${line#NAME=}
          # Strip one layer of surrounding double or single quotes.
          value=${value#\"}
          value=${value%\"}
          value=${value#\'}
          value=${value%\'}
          printf '%s\n' "$value"
          return 0
          ;;
      esac
    done < "$release_file"
  fi

  uname -s
}

# Detect the operating system
OS=$(detect_os_name)
# Collect host-level information into "$OS_INFO_FILE".
#
# Recorded in order: OS identification, `uname -a`, members of the sudo and
# wheel groups, logged-in users, uptime, load averages, disk usage, block
# devices, mounts, /etc/fstab, interface configuration, listening sockets,
# /etc/hosts, interfaces flagged PROMISC, and a listing of /var/log.
# Globals:   OS_INFO_FILE (read)
# Arguments: none
# Outputs:   appends to "$OS_INFO_FILE" through add_section_header and
#            add_command_output; failing commands leave an empty entry
get_os_info() {
  add_section_header "Operating System Information" "$OS_INFO_FILE"
  if command_exists lsb_release; then
    add_command_output "Operating System: $(lsb_release -d | cut -f 2)" "$OS_INFO_FILE"
  elif [ -f /etc/os-release ]; then
    add_command_output "Operating System: $(grep -w NAME /etc/os-release | cut -d= -f2 | tr -d '\"')" "$OS_INFO_FILE"
  elif [ -f /etc/centos-release ]; then
    add_command_output "Operating System: $(cat /etc/centos-release)" "$OS_INFO_FILE"
  else
    add_command_output "Operating System: $(uname -s)" "$OS_INFO_FILE"
  fi
  add_command_output "$(uname -a)" "$OS_INFO_FILE"

  add_command_output "Users With Sudo Privileges:" "$OS_INFO_FILE"
  add_command_output "$(getent group sudo | cut -d: -f4 | tr ',' '\n')" "$OS_INFO_FILE"
  add_command_output "$(getent group wheel | cut -d: -f4 | tr ',' '\n')" "$OS_INFO_FILE"

  add_command_output "Logged In Users:" "$OS_INFO_FILE"
  add_command_output "$(w)" "$OS_INFO_FILE"

  add_command_output "Uptime:" "$OS_INFO_FILE"
  # The field layout of plain `uptime` varies ("up 3:22," vs "up 5 days,"),
  # so it is not sliced by field position. Prefer the -p form and fall back
  # to the complete, unparsed line when -p is not supported.
  add_command_output "$(uptime -p 2>/dev/null || uptime)" "$OS_INFO_FILE"

  add_command_output "Load Averages:" "$OS_INFO_FILE"
  add_command_output "$(awk '{print "Load averages (1min/5min/15min): " $1 ", " $2 ", " $3}' /proc/loadavg)" "$OS_INFO_FILE"

  add_command_output "Disk Usage:" "$OS_INFO_FILE"
  add_command_output "$(df -h)" "$OS_INFO_FILE"
  add_command_output "Partition Information:" "$OS_INFO_FILE"
  add_command_output "$(lsblk)" "$OS_INFO_FILE"
  add_command_output "Mount Points:" "$OS_INFO_FILE"
  add_command_output "$(mount)" "$OS_INFO_FILE"
  add_command_output "Filesystem Configuration:" "$OS_INFO_FILE"
  add_command_output "$(cat /etc/fstab)" "$OS_INFO_FILE"

  add_command_output "Network Configuration:" "$OS_INFO_FILE"
  if command_exists ifconfig; then
    add_command_output "$(ifconfig)" "$OS_INFO_FILE"
  else
    add_command_output "$(ip addr)" "$OS_INFO_FILE"
  fi
  # Listening TCP/UDP sockets: same fallback idea as ifconfig -> ip above,
  # because hosts without net-tools have ss but no netstat.
  if command_exists netstat; then
    add_command_output "$(netstat -tuln)" "$OS_INFO_FILE"
  elif command_exists ss; then
    add_command_output "$(ss -tuln)" "$OS_INFO_FILE"
  else
    add_command_output "Neither netstat nor ss is available; listening sockets not collected." "$OS_INFO_FILE"
  fi
  add_command_output "$(cat /etc/hosts)" "$OS_INFO_FILE"

  # Check for promiscuous mode
  add_section_header "Promiscuous Mode Check" "$OS_INFO_FILE"
  if command_exists ip; then
    add_command_output "IP link Results:" "$OS_INFO_FILE"
    add_command_output "$(ip link | grep PROMISC)" "$OS_INFO_FILE"
  else
    add_command_output "IP command not found, using alternative:" "$OS_INFO_FILE"
    add_command_output "$(ifconfig | grep PROMISC)" "$OS_INFO_FILE"
  fi

  # Labelled so the listing is not mistaken for promiscuous-mode output.
  add_command_output "Log Directory Listing (/var/log):" "$OS_INFO_FILE"
  add_command_output "$(ls /var/log/)" "$OS_INFO_FILE"
}
# Collect Hadoop-specific information into "$HADOOP_INFO_FILE".
#
# The Hadoop install root is taken to be the parent of the directory that
# holds the `hdfs` launcher found on PATH; configuration is read from
# <root>/etc/hadoop and logs are listed from <root>/logs.
# When `hdfs` is not on PATH, only the items that do not depend on it
# (/etc/hosts and the process snapshot) are recorded, preceded by a note.
# Globals:   HADOOP_INFO_FILE (read)
# Arguments: none
# Outputs:   appends to "$HADOOP_INFO_FILE" through add_section_header and
#            add_command_output
get_hadoop_info() {
  local hdfs_bin hadoop_home conf_dir workers_file

  add_section_header "Hadoop Cluster Information" "$HADOOP_INFO_FILE"

  if ! command_exists hdfs; then
    # Without the launcher the install-relative paths would collapse to
    # /etc/hadoop and /logs, and every hdfs query would fail, so skip them.
    add_command_output "hdfs command not found in PATH; skipping hdfs-based collection." "$HADOOP_INFO_FILE"
    add_command_output "Hadoop Cluster Network Config:" "$HADOOP_INFO_FILE"
    add_command_output "$(cat /etc/hosts)" "$HADOOP_INFO_FILE"
    add_command_output "Current Running Hadoop Processes:" "$HADOOP_INFO_FILE"
    add_command_output "$(ps aux | grep '[h]adoop')" "$HADOOP_INFO_FILE"
    add_command_output "$(ps aux | grep '[j]ava')" "$HADOOP_INFO_FILE"
    return 0
  fi

  # Resolve the install layout once; every path stays quoted so an install
  # directory containing spaces is handled.
  hdfs_bin=$(command -v hdfs)
  hadoop_home="$(dirname "$hdfs_bin")/.."
  conf_dir="$hadoop_home/etc/hadoop"

  add_command_output "Hadoop Configuration Files:" "$HADOOP_INFO_FILE"
  add_command_output "$(find "$conf_dir" -type f -name '*.xml')" "$HADOOP_INFO_FILE"
  add_command_output "Hadoop HDFS Version:" "$HADOOP_INFO_FILE"
  add_command_output "$(hdfs version)" "$HADOOP_INFO_FILE"
  add_command_output "All Cluster Namenodes:" "$HADOOP_INFO_FILE"
  add_command_output "$(hdfs getconf -namenodes)" "$HADOOP_INFO_FILE"
  add_command_output "All Cluster Secondary Namenodes:" "$HADOOP_INFO_FILE"
  add_command_output "$(hdfs getconf -secondaryNameNodes)" "$HADOOP_INFO_FILE"
  add_command_output "Default Hadoop Log File List:" "$HADOOP_INFO_FILE"
  add_command_output "$(ls -l "$hadoop_home/logs")" "$HADOOP_INFO_FILE"
  add_command_output "Hadoop Cluster Network Config:" "$HADOOP_INFO_FILE"
  add_command_output "$(cat /etc/hosts)" "$HADOOP_INFO_FILE"
  add_command_output "Hadoop Cluster Environment Variables Config:" "$HADOOP_INFO_FILE"
  add_command_output "$(hdfs envvars)" "$HADOOP_INFO_FILE"
  add_command_output "Detailed Info/Metrics About Hadoop Cluster:" "$HADOOP_INFO_FILE"
  add_command_output "$(hdfs dfsadmin -report)" "$HADOOP_INFO_FILE"

  add_command_output "Current Running Hadoop Processes:" "$HADOOP_INFO_FILE"
  # The [h]/[j] bracket form keeps the grep process itself out of the
  # snapshot: its own command line no longer contains the literal word.
  add_command_output "$(ps aux | grep '[h]adoop')" "$HADOOP_INFO_FILE"
  add_command_output "$(ps aux | grep '[j]ava')" "$HADOOP_INFO_FILE"

  add_section_header "Hadoop Cluster Nodes" "$HADOOP_INFO_FILE"
  add_command_output "Namenodes:" "$HADOOP_INFO_FILE"
  add_command_output "$(cat "$conf_dir/masters")" "$HADOOP_INFO_FILE"
  add_command_output "Datanodes:" "$HADOOP_INFO_FILE"
  # Hadoop 3 names the worker list "workers"; earlier releases use "slaves".
  if [ -f "$conf_dir/workers" ]; then
    workers_file="$conf_dir/workers"
  else
    workers_file="$conf_dir/slaves"
  fi
  add_command_output "$(cat "$workers_file")" "$HADOOP_INFO_FILE"
}
# Record the status and log-directory listing of each known IDS package
# whose command is present, appending everything to "$OS_INFO_FILE".
# Globals:   OS_INFO_FILE (read)
# Arguments: none
# Outputs:   for every detected tool: "<Label> Status:", the output of
#            `systemctl status <tool>`, "<Label> Log Files:", and an `ls` of
#            its log directory; "No IDS detected." when none is present
get_ids_info() {
  local found_any=0
  local entry label tool log_dir

  add_section_header "Intrusion Detection Software (IDS) Information Logs" "$OS_INFO_FILE"

  # Each entry is "<report label>|<command and service name>|<log directory>".
  for entry in \
    "Snort|snort|/var/log/snort/" \
    "Suricata|suricata|/var/log/suricata/" \
    "OSSEC|ossec|/var/ossec/logs/"; do
    IFS='|' read -r label tool log_dir <<< "$entry"
    command_exists "$tool" || continue

    add_command_output "${label} Status:" "$OS_INFO_FILE"
    add_command_output "$(systemctl status "$tool")" "$OS_INFO_FILE"
    add_command_output "${label} Log Files:" "$OS_INFO_FILE"
    add_command_output "$(ls "$log_dir")" "$OS_INFO_FILE"
    found_any=1
  done

  if (( found_any == 0 )); then
    add_command_output "No IDS detected." "$OS_INFO_FILE"
  fi
}
# Run whichever rootkit scanners are installed (chkrootkit, rkhunter,
# unhide) and append their output to "$OS_INFO_FILE".
#
# The closing note is written only when none of the scanners is present. It
# states that no scan ran, so an absent tool is never reported as a clean
# result.
# Globals:   OS_INFO_FILE (read)
# Arguments: none
# Outputs:   appends to "$OS_INFO_FILE" through add_section_header and
#            add_command_output
get_rootkit_info() {
  # Tracks whether any scanner was available, not whether a rootkit was found.
  local scanner_found=0

  add_section_header "Rootkit Detection" "$OS_INFO_FILE"

  if command_exists chkrootkit; then
    add_command_output "chkrootkit Scan Results:" "$OS_INFO_FILE"
    add_command_output "$(chkrootkit)" "$OS_INFO_FILE"
    scanner_found=1
  fi

  if command_exists rkhunter; then
    add_command_output "rkhunter Scan Results:" "$OS_INFO_FILE"
    add_command_output "$(rkhunter --check --skip-keypress)" "$OS_INFO_FILE"
    scanner_found=1
  fi

  if command_exists unhide; then
    add_command_output "unhide Scan Results:" "$OS_INFO_FILE"
    add_command_output "$(unhide proc)" "$OS_INFO_FILE"
    add_command_output "$(unhide sys)" "$OS_INFO_FILE"
    scanner_found=1
  fi

  if [ "$scanner_found" -eq 0 ]; then
    add_command_output "No rootkit detection tools (chkrootkit, rkhunter, unhide) found; no scan was performed." "$OS_INFO_FILE"
  fi
}
# Run every collection stage, in this fixed order.
for stage in get_os_info get_hadoop_info get_ids_info get_rootkit_info; do
  "$stage"
done

# Announce completion
printf '%s\n' "Hadoop cluster information gathering completed"