wpaudit/modules/wp_analyzer/file_exposure.py
2025-05-19 22:38:08 +05:00

222 lines
14 KiB
Python

import re
from urllib.parse import urljoin, urlparse
from core.utils import sanitize_filename # Core utils needed here
from .utils import make_request # Local utils for requests
def _parse_wp_config_content(content, source_url, state, module_key):
    """Parse exposed wp-config content and record sensitive ``define()`` values.

    Scans *content* for PHP ``define('KEY', value);`` statements, stores an
    obfuscated summary under
    ``findings[module_key]["sensitive_file_exposure"]["parsed_config_files"]``
    and raises a critical alert when database settings are present.

    Args:
        content: Text body of the presumed wp-config file or backup.
        source_url: URL the content was fetched from (used in messages).
        state: Scan state object; must provide ``get_module_findings``,
            ``update_module_findings``, ``add_critical_alert`` and
            ``add_remediation_suggestion``.
        module_key: Key under which this module's findings are stored.

    Returns:
        A dict mapping the DB_* keys that were found to their raw values,
        or ``None`` when no database setting was found.
    """
    print(f" Attempting to parse content from presumed wp-config backup: {source_url}")

    # Keys of interest; built once instead of on every loop iteration.
    db_keys = ("DB_NAME", "DB_USER", "DB_PASSWORD", "DB_HOST")
    other_sensitive_keys = (
        "AUTH_KEY", "SECURE_AUTH_KEY", "LOGGED_IN_KEY", "NONCE_KEY",
        "AUTH_SALT", "SECURE_AUTH_SALT", "LOGGED_IN_SALT", "NONCE_SALT",
    )

    # Initialize structure for parsed details within the main findings dictionary
    analyzer_findings = state.get_module_findings(module_key, {})
    exposure_details = analyzer_findings.get("sensitive_file_exposure", {})
    parsed_configs = exposure_details.setdefault("parsed_config_files", [])

    parsed_items = {}
    found_config_details = {"source_url": source_url, "parsed_items": parsed_items}

    # define('KEY', 'VALUE');  /  define("KEY", "VALUE");  /  define('KEY', true);
    # - A quoted value may contain the *other* quote character (e.g. "pa'ss").
    # - A bare (unquoted) literal is accepted so that boolean flags such as
    #   WP_DEBUG, which are normally written without quotes, are seen.
    # Backslash escapes inside quoted values are not interpreted.
    define_pattern = re.compile(
        r"define\s*\(\s*"
        r"(?P<kq>['\"])(?P<key>[^'\"]+)(?P=kq)\s*,\s*"
        r"(?:(?P<vq>['\"])(?P<quoted>(?:(?!(?P=vq)).)*)(?P=vq)"
        r"|(?P<bare>[^\s'\"(),;]+))"
        r"\s*\)\s*;",
        re.DOTALL,
    )

    # Store actual credentials temporarily for command generation, but
    # obfuscate them for general reporting.
    extracted_creds = {}
    db_creds_found = False

    for match in define_pattern.finditer(content):
        key = match.group("key")
        bare = match.group("bare")

        if bare is None:
            value = match.group("quoted")
            # Only quoted string values are treated as credentials/secrets;
            # a bare token here would be a PHP constant name, not a secret.
            if key in db_keys:
                extracted_creds[key] = value
                parsed_items[key] = "********" if key == "DB_PASSWORD" else value
                db_creds_found = True
            elif key in other_sensitive_keys:
                parsed_items[key] = "********"  # Obfuscate salts and keys
        else:
            value = bare

        # Check for WP_DEBUG specifically (quoted 'true' or bare true/TRUE).
        if key == "WP_DEBUG" and value.lower() == "true":
            parsed_items["WP_DEBUG"] = "true"
            state.add_remediation_suggestion("wp_debug_enabled_config", {
                "source": "WP Analyzer",
                "description": f"WP_DEBUG is enabled in an exposed configuration file ({source_url}).",
                "severity": "Medium",
                "remediation": "Disable WP_DEBUG on production sites."
            })

    if db_creds_found:
        print(" [!!!] Database credentials potentially found in exposed config!")
        state.add_critical_alert(f"DB Credentials potentially found in exposed config: {source_url}")
        parsed_items["DB_CREDENTIALS_HINT"] = "Present (Details partially obfuscated in report)"
        # NOTE(review): this keeps the raw values (including the password)
        # inside the findings structure. It is sensitive; ensure it is handled
        # carefully and not broadly logged or written to reports.
        found_config_details["_extracted_db_credentials"] = extracted_creds

    # Append the parsed details and persist them back into the state.
    parsed_configs.append(found_config_details)
    exposure_details["parsed_config_files"] = parsed_configs
    analyzer_findings["sensitive_file_exposure"] = exposure_details
    state.update_module_findings(module_key, analyzer_findings)

    return extracted_creds if db_creds_found else None
def _candidate_urls(target_url, file_path):
    """Return the unique URLs, in a stable probe order, to test for *file_path*."""
    # 1. Resolved relative to the target URL.
    candidates = [urljoin(target_url, file_path)]
    # 2. Inside common WP directories (unless the path already includes one).
    #    wp-content is covered by explicit entries such as wp-content/debug.log.
    if not any(wp_dir in file_path for wp_dir in ("wp-content/", "wp-includes/", "wp-admin/")):
        parsed_target = urlparse(target_url)
        site_root = f"{parsed_target.scheme}://{parsed_target.netloc}"
        candidates.append(urljoin(f"{site_root}/wp-includes/", file_path))
        candidates.append(urljoin(f"{site_root}/wp-admin/", file_path))
    # dict.fromkeys de-duplicates while keeping insertion order, so the scan
    # order is the same on every run (a set would not guarantee that).
    return list(dict.fromkeys(candidates))


def _looks_like_real_file(response, content_type):
    """Return True when a 200 response is not an HTML error page and is > 10 bytes."""
    if "text/html" in content_type:
        body_lower = response.text.lower()
        error_markers = ("error", "not found", "forbidden", "page not found", "<!doctype html>", "redirecting")
        if any(marker in body_lower for marker in error_markers):
            return False
    # Very small bodies are treated as implausible for a real file.
    return len(response.content) > 10


def _exposure_severity(file_path):
    """Map a sensitive file path to the severity label used in reports."""
    if any(marker in file_path for marker in ("wp-config", ".sql", ".env", ".git", ".svn")):
        return "High"
    if "log" in file_path or ".ini" in file_path or "phpinfo" in file_path:
        return "Medium"
    if any(ext in file_path for ext in (".zip", ".tar.gz", ".rar")):
        return "Medium"  # Could be high depending on content
    return "Low"  # e.g., composer.json


def _record_db_connection_hints(state, found_files_list, test_url, file_path, extracted_db_creds):
    """Attach DB connection hints for *test_url* to its entry in *found_files_list*."""
    # Check if mysql client is available (example, tool check should handle this)
    mysql_available = state.get_full_state().get("tool_checks", {}).get("mysql", {}).get("status") == "Found"
    db_name = extracted_db_creds.get("DB_NAME")
    db_user = extracted_db_creds.get("DB_USER")
    db_pass = extracted_db_creds.get("DB_PASSWORD")
    db_host = extracted_db_creds.get("DB_HOST", "localhost")  # Default to localhost if not found
    if not (db_name and db_user and db_pass):
        return

    for item in found_files_list:
        if item["url"] != test_url:
            continue
        item["actionable_info"] = item.get("actionable_info", {})
        item["actionable_info"]["db_credentials_extracted"] = {
            "db_name": db_name,
            "db_user": db_user,
            "db_host": db_host,
            # Password is deliberately not stored here.
        }
        if mysql_available:
            # Advise the user how to connect rather than embedding the password.
            # The mysql client does not allow a space between -p and the
            # password, so the long --password= form is used for the placeholder.
            mysql_command_suggestion = (
                f'mysql -h "{db_host}" -u "{db_user}" --password=YOUR_PASSWORD_HERE "{db_name}" -e "SHOW TABLES;"'
            )
            suggestion_text = (
                f"Extracted DB credentials. Try connecting (USE WITH CAUTION):\n"
                f" Host: {db_host}, User: {db_user}, DB: {db_name} (Password found but not shown here for safety).\n"
                f" Example command (replace YOUR_PASSWORD_HERE or use interactive prompt):\n"
                f" {mysql_command_suggestion}\n"
                f" (Ensure 'mysql' client is installed. Consider using MYSQL_PWD environment variable for the password to avoid shell history issues.)"
            )
            item["actionable_info"]["suggested_db_connect_command"] = suggestion_text
            item["actionable_info"]["extracted_db_password_NOTE"] = "Password was extracted but is not directly included in this suggested command for security. Use the extracted password when prompted or via MYSQL_PWD."
            print(f" [ACTIONABLE] {suggestion_text}")
            # Add a specific remediation for this actionable item
            state.add_remediation_suggestion(f"db_creds_exposed_connect_{sanitize_filename(file_path)}", {
                "source": "WP Analyzer (File Exposure)",
                "description": f"Database credentials found in {test_url}. Suggested manual connection command provided. IMMEDIATE ATTENTION REQUIRED.",
                "severity": "Critical",
                "remediation": f"Immediately secure or remove the exposed file ({test_url}). Change database credentials. Investigate potential unauthorized access. Review web server configurations to prevent access to sensitive files."
            })
        else:
            item["actionable_info"]["db_connection_note"] = "MySQL client not detected by tool checker; manual connection attempt may still be possible."
        break


def check_sensitive_file_exposure(state, config, target_url):
    """Check for publicly accessible sensitive files on the target site.

    Each known sensitive filename is requested at several candidate
    locations. Every plausible hit is recorded under
    ``findings["wp_analyzer"]["sensitive_file_exposure"]["found_files"]``,
    a critical alert and a remediation suggestion are added to *state*, and
    exposed wp-config files are additionally parsed for credentials.

    Args:
        state: Scan state object used to read and store findings, alerts
            and remediation suggestions.
        config: Scan configuration, passed through to ``make_request``.
        target_url: Base URL of the site being analyzed.
    """
    module_key = "wp_analyzer"
    analyzer_findings = state.get_module_findings(module_key, {})
    # Ensure the specific key exists before trying to access sub-keys
    if "sensitive_file_exposure" not in analyzer_findings:
        analyzer_findings["sensitive_file_exposure"] = {"status": "Running", "found_files": []}
    exposure_details = analyzer_findings["sensitive_file_exposure"]

    # Expanded list of sensitive files and backup patterns
    sensitive_files = [
        # Config files and backups
        "wp-config.php", "wp-config.php.bak", "wp-config.php.old", "wp-config.php.save",
        "wp-config.php.swp", "wp-config.php.txt", "wp-config.php~", "wp-config.bak",
        "wp-config.old", "wp-config.save", ".wp-config.php.swp", "config.php.bak",
        # Logs
        "debug.log", "wp-content/debug.log", "logs/debug.log", "error.log", "php_error.log",
        "error_log", "php_errors.log", "access.log", "access_log",
        # Environment files
        ".env", ".env.example", ".env.local", ".env.dev", ".env.prod", ".env.bak",
        # Database dumps
        "dump.sql", "backup.sql", "db.sql", "database.sql", "site.sql", "wordpress.sql",
        "data.sql", "export.sql", "wp_backup.sql",
        # PHP config/info
        "php.ini", "php.ini.bak", ".user.ini", ".user.ini.bak",
        "phpinfo.php", "info.php", "test.php", "phpversion.php",
        # Common archive formats
        "backup.zip", "site.zip", "wordpress.zip", "wp.zip", "files.zip",
        "backup.tar.gz", "site.tar.gz", "wordpress.tar.gz", "wp.tar.gz", "files.tar.gz",
        "backup.rar", "site.rar", "wordpress.rar", "wp.rar", "files.rar",
        # Version control system files
        ".git/config", ".svn/wc.db",
        # Dependency management files
        "composer.json", "composer.lock", "package.json", "yarn.lock"
    ]
    # Note: Dynamic pattern generation (e.g., timestamps) could be added but increases complexity.

    # Ensure found_files list exists
    found_files_list = exposure_details.setdefault("found_files", [])

    for file_path in sensitive_files:
        for test_url in _candidate_urls(target_url, file_path):
            print(f" Checking for sensitive file: {test_url}")
            # Use GET to retrieve content for basic checks
            response = make_request(test_url, config, method="GET")
            if not (response and response.status_code == 200):
                continue

            # Basic content check to avoid false positives on generic 200 OK error pages
            content_type = response.headers.get("Content-Type", "").lower()
            if not _looks_like_real_file(response, content_type):
                continue

            print(f" [!!!] POTENTIALLY SENSITIVE FILE EXPOSED: {test_url}")
            details = {
                "url": test_url,
                "status_code": response.status_code,
                "content_type": content_type,
                "size": len(response.content)
            }
            # Avoid adding duplicates if run multiple times
            if details not in found_files_list:
                found_files_list.append(details)

            severity = _exposure_severity(file_path)
            state.add_critical_alert(f"Sensitive File Exposed ({severity}): {test_url}")
            state.add_remediation_suggestion(f"file_expose_{sanitize_filename(file_path)}", {
                "source": "WP Analyzer",
                "description": f"Potentially sensitive file '{file_path}' found at {test_url}. Content should be reviewed manually.",
                "severity": severity,
                "remediation": "Remove publicly accessible sensitive files. Ensure proper server permissions and web server rules (e.g., .htaccess, Nginx config) to deny access to backup files, logs, environment files, VCS directories, and database dumps."
            })

            # If it's a potential wp-config backup, try parsing it
            if "wp-config" in file_path and response.text:
                extracted_db_creds = _parse_wp_config_content(response.text, test_url, state, module_key)
                if extracted_db_creds:
                    _record_db_connection_hints(
                        state, found_files_list, test_url, file_path, extracted_db_creds
                    )

    # _parse_wp_config_content reads and writes the module findings through
    # the state on its own. If the state handed it a different dict than the
    # one held here (e.g. the default was returned), carry its parsed results
    # over so the final write below does not discard them.
    persisted = state.get_module_findings(module_key, {}).get("sensitive_file_exposure", {})
    if "parsed_config_files" in persisted:
        exposure_details["parsed_config_files"] = persisted["parsed_config_files"]

    exposure_details["found_files"] = found_files_list
    exposure_details["status"] = "Checked"
    analyzer_findings["sensitive_file_exposure"] = exposure_details
    state.update_module_findings(module_key, analyzer_findings)