wpaudit/modules/wp_analyzer/directory_listing.py
Huzaifa Shoukat 8e8b4f8a84 Bug Fix
2025-05-21 22:39:24 +05:00

185 lines
11 KiB
Python

import re
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
import datetime # For year/month upload paths
from core.utils import sanitize_filename
from .utils import make_request
# File-name suffixes that flag a listed file as potentially sensitive.
# Matched case-insensitively against the end of each listed file name.
SENSITIVE_FILE_EXTENSIONS = [
    '.log',
    '.sql',
    '.bak',
    '.zip',
    '.tar.gz',
    '.tgz',
    '.sitemap',
    '.xml',
    '.phps',
    '.env',
    '.config',
    '.conf',
    '.ini',
    '.sh',
    '.txt',
]

# Substrings that flag a listed file as potentially sensitive when they occur
# anywhere in its (lower-cased) name.
SENSITIVE_FILENAME_KEYWORDS = [
    'debug',
    'error',
    'backup',
    'dump',
    'secret',
    'password',
    'key',
    'config',
]
def _is_directory_listing(html_content, url_path):
    """Heuristically decide whether an HTML page is a server-generated directory index.

    The cheap string heuristics run first and short-circuit; the HTML is only
    parsed when the link-count fallback is actually needed.

    Args:
        html_content: Response body as text. ``None`` or an empty string
            yields ``False``.
        url_path: Path component of the requested URL, e.g.
            ``/wp-content/uploads/``.

    Returns:
        ``True`` if any heuristic matches, otherwise ``False``.

    Note:
        The header-keyword and link-count heuristics are deliberately loose
        and can match ordinary pages that merely contain those words or
        several links; treat a positive result as a hint, not proof.
    """
    if not html_content:
        return False
    text_lower = html_content.lower()

    # Title check. Listing titles may print the directory with or without its
    # trailing slash ("Index of /a/b" vs "Index of /a/b/"), so when the
    # requested path ends in "/" accept both spellings.
    path_lower = url_path.lower()
    trimmed = path_lower.rstrip("/")
    if trimmed and trimmed != path_lower:
        title_tail = re.escape(trimmed) + r"(?:/|\s*<)"
    else:
        # Root path, empty path, or no trailing slash: match the path as given.
        title_tail = re.escape(path_lower)
    if re.search(r"<title>index of\s+" + title_tail, text_lower):
        return True

    # "Parent Directory" link text next to at least one anchor.
    if "parent directory" in text_lower and "<a href" in text_lower:
        return True

    # Column headers "Name / Last modified / Size" of a listing table.
    if all(kw in text_lower for kw in ("name", "last modified", "size")):
        return True

    # Fallback: count anchors that look like files or sub-directories.
    skipped_prefixes = ('?', '#', 'javascript:', 'mailto:')
    entry_suffixes = ('/', '.php', '.html', '.txt', '.js', '.css', '.jpg', '.png', '.zip', '.log')
    soup = BeautifulSoup(html_content, 'lxml')
    listing_links_count = 0
    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href']
        # Sort/anchor/script/mail links and the parent/current directory
        # entries are navigation, not listing entries.
        if href.startswith(skipped_prefixes) or href in ('../', './'):
            continue
        if href.lower().endswith(entry_suffixes):
            listing_links_count += 1
            if listing_links_count > 3:
                return True
    return False
def _extract_links_from_listing(html_content, base_url):
    """Extract child file and directory names from a directory listing page.

    Every anchor is resolved against ``base_url``; entries that do not live
    underneath it are ignored. Names are reported relative to ``base_url``
    regardless of whether the listing used relative, root-relative or absolute
    hrefs, so callers can compare them with plain directory names and join
    them back onto ``base_url``.

    Args:
        html_content: Listing page body as text. ``None`` or empty yields an
            empty result.
        base_url: URL the listing was fetched from.

    Returns:
        ``{"dirs": [...], "files": [...]}``. Directory names carry no leading
        or trailing slash; file names carry no leading slash. Each entry
        appears once, in document order.
    """
    links = {"dirs": [], "files": []}
    if not html_content:
        return links

    # Compare against a slash-terminated prefix so "/dir" cannot match "/directory/".
    prefix = base_url if base_url.endswith('/') else base_url + '/'
    seen = set()
    soup = BeautifulSoup(html_content, 'lxml')
    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href'].strip()
        # Skip parent/current directory, sort (query) links, fragments and script links.
        if not href or href in ("../", "./") or href.startswith(("?", "#", "javascript:")):
            continue
        full_url = urljoin(base_url, href)  # Resolve relative links
        # Only keep entries located below the listing itself.
        if not full_url.startswith(prefix):
            continue
        # Work with the path relative to the listing, without query or fragment.
        relative = full_url[len(prefix):].split('#', 1)[0].split('?', 1)[0]
        name = relative.strip('/')
        if not name:
            # The link points back at the listing itself.
            continue
        is_dir = relative.endswith('/')
        key = (is_dir, name)
        if key in seen:
            # Some listings link every entry twice (e.g. icon and text).
            continue
        seen.add(key)
        if is_dir:
            links["dirs"].append(name)
        else:
            links["files"].append(relative.lstrip('/'))
    return links
def _check_and_log_listing(test_url, response, state, config, vulnerable_paths_map, dir_path_key_for_remediation):
    """Check a response for a directory listing and record the finding.

    When the response is a 200 whose body looks like a directory listing, the
    listed entries are extracted, file names are screened against
    ``SENSITIVE_FILE_EXTENSIONS`` / ``SENSITIVE_FILENAME_KEYWORDS``, the
    finding is stored in ``vulnerable_paths_map`` (once per URL), and an alert
    plus a remediation suggestion are pushed to ``state``.

    Args:
        test_url: URL that was requested.
        response: Response object exposing ``status_code`` and ``text``, or a
            falsy value when the request produced nothing usable.
        state: Scan state object; ``add_critical_alert`` and
            ``add_remediation_suggestion`` are called on it.
        config: Not used by this function; kept for signature compatibility.
        vulnerable_paths_map: Dict mapping a base directory key to the list of
            findings below it. Mutated in place.
        dir_path_key_for_remediation: Base directory key under which this
            finding is grouped.

    Returns:
        ``(True, listed_content)`` when a listing was detected, where
        ``listed_content`` is the ``{"dirs": [...], "files": [...]}`` dict;
        otherwise ``(False, None)``.
    """
    if not response or response.status_code != 200:
        return False, None  # No usable response
    if not _is_directory_listing(response.text, urlparse(test_url).path):
        return False, None  # Page is not a listing

    print(f" [!!!] Directory Listing ENABLED for: {test_url}")
    listed_content = _extract_links_from_listing(response.text, test_url)

    # str.endswith accepts a tuple of suffixes, so one call covers every extension.
    sensitive_suffixes = tuple(SENSITIVE_FILE_EXTENSIONS)
    sensitive_files_found = []
    for fname in listed_content["files"]:
        fname_lower = fname.lower()
        if fname_lower.endswith(sensitive_suffixes) or \
                any(keyword in fname_lower for keyword in SENSITIVE_FILENAME_KEYWORDS):
            sensitive_files_found.append(fname)

    path_finding = {
        "url": test_url,
        "listed_dirs": listed_content["dirs"],
        "listed_files_count": len(listed_content["files"]),
        "sensitive_files_hint": sensitive_files_found
    }
    # Record each vulnerable URL only once per base directory key.
    recorded = vulnerable_paths_map.setdefault(dir_path_key_for_remediation, [])
    if all(vp["url"] != test_url for vp in recorded):
        recorded.append(path_finding)

    severity = "Medium"
    description_extra = ""
    if sensitive_files_found:
        severity = "High"
        description_extra = f" Sensitive files potentially exposed: {', '.join(sensitive_files_found[:3])}{'...' if len(sensitive_files_found)>3 else ''}."

    # The alert and the remediation suggestion are emitted on every detection,
    # including for a URL that was already present in vulnerable_paths_map.
    state.add_critical_alert(f"Directory Listing enabled: {test_url}" + (f" (Sensitive files: {len(sensitive_files_found)})" if sensitive_files_found else ""))

    # The key is derived from the base directory, so sub-paths share it with their parent.
    # NOTE(review): if add_remediation_suggestion overwrites by key, a later
    # "Medium" sub-path would replace an earlier "High" entry - confirm.
    remediation_key = f"dir_listing_{sanitize_filename(dir_path_key_for_remediation.replace('/', '_'))}"
    state.add_remediation_suggestion(remediation_key, {
        "source": "WP Analyzer (Directory Listing)",
        "description": f"Directory listing is enabled for '{test_url}', potentially exposing file names or structures.{description_extra}",
        "severity": severity,
        "remediation": "Disable directory listing via web server configuration (e.g., 'Options -Indexes' in .htaccess for Apache, or 'autoindex off;' in Nginx)."
    })
    return True, listed_content  # Listing found, return content
def _probe_subdirectory(url, state, config, vulnerable_paths_map, dir_path):
    """Fetch one sub-directory URL and run the listing check, isolating failures.

    Returns whatever ``_check_and_log_listing`` returns, or ``(False, None)``
    when the request or the check raised, so one bad sub-path does not abort
    the remaining sub-checks.
    """
    try:
        response = make_request(url, config, timeout=5)
        return _check_and_log_listing(url, response, state, config, vulnerable_paths_map, dir_path)
    except Exception as e:
        print(f" [-] Error checking directory {url}: {e}")
        return False, None


def _scan_upload_archives(uploads_url, listed_dirs, state, config, vulnerable_paths_map, dir_path):
    """Check the current and previous year's upload folders and their month sub-folders."""
    current_year = datetime.datetime.now().year
    for year in (current_year, current_year - 1):
        year_to_check = str(year)
        if year_to_check not in listed_dirs:  # Only follow years the listing exposed
            continue
        year_url = urljoin(uploads_url, f"{year_to_check}/")
        print(f" Recursively checking uploads year: {year_url}")
        is_year_listing, year_listed_content = _probe_subdirectory(
            year_url, state, config, vulnerable_paths_map, dir_path)
        if not (is_year_listing and year_listed_content):
            continue
        for month_num in range(1, 13):
            month_str = f"{month_num:02d}"  # Format as 01, 02, etc.
            if month_str not in year_listed_content["dirs"]:
                continue
            month_url = urljoin(year_url, f"{month_str}/")
            print(f" Recursively checking uploads month: {month_url}")
            _probe_subdirectory(month_url, state, config, vulnerable_paths_map, dir_path)


def _scan_child_directories(parent_url, listed_dirs, item_type, state, config, vulnerable_paths_map, dir_path):
    """Check every directory exposed by a plugins/themes listing, one level deep."""
    for item_name in listed_dirs:
        item_url = urljoin(parent_url, f"{item_name}/")
        print(f" Recursively checking {item_type}: {item_url}")
        _probe_subdirectory(item_url, state, config, vulnerable_paths_map, dir_path)


def check_directory_listing(state, config, target_url):
    """Check for directory listing enabled on common and discovered WordPress directories.

    A fixed set of well-known WordPress directories is requested below
    ``target_url``. When ``wp-content/uploads/`` exposes a listing, the
    current and previous year folders (and their month folders) are checked
    too; when ``wp-content/plugins/`` or ``wp-content/themes/`` expose a
    listing, every listed child directory is checked one level deep.

    Results are written to the ``"directory_listing"`` entry of the
    ``"wp_analyzer"`` module findings via ``state.update_module_findings``.

    Args:
        state: Scan state object providing ``get_module_findings`` and
            ``update_module_findings`` (and the alert/remediation methods
            used by ``_check_and_log_listing``).
        config: Scan configuration, passed through to ``make_request``.
        target_url: Base URL of the site under test.

    Returns:
        None.
    """
    wp_analyzer_module_key = "wp_analyzer"
    findings_subkey = "directory_listing"  # Align with analyzer.py
    analyzer_findings = state.get_module_findings(wp_analyzer_module_key, {})
    current_phase_findings = analyzer_findings.get(findings_subkey, {})
    if not current_phase_findings:
        current_phase_findings = {
            "status": "Running",
            "vulnerable_paths_map": {},  # Findings grouped per base path
            "details": "Checking for directory listings..."
        }
    current_phase_findings["status"] = "Running"
    analyzer_findings[findings_subkey] = current_phase_findings
    print(" [i] Enhanced Directory Listing Checks...")

    # Common base directories to check ("wp-content/mu-plugins/" is intentionally not included).
    base_dirs_to_check = [
        "wp-content/", "wp-content/uploads/", "wp-content/plugins/", "wp-content/themes/",
        "wp-includes/", "wp-admin/", "wp-admin/includes/", "wp-admin/css/", "wp-admin/js/",
        "wp-content/backups/", "wp-content/backup/", "wp-content/cache/", "wp-content/logs/"
    ]
    # Findings are keyed by the base directory they were discovered under.
    vulnerable_paths_map = current_phase_findings.get("vulnerable_paths_map", {})

    for dir_path in base_dirs_to_check:
        # NOTE(review): urljoin replaces the last path segment when target_url
        # has no trailing slash ("http://host/blog" -> "http://host/wp-content/").
        # Confirm callers pass a base URL ending in "/" for sub-directory installs.
        test_url = urljoin(target_url, dir_path)
        print(f" Checking base directory: {test_url}")
        try:
            response = make_request(test_url, config, timeout=7)
            is_listing, listed_content = _check_and_log_listing(
                test_url, response, state, config, vulnerable_paths_map, dir_path)
        except Exception as e:
            print(f" [-] Error checking directory {dir_path}: {e}")
            continue

        # Recurse only below directories that actually exposed a listing.
        if not (is_listing and listed_content):
            continue
        if dir_path == "wp-content/uploads/":
            _scan_upload_archives(
                test_url, listed_content["dirs"], state, config, vulnerable_paths_map, dir_path)
        elif dir_path in ("wp-content/plugins/", "wp-content/themes/"):
            item_type = "plugin" if "plugins" in dir_path else "theme"
            _scan_child_directories(
                test_url, listed_content["dirs"], item_type, state, config, vulnerable_paths_map, dir_path)

    current_phase_findings["vulnerable_paths_map"] = vulnerable_paths_map
    total_vulnerable_unique_urls = sum(len(paths) for paths in vulnerable_paths_map.values())
    if total_vulnerable_unique_urls > 0:
        current_phase_findings["status"] = "Vulnerabilities Found"
        current_phase_findings["details"] = f"Found {total_vulnerable_unique_urls} path(s) with directory listing enabled. Check 'vulnerable_paths_map' for details."
    else:
        current_phase_findings["status"] = "Completed (No Listings Found)"
        current_phase_findings["details"] = "No directory listings found on checked paths."
    analyzer_findings[findings_subkey] = current_phase_findings
    state.update_module_findings(wp_analyzer_module_key, analyzer_findings)
    print(f" [+] Enhanced Directory Listing checks finished. Status: {current_phase_findings['status']}")