mirror of
https://ghproxy.net/https://github.com/serpwings/static-wordpress.git
synced 2025-10-04 02:37:53 +08:00
optimized sitemap search
This commit is contained in:
parent
cbd21ac068
commit
bc121722a7
4 changed files with 38 additions and 28 deletions
|
@ -41,7 +41,7 @@ from bs4 import BeautifulSoup
|
|||
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
from ..core.constants import CONFIGS
|
||||
from ..core.utils import get_clean_url, get_remote_content
|
||||
from ..core.utils import get_clean_url, get_remote_content, is_url_valid
|
||||
|
||||
# +++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
# IMPLEMENTATIONS
|
||||
|
@ -57,27 +57,29 @@ def find_sitemap_location(home_url_: str) -> str:
|
|||
Returns:
|
||||
str: Location of Sitemap
|
||||
"""
|
||||
for sitemap_path in CONFIGS["SITEMAP"]["SEARCH_PATHS"]:
|
||||
sitemap_url = get_clean_url(home_url_, sitemap_path)
|
||||
response = get_remote_content(sitemap_url)
|
||||
if response.status_code < 400:
|
||||
return parse.urlparse(response.url).path
|
||||
if is_url_valid(home_url_):
|
||||
for sitemap_path in CONFIGS["SITEMAP"]["SEARCH_PATHS"]:
|
||||
sitemap_url = get_clean_url(home_url_, sitemap_path)
|
||||
response = get_remote_content(sitemap_url)
|
||||
if response.status_code < 400:
|
||||
return parse.urlparse(response.url).path
|
||||
|
||||
# robots.txt
|
||||
robots_txt = get_clean_url(home_url_, "robots.txt")
|
||||
response = get_remote_content(robots_txt)
|
||||
if response:
|
||||
for item in response.text.split("\n"):
|
||||
if item.startswith("Sitemap:"):
|
||||
return item.split("Sitemap:")[-1].strip()
|
||||
# robots.txt
|
||||
robots_txt = get_clean_url(home_url_, "robots.txt")
|
||||
response = get_remote_content(robots_txt)
|
||||
if response:
|
||||
for item in response.text.split("\n"):
|
||||
if item.startswith("Sitemap:"):
|
||||
return item.split("Sitemap:")[-1].strip()
|
||||
|
||||
# check home page for link rel=sitemap
|
||||
response = get_remote_content(home_url_)
|
||||
if response:
|
||||
soup = BeautifulSoup(response.text, features="xml")
|
||||
for link in soup.find_all("link"):
|
||||
if link.has_attr("sitemap"):
|
||||
return link["href"]
|
||||
# check home page for link rel=sitemap
|
||||
response = get_remote_content(home_url_)
|
||||
if response:
|
||||
soup = BeautifulSoup(response.text, features="xml")
|
||||
for link in soup.find_all("link"):
|
||||
if link.has_attr("sitemap"):
|
||||
return link["href"]
|
||||
return ""
|
||||
return ""
|
||||
|
||||
|
||||
|
|
|
@ -228,6 +228,12 @@ def is_url_valid(url_: str) -> bool:
|
|||
url_parsed_ = parse.urlparse(url_)
|
||||
|
||||
if all([url_parsed_.scheme, url_parsed_.netloc]):
|
||||
return get_remote_content(url_parsed_, max_retires=1).status_code < 399
|
||||
from urllib.request import urlopen
|
||||
|
||||
# print(url_parsed_)
|
||||
# # return get_remote_content(url_parsed_, max_retires=1).status_code < 399
|
||||
try:
|
||||
return urlopen(url_).getcode() < 399
|
||||
except:
|
||||
return False
|
||||
return False
|
||||
|
|
|
@ -378,7 +378,7 @@ class StaticWordPressGUI(QMainWindow):
|
|||
self._project.open(project_path)
|
||||
if self._project.is_open():
|
||||
project_dialog = ProjectDialog(
|
||||
self, self._project, title_="Project Properties"
|
||||
parent=self, project_=self._project, title_="Project Properties"
|
||||
)
|
||||
|
||||
if project_dialog.exec_():
|
||||
|
@ -756,7 +756,8 @@ class StaticWordPressGUI(QMainWindow):
|
|||
def update_widgets(self) -> None:
|
||||
self.findChild(QMenu, "menu_github").setEnabled(self._project.has_github())
|
||||
self.findChild(QMenu, "menu_wordpress").setEnabled(
|
||||
self._project.has_wordpress() or self._project.can_crawl()
|
||||
self._project.is_open()
|
||||
and (self._project.has_wordpress() or self._project.can_crawl())
|
||||
)
|
||||
self.findChild(QToolBar, "toolbar_github").setEnabled(
|
||||
self._project.has_github()
|
||||
|
|
|
@ -323,7 +323,7 @@ class ProjectDialog(QDialog):
|
|||
self.pushbutton_verify.setIcon(
|
||||
QIcon(f"{SHARE_FOLDER_PATH}/icons/check_project.svg")
|
||||
)
|
||||
self.pushbutton_verify.clicked.connect(self.check_project)
|
||||
self.pushbutton_verify.clicked.connect(self.verify_project_settings)
|
||||
|
||||
self.pushbutton_save = QPushButton("&Save")
|
||||
self.pushbutton_save.setIcon(QIcon(f"{SHARE_FOLDER_PATH}/icons/ok.svg"))
|
||||
|
@ -398,10 +398,11 @@ class ProjectDialog(QDialog):
|
|||
self._bg_worker.emit_sitemap_location.connect(self.update_sitemap_location)
|
||||
self._bg_thread.start()
|
||||
|
||||
def update_sitemap_location(self, sitemap_location):
|
||||
self.lineedit_sitemap.setText(sitemap_location)
|
||||
def update_sitemap_location(self, sitemap_location_):
|
||||
if sitemap_location_:
|
||||
self.lineedit_sitemap.setText(sitemap_location_)
|
||||
|
||||
def check_project(self):
|
||||
def verify_project_settings(self):
|
||||
""""""
|
||||
# TODO: Add checks for WP_API and Gh_API and if not present then disable them.
|
||||
# TODO: Move these checks to background thread e.g. for WP_API or SRC_URL or SRC or DST Path
|
||||
|
@ -455,7 +456,7 @@ class ProjectDialog(QDialog):
|
|||
[
|
||||
self.lineedit_project_name.text(),
|
||||
self.lineedit_output.text(),
|
||||
is_url_valid(self.lineedit_src_url.text()),
|
||||
# is_url_valid(self.lineedit_src_url.text()),
|
||||
Path(self.lineedit_output.text()).is_dir(),
|
||||
]
|
||||
):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue