commit dc92b64add3c36101f0b123d32a65500adb28152 Author: Seo Wings Date: Sun Mar 5 18:59:16 2023 +0100 intial commit diff --git a/.github/workflows/makedocs.yml b/.github/workflows/makedocs.yml new file mode 100644 index 0000000..aa37bd2 --- /dev/null +++ b/.github/workflows/makedocs.yml @@ -0,0 +1,18 @@ +name: Documentation Generator + +on: + push: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + - run: pip install --upgrade pip && pip install mkdocs mkdocs-gen-files mkdocstrings[python] pymdown-extensions + - run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' + - name: Publish docs + run: mkdocs gh-deploy diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..376728e --- /dev/null +++ b/.gitignore @@ -0,0 +1,134 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.swp +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + +# misc +pyvenv.cfg \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4da376c --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +Static Wordpress Netlify Process +https://github.com/serpwings/simply-static-post-process + +A Python Library to Prepare and Deploy Static version of WordPress Installation to +Static Hosting Service Providers (Netlify). 
+ + +MIT License +Copyright (c) 2023 SERP Wings + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..47ee8b0 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Simply Static Post Process + +![Simply Static Netlify deploy workflow](docs/img/simply-static-netlify-deploy-workflow.png) + + +A Python Library to prepare and deploy a static version of a WordPress Installation on Netlify (Static Hosting Service Provider). + +## How to Use simply-static-post-process? +Please check our detailed tutorial (video/text) on [Simply Static Post Process Tutorial](https://www.seowings.org/simply-static-tutorial/) to understand how it works. The live documentation for this package is available at the [documentation link](https://simply-static.netlify.app). + +## Contributions + +Contributions, suggestions, and comments are welcome. Please fork the repository and submit a pull request. 
+ + +## About Us + +This work is a collaborative effort of [seowings](https://seowings.org/) and [serpwings](https://serpwings.com/). + + +## LICENSE + +- Simply Static Netlify Process is released under the [MIT License](https://github.com/serpwings/simply-static-post-process/blob/master/LICENSE). +- ``src/search.js`` is distributed without any additional licensing restrictions. Please consult ``src/search.js`` for more details. diff --git a/docs/helper-functions.md b/docs/helper-functions.md new file mode 100644 index 0000000..7993664 --- /dev/null +++ b/docs/helper-functions.md @@ -0,0 +1,4 @@ +# Helper Functions + +::: src.helpers + diff --git a/docs/img/favicon.ico b/docs/img/favicon.ico new file mode 100644 index 0000000..3e529de Binary files /dev/null and b/docs/img/favicon.ico differ diff --git a/docs/img/simply-static-netlify-deploy-workflow.png b/docs/img/simply-static-netlify-deploy-workflow.png new file mode 100644 index 0000000..c7797ba Binary files /dev/null and b/docs/img/simply-static-netlify-deploy-workflow.png differ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..c61c1a2 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,24 @@ +# Simply Static Post Process + +![Simply Static Netlify deploy workflow](img/simply-static-netlify-deploy-workflow.png) + + +A Python Library to prepare and deploy a static version of a WordPress Installation on Netlify (Static Hosting Service Provider). + +## How to Use simply-static-post-process? +Please check our detailed tutorial (video/text) on [Simply Static Post Process Tutorial](https://www.seowings.org/simply-static-tutorial/) to understand how it works. The live documentation for this package is available at the [documentation link](https://simply-static.netlify.app). + +## Contributions + +Contributions, suggestions, and comments are welcome. Please fork the repository and submit a pull request. 
+ + +## About Us + +This work is a collaborative effort of [seowings](https://seowings.org/) and [serpwings](https://serpwings.com/). + + +## LICENSE + +- Simply Static Netlify Process is released under the [MIT License](https://github.com/serpwings/simply-static-post-process/blob/master/LICENSE). +- ``src/search.js`` is distributed without any additional licensing restrictions. Please consult ``src/search.js`` for more details. \ No newline at end of file diff --git a/docs/simply-static-netlify.md b/docs/simply-static-netlify.md new file mode 100644 index 0000000..78286c0 --- /dev/null +++ b/docs/simply-static-netlify.md @@ -0,0 +1,3 @@ +# StaticWordPressNetlify Class + +::: src.main.StaticWordPressNetlify diff --git a/docs/tutorial.md b/docs/tutorial.md new file mode 100644 index 0000000..f50bcb0 --- /dev/null +++ b/docs/tutorial.md @@ -0,0 +1,3 @@ +# Tutorial: How to Deploy Simply Static Post Process + +Please read or watch the detailed tutorial on the [Seowings Tutorial Page](https://www.seowings.org/simply-static-tutorial/). 
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..d8f4333 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,32 @@ +site_name: simply-static-post-process +site_description: Python Library to Prepare and Deploy a Static Version of a WordPress Installation on Netlify +site_author: SERP Wings + +site_url: https://simply-static.netlify.app + +repo_url: https://github.com/serpwings/simply-static-post-process +edit_uri: blob/main/docs/ + +nav: + - Home: index.md + - Tutorial: tutorial.md + - API: + - simply-static-netlify object: simply-static-netlify.md + - Helper Functions: helper-functions.md + +plugins: + - search + - mkdocstrings + +markdown_extensions: + - admonition + - codehilite + - smarty + - meta + - toc: + permalink: True + - attr_list + +theme: readthedocs + +copyright: © Copyright 2023 SERP Wings \ No newline at end of file diff --git a/netlify.toml b/netlify.toml new file mode 100644 index 0000000..f55cb09 --- /dev/null +++ b/netlify.toml @@ -0,0 +1,3 @@ +[build] + publish = "output" + command = "python src/main.py" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a89fc9f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +beautifulsoup4==4.11.1 +lxml>=4.9.1 +requests==2.28.0 diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 0000000..cc1923a --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +3.8 diff --git a/src/helpers.py b/src/helpers.py new file mode 100644 index 0000000..cf9a594 --- /dev/null +++ b/src/helpers.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- # + +""" +Simply Static Netlify Process +https://github.com/serpwings/simply-static-post-process + +A Python Library to prepare and deploy static version of a WordPress Installation on Netlify (Static Hosting Service Providers). 
+ + +MIT License +Copyright (c) 2023 SERP Wings + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" + +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ +# FUNCTIONS +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +def string_formatter(str): + """ + String formatter for html formatting output. + + Args: + ua_name (str, required): string to be formatted. + """ + return str + + +def log_to_console(typ="INFO", message=""): + """ + help to log text streams (input as message) to console. + + Args: + typ (str, optional): Type of log. Default to ``INFO`` but can be any arbitrary value e.g. ``ERROR``, ``DEBUG`` and etc. + message (str, required): message string to be logged into console. + """ + print(f"{typ}: {message}") + + +def update_links(content="", link_from="", link_to=""): + """ + Usefull for fixing schema or other links which simply static cannot fix. + + Args: + content (str, required): Text which you want to update links e.g. Cotent on Home Page. 
+ link_from (str, required): string values of link which you want to change. + link_to (str, required): string values of links to be replaced with. + """ + if link_from and link_to and content: + link_from = link_from.split("://")[-1] + link_to = link_to.split("://")[-1] + return content.replace(link_from, link_to) + return content + + +if __name__ == "__main__": + pass diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..6e40a2c --- /dev/null +++ b/src/main.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- # + +""" +Simply Static Netlify Process +https://github.com/serpwings/simply-static-post-process + +A Python Library to prepare and deploy static version of a WordPress Installation on Netlify (Static Hosting Service Providers). + + +MIT License +Copyright (c) 2023 SERP Wings + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+""" + +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ +# IMPORTS (Standard Library) +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import os +import glob +import codecs +from pathlib import Path +from zipfile import ZipFile +import shutil +import json +from urllib.parse import parse_qs + +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ +# IMPORTS (3rd Party) +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ + +from bs4 import BeautifulSoup +from bs4.formatter import HTMLFormatter +import requests +from requests.structures import CaseInsensitiveDict + + +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ +# Local Imports +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ + +import helpers + +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ +# CONSTANTS +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ + +LUNR = { + "src": "https://cdnjs.cloudflare.com/ajax/libs/lunr.js/2.3.9/lunr.min.js", + "integrity": "sha512-4xUl/d6D6THrAnXAwGajXkoWaeMNwEKK4iNfq5DotEbLPAfk6FSxSP3ydNxqDgCw1c/0Z1Jg6L8h2j+++9BZmg==", + "crossorigin": "anonymous", + "referrerpolicy": "no-referrer", +} + +SEARCH_INDEX = {"src": "search.js"} + + +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ +# CLASSES +# +++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +class StaticWordPressNetlify: + """ + This is a class processing Simply Static WordPress Plugin and converting it to netlify static site. + + Attributes: + config (dic): Contains all important configurations about the class. + output_folder (Path): Contains Path for output folder location in Netlify + zip_file_path (Path): ZIP File Path to download simply-static-zip file freom remote server. + redirect_page (Path): Contains Path of redirect page + robots_txt_page (Path): Contains Path of robots.txt page + self._404_page (Path): Contains Path of 404 Page + """ + + def __init__(self, config_=None): + """Initialize StaticWordPressNetlify objet with a config values. 
+ + Args: + config_ (dict, optional): contains diverse conditions for StaticWordPressNetlify object. + """ + if config_: + self.config = config_ + self.output_folder = Path(self.config["root"], self.config["output_folder"]) + self.zip_file_path = Path(self.config["root"], self.config["zip_file_name"]) + self.redirect_page = Path( + self.output_folder, self.config["pages"]["redirect"] + ) + self.robots_txt_page = Path( + self.output_folder, self.config["pages"]["robots"] + ) + self._404_page = Path( + self.output_folder, self.config["pages"]["404"], "index.html" + ) + + def download_zip_file(self): + """Download zip file from remote server""" + + helpers.log_to_console("INFO", configurations["zip_url"]) + + headers = CaseInsensitiveDict() + headers["Cache-Control"] = "no-cache, no-store, must-revalidate" + headers["Pragma"] = "no-cache" + headers["Expires"] = "0" + + current_session = requests.session() + response = current_session.get(configurations["zip_url"], headers=headers) + + if response.status_code == 200: + with open(self.config["zip_file_name"], "wb") as fd: + for chunk in response.iter_content(chunk_size=128): + fd.write(chunk) + helpers.log_to_console("INFO", "Simply Static Zip File Downloaded") + else: + helpers.log_to_console("ERROR", "Simply Static Zip File Not available") + + current_session.cookies.clear() + + def create_output_folder(self): + """Create Ouput Folder it it doesnot exist.""" + + if not self.output_folder.is_dir(): + self.output_folder.mkdir(parents=True, exist_ok=True) + helpers.log_to_console("INFO", "Output Folder Created") + else: + helpers.log_to_console("ERROR", "Cannot Create Output Folder") + + def create_robots_txt(self): + """Create Robots.txt File using robots-txt page. 
Default would be ``User-agent: *``""" + robots_path = Path( + self.robots_txt_page, + "index.html", + ) + + if robots_path.exists(): + with codecs.open(robots_path, "r", "utf-8") as f: + robots_txt_contents = f.read() + soup = BeautifulSoup(robots_txt_contents, "lxml") + robots_table = soup.find_all("table")[0] + + with open(f"{self.output_folder}/robots.txt", "w") as f: + for row in robots_table.tbody.find_all("tr"): + f.write("".join([cell.text.strip("\r") for cell in row("td")])) + f.write("\n") + + shutil.rmtree(Path(self.output_folder, self.config["pages"]["robots"])) + else: + with open(f"{self.output_folder}/robots.txt", "w") as f: + f.write("User-agent: * \n") + f.write("Disallow: /wp-admin/ \n") + f.write("Allow: /wp-admin/admin-ajax.php \n") + + helpers.log_to_console("INFO", "Created robots.txt file") + + def extract_zip_file(self): + """Extract simply static zip file to ouput folder.""" + if self.output_folder.is_dir(): + zf = ZipFile(self.zip_file_path, "r") + zf.extractall(self.output_folder) + zf.close() + helpers.log_to_console("INFO", "Zip File Extracted") + else: + helpers.log_to_console("ERROR", "Cannot extract Zip File") + + def fix_404_error_page(self): + """Fix 404 page by moving it to home directory. 
It deletes old folder.""" + try: + with codecs.open(self._404_page, "r", "utf-8") as f: + contents_404_page = f.read() + contents_404_page = helpers.update_links( + contents_404_page, + self.config["callback_home"], + self.config["callback_deploy_url"], + ) + + with open( + Path(self.output_folder, "404.html"), "w", encoding="utf-8" + ) as f: + f.write(contents_404_page) + helpers.log_to_console("INFO", "404 Page Created") + shutil.rmtree(Path(self.output_folder, self.config["pages"]["404"])) + helpers.log_to_console("INFO", "404 Folder Removed") + + except: + helpers.log_to_console("ERROR", "404 Page Not Created") + + def fix_home_page(self): + """Fix Schemas and other links on home page which are ignored by simply static plugin.""" + home_page_path = Path(self.output_folder, "index.html") + + try: + with codecs.open(home_page_path, "r", "utf-8") as f: + contents_home_page = f.read() + contents_home_page = helpers.update_links( + contents_home_page, + self.config["callback_home"], + self.config["callback_deploy_url"], + ) + + with open(home_page_path, "w", encoding="utf-8") as f: + f.write(contents_home_page) + helpers.log_to_console("INFO", "Fixed Home Page") + + except: + helpers.log_to_console("ERROR", "Home Page can not be fixed") + + def build_search_index(self): + """Buidl search index by using title, body content and href of a given page""" + helpers.log_to_console("INFO", "Start Building Search Index") + + # Copy Search.js into search folder + source_path = Path(self.config["root"], "src/search.js") + target_path = Path( + self.output_folder, f"{self.config['pages']['search']}/search.js" + ) + shutil.copyfile(source_path, target_path) + + # Now Process all foldre with content/index.html files + paths_to_pages = [ + path.split("index.html")[0] + for path in glob.glob(f"{self.output_folder}/**", recursive=True) + if path.endswith("index.html") + ] + + search_index_output = [] + + for page_path in paths_to_pages: + make_title = self.config["search"]["title"] 
+ make_url = self.config["search"]["url"] + make_text = self.config["search"]["text"] + make_images = self.config["search"]["images"] + document_path = Path(page_path, "index.html") + + if document_path.exists(): + with codecs.open(document_path, "r", "utf-8") as f: + contents_document_page = f.read() + contents_document_page = helpers.update_links( + contents_document_page, + self.config["callback_home"], + self.config["callback_deploy_url"], + ) + + soup = BeautifulSoup(contents_document_page, "lxml") + # append tags only if search page is specified + if "/search/" in str(document_path): + script_text = [ + soup.new_tag( + "script", + src=LUNR["src"], + integrity=LUNR["integrity"], + crossorigin=LUNR["crossorigin"], + referrerpolicy=LUNR["referrerpolicy"], + ), + soup.new_tag( + "script", + src=SEARCH_INDEX["src"], + ), + ] + + for script in script_text: + soup.find("head").append(str(script)) + + # TODO: In future add support for minification check + updated_content = soup.prettify( + formatter=HTMLFormatter(helpers.string_formatter) + ) + + with open(document_path, "w", encoding="utf-8") as f: + f.write(updated_content) + + title = soup.find("title") + title = title.string if title else None + url = soup.find("meta", {"property": "og:url"}) + url = url["content"] if url else None + canonical = soup.find("link", {"rel": "canonical"}) + if canonical: + url = canonical["href"] + + all_strings = soup.body.find_all(["h1", "h2", "h3", "p"]) + output = [strings for bd in all_strings for strings in bd.strings] + text = " ".join(output) + + if url and document_path.parts[-2] not in [ + self.config["pages"][page] for page in self.config["pages"] + ]: + out = { + "title": title if make_title else "", + "content": text if make_text else "", + "href": url if make_url else "", + } + search_index_output.append(out) + helpers.log_to_console("INFO", url) + + search_index_json_file_path = Path( + self.output_folder, + self.config["pages"]["search"], + "lunr.json", + ) + + with 
open(search_index_json_file_path, "w") as fl: + json.dump(search_index_output, fl, indent=4) + + helpers.log_to_console("INFO", "Prepare Search Index for title, Url and Text") + + def clean_directory_check(self): + """ """ + helpers.log_to_console( + "INFO", "Started Removing Bad URLs/Directories for forceful deploy." + ) + + files = [f for f in glob.glob(f"{self.output_folder}/**/*", recursive=True)] + for f in files: + if "#" in f or "?" in f: + if os.path.exists(f) and os.path.isdir(f): + print(f"removing {f} for forceful deploy.") + shutil.rmtree(f) + + helpers.log_to_console("INFO", "Removed Bad URLs/Directories from deployement.") + + def create_redirect_toml_file(self): + """Create netlify.toml file with redirects information freom redirect page.""" + helpers.log_to_console( + "INFO", "Source Redirect Page " + self.config["pages"]["redirect"] + ) + redirect_path = Path( + self.redirect_page, + "index.html", + ) + + rules = [ + [ + "[[redirects]]\n", + 'from = "/*"\n', + f'to = "/{self.config["pages"]["search"]}"\n', + "status = 301\n", + 'query = {s = ":s"}\n', + "force = true\n", + "\n", + ] + ] + + if redirect_path.exists(): + with codecs.open(redirect_path, "r", "utf-8") as f: + contents = f.read() + soup = BeautifulSoup(contents, "lxml") + redirect_table = soup.find_all("table")[0] + table_data = [ + [cell.text.strip("\r") for cell in row("td")] + for row in redirect_table.tbody.find_all("tr") + ] + + helpers.log_to_console( + "WARNING", f"Redirect Rules found - {len(table_data)}" + ) + + if len(table_data) > 1: + for data in table_data[1:]: + if data[3].strip() == "1": + rules.append( + [ + f"[[redirects]]\n", + f'{table_data[0][0].lower().strip()} = "{data[0].strip()}"\n', + f'{table_data[0][1].lower().strip()} = "{data[1].strip()}"\n', + f"{table_data[0][2].lower().strip()} = {data[2].strip()}\n", + "\n", + ] + ) + + shutil.rmtree(self.redirect_page) + + else: + helpers.log_to_console("WARNING", "No Redirect File found") + + netlify_toml_file = 
Path(self.output_folder, "netlify.toml") + + with open(netlify_toml_file, "w", encoding="utf-8") as f: + f.writelines(["".join(rule) for rule in rules]) + + helpers.log_to_console("INFO", "Netlify toml File Created Successfully") + + +if __name__ == "__main__": + + params = parse_qs(os.environ.get("INCOMING_HOOK_BODY")) + helpers.log_to_console("DEBUG", params) + + wordpress_simply_static_zip_url = ( + params["callback_home"][0] + + "/wp-content/plugins/simply-static/static-files/" + + params["archive_name"][0] + ) + + if wordpress_simply_static_zip_url: + configurations = { + "root": "", + "callback_home": params["callback_home"][0], + "callback_deploy_url": params["callback_deploy_url"][0], + "output_folder": "output", + "zip_url": wordpress_simply_static_zip_url, + "zip_file_name": "wordpress-simply-static.zip", + "pages": { + "404": "404-error", + "redirect": "redirects", + "robots": "robots", + "search": "search", + }, + "search": { + "title": "true", + "url": "true", + "text": "true", + "images": "false", + }, + } + + swpn = StaticWordPressNetlify(config_=configurations) + swpn.download_zip_file() + swpn.create_output_folder() + swpn.extract_zip_file() + swpn.fix_404_error_page() + swpn.fix_home_page() + swpn.build_search_index() + swpn.create_redirect_toml_file() + swpn.create_robots_txt() + + force_deploy = True # TODO: Pass via simply-static-netlify-callback + + if force_deploy: + swpn.clean_directory_check() + + else: + helpers.log_to_console("ERROR", "Zip File not avialable to deploy") \ No newline at end of file diff --git a/src/search.js b/src/search.js new file mode 100644 index 0000000..fc6348f --- /dev/null +++ b/src/search.js @@ -0,0 +1,305 @@ + +/* +The original source code of this file is avialble at +https://github.com/a-luna/aaronluna.dev/blob/master/static/js/search.js +There is no lincense string attached with original file/repository. + +simply-static-post-process (SERPWings) also do not claim any license of this file. 
+Always check original source for updated Licensing information. +*/ + +let pagesIndex, searchIndex; +const MAX_SUMMARY_LENGTH = 200; +const SENTENCE_BOUNDARY_REGEX = /\b\.\s/gm; +const WORD_REGEX = /\b(\w*)[\W|\s|\b]?/gm; + +async function initSearchIndex() { + try { + const response = await fetch("./lunr.json"); + pagesIndex = await response.json(); + searchIndex = lunr(function () { + this.field("title"); + this.field("content"); + this.ref("href"); + pagesIndex.forEach((page) => this.add(page)); + }); + } catch (e) { + console.log(e); + } +} + +function searchBoxFocused() { + document.querySelector(".search-container").classList.add("focused"); + document + .getElementById("search") + .addEventListener("focusout", () => searchBoxFocusOut()); +} + +function searchBoxFocusOut() { + document.querySelector(".search-container").classList.remove("focused"); +} + +function handleSearchQuery(event) { + event.preventDefault(); + const query = document.getElementById("search").value.trim().toLowerCase(); + if (!query) { + displayErrorMessage("Please enter a search term"); + return; + } + const results = searchSite(query); + if (!results.length) { + displayErrorMessage("Your search returned no results"); + return; + } + renderSearchResults(query, results); +} + +function displayErrorMessage(message) { + document.querySelector(".search-error-message").innerHTML = message; + document.querySelector(".search-container").classList.remove("focused"); + document.querySelector(".search-error").classList.remove("hide-element"); + document.querySelector(".search-error").classList.add("fade"); +} + +function removeAnimation() { + this.classList.remove("fade"); + this.classList.add("hide-element"); + document.querySelector(".search-container").classList.add("focused"); +} + +function searchSite(query) { + const originalQuery = query; + query = getLunrSearchQuery(query); + let results = getSearchResults(query); + return results.length + ? results + : query !== originalQuery + ? 
getSearchResults(originalQuery) + : []; +} + +function getLunrSearchQuery(query) { + const searchTerms = query.split(" "); + if (searchTerms.length === 1) { + return query; + } + query = ""; + for (const term of searchTerms) { + query += `+${term} `; + } + return query.trim(); +} + +function getSearchResults(query) { + return searchIndex.search(query).flatMap((hit) => { + if (hit.ref == "undefined") return []; + let pageMatch = pagesIndex.filter((page) => page.href === hit.ref)[0]; + pageMatch.score = hit.score; + return [pageMatch]; + }); +} + +function renderSearchResults(query, results) { + clearSearchResults(); + updateSearchResults(query, results); + showSearchResults(); + scrollToTop(); +} + +function clearSearchResults() { + const results = document.querySelector(".search-results ul"); + while (results.firstChild) results.removeChild(results.firstChild); + + document.getElementById("query").innerHTML = ""; + + if (!results.length) { + displayErrorMessage(""); + return; + } +} + +function updateSearchResults(query, results) { + document.querySelector(".search-results ul").innerHTML = results + .map( + (hit) => ` +
  • + ${hit.title} +

    ${createSearchResultBlurb(query, hit.content)}

    +
  • + ` + ) + .join(""); + const searchResultListItems = document.querySelectorAll(".search-results ul li"); + document.getElementById("query").innerHTML = "Search Query: " + query + " (" + searchResultListItems.length + ")"; + searchResultListItems.forEach( + (li) => (li.firstElementChild.style.color = getColorForSearchResult(li.dataset.score)) + ); +} + +function createSearchResultBlurb(query, pageContent) { + const searchQueryRegex = new RegExp(createQueryStringRegex(query), "gmi"); + const searchQueryHits = Array.from( + pageContent.matchAll(searchQueryRegex), + (m) => m.index + ); + const sentenceBoundaries = Array.from( + pageContent.matchAll(SENTENCE_BOUNDARY_REGEX), + (m) => m.index + ); + let searchResultText = ""; + let lastEndOfSentence = 0; + for (const hitLocation of searchQueryHits) { + if (hitLocation > lastEndOfSentence) { + for (let i = 0; i < sentenceBoundaries.length; i++) { + if (sentenceBoundaries[i] > hitLocation) { + const startOfSentence = i > 0 ? sentenceBoundaries[i - 1] + 1 : 0; + const endOfSentence = sentenceBoundaries[i]; + lastEndOfSentence = endOfSentence; + parsedSentence = pageContent.slice(startOfSentence, endOfSentence).trim(); + searchResultText += `${parsedSentence} ... `; + break; + } + } + } + const searchResultWords = tokenize(searchResultText); + const pageBreakers = searchResultWords.filter((word) => word.length > 50); + if (pageBreakers.length > 0) { + searchResultText = fixPageBreakers(searchResultText, pageBreakers); + } + if (searchResultWords.length >= MAX_SUMMARY_LENGTH) break; + } + return ellipsize(searchResultText, MAX_SUMMARY_LENGTH).replace( + searchQueryRegex, + "$&" + ); +} + +function createQueryStringRegex(query) { + return query.split(" ").length == 1 ? 
`(${query})` : `(${query.split(" ").join("|")})`; +} + +function tokenize(input) { + const wordMatches = Array.from(input.matchAll(WORD_REGEX), (m) => m); + return wordMatches.map((m) => ({ + word: m[0], + start: m.index, + end: m.index + m[0].length, + length: m[0].length, + })); +} + +function fixPageBreakers(input, largeWords) { + largeWords.forEach((word) => { + const chunked = chunkify(word.word, 20); + input = input.replace(word.word, chunked); + }); + return input; +} + +function chunkify(input, chunkSize) { + let output = ""; + let totalChunks = (input.length / chunkSize) | 0; + let lastChunkIsUneven = input.length % chunkSize > 0; + if (lastChunkIsUneven) { + totalChunks += 1; + } + for (let i = 0; i < totalChunks; i++) { + let start = i * chunkSize; + let end = start + chunkSize; + if (lastChunkIsUneven && i === totalChunks - 1) { + end = input.length; + } + output += input.slice(start, end) + " "; + } + return output; +} + +function ellipsize(input, maxLength) { + const words = tokenize(input); + if (words.length <= maxLength) { + return input; + } + return input.slice(0, words[maxLength].end) + "..."; +} + +function showSearchResults() { + document.querySelector(".search-results").classList.add("hide-element"); +} + +function scrollToTop() { + const toTopInterval = setInterval(function () { + const supportedScrollTop = + document.body.scrollTop > 0 ? 
document.body : document.documentElement; + if (supportedScrollTop.scrollTop > 0) { + supportedScrollTop.scrollTop = supportedScrollTop.scrollTop - 50; + } + if (supportedScrollTop.scrollTop < 1) { + clearInterval(toTopInterval); + } + }, 10); +} + +function getColorForSearchResult(score) { + const warmColorHue = 171; + const coolColorHue = 212; + return adjustHue(warmColorHue, coolColorHue, score); +} + +function adjustHue(hue1, hue2, score) { + if (score > 3) return `hsl(${hue1}, 100%, 50%)`; + const hueAdjust = (parseFloat(score) / 3) * (hue1 - hue2); + const newHue = hue2 + Math.floor(hueAdjust); + return `hsl(${newHue}, 100%, 50%)`; +} + +function handleClearSearchButtonClicked() { + hideSearchResults(); + clearSearchResults(); + document.getElementById("search").value = ""; +} + +function hideSearchResults() { + document.querySelector(".search-results").classList.add("hide-element"); +} + +initSearchIndex(); +document.addEventListener("DOMContentLoaded", function () { + if (document.getElementById("search-form") != null) { + const searchInput = document.getElementById("search"); + searchInput.addEventListener("focus", () => searchBoxFocused()); + searchInput.addEventListener("keydown", (event) => { + if (event.keyCode == 13) handleSearchQuery(event); + }); + document + .querySelector(".search-error") + .addEventListener("animationend", removeAnimation); + } + document + .querySelectorAll(".fa-search") + .forEach((button) => + button.addEventListener("click", (event) => handleSearchQuery(event)) + ); + document + .querySelectorAll(".clear-search-results") + .forEach((button) => + button.addEventListener("click", () => handleClearSearchButtonClicked()) + ); +}); + +if (!String.prototype.matchAll) { + String.prototype.matchAll = function (regex) { + "use strict"; + function ensureFlag(flags, flag) { + return flags.includes(flag) ? 
flags : flags + flag; + } + function* matchAll(str, regex) { + const localCopy = new RegExp(regex, ensureFlag(regex.flags, "g")); + let match; + while ((match = localCopy.exec(str))) { + match.index = localCopy.lastIndex - match[0].length; + yield match; + } + } + return matchAll(this, regex); + }; +} \ No newline at end of file