Initial commit

Seo Wings 2023-03-05 18:59:16 +01:00
commit dc92b64add
17 changed files with 1100 additions and 0 deletions

18
.github/workflows/makedocs.yml vendored Normal file

@@ -0,0 +1,18 @@
name: Documentation Generator

on:
push:
branches: [main]

jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- uses: actions/setup-python@v2
- run: pip install --upgrade pip && pip install mkdocs mkdocs-gen-files mkdocstrings[python] pymdown-extensions
- run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com'
- name: Publish docs
run: mkdocs gh-deploy

134
.gitignore vendored Normal file

@@ -0,0 +1,134 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.swp
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/


# misc
pyvenv.cfg

25
LICENSE Normal file

@@ -0,0 +1,25 @@
Simply Static Netlify Process
https://github.com/serpwings/simply-static-post-process

A Python Library to prepare and deploy a static version of a WordPress
installation to a static hosting service provider (Netlify).


MIT License
Copyright (c) 2023 SERP Wings <www.serpwings.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

24
README.md Normal file

@@ -0,0 +1,24 @@
# Simply Static Post Process

![simply-static-post-process feature image](docs/img/simply-static-netlify-deploy-workflow.png)


A Python Library to prepare and deploy a static version of a WordPress Installation on Netlify (Static Hosting Service Provider).

## How to Use simply-static-post-process?
Please check our detailed tutorial (video/text) on the [Simply Static Post Process Tutorial](https://www.seowings.org/simply-static-tutorial/) to understand how it works. Live documentation for this package is available at [simply-static.netlify.app](https://simply-static.netlify.app).

## Contributions

Contributions, suggestions, and comments are welcome. Please fork the repository and submit a pull request.


## About Us

This work is a collaborative effort of [seowings](https://seowings.org/) and [serpwings](https://serpwings.com/).


## LICENSE

- Simply Static Netlify Process is released under the [MIT License](https://github.com/serpwings/simply-static-post-process/blob/master/LICENSE).
- ``src/search.js`` is distributed without additional licensing restrictions; please consult the header of ``src/search.js`` for details.

4
docs/helper-functions.md Normal file

@@ -0,0 +1,4 @@
# Helper Functions

::: src.helpers

BIN
docs/img/favicon.ico Normal file (binary image, 4.1 KiB)

BIN
docs/img/simply-static-netlify-deploy-workflow.png Normal file (binary image, 104 KiB)

24
docs/index.md Normal file

@@ -0,0 +1,24 @@
# Simply Static Post Process

![simply-static-post-process feature image](img/simply-static-netlify-deploy-workflow.png)


A Python Library to prepare and deploy a static version of a WordPress Installation on Netlify (Static Hosting Service Provider).

## How to Use simply-static-post-process?
Please check our detailed tutorial (video/text) on the [Simply Static Post Process Tutorial](https://www.seowings.org/simply-static-tutorial/) to understand how it works. Live documentation for this package is available at [simply-static.netlify.app](https://simply-static.netlify.app).

## Contributions

Contributions, suggestions, and comments are welcome. Please fork the repository and submit a pull request.


## About Us

This work is a collaborative effort of [seowings](https://seowings.org/) and [serpwings](https://serpwings.com/).


## LICENSE

- Simply Static Netlify Process is released under the [MIT License](https://github.com/serpwings/simply-static-post-process/blob/master/LICENSE).
- ``src/search.js`` is distributed without additional licensing restrictions; please consult the header of ``src/search.js`` for details.

3
docs/simply-static-netlify.md Normal file

@@ -0,0 +1,3 @@
# StaticWordPressNetlify Class

::: src.main.StaticWordPressNetlify

3
docs/tutorial.md Normal file

@@ -0,0 +1,3 @@
# Tutorial: How to Deploy Simply Static Post Process

Please read/watch the detailed tutorial on the [Seowings Tutorial Page](https://www.seowings.org/simply-static-tutorial/).

32
mkdocs.yml Normal file

@@ -0,0 +1,32 @@
site_name: simply-static-post-process
site_description: Python Package to Prepare and Deploy a Static Version of a WordPress Installation on Netlify
site_author: SERP Wings

site_url: https://simply-static.netlify.app

repo_url: https://github.com/serpwings/simply-static-post-process
edit_uri: blob/main/docs/

nav:
- Home: index.md
- Tutorial: tutorial.md
- API:
- simply-static-netlify object: simply-static-netlify.md
- Helper Functions: helper-functions.md

plugins:
- search
- mkdocstrings

markdown_extensions:
- admonition
- codehilite
- smarty
- meta
- toc:
permalink: True
- attr_list
theme: readthedocs

copyright: © 2023 SERP Wings

3
netlify.toml Normal file

@@ -0,0 +1,3 @@
[build]
publish = "output"
command = "python src/main.py"
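
Netlify runs the build command above with the triggering webhook's payload exposed through the INCOMING_HOOK_BODY environment variable. A minimal sketch of how src/main.py consumes it (the payload values here are illustrative, not from the source):

    import os
    from urllib.parse import parse_qs

    # Illustrative payload; real values come from the simply-static callback.
    os.environ["INCOMING_HOOK_BODY"] = (
        "archive_name=simply-static.zip"
        "&callback_home=https://example.com"
        "&callback_deploy_url=https://example.netlify.app"
    )

    params = parse_qs(os.environ.get("INCOMING_HOOK_BODY"))
    print(params["callback_home"][0])  # https://example.com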

3
requirements.txt Normal file

@@ -0,0 +1,3 @@
beautifulsoup4==4.11.1
lxml>=4.9.1
requests==2.28.0

1
runtime.txt Normal file

@@ -0,0 +1 @@
3.8

74
src/helpers.py Normal file

@@ -0,0 +1,74 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*- #

"""
Simply Static Netlify Process
https://github.com/serpwings/simply-static-post-process

A Python Library to prepare and deploy a static version of a WordPress Installation on Netlify (Static Hosting Service Provider).


MIT License
Copyright (c) 2023 SERP Wings <www.serpwings.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

# +++++++++++++++++++++++++++++++++++++++++++++++++++++
# FUNCTIONS
# +++++++++++++++++++++++++++++++++++++++++++++++++++++


def string_formatter(text):
    """
    Identity entity-substitution function for BeautifulSoup's HTMLFormatter.

    Returning the input unchanged disables HTML entity escaping, so markup
    appended as plain strings survives ``soup.prettify`` intact.

    Args:
        text (str, required): string to be formatted.
    """
    return text


def log_to_console(typ="INFO", message=""):
"""
    Helper to log text streams (passed in as message) to the console.

    Args:
        typ (str, optional): Type of log. Defaults to ``INFO`` but can be any arbitrary value, e.g. ``ERROR`` or ``DEBUG``.
        message (str, required): message string to be logged to the console.
"""
print(f"{typ}: {message}")


def update_links(content="", link_from="", link_to=""):
"""
    Useful for fixing schema or other links which Simply Static cannot fix.

    Args:
        content (str, required): text in which links should be updated, e.g. the content of the home page.
        link_from (str, required): link to be replaced.
        link_to (str, required): link to replace it with.
"""
if link_from and link_to and content:
link_from = link_from.split("://")[-1]
link_to = link_to.split("://")[-1]
return content.replace(link_from, link_to)
return content


if __name__ == "__main__":
pass
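
A quick usage sketch of ``update_links`` (example values are illustrative). It strips the URL scheme before replacing, so http and https references to the old host are both rewritten:

    html = '<a href="http://example.com/about/">About</a>'
    print(update_links(html, "https://example.com", "https://example.netlify.app"))
    # -> <a href="http://example.netlify.app/about/">About</a>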

447
src/main.py Normal file

@@ -0,0 +1,447 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*- #

"""
Simply Static Netlify Process
https://github.com/serpwings/simply-static-post-process

A Python Library to prepare and deploy a static version of a WordPress Installation on Netlify (Static Hosting Service Provider).


MIT License
Copyright (c) 2023 SERP Wings <www.serpwings.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

# +++++++++++++++++++++++++++++++++++++++++++++++++++++
# IMPORTS (Standard Library)
# +++++++++++++++++++++++++++++++++++++++++++++++++++++

import os
import glob
import codecs
from pathlib import Path
from zipfile import ZipFile
import shutil
import json
from urllib.parse import parse_qs

# +++++++++++++++++++++++++++++++++++++++++++++++++++++
# IMPORTS (3rd Party)
# +++++++++++++++++++++++++++++++++++++++++++++++++++++

from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
import requests
from requests.structures import CaseInsensitiveDict


# +++++++++++++++++++++++++++++++++++++++++++++++++++++
# Local Imports
# +++++++++++++++++++++++++++++++++++++++++++++++++++++

import helpers

# +++++++++++++++++++++++++++++++++++++++++++++++++++++
# CONSTANTS
# +++++++++++++++++++++++++++++++++++++++++++++++++++++

LUNR = {
"src": "https://cdnjs.cloudflare.com/ajax/libs/lunr.js/2.3.9/lunr.min.js",
"integrity": "sha512-4xUl/d6D6THrAnXAwGajXkoWaeMNwEKK4iNfq5DotEbLPAfk6FSxSP3ydNxqDgCw1c/0Z1Jg6L8h2j+++9BZmg==",
"crossorigin": "anonymous",
"referrerpolicy": "no-referrer",
}

SEARCH_INDEX = {"src": "search.js"}


# +++++++++++++++++++++++++++++++++++++++++++++++++++++
# CLASSES
# +++++++++++++++++++++++++++++++++++++++++++++++++++++


class StaticWordPressNetlify:
"""
    Processes the ZIP archive produced by the Simply Static WordPress plugin and converts it into a static site ready for Netlify.

    Attributes:
        config (dict): Contains all important configurations for the class.
        output_folder (Path): Path of the output folder deployed to Netlify.
        zip_file_path (Path): Path of the Simply Static ZIP file downloaded from the remote server.
        redirect_page (Path): Path of the redirect page.
        robots_txt_page (Path): Path of the robots.txt page.
        _404_page (Path): Path of the 404 page.
"""

def __init__(self, config_=None):
"""Initialize StaticWordPressNetlify objet with a config values.

Args:
config_ (dict, optional): contains diverse conditions for StaticWordPressNetlify object.
"""
if config_:
self.config = config_
self.output_folder = Path(self.config["root"], self.config["output_folder"])
self.zip_file_path = Path(self.config["root"], self.config["zip_file_name"])
self.redirect_page = Path(
self.output_folder, self.config["pages"]["redirect"]
)
self.robots_txt_page = Path(
self.output_folder, self.config["pages"]["robots"]
)
self._404_page = Path(
self.output_folder, self.config["pages"]["404"], "index.html"
)

def download_zip_file(self):
"""Download zip file from remote server"""

helpers.log_to_console("INFO", configurations["zip_url"])

headers = CaseInsensitiveDict()
headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
headers["Pragma"] = "no-cache"
headers["Expires"] = "0"

current_session = requests.session()
        response = current_session.get(self.config["zip_url"], headers=headers)

if response.status_code == 200:
with open(self.config["zip_file_name"], "wb") as fd:
for chunk in response.iter_content(chunk_size=128):
fd.write(chunk)
helpers.log_to_console("INFO", "Simply Static Zip File Downloaded")
else:
helpers.log_to_console("ERROR", "Simply Static Zip File Not available")

current_session.cookies.clear()

def create_output_folder(self):
"""Create Ouput Folder it it doesnot exist."""

if not self.output_folder.is_dir():
self.output_folder.mkdir(parents=True, exist_ok=True)
helpers.log_to_console("INFO", "Output Folder Created")
else:
helpers.log_to_console("ERROR", "Cannot Create Output Folder")

def create_robots_txt(self):
"""Create Robots.txt File using robots-txt page. Default would be ``User-agent: *``"""
robots_path = Path(
self.robots_txt_page,
"index.html",
)

if robots_path.exists():
with codecs.open(robots_path, "r", "utf-8") as f:
robots_txt_contents = f.read()
soup = BeautifulSoup(robots_txt_contents, "lxml")
robots_table = soup.find_all("table")[0]

with open(f"{self.output_folder}/robots.txt", "w") as f:
for row in robots_table.tbody.find_all("tr"):
f.write("".join([cell.text.strip("\r") for cell in row("td")]))
f.write("\n")

shutil.rmtree(Path(self.output_folder, self.config["pages"]["robots"]))
else:
with open(f"{self.output_folder}/robots.txt", "w") as f:
f.write("User-agent: * \n")
f.write("Disallow: /wp-admin/ \n")
f.write("Allow: /wp-admin/admin-ajax.php \n")

helpers.log_to_console("INFO", "Created robots.txt file")

def extract_zip_file(self):
"""Extract simply static zip file to ouput folder."""
if self.output_folder.is_dir():
zf = ZipFile(self.zip_file_path, "r")
zf.extractall(self.output_folder)
zf.close()
helpers.log_to_console("INFO", "Zip File Extracted")
else:
helpers.log_to_console("ERROR", "Cannot extract Zip File")

def fix_404_error_page(self):
"""Fix 404 page by moving it to home directory. It deletes old folder."""
try:
with codecs.open(self._404_page, "r", "utf-8") as f:
contents_404_page = f.read()
contents_404_page = helpers.update_links(
contents_404_page,
self.config["callback_home"],
self.config["callback_deploy_url"],
)

with open(
Path(self.output_folder, "404.html"), "w", encoding="utf-8"
) as f:
f.write(contents_404_page)
helpers.log_to_console("INFO", "404 Page Created")
shutil.rmtree(Path(self.output_folder, self.config["pages"]["404"]))
helpers.log_to_console("INFO", "404 Folder Removed")

        except Exception:
            helpers.log_to_console("ERROR", "404 Page Not Created")

def fix_home_page(self):
"""Fix Schemas and other links on home page which are ignored by simply static plugin."""
home_page_path = Path(self.output_folder, "index.html")

try:
with codecs.open(home_page_path, "r", "utf-8") as f:
contents_home_page = f.read()
contents_home_page = helpers.update_links(
contents_home_page,
self.config["callback_home"],
self.config["callback_deploy_url"],
)

with open(home_page_path, "w", encoding="utf-8") as f:
f.write(contents_home_page)
helpers.log_to_console("INFO", "Fixed Home Page")

        except Exception:
            helpers.log_to_console("ERROR", "Home Page could not be fixed")

def build_search_index(self):
"""Buidl search index by using title, body content and href of a given page"""
helpers.log_to_console("INFO", "Start Building Search Index")

        # Copy search.js into the search folder
source_path = Path(self.config["root"], "src/search.js")
target_path = Path(
self.output_folder, f"{self.config['pages']['search']}/search.js"
)
shutil.copyfile(source_path, target_path)

        # Now process all folders containing index.html files
paths_to_pages = [
path.split("index.html")[0]
for path in glob.glob(f"{self.output_folder}/**", recursive=True)
if path.endswith("index.html")
]

search_index_output = []

for page_path in paths_to_pages:
make_title = self.config["search"]["title"]
make_url = self.config["search"]["url"]
make_text = self.config["search"]["text"]
make_images = self.config["search"]["images"]
document_path = Path(page_path, "index.html")

if document_path.exists():
with codecs.open(document_path, "r", "utf-8") as f:
contents_document_page = f.read()
contents_document_page = helpers.update_links(
contents_document_page,
self.config["callback_home"],
self.config["callback_deploy_url"],
)

soup = BeautifulSoup(contents_document_page, "lxml")
# append tags only if search page is specified
if "/search/" in str(document_path):
script_text = [
soup.new_tag(
"script",
src=LUNR["src"],
integrity=LUNR["integrity"],
crossorigin=LUNR["crossorigin"],
referrerpolicy=LUNR["referrerpolicy"],
),
soup.new_tag(
"script",
src=SEARCH_INDEX["src"],
),
]

for script in script_text:
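                        # str(script) appends the tag markup as plain text; it
                        # survives unescaped because prettify() below uses the
                        # identity string_formatter for entity substitution.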
soup.find("head").append(str(script))

# TODO: In future add support for minification check
updated_content = soup.prettify(
formatter=HTMLFormatter(helpers.string_formatter)
)

with open(document_path, "w", encoding="utf-8") as f:
f.write(updated_content)

title = soup.find("title")
title = title.string if title else None
url = soup.find("meta", {"property": "og:url"})
url = url["content"] if url else None
canonical = soup.find("link", {"rel": "canonical"})
if canonical:
url = canonical["href"]

all_strings = soup.body.find_all(["h1", "h2", "h3", "p"])
output = [strings for bd in all_strings for strings in bd.strings]
text = " ".join(output)

if url and document_path.parts[-2] not in [
self.config["pages"][page] for page in self.config["pages"]
]:
out = {
"title": title if make_title else "",
"content": text if make_text else "",
"href": url if make_url else "",
}
search_index_output.append(out)
helpers.log_to_console("INFO", url)

search_index_json_file_path = Path(
self.output_folder,
self.config["pages"]["search"],
"lunr.json",
)

with open(search_index_json_file_path, "w") as fl:
json.dump(search_index_output, fl, indent=4)

helpers.log_to_console("INFO", "Prepare Search Index for title, Url and Text")

def clean_directory_check(self):
""" """
helpers.log_to_console(
"INFO", "Started Removing Bad URLs/Directories for forceful deploy."
)

files = [f for f in glob.glob(f"{self.output_folder}/**/*", recursive=True)]
for f in files:
if "#" in f or "?" in f:
if os.path.exists(f) and os.path.isdir(f):
print(f"removing {f} for forceful deploy.")
shutil.rmtree(f)

helpers.log_to_console("INFO", "Removed Bad URLs/Directories from deployement.")

def create_redirect_toml_file(self):
"""Create netlify.toml file with redirects information freom redirect page."""
helpers.log_to_console(
"INFO", "Source Redirect Page " + self.config["pages"]["redirect"]
)
redirect_path = Path(
self.redirect_page,
"index.html",
)

rules = [
[
"[[redirects]]\n",
'from = "/*"\n',
f'to = "/{self.config["pages"]["search"]}"\n',
"status = 301\n",
'query = {s = ":s"}\n',
"force = true\n",
"\n",
]
]

if redirect_path.exists():
with codecs.open(redirect_path, "r", "utf-8") as f:
contents = f.read()
soup = BeautifulSoup(contents, "lxml")
redirect_table = soup.find_all("table")[0]
table_data = [
[cell.text.strip("\r") for cell in row("td")]
for row in redirect_table.tbody.find_all("tr")
]

helpers.log_to_console(
"WARNING", f"Redirect Rules found - {len(table_data)}"
)

if len(table_data) > 1:
for data in table_data[1:]:
if data[3].strip() == "1":
rules.append(
[
f"[[redirects]]\n",
f'{table_data[0][0].lower().strip()} = "{data[0].strip()}"\n',
f'{table_data[0][1].lower().strip()} = "{data[1].strip()}"\n',
f"{table_data[0][2].lower().strip()} = {data[2].strip()}\n",
"\n",
]
)

shutil.rmtree(self.redirect_page)

else:
helpers.log_to_console("WARNING", "No Redirect File found")

netlify_toml_file = Path(self.output_folder, "netlify.toml")

with open(netlify_toml_file, "w", encoding="utf-8") as f:
f.writelines(["".join(rule) for rule in rules])

helpers.log_to_console("INFO", "Netlify toml File Created Successfully")


if __name__ == "__main__":

params = parse_qs(os.environ.get("INCOMING_HOOK_BODY"))
helpers.log_to_console("DEBUG", params)

wordpress_simply_static_zip_url = (
params["callback_home"][0]
+ "/wp-content/plugins/simply-static/static-files/"
+ params["archive_name"][0]
)

if wordpress_simply_static_zip_url:
configurations = {
"root": "",
"callback_home": params["callback_home"][0],
"callback_deploy_url": params["callback_deploy_url"][0],
"output_folder": "output",
"zip_url": wordpress_simply_static_zip_url,
"zip_file_name": "wordpress-simply-static.zip",
"pages": {
"404": "404-error",
"redirect": "redirects",
"robots": "robots",
"search": "search",
},
"search": {
"title": "true",
"url": "true",
"text": "true",
"images": "false",
},
}

swpn = StaticWordPressNetlify(config_=configurations)
swpn.download_zip_file()
swpn.create_output_folder()
swpn.extract_zip_file()
swpn.fix_404_error_page()
swpn.fix_home_page()
swpn.build_search_index()
swpn.create_redirect_toml_file()
swpn.create_robots_txt()

force_deploy = True # TODO: Pass via simply-static-netlify-callback

if force_deploy:
swpn.clean_directory_check()

else:
helpers.log_to_console("ERROR", "Zip File not avialable to deploy")

305
src/search.js Normal file

@@ -0,0 +1,305 @@

/*
The original source code of this file is available at
https://github.com/a-luna/aaronluna.dev/blob/master/static/js/search.js
There is no license string attached with the original file/repository.

simply-static-post-process (SERPWings) also does not claim any license on this file.
Always check the original source for updated licensing information.
*/

let pagesIndex, searchIndex;
const MAX_SUMMARY_LENGTH = 200;
const SENTENCE_BOUNDARY_REGEX = /\b\.\s/gm;
const WORD_REGEX = /\b(\w*)[\W|\s|\b]?/gm;

async function initSearchIndex() {
try {
const response = await fetch("./lunr.json");
pagesIndex = await response.json();
searchIndex = lunr(function () {
this.field("title");
this.field("content");
this.ref("href");
pagesIndex.forEach((page) => this.add(page));
});
} catch (e) {
console.log(e);
}
}
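
// lunr.json is generated by src/main.py into the same /search/ folder that
// serves this script, so the relative fetch above resolves to /search/lunr.json.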

function searchBoxFocused() {
document.querySelector(".search-container").classList.add("focused");
document
.getElementById("search")
.addEventListener("focusout", () => searchBoxFocusOut());
}

function searchBoxFocusOut() {
document.querySelector(".search-container").classList.remove("focused");
}

function handleSearchQuery(event) {
event.preventDefault();
const query = document.getElementById("search").value.trim().toLowerCase();
if (!query) {
displayErrorMessage("Please enter a search term");
return;
}
const results = searchSite(query);
if (!results.length) {
displayErrorMessage("Your search returned no results");
return;
}
renderSearchResults(query, results);
}

function displayErrorMessage(message) {
document.querySelector(".search-error-message").innerHTML = message;
document.querySelector(".search-container").classList.remove("focused");
document.querySelector(".search-error").classList.remove("hide-element");
document.querySelector(".search-error").classList.add("fade");
}

function removeAnimation() {
this.classList.remove("fade");
this.classList.add("hide-element");
document.querySelector(".search-container").classList.add("focused");
}

function searchSite(query) {
const originalQuery = query;
query = getLunrSearchQuery(query);
let results = getSearchResults(query);
return results.length
? results
: query !== originalQuery
? getSearchResults(originalQuery)
: [];
}

function getLunrSearchQuery(query) {
const searchTerms = query.split(" ");
if (searchTerms.length === 1) {
return query;
}
query = "";
for (const term of searchTerms) {
query += `+${term} `;
}
return query.trim();
}
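
// e.g. getLunrSearchQuery("static wordpress") returns "+static +wordpress",
// where the "+" prefix tells lunr that each term must be present.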

function getSearchResults(query) {
return searchIndex.search(query).flatMap((hit) => {
if (hit.ref == "undefined") return [];
let pageMatch = pagesIndex.filter((page) => page.href === hit.ref)[0];
pageMatch.score = hit.score;
return [pageMatch];
});
}

function renderSearchResults(query, results) {
clearSearchResults();
updateSearchResults(query, results);
showSearchResults();
scrollToTop();
}

function clearSearchResults() {
const results = document.querySelector(".search-results ul");
while (results.firstChild) results.removeChild(results.firstChild);

document.getElementById("query").innerHTML = "";

  displayErrorMessage(""); // clear any lingering error message
}

function updateSearchResults(query, results) {
document.querySelector(".search-results ul").innerHTML = results
.map(
(hit) => `
<li class="search-result-item" data-score="${hit.score.toFixed(2)}">
<a href="${hit.href}" class="search-result-page-title">${hit.title}</a>
<p>${createSearchResultBlurb(query, hit.content)}</p>
</li>
`
)
.join("");
const searchResultListItems = document.querySelectorAll(".search-results ul li");
document.getElementById("query").innerHTML = "Search Query: " + query + " (" + searchResultListItems.length + ")";
searchResultListItems.forEach(
(li) => (li.firstElementChild.style.color = getColorForSearchResult(li.dataset.score))
);
}

function createSearchResultBlurb(query, pageContent) {
const searchQueryRegex = new RegExp(createQueryStringRegex(query), "gmi");
const searchQueryHits = Array.from(
pageContent.matchAll(searchQueryRegex),
(m) => m.index
);
const sentenceBoundaries = Array.from(
pageContent.matchAll(SENTENCE_BOUNDARY_REGEX),
(m) => m.index
);
let searchResultText = "";
let lastEndOfSentence = 0;
for (const hitLocation of searchQueryHits) {
if (hitLocation > lastEndOfSentence) {
for (let i = 0; i < sentenceBoundaries.length; i++) {
if (sentenceBoundaries[i] > hitLocation) {
const startOfSentence = i > 0 ? sentenceBoundaries[i - 1] + 1 : 0;
const endOfSentence = sentenceBoundaries[i];
lastEndOfSentence = endOfSentence;
          const parsedSentence = pageContent.slice(startOfSentence, endOfSentence).trim();
searchResultText += `${parsedSentence} ... `;
break;
}
}
}
const searchResultWords = tokenize(searchResultText);
const pageBreakers = searchResultWords.filter((word) => word.length > 50);
if (pageBreakers.length > 0) {
searchResultText = fixPageBreakers(searchResultText, pageBreakers);
}
if (searchResultWords.length >= MAX_SUMMARY_LENGTH) break;
}
return ellipsize(searchResultText, MAX_SUMMARY_LENGTH).replace(
searchQueryRegex,
"<strong>$&</strong>"
);
}

function createQueryStringRegex(query) {
return query.split(" ").length == 1 ? `(${query})` : `(${query.split(" ").join("|")})`;
}

function tokenize(input) {
const wordMatches = Array.from(input.matchAll(WORD_REGEX), (m) => m);
return wordMatches.map((m) => ({
word: m[0],
start: m.index,
end: m.index + m[0].length,
length: m[0].length,
}));
}

function fixPageBreakers(input, largeWords) {
largeWords.forEach((word) => {
const chunked = chunkify(word.word, 20);
input = input.replace(word.word, chunked);
});
return input;
}

function chunkify(input, chunkSize) {
let output = "";
let totalChunks = (input.length / chunkSize) | 0;
let lastChunkIsUneven = input.length % chunkSize > 0;
if (lastChunkIsUneven) {
totalChunks += 1;
}
for (let i = 0; i < totalChunks; i++) {
let start = i * chunkSize;
let end = start + chunkSize;
if (lastChunkIsUneven && i === totalChunks - 1) {
end = input.length;
}
output += input.slice(start, end) + " ";
}
return output;
}
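
// e.g. chunkify("abcdefghij", 4) returns "abcd efgh ij ";
// fixPageBreakers above calls it with a chunk size of 20.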

function ellipsize(input, maxLength) {
const words = tokenize(input);
if (words.length <= maxLength) {
return input;
}
return input.slice(0, words[maxLength].end) + "...";
}

function showSearchResults() {
  // Unhide the results panel; hideSearchResults() re-adds "hide-element".
  document.querySelector(".search-results").classList.remove("hide-element");
}

function scrollToTop() {
const toTopInterval = setInterval(function () {
const supportedScrollTop =
document.body.scrollTop > 0 ? document.body : document.documentElement;
if (supportedScrollTop.scrollTop > 0) {
supportedScrollTop.scrollTop = supportedScrollTop.scrollTop - 50;
}
if (supportedScrollTop.scrollTop < 1) {
clearInterval(toTopInterval);
}
}, 10);
}

function getColorForSearchResult(score) {
const warmColorHue = 171;
const coolColorHue = 212;
return adjustHue(warmColorHue, coolColorHue, score);
}

function adjustHue(hue1, hue2, score) {
if (score > 3) return `hsl(${hue1}, 100%, 50%)`;
const hueAdjust = (parseFloat(score) / 3) * (hue1 - hue2);
const newHue = hue2 + Math.floor(hueAdjust);
return `hsl(${newHue}, 100%, 50%)`;
}

function handleClearSearchButtonClicked() {
hideSearchResults();
clearSearchResults();
document.getElementById("search").value = "";
}

function hideSearchResults() {
document.querySelector(".search-results").classList.add("hide-element");
}

initSearchIndex();
document.addEventListener("DOMContentLoaded", function () {
if (document.getElementById("search-form") != null) {
const searchInput = document.getElementById("search");
searchInput.addEventListener("focus", () => searchBoxFocused());
searchInput.addEventListener("keydown", (event) => {
if (event.keyCode == 13) handleSearchQuery(event);
});
document
.querySelector(".search-error")
.addEventListener("animationend", removeAnimation);
}
document
.querySelectorAll(".fa-search")
.forEach((button) =>
button.addEventListener("click", (event) => handleSearchQuery(event))
);
document
.querySelectorAll(".clear-search-results")
.forEach((button) =>
button.addEventListener("click", () => handleClearSearchButtonClicked())
);
});

if (!String.prototype.matchAll) {
String.prototype.matchAll = function (regex) {
"use strict";
function ensureFlag(flags, flag) {
return flags.includes(flag) ? flags : flags + flag;
}
function* matchAll(str, regex) {
const localCopy = new RegExp(regex, ensureFlag(regex.flags, "g"));
let match;
while ((match = localCopy.exec(str))) {
match.index = localCopy.lastIndex - match[0].length;
yield match;
}
}
return matchAll(this, regex);
};
}