"""Filter to impose WCMS template on HTML pages.
Main program for the WCMS HTML filter.
"""
from collections import namedtuple
from datetime import datetime
from os import getenv, getpid
from pathlib import Path
import re
from sys import stdin, stdout, stderr
from urllib.parse import quote_plus
from bs4 import BeautifulSoup
from .templates import templatedir
# One parsed listing line, as built by parse_entry():
#   arcname  -- first comma-separated field of the line
#   hidden   -- True when the second field is exactly 'H'
#   external -- True when arcname matches external_url_re
#   label    -- third comma-separated field of the line
Entry = namedtuple ('Entry', ('arcname', 'hidden', 'external', 'label'))
# Matches values that begin with '/' or with 'http://' / 'https://'.
external_url_re = re.compile ('^(/|https?://)')
def parse_entry(entry):
    """Parse one comma-separated listing line into an :class:`Entry`.

    Surrounding whitespace is stripped from the line and from every field.
    Blank lines and lines beginning with ``--`` carry no entry.

    :param entry: One line of text.
    :returns: An :class:`Entry`, or ``None`` for a blank or ``--`` line.
    :raises IndexError: If the line has fewer than three fields.
    """
    line = entry.strip()
    if not line or line.startswith('--'):
        return None

    parts = [piece.strip() for piece in line.split(',')]
    target = parts[0]
    # The second field is a flag column; only the exact value 'H' hides.
    is_hidden = parts[1] == 'H'
    is_external = external_url_re.match(target) is not None
    return Entry(target, is_hidden, is_external, parts[2])
def make_link(soup, href, content):
    """Build an ``<a>`` element that points at *href* and wraps *content*.

    :param soup: Object whose ``new_tag`` method creates the element
        (a Beautiful Soup document in normal use).
    :param href: Value for the anchor's ``href`` attribute.
    :param content: Child appended inside the new anchor.
    :returns: The newly created anchor element.
    """
    anchor = soup.new_tag('a', href=href)
    anchor.append(content)
    return anchor
# Not quite right, but the point is that we probably need a top-level function.
def path_to_arcs(path):
    """Split an absolute URL path into its list of components ("arcs").

    The mandatory leading slash is dropped and a single trailing slash is
    ignored, so ``"/a/b"`` and ``"/a/b/"`` both yield ``["a", "b"]`` and
    ``"/"`` yields ``[]``.  Empty components in the middle are kept.

    :param path: Path string beginning with ``/``.
    :returns: List of path components.
    :raises ValueError: If *path* does not start with ``/``.
    """
    if not path.startswith("/"):
        raise ValueError("Invalid path (%s) does not start with '/'" % path)

    arcs = path.split("/")
    # The text before the leading slash is always the empty string.
    del arcs[0]
    if arcs[-1] == "":
        # Path ended with a slash; discard the empty final component.
        arcs.pop()
    return arcs
def apply_page(template, page, document_root, request_uri):
    """Fill in the template with content from the document page.

    The template is modified in place.  The page is consumed: its sidebar
    (if any) and the contents of its ``<body>`` are moved into the template.

    :param template: Beautiful Soup parsed template.
    :param page: Beautiful Soup parsed document.
    :param document_root: Python pathlib Path object for document root.
    :param request_uri: Request URI string for this request.  It is used
        unchanged as the login return address; any ``?query`` suffix is
        removed before it is split into path components for the menus.
    """
    # Insert login/logout link
    link = template.new_tag('a')
    remote_user = getenv("REMOTE_USER")
    if remote_user is None:
        # Login link
        link['href'] = "/mellon/login?ReturnTo=" + quote_plus(request_uri)
        link.append('Log in')
    else:
        # Logout link
        link['href'] = "/mellon/logout?ReturnTo=/"
        link.append('Log out ' + remote_user)
    template.find('div', id="cas_login").append(link)

    # Insert menus and breadcrumbs
    sitelink = (template
                .find('div', id="site-header")
                .find('div', class_="uw-section--inner")
                .a)
    sitetitle = str(sitelink.string)
    # Only the path part identifies the page; a query string would otherwise
    # end up inside the last path component.
    request_path = request_uri.partition('?')[0]
    # apply_menu's result is treated as "this request is the home page".
    home_page = apply_menu(template, document_root,
                           path_to_arcs(request_path),
                           sitelink['href'], sitetitle)

    # Insert page title in head and body
    pagetitle = page.title.string if page.title else None
    if home_page:
        pagetitle = pagetitle or 'Welcome to ' + sitetitle
        maintitle = 'Home'
    else:
        pagetitle = pagetitle or '[Please provide <title>]'
        maintitle = pagetitle
    titlespan = template.find('span', id="wcms-main-title")
    titlespan.string = maintitle
    # Keep the text, drop the placeholder <span> around it.
    titlespan.unwrap()
    template.find('div', class_="uw-site--title").h1.string = pagetitle

    # A template <body> without a class attribute contributes no classes,
    # matching how the page <body> is read below.
    body_classes = set(template.body.get("class", []))

    # Insert sidebar from page, or remove if none in page
    sidebar = page.body.find('div', id="wcms-sidebar")
    sidebar_wrapper = template.find('div', id="site-sidebar-wrapper")
    if sidebar is not None:
        sidebar.extract()
        sidebar_content = sidebar_wrapper.find('div', class_="content")
        sidebar_content.clear()
        sidebar_content.append(sidebar)
        # Keep the sidebar's children, drop its own <div>.
        sidebar.unwrap()
    elif sidebar_wrapper is not None:
        sidebar_wrapper.decompose()
        body_classes = (body_classes - {'two-sidebars'}) | {'one-sidebar', 'sidebar-first', 'wide'}

    # Add classes specified on page <body>
    body_classes |= set(page.body.get("class", []))
    template.body["class"] = sorted(body_classes)

    # Insert content from page
    pagebody = page.body.extract()
    for h1 in pagebody.find_all('h1'):
        # WCMS pages have exactly one <h1>, generated above
        if h1.string == pagetitle:
            # Delete duplicate <h1> generated by Apache mod_autoindex
            h1.decompose()
        else:
            # Remind page author to remove spurious <h1> elements
            h1.string = '[Please remove <h1>]'
    maincontent = template.find('div', id="content")
    maincontent.clear()
    maincontent.append(pagebody)
    # Keep the page's body content, drop the <body> element itself.
    pagebody.unwrap()
def apply_template(template, page):
    """Apply *page* to *template* using the request's environment.

    The document root and request URI are read from the ``DOCUMENT_ROOT``
    and ``REQUEST_URI`` environment variables and handed to
    :func:`apply_page`.

    :param template: Beautiful Soup parsed template.
    :param page: Beautiful Soup parsed document.
    :returns: The same *template* object that was passed in.
    """
    document_root = Path(getenv("DOCUMENT_ROOT"))
    request_uri = getenv("REQUEST_URI")
    apply_page(template, page, document_root, request_uri)
    return template
def main():
    """Run the filter: read a page on stdin, write the templated page to stdout.

    The page is read from standard input as UTF-8.  The template file is
    named by the ``WCMS_TEMPLATE`` environment variable and loaded from
    ``templatedir``.  The filled-in template is written to standard output
    as UTF-8.

    If the ``WCMS_DEBUG`` environment variable is set, it names a directory
    in which a new file is created holding selected environment variables
    and a copy of the output; the file's path is reported on standard error.
    """
    # Parse raw page
    page = BeautifulSoup(stdin.buffer.read().decode("utf-8"), 'lxml')

    # Open template
    templatefile = templatedir / getenv("WCMS_TEMPLATE")
    templatetext = templatefile.read_text(encoding='utf-8')
    template = BeautifulSoup(templatetext, 'lxml')

    template = apply_template(template, page)
    template = str(template)

    tempdir = getenv("WCMS_DEBUG")
    if tempdir is not None:
        filename = Path(tempdir) / ('wcms-filter-%s-%s' % (datetime.now().isoformat(), getpid()))
        # Mode 'x' refuses to overwrite an existing file.  The context
        # manager closes the file even if chmod or a write fails.
        with filename.open(mode='x', encoding='utf-8') as f:
            # Restrict permissions before any content is written.
            filename.chmod(0o640)
            for var in ["REMOTE_USER", "DOCUMENT_ROOT", "REQUEST_URI", "WCMS_TEMPLATE"]:
                f.write('%s=%s\n' % (var, getenv(var)))
            f.write('%s characters output:\n' % len(template))
            f.write(template)
        stderr.write('Wrote WCMS filter output to %s\n' % filename)

    # Write modified version of template
    stdout.buffer.write(template.encode('utf-8'))