Source code for uw.local.wcms.bin.filter

"""Filter to impose WCMS template on HTML pages.

Main program for the WCMS HTML filter.
"""

from collections import namedtuple
from datetime import datetime
from os import getenv, getpid
from pathlib import Path
import re
from sys import stdin, stdout, stderr
from urllib.parse import quote_plus

from bs4 import BeautifulSoup

from .templates import templatedir

# One parsed line of a ".htmenu" file, as produced by parse_entry():
#   arcname  -- first comma-separated field: the link target
#   hidden   -- True when the second field is exactly 'H'
#   external -- True when arcname matches external_url_re
#   label    -- third field: the text shown for the link
Entry = namedtuple ('Entry', ('arcname', 'hidden', 'external', 'label'))

# Matches targets that begin with "/" or with an "http://" or "https://"
# scheme; parse_entry() uses it to decide whether an entry is external.
external_url_re = re.compile ('^(/|https?://)')

def parse_entry (entry):
    """Parse one line of a ".htmenu" file.

    The line is split on commas and each field is stripped of surrounding
    whitespace.  The first field is the link target, the second is the
    hidden flag ('H' means hidden) and the third is the label.

    :param entry: One raw line of text.
    :return: An Entry, or None if the line is blank or starts with '--'.
    :raises IndexError: if the line has fewer than three fields.
    """
    line = entry.strip ()

    # Blank lines and '--' lines carry no menu entry
    if not line or line.startswith ('--'):
        return None

    parts = [part.strip () for part in line.split (',')]
    target = parts[0]
    flag = parts[1]
    label = parts[2]

    is_external = external_url_re.match (target) is not None
    return Entry (
        arcname=target,
        hidden=(flag == 'H'),
        external=is_external,
        label=label,
    )
def parse_htmenu (lines):
    """Parse the lines of a ".htmenu" file into a list of entries.

    :param lines: Iterable of raw text lines.
    :return: List of the non-None results of parse_entry, in input order.
    """
    parsed = []
    for line in lines:
        item = parse_entry (line)
        if item is None:
            # parse_entry returns None for lines that hold no entry
            continue
        parsed.append (item)
    return parsed
def show_submenu (soup, url_root, dir_root, arcs, sitelink=None, sitetitle=None):
    """Build the menu for one directory level, recursing into the open entry.

    The entries come from the ".htmenu" file in dir_root, if there is one.

    :param soup: Beautiful Soup object used to create new tags.
    :param url_root: URL prefix prepended to each non-external entry's
        arcname to form its link.
    :param dir_root: pathlib Path of the directory for this menu level.
    :param arcs: Remaining request path components below this level; the
        first one selects the open entry, the rest are passed down.
    :param sitelink: If not None, URL of the site home page; an extra
        "<sitetitle> home" entry is placed first in the menu.
    :param sitetitle: Site title used for the home entry and its breadcrumb.
    :return: None if this level has no entries at all; otherwise a tuple
        (menu, breadcrumbs) where menu is a <ul class="menu"> tag and
        breadcrumbs is a list of link tags built by make_link.
    """
    htmenu_path = dir_root / ".htmenu"
    if htmenu_path.is_file ():
        # parse_htmenu returns a fully built list, so the file can be closed
        # as soon as it returns.  Decode as UTF-8 like the rest of this
        # module's text I/O instead of depending on the process locale.
        with htmenu_path.open (encoding='utf-8') as htmenu:
            entries = parse_htmenu (htmenu)
    else:
        entries = []

    breadcrumbs = []
    root_current = False
    if sitelink is not None:
        # Site home entry always comes first and is treated as external
        entries.insert (0, Entry (sitelink, False, True, '%s home' % sitetitle))
        if arcs:
            breadcrumbs.append (make_link (soup, sitelink, sitetitle))
        else:
            # Request is for the site root: the home entry is the current one
            root_current = True

    if not entries:
        return None

    if arcs:
        head, tail = arcs[0], arcs[1:]
    else:
        head, tail = None, None

    menu = soup.new_tag ("ul")
    menu['class'] = "menu"

    for entry in entries:
        entrypath = dir_root / entry.arcname
        item_open = not entry.external and head == entry.arcname
        item_current = item_open and not tail or root_current
        # root_current can only apply to the first entry (the home entry)
        root_current = False
        item_dir = not entry.external and entrypath.is_dir ()

        # Hidden entries are shown only while they are on the request path
        if entry.hidden and not item_open:
            continue

        if item_dir:
            li_classes = {"expanded" if item_open else "collapsed"}
            url = url_root + entry.arcname + "/"
        elif entry.external:
            # Not really supported by WCMS; style same as "file" links
            li_classes = {"leaf"}
            url = entry.arcname
        else:
            # file
            li_classes = {"leaf"}
            url = url_root + entry.arcname

        a = make_link (soup, url, entry.label)
        li = soup.new_tag ("li")
        li.append (a)

        if item_current:
            a_classes = {"active"}
        else:
            a_classes = set ()

        if entry.hidden:
            # Not really supported by WCMS; keep legacy "hidden" class name
            li_classes.add ("hidden")

        if item_open:
            li_classes.add ("active-trail")
            a_classes.add ("active-trail")

            # Descend into the open entry; None means it has no menu of its own
            result = show_submenu (soup, url, entrypath, tail)
            if result is not None:
                submenu, subbreadcrumbs = result
                li.append (submenu)
                breadcrumbs.append (make_link (soup, url, entry.label))
                breadcrumbs.extend (subbreadcrumbs)

        li['class'] = sorted (li_classes)
        a['class'] = sorted (a_classes)
        menu.append (li)

    # Apply "first" and "last" classes where appropriate
    contents = menu.contents
    if contents:
        contents[0]['class'] = sorted (set (contents[0]['class']) | {"first"})
        contents[-1]['class'] = sorted (set (contents[-1]['class']) | {"last"})

    return menu, breadcrumbs
# Not quite right, but the point is that we probably need a top-level function.
def show_menu (soup, root, arcs, sitelink, sitetitle):
    """Build the whole site menu, starting at URL "/" and directory root.

    :param soup: Beautiful Soup object used to create new tags.
    :param root: pathlib Path of the top-level directory.
    :param arcs: Request path components.
    :param sitelink: URL of the site home page.
    :param sitetitle: Title of the site.
    :return: Whatever show_submenu returns for the top level.
    """
    toplevel = show_submenu (
        soup,
        url_root="/",
        dir_root=root,
        arcs=arcs,
        sitelink=sitelink,
        sitetitle=sitetitle,
    )
    return toplevel
def path_to_arcs (path):
    """Split an absolute URL path into its list of components ("arcs").

    A single trailing "/" does not produce a trailing empty component, so
    "/" gives [] and "/a/b/" gives ['a', 'b'].  Everything from the first
    "?" onward is discarded before splitting, so a request URI that still
    carries its query string yields the same arcs as the bare path.

    :param path: URL path (optionally followed by "?query") starting with "/".
    :return: List of path component strings.
    :raises ValueError: if path does not start with "/".
    """
    if not path.startswith ("/"):
        raise ValueError ("Invalid path (%s) does not start with '/'" % path)

    # A query string is not part of the path; left in place it would end up
    # inside the last arc and stop that arc from matching any menu entry.
    path = path.partition ("?")[0]

    result = path.split ("/")[1:]
    if result[-1] == "":
        result = result[:-1]
    return result
def apply_menu (template, document_root, request_path, sitelink, sitetitle):
    """Insert the navigation menu and breadcrumb trail into the template.

    :param template: Beautiful Soup parsed template; modified in place.
    :param document_root: Python pathlib Path object for document root.
    :param request_path: Array of URL path components for this request.
    :param sitelink: URL of the site home page.
    :param sitetitle: Title of the site.
    :return: True if no breadcrumbs were produced, False otherwise.
    """
    result = show_menu (template, document_root, request_path, sitelink, sitetitle)
    if result is None:
        # show_menu gives None instead of a (menu, breadcrumbs) pair when
        # there are no menu entries at all; treat that as "no menu, no
        # breadcrumbs" rather than failing on the unpacking.
        menu, breadcrumbs = None, []
    else:
        menu, breadcrumbs = result

    if menu is not None:
        template.find ('div', id="main-menu").append (menu)

    breadcrumbdiv = template.find ('div', id="main").find ('div', class_="uw-site--breadcrumb")
    if breadcrumbs:
        # Each breadcrumb link becomes its own <li> in the template's <ol>
        breadcrumbdiv = breadcrumbdiv.find ('nav', class_="breadcrumb").find ('ol')
        for bc in breadcrumbs:
            li = template.new_tag ('li')
            li.append (bc)
            breadcrumbdiv.append (li)
    else:
        # Nothing to show: empty the breadcrumb container
        breadcrumbdiv.clear ()

    return not breadcrumbs
def apply_page (template, page, document_root, request_uri):
    """Fill in the template with content from the document page.

    The template is modified in place; content is moved out of page.

    :param template: Beautiful Soup parsed template.
    :param page: Beautiful Soup parsed document.
    :param document_root: Python pathlib Path object for document root.
    :param request_uri: Request URI string; used as the login link's
        ReturnTo value and split into path components with path_to_arcs.
    """
    # Insert login/logout link
    link = template.new_tag ('a')
    remote_user = getenv ("REMOTE_USER")
    if remote_user is None:
        # Login link
        link['href'] = "/mellon/login?ReturnTo=" + quote_plus (request_uri)
        link.append ('Log in')
    else:
        # Logout link
        link['href'] = "/mellon/logout?ReturnTo=/"
        link.append ('Log out ' + remote_user)
    template.find ('div', id="cas_login").append (link)

    # Insert menus and breadcrumbs
    # The site link and title are taken from the first <a> in the template's header
    sitelink = (template
        .find ('div', id="site-header")
        .find ('div', class_="uw-section--inner")
        .a)
    sitetitle = str (sitelink.string)
    # apply_menu returns True when it produced no breadcrumbs
    home_page = apply_menu (template, document_root, path_to_arcs (request_uri), sitelink['href'], sitetitle)

    # Insert page title in head and body
    pagetitle = page.title.string if page.title else None
    if home_page:
        pagetitle = pagetitle or 'Welcome to ' + sitetitle
        maintitle = 'Home'
    else:
        pagetitle = pagetitle or '[Please provide <title>]'
        maintitle = pagetitle
    titlespan = template.find ('span', id="wcms-main-title")
    titlespan.string = maintitle
    # Replace the placeholder <span> by its text
    titlespan.unwrap ()
    template.find ('div', class_="uw-site--title").h1.string = pagetitle
    body_classes = set (template.body["class"])

    # Insert sidebar from page, or remove if none in page
    sidebar = page.body.find ('div', id="wcms-sidebar")
    sidebar_wrapper = template.find ('div', id="site-sidebar-wrapper")
    if sidebar is not None:
        # Move the sidebar out of the page before the page body is inserted below
        sidebar.extract ()
        sidebar_content = sidebar_wrapper.find ('div', class_="content")
        sidebar_content.clear ()
        sidebar_content.append (sidebar)
        # Keep only the sidebar's children, not its wrapping <div>
        sidebar.unwrap ()
    elif sidebar_wrapper is not None:
        sidebar_wrapper.decompose ()
        body_classes = body_classes - {'two-sidebars'} | {'one-sidebar', 'sidebar-first', 'wide'}

    # Add classes specified on page <body>
    body_classes |= set (page.body.get ("class", []))
    template.body["class"] = sorted (body_classes)

    # Insert content from page
    pagebody = page.body.extract ()
    for h1 in pagebody.find_all ('h1'):
        # WCMS pages have exactly one <h1>, generated above
        if h1.string == pagetitle:
            # Delete duplicate <h1> generated by Apache mod_autoindex
            h1.decompose ()
        else:
            # Remind page author to remove spurious <h1> elements
            h1.string = '[Please remove <h1>]'
    maincontent = template.find ('div', id="content")
    maincontent.clear ()
    maincontent.append (pagebody)
    # Drop the <body> tag itself, leaving its children inside the content <div>
    pagebody.unwrap ()
def apply_template (template, page):
    """Apply the page to the template using the request's environment.

    The document root and request URI are read from the DOCUMENT_ROOT and
    REQUEST_URI environment variables and handed to apply_page.

    :param template: Beautiful Soup parsed template; modified in place.
    :param page: Beautiful Soup parsed document.
    :return: The same template object that was passed in.
    """
    document_root = Path (getenv ("DOCUMENT_ROOT"))
    request_uri = getenv ("REQUEST_URI")
    apply_page (template, page, document_root, request_uri)
    return template
def main ():
    """Read a page on stdin and write it, wrapped in the template, to stdout.

    Input and output are UTF-8.  The template file is named by the
    WCMS_TEMPLATE environment variable, relative to templatedir.  If the
    WCMS_DEBUG environment variable is set, it names a directory in which
    a copy of the output, preceded by the relevant environment variables,
    is written to a newly created file.
    """
    # Parse raw page
    page = BeautifulSoup (stdin.buffer.read ().decode ("utf-8"), 'lxml')

    # Open template
    templatefile = templatedir / getenv ("WCMS_TEMPLATE")
    templatetext = templatefile.read_text (encoding='utf-8')
    template = BeautifulSoup (templatetext, 'lxml')

    template = apply_template (template, page)
    output = str (template)

    tempdir = getenv ("WCMS_DEBUG")
    if tempdir is not None:
        filename = Path (tempdir) / ('wcms-filter-%s-%s' % (datetime.now ().isoformat (), getpid ()))
        # Mode 'x' refuses to overwrite an existing file.  The with block
        # closes the file even if one of the writes fails.
        with filename.open (mode='x', encoding='utf-8') as f:
            filename.chmod (0o640)
            for var in ["REMOTE_USER", "DOCUMENT_ROOT", "REQUEST_URI", "WCMS_TEMPLATE"]:
                f.write ('%s=%s\n' % (var, getenv (var)))
            f.write ('%s characters output:\n' % len (output))
            f.write (output)
        stderr.write ('Wrote WCMS filter output to %s\n' % filename)

    # Write modified version of template
    stdout.buffer.write (output.encode ('utf-8'))