;; -*- lexical-binding: t -*-
;;; markdown.el --- Emacs support for editing Gwern.net
;;; Copyright (C) 2009 by Gwern Branwen
;;; License: CC-0
;;; When: Time-stamp: "2024-12-01 09:54:53 gwern"
;;; Words: GNU Emacs, Markdown, HTML, GTX, Gwern.net, typography
;;;
;;; Commentary:
;;; Helper functions for editing Markdown, HTML, and HTML-in-GTX, particularly reformatting & editing annotations in the Gwern.net house style.
;;; Additional functions include error-checking and prettifying confusable characters like dashes.
; since I hardly ever write elisp, and often start writing things in the *scratch* buffer, save time by defaulting to Markdown.
(setq initial-major-mode 'markdown-mode)
(setq initial-scratch-message "")
; open GTX files in html-mode. `\\'` anchors at the end of the file name (`$` would also match before an embedded newline), and `add-to-list` keeps re-evaluating this file from piling up duplicate entries:
(add-to-list 'auto-mode-alist '("\\.gtx\\'" . html-mode))
; add repo tool directory to path to avoid hardwiring script paths:
(add-to-list 'exec-path "~/wiki/static/build/")
; I do much of my editing in gwern.net files, so save myself some tab-completion hassle:
(setq default-directory "~/wiki/")
;; We rely on the GitHub dev version because the 2017 v2.3 stable release packaged everywhere is missing a bugfix (stable breaks on any Markdown file with HTML comments in it); NOTE: still seems to be true on Ubuntu `elpa-markdown-mode` 2.3+210-1 as of 2023-02-11!
;; The checkout directory is put on `load-path` before the `require` below runs.
(add-to-list 'load-path "~/src/markdown-mode/")
(require 'markdown-mode)
;; ; Metadata files are stored in YAML; but yaml-mode may be too slow to use given how large they have become...
;; (require 'yaml-mode)
;; (defun my/yaml-mode-decision ()
;; "Activate yaml-mode with conditionally disabled Flycheck."
;; (let (
;; (disable-flycheck (or (string-prefix-p (expand-file-name "~/wiki/metadata/") (buffer-file-name))
;; (> (nth 7 (file-attributes (buffer-file-name))) 1000000))))
;; ;; Disable Flycheck if the file is large or in a specific directory
;; (when disable-flycheck
;; (flycheck-mode -1))
;; ;; Activate yaml-mode
;; (yaml-mode)
;; ;; Additional settings if Flycheck is disabled
;; (when disable-flycheck
;; (font-lock-mode -1)
;; (message "Custom YAML mode settings applied for large file/specific directory"))))
;; (add-hook 'yaml-mode-hook 'my/yaml-mode-decision)
;; ; (add-to-list 'auto-mode-alist '("\\.yaml\\'" . yaml-mode))
; (setq major-mode 'markdown-mode) ; needs to be done via 'Customize'?
;; markdown-mode settings. The Pandoc command line is written one option per
;; entry and joined with single spaces, so the resulting value is one
;; space-separated command string.
(setq markdown-command
      (mapconcat #'identity
                 '("pandoc"
                   "--mathjax"
                   "--metadata title='Markdown-preview'"
                   "--to=html5"
                   "--standalone"
                   "--number-sections"
                   "--toc"
                   "--reference-links"
                   "--css=https://gwern.net/static/css/default.css"
                   "-f markdown+smart"
                   "--template=/home/gwern/wiki/static/template/pandoc/template-html5-articleedit.html5"
                   "-V lang=en-us")
                 " ")
      markdown-enable-math t
      markdown-italic-underscore t)
;"Set up highlighting of special words for selected modes."
; " . " ")
("
" . "\n\n")
("Kendall's Ï" . "Kendall's τ")
("\\\\u03bc" . "μ")
("\\\\u2018" . "‘")
("\\\\u2019" . "’")
("\u2009" . " ")
("\\\\u2013" . "–")
("â\\" . "'")
("â" . "'")
("â\\" . "—")
("â" . "−")
("\\\\u2014" . "—")
("\\\\u201c" . "“")
("\\\\u201d" . "”")
("\\\\u2009" . " ")
("\\\\u2212" . "−")
("\\\\u2192" . "→")
("\\\\u221e" . "𝓁∞")
("\\\\u03b5" . "𝜀")
("\\\\u223c" . "~")
("\\\\u2217" . "✱")
("\\\\u2020" . "†")
("\\\\u2021" . "‡")
("\\\\u2194" . "↔")
("\\\\u2248 " . "~")
("\\\\u03b1" . "α")
("\\\\u03b8i" . "θi")
("\\\\u2265" . "≥")
("\\\\u03b8" . "θ")
(" \\\\u2022 " . ", ")
("\\\\u2022" . "·")
("\\\\u2264" . "≤")
("\\\\U0001d442" . "𝒪")
("\\\\U0001d4412" . "_N_^2^")
("\\\\u2208" . "∈")
("\\\\U0001d45a" . "𝑚")
("\\\\u2113" . "𝓁")
("â¤" . "≤")
("](wiki/" . "](/")
("](//doc" . "](/doc")
("]]http" . "](https")
("]]/" . "](/")
(" \\[\" . " " [\"")
(" \"](" . "\"](")
("" . "=")
(" " . ", ")
("T h i s" . "This")
("T h e" . "The")
("Author links open overlay panel" . "")
("et al.," . "et al")
("\n---\n" . "\n
\n")
("" . " = ")
("" . " < ")
("\n " . "\n")
(" = " . " = ")
(" =" . " =")
("= " . "= ")
("‐" . "-")
("\n" . "")
("" . "")
("–" . "--")
(" ‑\n" . "")
("‑\n" . "")
("‑" . "-") ; deal with NON-BREAKING HYPHEN which NEJM uses for both line-breaking and regular hyphens, /sigh
("¬ " . "")
("" . "**")
("" . "**")
("" . "")
("" . "")
("= " . "= ")
("∼" . "~")
("Previous article in issue\nNext article in issue\nKeywords\n" . "[**Keywords**: ")
("Previous article in issue\nKeywords\n" . "[**Keywords**: ")
("•\n\n " . "- ")
(" ● " . "- ")
("eta≠analys" . "eta-analys") ; odd typo in some PDFs: "meta≠analyses"
("\n•\n" . "- ")
(" •\n " . "- ")
("
" "
") (replace-all "
" "") ; (replace-all "" "
\n") ; (replace-all "
" "
\n") (replace-all " id=\"cb1\">" "") ; the Pandoc syntax-highlighting IDs cause ID clashes when substituted into pages, so delete all (replace-all " id=\"cb2\">" "") (replace-all " id=\"cb3\">" "") (replace-all " id=\"cb4\">" "") (replace-all "
" "\" />") (replace-all "’’" "’") (replace-all "’s" "’s") (replace-all "%3Csup%3Est%3C/sup%3E" "th") (replace-all "%3Csup%3End%3C/sup%3E" "nd") (replace-all "%3Csup%3Erd%3C/sup%3E" "rd") (replace-all "" "") ; unnecessary in annotations for WP links because they will be regenerated by the single-source-of-truth: (replace-all " class=\"id-not link-live\"" "") ; unescaped single quotation marks will often break the YAML, so they need to either be replaced with the intended Unicode, or double-quoted to 'escape' them ; (query-replace "'" "''" nil begin end) (delete-trailing-whitespace) (forward-line) (html-mode) (my-frame-urgent-hint-set) ; XMonad is set to use XMonad.Hooks.UrgencyHook.withUrgencyHook’s FocusHook to yank focus to X11 frames with urgent hint set (ding) (message "Done.") ) ) ) ) (add-hook 'markdown-mode-hook (lambda () (define-key markdown-mode-map "\C-c\ w" 'markdown-annotation-compile))) (defvar html-mode-map) ; suppress reference-to-free-variable byte-compile warning (add-hook 'html-mode-hook (lambda () (define-key html-mode-map "\C-c\ w" 'markdown-annotation-compile))) ; for the `foo` buffer I do most of my annotation work in, on the first copy-paste of a block of text, detect if it has any paragraph breaks (ie. double newlines), and if it does not, then automatically run paragraphizer.py on it to try to break it up into logical paragraphs. ; (Note/warning: written by GPT-3.5. Curiously, GPT-4 failed when I tried to repeat this exercise in it using the same starting prompt & kind of feedback: because it tries to implement solutions using advice, buffer-local variables, and `:properties`—which are subtly buggy in their handling of state, and so wind up running `paragraphizer.py` on every paste.) (defun markdown-paragraphize () "Automatically paragraphize single-paragraph abstracts. 
Intended for Markdown mode with double-newlines for newlines; may malfunction if run on other formats like HTML \(where `` pairs can come in many forms, not to mention other block elements like blockquotes\)."
(interactive)
(delete-trailing-whitespace)
(let ((double-newline-found nil))
(save-excursion
(goto-char (point-min))
(unless (search-forward-regexp "\n\n" nil t)
(message "Paragraphizing abstract…")
(let ((paragraphizer-path (executable-find "paragraphizer.py")))
(if paragraphizer-path
(call-process-region (point-min) (point-max) paragraphizer-path t t nil)
(error "Error: Python `paragraphizer.py` script not found in path")))
(setq double-newline-found t)))
(when double-newline-found
(goto-char (point-max))
(message "Paragraphizing abstract done."))))
(defun markdown-paragraphize-hook ()
  "Call `markdown-paragraphize` after a large enough yank into buffer \"foo\".
Registered on `post-command-hook`.  Does nothing unless all of these hold:
the current buffer is named \"foo\", its major mode derives from
`markdown-mode`, the command that just ran was `yank`, and the buffer
contains at least 500 characters."
  (and (equal (buffer-name) "foo")
       (derived-mode-p 'markdown-mode)
       (eq this-command 'yank)
       ;; Require enough text to plausibly be a whole copy-pasted abstract,
       ;; as opposed to a random snippet or line.
       (<= 500 (buffer-size))
       (markdown-paragraphize)))
(add-hook 'post-command-hook #'markdown-paragraphize-hook)
; https://emacs.stackexchange.com/a/56037
(defun my-frame-urgent-hint-set--for-x11 (frame arg &optional window-id)
  "Set the x11-urgency hint for the FRAME to ARG (on WINDOW-ID) :

- If ARG is nil, unset the urgency.
- If ARG is any other value, set the urgency.

The current WM_HINTS property is read (from WINDOW-ID when given), the
urgency bit in its first element is set or cleared, and the result is
written back to FRAME.  If no WM_HINTS property can be read, nothing is
changed and nil is returned instead of signalling an error.

If you unset the urgency, you still have to visit the frame to reset it."
  (let* ((wm-prop "WM_HINTS") ;; Constants.
         (wm-flag-urgent #x100)
         ;; `x-window-property' hands back a vector (last argument t);
         ;; `append' with nil turns it into a fresh list we may mutate.
         (wm-hints (append (x-window-property wm-prop frame wm-prop window-id nil t) nil))
         (flags (car wm-hints)))
    ;; Without this guard a missing property makes FLAGS nil, and both
    ;; `logior'/`logand' and `setcar' would signal wrong-type-argument.
    (when (integerp flags)
      (setcar wm-hints
              (if arg
                  (logior flags wm-flag-urgent)
                (logand flags (lognot wm-flag-urgent))))
      (x-change-window-property wm-prop wm-hints frame wm-prop 32 t))))
(defun my-frame-urgent-hint-set (&optional arg)
  "Flag the selected Emacs frame as needing urgent attention.
When called with a non-nil prefix argument ARG, clear the urgency instead
\(whether that changes what is displayed depends on the window manager\).
Only the `x` window system is handled; on anything else a message is shown."
  (interactive "P")
  (let* ((current (selected-frame))
         (backend (window-system current)))
    (if (eq backend 'x)
        (my-frame-urgent-hint-set--for-x11 current (not arg))
      ;; only Linux X11 is supported:
      (message "Urgent hint for window system %S unsupported" backend))))
; add new-line / paragraph snippet
(add-hook 'html-mode-hook
(lambda ()
(define-key html-mode-map (kbd "
")
(if (= ?\s (following-char)) (delete-char 1)))
)
))
;; Turn on `visual-fill-column-mode` whenever markdown-mode starts.
(add-hook 'markdown-mode-hook #'visual-fill-column-mode)
;; Markup editing shortcuts for HTML/Markdown/GTX annotation editing.
;; Functions to easily add italics, bold, Wikipedia links, smallcaps, & margin-note syntax.
(defun surround-region-or-word (start-tag end-tag)
  "Surround selected region (or next word if no region) with START-TAG and END-TAG.
With a usable region, START-TAG is inserted at its beginning and END-TAG at
its end.  Otherwise the text from point to the end of the next word \(as
found by `forward-word`\) is wrapped.  Point is left just after END-TAG.

When called interactively, prompt for both tags."
  ;; A bare `(interactive)` cannot supply the two required arguments, so
  ;; `M-x surround-region-or-word` used to fail; read them from the minibuffer.
  (interactive "sStart tag: \nsEnd tag: ")
  (let* (;; `use-region-p` (rather than `region-active-p`) matches the test
         ;; used by `html-insert-margin-note`, so an empty active region is
         ;; treated as "no region" in both places.
         (has-region (use-region-p))
         (begin (if has-region (region-beginning) (point)))
         (end (if has-region
                  (region-end)
                (forward-word)
                (point))))
    ;; Insert the closing tag first so BEGIN is still a valid position.
    (goto-char end)
    (insert end-tag)
    (goto-char begin)
    (insert start-tag)
    (goto-char (+ end (length start-tag) (length end-tag)))))
;; the wrappers:
(defun html-insert-emphasis ()
  "Surround selected region (or word) with HTML `<em>` tags for italics/emphasis.
Equivalent to `*FOO*` in Markdown."
  (interactive)
  ;; The tag strings were empty, which made this command a no-op.
  (surround-region-or-word "<em>" "</em>"))
(defun markdown-insert-emphasis ()
  "Wrap the selected region (or word) in single asterisks for Markdown emphasis.
The HTML counterpart is `<em>FOO</em>`.
Gwern.net uses `*` for emphasis, and generally reserves `_` for italics such as
book titles \(in keeping with Internet conventions predating Gruber's Markdown
mistake of conflating `*`/`_`\)."
  (interactive)
  (let ((delimiter "*"))
    (surround-region-or-word delimiter delimiter)))
(defun html-insert-strong ()
  "Surround selected region (or word) with `<strong>` bold tags (HTML).
Equivalent to `**FOO**` in Markdown.
Used in abstracts for topics, first-level list emphasis, etc."
  (interactive)
  ;; The tag strings were empty, which made this command a no-op.
  (surround-region-or-word "<strong>" "</strong>"))
(defun markdown-insert-strong ()
  "Wrap the selected region (or word) in `**` for Markdown bold.
The HTML counterpart is `<strong>FOO</strong>`.
Used in abstracts for topics, first-level list emphasis, etc."
  (interactive)
  (let ((delimiter "**"))
    (surround-region-or-word delimiter delimiter)))
(defun html-insert-smallcaps ()
  "Surround selected region (or word) with smallcaps syntax (HTML).
Built-in CSS class in HTML & Pandoc Markdown; the span inserted here is
equivalent to `[FOO]{.smallcaps}` in Markdown.
Smallcaps are used on Gwern.net for second-level emphasis after bold has been used."
  (interactive)
  ;; The tag strings were empty, which made this command a no-op; the class
  ;; name mirrors the `.smallcaps` used by `markdown-insert-smallcaps`.
  (surround-region-or-word "<span class=\"smallcaps\">" "</span>"))
(defun markdown-insert-smallcaps ()
  "Wrap the selected region (or word) in Pandoc Markdown smallcaps span syntax.
Produces `[FOO]{.smallcaps}`; the HTML counterpart is
`<span class=\"smallcaps\">FOO</span>`.
Smallcaps are used on Gwern.net for second-level emphasis after bold has been used."
  (interactive)
  (let ((opener "[")
        (closer "]{.smallcaps}"))
    (surround-region-or-word opener closer)))
(defun html-insert-wp-link ()
  "Surround selected region (or word) with custom Wikipedia link syntax in HTML.
Compiled by Interwiki.hs to the equivalent (usually) of
`<a href=\"https://en.wikipedia.org/wiki/FOO\">FOO</a>`."
  (interactive)
  ;; The tag strings were empty, which made this command a no-op.
  ;; NOTE(review): `!W` is taken from the Markdown sibling's `[FOO](!W)`;
  ;; confirm this is the href form Interwiki.hs expects in HTML.
  (surround-region-or-word "<a href=\"!W\">" "</a>"))
(defun markdown-insert-wp-link ()
  "Wrap the selected region (or word) as a custom Wikipedia link in Markdown.
Produces `[FOO](!W)`."
  (interactive)
  (let ((opener "[")
        (closer "](!W)"))
    (surround-region-or-word opener closer)))
(defun markdown-insert-margin-note ()
  "Wrap the selected region FOO BAR (or word FOO) as a `margin-note`.
\(Implemented as a special span class; this inserts `[FOO]{.marginnote}`.\)
Margin-notes are marginal glosses (in the left margin), the counterparts to
sidenotes.  They serve as very abbreviated italicized summaries of the
paragraph \(like very small inlined section headers\)."
  (interactive)
  (let ((opener "[")
        (closer "]{.marginnote}"))
    (surround-region-or-word opener closer)))
(defun html-insert-margin-note ()
  "Surround selected region FOO BAR (or word FOO) with a `margin-note`.
\(Implemented as a special `<span class=\"marginnote\">` HTML class.\)
This creates marginal glosses (in the left margin) as counterparts to sidenotes.
These margin-notes are used as very abbreviated italicized summaries of the
paragraph \(like very small inlined section headers\).

When inserting margin-notes into HTML snippets, that usually means an annotation
and the margin-note is an editorial insertion, which are denoted by paired `[]` brackets.
To save typing effort, we add those as well if not present."
  (interactive)
  (let ((content (or (if (use-region-p)
                         (buffer-substring-no-properties (region-beginning) (region-end))
                       (thing-at-point 'word t))
                     ;; `thing-at-point` returns nil when point is not on a
                     ;; word; fall back to "" so the prefix/suffix tests
                     ;; below do not signal wrong-type-argument.
                     "")))
    ;; The span tag strings were empty, so only the brackets were inserted.
    (if (and (string-prefix-p "[" content) (string-suffix-p "]" content))
        (surround-region-or-word "<span class=\"marginnote\">" "</span>")
      (surround-region-or-word "<span class=\"marginnote\">[" "]</span>"))))
;; keybindings:
;; The same key sequences are bound in both modes; each one runs the markup
;; command matching the mode's syntax.
;;; Markdown:
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-e") #'markdown-insert-emphasis)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-s") #'markdown-insert-strong)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c s") #'markdown-insert-smallcaps)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-w") #'markdown-insert-wp-link)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-m") #'markdown-insert-margin-note)))
;;; HTML:
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-e") #'html-insert-emphasis)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-s") #'html-insert-strong)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c s") #'html-insert-smallcaps)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-w") #'html-insert-wp-link)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-m") #'html-insert-margin-note)))
;; ;;; YAML: (the YAML files store raw HTML snippets, so insert HTML rather than Markdown markup)
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-e" 'html-insert-emphasis)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-s" 'html-insert-strong)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ s" 'html-insert-smallcaps)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-w" 'html-insert-wp-link)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-m" 'html-insert-margin-note)))
;sp
; (add-hook 'markdown-mode-hook 'flyspell)
;for toggling visibility of sections - makes big pages easier to work with
(add-hook 'markdown-mode-hook 'outline-minor-mode)
; In Markdown files, there are few excuses for unbalanced delimiters, and an imbalance almost always indicates a link syntax error; in cases where quoted text must contain unbalanced delimiters (eg. diffs, or neural-net-generated text, or redirects fixing typos), a matching delimiter can be added in an HTML comment like '<!-- ) -->' to make it add up.
(defun balance-parens ()
  "Run `check-parens` after every save of the current buffer.
The hook is added buffer-locally, and only when the buffer is visiting a
file; in other buffers this does nothing and returns nil."
  (when buffer-file-name
    (add-hook 'after-save-hook #'check-parens nil t)))
;; `javascript-mode` is only an alias for `js-mode`, whose hook is
;; `js-mode-hook`; the old `javascript-mode-hook` entry is kept for
;; backward compatibility and the real hook is registered alongside it.
(dolist (mode-hook '(markdown-mode-hook
                     ledger-mode-hook
                     emacs-lisp-mode-hook
                     haskell-mode-hook
                     css-mode-hook
                     javascript-mode-hook
                     js-mode-hook
                     html-mode-hook
                     python-mode-hook))
  (add-hook mode-hook #'balance-parens))
; NOTE: I skip YAML mode because syntax-level quoting is kept validated by the database processing, and within-annotation balancing is checked in Hakyll, and using `check-parens` in YAML mode triggers far too many spurious errors.
; Insert the X primary selection (ie. the secondary copy-paste buffer next to the regular clipboard) at point (handles Unicode correctly); works better than `xclip -o`.
; Trims whitespace (spurious whitespace is often added by X GUI programs like Firefox eg. double-clicking HTML headers or page titles, requiring tedious manual deletion).
(global-set-key "\M-`"
                (lambda ()
                  (interactive)
                  (let ((selection (gui-get-selection 'PRIMARY 'UTF8_STRING)))
                    ;; `gui-get-selection` returns nil when nothing is
                    ;; selected; passing that to `insert-for-yank` would
                    ;; signal wrong-type-argument, so report it instead.
                    (if (stringp selection)
                        (insert-for-yank selection)
                      (message "No text in the X primary selection")))))
; Trim spurious whitespace from other X GUI copy-pastes as well.
; (We do not attempt to hook `yank` and run this on *all* copy-paste-like behavior, because deleting whitespace could seriously interfere with document or programming modes.)
(defun my-trim-gui-selection (orig-fun &rest args)
  "Around-advice that strips surrounding whitespace from a GUI selection.
Calls ORIG-FUN with ARGS \(ie. the original `gui-get-selection` and its
arguments\).  A string result is returned trimmed; any other result is
returned unchanged."
  (let ((raw (apply orig-fun args)))
    (cond
     ((stringp raw) (string-trim raw))
     (t raw))))
(advice-add 'gui-get-selection :around #'my-trim-gui-selection)
; ispell: ignore code blocks in Pandoc Markdown
; TODO: add a fix for '#' not being handled in URLs. current hack borrowed from