;; -*- lexical-binding: t -*-
;;; markdown.el --- Emacs support for editing Gwern.net
;;; Copyright (C) 2009 by Gwern Branwen
;;; License: CC-0
;;; When: Time-stamp: "2024-05-12 11:11:55 gwern"
;;; Words: GNU Emacs, Markdown, HTML, GTX, Gwern.net, typography
;;;
;;; Commentary:
;;; Helper files for editing Markdown, HTML, and HTML-in-GTX, particularly reformatting & editing annotations in the Gwern.net house style.
;;; Additional functions include error-checking and prettifying confusable characters like dashes.
;; Since I hardly ever write elisp, and often start writing things in the
;; *scratch* buffer, save time by defaulting to Markdown.
(setq initial-major-mode 'markdown-mode)
(setq initial-scratch-message "")
;; Open `.gtx' files in `html-mode'.  The pattern is anchored with \' (end of
;; string) rather than $ (end of line), and `add-to-list' is used so that
;; re-evaluating this file does not pile up duplicate entries.
(add-to-list 'auto-mode-alist '("\\.gtx\\'" . html-mode))
;; I do much of my editing in gwern.net files, so save myself some tab-completion hassle:
(setq default-directory "~/wiki/")
;; We rely on the Github dev version because the 2017 v2.3 stable release packaged everywhere is missing a bugfix (stable breaks on any Markdown file with HTML comments in it); NOTE: still seems to be true on Ubuntu `elpa-markdown-mode` 2.3+210-1 as of 2023-02-11!
(add-to-list 'load-path "~/src/markdown-mode/")
(require 'markdown-mode)
;; ; Metadata files are stored in YAML; but yaml-mode may be too slow to use given how large they have become...
;; (require 'yaml-mode)
;; (defun my/yaml-mode-decision ()
;; "Activate yaml-mode with conditionally disabled Flycheck."
;; (let (
;; (disable-flycheck (or (string-prefix-p (expand-file-name "~/wiki/metadata/") (buffer-file-name))
;; (> (nth 7 (file-attributes (buffer-file-name))) 1000000))))
;; ;; Disable Flycheck if the file is large or in a specific directory
;; (when disable-flycheck
;; (flycheck-mode -1))
;; ;; Activate yaml-mode
;; (yaml-mode)
;; ;; Additional settings if Flycheck is disabled
;; (when disable-flycheck
;; (font-lock-mode -1)
;; (message "Custom YAML mode settings applied for large file/specific directory"))))
;; (add-hook 'yaml-mode-hook 'my/yaml-mode-decision)
;; ; (add-to-list 'auto-mode-alist '("\\.yaml\\'" . yaml-mode))
; (setq major-mode 'markdown-mode) ; needs to be done via 'Customize'?
;; markdown-mode settings, assigned in a single `setq':
;; math support on, underscore italics on, and the Pandoc command line
;; stored in `markdown-command'.
(setq markdown-enable-math t
      markdown-italic-underscore t
      markdown-command
      "pandoc --mathjax --metadata title='Markdown-preview' --to=html5 --standalone --number-sections --toc --reference-links --css=https://gwern.net/static/css/default.css -f markdown+smart --template=/home/gwern/wiki/static/template/pandoc/template-html5-articleedit.html5 -V lang=en-us")
;; Warn on dangerous use of statistical-significance testing language:
;; every flagged word is fontified (whole match, overriding existing faces)
;; with the `taylor-special-words-warning' face.
(add-hook 'markdown-mode-hook
          (lambda ()
            (font-lock-add-keywords
             nil
             (mapcar (lambda (flagged-word)
                       ;; the face is a quoted form inside the keyword entry
                       (list flagged-word 0 ''taylor-special-words-warning t))
                     '(" significant" " significance")))
            ;; Also make trailing whitespace visible in Markdown buffers.
            (setq show-trailing-whitespace t)))
;; Same statistical-significance warning highlighting, for HTML buffers.
(add-hook 'html-mode-hook
          (lambda ()
            (font-lock-add-keywords
             nil
             (mapcar (lambda (flagged-word)
                       ;; the face is a quoted form inside the keyword entry
                       (list flagged-word 0 ''taylor-special-words-warning t))
                     '(" significant" " significance")))))
;; (add-hook 'yaml-mode-hook
;; (lambda ()
;; (font-lock-add-keywords nil '(
;; (" significant" 0 'taylor-special-words-warning t)
;; (" significance" 0 'taylor-special-words-warning t)
;; ))))
;I like unusual semantic punctuation!
(defun interrobang ()
  "Insert an interrobang (‽) at point; for replacing \"?!\"/\"!?\"."
  ;; The insertion is the function body, not the `interactive' spec, so the
  ;; command also works when called from Lisp.
  (interactive)
  (insert-char ?‽ 1))
(defun irony ()
  "Insert an irony/sarcasm mark (⸮) at point."
  ;; The insertion is the function body, not the `interactive' spec, so the
  ;; command also works when called from Lisp.
  (interactive)
  (insert-char ?⸮ 1))
(defalias 'sarcasm 'irony) ;; sarcasm mark: ⸮ (better than '' or '[!]', anyway)
(defun bitcoin ()
  "Insert a Bitcoin sign (₿) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?₿ 1))
(defun en-dash ()
  "Insert an en dash (–) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?– 1))
(defun em-dash ()
  "Insert an em dash (—) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?— 1))
(defun arrow-right ()
  "Insert a rightwards arrow (→) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?→ 1))
(defun arrow-left ()
  "Insert a leftwards arrow (←) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?← 1))
(defun arrow-both ()
  "Insert a left-right arrow (↔) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?↔ 1))
(defun arrow-up ()
  "Insert an upwards arrow (↑) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?↑ 1))
(defun arrow-down ()
  "Insert a downwards arrow (↓) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?↓ 1))
(defun interpunct ()
  "Insert an interpunct/middle dot (·) at point."
  ;; Body moved out of the `interactive' spec so Lisp callers insert too.
  (interactive)
  (insert-char ?· 1))
(defun replace-all (original replacement)
  "Replace every match of the regexp ORIGINAL in the buffer with REPLACEMENT.
A local equivalent of `replace-string' which won't throw annoying errors about
only interactive use.  Matching is case-sensitive, and REPLACEMENT is inserted
exactly as given: no case conversion, no `\\&'-style substitution.
Point and mark are preserved.  Returns nil."
  (save-excursion
    (let ((case-fold-search nil)
          (keep-going t))
      (goto-char (point-min))
      (while (and keep-going (re-search-forward original nil t))
        (let ((zero-width (= (match-beginning 0) (match-end 0))))
          ;; FIXEDCASE t: never re-capitalize REPLACEMENT to mimic the
          ;; matched text.  LITERAL t: no backslash interpretation.
          (replace-match replacement t t)
          ;; A zero-width match (e.g. \"\" or \"$\") leaves point where the
          ;; next search would match again; step forward, or stop at the end
          ;; of the buffer, so the loop always terminates.
          (when zero-width
            (if (eobp)
                (setq keep-going nil)
              (forward-char 1))))))))
; Rewrites the current buffer via a fixed sequence of `replace-all' calls
; (Unicode punctuation, ligatures and bullets to ASCII forms), then deletes
; trailing whitespace.  The calls run in order, so earlier replacements affect
; what later patterns can match.
; NOTE(review): the whole body below sits INSIDE the `(interactive ...)` form (it
; opens on the line after the docstring and is closed by the final `nil)))`), so
; it only runs when the command is invoked interactively; a plain Lisp call,
; including a call from a hook, evaluates no body at all — confirm whether
; `(interactive)` followed by the body was intended.
; NOTE(review): several patterns below appear here as empty strings, mojibake,
; or identical pairs (eg. "" -> "-", "-" -> "-"); they presumably contained
; special or invisible characters originally — verify against the upstream file.
(defun de-unicode ()
"Replace a subset of Unicode punctuation in the buffer with their ASCII equivalents. Most useful for Markdown mode."
(interactive
(save-excursion
(goto-char (point-min))
; (replace-all "−" "\-") ; Pandoc Markdown→HTML does not support an escape for the 'minus sign'/−, so we write it literally & disable de-unicoding
(replace-all "\\u2013" "--")
(replace-all "â" "")
(replace-all "\\u2022\n\n " "- ")
(replace-all "\\u2022\n" "- ")
(replace-all "\\u2018" "‘")
(replace-all "\\u2019" "’")
(replace-all "–" "--")
(replace-all "—" "---")
(replace-all "-" "-")
(replace-all "" "-")
(replace-all "" "-")
(replace-all "‐" "-")
(replace-all "‘" "'")
(replace-all "’" "'")
(replace-all "’" "'")
(replace-all "’" "'")
(replace-all "‛" "'")
(replace-all "" "'")
(replace-all "“" "\"")
(replace-all "”" "\"")
(replace-all ",”" "”,")
(replace-all "„" "\"")
(replace-all "fl" "fl")
(replace-all "fi" "fi")
(replace-all "…" "...")
(replace-all "" " ")
(replace-all "" " ")
(replace-all "" "")
(replace-all " " " ")
(replace-all "•" "-")
(replace-all "" "-")
(replace-all "
" "-")
(delete-trailing-whitespace)
nil)))
;; In file-visiting Markdown buffers, register `de-unicode' on the
;; buffer-local `before-save-hook'.
(add-hook 'markdown-mode-hook
          (lambda ()
            (if buffer-file-name
                ;; final `t' = install on the buffer-local hook only
                (add-hook 'before-save-hook #'de-unicode nil t))))
; do *one* replacement and then quit. This is particularly useful in doing rewrites of hyperlinks: typically, we only want to hyperlink one instance (usually the first) of a word or phrase, and then skip the rest. The default `query-replace` requires us to either manually `n` them all, or `q` to quit. It can be toilsome to go through a lot of this. So we write our own to auto-exit on the first replacement.
; GPT-4-written. (I tried GPT-3.5 for most of it, but it kept screwing up on parenthesis-matching. Neither version could remove the highlighting on substitutions.)
; currently primarily used by `getLinkSuggestions`
(defun query-replace-once (from-string to-string &optional delimited start end)
  "Replace the first confirmed occurrence of the literal FROM-STRING with TO-STRING.
FROM-STRING is matched as plain text (it is passed through `regexp-quote'),
and TO-STRING is inserted as plain text as well.
If DELIMITED is non-nil, only match whole words.
START and END specify the region to search; interactively they are the
bounds of the region when one is in use, otherwise nil.
The actual prompting and replacing is delegated to `query-replace-regexp-once'."
  (interactive
   (let ((region-p (use-region-p)))
     ;; The input is a literal string, not a regexp, so the prompt says so.
     (list (read-from-minibuffer "Query replace once: ")
           (read-from-minibuffer "Query replace once with: ")
           nil
           (when region-p (region-beginning))
           (when region-p (region-end)))))
  (query-replace-regexp-once
   (regexp-quote from-string)
   ;; The regexp variant interprets backslashes in its replacement text
   ;; (`\\&', `\\1', ...); double them so TO-STRING is inserted verbatim.
   (replace-regexp-in-string (regexp-quote "\\") "\\\\" to-string t t)
   delimited start end))
(defun query-replace-regexp-once (regexp to-string &optional delimited start end)
  "Replace the first confirmed occurrence of REGEXP with TO-STRING.
Each match is highlighted in turn and the user is asked y/n/q:
`y' replaces that match and stops, `n' moves on to the next match,
`q' stops without replacing.  Matching is case-sensitive.
TO-STRING is inserted without case conversion, but backslash constructs in
it (`\\&', `\\1', ...) are interpreted as by `replace-match'.
If DELIMITED is non-nil, only match whole words.
START and END specify the region to search (default: from the start of the
buffer, with no end bound).  Point is preserved.  Returns nil."
  (interactive
   (let ((region-p (use-region-p)))
     (list (read-from-minibuffer "Query replace regexp once (regexp): ")
           (read-from-minibuffer "Query replace regexp once with: ")
           nil
           (when region-p (region-beginning))
           (when region-p (region-end)))))
  (let ((inhibit-read-only t)
        (case-fold-search nil)
        (search-function (if delimited 're-search-forward-word 're-search-forward))
        (replace-done nil))
    ;; Clean up the highlighting even if the user quits with C-g or an error
    ;; is signalled part-way through.
    (unwind-protect
        (save-excursion
          (goto-char (or start (point-min)))
          (while (and (not replace-done)
                      (funcall search-function regexp end t))
            (let ((found (match-data))
                  (zero-width (= (match-beginning 0) (match-end 0))))
              (isearch-highlight (match-beginning 0) (match-end 0))
              (let ((response (read-char-choice
                               (concat "Replace this occurrence? (y/n/q): "
                                       (substring-no-properties (match-string 0)))
                               '(?y ?n ?q))))
                (cond ((eq response ?y)
                       ;; Restore the match in case anything that ran during
                       ;; the prompt changed the global match data.
                       (set-match-data found)
                       (replace-match to-string t nil) ; NOTE: fixed-string replacement, not matched-case. We do not want to mangle URLs and create rewrites like 'Twitter' → '[Twitter](Https://En.Wikipedia.Org/Wiki/Twitter)'!
                       (setq replace-done t))
                      ((eq response ?n)
                       ;; Point is already at the end of the match, so the
                       ;; next search continues from there.  Only a
                       ;; zero-width match needs an explicit step forward
                       ;; (or a stop at end of buffer) to avoid looping.
                       (when zero-width
                         (if (eobp)
                             (setq replace-done t)
                           (forward-char 1))))
                      ((eq response ?q)
                       (setq replace-done t) ; treat as successfully finished and exit politely
                       ))))))
      ;; `isearch-highlight' uses the isearch overlay, which is removed by
      ;; `isearch-dehighlight' (not by the lazy-highlight cleanup).
      (isearch-dehighlight)
      (lazy-highlight-cleanup t))))
(defun re-search-forward-word (regexp &optional bound noerror count)
  "Search forward from point for a whole-word occurrence of REGEXP.
This is a wrapper around `re-search-forward' that ensures word boundaries.
BOUND, NOERROR, and COUNT have the same meaning as in `re-search-forward',
and so does the return value."
  ;; Wrap REGEXP in a shy group so that both `\\b' anchors apply to the whole
  ;; expression even when it contains a top-level alternation (`\\|'); a shy
  ;; group does not change the numbering of REGEXP's own capture groups.
  (let ((word-regexp (concat "\\b\\(?:" regexp "\\)\\b")))
    (re-search-forward word-regexp bound noerror count)))
; Easy Unicode insertion mnemonics; uses the unusual X modifier key 'Super'.
; This is usually not bound to any key by default, but on my 102-key US layout, I rebind the useless 'Menu' key to it: `$ xmodmap -e 'keysym Menu = Super_R'`.
; Then 's-' in `kbd` notation is 'Super-'. (I avoid use of 'Compose' key because I find the shortcuts highly unintuitive: " . " ")
("
" . "\n\n")
("Kendall's Ï" . "Kendall's τ")
("\\\\u03bc" . "μ")
("\\\\u2018" . "‘")
("\\\\u2019" . "’")
("\u2009" . " ")
("\\\\u2013" . "–")
("â\\" . "'")
("â" . "'")
("â\\" . "—")
("â" . "−")
("\\\\u2014" . "—")
("\\\\u201c" . "“")
("\\\\u201d" . "”")
("\\\\u2009" . " ")
("\\\\u2212" . "−")
("\\\\u2192" . "→")
("\\\\u221e" . "𝓁∞")
("\\\\u03b5" . "𝜀")
("\\\\u223c" . "~")
("\\\\u2217" . "✱")
("\\\\u2020" . "†")
("\\\\u2021" . "‡")
("\\\\u2194" . "↔")
("\\\\u2248 " . "~")
("\\\\u03b1" . "α")
("\\\\u03b8i" . "θi")
("\\\\u2265" . "≥")
("\\\\u03b8" . "θ")
(" \\\\u2022 " . ", ")
("\\\\u2022" . "·")
("\\\\u2264" . "≤")
("\\\\U0001d442" . "𝒪")
("\\\\U0001d4412" . "_N_^2^")
("\\\\u2208" . "∈")
("\\\\U0001d45a" . "𝑚")
("\\\\u2113" . "𝓁")
("â¤" . "≤")
("](wiki/" . "](/")
("](//doc" . "](/doc")
("]]http" . "](https")
("]]/" . "](/")
(" \\[\" . " " [\"")
(" \"](" . "\"](")
("" . "=")
(" " . ", ")
("T h i s" . "This")
("T h e" . "The")
("Author links open overlay panel" . "")
("et al.," . "et al")
("\n---\n" . "\n
\n")
("" . " = ")
("" . " < ")
("\n " . "\n")
(" = " . " = ")
(" =" . " =")
("= " . "= ")
("‐" . "-")
("\n" . "")
("" . "")
("–" . "--")
("—" . "---")
(" ‑\n" . "")
("‑\n" . "")
("‑" . "-") ; deal with NON-BREAKING HYPHEN which NEJM uses for both line-breaking and regular hyphens, /sigh
("¬ " . "")
("" . "**")
("" . "**")
("" . "")
("" . "")
("= " . "= ")
("∼" . "~")
("Previous article in issue\nNext article in issue\nKeywords\n" . "[**Keywords**: ")
("Previous article in issue\nKeywords\n" . "[**Keywords**: ")
("•\n\n " . "- ")
(" ● " . "- ")
("eta≠analys" . "eta-analys") ; odd typo in some PDFs: "meta≠analyses"
("\n•\n" . "- ")
(" •\n " . "- ")
("
" "
") (replace-all "
" "") ; (replace-all "" "
\n") ; (replace-all "
" "
\n") (replace-all " id=\"cb1\">" "") ; the Pandoc syntax-highlighting IDs cause ID clashes when substituted into pages, so delete all (replace-all " id=\"cb2\">" "") (replace-all " id=\"cb3\">" "") (replace-all " id=\"cb4\">" "") (replace-all "
" "\" />") (replace-all "’’" "’") (replace-all "’s" "’s") (replace-all "%3Csup%3Est%3C/sup%3E" "th") (replace-all "%3Csup%3End%3C/sup%3E" "nd") (replace-all "%3Csup%3Erd%3C/sup%3E" "rd") (replace-all "" "") ; unescaped single quotation marks will often break the YAML, so they need to either be replaced with the intended Unicode, or double-quoted to 'escape' them ; (query-replace "'" "''" nil begin end) (delete-trailing-whitespace) (forward-line) (html-mode) (ding) (message "Done.") ) ) ) ) (add-hook 'markdown-mode-hook (lambda () (define-key markdown-mode-map "\C-c\ w" 'markdown-annotation-compile))) (defvar html-mode-map) ; suppress reference-to-free-variable byte-compile warning (add-hook 'html-mode-hook (lambda () (define-key html-mode-map "\C-c\ w" 'markdown-annotation-compile))) ; for the `foo` buffer I do most of my annotation work in, on the first copy-paste of a block of text, detect if it has any paragraph breaks (ie. double newlines), and if it does not, then automatically run paragraphizer.py on it to try to break it up into logical paragraphs. ; (Note/warning: written by GPT-3.5. Curiously, GPT-4 failed when I tried to repeat this exercise in it using the same starting prompt & kind of feedback: because it tries to implement solutions using advice, buffer-local variables, and :properties—which are subtly buggy in their handling of state, and so wind up running paragraphizer.py on every paste.) (defun markdown-paragraphize () "Automatically paragraphize single-paragraph abstracts. Intended for Markdown mode with double-newlines for newlines; may malfunction if run on other formats like HTML (where `` pairs can come in many forms, not to mention other block elements like blockquotes)."
(interactive)
(delete-trailing-whitespace)
(let ((double-newline-found nil))
(save-excursion
(goto-char (point-min))
(unless (search-forward-regexp "\n\n" nil t)
(message "Paragraphizing abstract…")
(shell-command-on-region (point-min) (point-max) "~/wiki/static/build/paragraphizer.py" nil t)
(setq double-newline-found t)))
(when double-newline-found
(goto-char (point-max))
(message "Paragraphizing abstract done."))))
(defun markdown-paragraphize-hook ()
  "Call `markdown-paragraphize' right after a yank into the `foo' buffer.
Does nothing unless the current buffer is named \"foo\", is in a mode derived
from `markdown-mode', the command that just ran was `yank', and the buffer
holds at least 500 characters."
  (unless (or (not (equal (buffer-name) "foo"))
              (not (derived-mode-p 'markdown-mode))
              (not (eq this-command 'yank))
              ;; ensure that there is enough in the buffer to plausibly be a
              ;; full copy-pasted abstract, as opposed to a random snippet or line.
              (< (buffer-size) 500))
    (markdown-paragraphize)))
(add-hook 'post-command-hook #'markdown-paragraphize-hook)
; add new-line / paragraph snippet
; NOTE(review): this form looks truncated — the string given to `kbd` contains
; only a newline, the value bound is an `if` form (evaluated when the hook runs)
; rather than a command, and the closing parens do not balance (one more `)` than
; is opened). Presumably a key name and a `(lambda () (interactive) ...)` wrapper
; were lost; confirm against the upstream file before relying on it.
(add-hook 'html-mode-hook
(lambda ()
(define-key html-mode-map (kbd "
")
(if (= ?\s (following-char)) (delete-char 1)))
)
))
;; Turn on `visual-fill-column-mode' in every Markdown buffer.
(add-hook 'markdown-mode-hook
          #'visual-fill-column-mode)
;; Markup editing shortcuts for HTML/Markdown/GTX annotation editing.
;; Functions to easily add italics, bold, Wikipedia links, smallcaps, & margin-note syntax.
(defun surround-region-or-word (start-tag end-tag)
  "Surround selected region (or next word if no region) with START-TAG and END-TAG.
With an active region, the tags are inserted around the region; otherwise
START-TAG goes at point and END-TAG after the end of the next word
\(as found by `forward-word').  Point is left just after END-TAG.
Interactively, prompt for both tags."
  ;; A bare `(interactive)' cannot supply the two required arguments, so
  ;; calling this via M-x would signal an error; read them as strings.
  (interactive "sStart tag: \nsEnd tag: ")
  (let ((begin (if (region-active-p)
                   (region-beginning)
                 (point)))
        (end (if (region-active-p)
                 (region-end)
               (progn
                 (forward-word)
                 (point)))))
    ;; Insert the closing tag first so that BEGIN is still a valid position.
    (goto-char end)
    (insert end-tag)
    (goto-char begin)
    (insert start-tag)
    (goto-char (+ end (length start-tag) (length end-tag)))))
;; the wrappers:
(defun html-insert-emphasis ()
  "Surround selected region (or word) with HTML `<em>` tags for italics/emphasis.
Equivalent to `*FOO*` in Markdown."
  (interactive)
  ;; The tag strings were empty, which made this command insert nothing.
  (surround-region-or-word "<em>" "</em>"))
(defun markdown-insert-emphasis ()
  "Wrap the selected region (or word) in single asterisks: Markdown emphasis.
Equivalent to `<em>FOO</em>` in HTML.
Gwern.net uses `*` for emphasis, and generally reserves `_` for italics such as book titles
\(in keeping with Internet conventions predating Gruber's Markdown mistake of conflating `*`/`_`)."
  (interactive)
  (let ((asterisk "*"))
    (surround-region-or-word asterisk asterisk)))
(defun html-insert-strong ()
  "Surround selected region (or word) with `<strong>` bold tags (HTML).
Equivalent to `**FOO**` in Markdown.
Used in abstracts for topics, first-level list emphasis, etc."
  (interactive)
  ;; The tag strings were empty, which made this command insert nothing.
  (surround-region-or-word "<strong>" "</strong>"))
(defun markdown-insert-strong ()
  "Wrap the selected region (or word) in `**`: Markdown bold.
Equivalent to `<strong>FOO</strong>` in HTML.
Used in abstracts for topics, first-level list emphasis, etc."
  (interactive)
  (let ((bold-marker "**"))
    (surround-region-or-word bold-marker bold-marker)))
(defun html-insert-smallcaps ()
  "Surround selected region (or word) with smallcaps syntax (HTML).
Built-in CSS class in HTML & Pandoc Markdown; the span syntax
`<span class=\"smallcaps\">FOO</span>` is equivalent to `[FOO]{.smallcaps}`.
Smallcaps are used on Gwern.net for second-level emphasis after bold has been used."
  (interactive)
  ;; The tag strings were empty, which made this command insert nothing.
  (surround-region-or-word "<span class=\"smallcaps\">" "</span>"))
(defun markdown-insert-smallcaps ()
  "Wrap the selected region (or word) in Pandoc Markdown smallcaps span syntax.
Built-in CSS class in HTML & Pandoc Markdown, equivalent to
`<span class=\"smallcaps\">FOO</span>`.
Smallcaps are used on Gwern.net for second-level emphasis after bold has been used."
  (interactive)
  (let ((opener "[")
        (closer "]{.smallcaps}"))
    (surround-region-or-word opener closer)))
(defun html-insert-wp-link ()
  "Surround selected region (or word) with custom Wikipedia link syntax in HTML.
Inserts `<a href=\"!W\">FOO</a>`, the HTML counterpart of the Markdown
`[FOO](!W)` interwiki link.
Compiled by Interwiki.hs to the equivalent (usually) of a link to the
English Wikipedia article named FOO."
  (interactive)
  ;; The tag strings were empty, which made this command insert nothing.
  ;; NOTE(review): the `!W' href mirrors `markdown-insert-wp-link'; confirm it
  ;; is the form Interwiki.hs expects in raw HTML.
  (surround-region-or-word "<a href=\"!W\">" "</a>"))
(defun markdown-insert-wp-link ()
  "Wrap the selected region (or word) as a custom Wikipedia link in Markdown.
The result has the form `[FOO](!W)`."
  (interactive)
  (let ((opener "[")
        (closer "](!W)"))
    (surround-region-or-word opener closer)))
(defun markdown-insert-margin-note ()
  "Wrap the selected region FOO BAR (or word FOO) as a `margin-note`.
\(Implemented as a special `<span>` class.\)
This creates marginal glosses (in the left margin) as counterparts to sidenotes.
These margin-notes are used as very abbreviated italicized summaries of the
paragraph \(like very small inlined section headers\)."
  (interactive)
  (let ((opener "[")
        (closer "]{.marginnote}"))
    (surround-region-or-word opener closer)))
(defun html-insert-margin-note ()
  "Surround selected region FOO BAR (or word FOO) with a `margin-note`.
\(Implemented as a special `<span>` class.\)
This creates marginal glosses (in the left margin) as counterparts to sidenotes.
These margin-notes are used as very abbreviated italicized summaries of the
paragraph \(like very small inlined section headers\).
When inserting margin-notes into HTML snippets, that usually means an annotation
and the margin-note is an editorial insertion, which are denoted by paired `[]` brackets.
To save effort, we add those as well."
  (interactive)
  ;; Only the editorial brackets were being inserted; the enclosing
  ;; `marginnote' span that the docstring describes was missing.
  (surround-region-or-word "<span class=\"marginnote\">[" "]</span>"))
;; keybindings:
;;; Markdown:
;; Each hook binds one markup command in `markdown-mode-map'; the keys are
;; spelled in `kbd' notation (C-c C-e, C-c C-s, C-c s, C-c C-w, C-c C-m).
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-e") #'markdown-insert-emphasis)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-s") #'markdown-insert-strong)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c s") #'markdown-insert-smallcaps)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-w") #'markdown-insert-wp-link)))
(add-hook 'markdown-mode-hook
          (lambda () (define-key markdown-mode-map (kbd "C-c C-m") #'markdown-insert-margin-note)))
;;; HTML:
;; Each hook binds one markup command in `html-mode-map'; the keys are
;; spelled in `kbd' notation (C-c C-e, C-c C-s, C-c s, C-c C-w, C-c C-m).
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-e") #'html-insert-emphasis)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-s") #'html-insert-strong)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c s") #'html-insert-smallcaps)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-w") #'html-insert-wp-link)))
(add-hook 'html-mode-hook
          (lambda () (define-key html-mode-map (kbd "C-c C-m") #'html-insert-margin-note)))
;; ;;; YAML: (the YAML files store raw HTML snippets, so insert HTML rather than Markdown markup)
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-e" 'html-insert-emphasis)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-s" 'html-insert-strong)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ s" 'html-insert-smallcaps)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-w" 'html-insert-wp-link)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-m" 'html-insert-margin-note)))
;sp
; (add-hook 'markdown-mode-hook 'flyspell)
;for toggling visibility of sections - makes big pages easier to work with
;; Enable `outline-minor-mode' in Markdown buffers.
(add-hook 'markdown-mode-hook
          #'outline-minor-mode)
;In Markdown files, there are few excuses for unbalanced delimiters, and unbalance almost always indicates a link syntax error; in cases where quoted text must contain unbalanced delimiters (eg diffs, or neural-net-generated text or redirects fixing typos), a matching delimiter can be added in a comment like '' to make it add up.
(defun balance-parens ()
  "Register `check-parens' on the buffer-local `after-save-hook'.
Only done when the current buffer is visiting a file; otherwise do nothing."
  (if buffer-file-name
      ;; trailing `t' = add to the buffer-local hook only
      (add-hook 'after-save-hook #'check-parens nil t)))
;; Install `balance-parens' on each of these mode hooks, in this order.
(dolist (mode-hook '(markdown-mode-hook
                     ledger-mode-hook
                     emacs-lisp-mode-hook
                     haskell-mode-hook
                     css-mode-hook
                     javascript-mode-hook
                     html-mode-hook
                     python-mode-hook))
  (add-hook mode-hook #'balance-parens))
; NOTE: I skip YAML mode because syntax-level quoting is kept validated by the database processing, and within-annotation balancing is checked in Hakyll, and using `check-parens` in YAML mode triggers far too many spurious errors.
; ispell: ignore code blocks in Pandoc Markdown
; TODO: add a fix for '#' not being handled in URLs. current hack borrowed from