;; -*- lexical-binding: t -*-
;;; markdown.el --- Emacs support for editing Gwern.net
;;; Copyright (C) 2009 by Gwern Branwen
;;; License: CC-0
;;; When: Time-stamp: "2024-06-27 22:56:52 gwern"
;;; Words: GNU Emacs, Markdown, HTML, GTX, Gwern.net, typography
;;;
;;; Commentary:
;;; Helper files for editing Markdown, HTML, and HTML-in-GTX, particularly reformatting & editing annotations in the Gwern.net house style.
;;; Additional functions include error-checking and prettifying confusable characters like dashes.
; Since I hardly ever write elisp, and often start writing things in the *scratch* buffer, save time by defaulting to Markdown.
(setq initial-major-mode 'markdown-mode
      initial-scratch-message "")
; Open GTX files in html-mode. `add-to-list' (instead of `push') keeps re-evaluating this file from
; stacking duplicate entries, and `\\'' anchors at the real end of the file name (`$' also matches before a newline).
(add-to-list 'auto-mode-alist '("\\.gtx\\'" . html-mode))
; I do much of my editing in gwern.net files, so save myself some tab-completion hassle:
(setq default-directory "~/wiki/")
;;we rely on the Github dev version because the 2017 v2.3 stable release packaged everywhere is missing a bugfix (stable breaks on any Markdown file with HTML comments in it); NOTE: still seems to be true on Ubuntu `elpa-markdown-mode` 2.3+210-1 as of 2023-02-11!
(add-to-list 'load-path "~/src/markdown-mode/")
(require 'markdown-mode)
;; ; Metadata files are stored in YAML; but yaml-mode may be too slow to use given how large they have become...
;; (require 'yaml-mode)
;; (defun my/yaml-mode-decision ()
;; "Activate yaml-mode with conditionally disabled Flycheck."
;; (let (
;; (disable-flycheck (or (string-prefix-p (expand-file-name "~/wiki/metadata/") (buffer-file-name))
;; (> (nth 7 (file-attributes (buffer-file-name))) 1000000))))
;; ;; Disable Flycheck if the file is large or in a specific directory
;; (when disable-flycheck
;; (flycheck-mode -1))
;; ;; Activate yaml-mode
;; (yaml-mode)
;; ;; Additional settings if Flycheck is disabled
;; (when disable-flycheck
;; (font-lock-mode -1)
;; (message "Custom YAML mode settings applied for large file/specific directory"))))
;; (add-hook 'yaml-mode-hook 'my/yaml-mode-decision)
;; ; (add-to-list 'auto-mode-alist '("\\.yaml\\'" . yaml-mode))
; (setq major-mode 'markdown-mode) ; needs to be done via 'Customize'?
;; markdown-mode configuration: the preview command, plus math and underscore-italic support.
(setq markdown-command
      "pandoc --mathjax --metadata title='Markdown-preview' --to=html5 --standalone --number-sections --toc --reference-links --css=https://gwern.net/static/css/default.css -f markdown+smart --template=/home/gwern/wiki/static/template/pandoc/template-html5-articleedit.html5 -V lang=en-us"
      markdown-enable-math t
      markdown-italic-underscore t)
; warn on dangerous use of statistical-significance testing language:
(defconst significance-warning-keywords
  '((" significant" 0 'taylor-special-words-warning t)
    (" significance" 0 'taylor-special-words-warning t))
  "Font-lock keywords that flag statistical-significance language.
Each entry highlights the whole match (subexpression 0) with the face
`taylor-special-words-warning`, overriding existing fontification.
NOTE(review): that face is not defined in this part of the file;
presumably it is defined elsewhere -- confirm.")

(defun highlight-significance-language ()
  "Add `significance-warning-keywords` to the current buffer's font-lock keywords."
  (font-lock-add-keywords nil significance-warning-keywords))

(defun markdown-warn-on-sloppy-text ()
  "Markdown buffers: flag significance language and show trailing whitespace."
  (highlight-significance-language)
  (setq show-trailing-whitespace t))

;; Named functions (rather than two copies of the same lambda) keep the keyword
;; list in one place and let the hooks be inspected or removed by name.
(add-hook 'markdown-mode-hook #'markdown-warn-on-sloppy-text)
(add-hook 'html-mode-hook #'highlight-significance-language)
;; (add-hook 'yaml-mode-hook
;; (lambda ()
;; (font-lock-add-keywords nil '(
;; (" significant" 0 'taylor-special-words-warning t)
;; (" significance" 0 'taylor-special-words-warning t)
;; ))))
;I like unusual semantic punctuation!
;; Each command inserts one character at point. The `insert-char' call lives in
;; the function body (not inside the `interactive' form), so the commands work
;; both via M-x/keybindings and when called from Lisp.
(defun interrobang ()
  "Insert an interrobang (‽), for replacing \"?!\"/\"!?\"."
  (interactive)
  (insert-char ?‽ 1))
(defun irony ()
  "Insert an irony/sarcasm mark (⸮); better than '[!]', anyway."
  (interactive)
  (insert-char ?⸮ 1))
(defalias 'sarcasm 'irony)
(defun bitcoin ()
  "Insert a Bitcoin sign (₿)."
  (interactive)
  (insert-char ?₿ 1))
(defun en-dash ()
  "Insert an en dash (–)."
  (interactive)
  (insert-char ?– 1))
(defun em-dash ()
  "Insert an em dash (—)."
  (interactive)
  (insert-char ?— 1))
(defun arrow-right ()
  "Insert a rightwards arrow (→)."
  (interactive)
  (insert-char ?→ 1))
(defun arrow-left ()
  "Insert a leftwards arrow (←)."
  (interactive)
  (insert-char ?← 1))
(defun arrow-both ()
  "Insert a left-right arrow (↔)."
  (interactive)
  (insert-char ?↔ 1))
(defun arrow-up ()
  "Insert an upwards arrow (↑)."
  (interactive)
  (insert-char ?↑ 1))
(defun arrow-down ()
  "Insert a downwards arrow (↓)."
  (interactive)
  (insert-char ?↓ 1))
(defun interpunct ()
  "Insert an interpunct/middle dot (·)."
  (interactive)
  (insert-char ?· 1))
(defun replace-all (original replacement)
  "Replace every match of the regexp ORIGINAL in the buffer with REPLACEMENT.
A local stand-in for `replace-string'/`replace-regexp', which complain when
called from Lisp.  ORIGINAL is a regexp (use `regexp-quote' for a literal
search) and is matched case-sensitively.  REPLACEMENT is inserted verbatim:
no backslash expansion and no re-casing to imitate the matched text.
The accessible buffer is scanned from its beginning; point is preserved.
An empty ORIGINAL is ignored.  Returns nil."
  (save-excursion
    (let ((case-fold-search nil)
          ;; An empty pattern matches everywhere and can never make progress.
          (finished (string= original "")))
      (goto-char (point-min))
      (while (and (not finished)
                  (re-search-forward original nil t))
        (let ((empty-match (= (match-beginning 0) (match-end 0))))
          ;; FIXEDCASE is t: with nil, `replace-match' upcases or capitalizes the
          ;; replacement to mimic the matched text (\"HELLO\" -> \"BYE\"), which
          ;; contradicts the exact-replacement contract.  LITERAL is t as before.
          (replace-match replacement t t)
          ;; A zero-width match (e.g. \"$\") leaves point where the next search
          ;; would match again; step forward so the loop always terminates.
          (when empty-match
            (if (eobp)
                (setq finished t)
              (forward-char 1))))))))
(defun de-unicode ()
"Replace a subset of Unicode punctuation in the buffer with their ASCII equivalents. Most useful for Markdown mode."
;; NOTE(review): everything below is nested inside the `(interactive ...)' form, so the
;; replacements are evaluated as the interactive argument spec and the function body itself
;; is empty. It looks like a plain Lisp call (e.g. from a hook) therefore does nothing -- confirm,
;; and if so move the `save-excursion' out of `interactive'.
;; NOTE(review): several patterns below read as empty strings or as ASCII-identical pairs;
;; presumably they contain invisible or look-alike Unicode characters -- verify the file's
;; encoding before editing any of these lines.
;; The replacements run top to bottom over the whole buffer via `replace-all', so order matters.
(interactive
(save-excursion
(goto-char (point-min))
; (replace-all "−" "\-") ; Pandoc Markdown→HTML does not support an escape for the 'minus sign'/−, so we write it literally & disable de-unicoding
(replace-all "\\u2013" "--")
(replace-all "â" "")
(replace-all "\\u2022\n\n " "- ")
(replace-all "\\u2022\n" "- ")
(replace-all "\\u2018" "‘")
(replace-all "\\u2019" "’")
(replace-all "–" "--")
(replace-all "—" "---")
(replace-all "-" "-")
(replace-all "" "-")
(replace-all "" "-")
(replace-all "‐" "-")
(replace-all "‘" "'")
(replace-all "’" "'")
(replace-all "’" "'")
(replace-all "’" "'")
(replace-all "‛" "'")
(replace-all "" "'")
(replace-all "“" "\"")
(replace-all "”" "\"")
; NOTE(review): the line above has already rewritten every ” to ", so the next pattern presumably never matches -- confirm intended order.
(replace-all ",”" "”,")
(replace-all "„" "\"")
(replace-all "fl" "fl")
(replace-all "fi" "fi")
(replace-all "…" "...")
(replace-all "" " ")
(replace-all "" " ")
(replace-all "" "")
(replace-all " " " ")
(replace-all "•" "-")
(replace-all "" "-")
(replace-all "
" "-")
; finally strip trailing whitespace; the trailing nil makes the form evaluate to nil (an empty argument list)
(delete-trailing-whitespace)
nil)))
;; In Markdown buffers that visit a file, run `de-unicode' before every save.
;; The `before-save-hook' entry is added buffer-locally (last argument t).
(add-hook 'markdown-mode-hook
          (lambda ()
            (and buffer-file-name
                 (add-hook 'before-save-hook #'de-unicode nil t))))
; do *one* replacement and then quit. This is particularly useful in doing rewrites of hyperlinks: typically, we only want to hyperlink one instance (usually the first) of a word or phrase, and then skip the rest. The default `query-replace` requires us to either manually `n` them all, or `q` to quit. It can be toilsome to go through a lot of this. So we write our own to auto-exit on the first replacement.
; GPT-4-written. (Tried GPT-3.5 for most of it, but kept screwing up on parenthesis-matching. Neither version could remove the highlighting on substitutions.)
; Currently primarily used by `getLinkSuggestions`.
(defun query-replace-once (from-string to-string &optional delimited start end)
  "Replace the first confirmed occurrence of the literal FROM-STRING with TO-STRING.
FROM-STRING is matched literally: it is passed through `regexp-quote' before
being handed to `query-replace-regexp-once', which does the prompting.
If DELIMITED is non-nil, only match whole words.
START and END specify the region to search; interactively they are the
active region, if any.
Note that TO-STRING is forwarded unchanged, so backslash sequences in it are
still interpreted by the regexp variant's `replace-match' call."
  (interactive
   (let ((region (use-region-p)))
     ;; The prompt no longer says \"(regexp)\": the input is quoted, not a regexp.
     (list (read-from-minibuffer "Query replace once: ")
           (read-from-minibuffer "Query replace once with: ")
           nil
           (and region (region-beginning))
           (and region (region-end)))))
  (query-replace-regexp-once (regexp-quote from-string) to-string delimited start end))
(defun query-replace-regexp-once (regexp to-string &optional delimited start end)
  "Interactively replace one occurrence of REGEXP with TO-STRING, then stop.
Each match is highlighted and the user is asked y/n/q: `y' replaces that
match and exits, `n' moves on to the next match, `q' exits without replacing.
Matching is case-sensitive, and the replacement's case is never altered.
If DELIMITED is non-nil, only match whole words.
START and END specify the region to search; interactively they are the
active region, if any.  Point is preserved."
  (interactive
   (let ((region (use-region-p)))
     (list (read-from-minibuffer "Query replace regexp once (regexp): ")
           (read-from-minibuffer "Query replace regexp once with: ")
           nil
           (and region (region-beginning))
           (and region (region-end)))))
  (let ((inhibit-read-only t)
        (case-fold-search nil)
        (search-function (if delimited 're-search-forward-word 're-search-forward))
        (finished nil))
    ;; `unwind-protect' so the highlight is removed even on C-g or an error.
    (unwind-protect
        (save-excursion
          (goto-char (or start (point-min)))
          (while (and (not finished)
                      (funcall search-function regexp end t))
            (let ((hit-beg (match-beginning 0))
                  (hit-end (match-end 0)))
              (isearch-highlight hit-beg hit-end)
              ;; Reading input can run arbitrary code; keep our match data intact for `replace-match'.
              (pcase (save-match-data
                       (read-char-choice
                        (concat "Replace this occurrence? (y/n/q): "
                                (substring-no-properties (match-string 0)))
                        '(?y ?n ?q)))
                (?y
                 ;; NOTE: fixed-case replacement, not matched-case. We do not want to mangle URLs and create
                 ;; rewrites like 'Twitter' → '[Twitter](Https://En.Wikipedia.Org/Wiki/Twitter)'!
                 (replace-match to-string t nil)
                 (setq finished t))
                (?n
                 ;; Point is already past a non-empty match, so the next search resumes right there
                 ;; (an unconditional `forward-char' skipped adjacent matches and signaled at end of buffer).
                 ;; Only a zero-width match needs a manual step to guarantee progress.
                 (when (= hit-beg hit-end)
                   (if (eobp)
                       (setq finished t)
                     (forward-char 1))))
                (?q
                 ;; treat as successfully finished and exit politely
                 (setq finished t))))))
      ;; `isearch-highlight' uses the isearch overlay, which `lazy-highlight-cleanup' does not remove.
      (isearch-dehighlight)
      (lazy-highlight-cleanup t))))
(defun re-search-forward-word (regexp &optional bound noerror count)
  "Search forward from point for a whole-word occurrence of REGEXP.
This is a wrapper around `re-search-forward' that requires a word boundary
on both sides of the match.
BOUND, NOERROR, and COUNT have the same meaning as in `re-search-forward'."
  ;; Wrap REGEXP in a shy group so that both boundaries apply to all of it:
  ;; without the group, a top-level alternation such as \"foo\\|bar\" would
  ;; parse as \"\\bfoo\" OR \"bar\\b\".  A shy group does not renumber the
  ;; caller's capture groups.
  (let ((word-regexp (concat "\\b\\(?:" regexp "\\)\\b")))
    (re-search-forward word-regexp bound noerror count)))
; Easy Unicode insertion mnemonics; uses the unusual X modifier key 'Super'.
; This is not bound by default to a key usually, but on my 102-key US layout, I rebind the useless 'Menu' key to it: `$ modmap -e 'keysym Menu = Super_R'`.
; Then 's-' in `kbd` notation is 'Super-'. (I avoid use of 'Compose' key because I find the shortcuts highly unintuitive: " . " ")
("
" . "\n\n")
("Kendall's Ï" . "Kendall's τ")
("\\\\u03bc" . "μ")
("\\\\u2018" . "‘")
("\\\\u2019" . "’")
("\u2009" . " ")
("\\\\u2013" . "–")
("â\\" . "'")
("â" . "'")
("â\\" . "—")
("â" . "−")
("\\\\u2014" . "—")
("\\\\u201c" . "“")
("\\\\u201d" . "”")
("\\\\u2009" . " ")
("\\\\u2212" . "−")
("\\\\u2192" . "→")
("\\\\u221e" . "𝓁∞")
("\\\\u03b5" . "𝜀")
("\\\\u223c" . "~")
("\\\\u2217" . "✱")
("\\\\u2020" . "†")
("\\\\u2021" . "‡")
("\\\\u2194" . "↔")
("\\\\u2248 " . "~")
("\\\\u03b1" . "α")
("\\\\u03b8i" . "θi")
("\\\\u2265" . "≥")
("\\\\u03b8" . "θ")
(" \\\\u2022 " . ", ")
("\\\\u2022" . "·")
("\\\\u2264" . "≤")
("\\\\U0001d442" . "𝒪")
("\\\\U0001d4412" . "_N_^2^")
("\\\\u2208" . "∈")
("\\\\U0001d45a" . "𝑚")
("\\\\u2113" . "𝓁")
("â¤" . "≤")
("](wiki/" . "](/")
("](//doc" . "](/doc")
("]]http" . "](https")
("]]/" . "](/")
(" \\[\" . " " [\"")
(" \"](" . "\"](")
("" . "=")
(" " . ", ")
("T h i s" . "This")
("T h e" . "The")
("Author links open overlay panel" . "")
("et al.," . "et al")
("\n---\n" . "\n
\n")
("" . " = ")
("" . " < ")
("\n " . "\n")
(" = " . " = ")
(" =" . " =")
("= " . "= ")
("‐" . "-")
("\n" . "")
("" . "")
("–" . "--")
(" ‑\n" . "")
("‑\n" . "")
("‑" . "-") ; deal with NON-BREAKING HYPHEN which NEJM uses for both line-breaking and regular hyphens, /sigh
("¬ " . "")
("" . "**")
("" . "**")
("" . "")
("" . "")
("= " . "= ")
("∼" . "~")
("Previous article in issue\nNext article in issue\nKeywords\n" . "[**Keywords**: ")
("Previous article in issue\nKeywords\n" . "[**Keywords**: ")
("•\n\n " . "- ")
(" ● " . "- ")
("eta≠analys" . "eta-analys") ; odd typo in some PDFs: "meta≠analyses"
("\n•\n" . "- ")
(" •\n " . "- ")
("
" "
") (replace-all "
" "") ; (replace-all "" "
\n") ; (replace-all "
" "
\n") (replace-all " id=\"cb1\">" "") ; the Pandoc syntax-highlighting IDs cause ID clashes when substituted into pages, so delete all (replace-all " id=\"cb2\">" "") (replace-all " id=\"cb3\">" "") (replace-all " id=\"cb4\">" "") (replace-all "
` pairs can come in many forms, not to mention other block elements like blockquotes)."
(interactive)
(delete-trailing-whitespace)
(let ((double-newline-found nil))
(save-excursion
(goto-char (point-min))
(unless (search-forward-regexp "\n\n" nil t)
(message "Paragraphizing abstract…")
(shell-command-on-region (point-min) (point-max) "~/wiki/static/build/paragraphizer.py" nil t)
(setq double-newline-found t)))
(when double-newline-found
(goto-char (point-max))
(message "Paragraphizing abstract done."))))
(defun markdown-paragraphize-hook ()
  "Run `markdown-paragraphize' after a sizable yank into the Markdown buffer \"foo\".
Intended for `post-command-hook'.  Nothing happens unless the current buffer
is named \"foo\", is in a mode derived from `markdown-mode', the command that
just ran was `yank', and the buffer holds at least 500 characters."
  ;; 500 characters: enough to plausibly be a full copy-pasted abstract,
  ;; as opposed to a random snippet or line.
  (let ((minimum-abstract-size 500))
    (and (equal (buffer-name) "foo")
         (derived-mode-p 'markdown-mode)
         (eq this-command 'yank)
         (>= (buffer-size) minimum-abstract-size)
         (markdown-paragraphize))))
(add-hook 'post-command-hook #'markdown-paragraphize-hook)
; add new-line / paragraph snippet
(add-hook 'html-mode-hook
(lambda ()
(define-key html-mode-map (kbd "
")
(if (= ?\s (following-char)) (delete-char 1)))
)
))
;; Turn on `visual-fill-column-mode' in every Markdown buffer.
(add-hook 'markdown-mode-hook #'visual-fill-column-mode)
;; Markup editing shortcuts for HTML/Markdown/GTX annotation editing.
;; Functions to easily add italics, bold, Wikipedia links, smallcaps, & margin-note syntax.
(defun surround-region-or-word (start-tag end-tag)
  "Surround selected region (or next word if no region) with START-TAG and END-TAG.
With an active region, the tags are placed around the region.  Otherwise
START-TAG is inserted at point and END-TAG after the end of the next word,
as found by `forward-word'.  Point is left just after the inserted END-TAG.
Interactively, both tags are read from the minibuffer."
  ;; A bare `(interactive)' cannot supply the two required arguments, so M-x
  ;; failed with a wrong-number-of-arguments error; prompt for the tags instead.
  (interactive "sStart tag: \nsEnd tag: ")
  (let* ((use-region (region-active-p))
         (begin (if use-region
                    (region-beginning)
                  (point)))
         (end (if use-region
                  (region-end)
                (forward-word)
                (point))))
    ;; Insert the closing tag first, so that BEGIN is still a valid position
    ;; when the opening tag goes in.
    (goto-char end)
    (insert end-tag)
    (goto-char begin)
    (insert start-tag)
    (goto-char (+ end (length start-tag) (length end-tag)))))
;; the wrappers:
(defun html-insert-emphasis ()
  "Surround selected region (or word) with HTML `<em>` tags for italics/emphasis.
Equivalent to `*FOO*` in Markdown."
  (interactive)
  ;; The tag strings were empty, so the command inserted nothing.
  (surround-region-or-word "<em>" "</em>"))
(defun markdown-insert-emphasis ()
  "Wrap the selected region (or next word) in Markdown `*` emphasis markers.
The HTML counterpart is `<em>FOO</em>`.
Gwern.net uses `*` for emphasis, and generally reserves `_` for italics such
as book titles, in keeping with Internet conventions predating Gruber's
Markdown mistake of conflating `*`/`_`."
  (interactive)
  (let ((marker "*"))
    (surround-region-or-word marker marker)))
(defun html-insert-strong ()
  "Surround selected region (or word) with `<strong>` bold tags.
HTML; equivalent to `**` in Markdown.
Used in abstracts for topics, first-level list emphasis, etc."
  (interactive)
  ;; The tag strings were empty, so the command inserted nothing.
  (surround-region-or-word "<strong>" "</strong>"))
(defun markdown-insert-strong ()
  "Wrap the selected region (or next word) in Markdown `**` bold markers.
The HTML counterpart is `<strong>FOO</strong>`.
Bold is used in abstracts for topics, first-level list emphasis, etc."
  (interactive)
  (let ((marker "**"))
    (surround-region-or-word marker marker)))
(defun html-insert-smallcaps ()
  "Surround selected region (or word) with smallcaps syntax.
Built-in CSS class in HTML & Pandoc Markdown; the span syntax is equivalent to
`[FOO]{.smallcaps}`.
Smallcaps are used on Gwern.net for second-level emphasis after bold has been used."
  (interactive)
  ;; The tag strings were empty, so the command inserted nothing.  The span
  ;; below is the HTML form of Pandoc's `[FOO]{.smallcaps}`.
  (surround-region-or-word "<span class=\"smallcaps\">" "</span>"))
(defun markdown-insert-smallcaps ()
  "Wrap the selected region (or next word) in Pandoc Markdown smallcaps syntax.
Built-in CSS class in HTML & Pandoc Markdown; the HTML counterpart is
`<span class=\"smallcaps\">FOO</span>`.
Smallcaps are used on Gwern.net for second-level emphasis after bold has been used."
  (interactive)
  (let ((opener "[")
        (closer "]{.smallcaps}"))
    (surround-region-or-word opener closer)))
(defun html-insert-wp-link ()
  "Surround selected region (or word) with custom Wikipedia link syntax in HTML.
Compiled by Interwiki.hs to the equivalent (usually) of
`<a href=\"https://en.wikipedia.org/wiki/FOO\">FOO</a>`."
  (interactive)
  ;; The tag strings were empty, so the command inserted nothing.
  ;; NOTE(review): `!W` mirrors the Markdown form `[FOO](!W)` used by
  ;; `markdown-insert-wp-link'; confirm this is the href Interwiki.hs expects.
  (surround-region-or-word "<a href=\"!W\">" "</a>"))
(defun markdown-insert-wp-link ()
  "Wrap the selected region (or next word) in the custom Markdown Wikipedia link syntax.
The text becomes `[FOO](!W)`."
  (interactive)
  (let ((opener "[")
        (closer "](!W)"))
    (surround-region-or-word opener closer)))
(defun markdown-insert-margin-note ()
  "Wrap the selected region FOO BAR (or next word FOO) in a `margin-note`.
Margin-notes are implemented as a special `<span>` class.
They create marginal glosses in the left margin, as counterparts to sidenotes,
and are used as very abbreviated italicized summaries of the paragraph,
like very small inlined section headers."
  (interactive)
  (let ((opener "[")
        (closer "]{.marginnote}"))
    (surround-region-or-word opener closer)))
(defun html-insert-margin-note ()
  "Surround selected region FOO BAR (or word FOO) with a `margin-note`.
Implemented as a special `<span>` class.
This creates marginal glosses in the left margin, as counterparts to sidenotes.
These margin-notes are used as very abbreviated italicized summaries of the
paragraph, like very small inlined section headers.
When inserting margin-notes into HTML snippets, that usually means an annotation
and the margin-note is an editorial insertion, which are denoted by paired `[]` brackets.
To save effort, we add those as well."
  (interactive)
  ;; Only the editorial brackets were being inserted; the `<span>` promised by
  ;; the docstring was missing.  The class name matches the Markdown form
  ;; `[FOO]{.marginnote}`.
  (surround-region-or-word "<span class=\"marginnote\">[" "]</span>"))
;; keybindings:
;;; Markdown:
(defconst markdown-markup-key-bindings
  '(("C-c C-e" . markdown-insert-emphasis)
    ("C-c C-s" . markdown-insert-strong)
    ("C-c s"   . markdown-insert-smallcaps)
    ("C-c C-w" . markdown-insert-wp-link)
    ("C-c C-m" . markdown-insert-margin-note))
  "Alist of (KEY . COMMAND) markup shortcuts installed in `markdown-mode-map'.
KEY is in `kbd' notation.")

(defun markdown-bind-markup-keys ()
  "Install `markdown-markup-key-bindings' into `markdown-mode-map'."
  (dolist (binding markdown-markup-key-bindings)
    (define-key markdown-mode-map (kbd (car binding)) (cdr binding))))

(add-hook 'markdown-mode-hook #'markdown-bind-markup-keys)
;;; HTML:
(defconst html-markup-key-bindings
  '(("C-c C-e" . html-insert-emphasis)
    ("C-c C-s" . html-insert-strong)
    ("C-c s"   . html-insert-smallcaps)
    ("C-c C-w" . html-insert-wp-link)
    ("C-c C-m" . html-insert-margin-note))
  "Alist of (KEY . COMMAND) markup shortcuts installed in `html-mode-map'.
KEY is in `kbd' notation.")

(defun html-bind-markup-keys ()
  "Install `html-markup-key-bindings' into `html-mode-map'."
  (dolist (binding html-markup-key-bindings)
    (define-key html-mode-map (kbd (car binding)) (cdr binding))))

;; Bound from the mode hook because `html-mode-map' may not exist until the mode's library is loaded.
(add-hook 'html-mode-hook #'html-bind-markup-keys)
;; ;;; YAML: (the YAML files store raw HTML snippets, so insert HTML rather than Markdown markup)
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-e" 'html-insert-emphasis)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-s" 'html-insert-strong)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ s" 'html-insert-smallcaps)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-w" 'html-insert-wp-link)))
;; (add-hook 'yaml-mode-hook (lambda()(define-key yaml-mode-map "\C-c\ \C-m" 'html-insert-margin-note)))
;sp
; (add-hook 'markdown-mode-hook 'flyspell)
;for toggling visibility of sections - makes big pages easier to work with
;; Turn on `outline-minor-mode' in every Markdown buffer (section folding).
(add-hook 'markdown-mode-hook #'outline-minor-mode)
; In Markdown files, there are few excuses for unbalanced delimiters, and an imbalance almost always indicates a link syntax error; in cases where quoted text must contain unbalanced delimiters (eg. diffs, neural-net-generated text, or redirects fixing typos), a matching delimiter can be added in an HTML comment like '<!-- ( -->' or '<!-- ) -->' to make it add up.
(defun balance-parens ()
  "Arrange for `check-parens' to run after every save of the current buffer.
Does nothing unless the buffer is visiting a file.  The `after-save-hook'
entry is added buffer-locally."
  (when buffer-file-name
    (add-hook 'after-save-hook #'check-parens nil t)))

;; `javascript-mode' is an alias for `js-mode', whose hook is `js-mode-hook';
;; `javascript-mode-hook' on its own is never run.  It is kept for backward
;; compatibility, with `js-mode-hook' added so JavaScript buffers get checked.
(dolist (mode-hook '(markdown-mode-hook
                     ledger-mode-hook
                     emacs-lisp-mode-hook
                     haskell-mode-hook
                     css-mode-hook
                     javascript-mode-hook
                     js-mode-hook
                     html-mode-hook
                     python-mode-hook))
  (add-hook mode-hook #'balance-parens))
; NOTE: I skip YAML mode because syntax-level quoting is kept validated by the database processing, and within-annotation balancing is checked in Hakyll, and using `check-parens` in YAML mode triggers far too many spurious errors.
; ispell: ignore code blocks in Pandoc Markdown
; TODO: add a fix for '#' not being handled in URLs. current hack borrowed from