module Config.Tags where

import Data.Char (toLower)
import Data.List (isInfixOf, isPrefixOf)

import Utils (anyInfix, anyPrefix, setLike)

-- Tag guessing corrects typos by Levenshtein edit-distance; this is the largest
-- distance at which a correction is still accepted
-- (eg. 'sunkcost' → 'sunk-cost').
tagTypoMaxDistance :: Int
tagTypoMaxDistance = 4

-- Sub-directories whose directory name is *not* a tag (usually projects or
-- archives/mirrors/dumps); we don't consider them to be tags.
-- The given path is tested against each listed prefix with 'anyPrefix'
-- (defined in Utils; presumably true when the path starts with any of them).
tagGuessBlacklist :: String -> Bool
tagGuessBlacklist path = path `anyPrefix` nonTagDirectories
  where
    nonTagDirectories = setLike
      [ "/doc/biology/2000-iapac-norvir"
      , "/doc/personal/2011-gwern-yourmorals.org"
      , "/doc/rotten.com/"
      , "/doc/statistics/order/beanmachine-multistage"
      , "/doc/www/"
      ]
-- Name fragments for the same kind of non-tag directories (projects, mirrors, dumps).
-- NOTE(review): the consumer is not in this module; presumably these are filtered
-- out when listing tags — confirm against the caller.
tagListBlacklist :: [String]
tagListBlacklist =
  setLike
    [ "2000-iapac-norvir"
    , "rotten.com"
    , "personal/2011-gwern-yourmorals.org"
    , "beanmachine-multistage"
    , "/www/"
    , "biology/2000-iapac-norvir"
    ]

-- Similar to guessing tags from local files' directories, we can also guess tags from their URLs.
-- Every entry pairs a predicate over the URL string with the tag associated with it;
-- predicates are arbitrary functions (so one might use regexps as well).
-- Order of the result: all prefix rules, then all infix rules, then the special cases.
urlTagDB :: [(String -> Bool, String)]
urlTagDB = concat
  [ [ (isPrefixOf urlPrefix, tag) | (urlPrefix, tag) <- prefixMatches ]
  , [ (isInfixOf fragment, tag)   | (fragment, tag)  <- infixMatches ]
  , specialCases
  ]
  where
    -- (URL prefix, tag): the predicate holds when the URL begins with the prefix.
    prefixMatches :: [(String,String)]
    prefixMatches = setLike
      [ ("https://publicdomainreview.org/", "history/public-domain-review")
      , ("https://www.filfre.net/", "technology/digital-antiquarian")
      , ("https://abandonedfootnotes.blogspot.com", "sociology/abandoned-footnotes")
      , ("https://dresdencodak.com", "humor")
      , ("https://www.theonion.com", "humor")
      , ("https://tvtropes.org", "fiction")
      ]

    -- (URL fragment, tag): the predicate holds when the fragment occurs anywhere in the URL.
    infixMatches :: [(String,String)]
    infixMatches = setLike
      [ ("r-project.org", "cs/r")
      , ("haskell.org", "cs/haskell")
      ]

    -- Hand-written predicates that do not fit the single-prefix/single-infix shape.
    specialCases :: [(String -> Bool, String)]
    specialCases =
      [ ((`anyInfix` ["evageeks.org","eva.onegeek.org", "evamonkey.com"]), "anime/eva")
      ]

-- (regex, replacement) pairs for rewriting tag names.
-- testing: unique keys, regex validation
-- NOTE(review): if the consumer applies these in list order, the prefix rules
-- `^cs/` and `^iq/` come before (and may shadow) the exact rules `^cs/c$`,
-- `^cs/r$` and `^iq/high$` — confirm how the consumer applies the list.
wholeTagRewritesRegexes :: [(String,String)]
wholeTagRewritesRegexes =
  setLike
    [ ("^cs/",    "CS/")
    , ("^cs$",    "CS")
    , ("^cs/c$",  "C (language)")
    , ("^cs/r$",  "R (language)")
    , ("^ai/",    "AI/")
    , ("^ai$",    "AI")
    , ("^iq/",    "IQ/")
    , ("^iq$",    "IQ")
    , ("^iq/high$", "high IQ")
    , ("^anime/eva$", "<em>NGE</em>")
    , ("^gan$", "GAN")
    , ("^psychology/", "psych/")
    , ("^technology/", "tech/")
      -- NOTE: nothing is tagged 'doc', so this just sets the <title> on /doc/index
      -- to something more useful than '<code>docs</code> tag'.
    , ("^doc$", "Tags Index")
    , ("^genetics/selection$",         "evolution")
    , ("^genetics/selection/natural$", "natural selection")
    , ("^artificial/selection$",       "genetics/selection/artificial")
    , ("^genetics/selection/artificial-selection$", "genetics/selection/artificial")
    ]

-- intended for use with full literal fixed-string matches, not regexps/infix/suffix/prefix matches.
-- testing: unique keys; keys are all-lowercase (on-disk directories are always lowercase)
--
-- The three association lists declared by the shared signature below:
--  - 'tagsShort2LongRewrites': hand-maintained (shortcut-or-typo, replacement) pairs; custom tag shortcuts, to fix typos etc.
--  - 'tagsShort2Long': 'tagsShort2LongRewrites' followed by pairs obtained by inverting 'tagsLong2Short'.
--  - 'tagsLong2Short': (tag, displayed name) pairs.
--
-- NOTE(review): some keys in 'tagsShort2LongRewrites' are not lowercase ("T5", "O", "C", "E", "A", "N", "IQ", "G");
-- presumably the all-lowercase test applies only to 'tagsLong2Short' keys — confirm.
-- NOTE(review): the key "low/[ iq" looks like a keyboard slip (perhaps for "low/iq") — confirm before relying on it.
-- NOTE(review): several replacements are themselves short forms ("dall-e/3", "silk-road/2", "sparsity") or are
-- themselves keys ("agreeble" → "agreeable", "sigler" → "stigler"), so the consumer presumably resolves the
-- replacement further rather than using it verbatim — confirm against the caller.
tagsLong2Short, tagsShort2Long, tagsShort2LongRewrites :: [(String,String)]
tagsShort2LongRewrites =
   [("power", "statistics/power-analysis"), ("statistics/power", "statistics/power-analysis"), ("reinforcement-learning/robotics", "reinforcement-learning/robot")
   , ("reinforcement-learning/robotic", "reinforcement-learning/robot"), ("dogs", "dog"), ("dog/genetics", "genetics/heritable/dog")
   , ("dog/cloning", "genetics/cloning/dog")
   , ("T5", "ai/nn/transformer/t5"), ("link-rot", "cs/linkrot"), ("linkrot", "cs/linkrot")
   , ("ai/clip", "ai/nn/transformer/clip"), ("clip/samples", "ai/nn/transformer/clip/sample"), ("samples", "ai/nn/transformer/clip/sample")
   , ("japanese", "japan"), ("quantised", "ai/nn/sparsity/low-precision"), ("quantized", "ai/nn/sparsity/low-precision")
   , ("quantization", "ai/nn/sparsity/low-precision") , ("reduced-precision", "ai/nn/sparsity/low-precision"), ("mixed-precision", "ai/nn/sparsity/low-precision"), ("evolution", "genetics/selection/natural")
   , ("gpt-2", "ai/nn/transformer/gpt/2"), ("gpt2", "ai/nn/transformer/gpt/2")
   , ("gpt-3", "ai/nn/transformer/gpt/3"), ("gpt3", "ai/nn/transformer/gpt/3"), ("gpt/nonfiction", "ai/nn/transformer/gpt/nonfiction")
   , ("red", "design/typography/rubrication"), ("self-attention", "ai/nn/transformer/attention"), ("efficient-attention", "ai/nn/transformer/attention")
   , ("ai/rnn", "ai/nn/rnn"), ("ai/retrieval", "ai/nn/retrieval"), ("mr", "genetics/heritable/correlation/mendelian-randomization")
   , ("japan/anime", "anime"), ("psychology/bird", "psychology/animal/bird"), ("psychology/birds/neuroscience", "psychology/animal/bird/neuroscience")
   , ("psychology/birds", "psychology/animal/bird"), ("dalle", "dall-e/3"), ("dall-e", "ai/nn/transformer/gpt/dall-e/3"), ("dall-e-3", "ai/nn/transformer/gpt/dall-e/3"), ("dalle3", "ai/nn/transformer/gpt/dall-e/3"), ("dalle-3", "ai/nn/transformer/gpt/dall-e/3"), ("dall-1", "ai/nn/transformer/gpt/dall-e/1"), ("dall-e-1", "ai/nn/transformer/gpt/dall-e/1"), ("dall-2", "ai/nn/transformer/gpt/dall-e/2"), ("dall-e-2", "ai/nn/transformer/gpt/dall-e/2"), ("dall-3", "ai/nn/transformer/gpt/dall-e/3"), ("ai/gpt/dall-e", "ai/nn/transformer/gpt/dall-e/3")
   , ("darknet-markets", "darknet-market"), ("silk-road-1", "darknet-market/silk-road/1"), ("sr1", "darknet-market/silk-road/1"), ("sr-1", "darknet-market/silk-road/1"), ("sr-2", "darknet-market/silk-road/2")
   , ("silk-road-2", "darknet-market/silk-road/2"), ("sr2", "darknet-market/silk-road/2"), ("sr/1", "darknet-market/silk-road/1"), ("silkroad-2", "silk-road/2"), ("silkroad2", "silk-road/2")
   , ("sr/2", "darknet-market/silk-road/2"), ("sr", "darknet-market/silk-road"), ("psychology/neuroscience/bird", "psychology/animal/bird/neuroscience"), ("bird-brain", "psychology/animal/bird/neuroscience"), ("bird/brain", "psychology/animal/bird/neuroscience"), ("brain/bird", "psychology/animal/bird/neuroscience")
   , ("uighurs", "history/uighur"), ("ai/adversarial", "ai/nn/adversarial"), ("add", "psychiatry/adhd")
   , ("asperger", "psychiatry/autism"), ("aspergers", "psychiatry/autism"), ("personality/conscientiousness", "psychology/personality/conscientiousness")
   , ("conscientiousness", "psychology/personality/conscientiousness"), ("anorexia-nervosa", "psychiatry/anorexia"), ("anxiety-disorder", "psychiatry/anxiety")
   , ("masked-auto-encoder", "ai/nn/vae/mae"), ("masked-autoencoder", "ai/nn/vae/mae"), ("masked", "ai/nn/vae/mae"), ("autoencoder", "ai/nn/vae"), ("auto-encoder", "ai/nn/vae")
   , ("algoprithm", "algorithm")
   , ("alzheimer's", "psychiatry/alzheimers"), ("ad", "advertising"), ("alzheimers-disease", "psychiatry/alzheimers")
   , ("alzheimer", "psychiatry/alzheimers"), ("psychedelics", "psychedelic"), ("stylometric", "statistics/stylometry")
   , ("stylometrics", "statistics/stylometry"), ("dune", "fiction/science-fiction/frank-herbert"), ("herbert", "fiction/science-fiction/frank-herbert")
   , ("instruct-tuning", "instruction-tuning"), ("instruction-finetuning", "instruction-tuning"), ("psychopath", "psychology/personality/psychopathy")
   , ("sociopath", "psychology/personality/psychopathy"), ("sociopathy", "psychology/personality/psychopathy"), ("psychopathic", "psychology/personality/psychopathy"), ("sociopathic", "psychology/personality/psychopathy")
   , ("cognitive-biases", "psychology/cognitive-bias"), ("sort", "cs/algorithm/sorting"), ("moe", "ai/scaling/mixture-of-experts")
   , ("ai/datasets", "ai/dataset"), ("ai/gan", "ai/nn/gan"), ("safety", "reinforcement-learning/safe")
   , ("ads", "economics/advertising"), ("rl/scaling", "reinforcement-learning/scaling"), ("rl/scale", "reinforcement-learning/scaling")
   , ("reinforcement-learning/scale", "reinforcement-learning/scaling"), ("rl-scaling", "reinforcement-learning/scaling"), ("scaling/rl", "reinforcement-learning/scaling")
   , ("scaling/reinforcement-learning", "reinforcement-learning/scaling"), ("reinforcement-learning/alphago", "reinforcement-learning/model/alphago"), ("evolution/human", "genetics/selection/natural/human"), ("rl/model", "reinforcement-learning/model")
   , ("rl/chess", "reinforcement-learning/chess"), ("xrisk", "existential-risk"), ("risk", "existential-risk")
   , ("human-adversarial", "ai/nn/adversarial/human"), ("adversarial-human", "ai/nn/adversarial/human"), ("mlps", "ai/nn/fully-connected")
   , ("mlp", "ai/nn/fully-connected"), ("gpt-4", "ai/nn/transformer/gpt/4"), ("gpt4", "ai/nn/transformer/gpt/4")
   , ("fim", "anime/my-little-pony"), ("mylittlepony", "anime/my-little-pony"), ("pony", "anime/my-little-pony"), ("mlpfim", "anime/my-little-pony"), ("mlp-fim", "anime/my-little-pony")
   , ("gp-4", "ai/nn/transformer/gpt/4"), ("gpt-5", "ai/nn/transformer/gpt/5"), ("gpt5", "ai/nn/transformer/gpt/5")
   , ("gp-5", "ai/nn/transformer/gpt/5"), ("gp5", "ai/nn/transformer/gpt/5"), ("attention/sparse", "ai/nn/transformer/attention/sparsity")
   , ("ai/sparsity", "sparsity")
   , ("gp4-4", "ai/nn/transformer/gpt/4"), ("gp4", "ai/nn/transformer/gpt/4"), ("gpt-4/nonfiction", "ai/nn/transformer/gpt/4/nonfiction")
   , ("ai/nn/transformer/gpt/4/non-fiction", "ai/nn/transformer/gpt/4/nonfiction"), ("gpt-4/non-fiction", "ai/nn/transformer/gpt/4/nonfiction")
   , ("4/non", "ai/nn/transformer/gpt/4/nonfiction")
   , ("gpt-4/fiction", "ai/nn/transformer/gpt/4/fiction"), ("gpt-4/poetry", "ai/nn/transformer/gpt/4/poetry"), ("gpt-4poetry", "ai/nn/transformer/gpt/4/poetry")
   , ("gpt4/poetry", "ai/nn/transformer/gpt/4/poetry"), ("gpt-4/poem", "ai/nn/transformer/gpt/4/poetry"), ("chess", "reinforcement-learning/chess")
   , ("rl-chess", "reinforcement-learning/chess"), ("aimusic", "ai/music")
   , ("animal", "psychology/animal"), ("code", "cs")
   , ("for", "statistics/prediction"), ("forecast", "statistics/prediction"), ("forecasting", "statistics/prediction")
   , ("genetic", "genetics"), ("genomic", "genetics"), ("genomics", "genetics"), ("graph", "design/visualization"), ("hardware" , "cs/hardware")
   , ("human" , "genetics/selection/natural/human"), ("learning", "reinforcement-learning"), ("sf", "fiction/science-fiction"), ("scifi", "sci-fi")
   , ("text" , "fiction/text-game"), ("psych", "psychology"), ("psych/inner-monologue", "psychology/inner-voice")
   , ("latex", "design/typography/tex"), ("vitamind", "vitamin-d"), ("des", "design")
   , ("attention/recurrence", "attention/recurrent"), ("human-evolution", "genetics/selection/natural/human"), ("attention/algebra", "ai/nn/transformer/attention/linear-algebra"), ("hierarchy", "ai/nn/transformer/attention/hierarchical")
   , ("bpe", "tokenization"), ("bpes", "tokenization"), ("silex", "psychiatry/anxiety/lavender")
   , ("lavandar", "psychiatry/anxiety/lavender"), ("decision-theory", "decision"), ("statistics/decision-theory", "statistics/decision")
   , ("language", "linguistics"), ("auction-design", "auction"), ("bilingualism", "bilingual")
   , ("rare-variants", "rare"), ("explore", "exploration"), ("allergies", "allergy")
   , ("cat-allergy", "cat/biology/allergy"), ("cat-allergies", "cat/biology/allergy"), ("antibodies", "antibody")
   , ("animal/iq", "iq/animal"), ("cellular-automata", "cellular-automaton"), ("mathematics", "math")
   , ("frank-p-ramsey", "frank-ramsey")
   , ("intrasexual-agression", "intrasexual-aggression")
   , ("javascript", "js"), ("psych/chess", "psychology/chess"), ("human-chess", "psychology/chess"), ("human/chess", "psychology/chess"), ("chess/human", "psychology/chess"), ("chess-human", "psychology/chess"), ("self-experiment", "quantified-self")
   , ("energy","psychology/energy"), ("lithium","psychiatry/lithium"), ("sequence", "sequencing"), ("quadratic-vote", "quadratic-voting")
   , ("bipolar/genes", "bipolar/genetics"), ("dynamic-evaliation", "dynamic-evaluation"), ("dog-cloning", "genetics/cloning/dog"), ("elonmusk", "elon-musk")
   , ("dog-clone", "genetics/cloning/dog"), ("dog/clone", "genetics/cloning/dog"), ("cat-drug", "cat/psychology/drug")
   , ("cat/drug", "cat/psychology/drug"), ("cat/silvervine", "cat/psychology/drug/silvervine"), ("fonts", "typography")
   , ("time-lock", "timelock"), ("self-decrypting", "timelock"), ("tokenizer", "tokenization"), ("bash", "shell")
   , ("d+q", "d-q"), ("dasatinib", "d-q"), ("quercetin", "d-q"), ("dastinib", "d-q"), ("dasitinib", "d-q")
   , ("borderline-personality", "borderline"), ("borderline-disorder", "borderline"), ("borderline-personality-disorder", "borderline")
   , ("bp", "bipolar"), ("bpd", "borderline"), ("security-blanket", "transitional-object"), ("comfort-object", "transitional-object")
   , ("animal/psych", "psychology/animal"), ("dataset-pruning", "data-pruning"), ("ai/scaing", "ai/scaling")
   , ("dropcap", "design/typography/dropcap"), ("dropcaps", "design/typography/dropcap"), ("drop-cap", "design/typography/dropcap")
   , ("ai-dropcap", "ai/nn/diffusion/midjourney/dropcap"), ("ai/dropcap", "ai/nn/diffusion/midjourney/dropcap"), ("dropcap-ai", "ai/nn/diffusion/midjourney/dropcap"), ("dropcap-mj", "ai/nn/diffusion/midjourney/dropcap"), ("mj/dropcap", "ai/nn/diffusion/midjourney/dropcap"), ("wolfe", "gene-wolfe"), ("genewolfe", "gene-wolfe")
   , ("batch", "offline"), ("offline-rl", "reinforcement-learning/offline"), ("off-line", "offline")
   , ("recapture", "statistics/order/capture"), ("capture-recapture", "statistics/order/capture"), ("mark", "statistics/order/capture"), ("mark-and-recapture", "statistics/order/capture"), ("mark-and-capture", "statistics/order/capture"), ("mark-recapture", "statistics/order/capture")
   , ("timetravel", "time-travel"), ("inner-monlogue", "inner-monologue")
   , ("narcisism", "narcissism"), ("narcississm", "narcissism"), ("narcissist", "narcissism"), ("narcisist", "narcissism"), ("narcisisst", "narcissism")
   , ("ai/diffusion", "ai/nn/diffusion"), ("longevity/semaglutide", "longevity/glp/semaglutide"), ("dnm-archives", "dnm-archive"), ("ants", "ant"), ("alphazero", "alphago")
   , ("steganographic", "steganography"), ("stenography", "steganography"), ("blacksun", "black-sun"), ("landscapes", "landscape"), ("genewolfe-dropcaps", "genewolfe-dropcap"), ("dropcats", "dropcat"), ("ninits", "ninit")
   , ("publication-bias", "statistics/bias/publication"), ("statistics/bias/publication-bias", "statistics/bias/publication"), ("bias/pbulication", "bias/publication"), ("publication/bias", "bias/publication")
   , ("information-theory", "cs/algorithm/information"), ("compressor", "cs/algorithm/information/compression"), ("xz", "compression"), ("gzip", "compression"), ("cs/algorithm/compression", "cs/algorithm/information/compression"), ("cs/information/compression", "cs/algorithm/information/compression"), ("algorithm/compression", "cs/algorithm/information/compression")
   , ("artificial-selection", "genetics/selection/artificial"), ("bacopa-monnieri", "bacopa"), ("anime/stylegan", "stylegan/anime"), ("video-analysis", "video/analysis")
   , ("search", "google"), ("tbi", "psychiatry/traumatic-brain-injury"), ("osciology", "sociology"), ("microdosing", "nootropic/lsd"), ("micro-dosing", "nootropic/lsd"), ("lsd", "psychedelic/lsd"), ("alcohol", "alcoholism"), ("transfomer", "transformer"), ("transfromer", "transformer"), ("recurrency", "recurrent"), ("ann", "ai/nn"), ("darknet-market/archives", "darknet-market/dnm-archive"), ("nback", "dual-n-back"), ("savantism", "psychology/neuroscience/memory/savant"), ("idiot-savant", "psychology/neuroscience/memory/savant"), ("savant-syndrome", "psychology/neuroscience/memory/savant"), ("bmr", "blackmarket-reloaded"), ("grok", "grokking"), ("grokk", "grokking")
   , ("anaesthesia", "anesthesia"), ("anesthetic", "anesthesia"), ("anesthsia", "anesthesia")
   , ("music-distraction", "psychology/music/distraction"), ("true-sight", "truesight")
   , ("stiglers-diet", "stigler-diet"), ("stigler-problem", "stigler-diet"), ("stiglers-problem", "stigler-diet"), ("stiglers-diet-problem", "stigler-diet"), ("stigler-diet-problem", "stigler-diet"), ("stigler", "stigler-diet"), ("sigler", "stigler")
   , ("disappearing-polymorphs", "disappearing-polymorph"), ("polymorph", "disappearing-polymorph"), ("polymorphs", "disappearing-polymorph"), ("disappear", "disappearing-polymorph")
   , ("miscite", "miscitation"), ("openia", "openai"), ("dnm/evolution", "darknet-market/evolution"), ("bing-sydney", "sydney"), ("sidney", "sydney"), ("sunk-costs", "sunk-cost"), ("sunkcost", "sunk-cost"), ("dnm-evolution", "darknet-market/evolution"), ("dnm", "darknet-market"), ("darknet-markets/evolution", "darknet-market/evolution")
   , ("fermi", "science/fermi-problem"), ("fermi-estimate", "science/fermi-problem"), ("fermi-calculation", "science/fermi-problem"), ("fermi-problems", "science/fermi-problem")
   , ("clock", "longevity/epigenetics"), ("aging-clock", "longevity/epigenetics"), ("epigenetic-clock", "longevity/epigenetics"), ("clocks", "longevity/epigenetics")
   , ("desing", "design"), ("animal/psychology", "psychology/animal"), ("ramssey", "ramsey"), ("hrv", "nootropic/quantified-self/heart-rate-variability")
   , ("eamcs", "emacs"), ("elisp", "emacs"), ("emacs-lisp", "emacs")
   , ("adblocker", "adblock"), ("adblocking", "adblock"), ("ad-block", "adblock")
   , ("order-statistics", "statistics/order")
   , ("aphantasic", "aphantasia"), ("aphantasy", "aphantasia"), ("economic", "economics"), ("math-humor", "math/humor"), ("low-iq", "iq/low"), ("low/[ iq", "iq/low"), ("stylegan-2", "stylegan"), ("stylegan/2", "stylegan")
   , ("marbling", "paper-marbling"), ("marble", "paper-marbling"), ("marbling-paper", "paper-marbling"), ("marble-paper", "paper-marbling")
   , ("microbion", "microbiome"), ("germfree", "germ-free"), ("selfsinking", "self-sinking")
   , ("dantzig", "stigler-diet"), ("4/nonficton", "4/nonfiction"), ("nonficton", "nonfiction")
   , ("seriate", "seriation"), ("seriations", "seriation"), ("series", "seriation")
   , ("longnow", "long-now"), ("deep-seek", "deepseek"), ("ds", "deepseek"), ("deepsek", "deepseek"), ("cat/allergy", "cat/biology/allergy")
   , ("m", "model"), ("mf", "model-free"), ("m-f", "model-free")
   , ("technology/sociology", "sociology/technology"), ("socciology", "sociology")
   , ("open", "openness"), ("openess", "openness"), ("opennes", "openness")
   , ("neurotic", "neuroticism"), ("instability", "neuroticism"), ("instable", "neuroticism"), ("stable", "neuroticism")
   , ("agreeable", "psychology/personality/agreeableness"), ("agreeble", "agreeable")
   , ("intraversion", "extraversion"), ("introversion", "extraversion"), ("introverted", "extraversion"), ("extravert", "extraversion"), ("extroversion", "extraversion"), ("extroverted", "extraversion")
   , ("O", "openness"), ("C", "conscientiousness"), ("E", "extraversion"), ("A", "agreeableness"), ("N", "neuroticism"), ("IQ", "iq"), ("G", "iq"), ("rg", "genetics/heritable/correlation")
   , ("physic", "physics"), ("pyhsics", "physics")
   , ("marl", "reinforcement-learning/multi-agent"), ("borge", "borges"), ("jlborge", "borges"), ("jlborges", "borges"), ("iq-ses", "iq/ses")
   , ("chemist", "chemistry"), ("chemisty", "chemistry"), ("chemical", "chemistry")
   , ("preference-falsification", "sociology/false-preference"), ("4.5", "4-5"), ("45", "4-5"),("54", "4-5"),("5.4", "4-5"), ("5-4", "4-5"), ("gpt-4-5", "ai/nn/transformer/gpt/4-5"), ("gpt-45", "ai/nn/transformer/gpt/4-5"), ("gpt-4.5", "ai/nn/transformer/gpt/4-5"), ("gpt45", "ai/nn/transformer/gpt/4-5")
   , ("vr", "virtual-reality"), ("v-r", "virtual-reality"), ("virtualreality", "virtual-reality"), ("oculus", "virtual-reality"), ("vrhmd", "virtual-reality")
   , ("retratutide", "retatrutide"), ("retrutide", "retatrutide"), ("sclaing", "scaling"), ("frankherbert", "frank-herbert"), ("wp", "wikipedia"), ("quaker", "quakers"), ("qaker", "quakers"),("small", "sociology/small-groups"), ("smallgroups", "sociology/small-groups"), ("smallgroup", "sociology/small-groups")
   , ("parasociality", "parasocial"), ("parasocialness", "parasocial")
   , ("hunting-lion", "math/humor/lion-hunting"), ("hunting-lions", "math/humor/lion-hunting"), ("lions", "math/humor/lion-hunting"), ("huntinglions", "math/humor/lion-hunting"), ("huntinglion", "math/humor/lion-hunting"), ("lizard-man", "lizardman"), ("lizard-men", "lizardman"), ("lizardmen", "lizardman"), ("lizardmen-constant", "lizardman"), ("lizardman-constant", "lizardman"), ("constant", "lizardman")
   , ("pipelines", "statistics/order/selection/pipeline"), ("leaky-pipelines", "statistics/order/selection/pipeline"), ("leaky-pipeline", "statistics/order/selection/pipeline"), ("leakypipeline", "statistics/order/selection/pipeline"), ("leakypipelines", "statistics/order/selection/pipeline")
   , ("regression-to-the-mean", "regression-to-mean"), ("regressiontothemean", "regression-to-mean"), ("regression-mean", "mean-regression"), ("reversion-to-the-mean", "regression-to-mean"), ("reversion-to-mean", "regression-to-mean"), ("mean-reversion", "regression-to-mean"), ("reversion", "regression-to-mean"), ("mean", "regression-to-mean")
   , ("lafferty", "r-a-lafferty"), ("ralafferty", "r-a-lafferty"), ("r-lafferty", "r-a-lafferty")
   , ("40", "4o"), ("04", "4o"), ("4-o", "4o"), ("o-4", "4o"), ("o4", "4o"), ("hertaible", "heritable")
   , ("claude-4", "claude/4"), ("c4", "claude/4"), ("claude4", "claude/4")
   , ("animal-bias", "statistics/bias/animal"), ("animal/bias", "statistics/bias/animal"), ("schema", "scheme")
   , ("wm", "dual-n-back"), ("radaince", "radiance"), ("synthetic", "synthesis"), ("nail", "nail-growth"), ("psychology/cat", "cat/psychology"), ("modecollapse", "mode-collapse"), ("hakell", "haskell"), ("shortsleep", "zeo/short-sleeper"), ("nuroscience", "neuroscience"), ("human-smell", "psychology/smell/human"), ("human/smell", "psychology/smell/human"), ("man-hand", "man-hands"), ("manhands", "man-hands"), ("manhand", "man-hands"), ("hand", "man-hands"), ("hands", "man-hands"), ("presonal", "personal"), ("spider", "biology/portia"), ("humor/math", "math/humor"), ("innermonologue", "inner-monologue"), ("palm2", "palm/2"), ("dnd", "text-game"), ("cty", "smpy")
   , ("qeueing", "queuing"), ("qeuing", "queuing"), ("queing", "queuing"), ("queing-theory", "queuing"), ("queing_theory", "queuing"), ("queingtheory", "queuing")
   , ("queueing-theory", "queuing"), ("queueing_theory", "queuing"), ("queueingtheory", "queuing"), ("queue", "queuing"), ("queues", "queuing"), ("queue-theory", "queuing"), ("queue_theory", "queuing"), ("queuetheory", "queuing"), ("queuingtheory", "queuing"), ("queuing-theory", "queuing"), ("queuing_theory", "queuing"), ("quing", "queuing")   -- there are just way too many ways to spell 'queue'...
   , ("japanese/art", "japan/art"), ("rlhf", "preference-learning"), ("evoluton", "evolution"), ("epigentics", "epigenetics")
   , ("kufic", "design/typography/square"), ("square-kufic", "design/typography/square"), ("kufic-square", "design/typography/square"), ("textgame", "text-game"), ("got", "gpt"), ("high-iq", "iq/high"), ("nootropics", "nootropic"), ("bac", "abc"), ("cab", "abc"), ("acb", "abc"), ("hardware/scaling", "scaling/hardware")
   , ("sunkcosts", "sunk-cost"), ("sparse", "sparsity"), ("ai/nn/transformer/attention/sparse", "ai/nn/transformer/attention/sparsity"), ("wrtigin", "writing")
   , ("star-war", "star-wars"), ("starwars", "star-wars"), ("starwar", "star-wars")
   , ("hydranencephaly", "anencephaly"), ("aencephaly", "anencephaly"), ("amencephaly", "anencephaly"), ("anancephaly", "anencephaly"), ("ancephaly", "anencephaly"), ("anecephaly", "anencephaly"), ("anecnephaly", "anencephaly"), ("anencefaly", "anencephaly"), ("anenceohalyt", "anencephaly"), ("anencepahly", "anencephaly"), ("anencepaly", "anencephaly"), ("anencephal", "anencephaly"), ("anencephaley", "anencephaly"), ("anencephalg", "anencephaly"), ("anencephalt", "anencephaly"), ("anencephay", "anencephaly"), ("anencephlay", "anencephaly"), ("anencephly", "anencephaly"), ("anencepphaly", "anencephaly"), ("anencphaly", "anencephaly"), ("anencwphaly", "anencephaly"), ("anenecephaly", "anencephaly"), ("anenecphaly", "anencephaly"), ("anenenecephaly", "anencephaly"), ("anennccephaly", "anencephaly"), ("anensefaly", "anencephaly"), ("anensephaly", "anencephaly"), ("anincephaly", "anencephaly"), ("annencephaly", "anencephaly"), ("gydrancrephaly", "anencephaly"), ("hydracephaly", "anencephaly"), ("hydrancefaly", "anencephaly"), ("hydrancepahly", "anencephaly"), ("hydrancepaly", "anencephaly"), ("hydrancephakt", "anencephaly"), ("hydrancephal", "anencephaly"), ("hydrancephaley", "anencephaly"), ("hydrancephalg", "anencephaly"), ("hydrancephay", "anencephaly"), ("hydrancephlay", "anencephaly"), ("hydrancephly", "anencephaly"), ("hydrancepphaly", "anencephaly"), ("hydrancwphaly", "anencephaly"), ("hydranecephaly", "anencephaly"), ("hydranecphaly", "anencephaly"), ("hydranephaly", "anencephaly"), ("hydrannccephaly", "anencephaly"), ("hydransefaly", "anencephaly"), ("hydransephaly", "anencephaly"), ("hydranxephaly", "anencephaly"), ("hydrnacephaly", "anencephaly"), ("hydroancephaly", "anencephaly"), ("hydrocephaly", "anencephaly"), ("hydrrancephaly", "anencephaly"), ("hyrdrancephaly", "anencephaly"), ("jydrancephaly", "anencephaly"), ("naencephaly", "anencephaly"), ("nencephaly", "anencephaly"), ("snencephaly", "anencephaly")
   , ("physucs", "physics"), ("illusoin", "illusion"), ("opetry", "poetry"), ("poem", "poetry"), ("poet", "poetry"), ("typogrpahy", "typography"), ("bipolar-energy", "bipolar/energy"), ("clade-4", "claude/4"), ("discrete-diffusion", "diffusion/discrete"), ("discrete/diffusion", "diffusion/discrete"), ("working-memory", "dnb")
   , ("ai/nn/transformer/gpt/non-fiction", "ai/nn/transformer/gpt/nonfiction"), ("ai/nn/transformer/gpt/5/4-5", "ai/nn/transformer/gpt/4-5"), ("non-fiction", "nonfiction"), ("nonfiction" , "ai/nn/transformer/gpt/nonfiction"), ("piblication-bias", "statistics/bias/publication"), ("embryo-selection", "selection/artificial"), ("embryo/selection", "selection/artificial"), ("selection/embryo", "selection/artificial"), ("multiagent", "multi-agent"), ("stegranoaphy", "steganography"), ("dpeeseek", "deepseek")
   ]
   -- , ("genetics/artificial", "genetics/selection/artificial"), ("artificial", "ai"),  ("genetics/selection/artificial/apple-breeding","genetics/selection/artificial/apple"), ("apples", "genetics/selection/artificial/apple"),

tagsShort2Long = tagsShort2LongRewrites ++ inferredFromDisplayNames
  -- 'tagsShort2LongRewrites' (custom tag shortcuts, to fix typos etc) come first;
  -- the inferred entries are appended after them.
  where
    -- Attempt to infer short → long rewrites from the displayed tag names, which are
    -- long → short: each (tag, display name) pair is flipped and the display name is
    -- lowercased to serve as the key. Many display names are inherently invalid as
    -- keys and the mapping only goes one way, so any display name containing a space,
    -- angle bracket, or parenthesis is dropped.
    inferredFromDisplayNames =
      [ (map toLower displayName, longTag)
      | (longTag, displayName) <- tagsLong2Short
      , not (displayName `anyInfix` [" ", "<", ">", "(",")"])
      ]

-- Short strings on the blacklist for short-tag guessing.
-- testing: unique list
-- Used primarily in `Tags.guessTagFromShort`.
shortTagBlacklist :: [String]
shortTagBlacklist =
  setLike
    [ "a", "al", "an", "analysis", "and", "are", "as", "at", "be", "box"
    , "done", "e", "error", "f", "fine", "free", "g", "git", "if", "in"
    , "is", "it", "of", "on", "option", "rm", "sed", "strong", "t", "the"
    , "to", "tr", "up", "we", "ls"
    , "<ul>", "<ol>", "<p>", "<blockquote>"
    ]

-- testing: unique all
tagsLong2Short = reverse [ -- priority: first one wins. so sub-directories should come before their directories if they are going to override the prefix.
  ("traffic/ab-testing", "Web A/B testing") -- NOTE: TLAs cannot be put into smallcaps because we italicize tags but Source Serif Pro does not have italic smallcaps <https://github.com/adobe-fonts/source-serif/issues/46>.
  , ("science/fermi-problem", "Fermi problems")
  , ("science/chemistry/disappearing-polymorph", "disappearing polymorphs (chemistry)")
  , ("science/chemistry", "chemistry")
  , ("science/physics/astronomy", "astronomy")
  , ("science/physics", "physics")
  , ("technology/virtual-reality", "VR")
  , ("technology/northpaw", "North Paw compass")
  , ("technology/self-sinking", "self-sinking waste disposal")
  , ("technology/google/alerts", "Google Alerts")
  , ("statistics/probability/queueing", "queueing")
  , ("statistics/probability", "probability")
  , ("statistics/peer-review", "peer review")
  , ("statistics/causality", "causality")
  , ("statistics/bias/publication/miscitation", "miscite bias")
  , ("statistics/bias/publication", "publication bias")
  , ("statistics/bias/animal", "animal study methodology")
  , ("statistics/bias", "scientific bias")
  , ("statistics/bayes/hope-function", "Hope function")
  , ("reinforcement-learning/safe/clippy", "Clippy (AI safety)")
  , ("reinforcement-learning/imperfect-information/poker", "poker AI")
  , ("reinforcement-learning/imperfect-information/hanabi", "<em>Hanabi</em> AI")
  , ("reinforcement-learning/imperfect-information/diplomacy", "<em>Diplomacy</em> AI")
  , ("reinforcement-learning/imperfect-information", "hidden-information game")
  , ("reinforcement-learning/imitation-learning/brain-imitation-learning", "brain imitation learning")
  , ("reinforcement-learning/imitation-learning",                          "imitation learning")
  , ("reinforcement-learning/offline",                          "offline RL")
  , ("reinforcement-learning/armstrong-controlproblem", "Armstrong’s control problem")
  , ("psychology/neuroscience/anencephaly", "anencephaly")
  , ("psychology/neuroscience/memory/savant", "savantism")
  , ("psychology/neuroscience/memory", "memory")
  , ("psychology/neuroscience/pain/anesthesia", "anesthesia")
  , ("psychology/neuroscience/pain",            "pain")
  , ("psychology/inner-voice", "inner voice (psych)")
  , ("psychology/writing", "writing psychology")
  , ("psychology/willpower", "willpower")
  , ("psychology/vision/dream", "dreams")
  , ("psychology/vision/aphantasia", "aphantasia")
  , ("psychology/vision", "sight")
  , ("psychology/smell/human", "human olfaction")
  , ("psychology/smell/perfume", "perfume")
  , ("psychology/smell", "smelling")
  , ("psychology/linguistics/bilingual", "bilingualism")
  , ("psychology/linguistics", "language")
  , ("psychology/collecting", "collector psychology")
  , ("psychology/cognitive-bias/illusion-of-depth/extramission", "extramission sight theory")
  , ("psychology/cognitive-bias/illusion-of-depth", "illusion-of-depth bias")
  , ("psychiatry/meditation/lewis", "Lewis’s meditation experiment")
  , ("psychiatry/lithium", "lithium-in-water")
  , ("psychiatry/autism/schizoid", "schizoid personality")
  , ("psychiatry/autism", "autism")
  , ("psychiatry/alcoholism", "alcoholism")
  , ("philosophy/religion", "religion")
  , ("philosophy/ontology", "ontology")
  , ("philosophy/mind", "mind")
  , ("philosophy/logic", "logic")
  , ("personal/twitter", "Twitter analytics")
  , ("personal/mulberry-tree", "my mulberry tree")
  , ("personal/2013-cicadas", "2013 cicadas")
  , ("personal/2011-gwern-yourmorals.org", "Gwern’s YourMorals surveys")
  , ("nootropic/phenibut", "phenibut")
  , ("nootropic/caffeine", "caffeine")
  , ("nootropic/bacopa", "<em>Bacopa</em>")
  , ("nootropic/magnesium", "magnesium (nootropic)")
  , ("nootropic/potassium", "potassium (sleep)")
  , ("math/humor/lion-hunting", "lion-hunting math")
  , ("math/humor", "STEM humor")
  , ("longevity/metformin", "metformin")
  , ("longevity/fasting", "fasting")
  , ("longevity/epigenetics", "epigenetics (aging)")
  , ("longevity/aspirin", "aspirin (aging)")
  , ("japan/history/tominaga-nakamoto", "Tominaga Nakamoto")
  , ("genetics/selection/artificial/index-selection", "index selection (breeding)")
  , ("genetics/selection/www.mountimprobable.com", "<em>Climbing Mt. Improbable</em>")
  , ("genetics/heritable/adoption", "adoption studies")
  , ("genetics/genome-synthesis/virus-proof", "virus-proof cells")
  , ("genetics/genome-synthesis", "genome synthesis")
  , ("food/mead", "mead")
  , ("fiction/science-fiction/star-wars", "<em>Star Wars</em>")
  , ("fiction/science-fiction/r-a-lafferty", "R. A. Lafferty")
  , ("fiction/science-fiction/batman", "<em>Batman</em> (story)")
  , ("fiction/science-fiction/time-travel", "time-travel")
  , ("fiction/science-fiction/frank-herbert", "<em>Dune</em>")
  , ("fiction/science-fiction", "Sci-Fi")
  , ("fiction/fantasy", "fantasy")
  , ("fiction/humor/hardtruthsfromsoftcats.tumblr.com", "<em>Hard Truths From Soft Cats</em>")
  , ("fiction/humor/dinosaur-comics", "<em>Dinosaur Comics</em>")
  , ("existential-risk/nuclear/hofstadter", "nuclear war (Hofstadter)")
  , ("existential-risk/nuclear", "nuclear war")
  , ("economics/perpetuities", "perpetuities")
  , ("economics/copyright", "copyright")
  , ("economics/automation/metcalfes-law", "Metcalfe’s Law")
  , ("economics/automation", "tech economics")
  , ("economics/mechanism-design/quadratic-voting", "quadratic voting")
  , ("economics/mechanism-design/auction", "auctions")
  , ("economics/mechanism-design", "mechanism design")
  , ("design/typography/floral", "floral ornaments")
  , ("design/typography/paper-marbling", "paper marbling")
  , ("design/typography/square", "block fonts")
  , ("design/typography/dropcap", "dropcaps (typography)")
  , ("design/typography/sidenote", "sidenotes (typography)")
  , ("design/typography/sentence-spacing", "sentence-spacing (typography)")
  , ("darknet-market/silk-road/1/lsd", "SR1 LSD")
  , ("cs/security", "computer security")
  , ("cs/lisp/emacs", "Emacs")
  , ("cs/lisp/scheme", "Scheme")
  , ("cs/lisp", "Lisp")
  , ("cs/hardware", "computer hardware")
  , ("cs/cryptography/steganography", "steganography")
  , ("cs/cryptography/nash", "John Nash (cryptography)")
  , ("cs/cryptography/timelock", "timelock crypto")
  , ("cs/algorithm/information/compression", "compression")
  , ("cs/algorithm/information", "information theory")
  , ("cs/algorithm/sorting/seriation", "seriation")
  , ("cs/algorithm/sorting", "sorting")
  , ("cs/algorithm", "algorithm")
  , ("cs/computable", "computability")
  , ("cat/biology/taurine", "taurine (cat)")
  , ("cat/biology/allergy/antibody", "cat-allergen antibody")
  , ("cat/biology/allergy", "cat allergies")
  , ("cat/biology", "cat biology")
  , ("biology/booger", "boogers")
  , ("biology/ant", "ants")
  , ("anime/my-little-pony", "<em>My Little Pony</em>")
  , ("anime/hafu", "<em>hafu</em> (anime)")
  , ("anime/nadia", "<em>Nadia</em> (anime)")
  , ("anime/eva/rebuild/2/2010-crc", "<em>Rebuild 2.0</em> book")
  , ("anime/eva/rebuild/2", "<em>Rebuild 2.0</em>")
  , ("anime/eva/rebuild", "<em>Rebuild</em> (NGE)")
  , ("anime/eva/notenki-memoirs/blue-blazes", "<em>Blue Blazes</em>")
  , ("anime/eva/notenki-memoirs", "<em>Notenki Memoirs</em>")
  , ("anime/eva/little-boy/otaku-talk", "“Otaku Talk” roundtable")
  , ("ai/scaling/economics", "AI economics")
  , ("ai/scaling/hardware", "AI hardware")
  , ("ai/poetry", "poetry by AI")
  , ("ai/nn/transformer/gpt/calibration", "LLM calibration")
  -- , ("ai/nn/transformer/fiction", "Transformer fiction")
  , ("ai/nn/gan/stylegan/progan", "ProGAN")
  , ("ai/nn/gan/data-augmentation", "data-augmented GANs")
  , ("ai/nn/diffusion/discrete", "discrete diffusion NN")
  , ("ai/nn/diffusion/imagen", "Google Imagen")
  , ("ai/nn/diffusion/midjourney/dropcap/ninit", "Ninit dropcaps")
  , ("ai/nn/diffusion/midjourney/dropcap/genewolfe-dropcap", "Gene Wolfe dropcaps")
  , ("ai/nn/diffusion/midjourney/dropcap/dropcat", "Dropcat dropcaps")
  , ("ai/nn/diffusion/midjourney/dropcap", "dropcaps (AI typography)")
  , ("ai/nn/diffusion/midjourney/black-sun", "black sun sigil")
  , ("ai/nn/diffusion/midjourney/landscape", "Midjourney landscapes")
  , ("ai/nn/diffusion/midjourney", "Midjourney")
  , ("ai/highleyman", "Highleyman’s AI")
  , ("psychology/music/distraction", "music distraction")
  , ("psychology/music", "music psychology")
  , ("psychology/neuroscience/tcs", "TDCS")
  , ("traffic", "site traffic")
  , ("co2", "CO<sub>2</sub>")
  , ("zeo/short-sleeper", "short sleepers")
  , ("zeo", "sleep")
  , ("touhou", "Touhou")
  , ("bitcoin/pirateat40", "Pirateat40")
  , ("bitcoin/nashx", "Nash eXchange")
  , ("bitcoin", "Bitcoin")
  , ("borges", "Borges")
  , ("algernon", "Algernon’s Law")
  , ("japan/poetry/teika",    "Fujiwara no Teika")
  , ("japan/poetry/shotetsu", "Shōtetsu")
  , ("japan/poetry/zeami",    "Zeami Motokiyo (Noh)")
  , ("japan/poetry", "Japanese poetry")
  , ("japan/art", "Japanese art")
  , ("japan/history", "Japanese history")
  , ("japan", "Japan")
  , ("long-now", "Long Now")
  , ("radiance", "<em>Radiance</em> (novel)")
  , ("psychology/cognitive-bias/stereotype-threat", "stereotype threat")
  , ("psychology/cognitive-bias/sunk-cost", "sunk cost bias")
  , ("psychology/cognitive-bias", "cognitive bias")
  , ("wikipedia", "Wikipedia")
  , ("insight-porn", "insight porn")
  , ("fiction/poetry", "poetry")
  , ("fiction/opera", "opera")
  , ("biology/portia", "<em>Portia</em> spider")
  , ("history/medici", "the Medici")
  , ("lesswrong-survey/hpmor", "<em>HP:MoR</em> surveys")
  , ("lesswrong-survey", "LW surveys")
  , ("modafinil/survey", "modafinil surveys")
  , ("modafinil/darknet-market", "modafinil (DNM)")
  , ("crime/terrorism/rumiyah", "<em>Rumiyah</em> (ISIS)")
  , ("crime/terrorism", "terrorism")
  , ("cat/psychology/earwax", "cats & earwax")
  , ("cat/genetics", "cat genetics")
  , ("cat/psychology/drug/silvervine", "silvervine (cat)")
  , ("cat/psychology/drug/catnip/survey", "catnip survey")
  , ("cat/psychology/drug/catnip", "catnip")
  , ("cat/psychology/drug/tatarian-honeysuckle", "Tatarian honeysuckle (cat)")
  , ("cat/psychology/drug/valerian", "Valerian (cat)")
  , ("cat/psychology/drug", "cat drugs")
  , ("cat/psychology", "cat psychology")
  , ("fiction/gene-wolfe/suzanne-delage", "Wolfe’s “Delage” ")
  , ("fiction/gene-wolfe", "Gene Wolfe")
  , ("fiction/text-game", "text game")
  , ("fiction/humor", "humor")
  , ("fiction/criticism", "literary criticism")
  , ("economics/advertising/adblock", "adblocking")
  , ("economics/advertising", "ads")
  , ("economics/experience-curve", "experience curve")
  , ("economics/georgism", "Georgism")
  , ("genetics/microbiome/acne", "acne")
  , ("genetics/microbiome/germ-free", "germ-free animals")
  , ("genetics/microbiome", "microbiome")
  , ("genetics/heritable/correlation/mendelian-randomization", "Mendelian Randomization")
  , ("genetics/heritable/correlation", "genetic correlation")
  , ("genetics/gametogenesis", "gametogenesis")
  , ("genetics/selection/artificial/apple", "apple breeding")
  , ("genetics/selection/artificial", "breeding")
  , ("genetics/selection/natural/human/dysgenics", "dysgenics")
  , ("genetics/selection/natural/human", "human evolution")
  , ("genetics/heritable/emergenesis", "emergenesis")
  , ("genetics/heritable/rare", "rare mutations")
  , ("genetics/heritable/dog", "dog genetics")
  , ("genetics/heritable", "heritability")
  , ("genetics/cloning/dog", "dog cloning")
  , ("genetics/cloning", "cloning")
  , ("genetics/editing", "gene editing")
  , ("genetics/sequencing", "genome sequencing")
  , ("longevity/senolytic/d-q", "D+Q senolytic")
  , ("longevity/senolytic", "senolytic")
  , ("longevity/johan-bjorksten", "Johan Bjorksten (aging)")
  , ("psychiatry/meditation", "meditation")
  , ("psychiatry/depression", "depression")
  , ("psychiatry/bipolar/autism", "bipolar/autism")
  , ("psychiatry/bipolar/elon-musk", "Elon Musk")
  , ("psychiatry/bipolar/sleep", "bipolar/sleep")
  , ("psychiatry/bipolar/lithium", "lithium (bipolar)")
  , ("psychiatry/bipolar/energy", "bipolar personality")
  , ("psychiatry/bipolar/genetics", "bipolar genes")
  , ("psychiatry/bipolar", "bipolar")
  , ("psychiatry/borderline/transitional-object", "security blanket (BPD)")
  , ("psychiatry/borderline", "borderline")
  , ("psychiatry/schizophrenia/rosenhan", "Rosenhan fraud")
  , ("psychiatry/schizophrenia", "SCZ")
  , ("psychiatry/anorexia", "anorexia")
  , ("psychiatry/adhd", "ADHD")
  , ("psychiatry/anxiety/lavender", "Silexan™")
  , ("psychiatry/anxiety", "anxiety")
  , ("psychiatry/traumatic-brain-injury", "TBI")
  , ("psychiatry/alzheimers", "Alzheimer’s")
  , ("statistics/stylometry/truesight", "truesight (stylometry)")
  , ("statistics/stylometry", "stylometry")
  , ("statistics/decision/stigler-diet", "Stigler’s diet problem")
  , ("statistics/decision/mail-delivery", "mail-delivery optimization")
  , ("statistics/decision", "decision theory")
  , ("statistics/order/selection/pipeline", "leaky pipelines")
  , ("statistics/order/selection", "statistical selection")
  , ("statistics/order/capture", "mark-and-recapture")
  , ("statistics/order/comparison", "statistical comparison")
  , ("statistics/order", "order statistics")
  , ("statistics/bayes/abc", "ABC Bayes")
  , ("statistics/bayes", "Bayes")
  , ("statistics/power-analysis", "power analysis")
  , ("statistics/meta-analysis", "meta-analysis")
  , ("philosophy/ethics/ethicists", "ethicists")
  , ("statistics/variance-component", "variance components")
  , ("statistics/survival-analysis", "survival analysis")
  , ("sociology/survey/lizardman", "Lizardman survey constant")
  , ("sociology/survey", "sociology surveys")
  , ("sociology/small-groups/quakers", "Quakers")
  , ("sociology/small-groups", "small groups")
  , ("sociology/intrasexual-aggression", "intrasexual aggression")
  , ("sociology/technology/parasocial", "parasociality")
  , ("sociology/technology", "sociology of technology")
  , ("sociology/false-preference", "preference falsification")
  , ("sociology/abandoned-footnotes", "<em>Abandoned Footnotes</em>")
  , ("psychology/spaced-repetition", "spaced repetition")
  , ("psychology/parapsychology/european-journal-of-parapsychology", "<em>EJP</em>")
  , ("psychology/parapsychology", "parapsychology")
  , ("psychology/animal/bird/neuroscience", "bird brains")
  , ("psychology/animal/bird", "bird")
  , ("psychology/animal/maze", "maze-running")
  , ("psychology/animal", "animal psych")
  , ("psychology/neuroscience/neurosurgery", "neurosurgery")
  , ("psychology/neuroscience", "neuroscience")
  , ("psychology/illusion-of-depth", "illusion of depth")
  , ("psychology/energy", "mental energy")
  , ("psychology/novelty", "novelty U-curve")
  , ("psychology/chess", "chess psychology")
  , ("psychology/personality/psychopathy", "psychopathy")
  , ("psychology/personality/narcissism", "narcissism")
  , ("psychology/personality/fantasizing", "fantasizing")
  , ("psychology/personality/conscientiousness", "Conscientious")
  , ("psychology/personality/openness", "Openness")
  , ("psychology/personality/neuroticism", "Neuroticism")
  , ("psychology/personality/agreeableness", "Agreeable")
  , ("psychology/personality/extraversion", "Extraversion")
  , ("psychology/personality", "personality")
  , ("psychology/okcupid", "OKCupid")
  , ("psychology/nature", "psych of nature")
  , ("psychology/dark-knowledge", "dark knowledge (human)")
  , ("psychology/man-hands", "male-hand sexiness")
  , ("psychedelic/lsd", "LSD")
  , ("psychedelic/ketamine", "ketamine")
  , ("psychedelic", "psychedelics")
  , ("statistics/prediction/election", "election forecast")
  , ("statistics/prediction", "forecasting")
  , ("reinforcement-learning/scaling", "RL scaling")
  , ("reinforcement-learning/exploration/active-learning/data-pruning", "data pruning")
  , ("reinforcement-learning/exploration/active-learning", "active learning")
  , ("reinforcement-learning/exploration", "RL exploration")
  , ("reinforcement-learning/safe", "AI safety")
  , ("reinforcement-learning/robot", "robotics")
  , ("reinforcement-learning/multi-agent", "MARL")
  , ("reinforcement-learning/preference-learning/mode-collapse", "AI mode collapse")
  , ("reinforcement-learning/preference-learning", "preference learning")
  , ("reinforcement-learning/meta-learning/continual-learning", "continual learning")
  , ("reinforcement-learning/meta-learning", "meta-learning")
  , ("ai/nn/anthropic", "Anthropic")
  , ("ai/nn/deepseek", "DeepSeek")
  , ("reinforcement-learning/deepmind", "DeepMind")
  , ("reinforcement-learning/openai", "OA")
  , ("cs/linkrot/archiving", "Internet archiving")
  , ("cs/linkrot", "linkrot")
  , ("technology/security", "infosec")
  , ("technology/google", "Google")
  , ("technology/digital-antiquarian", "<em>Digital Antiquarian</em>")
  , ("technology/carbon-capture", "carbon capture")
  , ("technology/stevensinstituteoftechnology-satmnewsletter", "<em>SATM</em> archive")
  , ("technology", "tech")
  , ("history/public-domain-review", "<em>PD Review</em>")
  , ("history/uighur", "Uighur genocide")
  , ("reinforcement-learning/nethack", "<em>Nethack</em> AI")
  , ("reinforcement-learning/model-free/oa5", "OA5")
  , ("reinforcement-learning/model-free/alphastar", "AlphaStar")
  , ("reinforcement-learning/model/alphago", "AlphaGo")
  , ("reinforcement-learning/model/muzero", "MuZero")
  , ("reinforcement-learning/model/decision-transformer", "Decision Transformer")
  , ("reinforcement-learning/model-free", "model-free RL")
  , ("reinforcement-learning/model", "model-based RL")
  , ("darknet-market/william-pickard", "William Pickard (LSD)")
  , ("darknet-market/silk-road/2", "SR2 DNM")
  , ("darknet-market/silk-road/1", "SR1 DNM")
  , ("darknet-market/silk-road", "SR DNMs")
  , ("darknet-market/hydra", "Hydra DNM")
  , ("darknet-market/sheep-marketplace", "Sheep DNM")
  , ("darknet-market/evolution", "Evolution DNM")
  , ("darknet-market/blackmarket-reloaded", "BMR DNM")
  , ("darknet-market/atlantis", "Atlantis DNM")
  , ("darknet-market/alphabay", "AlphaBay DNM")
  , ("darknet-market/agora", "Agora DNM")
  , ("darknet-market/dnm-archive/file", "DNM Archives (files)")
  , ("darknet-market/dnm-archive", "DNM Archives")
  , ("darknet-market", "DNMs")
  , ("nootropic/quantified-self/nail-growth", "nail growth rate")
  , ("nootropic/quantified-self/weather", "weather & mood")
  , ("nootropic/quantified-self/heart-rate-variability", "HRV")
  , ("nootropic/quantified-self", "QS")
  , ("nootropic/lsd", "LSD microdosing")
  , ("philosophy/frank-ramsey", "Frank Ramsey")
  , ("cs/end-to-end-principle", "end-to-end")
  , ("cs/python", "Python")
  , ("cs/haskell/darcs", "darcs (VCS)")
  , ("cs/haskell", "Haskell")
  , ("cs/js", "JS")
  , ("cs/cryptography/time-lock", "time-lock crypto")
  , ("cs/cryptography", "cryptography") -- NOTE: we can no longer shorten it to "crypto" because that now connotes "cryptocurrency"
  , ("cs/css", "CSS")
  , ("cs/shell", "CLI")
  , ("cs/cellular-automaton", "cellular automata")
  , ("history/s-l-a-marshall", "SLAM (fraud)")
  , ("ai/video/analysis", "video analysis")
  , ("ai/video/generation", "video generation")
  , ("ai/video", "AI video")
  , ("ai/text-style-transfer", "text style transfer")
  , ("exercise/gravitostat", "gravitostat")
  , ("longevity/glp/retatrutide", "retatrutide")
  , ("longevity/glp/psychology", "GLP diet (psych)")
  , ("longevity/glp/semaglutide", "semaglutide")
  , ("longevity/glp/tirzepatide", "tirzepatide")
  , ("longevity/glp", "GLP diet drug")
  , ("philosophy/epistemology", "epistemology")
  , ("philosophy/brethren-of-purity", "Brethren of Purity")
  , ("philosophy/ethics", "ethics")
  , ("existential-risk", "x-risk")
  , ("ai/nn/sparsity/knowledge-distillation", "knowledge distillation")
  , ("ai/nn/sparsity/pruning", "NN pruning")
  , ("ai/nn/sparsity/low-precision", "low-precision NN")
  , ("ai/nn/sparsity", "NN sparsity")
  , ("ai/nn/transformer/attention/meta-descent", "attention ≈ SGD")
  , ("ai/nn/transformer/attention/hierarchical", "multi-scale Transformer")
  , ("ai/nn/transformer/attention/sparsity", "sparse Transformer")
  , ("ai/nn/transformer/attention/linear-algebra", "Transformer matrix optimizations")
  , ("ai/nn/transformer/attention/compression", "compressed Transformer")
  , ("ai/nn/transformer/attention/recurrent", "recurrent Transformer")
  , ("ai/nn/transformer/t5", "T5 Transformer")
  , ("ai/nn/transformer/alphafold", "AlphaFold")
  , ("ai/nn/transformer/gpt/claude/4",           "Claude-4 AI")
  , ("ai/nn/transformer/gpt/claude",             "Claude AI")
  , ("ai/nn/transformer/gpt/whisper",            "Whisper NN")
  , ("ai/nn/transformer/gpt/2/poetry",           "GPT-2 poetry")
  , ("ai/nn/transformer/gpt/2/nonfiction",       "GPT-2 nonfiction")
  , ("ai/nn/transformer/gpt/2/fiction",          "GPT-2 fiction")
  -- , ("ai/nn/transformer/gpt/2/humor",            "GPT-2 humor")
  , ("ai/nn/transformer/gpt/3/humor",            "GPT-3 humor")
  -- , ("ai/nn/transformer/gpt/4/humor",            "GPT-4 humor")
  , ("ai/nn/transformer/gpt/4-5",                "GPT-4.5")
  , ("ai/nn/transformer/gpt/5",                  "GPT-5")
  , ("ai/nn/transformer/gpt/4/poetry",           "GPT-4 poetry")
  , ("ai/nn/transformer/gpt/4/nonfiction",       "GPT-4 nonfiction")
  , ("ai/nn/transformer/gpt/4/fiction",          "GPT-4 fiction")
  , ("ai/nn/transformer/gpt/4/sydney",           "Sydney AI")
  , ("ai/nn/transformer/gpt/4",                  "GPT-4")
  , ("ai/nn/transformer/gpt/3/fiction",          "GPT-3 fiction")
  , ("ai/nn/transformer/gpt/3/nonfiction",       "GPT-3 nonfiction")
  , ("ai/nn/transformer/gpt/3/poetry",           "GPT-3 poetry")
  , ("ai/nn/transformer/gpt/3",                  "GPT-3")
  , ("ai/nn/transformer/gpt/2",                  "GPT-2")
  , ("ai/nn/transformer/gpt/instruction-tuning", "instruct-tuning LLMs")
  , ("ai/nn/transformer/gpt/jukebox",            "Jukebox")
  , ("ai/nn/transformer/gpt/poetry",             "GPT poetry")
  , ("ai/nn/transformer/gpt/fiction",            "GPT fiction")
  , ("ai/nn/transformer/gpt/dall-e/4o",          "GPT-4o media")
  , ("ai/nn/transformer/gpt/dall-e/3",           "DALL·E 3")
  , ("ai/nn/transformer/gpt/dall-e/2",           "DALL·E 2")
  , ("ai/nn/transformer/gpt/dall-e/1",           "DALL·E 1")
  , ("ai/nn/transformer/gpt/dall-e",             "DALL·E")
  , ("ai/nn/transformer/gpt/palm/2",             "PaLM 2")
  , ("ai/nn/transformer/gpt/palm",               "PaLM")
  , ("ai/nn/transformer/gpt/lamda",              "LaMDA")
  , ("ai/nn/transformer/gpt/codex",              "Codex")
  , ("ai/nn/transformer/gpt/inner-monologue",    "inner monologue (AI)")
  , ("ai/nn/transformer/gpt/nonfiction",        "GPT non-fiction")
  , ("ai/nn/transformer/gpt",                    "GPT")
  , ("ai/fiction", "fiction by AI")
  , ("ai/nn/gan/stylegan/anime", "StyleGAN anime")
  , ("ai/nn/gan/stylegan", "StyleGAN")
  , ("ai/nn/gan/biggan", "BigGAN")
  , ("ai/nn/gan", "GAN")
  , ("ai/nn/diffusion/discrete ", "discrete diffusion")
  , ("ai/nn/diffusion", "diffusion NN")
  , ("dual-n-back", "DNB")
  , ("vitamin-d", "Vitamin D")
  , ("design/visualization", "data visualization")
  , ("design/typography/tex", "<span class=\"logotype-tex\">T<sub>e</sub>X</span>")
  , ("design/typography/rubrication", "rubricated typography")
  , ("design/typography/subscript", "subscript notation")
  , ("design/typography", "typography")
  , ("ai/nn/transformer/attention", "self-attention")
  , ("ai/nn/transformer/clip/sample", "CLIP samples")
  , ("ai/nn/transformer/clip", "CLIP")
  , ("iq/high/anne-roe", "Anne Roe’s Scientists")
  , ("iq/high/fullerton", "Fullerton Longitudinal Study")
  , ("iq/high/munich", "Munich Giftedness Study")
  , ("iq/high/smpy", "SMPY")
  , ("iq/high", "high IQ")
  , ("iq/low", "low IQ")
  , ("iq/ses", "IQ & SES")
  , ("iq/animal", "animal cognition")
  , ("ai/nn/retrieval", "retrieval AI")
  , ("ai/nn/tokenization", "LM tokenization")
  , ("ai/scaling/emergence/grokking", "grokking (NN)")
  , ("ai/scaling/emergence", "AI emergence")
  , ("ai/scaling/mixture-of-experts", "MoE NN")
  , ("ai/scaling", "AI scaling")
  , ("ai/nn/vae/mae", "masked autoencoder")
  , ("ai/nn/vae", "autoencoder NN")
  , ("ai/nn/transformer", "Transformer")
  , ("ai/nn/fully-connected", "MLP NN")
  , ("ai/nn/dynamic-evaluation", "dynamic evaluation (NN)")
  , ("ai/nn/rnn", "RNN")
  , ("ai/nn/cnn", "CNN")
  , ("ai/nn/sampling", "NN sampling")
  , ("ai/music", "AI music")
  , ("anime/eva/little-boy", "<em>Little Boy</em>")
  , ("ai/anime/danbooru", "Danbooru AI")
  , ("ai/anime", "anime AI")
  , ("ai/nn/adversarial/human", "adversarial examples (human)")
  , ("ai/nn/adversarial", "adversarial examples")
  , ("ai/nn", "NN")
  , ("ai/tabular", "tabular ML")
  , ("ai/dataset", "ML dataset")
  , ("reinforcement-learning/chess", "AI chess")
  , ("reinforcement-learning", "RL")
  , ("newest", "newest links")
  ]

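-- Test cases as pairs of (short, partial, or misspelled tag input, full tag path it is expected to resolve to).
-- testing: unique keys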
shortTagTestSuite :: [(String, String)]
shortTagTestSuite = setLike
 [("active-learning", "reinforcement-learning/exploration/active-learning")
 , ("add" , "psychiatry/adhd")
 , ("adhd" , "psychiatry/adhd")
 , ("adoption" , "genetics/heritable/adoption")
 , ("adversarial" , "ai/nn/adversarial")
 , ("advertising" , "economics/advertising")
 , ("agora" , "darknet-market/agora")
 , ("ai/adversarial" , "ai/nn/adversarial")
 , ("ai/clip" , "ai/nn/transformer/clip")
 , ("ai/gan" , "ai/nn/gan")
 , ("ai/retrieval" , "ai/nn/retrieval")
 , ("ai/rnn" , "ai/nn/rnn")
 , ("algorithm" , "cs/algorithm")
 , ("alphabay" , "darknet-market/alphabay")
 , ("alphafold" , "ai/nn/transformer/alphafold")
 , ("alphago" , "reinforcement-learning/model/alphago")
 , ("alzheimers" , "psychiatry/alzheimers")
 , ("animal" , "psychology/animal")
 , ("anorexia" , "psychiatry/anorexia")
 , ("anxiety" , "psychiatry/anxiety")
 , ("apple" , "genetics/selection/artificial/apple")
 , ("archiving" , "cs/linkrot/archiving")
 , ("artificial" , "genetics/selection/artificial")
 , ("aspirin" , "longevity/aspirin")
 , ("attention" , "ai/nn/transformer/attention")
 , ("attention/hierarchical", "ai/nn/transformer/attention/hierarchical")
 , ("attention/recurrent", "ai/nn/transformer/attention/recurrent")
 , ("autism" , "psychiatry/autism")
 , ("automation" , "economics/automation")
 , ("bayes" , "statistics/bayes")
 , ("bias" , "statistics/bias")
 , ("biggan" , "ai/nn/gan/biggan")
 , ("bipolar" , "psychiatry/bipolar")
 , ("bird" , "psychology/animal/bird")
 , ("bird/neuroscience" , "psychology/animal/bird/neuroscience")
 , ("brain-imitation-learning", "reinforcement-learning/imitation-learning/brain-imitation-learning")
 , ("c" , "cs/c")
 , ("caffeine" , "nootropic/caffeine")
 , ("calibration" , "ai/nn/transformer/gpt/calibration")
 , ("carbon-capture" , "technology/carbon-capture")
 , ("catnip" , "cat/psychology/drug/catnip")
 , ("causality" , "statistics/causality")
 , ("cellular-automaton" , "cs/cellular-automaton")
 , ("chess" , "reinforcement-learning/chess")
 , ("clip" , "ai/nn/transformer/clip")
 , ("clip/samples" , "ai/nn/transformer/clip/sample")
 , ("cloning" , "genetics/cloning")
 , ("cnn" , "ai/nn/cnn")
 , ("code" , "cs")
 , ("codex" , "ai/nn/transformer/gpt/codex")
 , ("cognitive-bias" , "psychology/cognitive-bias")
 , ("collecting" , "psychology/collecting")
 , ("comparison" , "statistics/order/comparison")
 , ("computable" , "cs/computable")
 , ("conscientiousness", "psychology/personality/conscientiousness")
 , ("copyright" , "economics/copyright")
 , ("correlation" , "genetics/heritable/correlation")
 , ("cost" , "psychology/cognitive-bias/sunk-cost")
 , ("cryptography" , "cs/cryptography")
 , ("css" , "cs/css")
 , ("dall-e" , "ai/nn/transformer/gpt/dall-e/3")
 , ("danbooru" , "ai/anime/danbooru")
 , ("dark-knowledge" , "psychology/dark-knowledge")
 , ("data" , "reinforcement-learning/exploration/active-learning/data-pruning")
 , ("data-augmentation" , "ai/nn/gan/data-augmentation")
 , ("decision" , "statistics/decision")
 , ("decision-transformer", "reinforcement-learning/model/decision-transformer")
 , ("deepmind" , "reinforcement-learning/deepmind")
 , ("depression" , "psychiatry/depression")
 , ("des" , "design")
 , ("diff" , "ai/nn/diffusion")
 , ("diffusion" , "ai/nn/diffusion")
 , ("diplomacy", "reinforcement-learning/imperfect-information/diplomacy")
 , ("discrete" , "ai/nn/diffusion/discrete")
 , ("dnm-archive" , "darknet-market/dnm-archive")
 , ("do" , "dog")
 , ("dog/genetics" , "genetics/heritable/dog")
 , ("dream" , "psychology/vision/dream")
 , ("dune" , "fiction/science-fiction/frank-herbert")
 , ("editing" , "genetics/editing")
 , ("election" , "statistics/prediction/election")
 , ("emergence" , "ai/scaling/emergence")
 , ("emergenesis" , "genetics/heritable/emergenesis")
 , ("end-to-end" , "cs/end-to-end-principle")
 , ("end-to-end-principle" , "cs/end-to-end-principle")
 , ("energy" , "psychology/energy")
 , ("epigenetic" , "longevity/epigenetics")
 , ("epigenetics" , "longevity/epigenetics")
 , ("epistemology" , "philosophy/epistemology")
 , ("ethicists" , "philosophy/ethics/ethicists")
 , ("ethics" , "philosophy/ethics")
 , ("eva" , "anime/eva")
 , ("evolution" , "genetics/selection/natural")
 , ("evolution/human" , "genetics/selection/natural/human")
 , ("experience-curve" , "economics/experience-curve")
 , ("exploration" , "reinforcement-learning/exploration")
 , ("for" , "statistics/prediction")
 , ("frank-herbert" , "fiction/science-fiction/frank-herbert")
 , ("full" , "ai/nn/fully-connected")
 , ("fully-connected" , "ai/nn/fully-connected")
 , ("gametogenesis" , "genetics/gametogenesis")
 , ("gan" , "ai/nn/gan")
 , ("generation" , "ai/video/generation")
 , ("genetic" , "genetics")
 , ("gene-wolfe" , "fiction/gene-wolfe")
 , ("genome-synthesis" , "genetics/genome-synthesis")
 , ("georgism" , "economics/georgism")
 , ("google" , "technology/google")
 , ("gp-4" , "ai/nn/transformer/gpt/4")
 , ("gp4" , "ai/nn/transformer/gpt/4")
 , ("gpt" , "ai/nn/transformer/gpt")
 , ("gpt-3" , "ai/nn/transformer/gpt/3")
 , ("gpt-4" , "ai/nn/transformer/gpt/4")
 , ("gpt4" , "ai/nn/transformer/gpt/4")
 , ("gpt-4/fiction" , "ai/nn/transformer/gpt/4/fiction")
 , ("gpt-4/non" , "ai/nn/transformer/gpt/4/nonfiction")
 , ("gpt/4/non" , "ai/nn/transformer/gpt/4/nonfiction")
 , ("gpt-4/nonfiction" , "ai/nn/transformer/gpt/4/nonfiction")
 , ("gpt/4/non-fiction" , "ai/nn/transformer/gpt/4/nonfiction")
 , ("gpt/4/nonfiction" , "ai/nn/transformer/gpt/4/nonfiction")
 , ("gpt-4/poetry" , "ai/nn/transformer/gpt/4/poetry")
 , ("gpt/4/poetry" , "ai/nn/transformer/gpt/4/poetry")
 , ("gpt4/poetry" , "ai/nn/transformer/gpt/4/poetry")
 , ("gpt-4poetry" , "ai/nn/transformer/gpt/4/poetry")
 , ("gpt/codex" , "ai/nn/transformer/gpt/codex")
 , ("gpt/fiction" , "ai/nn/transformer/gpt/fiction")
 , ("gpt/inner-monologue", "ai/nn/transformer/gpt/inner-monologue")
 , ("gpt/non" , "ai/nn/transformer/gpt/nonfiction")
 , ("gpt/non-fiction" , "ai/nn/transformer/gpt/nonfiction")
 , ("gpt/nonfiction" , "ai/nn/transformer/gpt/nonfiction")
 , ("gpt/poetry" , "ai/nn/transformer/gpt/poetry")
 , ("graph" , "design/visualization")
 , ("hanabi", "reinforcement-learning/imperfect-information/hanabi")
 , ("hardware" , "cs/hardware")
 , ("haskell" , "cs/haskell")
 , ("heritable" , "genetics/heritable")
 , ("heritable/correlation" , "genetics/heritable/correlation")
 , ("hierarchical" , "ai/nn/transformer/attention/hierarchical")
 , ("highleyman" , "ai/highleyman")
 , ("human" , "genetics/selection/natural/human")
 , ("humor" , "fiction/humor")
 , ("illusion-of-depth", "psychology/cognitive-bias/illusion-of-depth")
 , ("imperfect-information", "reinforcement-learning/imperfect-information")
 , ("inner-monologue" , "ai/nn/transformer/gpt/inner-monologue")
 , ("instruction-tuning", "ai/nn/transformer/gpt/instruction-tuning")
 , ("japan/anime" , "anime")
 , ("japanese" , "japan")
 , ("jukebox" , "ai/nn/transformer/gpt/jukebox")
 , ("knowledge-distillation", "ai/nn/sparsity/knowledge-distillation")
 , ("lamda" , "ai/nn/transformer/gpt/lamda")
 , ("learning", "reinforcement-learning")
 , ("less" , "lesswrong-survey")
 , ("link-rot" , "cs/linkrot")
 , ("linkrot" , "cs/linkrot")
 , ("linkrot/archiving" , "cs/linkrot/archiving")
 , ("lisp" , "cs/lisp")
 , ("lithium" , "psychiatry/lithium")
 , ("logic" , "philosophy/logic")
 , ("low-precision" , "ai/nn/sparsity/low-precision")
 , ("mae" , "ai/nn/vae/mae")
 , ("meditation" , "psychiatry/meditation")
 , ("mendelian-randomization", "genetics/heritable/correlation/mendelian-randomization")
 , ("meta-analysis" , "statistics/meta-analysis")
 , ("meta-learning" , "reinforcement-learning/meta-learning")
 , ("microbiome" , "genetics/microbiome")
 , ("mind" , "philosophy/mind")
 , ("mixture" , "ai/scaling/mixture-of-experts")
 , ("mixture-of-experts" , "ai/scaling/mixture-of-experts")
 , ("model" , "reinforcement-learning/model")
 , ("model-free" , "reinforcement-learning/model-free")
 , ("moe" , "ai/scaling/mixture-of-experts")
 , ("multi-agent" , "reinforcement-learning/multi-agent")
 , ("music-distraction" , "psychology/music/distraction")
 , ("muzero" , "reinforcement-learning/model/muzero")
 , ("natural" , "genetics/selection/natural")
 , ("nature" , "psychology/nature")
 , ("n-back" , "dual-n-back")
 , ("nethack" , "reinforcement-learning/nethack")
 , ("neuroscience" , "psychology/neuroscience")
 , ("nn" , "ai/nn")
 , ("non-fiction" , "ai/nn/transformer/gpt/nonfiction")
 , ("novelty" , "psychology/novelty")
 , ("oa5" , "reinforcement-learning/model-free/oa5")
 , ("ontology" , "philosophy/ontology")
 , ("opera" , "fiction/opera")
 , ("order" , "statistics/order")
 , ("palm" , "ai/nn/transformer/gpt/palm")
 , ("peer-review" , "statistics/peer-review")
 , ("perpetuities" , "economics/perpetuities")
 , ("personality" , "psychology/personality")
 , ("personality/conscientiousness", "psychology/personality/conscientiousness")
 , ("poetry" , "fiction/poetry")
 , ("portia" , "biology/portia")
 , ("power" , "statistics/power-analysis")
 , ("power-analysis" , "statistics/power-analysis")
 , ("prediction" , "statistics/prediction")
 , ("prediction/election" , "statistics/prediction/election")
 , ("preference-falsification", "sociology/false-preference")
 , ("preference-learning", "reinforcement-learning/preference-learning")
 , ("probability" , "statistics/probability")
 , ("pruning" , "ai/nn/sparsity/pruning")
 , ("psycholog" , "psychology/animal/bird")
 , ("psychology/bird" , "psychology/animal/bird")
 , ("psychopath" , "psychology/personality/psychopathy")
 , ("public-domain-review" , "history/public-domain-review")
 , ("python" , "cs/python")
 , ("quantified-self" , "nootropic/quantified-self")
 , ("r" , "cs/r")
 , ("red" , "design/typography/rubrication")
 , ("reduced-precision" , "ai/nn/sparsity/low-precision")
 , ("reinforcement-learning/alphago", "reinforcement-learning/model/alphago")
 , ("religion" , "philosophy/religion")
 , ("repetition" , "psychology/spaced-repetition")
 , ("retrieval" , "ai/nn/retrieval")
 , ("review" , "history/public-domain-review")
 , ("risk" , "existential-risk")
 , ("rl-scaling" , "reinforcement-learning/scaling")
 , ("rl/scaling" , "reinforcement-learning/scaling")
 , ("rnn" , "ai/nn/rnn")
 , ("robot" , "reinforcement-learning/robot")
 , ("robotics" , "reinforcement-learning/robot")
 , ("rosenhan" , "psychiatry/schizophrenia/rosenhan")
 , ("rubrication" , "design/typography/rubrication")
 , ("rumiyah" , "crime/terrorism/rumiyah")
 , ("safe" , "reinforcement-learning/safe")
 , ("samples" , "ai/nn/transformer/clip/sample")
 , ("sampling" , "ai/nn/sampling")
 , ("scaling" , "ai/scaling")
 , ("scaling/economics" , "ai/scaling/economics")
 , ("scaling/hardware" , "ai/scaling/hardware")
 , ("schizophrenia" , "psychiatry/schizophrenia")
 , ("science-fiction" , "fiction/science-fiction")
 , ("security" , "cs/security")
 , ("selection" , "genetics/selection")
 , ("selection/artificial" , "genetics/selection/artificial")
 , ("selection/natural" , "genetics/selection/natural")
 , ("self-sinking" , "technology/self-sinking")
 , ("semaglutide" , "longevity/glp/semaglutide")
 , ("sentence-spacing" , "design/typography/sentence-spacing")
 , ("sequencing" , "genetics/sequencing")
 , ("sf" , "fiction/science-fiction")
 , ("short-sleeper" , "zeo/short-sleeper")
 , ("silk-road" , "darknet-market/silk-road")
 , ("silk-road/1" , "darknet-market/silk-road/1")
 , ("silk-road/2" , "darknet-market/silk-road/2")
 , ("sleep" , "zeo")
 , ("smell" , "psychology/smell")
 , ("sort" , "cs/algorithm/sorting")
 , ("sorting" , "cs/algorithm/sorting")
 , ("spaced-repetition" , "psychology/spaced-repetition")
 , ("sparsity" , "ai/nn/sparsity")
 , ("sparsity/pruning" , "ai/nn/sparsity/pruning")
 , ("stereotype-threat", "psychology/cognitive-bias/stereotype-threat")
 , ("stylegan" , "ai/nn/gan/stylegan")
 , ("stylometrics" , "statistics/stylometry")
 , ("stylometry" , "statistics/stylometry")
 , ("sunk-cost" , "psychology/cognitive-bias/sunk-cost")
 , ("survival" , "statistics/survival-analysis")
 , ("survival-analysis" , "statistics/survival-analysis")
 , ("t5" , "ai/nn/transformer/t5")
 , ("tabular" , "ai/tabular")
 , ("tbi" , "psychiatry/traumatic-brain-injury")
 , ("tcs" , "psychology/neuroscience/tcs")
 , ("teika" , "japan/poetry/teika")
 , ("terrorism" , "crime/terrorism")
 , ("text" , "fiction/text-game")
 , ("text-game" , "fiction/text-game")
 , ("text-style-transfer" , "ai/text-style-transfer")
 , ("tirzepatide" , "longevity/glp/tirzepatide")
 , ("tokenization" , "ai/nn/tokenization")
 , ("traction" , "psychology/music/distraction")
 , ("transformer" , "ai/nn/transformer")
 , ("transformer/attention" , "ai/nn/transformer/attention")
 , ("transformer/gpt" , "ai/nn/transformer/gpt")
 , ("traumatic-brain-injury", "psychiatry/traumatic-brain-injury")
 , ("typography" , "design/typography")
 , ("uighur" , "history/uighur")
 , ("vae" , "ai/nn/vae")
 , ("video/analysis" , "ai/video/analysis")
 , ("video/generatio" , "ai/video/generation")
 , ("video/generation" , "ai/video/generation")
 , ("vision" , "psychology/vision")
 , ("visual" , "design/visualization")
 , ("visualization" , "design/visualization")
 , ("willpower" , "psychology/willpower")
 , ("writing" , "psychology/writing")
 , ("psych/inner-monologue", "psychology/inner-voice")
 -- Edit-distance (typo-correction) test cases: each misspelled input on the left should resolve to the tag on the right:
 , ("psychology/writingg" , "psychology/writing")
 , ("ai.dataset", "ai/dataset")
 , ("aidataset", "ai/dataset")
 , ("ai/datase", "ai/dataset")
 , ("i/dataset", "ai/dataset")
 , ("economicss", "economics") -- Doubled letter (distance 1)
 , ("econmics", "economics") -- Missing letter (distance 1)
 , ("psycholoyg", "psychology") -- Transposition (distance 2)
 , ("ecenomics", "economics") -- Wrong letter (distance 1)
 ]
