-- see </style-guide#gwernnet-testing>

{-# LANGUAGE OverloadedStrings #-}
module Test where

import Control.Monad (unless)
import Data.Either (lefts)
import Data.List (intersect, isInfixOf, isPrefixOf)
import qualified Data.Map.Strict as M (keys)
import Data.Char (isAlpha, isDigit, isLower)
import Text.Read (readMaybe)
import qualified Data.Text as T (unpack, elem, head, pack)

import Text.Regex.Base.RegexLike (makeRegexM)
import Text.Regex (Regex)
import Control.Exception (try, SomeException)

import Text.Pandoc (Inline(Link, Span, Str))

import Cycle (isCycleLess)
import Metadata.Format (printDoubleTestSuite, cleanAbstractsHTMLTest, balanced, cleanAbstractsHTML,
                      footnoteRegex, sectionAnonymousRegex, badUrlRegex)
import Metadata.Date (isDate, dateRangeDurationTestCasesTestsuite)
import Utils (printGreen, printRed, isDomainT, isURL, isURLT, isURLAny, isURLAnyT, ensure)
import LinkID (url2ID, isValidID)
import Unique (isUniqueAll, isUniqueKeys, isUniqueKeys3, isUniqueKeys4, isUniqueList, isUnique)

-- module self-tests:
import Annotation (tooltipToMetadata, testGuessAuthorDate)
import qualified Cycle (testCycleDetection)
import Inflation (inflationDollarTestSuite)
import Interwiki (interwikiTestSuite, interwikiCycleTestSuite, isWPArticle, isWPDisambig)
import LinkArchive (readArchiveMetadataAndCheck, testLinkRewrites)
import LinkIcon (linkIconTest)
import LinkLive (linkLiveTest, linkLivePrioritize)
import Tags (testTags)
import Typography (titleCaseTest, abstractBlockquotesTest, completionProgressInline)
import Metadata.Author (authorCollapseTest, cleanAuthorsTest, extractTwitterUsername, authorDB)
import GenerateSimilar (generateSimilarTestSuite)

-- test the tests as configuration files for duplicates etc.:
import qualified Config.GenerateSimilar (blackListURLs)
import qualified Config.Interwiki (testCases, quoteOverrides, redirectDB)
import qualified Config.LinkArchive (whiteListMatchesFixed, localizeLinktestCases)
import qualified Config.LinkIcon (prioritizeLinkIconBlackList, linkIconTestUnitsText)
import qualified Config.LinkLive (goodDomainsSub, goodDomainsSimple, badDomainsSub, badDomainsSimple, goodLinks, badLinks)
import qualified Config.LinkSuggester (badAnchorStrings, whiteList)
import qualified Config.Tags (shortTagBlacklist, tagsLong2Short, wholeTagRewritesRegexes, tagsShort2LongRewrites, shortTagTestSuite)
import qualified Config.Typography (surnameFalsePositivesWhiteList, titleCaseTestCases, dateRangeDurationTestCases,
                                    lowercaseUnicode, completionMap, essayCompletionMap, confidenceMap,
                                    titlecaseCommonErrors)
import qualified Config.XOfTheDay (siteBlackList, quoteDBPath, siteDBPath)
import qualified XOfTheDay as XOTD (readTTDB)
import qualified Config.Inflation (bitcoinUSDExchangeRateHistory, inflationDollarLinkTestCases)
import qualified Config.LinkID (linkIDOverrides, affiliationAnchors)
import qualified Config.Metadata.Format (htmlRewriteRegexpBefore, htmlRewriteRegexpAfter, htmlRewriteFixed, filterMetaBadSubstrings, filterMetaBadWholes, balancedBracketTestCases, htmlRewriteTestCases)
import qualified Config.Misc (cd, tooltipToMetadataTestcases, cycleTestCases, cleanArxivAbstracts, arxivAbstractFixedRewrites, arxivAbstractRegexps)
import qualified Config.Paragraph (whitelist)
import qualified Config.Metadata.Author (authorCollapseTestCases, canonicals, canonicalsWithInitials, authorLink, authorLinkBlacklist, cleanAuthorsFixedRewrites, cleanAuthorsRegexps, extractTwitterUsernameTestSuite, authorWhitelist)
import qualified Config.Metadata.Title (badStrings, stringReplace, stringDelete)
import LinkMetadata (readLinkMetadata, generateFileTransclusionBlock)
import Config.LinkMetadata (fileTranscludesTest, badDOISubstrings, allowedNonHttpURLPrefixes, uriValidationExemptInfixes, ignoredMalformedURLPrefixes, badAuthorSubstrings, duplicateAffiliationWhitelist, allowedNonHttpURLPrefixes, documentPreviewableExtensions, codePreviewableExtensions, fileViewableExtensions, documentPreviewableExtensions, codePreviewableExtensions, annotationClasses, positiveAnnotationClasses)

import Utext (rawMarkdown2Utext, rawMarkdown2UtextStyled)
import Config.Utext (utextTestSuite, defaultStyle, Style(sLigature))

-- test function to validate lists of regex patterns
testRegexPatterns :: [String] -> IO ()
testRegexPatterns patterns = do
    results <- mapM validateRegex patterns
    let failures = lefts results
    unless (null failures) $ mapM_ putStrLn failures
 where -- Function to validate a regex pattern
  validateRegex :: String -> IO (Either String ())
  validateRegex pttrn = do
      result <- try (makeRegexM pttrn :: IO Regex)
      case result of
          Left e -> return . Left $ "Regex compilation failed for pattern '" ++ pttrn ++
                                    "': " ++ show (e :: SomeException)
          Right _ -> return $ Right ()

testXOTD :: IO Int
testXOTD = do s <- XOTD.readTTDB Config.XOfTheDay.siteDBPath
              q <- XOTD.readTTDB Config.XOfTheDay.quoteDBPath
              -- check that the non-URL fields are *not* valid URLs, which implies a copy-paste error, duplication, or something.
              return $ sum [length $ ensure "Test.testXOTD.sites" "isURLAny/not-isURLAny" (\(u,title,_) -> isURLAny u && not (null title || isURL title)) s
                          , length $ isUniqueKeys3 s
                          , length $ ensure "Test.testXOTD.quotes" "not-isURLAny" (\(qt,a,_) -> not (null qt || isURLAny qt) && (null a || not (isURLAny a))) q
                          , length $ isUniqueKeys3 q
                          ]

{- Run pure config sanity checks in one place rather than in the hot paths that consume each config.
Many checks scan whole lists or maps. Embedding them in link conversion, filtering, or cleanup
functions would repeat that scan for every input element, quietly turning an otherwise-linear
pass into an accidental quadratic one.

The Int is only a strictness/accounting hook: `testAll` prints `show testConfigs`, which forces
these pure checks and reports roughly how many config entries were inspected. Failed checks abort
during evaluation with their own error messages; callers should not interpret the numeric value
as a pass/fail result. -}
testConfigs :: Int
testConfigs = sum $ map length [isUniqueList Config.Metadata.Format.filterMetaBadSubstrings, isUniqueList Config.Metadata.Format.filterMetaBadWholes
                               , ensure "Test.GenerateSimilar.blackListURLs" "isURLAny (URL & file)" isURLAny $
                                 isUniqueList Config.GenerateSimilar.blackListURLs
                               , isUniqueList Config.LinkArchive.whiteListMatchesFixed
                               , isUniqueList Config.LinkID.affiliationAnchors
                               , isUniqueList Config.Tags.shortTagBlacklist
                               , isUniqueList Config.Typography.surnameFalsePositivesWhiteList
                               ] ++ -- String
                               map length [isUniqueList Config.LinkIcon.prioritizeLinkIconBlackList
                                           , isUniqueList Config.LinkLive.goodDomainsSub, isUniqueList Config.LinkLive.goodDomainsSimple, isUniqueList Config.LinkLive.badDomainsSub, isUniqueList Config.LinkLive.badDomainsSimple, isUniqueList Config.LinkLive.goodLinks, isUniqueList Config.LinkLive.badLinks
                                           , isUniqueList Config.LinkSuggester.badAnchorStrings
                                           , isUniqueList Config.XOfTheDay.siteBlackList
                                           , ensure "Test.XOfTheDay.siteBlackList" "isDomainT" isDomainT Config.XOfTheDay.siteBlackList] ++ -- T.Text
              [length $ isUniqueKeys4 Config.LinkIcon.linkIconTestUnitsText,
               length $ ensure "Test.linkIconTestUnitsText" "isURLAnyT" (\(u,_,_,_) -> T.head u == '#' || isURLAnyT u) Config.LinkIcon.linkIconTestUnitsText] ++
              [length $ isUniqueKeys Config.Interwiki.testCases, length (isUniqueKeys Config.Interwiki.redirectDB), length $ isUniqueList Config.Interwiki.quoteOverrides
              , length (ensure "Test.testConfigs.testCases" "isURLT (URL of second)" safeLink Config.Interwiki.testCases)
              , length (ensure "Test.testConfigs.redirectDB" "isURLT (URL of second)" (\(_,u2) -> isURLT u2) Config.Interwiki.redirectDB)
              , length (ensure "Test.testConfigs.extracTwitterUsernameTestSuite" "isURL (URL of first)" (\(u1,_) -> isURL u1) Config.Metadata.Author.extractTwitterUsernameTestSuite)
              , length $ isUniqueAll Config.LinkSuggester.whiteList
              , length $ ensure "Test.LinkSuggester.whiteList" "isURLAnyT" (isURLAnyT . fst) Config.LinkSuggester.whiteList
              , length $ ensure "Test.LinkSuggester.whiteList" "not isURLT" (not . any isURLT . snd) Config.LinkSuggester.whiteList
              , length $ isUniqueAll Config.Tags.tagsLong2Short, length $ isUniqueKeys Config.Tags.wholeTagRewritesRegexes, length $ isUniqueKeys Config.Tags.tagsShort2LongRewrites, length $ isCycleLess Config.Tags.tagsShort2LongRewrites, length $ isUniqueKeys Config.Tags.shortTagTestSuite
              , length $ ensure "Test.Config.Tags.tagsLong2Short" "isLower" (all Data.Char.isLower . filter Data.Char.isAlpha . fst) Config.Tags.tagsLong2Short
              , length $ isUniqueKeys Config.Typography.titleCaseTestCases
              , length $ isUniqueList $ T.unpack Config.Typography.lowercaseUnicode
              , completionMapConfigTest "completionMap" Config.Typography.completionMap
              , completionMapConfigTest "essayCompletionMap" Config.Typography.essayCompletionMap
              , completionMapConfigTest "confidenceMap" Config.Typography.confidenceMap
              , length $ isUniqueList Config.Typography.titlecaseCommonErrors
              , length $ isUniqueKeys Config.Typography.titlecaseCommonErrors
              , length $ isCycleLess Config.Typography.titlecaseCommonErrors
              , length $ isUniqueKeys Config.Misc.tooltipToMetadataTestcases
              , length $ isUniqueKeys Config.Misc.cleanArxivAbstracts
              , length $ isUniqueKeys Config.Misc.arxivAbstractRegexps
              , length $ isUniqueKeys Config.Misc.arxivAbstractFixedRewrites
              , length $ isUniqueKeys Config.Inflation.bitcoinUSDExchangeRateHistory, length $ isUniqueAll Config.Inflation.inflationDollarLinkTestCases
              , length $ ensure "Test.Inflation.dates" "isDate" (isDate . fst) $ Config.Inflation.bitcoinUSDExchangeRateHistory
              , length $ isUniqueAll Config.LinkID.linkIDOverrides
              , length $ ensure "Test.linkIDOverrides" "HTML identifier lambda" (\(_,ident) -> -- NOTE: HTML identifiers *must* start with `[a-zA-Z]`, and not numbers or periods etc.; they must not contain periods for CSS/JS compatibility
                                                                                        let ident' = T.unpack ident in '.' `notElem` ident' && isAlpha (head ident'))
                Config.LinkID.linkIDOverrides
               , length $ ensure "Test.linkIDOverrides" "URI (first), not URL (second)" (\(u,ident) -> isURLAny u && not (isURLT ident)) Config.LinkID.linkIDOverrides
               , length $ Metadata.Author.authorDB
              , length $ isUniqueKeys Config.Metadata.Author.cleanAuthorsFixedRewrites, length $ isUniqueKeys Config.Misc.cycleTestCases, length $ isUniqueKeys Config.Metadata.Author.cleanAuthorsRegexps, length $ isUniqueKeys Config.Metadata.Format.htmlRewriteRegexpBefore, length $ isUniqueKeys Config.Metadata.Format.htmlRewriteRegexpAfter, length $ isUniqueKeys Config.Metadata.Format.htmlRewriteFixed, length $ isUniqueKeys Config.Metadata.Author.extractTwitterUsernameTestSuite
              , length $ filter (\(input,output) -> Metadata.Format.balanced input /= output) $ isUniqueKeys Config.Metadata.Format.balancedBracketTestCases
              , length $ isUniqueKeys Config.Metadata.Author.authorCollapseTestCases, length $ isUnique Config.Metadata.Author.authorCollapseTestCases,
                length $ isUniqueAll Config.Metadata.Author.authorLink
              , length $ isUniqueKeys Config.Metadata.Author.canonicals, length $ isUniqueList Config.Metadata.Author.canonicalsWithInitials, length $ isUniqueList Config.Metadata.Author.authorLinkBlacklist, length $ isUniqueList Config.Metadata.Author.authorWhitelist
              , length $ isUniqueAll Config.Metadata.Format.htmlRewriteTestCases
              , length $ isUniqueList Config.Typography.dateRangeDurationTestCases
              , length $ ensure "Test.authorLink" "isURLAny (URL of second)" (all isURLAnyT) Config.Metadata.Author.authorLink
              , length $ ensure "Test.authorLink" "no broken HTML entities indicated by a '&'" (\name -> not ('&' `T.elem` name)) (map fst Config.Metadata.Author.authorLink)
              , length $ isCycleLess Config.Metadata.Author.canonicals, length $ isCycleLess Config.Metadata.Author.authorLink
              , length $ (map T.unpack $ map fst Config.Metadata.Author.authorLink) `intersect` (map fst Config.Metadata.Author.canonicals)
              , length $ isUniqueList Config.Metadata.Title.badStrings, length $ isUniqueList Config.Metadata.Title.stringDelete, length $ isUniqueKeys Config.Metadata.Title.stringReplace
              , length $ isUniqueList Config.Paragraph.whitelist, length $ ensure "Test.Paragraph.whitelist" "isURLAny" isURLAny Config.Paragraph.whitelist
              , length $ isUniqueList Config.LinkMetadata.badDOISubstrings
              , length $ isUniqueList Config.LinkMetadata.allowedNonHttpURLPrefixes
              , length $ isUniqueList Config.LinkMetadata.uriValidationExemptInfixes
              , length $ isUniqueList Config.LinkMetadata.ignoredMalformedURLPrefixes
              , length $ isUniqueList Config.LinkMetadata.badAuthorSubstrings
              , length $ isUniqueList Config.LinkMetadata.duplicateAffiliationWhitelist
              , length $ ensure "Config.LinkMetadata.allowedNonHttpURLPrefixes" "looks like a URI scheme/prefix" (\p -> ":" `isInfixOf` p) Config.LinkMetadata.allowedNonHttpURLPrefixes
              , length $ isUniqueList Config.LinkMetadata.documentPreviewableExtensions
              , length $ isUniqueList Config.LinkMetadata.codePreviewableExtensions
              , length $ isUniqueList Config.LinkMetadata.fileViewableExtensions
              , length $ isUniqueList Config.LinkMetadata.positiveAnnotationClasses
              , length $ isUniqueList Config.LinkMetadata.annotationClasses
              , length $ ensure "documentPreviewableExtensions" "leading dot" (isPrefixOf ".") Config.LinkMetadata.documentPreviewableExtensions
              , length $ ensure "codePreviewableExtensions" "leading dot" (isPrefixOf ".") Config.LinkMetadata.codePreviewableExtensions
              ] ++
              [sum $ map length [ ensure "goodDomainsSimple" "isDomainT" isDomainT Config.LinkLive.goodDomainsSimple
                                , ensure "goodDomainsSub"    "isDomainT" isDomainT Config.LinkLive.goodDomainsSub
                                , ensure "badDomainsSimple"  "isDomainT" isDomainT Config.LinkLive.badDomainsSimple
                                , ensure "badDomainsSub"     "isDomainT" isDomainT Config.LinkLive.badDomainsSub
                                , ensure "Test.prioritizeLinkIconBlackList" "isDomainT" isDomainT Config.LinkIcon.prioritizeLinkIconBlackList]
              ] ++
              [length (ensure "Test.localizeLinktestCases" "URL/URI" (\(u, (af, mobileversion, html, _)) -> isURLT u && (af=="" || isURLAnyT af) && (mobileversion=="" || isURLT mobileversion) && (html=="" || isURLT html)) Config.LinkArchive.localizeLinktestCases)]
  where safeLink :: (Show a) => (a,Inline) -> Bool
        safeLink (_, (Link _ _ (u,_))) = isURLT u
        safeLink x = error $ "Test.isURLT (URL of second).safeLink: passed an Inline which was not a 'Link' (with a valid URL)? erroring out. Original: " ++ show x

        completionMapConfigTest :: String -> [(String,String)] -> Int
        completionMapConfigTest label xs =
          sum [ length $ isUniqueList xs
              , length $ isUniqueKeys xs
              , length $ ensure ("Test.Config.Typography." ++ label) "non-empty status key" (not . null . fst) xs
              , length $ ensure ("Test.Config.Typography." ++ label) "percentage integer 0–100" completionPercentage xs
              ]

        completionPercentage :: (String,String) -> Bool
        completionPercentage (_, value) =
          not (null value) && all isDigit value &&
          case readMaybe value :: Maybe Int of
            Just n  -> n >= 0 && n <= 100
            Nothing -> False

completionProgressExpected :: String -> String -> Inline
completionProgressExpected status progress =
  Span ("", ["completion-status"], [("progress-percentage", T.pack progress)]) [Str (T.pack status)]

--------------------------------------------------------------------------------------------------------

testAll :: IO ()
testAll = do Config.Misc.cd

             printGreen ("Testing link icon matches…" :: String)
             unless (null linkIconTest) $ printRed ("Link icon rules have errors in: " ++ show linkIconTest)

             printGreen ("Testing interwiki rewrite rules…" :: String)
             unless (null interwikiTestSuite) $ printRed ("Interwiki rules have errors in: " ++ show interwikiTestSuite)
             unless (null interwikiCycleTestSuite) $ printRed ("Interwiki redirect rewrite rules have errors in: " ++ show interwikiCycleTestSuite)

             printGreen ("Tested config rules for uniqueness requirements, verified: " ++ show testConfigs)

             archives <- testLinkRewrites
             unless (null archives) $ printRed ("Link-archive rewrite test suite has errors in: " ++ show archives)
             printGreen ("Testing regexps for regex validity…" :: String)
             testRegexPatterns $
               [footnoteRegex, sectionAnonymousRegex, badUrlRegex] ++
               (map fst $ Config.Tags.wholeTagRewritesRegexes ++ Config.Metadata.Author.cleanAuthorsRegexps ++ Config.Metadata.Format.htmlRewriteRegexpBefore ++ Config.Metadata.Format.htmlRewriteRegexpAfter ++ Config.Misc.arxivAbstractRegexps)
             let regexUnitTests = filter (\(before,after) -> Metadata.Format.cleanAbstractsHTML before /= after) Config.Metadata.Format.htmlRewriteTestCases
             unless (null regexUnitTests) $ printRed ("Regex rewrite unit test suite has errors in: " ++ show regexUnitTests)
             let twitterUsernameTests = filter (\(u,username) -> extractTwitterUsername u /= username) Config.Metadata.Author.extractTwitterUsernameTestSuite
             unless (null twitterUsernameTests) $ printRed ("Twitter username parsing unit test suite has errors in: " ++ show twitterUsernameTests)

             printGreen ("Reading in metadata databases…" :: String)
             md <- readLinkMetadata
             am <- readArchiveMetadataAndCheck

             let linkids = map (url2ID . T.pack) $ M.keys md
             let linkidLength = length $ isUniqueList linkids
             printGreen ("Checked URL hash uniqueness for: " ++ show linkidLength)

             let invalidIDs = filter (\i -> let i' = T.unpack i in not (isValidID i' || isDate i')) linkids
             unless (null invalidIDs) $ printRed ("IDs: Some IDs are valid dates, which should never happen and indicates metadata corruption: " ++ show invalidIDs)

             printGreen ("Testing file-transclusions…" :: String)
             let fileTranscludes = fileTranscludesTest generateFileTransclusionBlock md am
             let fileTranscludesResults = filter (uncurry (/=)) fileTranscludes
             unless (null fileTranscludesResults) $ printRed ("File-transclude unit test suite has errors in: " ++ show fileTranscludesResults)

             unless (null authorCollapseTest) $ printRed ("Author-collapse test suite has errors in: " ++ show authorCollapseTest)

             xn <- testXOTD
             printGreen ("Testing X-of-the-day data… verified: " ++ show xn :: String)

             unless (null printDoubleTestSuite) $ printRed ("Double-printing function test suite has errors in: " ++ show printDoubleTestSuite)

             unless (null Cycle.testCycleDetection) $ printRed ("Cycle-detection test suite has errors in: " ++ show Cycle.testCycleDetection)

             unless (null titleCaseTest) $ printRed ("Title-case typography test suite has errors in: " ++ show titleCaseTest)
             unless (null abstractBlockquotesTest) $ printRed ("Abstract-blockquote typography test suite has errors in: " ++ show abstractBlockquotesTest)

             let completionProgressTests =
                   filter (\(status,progress) -> completionProgressInline status /= completionProgressExpected status progress) $
                   Config.Typography.completionMap ++ [("0","0"), ("50","50"), ("100","100")]
             unless (null completionProgressTests) $ printRed ("Completion-progress typography test suite has errors in: " ++ show completionProgressTests)

             unless (null dateRangeDurationTestCasesTestsuite) $ printRed ("Date-range-duration subscript typography test suite has errors in: " ++ show dateRangeDurationTestCasesTestsuite)

             printGreen ("Tested HTML/author cleanup rules for infinite loops, verified: " ++ show (length (cleanAbstractsHTMLTest ++ cleanAuthorsTest)))

             printGreen ("Testing tag rewrites…" :: String)
             testTags

             printGreen ("Testing live-link-popup rules…" :: String)
             unless (null linkLiveTest) $ printRed ("Live link popup rules have errors in: " ++ show linkLiveTest)
             _ <- linkLivePrioritize -- generate testcases for new live-link targets
             -- NOTE: we skip `linkLiveTestHeaders` due to requiring too much time & IO & bandwidth, and instead do it once in a while post-sync

             unless (null inflationDollarTestSuite) $ printRed ("Inflation-adjustment rules have errors in: " ++ show inflationDollarTestSuite)

             let tooltipResults = filter (\((t1, t2), goodResult) -> Annotation.tooltipToMetadata t1 t2 /= goodResult) Config.Misc.tooltipToMetadataTestcases
             unless (null tooltipResults) $ printRed ("Tooltip-parsing rules have errors in: " ++ show tooltipResults)

             let fileGuessResults = testGuessAuthorDate
             unless (null fileGuessResults) $ printRed ("File metadata-guessing unit-tests have errors in: " ++ show fileGuessResults)

             interwikiUnitTests <- do
               a <- Interwiki.isWPArticle False "https://en.wikipedia.org/wiki/George_Washington_XYZ"
               b <- Interwiki.isWPArticle False "https://en.wikipedia.org/wiki/George_Washington"
               c <- Interwiki.isWPDisambig "Mercury"
               d <- Interwiki.isWPDisambig "George_Washington"
               pure (not a && b && c == Just True && d == Just False)

             let utext = utextTestSuite rawMarkdown2Utext (rawMarkdown2UtextStyled (defaultStyle { sLigature = True }))
             unless (null utext) $ printRed ("Utext test suite has errors in: " ++ show utext)

             unless interwikiUnitTests $
               printRed "Interwiki disambig or non-existence checks failed?"

             generateSimilarTestSuite

             printGreen ("Testing finished." :: String)
