#!/bin/bash

## Copyright (C) 2025 - 2025 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

## Uncomment the next line to trace every command while debugging.
#set -x

## Strict mode, all options in a single call:
## abort on unhandled errors, reject unset variables, let a pipeline
## fail when any stage fails, and let functions inherit the ERR trap.
set -o errexit -o nounset -o pipefail -o errtrace

## Refuse to run as root, unless GITHUB_WORKSPACE is set to a non-empty
## value (presumably a CI environment - confirm).
if [[ "$(id -u)" == 0 && -z "${GITHUB_WORKSPACE:-}" ]]; then
    printf '%s\n' "$0: ERROR: No need to run as root!" >&2
    exit 1
fi

## XXX: Unrelated unit test. Non-ideal location but OK.
## Sourcing these helpers here makes the script abort (errexit) if any of
## them fails to load.
source "${HELPER_SCRIPTS_PATH:-}/usr/libexec/helper-scripts/use_sudo.sh"
source "${HELPER_SCRIPTS_PATH:-}/usr/libexec/helper-scripts/use_pkexec.sh"
source "${HELPER_SCRIPTS_PATH:-}/usr/libexec/helper-scripts/boot-session-detection.bsh"

## Helpers sourced for use by the tests below.
source "${HELPER_SCRIPTS_PATH:-}/usr/libexec/helper-scripts/wc-test.sh"
source "${HELPER_SCRIPTS_PATH:-}/usr/libexec/helper-scripts/strings.bsh"

## Whitespace-separated sub-folder names of the 'trojan-source' checkout.
## Kept as a plain string on purpose: it is expanded unquoted (word-split)
## by the loop that walks these folders.
folder_list="Assembly  Bash  C  C#  C++  Go  Java  JavaScript  Python  RegEx  Rust  Solidity"

## Number of fixture files that went through the full set of checks;
## incremented by process_file().
total_file_counter=0

## Runs every detection and sanitization check against one fixture file.
##
## Arguments:
##   $1 - path of the fixture file to check.
## Globals:
##   total_file_counter (written) - incremented once for every file that
##   is actually checked (skipped files are not counted).
## Outputs:
##   Error messages on stderr only.
## Returns:
##   0 if the file was skipped or if every tool behaved as expected.
##   Exits the whole script with status 1 as soon as one expectation fails.
process_file() {
  ## All working variables are local so that nothing leaks into, or
  ## clobbers, the caller's scope.
  local file_name file_content base_name printf_q_tmpfile
  file_name="$1"

  ## Fail with an explicit message instead of relying on a silent
  ## 'set -e' abort.
  if ! test -r "$file_name"; then
    printf '%s\n' "$0: ERROR: File '$file_name' is not readable." >&2
    exit 1
  fi
  base_name="$(basename -- "$file_name")"

  ## These files are not treated as fixtures: skip them without counting.
  case "$base_name" in
    README.md | omnisharp.json | java.regex | pcre2.regex)
      return 0
      ;;
  esac

  total_file_counter=$(( total_file_counter + 1 ))
  ## Debugging aid: uncomment to list every file being checked.
  #printf '%s\n' "$file_name" >&2

  ## grep-find-unicode-wrapper is similar like grep.
  ## - Exit code 0: if found.
  ## - Non-zero exit code: if not found.
  if ! grep-find-unicode-wrapper "$file_name" >/dev/null ; then
    printf '%s\n' "$0: ERROR: Failed to find unicode using 'grep-find-unicode-wrapper' in file '$file_name'." >&2
    exit 1
  fi

  ## unicode-show will exit 1 if unicode was found.
  ## So if unicode-show was to exit 0, then it would have failed to find unicode.
  if unicode-show "$file_name" >/dev/null ; then
    ## unicode-show exit code 0.
    printf '%s\n' "$0: ERROR: Failed to find unicode using 'unicode-show' (file check) in file '$file_name'." >&2
    exit 1
  fi

  ## Same expectation, but feeding the file through stdin. 'cat' is used
  ## deliberately so that the stdin code path is the one being exercised.
  if cat -- "$file_name" | unicode-show >/dev/null ; then
    ## unicode-show exit code 0.
    printf '%s\n' "$0: ERROR: Failed to find unicode using 'unicode-show' (stdin check) in file '$file_name'." >&2
    exit 1
  fi
  ## unicode-show exit code non-zero.

  ## 'stcatn' should sanitize. Therefore 'unicode-show' should not find any unicode and exit 0.
  ## Check if there is a non-zero exit code in this pipe, because if so, that's a bug.
  if ! stcatn "$file_name" | unicode-show >/dev/null ; then
    ## unicode-show exit code non-zero.
    printf '%s\n' "$0: ERROR: Failed to sanitize string using 'stcatn' in file '$file_name'." >&2
    exit 1
  fi
  ## unicode-show exit code 0.

  if ! cat -- "$file_name" | sanitize-string nolimit >/dev/null ; then
    ## sanitize-string exit code non-zero.
    printf '%s\n' "$0: ERROR: Failed to sanitize string using 'sanitize-string' (stdin check) in file '$file_name'." >&2
    exit 1
  fi

  ## The command substitution below may emit this (harmless) bash warning:
  ## > warning: command substitution: ignored null byte in input
  file_content="$(cat -- "$file_name")"

  if ! sanitize-string nolimit "$file_content" >/dev/null ; then
    ## sanitize-string exit code non-zero.
    printf '%s\n' "$0: ERROR: Failed to sanitize string using 'sanitize-string' (command check) in file '$file_name'." >&2
    exit 1
  fi

  ## string_quote_safe (strings.bsh) wraps 'LC_ALL=C printf %q' to
  ## render its argument as a pure-ASCII printable bash literal,
  ## neutralizing trojan unicode. text-safety-scan-file relies on
  ## this for log-injection-safe path quoting; verify it holds for
  ## the same trojan corpus that exercises the other tools.
  ##
  ## Use grep-find-unicode-wrapper (the same tool already used at
  ## the top of process_file()) to verify the property. It is
  ## path-based, so stage the output via mktemp.
  printf_q_tmpfile="$(mktemp)"
  if ! string_quote_safe "$file_content" > "$printf_q_tmpfile" ; then
    ## Do not leave the staging file behind on failure.
    safe-rm --force -- "$printf_q_tmpfile"
    printf '%s\n' "$0: ERROR: 'string_quote_safe' failed for file '$file_name'." >&2
    exit 1
  fi
  ## grep-find-unicode-wrapper does not consume '--' itself (it is a
  ## thin wrapper around grep that forwards "$@"); pass '--' so grep
  ## treats the path as a positional argument even if it starts with
  ## '-'. mktemp paths begin with '/' so this is defensive, but
  ## documents intent for any future test using a user-controlled path.
  if grep-find-unicode-wrapper -- "$printf_q_tmpfile" >/dev/null ; then
    safe-rm --force -- "$printf_q_tmpfile"
    ## The message is passed as a '%s' argument, not as the format string,
    ## so a single '%' is correct here ('%%' would be printed verbatim).
    printf '%s\n' "$0: ERROR: 'LC_ALL=C printf %q' did not neutralize trojan unicode in file '$file_name'." >&2
    exit 1
  fi
  safe-rm --force -- "$printf_q_tmpfile"

  ## text-safety-scan-file orchestrates unicode-show + modeline-show.
  ## All fixtures here are known-trojan, so it must exit non-zero.
  if text-safety-scan-file -- "$file_name" </dev/null >/dev/null 2>&1 ; then
    printf '%s\n' "$0: ERROR: 'text-safety-scan-file' did not flag trojan in file '$file_name'." >&2
    exit 1
  fi

  return 0
}

## The trojan-source corpus must have been cloned into the home folder.
if ! test -d "$HOME/trojan-source" ; then
  printf '%s\n' "
$0: ERROR: Folder '$HOME/trojan-source' missing. To get it:

cd ~
git clone git@github.com:nickboucher/trojan-source.git" >&2
  exit 1
fi

## Generate the additional fixtures. Each generator is given its target
## folder as argument; the loops below expect to find the fixture files
## directly inside these folders:
## $HOME/suspicious-unicode-tests-temp-folder/suspicious-unicode-test
## $HOME/suspicious-unicode-tests-temp-folder/suspicious-ascii-test
/usr/libexec/helper-scripts/write-suspicious-unicode "$HOME/suspicious-unicode-tests-temp-folder/suspicious-unicode-test" >/dev/null
/usr/libexec/helper-scripts/write-suspicious-ascii "$HOME/suspicious-unicode-tests-temp-folder/suspicious-ascii-test" >/dev/null

## Runs process_file() on every regular file located directly inside the
## folder given as "$1" (no recursion, paths matching '*.git*' excluded).
process_files_in_folder() {
  local folder_path file_name
  folder_path="$1"
  while IFS= read -r -d '' file_name; do
    ## Must be outside of process substitution to be able to exit script.
    process_file "$file_name"
  done < <(find "$folder_path" -maxdepth 1 -type f -not -iwholename '*.git*' -print0 )
}

## Intentionally unquoted: 'folder_list' is a whitespace-separated list.
for folder_name in $folder_list ; do
  process_files_in_folder "$HOME/trojan-source/$folder_name"
done

process_files_in_folder "$HOME/suspicious-unicode-tests-temp-folder/suspicious-unicode-test"
process_files_in_folder "$HOME/suspicious-unicode-tests-temp-folder/suspicious-ascii-test"

## Guard against a vacuous pass: a failing or empty 'find' yields no
## files, and without this check the script would still report OK.
if [ "$total_file_counter" -eq 0 ]; then
  printf '%s\n' "$0: ERROR: No fixture file was processed. Are the fixture folders empty?" >&2
  exit 1
fi

## Newline handling of 'stcatn' ('wc -l' counts newline characters).
## Check 1: input without trailing newline must result in exactly one line.
if [ "$(printf '%s' "test" | stcatn | wc -l)" != "1" ]; then
  printf '%s\n' "$0: ERROR: 1" >&2
  exit 1
fi

## Check 2: input with one trailing newline must still be exactly one line.
if [ "$(printf '%s\n' "test" | stcatn | wc -l)" != "1" ]; then
  printf '%s\n' "$0: ERROR: 2" >&2
  exit 1
fi

## Check 3: input ending in two newlines must result in two lines.
if [ "$(printf '%s\n\n' "test" | stcatn | wc -l)" != "2" ]; then
  printf '%s\n' "$0: ERROR: 3" >&2
  exit 1
fi

## Whitespace-only fixture: newline, space, newline, space (no trailing newline).
mkdir --parents -- "$HOME/suspicious-unicode-tests-temp-folder/suspicious-whitespace-test"
printf "\n \n " | sponge -- "$HOME/suspicious-unicode-tests-temp-folder/suspicious-whitespace-test/1.txt"

## Check 4: 'unicode-show' must exit 0 on the 'stcatn' output of that fixture.
if ! stcatn "$HOME/suspicious-unicode-tests-temp-folder/suspicious-whitespace-test/1.txt" | unicode-show ; then
  printf '%s\n' "$0: ERROR: 4" >&2
  exit 1
fi

## Check 5: the 'stcatn' output of that fixture must consist of three lines.
if [ "$(stcatn "$HOME/suspicious-unicode-tests-temp-folder/suspicious-whitespace-test/1.txt" | wc -l)" != "3" ]; then
  printf '%s\n' "$0: ERROR: 5" >&2
  exit 1
fi

## text-safety-scan-find smoke. text-safety-scan-file is exercised
## per-file inside process_file(); this one assertion covers the
## find/xargs wrapper itself.
trojan_dir="$HOME/suspicious-unicode-tests-temp-folder/suspicious-unicode-test"
## Capture the exit status without tripping 'set -e'.
rc=0
text-safety-scan-find "$trojan_dir" >/dev/null 2>&1 || rc=$?
if [ "$rc" -eq 0 ]; then
  printf '%s\n' "$0: ERROR: text-safety-scan-find on trojan dir exited 0, expected non-zero" >&2
  exit 1
fi

printf '%s\n' "$0: INFO: OK"
