/**
 * Lezer doesn't support case-insensitive keywords, so this ExternalTokenizer matches them instead.
 */
import { ExternalTokenizer } from "@lezer/lr";
import {
  ABS_KEY,
  ADD_KEY,
  ALL_KEY,
  AS_KEY,
  ASC_KEY,
  ASK_KEY,
  AVG_KEY,
  BASE_KEY,
  BIND_KEY,
  BNODE_KEY,
  BOUND_KEY,
  BY_KEY,
  CEIL_KEY,
  CLEAR_KEY,
  COALESCE_KEY,
  CONCAT_KEY,
  CONSTRUCT_KEY,
  CONTAINS_KEY,
  COPY_KEY,
  COUNT_KEY,
  CREATE_KEY,
  DATA_KEY,
  DATATYPE_KEY,
  DAY_KEY,
  DEFAULT_KEY,
  DELETE_KEY,
  DESC_KEY,
  DESCRIBE_KEY,
  DISTINCT_KEY,
  DROP_KEY,
  ENCODE_FOR_URI_KEY,
  EXISTS_KEY,
  FALSE_KEY,
  FILTER_KEY,
  FLOOR_KEY,
  FROM_KEY,
  GRAPH_KEY,
  GROUP_CONCAT_KEY,
  GROUP_KEY,
  HAVING_KEY,
  HOURS_KEY,
  IF_KEY,
  IN_KEY,
  INSERT_KEY,
  INTO_KEY,
  IRI_KEY,
  ISBLANK_KEY,
  ISIRI_KEY,
  ISLITERAL_KEY,
  ISNUMERIC_KEY,
  ISURI_KEY,
  LANG_KEY,
  LANGMATCHES_KEY,
  LCASE_KEY,
  LIMIT_KEY,
  LOAD_KEY,
  MAX_KEY,
  MD5_KEY,
  MIN_KEY,
  MINUS_KEY,
  MINUTES_KEY,
  MONTH_KEY,
  MOVE_KEY,
  NAMED_KEY,
  NOT_KEY,
  NOW_KEY,
  OFFSET_KEY,
  OPTIONAL_KEY,
  ORDER_KEY,
  PREFIX_KEY,
  RAND_KEY,
  REDUCED_KEY,
  REGEX_KEY,
  REPLACE_KEY,
  ROUND_KEY,
  SAMETERM_KEY,
  SAMPLE_KEY,
  SECONDS_KEY,
  SELECT_KEY,
  SEPARATOR_KEY,
  SERVICE_KEY,
  SHA1_KEY,
  SHA256_KEY,
  SHA384_KEY,
  SHA512_KEY,
  SILENT_KEY,
  STR_KEY,
  STRAFTER_KEY,
  STRBEFORE_KEY,
  STRDT_KEY,
  STRENDS_KEY,
  STRLANG_KEY,
  STRLEN_KEY,
  STRSTARTS_KEY,
  STRUUID_KEY,
  SUBSTR_KEY,
  SUM_KEY,
  TIMEZONE_KEY,
  TO_KEY,
  TRUE_KEY,
  TZ_KEY,
  UCASE_KEY,
  UNDEF_KEY,
  UNION_KEY,
  URI_KEY,
  USING_KEY,
  UUID_KEY,
  VALUES_KEY,
  WHERE_KEY,
  WITH_KEY,
  YEAR_KEY,
} from "./sparqlParser.terms.ts";

/**
 * Lookup table from the upper-case spelling of a keyword to the term constant
 * imported from `./sparqlParser.terms.ts`.
 *
 * Keys are upper-case because the tokenizer upper-cases the scanned word before
 * looking it up. Entries are listed in the same order as the import list so the
 * two can be compared line by line.
 */
const wordMap: Record<string, number> = {
  ABS: ABS_KEY,
  ADD: ADD_KEY,
  ALL: ALL_KEY,
  AS: AS_KEY,
  ASC: ASC_KEY,
  ASK: ASK_KEY,
  AVG: AVG_KEY,
  BASE: BASE_KEY,
  BIND: BIND_KEY,
  BNODE: BNODE_KEY,
  BOUND: BOUND_KEY,
  BY: BY_KEY,
  CEIL: CEIL_KEY,
  CLEAR: CLEAR_KEY,
  COALESCE: COALESCE_KEY,
  CONCAT: CONCAT_KEY,
  CONSTRUCT: CONSTRUCT_KEY,
  CONTAINS: CONTAINS_KEY,
  COPY: COPY_KEY,
  COUNT: COUNT_KEY,
  CREATE: CREATE_KEY,
  DATA: DATA_KEY,
  DATATYPE: DATATYPE_KEY,
  DAY: DAY_KEY,
  DEFAULT: DEFAULT_KEY,
  DELETE: DELETE_KEY,
  DESC: DESC_KEY,
  DESCRIBE: DESCRIBE_KEY,
  DISTINCT: DISTINCT_KEY,
  DROP: DROP_KEY,
  ENCODE_FOR_URI: ENCODE_FOR_URI_KEY,
  EXISTS: EXISTS_KEY,
  FALSE: FALSE_KEY,
  FILTER: FILTER_KEY,
  FLOOR: FLOOR_KEY,
  FROM: FROM_KEY,
  GRAPH: GRAPH_KEY,
  GROUP_CONCAT: GROUP_CONCAT_KEY,
  GROUP: GROUP_KEY,
  HAVING: HAVING_KEY,
  HOURS: HOURS_KEY,
  IF: IF_KEY,
  IN: IN_KEY,
  INSERT: INSERT_KEY,
  INTO: INTO_KEY,
  IRI: IRI_KEY,
  ISBLANK: ISBLANK_KEY,
  ISIRI: ISIRI_KEY,
  ISLITERAL: ISLITERAL_KEY,
  ISNUMERIC: ISNUMERIC_KEY,
  ISURI: ISURI_KEY,
  LANG: LANG_KEY,
  LANGMATCHES: LANGMATCHES_KEY,
  LCASE: LCASE_KEY,
  LIMIT: LIMIT_KEY,
  LOAD: LOAD_KEY,
  MAX: MAX_KEY,
  MD5: MD5_KEY,
  MIN: MIN_KEY,
  MINUS: MINUS_KEY,
  MINUTES: MINUTES_KEY,
  MONTH: MONTH_KEY,
  MOVE: MOVE_KEY,
  NAMED: NAMED_KEY,
  NOT: NOT_KEY,
  NOW: NOW_KEY,
  OFFSET: OFFSET_KEY,
  OPTIONAL: OPTIONAL_KEY,
  ORDER: ORDER_KEY,
  PREFIX: PREFIX_KEY,
  RAND: RAND_KEY,
  REDUCED: REDUCED_KEY,
  REGEX: REGEX_KEY,
  REPLACE: REPLACE_KEY,
  ROUND: ROUND_KEY,
  SAMETERM: SAMETERM_KEY,
  SAMPLE: SAMPLE_KEY,
  SECONDS: SECONDS_KEY,
  SELECT: SELECT_KEY,
  SEPARATOR: SEPARATOR_KEY,
  SERVICE: SERVICE_KEY,
  SHA1: SHA1_KEY,
  SHA256: SHA256_KEY,
  SHA384: SHA384_KEY,
  SHA512: SHA512_KEY,
  SILENT: SILENT_KEY,
  STR: STR_KEY,
  STRAFTER: STRAFTER_KEY,
  STRBEFORE: STRBEFORE_KEY,
  STRDT: STRDT_KEY,
  STRENDS: STRENDS_KEY,
  STRLANG: STRLANG_KEY,
  STRLEN: STRLEN_KEY,
  STRSTARTS: STRSTARTS_KEY,
  STRUUID: STRUUID_KEY,
  SUBSTR: SUBSTR_KEY,
  SUM: SUM_KEY,
  TIMEZONE: TIMEZONE_KEY,
  TO: TO_KEY,
  TRUE: TRUE_KEY,
  TZ: TZ_KEY,
  UCASE: UCASE_KEY,
  UNDEF: UNDEF_KEY,
  UNION: UNION_KEY,
  URI: URI_KEY,
  USING: USING_KEY,
  UUID: UUID_KEY,
  VALUES: VALUES_KEY,
  WHERE: WHERE_KEY,
  WITH: WITH_KEY,
  YEAR: YEAR_KEY,
};

/** Returns true when `charNr` is the code of an ASCII lower-case letter (`a`-`z`). */
function isLowerCase(charNr: number) {
  const lowerA = 0x61; // 'a'
  const lowerZ = 0x7a; // 'z'
  return charNr >= lowerA && charNr <= lowerZ;
}
/** Returns true when `charNr` is the code of an ASCII upper-case letter (`A`-`Z`). */
function isUpperCase(charNr: number) {
  const upperA = 0x41; // 'A'
  const upperZ = 0x5a; // 'Z'
  return charNr >= upperA && charNr <= upperZ;
}
/** Returns true when `charNr` is the code of the underscore character (`_`). */
function isUnderscore(charNr: number) {
  const underscore = 0x5f; // '_'
  return charNr === underscore;
}

/**
 * Returns true when `charNr` is the code of one of the digits 1-6 or 8.
 *
 * These are the only digits that occur in the keyword table
 * (MD5, SHA1, SHA256, SHA384, SHA512), so 0, 7 and 9 are deliberately rejected.
 */
function isNumber(charNr: number) {
  const isOneToSix = charNr >= 49 && charNr <= 54; // '1'..'6'
  const isEight = charNr === 56; // '8'
  return isOneToSix || isEight;
}

/**
 * Returns true when `charNr` may appear inside a keyword: an ASCII letter of
 * either case, one of the digits accepted by `isNumber`, or an underscore.
 */
function isKeyWordChar(charNr: number) {
  if (isLowerCase(charNr)) return true;
  if (isUpperCase(charNr)) return true;
  if (isNumber(charNr)) return true;
  return isUnderscore(charNr);
}

/**
 * Returns true when `charNr` is treated as a possible character of a prefix
 * name: any keyword character, or one of `-`, `.` and `:`.
 *
 * NOTE(review): because `isKeyWordChar` only admits the digits 1-6 and 8, the
 * digits 0, 7 and 9 are not accepted here either - confirm this is intended.
 */
function canBePartOfPrefixName(charNr: number) {
  if (isKeyWordChar(charNr)) return true;
  const hyphen = 45; // '-'
  const dot = 46; // '.'
  const colon = 58; // ':'
  return charNr === hyphen || charNr === dot || charNr === colon;
}
/**
 * Returns true when `charNr` is a colon and the character after it
 * (`charNr2`) is not a keyword character, i.e. the colon closes a prefix
 * rather than leading into a local name.
 */
function isEndOfPrefixDeclaration(charNr: number, charNr2: number) {
  const colon = 58; // ':'
  if (charNr !== colon) return false;
  return !isKeyWordChar(charNr2);
}

/**
 * External tokenizer that recognises keywords regardless of letter case.
 *
 * It consumes the longest run of keyword characters at the current position,
 * upper-cases it and looks it up in `wordMap`. A token is accepted only when
 * the word is a known keyword and the characters that follow do not make it
 * look like (part of) a prefix name. In every other case it returns without
 * accepting anything.
 */
export const KeyWords = new ExternalTokenizer((input, stack) => {
  // This guard also covers end of input: `input.next` is negative there, which
  // is never a keyword character. It guarantees the word below is non-empty.
  if (!isKeyWordChar(input.next)) return;

  let word = "";
  do {
    word += String.fromCharCode(input.next);
    input.advance();
  } while (isKeyWordChar(input.next));

  // Look at the two characters after the word to make sure we are not
  // tokenizing a prefix name or the end of a prefix declaration.
  const following = input.next;
  const afterFollowing = input.peek(1);
  if (canBePartOfPrefixName(following) && canBePartOfPrefixName(afterFollowing)) return;
  if (isEndOfPrefixDeclaration(following, afterFollowing)) return;

  // Single lookup; unknown words yield `undefined` and no token is emitted.
  const term = wordMap[word.toUpperCase()];
  if (term) input.acceptToken(term);
}, {});
