
import NGrams from './vendor/natural/ngrams/ngrams'
import { WordTokenizer } from './vendor/natural/tokenizers/regexp_tokenizer'

// Logger obtained from the `debug` package for the 'wd:natural' namespace.
const debug = require('debug')('wd:natural')
// Single WordTokenizer instance shared by generateNgrams to break a phrase into words.
const tokenizer = new WordTokenizer()
// Recursively flattens arbitrarily nested arrays into a single new array,
// keeping the elements in their original left-to-right order.
const flatten = arr => {
  const flat = []
  arr.forEach(item => {
    if (Array.isArray(item)) {
      // Nested array: flatten it first, then append its elements one by one.
      flatten(item).forEach(inner => { flat.push(inner) })
    } else {
      flat.push(item)
    }
  })
  return flat
}
// Returns the inclusive sequence [min, min + 1, ..., max].
// An inverted range (max < min) yields an empty array: Array.from treats a
// negative length as 0, whereas Array(negativeLength) would throw a RangeError.
const range = (min, max) => Array.from({ length: max - min + 1 }, (_, i) => i + min)

// Using a similar strategy to https://medium.com/xeneta/fuzzy-search-with-mongodb-and-python-57103928ee5d#.xdxu0stiq
// with some different tuning

/**
 * Builds character n-grams for every word of a phrase.
 *
 * The phrase is lower-cased and tokenized into words; for each size from
 * `min` to `max` (inclusive), and for each word in turn, the character
 * n-grams produced by NGrams.ngrams are joined back into strings. Results are
 * ordered by size first, then by word; duplicates are not removed.
 *
 * @param {string} phrase - Text to process; must provide `toLowerCase`.
 * @param {number} [min=3] - Smallest n-gram size.
 * @param {number} [max=5] - Largest n-gram size.
 * @param {boolean} [prefix=false] - When truthy, keep only the n-grams that
 *   are a prefix of at least one tokenized word.
 * @returns {string[]} Flat list of n-gram strings.
 */
export function generateNgrams (phrase, min = 3, max = 5, prefix = false) {
  const words = tokenizer.tokenize(phrase.toLowerCase())
  const sizes = range(min, max)

  const gramsBySize = sizes.map(size => {
    const gramsByWord = words.map(word => {
      const characters = word.split('')
      return NGrams.ngrams(characters, size).map(gram => gram.join(''))
    })
    return flatten(gramsByWord)
  })
  const ngrams = flatten(gramsBySize)

  if (!prefix) {
    return ngrams
  }
  return ngrams.filter(gram => words.some(word => word.startsWith(gram)))
}
/**
 * Convenience wrapper around generateNgrams with prefix filtering enabled.
 *
 * `min` and `max` are forwarded unchanged, so leaving them undefined lets
 * generateNgrams apply its own defaults.
 *
 * @param {string} phrase - Text to process.
 * @param {number} [min] - Smallest n-gram size.
 * @param {number} [max] - Largest n-gram size.
 * @returns {string[]} N-grams that start at least one word of the phrase.
 */
export function generatePrefixNgrams (phrase, min, max) {
  const prefixOnly = true
  return generateNgrams(phrase, min, max, prefixOnly)
}

// Emit a 'loaded' message through the debug logger when this module is evaluated.
debug('loaded')
// Sample invocations, disabled by default; uncomment to log their results
// through the debug logger.
/*
debug(generateNgrams('Increased respiratory rate'))
debug(generateNgrams('History of smoking'))
debug(generateNgrams('Hi'))
debug(generateNgrams('His'))
debug(generateNgrams('History of smoking', 2, 5, true))
debug(generatePrefixNgrams('History of smoking'))
*/

/**
 * Normalizes a string for comparison: surrounding whitespace is trimmed and
 * the result is lower-cased. Any falsy input (null, undefined, '') is
 * treated as the empty string.
 *
 * @param {?string} s - Value to normalize.
 * @returns {string} Trimmed, lower-cased text.
 */
export function normalizeString (s) {
  const text = s || ''
  const trimmed = text.trim()
  return trimmed.toLowerCase()
}
/**
 * Turns a search query into the list of tokens used for matching and scoring.
 *
 * The query is normalized and split on whitespace, and duplicate words are
 * dropped (first occurrence wins). The de-duplicated words joined by single
 * spaces form the "whole query" token, which is added to the set and then
 * appended once more, so it always appears as the final entry.
 *
 * Example: 'Foo bar foo' -> ['foo', 'bar', 'foo bar', 'foo bar'].
 * An empty or missing query yields ['', ''].
 *
 * @param {?string} searchQuery - Raw query text.
 * @returns {string[]} Tokens, ending with the whole normalized query.
 */
export function tokenizeQuery (searchQuery) {
  const words = normalizeString(searchQuery).split(/\s+/)
  const uniqueTokens = new Set(words)
  const wholeQuery = Array.from(uniqueTokens).join(' ')
  // Also include the fully normalized query as a token to improve exact matches
  uniqueTokens.add(wholeQuery)
  // Append it a second time so the whole query counts twice
  return Array.from(uniqueTokens).concat(wholeQuery)
}
/**
 * Returns true if a string matches a given searchQuery.
 *
 * The query is expanded into tokens with tokenizeQuery, and the string
 * matches when at least one token occurs as a substring of the normalized
 * (trimmed, lower-cased) string. Because an empty query produces empty
 * tokens, it matches every string.
 *
 * @param {?string} string - Candidate text to test.
 * @param {?string} searchQuery - Raw query text.
 * @returns {boolean} Whether any query token is found in the string.
 */
export function stringMatchesQuery (string, searchQuery) {
  const queryTokens = tokenizeQuery(searchQuery)
  return stringMatchesTokens(string, queryTokens)
}

/**
 * Returns true when at least one of the given tokens occurs as a substring
 * of the normalized (trimmed, lower-cased) string.
 *
 * Tokens are used exactly as supplied; they are not normalized here.
 *
 * @param {?string} string - Candidate text to test.
 * @param {string[]} tokens - Tokens to look for.
 * @returns {boolean} Whether any token is found in the string.
 */
export function stringMatchesTokens (string, tokens) {
  const haystack = normalizeString(string)
  const occursInHaystack = token => haystack.indexOf(token) >= 0
  return tokens.some(occursInHaystack)
}
/**
 * Scores a string by counting how many entries of `tokens` occur as
 * substrings of the normalized (trimmed, lower-cased) string.
 *
 * Every entry is counted separately, so a token listed twice contributes
 * twice. Tokens are used exactly as supplied; they are not normalized here.
 *
 * @param {?string} string - Candidate text to score.
 * @param {string[]} tokens - Tokens to look for.
 * @returns {number} Number of token entries found in the string.
 */
export function scoreString (string, tokens) {
  const haystack = normalizeString(string)
  const hits = tokens.filter(token => haystack.indexOf(token) >= 0)
  return hits.length
}
