Optimize the prefix tree for string search

150 Views Asked by At

I'm thinking about improving the prefix tree. It allows me to search for the specified number of words containing the input string.

Task: We need a class that implements a list of company names by substring - from the list of all available names, output a certain number of companies that start with the entered line. It is assumed that the class will be called when filling out a form on a website/mobile application with a high RPS (Requests per second).

My Solution:

 class SuggestService(companyNames : Seq[String]) {

  val arrayCompany = companyNames.toArray
  val tree = getTree(Ternary.apply, 0)

  def getTree(tree: Ternary, index: Int): Ternary = {
    if(index == arrayCompany.length-1)
      return tree.insert(arrayCompany(index))

    getTree(tree.insert(arrayCompany(index)), index+1)
  }

  def suggest(input: String, numberOfSuggest : Int) : Seq[String] = {
    val result = tree.keysWithPrefix(input)
    result.take(numberOfSuggest)
  }
}

Tree Class:

sealed trait Ternary {
  def insert(key: String): Ternary = Ternary.insert(this, key, 0)

  def keysWithPrefix(prefix: String): List[String] = Ternary.keys(this, prefix)
}

case class Node(value: Option[Int], char: Char, left: Ternary, mid: Ternary, right: Ternary) extends Ternary

case object Leaf extends Ternary

object Ternary {
  def apply: Ternary = Leaf

  private def keys(root: Ternary, prefix: String): List[String] =
    get(root, prefix, 0) match {
      case None => Nil
      case Some(node) =>
        collect(node, prefix.dropRight(1))
    }

  private def collect(node: Ternary, prefix: String): List[String] =
    node match {
      case Leaf => Nil
      case node: Node if node.value.isDefined =>
        (prefix + node.char) +: (collect(node.left, prefix) ++ collect(node.mid, prefix + node.char) ++ collect(node.right, prefix))
      case node: Node =>
        collect(node.left, prefix) ++ collect(node.mid, prefix + node.char) ++ collect(node.right, prefix)
    }

  private def get(root: Ternary, prefix: String, step: Int): Option[Ternary] = root match {
    case Leaf => None
    case node: Node if node.char > prefix.charAt(step) => get(node.left, prefix, step)
    case node: Node if node.char < prefix.charAt(step) => get(node.right, prefix, step)
    case node: Node if step < prefix.length - 1 => get(node.mid, prefix, step + 1)
    case node: Node => Some(node)
  }

  private def insert(root: Ternary, key: String, step: Int): Ternary = root match {
    case Leaf =>
      val node = Node(None, key.charAt(step), Leaf, Leaf, Leaf)
      insert(node, key, step)

    case node: Node if node.char > key.charAt(step) =>
      val left = insert(node.left, key, step)
      node.copy(left = left)

    case node: Node if node.char < key.charAt(step) =>
      val right = insert(node.right, key, step)
      node.copy(right = right)

    case node: Node if step < key.length - 1 =>
      val mid = insert(node.mid, key, step + 1)
      node.copy(mid = mid)

    case node: Node =>
      node.copy(value = Some(0))
  }
}

The solution works fine, and it seems to promise to work very efficiently, but I am dissatisfied with it to a sufficient extent. By the condition, we must return a list of words in an amount equal to the number of numberOfSuggest.

And I force the tree to return all the words containing input. And only then I take the required number of words from the resulting list:

def suggest(input: String, numberOfSuggest : Int) : Seq[String] = {
    val result = tree.keysWithPrefix(input)
    result.take(numberOfSuggest)
  }

I want to try to save time, and teach the tree to return a ready-made list of words limited by the number of numberOfSuggest.

Experiment: https://scastie.scala-lang.org/m0MxnlChT0GkpGJIBnNnUQ

0

There are 0 best solutions below