Hashing interview puzzle











up vote
1
down vote

favorite












I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:



struct CustomHasher {

/// An enum of the errors that may be thrown
enum HashingError: Error {

/// Thrown by the initializer
/// when the alphabet contains repeating letters
case invalidAlphabet

/// Thrown by the initializer
/// when the base is negative
case invalidBase

/// Thrown by the initializer
/// when the offset is negative
case invalidOffset

/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case outOfAlphabet(String)

/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case exceedsInt64

/// Thrown by the reverseHash(_:) function
/// when the string provided uses illegal letters
case invalidHash
}

//Parameters
private let base: Int64
private let offset: Int64
private let alphabet: String

// An array that eases the access to the elements of the alphabet
private let alphabetArray: [Character]

private let stringLengthLimit: Int

/// Convinience inializer
/// - Parameters:
/// - alphabet: Must be a string of unique characters
/// - offset: A strictly positive Int64
/// - base: A strictly positive Int64
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.invalidOffset
init(alphabet: String, offset: Int64, base: Int64) throws {
self.alphabet = alphabet
self.alphabetArray = Array(alphabet)
guard alphabetArray.count == Set(alphabet).count else {
throw HashingError.invalidAlphabet
}

guard offset > 0 else {
throw HashingError.invalidOffset
}
self.offset = offset

guard base > 1 else {
throw HashingError.invalidBase
}
self.base = base

let b = Double(base)
let c = Double(alphabetArray.count)
let dOffset = Double(offset)
let int64limit = Double(Int64.max)

self.stringLengthLimit = ((0...).first {
let power = pow(b, Double($0))
let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
let head = dOffset * power
return head + tail > int64limit
} ?? 0) - 1
}

/// Takes a string and converts it to a corresponding Int64
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.exceedsInt64
func hash(_ str: String) throws -> Int64 {

guard Array(str).count <= stringLengthLimit else {
throw HashingError.exceedsInt64
}
var h: Int64 = offset
for char in str {
if let index: Int = alphabetArray.firstIndex(of: char) {
h = h * base + Int64(index)
} else {
throw HashingError.outOfAlphabet(alphabet)
}
}
return h
}

/// Reverses the hashing process
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.invalidHash
func reverseHash(_ hash: Int64) throws -> String {

guard hash >= offset else {
throw HashingError.invalidHash
}

//Reached the end
if hash == offset {
return ""
}

let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base

let index: Int = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
throw HashingError.invalidHash
}

let char: Character = alphabetArray[index]
return try reverseHash(quotient) + String(char)
}
}


And here it is in use:



let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
offset: 7,
base: 37)

do {
try base37.hash("leepadg")
} catch {
print(error)
}

do {
try base37.reverseHash(680131659347)
} catch {
print(error)
}


Feedback on all aspects of the code is welcome, such as (but not limited to):




  • Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?

  • Possible improvements (speed, clarity, especially the latter),

  • Naming,

  • Better comments.










share|improve this question




























    up vote
    1
    down vote

    favorite












    I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:



    struct CustomHasher {

    /// An enum of the errors that may be thrown
    enum HashingError: Error {

    /// Thrown by the initializer
    /// when the alphabet contains repeating letters
    case invalidAlphabet

    /// Thrown by the initializer
    /// when the base is negative
    case invalidBase

    /// Thrown by the initializer
    /// when the offset is negative
    case invalidOffset

    /// Thrown by the hash(_:) function
    /// when the string provided uses illegal letters
    case outOfAlphabet(String)

    /// Thrown by the hash(_:) function
    /// when the string provided uses illegal letters
    case exceedsInt64

    /// Thrown by the reverseHash(_:) function
    /// when the string provided uses illegal letters
    case invalidHash
    }

    //Parameters
    private let base: Int64
    private let offset: Int64
    private let alphabet: String

    // An array that eases the access to the elements of the alphabet
    private let alphabetArray: [Character]

    private let stringLengthLimit: Int

    /// Convinience inializer
    /// - Parameters:
    /// - alphabet: Must be a string of unique characters
    /// - offset: A strictly positive Int64
    /// - base: A strictly positive Int64
    /// - Throws:
    /// - HashingError.outOfAlphabet(String)
    /// - HashingError.invalidOffset
    init(alphabet: String, offset: Int64, base: Int64) throws {
    self.alphabet = alphabet
    self.alphabetArray = Array(alphabet)
    guard alphabetArray.count == Set(alphabet).count else {
    throw HashingError.invalidAlphabet
    }

    guard offset > 0 else {
    throw HashingError.invalidOffset
    }
    self.offset = offset

    guard base > 1 else {
    throw HashingError.invalidBase
    }
    self.base = base

    let b = Double(base)
    let c = Double(alphabetArray.count)
    let dOffset = Double(offset)
    let int64limit = Double(Int64.max)

    self.stringLengthLimit = ((0...).first {
    let power = pow(b, Double($0))
    let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
    let head = dOffset * power
    return head + tail > int64limit
    } ?? 0) - 1
    }

    /// Takes a string and converts it to a corresponding Int64
    /// - Parameters:
    /// - str: The string to be hashed
    /// - Throws:
    /// - HashingError.outOfAlphabet(String)
    /// - HashingError.exceedsInt64
    func hash(_ str: String) throws -> Int64 {

    guard Array(str).count <= stringLengthLimit else {
    throw HashingError.exceedsInt64
    }
    var h: Int64 = offset
    for char in str {
    if let index: Int = alphabetArray.firstIndex(of: char) {
    h = h * base + Int64(index)
    } else {
    throw HashingError.outOfAlphabet(alphabet)
    }
    }
    return h
    }

    /// Reverses the hashing process
    /// - Parameters:
    /// - str: The string to be hashed
    /// - Throws:
    /// - HashingError.invalidHash
    func reverseHash(_ hash: Int64) throws -> String {

    guard hash >= offset else {
    throw HashingError.invalidHash
    }

    //Reached the end
    if hash == offset {
    return ""
    }

    let remainder: Int64 = hash % base
    let quotient: Int64 = (hash - remainder)/base

    let index: Int = Int(truncatingIfNeeded: remainder)
    guard index < alphabetArray.count else {
    throw HashingError.invalidHash
    }

    let char: Character = alphabetArray[index]
    return try reverseHash(quotient) + String(char)
    }
    }


    And here it is in use:



    let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
    offset: 7,
    base: 37)

    do {
    try base37.hash("leepadg")
    } catch {
    print(error)
    }

    do {
    try base37.reverseHash(680131659347)
    } catch {
    print(error)
    }


    Feedback on all aspects of the code is welcome, such as (but not limited to):




    • Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?

    • Possible improvements (speed, clarity, especially the latter),

    • Naming,

    • Better comments.










    share|improve this question


























      up vote
      1
      down vote

      favorite









      up vote
      1
      down vote

      favorite











      I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:



      struct CustomHasher {

      /// An enum of the errors that may be thrown
      enum HashingError: Error {

      /// Thrown by the initializer
      /// when the alphabet contains repeating letters
      case invalidAlphabet

      /// Thrown by the initializer
      /// when the base is negative
      case invalidBase

      /// Thrown by the initializer
      /// when the offset is negative
      case invalidOffset

      /// Thrown by the hash(_:) function
      /// when the string provided uses illegal letters
      case outOfAlphabet(String)

      /// Thrown by the hash(_:) function
      /// when the string provided uses illegal letters
      case exceedsInt64

      /// Thrown by the reverseHash(_:) function
      /// when the string provided uses illegal letters
      case invalidHash
      }

      //Parameters
      private let base: Int64
      private let offset: Int64
      private let alphabet: String

      // An array that eases the access to the elements of the alphabet
      private let alphabetArray: [Character]

      private let stringLengthLimit: Int

      /// Convinience inializer
      /// - Parameters:
      /// - alphabet: Must be a string of unique characters
      /// - offset: A strictly positive Int64
      /// - base: A strictly positive Int64
      /// - Throws:
      /// - HashingError.outOfAlphabet(String)
      /// - HashingError.invalidOffset
      init(alphabet: String, offset: Int64, base: Int64) throws {
      self.alphabet = alphabet
      self.alphabetArray = Array(alphabet)
      guard alphabetArray.count == Set(alphabet).count else {
      throw HashingError.invalidAlphabet
      }

      guard offset > 0 else {
      throw HashingError.invalidOffset
      }
      self.offset = offset

      guard base > 1 else {
      throw HashingError.invalidBase
      }
      self.base = base

      let b = Double(base)
      let c = Double(alphabetArray.count)
      let dOffset = Double(offset)
      let int64limit = Double(Int64.max)

      self.stringLengthLimit = ((0...).first {
      let power = pow(b, Double($0))
      let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
      let head = dOffset * power
      return head + tail > int64limit
      } ?? 0) - 1
      }

      /// Takes a string and converts it to a corresponding Int64
      /// - Parameters:
      /// - str: The string to be hashed
      /// - Throws:
      /// - HashingError.outOfAlphabet(String)
      /// - HashingError.exceedsInt64
      func hash(_ str: String) throws -> Int64 {

      guard Array(str).count <= stringLengthLimit else {
      throw HashingError.exceedsInt64
      }
      var h: Int64 = offset
      for char in str {
      if let index: Int = alphabetArray.firstIndex(of: char) {
      h = h * base + Int64(index)
      } else {
      throw HashingError.outOfAlphabet(alphabet)
      }
      }
      return h
      }

      /// Reverses the hashing process
      /// - Parameters:
      /// - str: The string to be hashed
      /// - Throws:
      /// - HashingError.invalidHash
      func reverseHash(_ hash: Int64) throws -> String {

      guard hash >= offset else {
      throw HashingError.invalidHash
      }

      //Reached the end
      if hash == offset {
      return ""
      }

      let remainder: Int64 = hash % base
      let quotient: Int64 = (hash - remainder)/base

      let index: Int = Int(truncatingIfNeeded: remainder)
      guard index < alphabetArray.count else {
      throw HashingError.invalidHash
      }

      let char: Character = alphabetArray[index]
      return try reverseHash(quotient) + String(char)
      }
      }


      And here it is in use:



      let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
      offset: 7,
      base: 37)

      do {
      try base37.hash("leepadg")
      } catch {
      print(error)
      }

      do {
      try base37.reverseHash(680131659347)
      } catch {
      print(error)
      }


      Feedback on all aspects of the code is welcome, such as (but not limited to):




      • Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?

      • Possible improvements (speed, clarity, especially the latter),

      • Naming,

      • Better comments.










      share|improve this question















      I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:



      struct CustomHasher {

      /// An enum of the errors that may be thrown
      enum HashingError: Error {

      /// Thrown by the initializer
      /// when the alphabet contains repeating letters
      case invalidAlphabet

      /// Thrown by the initializer
      /// when the base is negative
      case invalidBase

      /// Thrown by the initializer
      /// when the offset is negative
      case invalidOffset

      /// Thrown by the hash(_:) function
      /// when the string provided uses illegal letters
      case outOfAlphabet(String)

      /// Thrown by the hash(_:) function
      /// when the string provided uses illegal letters
      case exceedsInt64

      /// Thrown by the reverseHash(_:) function
      /// when the string provided uses illegal letters
      case invalidHash
      }

      //Parameters
      private let base: Int64
      private let offset: Int64
      private let alphabet: String

      // An array that eases the access to the elements of the alphabet
      private let alphabetArray: [Character]

      private let stringLengthLimit: Int

      /// Convinience inializer
      /// - Parameters:
      /// - alphabet: Must be a string of unique characters
      /// - offset: A strictly positive Int64
      /// - base: A strictly positive Int64
      /// - Throws:
      /// - HashingError.outOfAlphabet(String)
      /// - HashingError.invalidOffset
      init(alphabet: String, offset: Int64, base: Int64) throws {
      self.alphabet = alphabet
      self.alphabetArray = Array(alphabet)
      guard alphabetArray.count == Set(alphabet).count else {
      throw HashingError.invalidAlphabet
      }

      guard offset > 0 else {
      throw HashingError.invalidOffset
      }
      self.offset = offset

      guard base > 1 else {
      throw HashingError.invalidBase
      }
      self.base = base

      let b = Double(base)
      let c = Double(alphabetArray.count)
      let dOffset = Double(offset)
      let int64limit = Double(Int64.max)

      self.stringLengthLimit = ((0...).first {
      let power = pow(b, Double($0))
      let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
      let head = dOffset * power
      return head + tail > int64limit
      } ?? 0) - 1
      }

      /// Takes a string and converts it to a corresponding Int64
      /// - Parameters:
      /// - str: The string to be hashed
      /// - Throws:
      /// - HashingError.outOfAlphabet(String)
      /// - HashingError.exceedsInt64
      func hash(_ str: String) throws -> Int64 {

      guard Array(str).count <= stringLengthLimit else {
      throw HashingError.exceedsInt64
      }
      var h: Int64 = offset
      for char in str {
      if let index: Int = alphabetArray.firstIndex(of: char) {
      h = h * base + Int64(index)
      } else {
      throw HashingError.outOfAlphabet(alphabet)
      }
      }
      return h
      }

      /// Reverses the hashing process
      /// - Parameters:
      /// - str: The string to be hashed
      /// - Throws:
      /// - HashingError.invalidHash
      func reverseHash(_ hash: Int64) throws -> String {

      guard hash >= offset else {
      throw HashingError.invalidHash
      }

      //Reached the end
      if hash == offset {
      return ""
      }

      let remainder: Int64 = hash % base
      let quotient: Int64 = (hash - remainder)/base

      let index: Int = Int(truncatingIfNeeded: remainder)
      guard index < alphabetArray.count else {
      throw HashingError.invalidHash
      }

      let char: Character = alphabetArray[index]
      return try reverseHash(quotient) + String(char)
      }
      }


      And here it is in use:



      let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
      offset: 7,
      base: 37)

      do {
      try base37.hash("leepadg")
      } catch {
      print(error)
      }

      do {
      try base37.reverseHash(680131659347)
      } catch {
      print(error)
      }


      Feedback on all aspects of the code is welcome, such as (but not limited to):




      • Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?

      • Possible improvements (speed, clarity, especially the latter),

      • Naming,

      • Better comments.







      interview-questions swift hashcode






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited Nov 29 at 22:14









      Martin R

      15.4k12264




      15.4k12264










      asked Nov 26 at 22:59









      Carpsen90

      221111




      221111






















          1 Answer
          1






          active

          oldest

          votes

















          up vote
          2
          down vote



          accepted










          General stuff



          Several explicit type annotations are not needed, such as in



          var h: Int64  = offset

          if let index: Int = alphabetArray.firstIndex(of: char)

          let remainder: Int64 = hash % base
          let quotient: Int64 = (hash - remainder)/base


          Error handling





          • In the initializer:



            Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why offset is required to be strictly positive. Is there any problem with allowing a zero offset?




          • In the hash method:



            Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it overflow error instead of exceedsInt64.




          • In the reverseHash method:



            This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return nil instead of throwing an error if no matching source string is found, meaning “no result.”




          The overflow checking



          It is not immediately obvious how self.stringLengthLimit is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double has a 53-bit significand precision and cannot store all 64-bit integers precisely.



          Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With



          let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)


          the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17."223372036854775808" has the same length, but its hash calculation would overflow.



          This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:



          guard let index = alphabetArray.firstIndex(of: char) else {
          throw HashingError.outOfAlphabet(alphabet)
          }
          guard
          case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
          case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
          else {
          throw HashingError.exceedsInt64
          }
          h = r2.partialValue


          Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+ and &* for addition and multiplication with automatic “wrap around.”



          Simplifications



          There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.



          The second line in



          let remainder = hash % base
          let quotient = (hash - remainder)/base


          can be simplified to



          let quotient = hash/base


          You can also compute both quotient and remainder with a call to the BSD library function lldiv



          let remQuot = lldiv(hash, base)


          but I doubt that it would be faster.



          The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:



          var hash = hash
          var result = ""
          while hash > offset {
          let remainder = hash % base
          hash = hash / base
          let index = Int(truncatingIfNeeded: remainder)
          guard index < alphabetArray.count else {
          return nil
          }
          result.append(alphabetArray[index])
          }
          return hash < 0 ? nil : String(result.reversed())


          But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.



          Naming



          CustomHasher does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher if you want to emphasize where it comes from, or MultAddHasher ... naming is difficult (and subjective)!



          Possible alternative method names would be



          func hash(of s: String) {}
          func string(for hash: Int64) {}





          share|improve this answer





















          • General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the stringLengthLimit by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hashfunction won't overflow.
            – Carpsen90
            Nov 29 at 10:55










          • (continuation) AFAIK Int64.maxdoes fit into double precision: Double.greatestFiniteMagnitude is way bigger than Int64.max. and Double(Int64.max) == 9223372036854775807 is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv jewel. quotientAndRemainder(dividingBy:) was also possible.
            – Carpsen90
            Nov 29 at 10:56










          • @Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try print(Double(Int64.max) == Double(Int64.max - 500)).
            – Martin R
            Nov 29 at 19:21












          • You're right, there is a mathematical limit where Double wouldn't be precise enough. Float80 seems to solve that (I'll have to check the docs).
            – Carpsen90
            Nov 29 at 20:02












          • @Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
            – Martin R
            Nov 29 at 20:09











          Your Answer





          StackExchange.ifUsing("editor", function () {
          return StackExchange.using("mathjaxEditing", function () {
          StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
          StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
          });
          });
          }, "mathjax-editing");

          StackExchange.ifUsing("editor", function () {
          StackExchange.using("externalEditor", function () {
          StackExchange.using("snippets", function () {
          StackExchange.snippets.init();
          });
          });
          }, "code-snippets");

          StackExchange.ready(function() {
          var channelOptions = {
          tags: "".split(" "),
          id: "196"
          };
          initTagRenderer("".split(" "), "".split(" "), channelOptions);

          StackExchange.using("externalEditor", function() {
          // Have to fire editor after snippets, if snippets enabled
          if (StackExchange.settings.snippets.snippetsEnabled) {
          StackExchange.using("snippets", function() {
          createEditor();
          });
          }
          else {
          createEditor();
          }
          });

          function createEditor() {
          StackExchange.prepareEditor({
          heartbeatType: 'answer',
          convertImagesToLinks: false,
          noModals: true,
          showLowRepImageUploadWarning: true,
          reputationToPostImages: null,
          bindNavPrevention: true,
          postfix: "",
          imageUploader: {
          brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
          contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
          allowUrls: true
          },
          onDemand: true,
          discardSelector: ".discard-answer"
          ,immediatelyShowMarkdownHelp:true
          });


          }
          });














          draft saved

          draft discarded


















          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f208486%2fhashing-interview-puzzle%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown

























          1 Answer
          1






          active

          oldest

          votes








          1 Answer
          1






          active

          oldest

          votes









          active

          oldest

          votes






          active

          oldest

          votes








          up vote
          2
          down vote



          accepted










          General stuff



          Several explicit type annotations are not needed, such as in



          var h: Int64  = offset

          if let index: Int = alphabetArray.firstIndex(of: char)

          let remainder: Int64 = hash % base
          let quotient: Int64 = (hash - remainder)/base


          Error handling





          • In the initializer:



            Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why offset is required to be strictly positive. Is there any problem with allowing a zero offset?




          • In the hash method:



            Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it overflow error instead of exceedsInt64.




          • In the reverseHash method:



            This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return nil instead of throwing an error if no matching source string is found, meaning “no result.”




          The overflow checking



          It is not immediately obvious how self.stringLengthLimit is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double has a 53-bit significand precision and cannot store all 64-bit integers precisely.



          Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With



          let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)


          the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17."223372036854775808" has the same length, but its hash calculation would overflow.



          This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:



          guard let index = alphabetArray.firstIndex(of: char) else {
          throw HashingError.outOfAlphabet(alphabet)
          }
          guard
          case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
          case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
          else {
          throw HashingError.exceedsInt64
          }
          h = r2.partialValue


          Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+ and &* for addition and multiplication with automatic “wrap around.”



          Simplifications



          There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.



          The second line in



          let remainder = hash % base
          let quotient = (hash - remainder)/base


          can be simplified to



          let quotient = hash/base


          You can also compute both quotient and remainder with a call to the BSD library function lldiv



          let remQuot = lldiv(hash, base)


          but I doubt that it would be faster.



          The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:



          var hash = hash
          var result = ""
          while hash > offset {
          let remainder = hash % base
          hash = hash / base
          let index = Int(truncatingIfNeeded: remainder)
          guard index < alphabetArray.count else {
          return nil
          }
          result.append(alphabetArray[index])
          }
          return hash < 0 ? nil : String(result.reversed())


          But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.



          Naming



          CustomHasher does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher if you want to emphasize where it comes from, or MultAddHasher ... naming is difficult (and subjective)!



          Possible alternative method names would be



          func hash(of s: String) {}
          func string(for hash: Int64) {}





          share|improve this answer





















          • General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the stringLengthLimit by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hashfunction won't overflow.
            – Carpsen90
            Nov 29 at 10:55










          • (continuation) AFAIK Int64.maxdoes fit into double precision: Double.greatestFiniteMagnitude is way bigger than Int64.max. and Double(Int64.max) == 9223372036854775807 is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv jewel. quotientAndRemainder(dividingBy:) was also possible.
            – Carpsen90
            Nov 29 at 10:56










          • @Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try print(Double(Int64.max) == Double(Int64.max - 500)).
            – Martin R
            Nov 29 at 19:21












          • You're right, there is a mathematical limit where Double wouldn't be precise enough. Float80 seems to solve that (I'll have to check the docs).
            – Carpsen90
            Nov 29 at 20:02












          • @Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
            – Martin R
            Nov 29 at 20:09















          up vote
          2
          down vote



          accepted










          General stuff



          Several explicit type annotations are not needed, such as in



          var h: Int64  = offset

          if let index: Int = alphabetArray.firstIndex(of: char)

          let remainder: Int64 = hash % base
          let quotient: Int64 = (hash - remainder)/base


          Error handling





          • In the initializer:



            Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why offset is required to be strictly positive. Is there any problem with allowing a zero offset?




          • In the hash method:



            Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it overflow error instead of exceedsInt64.




          • In the reverseHash method:



            This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return nil instead of throwing an error if no matching source string is found, meaning “no result.”




          The overflow checking



          It is not immediately obvious how self.stringLengthLimit is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double has a 53-bit significand precision and cannot store all 64-bit integers precisely.



          Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With



          let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)


          the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17."223372036854775808" has the same length, but its hash calculation would overflow.



          This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:



          guard let index = alphabetArray.firstIndex(of: char) else {
          throw HashingError.outOfAlphabet(alphabet)
          }
          guard
          case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
          case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
          else {
          throw HashingError.exceedsInt64
          }
          h = r2.partialValue


          Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+ and &* for addition and multiplication with automatic “wrap around.”



          Simplifications



          There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.



          The second line in



          let remainder = hash % base
          let quotient = (hash - remainder)/base


          can be simplified to



          let quotient = hash/base


          You can also compute both quotient and remainder with a call to the BSD library function lldiv



          let remQuot = lldiv(hash, base)


          but I doubt that it would be faster.



          The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:



          var hash = hash
          var result = ""
          while hash > offset {
          let remainder = hash % base
          hash = hash / base
          let index = Int(truncatingIfNeeded: remainder)
          guard index < alphabetArray.count else {
          return nil
          }
          result.append(alphabetArray[index])
          }
          return hash < 0 ? nil : String(result.reversed())


          But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.



          Naming



          CustomHasher does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher if you want to emphasize where it comes from, or MultAddHasher ... naming is difficult (and subjective)!



          Possible alternative method names would be



          func hash(of s: String) {}
          func string(for hash: Int64) {}





          share|improve this answer





















          • General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the stringLengthLimit by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hashfunction won't overflow.
            – Carpsen90
            Nov 29 at 10:55










          • (continuation) AFAIK Int64.maxdoes fit into double precision: Double.greatestFiniteMagnitude is way bigger than Int64.max. and Double(Int64.max) == 9223372036854775807 is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv jewel. quotientAndRemainder(dividingBy:) was also possible.
            – Carpsen90
            Nov 29 at 10:56










          • @Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try print(Double(Int64.max) == Double(Int64.max - 500)).
            – Martin R
            Nov 29 at 19:21












          • You're right, there is a mathematical limit where Double wouldn't be precise enough. Float80 seems to solve that (I'll have to check the docs).
            – Carpsen90
            Nov 29 at 20:02












          • @Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
            – Martin R
            Nov 29 at 20:09













          up vote
          2
          down vote



          accepted







          up vote
          2
          down vote



          accepted






          General stuff



          Several explicit type annotations are not needed, such as in



          var h: Int64  = offset

          if let index: Int = alphabetArray.firstIndex(of: char)

          let remainder: Int64 = hash % base
          let quotient: Int64 = (hash - remainder)/base


          Error handling





          • In the initializer:



            Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why offset is required to be strictly positive. Is there any problem with allowing a zero offset?




          • In the hash method:



            Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it overflow error instead of exceedsInt64.




          • In the reverseHash method:



            This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return nil instead of throwing an error if no matching source string is found, meaning “no result.”




          The overflow checking



          It is not immediately obvious how self.stringLengthLimit is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double has a 53-bit significand precision and cannot store all 64-bit integers precisely.



          Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With



          let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)


          the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17."223372036854775808" has the same length, but its hash calculation would overflow.



          This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:



          guard let index = alphabetArray.firstIndex(of: char) else {
          throw HashingError.outOfAlphabet(alphabet)
          }
          guard
          case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
          case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
          else {
          throw HashingError.exceedsInt64
          }
          h = r2.partialValue


          Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+ and &* for addition and multiplication with automatic “wrap around.”



          Simplifications



          There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.



          The second line in



          let remainder = hash % base
          let quotient = (hash - remainder)/base


          can be simplified to



          let quotient = hash/base


          You can also compute both quotient and remainder with a call to the BSD library function lldiv



          let remQuot = lldiv(hash, base)


          but I doubt that it would be faster.



          The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:



          var hash = hash
          var result = ""
          while hash > offset {
          let remainder = hash % base
          hash = hash / base
          let index = Int(truncatingIfNeeded: remainder)
          guard index < alphabetArray.count else {
          return nil
          }
          result.append(alphabetArray[index])
          }
          return hash < 0 ? nil : String(result.reversed())


          But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.



          Naming



          CustomHasher does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher if you want to emphasize where it comes from, or MultAddHasher ... naming is difficult (and subjective)!



          Possible alternative method names would be



          func hash(of s: String) {}
          func string(for hash: Int64) {}





          share|improve this answer












          General stuff



          Several explicit type annotations are not needed, such as in



          var h: Int64  = offset

          if let index: Int = alphabetArray.firstIndex(of: char)

          let remainder: Int64 = hash % base
          let quotient: Int64 = (hash - remainder)/base


          Error handling





          • In the initializer:



            Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why offset is required to be strictly positive. Is there any problem with allowing a zero offset?




          • In the hash method:



            Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it overflow error instead of exceedsInt64.




          • In the reverseHash method:



            This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return nil instead of throwing an error if no matching source string is found, meaning “no result.”




          The overflow checking



          It is not immediately obvious how self.stringLengthLimit is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double has a 53-bit significand precision and cannot store all 64-bit integers precisely.



          Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With



          let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)


          the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17."223372036854775808" has the same length, but its hash calculation would overflow.



          This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:



          guard let index = alphabetArray.firstIndex(of: char) else {
          throw HashingError.outOfAlphabet(alphabet)
          }
          guard
          case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
          case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
          else {
          throw HashingError.exceedsInt64
          }
          h = r2.partialValue


          Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+ and &* for addition and multiplication with automatic “wrap around.”



          Simplifications



          There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.



          The second line in



          let remainder = hash % base
          let quotient = (hash - remainder)/base


          can be simplified to



          let quotient = hash/base


          You can also compute both quotient and remainder with a call to the BSD library function lldiv



          let remQuot = lldiv(hash, base)


          but I doubt that it would be faster.



          The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:



          var hash = hash
          var result = ""
          while hash > offset {
          let remainder = hash % base
          hash = hash / base
          let index = Int(truncatingIfNeeded: remainder)
          guard index < alphabetArray.count else {
          return nil
          }
          result.append(alphabetArray[index])
          }
          return hash < 0 ? nil : String(result.reversed())


          But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.



          Naming



          CustomHasher does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher if you want to emphasize where it comes from, or MultAddHasher ... naming is difficult (and subjective)!



          Possible alternative method names would be



          func hash(of s: String) {}
          func string(for hash: Int64) {}






          share|improve this answer












          share|improve this answer



          share|improve this answer










          answered Nov 28 at 23:05









          Martin R

          15.4k12264




          15.4k12264












          • General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the stringLengthLimit by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hashfunction won't overflow.
            – Carpsen90
            Nov 29 at 10:55










          • (continuation) AFAIK Int64.maxdoes fit into double precision: Double.greatestFiniteMagnitude is way bigger than Int64.max. and Double(Int64.max) == 9223372036854775807 is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv jewel. quotientAndRemainder(dividingBy:) was also possible.
            – Carpsen90
            Nov 29 at 10:56










          • @Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try print(Double(Int64.max) == Double(Int64.max - 500)).
            – Martin R
            Nov 29 at 19:21












          • You're right, there is a mathematical limit where Double wouldn't be precise enough. Float80 seems to solve that (I'll have to check the docs).
            – Carpsen90
            Nov 29 at 20:02












          • @Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
            – Martin R
            Nov 29 at 20:09


















          • General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the stringLengthLimit by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hashfunction won't overflow.
            – Carpsen90
            Nov 29 at 10:55










          • (continuation) AFAIK Int64.maxdoes fit into double precision: Double.greatestFiniteMagnitude is way bigger than Int64.max. and Double(Int64.max) == 9223372036854775807 is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv jewel. quotientAndRemainder(dividingBy:) was also possible.
            – Carpsen90
            Nov 29 at 10:56










          • @Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try print(Double(Int64.max) == Double(Int64.max - 500)).
            – Martin R
            Nov 29 at 19:21












          • You're right, there is a mathematical limit where Double wouldn't be precise enough. Float80 seems to solve that (I'll have to check the docs).
            – Carpsen90
            Nov 29 at 20:02












          • @Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
            – Martin R
            Nov 29 at 20:09
















          General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the stringLengthLimit by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hashfunction won't overflow.
          – Carpsen90
          Nov 29 at 10:55




          General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the stringLengthLimit by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hashfunction won't overflow.
          – Carpsen90
          Nov 29 at 10:55












          (continuation) AFAIK Int64.maxdoes fit into double precision: Double.greatestFiniteMagnitude is way bigger than Int64.max. and Double(Int64.max) == 9223372036854775807 is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv jewel. quotientAndRemainder(dividingBy:) was also possible.
          – Carpsen90
          Nov 29 at 10:56




          (continuation) AFAIK Int64.maxdoes fit into double precision: Double.greatestFiniteMagnitude is way bigger than Int64.max. and Double(Int64.max) == 9223372036854775807 is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv jewel. quotientAndRemainder(dividingBy:) was also possible.
          – Carpsen90
          Nov 29 at 10:56












          @Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try print(Double(Int64.max) == Double(Int64.max - 500)).
          – Martin R
          Nov 29 at 19:21






          @Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try print(Double(Int64.max) == Double(Int64.max - 500)).
          – Martin R
          Nov 29 at 19:21














          You're right, there is a mathematical limit where Double wouldn't be precise enough. Float80 seems to solve that (I'll have to check the docs).
          – Carpsen90
          Nov 29 at 20:02






          You're right, there is a mathematical limit where Double wouldn't be precise enough. Float80 seems to solve that (I'll have to check the docs).
          – Carpsen90
          Nov 29 at 20:02














          @Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
          – Martin R
          Nov 29 at 20:09




          @Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
          – Martin R
          Nov 29 at 20:09


















          draft saved

          draft discarded




















































          Thanks for contributing an answer to Code Review Stack Exchange!


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          Use MathJax to format equations. MathJax reference.


          To learn more, see our tips on writing great answers.





          Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


          Please pay close attention to the following guidance:


          • Please be sure to answer the question. Provide details and share your research!

          But avoid



          • Asking for help, clarification, or responding to other answers.

          • Making statements based on opinion; back them up with references or personal experience.


          To learn more, see our tips on writing great answers.




          draft saved


          draft discarded














          StackExchange.ready(
          function () {
          StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f208486%2fhashing-interview-puzzle%23new-answer', 'question_page');
          }
          );

          Post as a guest















          Required, but never shown





















































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown

































          Required, but never shown














          Required, but never shown












          Required, but never shown







          Required, but never shown







          Popular posts from this blog

          Сан-Квентин

          8-я гвардейская общевойсковая армия

          Алькесар