Hashing interview puzzle
up vote
1
down vote
favorite
I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:
struct CustomHasher {
/// An enum of the errors that may be thrown
enum HashingError: Error {
/// Thrown by the initializer
/// when the alphabet contains repeating letters
case invalidAlphabet
/// Thrown by the initializer
/// when the base is negative
case invalidBase
/// Thrown by the initializer
/// when the offset is negative
case invalidOffset
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case outOfAlphabet(String)
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case exceedsInt64
/// Thrown by the reverseHash(_:) function
/// when the string provided uses illegal letters
case invalidHash
}
//Parameters
private let base: Int64
private let offset: Int64
private let alphabet: String
// An array that eases the access to the elements of the alphabet
private let alphabetArray: [Character]
private let stringLengthLimit: Int
/// Convinience inializer
/// - Parameters:
/// - alphabet: Must be a string of unique characters
/// - offset: A strictly positive Int64
/// - base: A strictly positive Int64
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.invalidOffset
init(alphabet: String, offset: Int64, base: Int64) throws {
self.alphabet = alphabet
self.alphabetArray = Array(alphabet)
guard alphabetArray.count == Set(alphabet).count else {
throw HashingError.invalidAlphabet
}
guard offset > 0 else {
throw HashingError.invalidOffset
}
self.offset = offset
guard base > 1 else {
throw HashingError.invalidBase
}
self.base = base
let b = Double(base)
let c = Double(alphabetArray.count)
let dOffset = Double(offset)
let int64limit = Double(Int64.max)
self.stringLengthLimit = ((0...).first {
let power = pow(b, Double($0))
let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
let head = dOffset * power
return head + tail > int64limit
} ?? 0) - 1
}
/// Takes a string and converts it to a corresponding Int64
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.exceedsInt64
func hash(_ str: String) throws -> Int64 {
guard Array(str).count <= stringLengthLimit else {
throw HashingError.exceedsInt64
}
var h: Int64 = offset
for char in str {
if let index: Int = alphabetArray.firstIndex(of: char) {
h = h * base + Int64(index)
} else {
throw HashingError.outOfAlphabet(alphabet)
}
}
return h
}
/// Reverses the hashing process
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.invalidHash
func reverseHash(_ hash: Int64) throws -> String {
guard hash >= offset else {
throw HashingError.invalidHash
}
//Reached the end
if hash == offset {
return ""
}
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
let index: Int = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
throw HashingError.invalidHash
}
let char: Character = alphabetArray[index]
return try reverseHash(quotient) + String(char)
}
}
And here it is in use:
let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
offset: 7,
base: 37)
do {
try base37.hash("leepadg")
} catch {
print(error)
}
do {
try base37.reverseHash(680131659347)
} catch {
print(error)
}
Feedback on all aspects of the code is welcome, such as (but not limited to):
- Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?
- Possible improvements (speed, clarity, especially the latter),
- Naming,
- Better comments.
interview-questions swift hashcode
add a comment |
up vote
1
down vote
favorite
I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:
struct CustomHasher {
/// An enum of the errors that may be thrown
enum HashingError: Error {
/// Thrown by the initializer
/// when the alphabet contains repeating letters
case invalidAlphabet
/// Thrown by the initializer
/// when the base is negative
case invalidBase
/// Thrown by the initializer
/// when the offset is negative
case invalidOffset
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case outOfAlphabet(String)
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case exceedsInt64
/// Thrown by the reverseHash(_:) function
/// when the string provided uses illegal letters
case invalidHash
}
//Parameters
private let base: Int64
private let offset: Int64
private let alphabet: String
// An array that eases the access to the elements of the alphabet
private let alphabetArray: [Character]
private let stringLengthLimit: Int
/// Convinience inializer
/// - Parameters:
/// - alphabet: Must be a string of unique characters
/// - offset: A strictly positive Int64
/// - base: A strictly positive Int64
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.invalidOffset
init(alphabet: String, offset: Int64, base: Int64) throws {
self.alphabet = alphabet
self.alphabetArray = Array(alphabet)
guard alphabetArray.count == Set(alphabet).count else {
throw HashingError.invalidAlphabet
}
guard offset > 0 else {
throw HashingError.invalidOffset
}
self.offset = offset
guard base > 1 else {
throw HashingError.invalidBase
}
self.base = base
let b = Double(base)
let c = Double(alphabetArray.count)
let dOffset = Double(offset)
let int64limit = Double(Int64.max)
self.stringLengthLimit = ((0...).first {
let power = pow(b, Double($0))
let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
let head = dOffset * power
return head + tail > int64limit
} ?? 0) - 1
}
/// Takes a string and converts it to a corresponding Int64
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.exceedsInt64
func hash(_ str: String) throws -> Int64 {
guard Array(str).count <= stringLengthLimit else {
throw HashingError.exceedsInt64
}
var h: Int64 = offset
for char in str {
if let index: Int = alphabetArray.firstIndex(of: char) {
h = h * base + Int64(index)
} else {
throw HashingError.outOfAlphabet(alphabet)
}
}
return h
}
/// Reverses the hashing process
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.invalidHash
func reverseHash(_ hash: Int64) throws -> String {
guard hash >= offset else {
throw HashingError.invalidHash
}
//Reached the end
if hash == offset {
return ""
}
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
let index: Int = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
throw HashingError.invalidHash
}
let char: Character = alphabetArray[index]
return try reverseHash(quotient) + String(char)
}
}
And here it is in use:
let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
offset: 7,
base: 37)
do {
try base37.hash("leepadg")
} catch {
print(error)
}
do {
try base37.reverseHash(680131659347)
} catch {
print(error)
}
Feedback on all aspects of the code is welcome, such as (but not limited to):
- Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?
- Possible improvements (speed, clarity, especially the latter),
- Naming,
- Better comments.
interview-questions swift hashcode
add a comment |
up vote
1
down vote
favorite
up vote
1
down vote
favorite
I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:
struct CustomHasher {
/// An enum of the errors that may be thrown
enum HashingError: Error {
/// Thrown by the initializer
/// when the alphabet contains repeating letters
case invalidAlphabet
/// Thrown by the initializer
/// when the base is negative
case invalidBase
/// Thrown by the initializer
/// when the offset is negative
case invalidOffset
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case outOfAlphabet(String)
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case exceedsInt64
/// Thrown by the reverseHash(_:) function
/// when the string provided uses illegal letters
case invalidHash
}
//Parameters
private let base: Int64
private let offset: Int64
private let alphabet: String
// An array that eases the access to the elements of the alphabet
private let alphabetArray: [Character]
private let stringLengthLimit: Int
/// Convinience inializer
/// - Parameters:
/// - alphabet: Must be a string of unique characters
/// - offset: A strictly positive Int64
/// - base: A strictly positive Int64
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.invalidOffset
init(alphabet: String, offset: Int64, base: Int64) throws {
self.alphabet = alphabet
self.alphabetArray = Array(alphabet)
guard alphabetArray.count == Set(alphabet).count else {
throw HashingError.invalidAlphabet
}
guard offset > 0 else {
throw HashingError.invalidOffset
}
self.offset = offset
guard base > 1 else {
throw HashingError.invalidBase
}
self.base = base
let b = Double(base)
let c = Double(alphabetArray.count)
let dOffset = Double(offset)
let int64limit = Double(Int64.max)
self.stringLengthLimit = ((0...).first {
let power = pow(b, Double($0))
let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
let head = dOffset * power
return head + tail > int64limit
} ?? 0) - 1
}
/// Takes a string and converts it to a corresponding Int64
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.exceedsInt64
func hash(_ str: String) throws -> Int64 {
guard Array(str).count <= stringLengthLimit else {
throw HashingError.exceedsInt64
}
var h: Int64 = offset
for char in str {
if let index: Int = alphabetArray.firstIndex(of: char) {
h = h * base + Int64(index)
} else {
throw HashingError.outOfAlphabet(alphabet)
}
}
return h
}
/// Reverses the hashing process
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.invalidHash
func reverseHash(_ hash: Int64) throws -> String {
guard hash >= offset else {
throw HashingError.invalidHash
}
//Reached the end
if hash == offset {
return ""
}
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
let index: Int = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
throw HashingError.invalidHash
}
let char: Character = alphabetArray[index]
return try reverseHash(quotient) + String(char)
}
}
And here it is in use:
let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
offset: 7,
base: 37)
do {
try base37.hash("leepadg")
} catch {
print(error)
}
do {
try base37.reverseHash(680131659347)
} catch {
print(error)
}
Feedback on all aspects of the code is welcome, such as (but not limited to):
- Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?
- Possible improvements (speed, clarity, especially the latter),
- Naming,
- Better comments.
interview-questions swift hashcode
I've stumbled upon this pretty old article about a hashing interview question, and here it is converted to Swift in a more generic way:
struct CustomHasher {
/// An enum of the errors that may be thrown
enum HashingError: Error {
/// Thrown by the initializer
/// when the alphabet contains repeating letters
case invalidAlphabet
/// Thrown by the initializer
/// when the base is negative
case invalidBase
/// Thrown by the initializer
/// when the offset is negative
case invalidOffset
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case outOfAlphabet(String)
/// Thrown by the hash(_:) function
/// when the string provided uses illegal letters
case exceedsInt64
/// Thrown by the reverseHash(_:) function
/// when the string provided uses illegal letters
case invalidHash
}
//Parameters
private let base: Int64
private let offset: Int64
private let alphabet: String
// An array that eases the access to the elements of the alphabet
private let alphabetArray: [Character]
private let stringLengthLimit: Int
/// Convinience inializer
/// - Parameters:
/// - alphabet: Must be a string of unique characters
/// - offset: A strictly positive Int64
/// - base: A strictly positive Int64
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.invalidOffset
init(alphabet: String, offset: Int64, base: Int64) throws {
self.alphabet = alphabet
self.alphabetArray = Array(alphabet)
guard alphabetArray.count == Set(alphabet).count else {
throw HashingError.invalidAlphabet
}
guard offset > 0 else {
throw HashingError.invalidOffset
}
self.offset = offset
guard base > 1 else {
throw HashingError.invalidBase
}
self.base = base
let b = Double(base)
let c = Double(alphabetArray.count)
let dOffset = Double(offset)
let int64limit = Double(Int64.max)
self.stringLengthLimit = ((0...).first {
let power = pow(b, Double($0))
let tail = $0 == 1 ? c * power : c * (power - 1) / (b - 1)
let head = dOffset * power
return head + tail > int64limit
} ?? 0) - 1
}
/// Takes a string and converts it to a corresponding Int64
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.outOfAlphabet(String)
/// - HashingError.exceedsInt64
func hash(_ str: String) throws -> Int64 {
guard Array(str).count <= stringLengthLimit else {
throw HashingError.exceedsInt64
}
var h: Int64 = offset
for char in str {
if let index: Int = alphabetArray.firstIndex(of: char) {
h = h * base + Int64(index)
} else {
throw HashingError.outOfAlphabet(alphabet)
}
}
return h
}
/// Reverses the hashing process
/// - Parameters:
/// - str: The string to be hashed
/// - Throws:
/// - HashingError.invalidHash
func reverseHash(_ hash: Int64) throws -> String {
guard hash >= offset else {
throw HashingError.invalidHash
}
//Reached the end
if hash == offset {
return ""
}
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
let index: Int = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
throw HashingError.invalidHash
}
let char: Character = alphabetArray[index]
return try reverseHash(quotient) + String(char)
}
}
And here it is in use:
let base37 = try! CustomHasher(alphabet: "acdegilmnoprstuw",
offset: 7,
base: 37)
do {
try base37.hash("leepadg")
} catch {
print(error)
}
do {
try base37.reverseHash(680131659347)
} catch {
print(error)
}
Feedback on all aspects of the code is welcome, such as (but not limited to):
- Should such a hasher throw? Or would it be more natural/idiomatic to return nil if it fails?
- Possible improvements (speed, clarity, especially the latter),
- Naming,
- Better comments.
interview-questions swift hashcode
interview-questions swift hashcode
edited Nov 29 at 22:14
Martin R
15.4k12264
15.4k12264
asked Nov 26 at 22:59
Carpsen90
221111
221111
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
up vote
2
down vote
accepted
General stuff
Several explicit type annotations are not needed, such as in
var h: Int64 = offset
if let index: Int = alphabetArray.firstIndex(of: char)
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
Error handling
In the initializer:
Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why
offset
is required to be strictly positive. Is there any problem with allowing a zero offset?
In the
hash
method:
Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it
overflow
error instead ofexceedsInt64
.
In the
reverseHash
method:
This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return
nil
instead of throwing an error if no matching source string is found, meaning “no result.”
The overflow checking
It is not immediately obvious how self.stringLengthLimit
is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double
has a 53-bit significand precision and cannot store all 64-bit integers precisely.
Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With
let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)
the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17
."223372036854775808" has the same length, but its hash calculation would overflow.
This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:
guard let index = alphabetArray.firstIndex(of: char) else {
throw HashingError.outOfAlphabet(alphabet)
}
guard
case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
else {
throw HashingError.exceedsInt64
}
h = r2.partialValue
Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+
and &*
for addition and multiplication with automatic “wrap around.”
Simplifications
There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.
The second line in
let remainder = hash % base
let quotient = (hash - remainder)/base
can be simplified to
let quotient = hash/base
You can also compute both quotient and remainder with a call to the BSD library function lldiv
let remQuot = lldiv(hash, base)
but I doubt that it would be faster.
The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:
var hash = hash
var result = ""
while hash > offset {
let remainder = hash % base
hash = hash / base
let index = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
return nil
}
result.append(alphabetArray[index])
}
return hash < 0 ? nil : String(result.reversed())
But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.
Naming
CustomHasher
does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher
if you want to emphasize where it comes from, or MultAddHasher
... naming is difficult (and subjective)!
Possible alternative method names would be
func hash(of s: String) {}
func string(for hash: Int64) {}
General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust thestringLengthLimit
by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by thehash
function won't overflow.
– Carpsen90
Nov 29 at 10:55
(continuation) AFAIKInt64.max
does fit into double precision:Double.greatestFiniteMagnitude
is way bigger thanInt64.max
. andDouble(Int64.max) == 9223372036854775807
is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the littlelldiv
jewel.quotientAndRemainder(dividingBy:)
was also possible.
– Carpsen90
Nov 29 at 10:56
@Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, tryprint(Double(Int64.max) == Double(Int64.max - 500))
.
– Martin R
Nov 29 at 19:21
You're right, there is a mathematical limit whereDouble
wouldn't be precise enough.Float80
seems to solve that (I'll have to check the docs).
– Carpsen90
Nov 29 at 20:02
@Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
– Martin R
Nov 29 at 20:09
|
show 3 more comments
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
2
down vote
accepted
General stuff
Several explicit type annotations are not needed, such as in
var h: Int64 = offset
if let index: Int = alphabetArray.firstIndex(of: char)
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
Error handling
In the initializer:
Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why
offset
is required to be strictly positive. Is there any problem with allowing a zero offset?
In the
hash
method:
Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it
overflow
error instead ofexceedsInt64
.
In the
reverseHash
method:
This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return
nil
instead of throwing an error if no matching source string is found, meaning “no result.”
The overflow checking
It is not immediately obvious how self.stringLengthLimit
is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double
has a 53-bit significand precision and cannot store all 64-bit integers precisely.
Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With
let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)
the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17
."223372036854775808" has the same length, but its hash calculation would overflow.
This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:
guard let index = alphabetArray.firstIndex(of: char) else {
throw HashingError.outOfAlphabet(alphabet)
}
guard
case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
else {
throw HashingError.exceedsInt64
}
h = r2.partialValue
Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+
and &*
for addition and multiplication with automatic “wrap around.”
Simplifications
There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.
The second line in
let remainder = hash % base
let quotient = (hash - remainder)/base
can be simplified to
let quotient = hash/base
You can also compute both quotient and remainder with a call to the BSD library function lldiv
let remQuot = lldiv(hash, base)
but I doubt that it would be faster.
The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:
var hash = hash
var result = ""
while hash > offset {
let remainder = hash % base
hash = hash / base
let index = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
return nil
}
result.append(alphabetArray[index])
}
return hash < 0 ? nil : String(result.reversed())
But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.
Naming
CustomHasher
does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher
if you want to emphasize where it comes from, or MultAddHasher
... naming is difficult (and subjective)!
Possible alternative method names would be
func hash(of s: String) {}
func string(for hash: Int64) {}
General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust thestringLengthLimit
by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by thehash
function won't overflow.
– Carpsen90
Nov 29 at 10:55
(continuation) AFAIKInt64.max
does fit into double precision:Double.greatestFiniteMagnitude
is way bigger thanInt64.max
. andDouble(Int64.max) == 9223372036854775807
is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the littlelldiv
jewel.quotientAndRemainder(dividingBy:)
was also possible.
– Carpsen90
Nov 29 at 10:56
@Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, tryprint(Double(Int64.max) == Double(Int64.max - 500))
.
– Martin R
Nov 29 at 19:21
You're right, there is a mathematical limit whereDouble
wouldn't be precise enough.Float80
seems to solve that (I'll have to check the docs).
– Carpsen90
Nov 29 at 20:02
@Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
– Martin R
Nov 29 at 20:09
|
show 3 more comments
up vote
2
down vote
accepted
General stuff
Several explicit type annotations are not needed, such as in
var h: Int64 = offset
if let index: Int = alphabetArray.firstIndex(of: char)
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
Error handling
In the initializer:
Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why
offset
is required to be strictly positive. Is there any problem with allowing a zero offset?
In the
hash
method:
Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it
overflow
error instead ofexceedsInt64
.
In the
reverseHash
method:
This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return
nil
instead of throwing an error if no matching source string is found, meaning “no result.”
The overflow checking
It is not immediately obvious how self.stringLengthLimit
is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double
has a 53-bit significand precision and cannot store all 64-bit integers precisely.
Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With
let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)
the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17
."223372036854775808" has the same length, but its hash calculation would overflow.
This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:
guard let index = alphabetArray.firstIndex(of: char) else {
throw HashingError.outOfAlphabet(alphabet)
}
guard
case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
else {
throw HashingError.exceedsInt64
}
h = r2.partialValue
Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+
and &*
for addition and multiplication with automatic “wrap around.”
Simplifications
There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.
The second line in
let remainder = hash % base
let quotient = (hash - remainder)/base
can be simplified to
let quotient = hash/base
You can also compute both quotient and remainder with a call to the BSD library function lldiv
let remQuot = lldiv(hash, base)
but I doubt that it would be faster.
The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:
var hash = hash
var result = ""
while hash > offset {
let remainder = hash % base
hash = hash / base
let index = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
return nil
}
result.append(alphabetArray[index])
}
return hash < 0 ? nil : String(result.reversed())
But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.
Naming
CustomHasher
does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher
if you want to emphasize where it comes from, or MultAddHasher
... naming is difficult (and subjective)!
Possible alternative method names would be
func hash(of s: String) {}
func string(for hash: Int64) {}
General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust thestringLengthLimit
by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by thehash
function won't overflow.
– Carpsen90
Nov 29 at 10:55
(continuation) AFAIKInt64.max
does fit into double precision:Double.greatestFiniteMagnitude
is way bigger thanInt64.max
. andDouble(Int64.max) == 9223372036854775807
is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the littlelldiv
jewel.quotientAndRemainder(dividingBy:)
was also possible.
– Carpsen90
Nov 29 at 10:56
@Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, tryprint(Double(Int64.max) == Double(Int64.max - 500))
.
– Martin R
Nov 29 at 19:21
You're right, there is a mathematical limit whereDouble
wouldn't be precise enough.Float80
seems to solve that (I'll have to check the docs).
– Carpsen90
Nov 29 at 20:02
@Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
– Martin R
Nov 29 at 20:09
|
show 3 more comments
up vote
2
down vote
accepted
up vote
2
down vote
accepted
General stuff
Several explicit type annotations are not needed, such as in
var h: Int64 = offset
if let index: Int = alphabetArray.firstIndex(of: char)
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
Error handling
In the initializer:
Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why
offset
is required to be strictly positive. Is there any problem with allowing a zero offset?
In the
hash
method:
Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it
overflow
error instead ofexceedsInt64
.
In the
reverseHash
method:
This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return
nil
instead of throwing an error if no matching source string is found, meaning “no result.”
The overflow checking
It is not immediately obvious how self.stringLengthLimit
is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double
has a 53-bit significand precision and cannot store all 64-bit integers precisely.
Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With
let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)
the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17
."223372036854775808" has the same length, but its hash calculation would overflow.
This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:
guard let index = alphabetArray.firstIndex(of: char) else {
throw HashingError.outOfAlphabet(alphabet)
}
guard
case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
else {
throw HashingError.exceedsInt64
}
h = r2.partialValue
Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+
and &*
for addition and multiplication with automatic “wrap around.”
Simplifications
There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.
The second line in
let remainder = hash % base
let quotient = (hash - remainder)/base
can be simplified to
let quotient = hash/base
You can also compute both quotient and remainder with a call to the BSD library function lldiv
let remQuot = lldiv(hash, base)
but I doubt that it would be faster.
The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:
var hash = hash
var result = ""
while hash > offset {
let remainder = hash % base
hash = hash / base
let index = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
return nil
}
result.append(alphabetArray[index])
}
return hash < 0 ? nil : String(result.reversed())
But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.
Naming
CustomHasher
does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher
if you want to emphasize where it comes from, or MultAddHasher
... naming is difficult (and subjective)!
Possible alternative method names would be
func hash(of s: String) {}
func string(for hash: Int64) {}
General stuff
Several explicit type annotations are not needed, such as in
var h: Int64 = offset
if let index: Int = alphabetArray.firstIndex(of: char)
let remainder: Int64 = hash % base
let quotient: Int64 = (hash - remainder)/base
Error handling
In the initializer:
Throwing an error for illegal parameters is fine, and allows to provide more details about the error than in a failable initializer. I only wonder why
offset
is required to be strictly positive. Is there any problem with allowing a zero offset?
In the
hash
method:
Again, throwing an error for bad input seems fine to me. However: Most hashing method accept arbitrary long input strings. If “overflow” is an error for this very special hasher then I would call just it
overflow
error instead ofexceedsInt64
.
In the
reverseHash
method:
This is again a special situation for this hasher, most hashing methods are designed in a way to make “dehashing” as computing intensive as possible. Here I would return
nil
instead of throwing an error if no matching source string is found, meaning “no result.”
The overflow checking
It is not immediately obvious how self.stringLengthLimit
is calculated, this calls for an explaining comment. Also I am always a bit suspicious if integer and floating point arithmetic is mixed: A (64-bit) Double
has a 53-bit significand precision and cannot store all 64-bit integers precisely.
Anyway: Detecting an overflow based on the string length alone cannot work in all cases. Here is an example: With
let base10 = CustomHasher(alphabet: "0123456789", offset: 9, base: 10)
the hash of "223372036854775807" fits into a 64-bit integer, but your program rejects that input string because its length exceeds stringLengthLimit = 17
."223372036854775808" has the same length, but its hash calculation would overflow.
This shows that it is difficult to detect the overflow in advance. As an alternative, one could use the “overflow-reporting methods” for multiplication and addition. This is more code (and not nice to read) but detects overflow reliably:
guard let index = alphabetArray.firstIndex(of: char) else {
throw HashingError.outOfAlphabet(alphabet)
}
guard
case let r1 = h.multipliedReportingOverflow(by: base), !r1.overflow,
case let r2 = r1.partialValue.addingReportingOverflow(Int64(index)), !r2.overflow
else {
throw HashingError.exceedsInt64
}
h = r2.partialValue
Of course these thoughts apply only to your special hasher. Usually one would accept arbitrary input, using (for example) &+
and &*
for addition and multiplication with automatic “wrap around.”
Simplifications
There is not very much to say, the code is written clearly. You store the given alphabet both as the original string and as an array of characters, but later use only the array.
The second line in
let remainder = hash % base
let quotient = (hash - remainder)/base
can be simplified to
let quotient = hash/base
You can also compute both quotient and remainder with a call to the BSD library function lldiv
let remQuot = lldiv(hash, base)
but I doubt that it would be faster.
The recursion in the dehasher could be replaced by an iteration, that would allow to append to the result string, and only reverse once, instead of prepending characters to a string repeatedly:
var hash = hash
var result = ""
while hash > offset {
let remainder = hash % base
hash = hash / base
let index = Int(truncatingIfNeeded: remainder)
guard index < alphabetArray.count else {
return nil
}
result.append(alphabetArray[index])
}
return hash < 0 ? nil : String(result.reversed())
But since the number of recursions/iterations is quite limited it probably won't make much of a difference, and you can choose what you find more natural.
Naming
CustomHasher
does not tell anything about the type. I am not good in finding names, perhaps TrelloHasher
if you want to emphasize where it comes from, or MultAddHasher
... naming is difficult (and subjective)!
Possible alternative method names would be
func hash(of s: String) {}
func string(for hash: Int64) {}
answered Nov 28 at 23:05
Martin R
15.4k12264
15.4k12264
General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust thestringLengthLimit
by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by thehash
function won't overflow.
– Carpsen90
Nov 29 at 10:55
(continuation) AFAIKInt64.max
does fit into double precision:Double.greatestFiniteMagnitude
is way bigger thanInt64.max
. andDouble(Int64.max) == 9223372036854775807
is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the littlelldiv
jewel.quotientAndRemainder(dividingBy:)
was also possible.
– Carpsen90
Nov 29 at 10:56
@Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, tryprint(Double(Int64.max) == Double(Int64.max - 500))
.
– Martin R
Nov 29 at 19:21
You're right, there is a mathematical limit whereDouble
wouldn't be precise enough.Float80
seems to solve that (I'll have to check the docs).
– Carpsen90
Nov 29 at 20:02
@Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
– Martin R
Nov 29 at 20:09
|
show 3 more comments
General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust thestringLengthLimit
by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by thehash
function won't overflow.
– Carpsen90
Nov 29 at 10:55
(continuation) AFAIKInt64.max
does fit into double precision:Double.greatestFiniteMagnitude
is way bigger thanInt64.max
. andDouble(Int64.max) == 9223372036854775807
is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the littlelldiv
jewel.quotientAndRemainder(dividingBy:)
was also possible.
– Carpsen90
Nov 29 at 10:56
@Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, tryprint(Double(Int64.max) == Double(Int64.max - 500))
.
– Martin R
Nov 29 at 19:21
You're right, there is a mathematical limit whereDouble
wouldn't be precise enough.Float80
seems to solve that (I'll have to check the docs).
– Carpsen90
Nov 29 at 20:02
@Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
– Martin R
Nov 29 at 20:09
General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the
stringLengthLimit
by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hash
function won't overflow.– Carpsen90
Nov 29 at 10:55
General stuff Type annotations were added purposefully to make the code clearer. It's a personal choice to make the reading experience seamless/effortless. Many errors occur out of misinferring the types. Error handling) With an offset equal to zero, the hash for an empty string would be the same as the one of the first character in the alphabet. The overflow checking) I'll have to adjust the
stringLengthLimit
by 1, and the formula for calculating it (the sum of a geometric series). The basic idea is checking that the max number generated by the hash
function won't overflow.– Carpsen90
Nov 29 at 10:55
(continuation) AFAIK
Int64.max
does fit into double precision: Double.greatestFiniteMagnitude
is way bigger than Int64.max
. and Double(Int64.max) == 9223372036854775807
is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv
jewel. quotientAndRemainder(dividingBy:)
was also possible.– Carpsen90
Nov 29 at 10:56
(continuation) AFAIK
Int64.max
does fit into double precision: Double.greatestFiniteMagnitude
is way bigger than Int64.max
. and Double(Int64.max) == 9223372036854775807
is true. I am avoiding wrap around since there might be overlaps (two -or more- words with the same hash). Simplifications) Thank you for the little lldiv
jewel. quotientAndRemainder(dividingBy:)
was also possible.– Carpsen90
Nov 29 at 10:56
@Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try
print(Double(Int64.max) == Double(Int64.max - 500))
.– Martin R
Nov 29 at 19:21
@Carpsen90: You are right about offset=0. – Overflow checking based on the string length alone cannot work, as I tried to demonstrate with "223372036854775807" and "223372036854775808". – Double cannot represent all Int64 precisely, try
print(Double(Int64.max) == Double(Int64.max - 500))
.– Martin R
Nov 29 at 19:21
You're right, there is a mathematical limit where
Double
wouldn't be precise enough. Float80
seems to solve that (I'll have to check the docs).– Carpsen90
Nov 29 at 20:02
You're right, there is a mathematical limit where
Double
wouldn't be precise enough. Float80
seems to solve that (I'll have to check the docs).– Carpsen90
Nov 29 at 20:02
@Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
– Martin R
Nov 29 at 20:09
@Carpsen90: Yes, but just note that Float80 is not portable: github.com/apple/swift/blob/master/stdlib/public/core/…: "Float80 is only available on non-Windows x86 targets."
– Martin R
Nov 29 at 20:09
|
show 3 more comments
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f208486%2fhashing-interview-puzzle%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown