Fast fast low level LastIndexOf
I need an implementation of lastIndexOf that is as fast as possible. I find that the String advance function is extremely slow. I tried using the c strrchr function and tried to copy the string to NSData and using pointers, but I can't get the syntax right. My string will always contain 1 byte character, and the string I'm looking for is "|" always 1 byte.
Any implementation using advance would be too slow, but here's the fastest example I could find:
func indexOf(target: String, startIndex: Int) -> Int
{
var startRange = advance(self.startIndex, startIndex)
var range = self.rangeOfString(target, options: NSStringCompareOptions.LiteralSearch, range: Range<String.Index>(start: startRange, end: self.endIndex))
if let range = range {
return distance(self.startIndex, range.startIndex)
} else {
return -1
}
}
func lastIndexOf(target: String) -> Int
{
var index = -1
var stepIndex = self.indexOf(target)
while stepIndex > -1
{
index = stepIndex
if stepIndex + target.length < self.length
{
stepIndex = indexOf(target, startIndex: stepIndex + target.length)
}
else
{
stepIndex = -1
}
}
return index
}
This is an example of a string that I need to parse. var str: String = "4 | 0 | 66 | 5 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | efland, nc | 36.027992 | -79.2212834 | 0 | 4 | 6 | 0 | 3259744 | 6352141 | 46 | 14 | 1 | 0 | 7 | 7 | 3259744 | 6352141 | 4 | 1 | 0 | 8 | 8 | 3259744 | 6352141 | 4 | 0 | 22 | 9 | 0 | 3259744 | 6352141 | 2 | 3 | Room1 | 2 | 72 | 86330534 | 1 | 0 | 10 | 9 | 3259744 | 6352141 | 4 | 1 | 0 | 11 | 10 | 3259744 | 6352141 | 4 | 1 | 0 | 12 | 11 | 3259744 | 6352141 | 4 | 1 | 0 | 13 | 12 | 3259744 | 6352141 | 4 | 0 | 4 | 14 | 0 | 3259744 | 6352141 | 46 | 24 | 0 | 5 | 15 | 0 | 3259744 | 6352141 | 46 | 654 | 0 | 66 | 0 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | Efland, n / a | 36.027992 | -79.2212834 | 0 | 4 | 16 | 0 | 3259744 | 6352141 | 46 | 4sageReceived:4 | 0 | 66 | 5 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | Efland, n / a | 36.027992 | -79,2212834 | 0 | 4 | 6 | 0 | 3259744 | 6352141 | 46 | 14 | 1 | 0 | 7 | 7 | 3259744 | 6352141 | 4 | 1 | 0 | 8 | 8 | 3259744 | 6352141 | 4 | 0 | 22 | 9 | 0 | 3259744 | 6352141 | 2 | 3 | Room1 | 2 | 72 | 86330534 | 1 | 0 | 10 | 9 | 3259744 | 6352141 | 4 | 1 | 0 | 11 | 10 | 3259744 | 6352141 | 4 | 1 | 0 | 12 | 11 | 3259744 | 6352141 | 4 | 1 | 0 | 13 | 12 | 3259744 | 6352141 | 4 | 0 | 4 | 14 | 0 | 3259744 | 6352141 | 46 | 24 | 0 | 5 | 15 | 0 | 3259744 | 6352141 | 46 | 654 | 0 | 66 | 0 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54y Z5irtHC3 | Mc02 | Efland, n / a | 36.027992 | -79,2212834 | 0 | 4 | 16 | 0 | 3259744 | 6352141 | 46 | 4352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | Efland, n / a | 36.027992 | -79,2212834 | 0 | 4 | 6 | 0 | 3259744 | 6352141 | 46 | 14 | 1 | 0 | 7 | 7 | 3259744 | 6352141 | 4 | 1 | 0 | 8 | 8 | 3259744 | 6352141 | 4 | 0 | 22 | 9 | 0 | 3259744 | 6352141 | 2 | 3 | Room1 | 2 | 72 | 86330534 | 1 | 0 | 10 | 9 | 3259744 | 6352141 | 4 | 1 | 0 | 11 | 10 | 3259744 | 6352141 | 4 | 1 | 0 | 12 | 11 | 3259744 | 6352141 | 4 | 1 | 0 | 13 | 12 | 3259744 | 6352141 | 4 | 0 | 4 | 14 | 0 | 3259744 | 6352141 | 46 | 24 | 0 | 5 | 15 | 0 | 3259744 | 6352141 | 46 | 654 | 0 | 66 | 0 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | efland, nc | 36.027992 | -79.2212834 | 0 | 4 | 16 | 0 | 3259744 | 6352141 | 46 | 4TCPListener.on Received: 4 | 0 | 66 | 5 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | Efland, n / a | 36.027992 | -79,2212834 | 0 | 4 | 6 | 0 | 3259744 | 6352141 | 46 | 14 | 1 | 0 | 7 | 7 | 3259744 | 6352141 | 4 | 1 | 0 | 8 | 8 | 3259744 | 6352141 | 4 | 0 | 22 | 9 | 0 | 3259744 | 6352141 | 2 | 3 | Room1 | 2 | 72 | 86330534 | 1 | 0 | 10 | 9 | 3259744 | 6352141 | 4 | 1 | 0 | 11 | 10 | 3259744 | 6352141 | 4 | 1 | 0 | 12 | 11 | 3259744 | 6352141 | 4 | 1 | 0 | 13 | 12 | 3259744 | 6352141 | 4 | 0 | 4 | 14 | 0 | 3259744 | 6352141 | 46 | 24 | 0 | 5 | 15 | 0 | 3259744 | 6352141 | 46 | 654 | 0 | 66 | 0 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | Efland, n / a | 36.027992 | -79,2212834 | 0 | 4 | 16 | 0 | 3259744 | 6352141 | 46 | 4preParse 4 | 0 | 66 | 5 | 0 | 3259744 | 6352141 | 1 | 3259744 | WSMxt208L54yZ5irtHC3 | Mc02 | Efland, n / a | 36.027992 | -79.221283 "
source to share
You can use strrchr
in Swift
import Darwin
let str = "4|0|66|5|0|3259744|6352141|1|3259744"
func stringLastIndexOf(src:String, target:UnicodeScalar) -> Int? {
let c = Int32(bitPattern: target.value)
return src.withCString { s -> Int? in
let pos = strrchr(s, c)
return pos != nil ? pos - s : nil
}
}
stringLastIndexOf(str, "|") // -> {Some 28}
stringLastIndexOf(str, ",") // -> nil
source to share
Here is Swift 2.0's answer
func lastIndexOf(s: String) -> Int? {
if let r: Range<Index> = self.rangeOfString(s, options: .BackwardsSearch) {
return self.startIndex.distanceTo(r.startIndex)
}
return Optional<Int>()
}
Tests
func testStringLastIndexOf() {
let lastIndex = "0|2|45|7|9".lastIndexOf("|")
XCTAssertEqual(lastIndex, 8)
}
func testStringLastIndexOfNotFound() {
let lastIndex = "0123456789".lastIndexOf("|")
XCTAssertEqual(lastIndex, nil);
}
source to share
If you do this so that all substrings are separated by "|", you can test this approach:
import Foundation
let s = "4|0|66|5|0|3259744|6352141|1|3259744|WSMxt208L54yZ5irtHC3|..."
let a = s.componentsSeparatedByString("|")
Built-in functions are sometimes very fast, and you can get the performance you want even with String.
If you really only need to get the position of the last "|", you can work with utf16 representation, where character advancement should be faster.
I think this should work:
let utf16String = s.utf16
var i = s.utf16Count - 1
while i >= 0 {
if utf16String[i] == 124 {
break
}
i--
}
println(i)
source to share
If the characters are guaranteed to be one byte, the data is huge, and performance is critical, then it might be wise to convert to an array of bytes (UInt8) and operate on them directly. Then you can convert the part you want to string.
Also note that optimized builds can be much faster than Debug builds, so you should do any performance testing with the optimizer. It can also be helpful to check if the optimized versions are too slow at the moment.
source to share