Skip to content

Commit

Permalink
add benchmarks ; add explicit #[inline] annotations
Browse files Browse the repository at this point in the history
This diff adds benchmarks to get more info regarding Issue #1.

It appears that the remaining difference between the "simple"
case and the "cargo" case is the result of a difference in
performance between using `match` and `if` for tight loops.

I suspect it's because of the way that match arms get reordered:
if I manually reorder the "if" statement, I can reproduce the
match performance.

Also added a couple #[inline] annotations in tables.rs, though
the difference in performance in my measurements is negligible.

Bumped version number to 0.1.1.
  • Loading branch information
kwantam committed Apr 19, 2015
1 parent 1d27b56 commit cbad406
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 67 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
target
Cargo.lock
scripts/tmp
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ script:
- cargo clean
- cargo build --verbose --features default
- cargo test --verbose --features default
- cargo bench --verbose --features default
- rustdoc --test README.md -L target/debug -L target/debug/deps
- cargo doc
after_success: |
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "unicode-width"
version = "0.1.0"
version = "0.1.1"
authors = ["kwantam <[email protected]>"]

homepage = "https://github.com/unicode-rs/unicode-width"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ to your `Cargo.toml`:

```toml
[dependencies]
unicode-width = "0.1.0"
unicode-width = "0.1.1"
```
2 changes: 2 additions & 0 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ def emit_charwidth_module(f, width_table):
#[cfg(feature = "no_std")]
use core::result::Result::{Ok, Err};
#[inline]
fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
#[cfg(feature = "no_std")]
use core::cmp::Ordering::{Equal, Less, Greater};
Expand All @@ -226,6 +227,7 @@ def emit_charwidth_module(f, width_table):
""")

f.write("""
#[inline]
pub fn width(c: char, is_cjk: bool) -> Option<usize> {
match c as usize {
_c @ 0 => Some(0), // null is zero width
Expand Down
78 changes: 13 additions & 65 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
//!
//! ```toml
//! [dependencies]
//! unicode-width = "0.1.0"
//! unicode-width = "0.1.1"
//! ```
#![deny(missing_docs, unsafe_code)]
Expand All @@ -49,6 +49,8 @@
#![cfg_attr(feature = "no_std", no_std)]
#![cfg_attr(feature = "no_std", feature(no_std, core))]

#![cfg_attr(test, feature(test, unicode))]

#[cfg(feature = "no_std")]
#[macro_use]
extern crate core;
Expand All @@ -57,6 +59,9 @@ extern crate core;
#[macro_use]
extern crate std;

#[cfg(test)]
extern crate test;

#[cfg(feature = "no_std")]
use core::prelude::*;

Expand All @@ -70,6 +75,9 @@ use std::ops::Add;

mod tables;

#[cfg(test)]
mod tests;

/// Methods for determining displayed width of Unicode characters.
pub trait UnicodeWidthChar {
/// Returns the character's displayed width in columns, or `None` if the
Expand All @@ -92,8 +100,10 @@ pub trait UnicodeWidthChar {
}

impl UnicodeWidthChar for char {
    /// Looks up this character's column width with the CJK flag disabled.
    #[inline]
    fn width(self) -> Option<usize> {
        let is_cjk = false;
        cw::width(self, is_cjk)
    }

    /// Looks up this character's column width with the CJK flag enabled.
    #[inline]
    fn width_cjk(self) -> Option<usize> {
        let is_cjk = true;
        cw::width(self, is_cjk)
    }
}

Expand Down Expand Up @@ -121,75 +131,13 @@ pub trait UnicodeWidthStr {
}

impl UnicodeWidthStr for str {
    /// Sums the non-CJK width of every character in the string.
    ///
    /// Characters for which the lookup yields `None` contribute 0 columns.
    #[inline]
    fn width(&self) -> usize {
        let per_char = self.chars().map(|ch| cw::width(ch, false).unwrap_or(0));
        per_char.fold(0, Add::add)
    }

    /// Sums the CJK-context width of every character in the string.
    ///
    /// Characters for which the lookup yields `None` contribute 0 columns.
    #[inline]
    fn width_cjk(&self) -> usize {
        let per_char = self.chars().map(|ch| cw::width(ch, true).unwrap_or(0));
        per_char.fold(0, Add::add)
    }
}

#[cfg(test)]
mod tests {
    /// String-level widths: fullwidth text, control characters, the empty
    /// string, and East Asian "ambiguous" characters.
    #[test]
    fn test_str() {
        use super::UnicodeWidthStr;

        // Fullwidth "hello" (U+FF48 U+FF45 U+FF4C U+FF4C U+FF4F), written as
        // escapes so the literal cannot be silently folded to ASCII: five
        // characters at two columns each.
        assert_eq!(UnicodeWidthStr::width("\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}"), 10);
        assert_eq!("\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}".width_cjk(), 10);
        // NUL is zero width; other control characters have no width and count as 0.
        assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
        assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);
        assert_eq!(UnicodeWidthStr::width(""), 0);
        assert_eq!("".width_cjk(), 0);
        // Subscript digits: one column normally, two in a CJK context.
        assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
        assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
    }

    /// Character-level widths mirroring the cases in `test_str`.
    #[test]
    fn test_char() {
        use super::UnicodeWidthChar;
        #[cfg(feature = "no_std")]
        use core::option::Option::{Some, None};

        // U+FF48 FULLWIDTH LATIN SMALL LETTER H occupies two columns.
        assert_eq!(UnicodeWidthChar::width('\u{ff48}'), Some(2));
        assert_eq!('\u{ff48}'.width_cjk(), Some(2));
        assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
        assert_eq!('\x00'.width_cjk(), Some(0));
        assert_eq!(UnicodeWidthChar::width('\x01'), None);
        assert_eq!('\x01'.width_cjk(), None);
        assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
        assert_eq!('\u{2081}'.width_cjk(), Some(2));
    }

    /// Additional character-level cases: newline, ASCII, fullwidth, soft
    /// hyphen, Hangul filler, an ambiguous-width character, and a combining mark.
    #[test]
    fn test_char2() {
        use super::UnicodeWidthChar;
        #[cfg(feature = "no_std")]
        use core::option::Option::{Some, None};

        assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
        assert_eq!('\x00'.width_cjk(), Some(0));

        assert_eq!(UnicodeWidthChar::width('\x0A'), None);
        assert_eq!('\x0A'.width_cjk(), None);

        // Plain ASCII letter: one column in both modes.
        assert_eq!(UnicodeWidthChar::width('w'), Some(1));
        assert_eq!('w'.width_cjk(), Some(1));

        // U+FF48 FULLWIDTH LATIN SMALL LETTER H: two columns in both modes.
        assert_eq!(UnicodeWidthChar::width('\u{ff48}'), Some(2));
        assert_eq!('\u{ff48}'.width_cjk(), Some(2));

        assert_eq!(UnicodeWidthChar::width('\u{AD}'), Some(1));
        assert_eq!('\u{AD}'.width_cjk(), Some(1));

        assert_eq!(UnicodeWidthChar::width('\u{1160}'), Some(0));
        assert_eq!('\u{1160}'.width_cjk(), Some(0));

        assert_eq!(UnicodeWidthChar::width('\u{a1}'), Some(1));
        assert_eq!('\u{a1}'.width_cjk(), Some(2));

        assert_eq!(UnicodeWidthChar::width('\u{300}'), Some(0));
        assert_eq!('\u{300}'.width_cjk(), Some(0));
    }
}
2 changes: 2 additions & 0 deletions src/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub mod charwidth {
#[cfg(feature = "no_std")]
use core::result::Result::{Ok, Err};

#[inline]
fn bsearch_range_value_table(c: char, is_cjk: bool, r: &'static [(char, char, u8, u8)]) -> u8 {
#[cfg(feature = "no_std")]
use core::cmp::Ordering::{Equal, Less, Greater};
Expand All @@ -42,6 +43,7 @@ pub mod charwidth {
}
}

#[inline]
pub fn width(c: char, is_cjk: bool) -> Option<usize> {
match c as usize {
_c @ 0 => Some(0), // null is zero width
Expand Down
148 changes: 148 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::iter;
use test::{self, Bencher};

use super::UnicodeWidthChar;

#[cfg(feature = "no_std")]
use std::prelude::v1::*;

/// Benchmarks `UnicodeWidthChar::width` over a string of 4096 ASCII `'a'`s.
#[bench]
fn cargo(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // black_box keeps the optimizer from discarding the lookup.
            test::black_box(UnicodeWidthChar::width(ch));
        }
    });
}

/// Benchmarks the `width(bool)` method on `char` over 4096 ASCII `'a'`s.
///
/// NOTE(review): presumably this resolves to the standard library's unstable
/// `char::width(is_cjk)` enabled by the `unicode` feature — confirm.
#[bench]
fn stdlib(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // black_box keeps the optimizer from discarding the lookup.
            test::black_box(ch.width(false));
        }
    });
}

/// Benchmarks the `if`-based fast path (`simple_width_if`) over 4096 ASCII `'a'`s.
#[bench]
fn simple_if(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // black_box keeps the optimizer from discarding the result.
            test::black_box(simple_width_if(ch));
        }
    });
}

/// Benchmarks the `match`-based fast path (`simple_width_match`) over 4096 ASCII `'a'`s.
#[bench]
fn simple_match(b: &mut Bencher) {
    let input: String = iter::repeat('a').take(4096).collect();

    b.iter(|| {
        for ch in input.chars() {
            // black_box keeps the optimizer from discarding the result.
            test::black_box(simple_width_match(ch));
        }
    });
}

// ASCII fast path written as nested `if`s; used by the `simple_if` benchmark.
//
// Returns `Some(1)` for code points 32..=126, `Some(0)` for NUL, and `None`
// for the remaining code points below 32. Anything at or above 127 (including
// DEL) is delegated to `UnicodeWidthChar::width`.
//
// The branch order is deliberate: this function exists to be compared against
// `simple_width_match`, so do not restructure it.
#[inline]
fn simple_width_if(c: char) -> Option<usize> {
let cu = c as u32;
if cu < 127 {
// Checked first: printable ASCII (code points 32..=126).
if cu > 31 {
Some(1)
} else if cu == 0 {
Some(0)
} else {
None
}
} else {
UnicodeWidthChar::width(c)
}
}

// ASCII fast path written as a guarded `match`; used by the `simple_match`
// benchmark.
//
// Guards are tried top to bottom: NUL yields `Some(0)`, other code points
// below 0x20 yield `None`, code points below 0x7f yield `Some(1)`, and
// everything else (0x7f and up) is delegated to `UnicodeWidthChar::width`.
//
// The arm order is deliberate: this function exists to be compared against
// `simple_width_if`, so do not restructure it.
#[inline]
fn simple_width_match(c: char) -> Option<usize> {
match c as u32 {
cu if cu == 0 => Some(0),
cu if cu < 0x20 => None,
cu if cu < 0x7f => Some(1),
_ => UnicodeWidthChar::width(c)
}
}

/// String-level widths: fullwidth text, control characters, the empty string,
/// and East Asian "ambiguous" characters.
#[test]
fn test_str() {
    use super::UnicodeWidthStr;

    // Fullwidth "hello" (U+FF48 U+FF45 U+FF4C U+FF4C U+FF4F), written as
    // escapes so the literal cannot be silently folded to ASCII: five
    // characters at two columns each.
    assert_eq!(UnicodeWidthStr::width("\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}"), 10);
    assert_eq!("\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}".width_cjk(), 10);
    // NUL is zero width; other control characters have no width and count as 0.
    assert_eq!(UnicodeWidthStr::width("\0\0\0\x01\x01"), 0);
    assert_eq!("\0\0\0\x01\x01".width_cjk(), 0);
    assert_eq!(UnicodeWidthStr::width(""), 0);
    assert_eq!("".width_cjk(), 0);
    // Subscript digits: one column normally, two in a CJK context.
    assert_eq!(UnicodeWidthStr::width("\u{2081}\u{2082}\u{2083}\u{2084}"), 4);
    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8);
}

/// Character-level widths mirroring the cases in `test_str`.
#[test]
fn test_char() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    // U+FF48 FULLWIDTH LATIN SMALL LETTER H occupies two columns; an ASCII
    // 'h' would be one column, so the escape is used to keep this unambiguous.
    assert_eq!(UnicodeWidthChar::width('\u{ff48}'), Some(2));
    assert_eq!('\u{ff48}'.width_cjk(), Some(2));
    assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
    assert_eq!('\x00'.width_cjk(), Some(0));
    assert_eq!(UnicodeWidthChar::width('\x01'), None);
    assert_eq!('\x01'.width_cjk(), None);
    assert_eq!(UnicodeWidthChar::width('\u{2081}'), Some(1));
    assert_eq!('\u{2081}'.width_cjk(), Some(2));
}

/// Additional character-level cases: newline, ASCII, fullwidth, soft hyphen,
/// Hangul filler, an ambiguous-width character, and a combining mark.
#[test]
fn test_char2() {
    use super::UnicodeWidthChar;
    #[cfg(feature = "no_std")]
    use core::option::Option::{Some, None};

    assert_eq!(UnicodeWidthChar::width('\x00'), Some(0));
    assert_eq!('\x00'.width_cjk(), Some(0));

    assert_eq!(UnicodeWidthChar::width('\x0A'), None);
    assert_eq!('\x0A'.width_cjk(), None);

    // Plain ASCII letter: one column in both modes.
    assert_eq!(UnicodeWidthChar::width('w'), Some(1));
    assert_eq!('w'.width_cjk(), Some(1));

    // U+FF48 FULLWIDTH LATIN SMALL LETTER H: two columns in both modes.
    assert_eq!(UnicodeWidthChar::width('\u{ff48}'), Some(2));
    assert_eq!('\u{ff48}'.width_cjk(), Some(2));

    assert_eq!(UnicodeWidthChar::width('\u{AD}'), Some(1));
    assert_eq!('\u{AD}'.width_cjk(), Some(1));

    assert_eq!(UnicodeWidthChar::width('\u{1160}'), Some(0));
    assert_eq!('\u{1160}'.width_cjk(), Some(0));

    assert_eq!(UnicodeWidthChar::width('\u{a1}'), Some(1));
    assert_eq!('\u{a1}'.width_cjk(), Some(2));

    assert_eq!(UnicodeWidthChar::width('\u{300}'), Some(0));
    assert_eq!('\u{300}'.width_cjk(), Some(0));
}

0 comments on commit cbad406

Please sign in to comment.