Skip to content

Commit

Permalink
JavaDoc, code-style
Browse files Browse the repository at this point in the history
  • Loading branch information
hohwille committed Jan 26, 2025
1 parent d4734d7 commit 56823e3
Show file tree
Hide file tree
Showing 15 changed files with 77 additions and 28 deletions.
25 changes: 19 additions & 6 deletions ascii/src/main/java/io/github/mmm/text/ascii/AsciiConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,34 @@
import io.github.mmm.text.ascii.impl.AsciiConverterImpl;

/**
* {@link AsciiConverter} converts Unicode to simple 7-bit ASCII characters. It strips diacritic marks and transliterates non-Latin letters and glyphs to ASCII.
* It is optimized for performance and does not implement perfectly correct transliteration (e.g. it has no state to transliterate a code-point depending on
* its predecessors).<br>
* However, it is very helpful to decode strings for use-cases like indexing and searching or if you want to build a {@link String} to be used for restricted
* environments (names of files, folders, etc.) where special characters could cause problems.
* {@link AsciiConverter} converts Unicode to simple 7-bit ASCII characters. It strips diacritic marks and
* transliterates non-Latin letters and glyphs to ASCII. It is optimized for performance and does not implement
* perfectly correct transliteration (e.g. it has no state to transliterate a code-point depending on its
* predecessors).<br>
* However, it is very helpful to decode strings for use-cases like indexing and searching or if you want to build a
* {@link String} to be used for restricted environments (names of files, folders, etc.) where special characters could
* cause problems.
*
* @since 1.0.0
* @see java.text.Normalizer
*/
public interface AsciiConverter {

/**
* @param codePoint the {@link String#codePointAt(int) codePoint} to convert.
* @return the converted ASCII representation of the given {@link String#codePointAt(int) codePoint}.
*/
String convert(int codePoint);
default String convert(int codePoint) {

return convert(codePoint, AsciiConverterConfig.of());
}

/**
* @param codePoint the {@link String#codePointAt(int) codePoint} to convert.
* @param config the {@link AsciiConverterConfig}.
* @return the converted ASCII representation of the given {@link String#codePointAt(int) codePoint}.
*/
String convert(int codePoint, AsciiConverterConfig config);

/**
* @param text the (Unicode) {@link CharSequence} to convert.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
/**
* State for {@link AsciiConverterImpl}.
*
* @see CodePointMapping#append(AsciiBuilder, int)
* @see CodePointMapping#append(AsciiBuilder, int, CodePointMapping)
*/
class AsciiBuilder {
public class AsciiBuilder {

final StringBuilder sb;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
*/
public final class AsciiConverterConfigImpl implements AsciiConverterConfig {

/** The default instance. */
public static final AsciiConverterConfigImpl DEFAULT = new AsciiConverterConfigImpl();

final CaseConversion caseConversion;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import java.util.PrimitiveIterator.OfInt;

import io.github.mmm.base.text.CaseConversion;
import io.github.mmm.text.ascii.AsciiConverter;
import io.github.mmm.text.ascii.AsciiConverterConfig;

Expand All @@ -25,13 +26,23 @@ private AsciiConverterImpl() {
}

@Override
public String convert(int codePoint) {
public String convert(int codePoint, AsciiConverterConfig config) {

CodePointMapping asc = getAsc(codePoint);
if (asc == null) {
return null;
}
return asc.toString();
String string;
if (config.useLongForms()) {
string = asc.asStringLong();
} else {
string = asc.asString();
}
CaseConversion cc = config.caseConversion();
if (cc != CaseConversion.ORIGINAL_CASE) {
string = cc.convert(string);
}
return string;
}

private static CodePointMapping getAsc(int codePoint) {
Expand Down Expand Up @@ -251,8 +262,10 @@ public String convert(CharSequence text, AsciiConverterConfig config) {
next = current.append(builder, codePoint, next);
}
}
// since text is not empty, we entered the while loop, next is the last mapping to process due to buffering.
next.append(builder, codePoint, null);
if (next != null) {
next.append(builder, codePoint, null);
// } else { fail(); } // since text is not empty, we entered the while loop, and next can never be null.
}
return builder.getAscii();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/**
* Implementation of {@link CodePointMapping} for static {@link Character}.
*/
class Char extends AbstractCodePointMapping {
public class Char extends AbstractCodePointMapping {

// other non-letters
static final Char TAB = new Char('\t');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/**
* Implementation of {@link CodePointMapping} for static {@link String}.
*/
class Chars extends AbstractCodePointMapping {
public class Chars extends AbstractCodePointMapping {

final String s;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/**
* Interface for the mapping result of a code-point.
*/
interface CodePointMapping {
public interface CodePointMapping {

/**
* @param builder the {@link AsciiBuilder}.
Expand All @@ -15,18 +15,30 @@ interface CodePointMapping {
*/
CodePointMapping append(AsciiBuilder builder, int codePoint, CodePointMapping next);

/**
* @return the {@link #toString() string representation}.
*/
String asString();

/**
* @return a longer {@link #toString() string representation} if available (may be the same as {@link #asString()}).
*/
default String asStringLong() {

return asString();
}

/**
* @return the {@link Type}.
*/
default Type getType() {

return Type.OTHER;
}

/**
* {@link Enum} with the available {@link CodePointMapping#getType() type}s for a {@link CodePointMapping}.
*/
enum Type {

TEXT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/**
* Common factors used in physics, etc.
*/
class Factor extends Fraction {
public class Factor extends Fraction {

static final Factor TERA = new Factor(1_000_000_000_000L, 1, "T", "tera");

Expand Down
12 changes: 6 additions & 6 deletions ascii/src/main/java/io/github/mmm/text/ascii/impl/Letters.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@
/**
* Extends {@link Chars} for multiple Latin letters. It typically represents a syllable or sound (unlike a {@link Word}).
*/
class Letters extends Chars {
public class Letters extends Chars {

Letters(String s) {

super(s);
assert isLetters(s);
}

private boolean isLetters(String s) {
private boolean isLetters(String string) {

int len = s.length();
int len = string.length();
if (len < 2) {
throw new IllegalArgumentException(s + " - expected at least two letters.");
throw new IllegalArgumentException(string + " - expected at least two letters.");
}
for (int i = 0; i < len; i++) {
int codePoint = s.codePointAt(i);
int codePoint = string.codePointAt(i);
if (!isLatinLetter(codePoint)) {
throw new IllegalArgumentException(
s + " - at index " + i + " illegal code point 0x" + Long.toHexString(codePoint));
string + " - at index " + i + " illegal code point 0x" + Long.toHexString(codePoint));
}
}
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/**
* {@link CodePointMapping} for a numeric value sign.
*/
interface Number extends CodePointMapping {
public interface Number extends CodePointMapping {

/**
* @return the numerator as {@code long}. The actual value is {@link #getNumerator()}/{@link #getDenominator()}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* Extends {@link Word} for a structured term that has a {@link #asStringLong() long form} whereas {@link #asString()}
* typically is an abbreviation.
*/
class StructuredWord extends Word {
public class StructuredWord extends Word {

final String full;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/**
* Extends {@link StructuredWord} for a (physical) unit.
*/
class Unit extends StructuredWord {
public class Unit extends StructuredWord {

static final Unit ACRE = new Unit("a", "acre");

Expand Down
10 changes: 9 additions & 1 deletion ascii/src/main/java/io/github/mmm/text/ascii/impl/Word.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/**
* Extends {@link Chars} for a word. Subsequent words will be separated by space.
*/
class Word extends Chars {
public class Word extends Chars {

Word(String s) {

Expand All @@ -18,6 +18,14 @@ public Type getType() {
return Type.TEXT;
}

/**
* Default part of {@link #append(AsciiBuilder, int, CodePointMapping)} that can be overridden by sub-classes.
*
* @param builder the {@link AsciiBuilder}.
* @param codePoint the original {@link String#codePointAt(int) code point}.
* @param next the next {@link CodePointMapping}.
* @return the {@link CodePointMapping} resulting from the append (used for stateful processing).
*/
protected CodePointMapping doAppend(AsciiBuilder builder, int codePoint, CodePointMapping next) {

return super.append(builder, codePoint, next);
Expand Down
2 changes: 1 addition & 1 deletion ascii/src/main/java/io/github/mmm/text/ascii/impl/Xxx.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* <li>{@code WD_*} for word</li>
* </ul>
*/
abstract class Xxx {
public abstract class Xxx {

// strings

Expand Down
4 changes: 3 additions & 1 deletion ascii/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
*/

/**
* Conversion from unicode to ASCII (simplified transliteration) and related features.
* Conversion from Unicode to ASCII (simplified transliteration) and related features.
*
* @see io.github.mmm.text.ascii.AsciiConverter
*/
module io.github.mmm.text.ascii {

Expand Down

0 comments on commit 56823e3

Please sign in to comment.