Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement PhoneNumberMatcher for regional contexts #179

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,91 @@ import (
)

// PhoneNumberMatcher is a single node in a singly linked list of phone
// numbers located inside a text sequence.
type PhoneNumberMatcher struct {
	// PhoneNumber is the parsed number this node represents.
	PhoneNumber *PhoneNumber
	// Next is the following node in the list; it is nil on the last node.
	Next *PhoneNumberMatcher
	// Start and End delimit the matched text. As populated by
	// NewPhoneNumberMatcherForRegion they are byte offsets into the scanned
	// sequence, and End is inclusive (the match is seq[Start : End+1]).
	Start, End int
}

// NewPhoneNumberMatcher is a placeholder for a region-independent matcher
// over seq. It is not implemented yet: seq is ignored and the function always
// returns nil, so callers must be prepared for a nil result.
func NewPhoneNumberMatcher(seq string) *PhoneNumberMatcher {
// TODO(ttacon): to be implemented
return nil
}

// NewPhoneNumberMatcherForRegion scans seq for phone numbers that are valid
// for region and returns them as a singly linked list, in the order found.
//
// Every position matched by VALID_START_CHAR_PATTERN is tried as the first
// byte of a candidate. A candidate's last byte ranges from MIN_LENGTH_FOR_NSN
// bytes past the start up to MAX_LENGTH_COUNTRY_CODE+MAX_LENGTH_FOR_NSN bytes
// past it, but never beyond the nearest UNWANTED_END_CHAR_PATTERN match at or
// after the start, and never beyond the last byte of seq. A candidate is kept
// when Parse succeeds, IsValidNumberForRegion accepts the result, and its
// national number has not already been recorded, so each national number
// appears at most once in the list.
//
// Start and End on every returned node are byte offsets into seq; End is
// inclusive. The function returns nil when seq is empty or when no valid
// number is found.
func NewPhoneNumberMatcherForRegion(seq string, region string) *PhoneNumberMatcher {
	if seq == "" {
		return nil
	}
	lastIndex := len(seq) - 1

	// The *String* variants search seq directly and avoid copying it into a
	// byte slice for each pattern.
	startMatches := VALID_START_CHAR_PATTERN.FindAllStringIndex(seq, -1)
	if len(startMatches) == 0 {
		return nil
	}
	endMatches := UNWANTED_END_CHAR_PATTERN.FindAllStringIndex(seq, -1)

	// Largest allowed distance between the first and last byte of a candidate.
	maxSpan := MAX_LENGTH_COUNTRY_CODE + MAX_LENGTH_FOR_NSN

	var head, tail *PhoneNumberMatcher
	seen := make(map[uint64]struct{}) // national numbers already in the list

	for _, startMatch := range startMatches {
		start := startMatch[0]

		// The candidate may extend to the first unwanted end position at or
		// after start. When there is none, the last byte of seq is the bound.
		limit := lastIndex
		for _, endMatch := range endMatches {
			if endMatch[0] >= start {
				limit = endMatch[0]
				break
			}
		}
		// Never index past the end of seq, and never exceed the longest
		// possible phone number.
		if limit > lastIndex {
			limit = lastIndex
		}
		if start+maxSpan < limit {
			limit = start + maxSpan
		}

		// k is the inclusive index of the candidate's last byte.
		for k := start + MIN_LENGTH_FOR_NSN; k <= limit; k++ {
			number, err := Parse(seq[start:k+1], region)
			if err != nil || !IsValidNumberForRegion(number, region) {
				continue // Not a valid number for this region; try a longer slice.
			}

			national := number.GetNationalNumber()
			if _, dup := seen[national]; dup {
				continue // Already reported from an earlier (shorter or overlapping) slice.
			}
			seen[national] = struct{}{}

			node := &PhoneNumberMatcher{
				PhoneNumber: number,
				Start:       start,
				End:         k,
			}
			if head == nil {
				head = node
			} else {
				tail.Next = node
			}
			tail = node
		}
	}
	return head
}

func ContainsOnlyValidXChars(number *PhoneNumber, candidate string) bool {
// The characters 'x' and 'X' can be (1) a carrier code, in which
// case they always precede the national significant number or (2)
Expand Down
56 changes: 56 additions & 0 deletions matcher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package phonenumbers

import (
"testing"
)

// TestNewPhoneNumberMatcherForRegion runs NewPhoneNumberMatcherForRegion over
// several sample sentences and verifies that each expected national number
// shows up in the returned list the expected number of times. Numbers that
// are returned but not listed in a case's want map are not reported.
func TestNewPhoneNumberMatcherForRegion(t *testing.T) {
	type matcherCase struct {
		name   string
		seq    string
		region string
		want   map[uint64]int // national number -> expected occurrence count
	}

	cases := []matcherCase{
		{
			name:   "Valid US numbers",
			seq:    "Call me at 202-555-0130 or 415-555-0198 for more info.",
			region: "US",
			want:   map[uint64]int{2025550130: 1, 4155550198: 1},
		},
		{
			name:   "Invalid patterns mixed with valid numbers",
			seq:    "Try 12345 and then call 503-555-0110 or reach out at 999-000",
			region: "US",
			want:   map[uint64]int{5035550110: 1},
		},
		{
			name:   "Valid Tunisian numbers",
			seq:    "Call me at 71 123 456 or 71 123 457 for more info.",
			region: "TN",
			want:   map[uint64]int{71123456: 1, 71123457: 1},
		},
		{
			name:   "Valid when sequence ends with a number",
			seq:    "for more info 71123458",
			region: "TN",
			want:   map[uint64]int{71123458: 1},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Walk the linked list and tally how often each national number occurs.
			counts := map[uint64]int{}
			for node := NewPhoneNumberMatcherForRegion(tc.seq, tc.region); node != nil; node = node.Next {
				counts[*node.PhoneNumber.NationalNumber]++
			}

			for number, expected := range tc.want {
				if actual := counts[number]; actual != expected {
					t.Errorf("Test %s failed: expected %d occurrences of %d, got %d", tc.name, expected, number, actual)
				}
			}
		})
	}
}