diff --git a/matcher.go b/matcher.go
index fffaba6..45d0478 100644
--- a/matcher.go
+++ b/matcher.go
@@ -8,6 +8,14 @@ import (
 )
 
 type PhoneNumberMatcher struct {
+	// PhoneNumber is the parsed number for this match.
+	PhoneNumber *PhoneNumber
+	// Next is the following match, or nil at the end of the list.
+	Next *PhoneNumberMatcher
+	// Start and End are the byte offsets of the first and the last byte
+	// (inclusive) of the matched text within the scanned sequence.
+	Start int
+	End   int
 }
 
 func NewPhoneNumberMatcher(seq string) *PhoneNumberMatcher {
@@ -15,6 +23,88 @@ func NewPhoneNumberMatcher(seq string) *PhoneNumberMatcher {
 	return nil
 }
 
+// NewPhoneNumberMatcherForRegion scans seq for phone numbers that are valid for
+// region and returns them as a singly linked list, in the order they are found.
+// It returns nil when seq contains no such number.
+//
+// Each node holds the parsed number and the byte offsets of the text it was
+// parsed from: Start is the first byte and End the last byte (inclusive), i.e.
+// the match is seq[Start : End+1]. Numbers are de-duplicated by national number,
+// so a number that occurs several times in seq is reported once.
+func NewPhoneNumberMatcherForRegion(seq string, region string) *PhoneNumberMatcher {
+	if seq == "" {
+		return nil
+	}
+
+	// Convert once; both patterns are run over the same bytes.
+	raw := []byte(seq)
+	lastIndex := len(seq) - 1
+
+	// Offsets at which a phone number could potentially start.
+	startMatches := VALID_START_CHAR_PATTERN.FindAllIndex(raw, -1)
+	starts := make([]int, 0, len(startMatches))
+	for _, m := range startMatches {
+		starts = append(starts, m[0])
+	}
+
+	// Inclusive upper bounds for a candidate: the start of every unwanted-end
+	// match, plus the last byte of seq as a sentinel so that every start offset
+	// has a bound. The sentinel is added only if it is not already the last entry.
+	endMatches := UNWANTED_END_CHAR_PATTERN.FindAllIndex(raw, -1)
+	ends := make([]int, 0, len(endMatches)+1)
+	for _, m := range endMatches {
+		ends = append(ends, m[0])
+	}
+	if len(ends) == 0 || ends[len(ends)-1] != lastIndex {
+		ends = append(ends, lastIndex)
+	}
+
+	var head *PhoneNumberMatcher
+	// tail is the link slot for the next node, so the first node needs no special case.
+	tail := &head
+	// seen holds the national numbers that have already been reported.
+	seen := make(map[uint64]bool)
+
+	for _, start := range starts {
+		// Only the first bound at or after start is used for this start offset.
+		limit := -1
+		for _, end := range ends {
+			if end >= start {
+				limit = end
+				break
+			}
+		}
+
+		// Grow the candidate one byte at a time and keep every prefix that parses
+		// to a valid, not yet reported number.
+		for k := start; k <= limit; k++ {
+			span := k - start
+			if span > MAX_LENGTH_COUNTRY_CODE+MAX_LENGTH_FOR_NSN {
+				break // Exceeds the maximum length considered; stop growing this candidate.
+			}
+			if span < MIN_LENGTH_FOR_NSN {
+				continue // Still below the minimum length considered.
+			}
+
+			number, err := Parse(seq[start : k+1], region)
+			if err != nil || !IsValidNumberForRegion(number, region) {
+				continue // Parse error, or not valid for region.
+			}
+
+			national := number.GetNationalNumber()
+			if seen[national] {
+				continue
+			}
+			seen[national] = true
+
+			node := &PhoneNumberMatcher{PhoneNumber: number, Start: start, End: k}
+			*tail = node
+			tail = &node.Next
+		}
+	}
+	return head
+}
+
 func ContainsOnlyValidXChars(number *PhoneNumber, candidate string) bool {
 	// The characters 'x' and 'X' can be (1) a carrier code, in which
 	// case they always precede the national significant number or (2)
diff --git a/matcher_test.go b/matcher_test.go
new file mode 100644
index 0000000..57b79f6
--- /dev/null
+++ b/matcher_test.go
@@ -0,0 +1,56 @@
+package phonenumbers
+
+import (
+	"testing"
+)
+
+func TestNewPhoneNumberMatcherForRegion(t *testing.T) {
+	tests := []struct {
+		name   string
+		seq    string
+		region string
+		want   map[uint64]int // expected phone numbers with their expected counts
+	}{
+		{
+			name:   "Valid US numbers",
+			seq:    "Call me at 202-555-0130 or 415-555-0198 for more info.",
+			region: "US",
+			want:   map[uint64]int{2025550130: 1, 4155550198: 1},
+		},
+		{
+			name:   "Invalid patterns mixed with valid numbers",
+			seq:    "Try 12345 and then call 503-555-0110 or reach out at 999-000",
+			region: "US",
+			want:   map[uint64]int{5035550110: 1},
+		},
+		{
+			name:   "Valid Tunisian numbers",
+			seq:    "Call me at 71 123 456 or 71 123 457 for more info.",
+			region: "TN",
+			want:   map[uint64]int{71123456: 1, 71123457: 1},
+		},
+		{
+			name:   "Valid when sequence ends with a number",
+			seq:    "for more info 71123458",
+			region: "TN",
+			want:   map[uint64]int{71123458: 1},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			matcher := NewPhoneNumberMatcherForRegion(tt.seq, tt.region)
+			got := make(map[uint64]int)
+			for matcher != nil {
+				got[*matcher.PhoneNumber.NationalNumber]++
+				matcher = matcher.Next
+			}
+
+			for num, count := range tt.want {
+				if got[num] != count {
+					t.Errorf("Test %s failed: expected %d occurrences of %d, got %d", tt.name, count, num, got[num])
+				}
+			}
+		})
+	}
+}