Обновлено сейчас включает в себя список игнорируемых слов.
WordSequence: после элементарного синтаксического анализа слов выполняется поиск самой длинной подходящей последовательности слов на основе количества символов этих слов. Возвращает последовательность, которая встречается в первом аргументе.
Использование: WordSequence (String1, String2 [, caseInsensitive As Boolean])
Пример 1. Пример данных OP с IgnoredWords = "a an"
как указано ниже.
=WordSequence("Yesterday I ate an apple", "today I ate an orange")
Результат: I ate
Пример 2 - Тот же пример данных с правкой IgnoredWords = ""
для кода (без пропущенных слов).
=WordSequence("Yesterday I ate an apple", "today I ate an orange")
Результат: I ate an
Пример 3. Результаты могут различаться в зависимости от того, какая строка назначена параметру String1.
,
B1:C2 показывают влияние порядка ввода строк на возвращаемое значение.
B3:C4 Демонстрирует caseInsensitive := TRUE
(и возвращаемое значение порядка ввода).
Примечание. В примере 3 IgnoredWords = "a an"
как указано ниже. В этом случае такое же совпадение будет иметь место, даже если a
или an
отсутствует в одной или другой ячейке. Кроме того, обратный результат будет включать в a
или , когда это происходит в строки1 и включено в совпадающей последовательности. Результат примера 1 также показывает, что an
не появляются в начале или в конце соответствующей последовательности слов. Это потому, что они ничего не соответствуют, так как они были проигнорированы. То же самое верно для любых указанных знаков препинания.
' WordSequence (String1, String2 [, caseInsensitive As Boolean])
'
' Compares two String arguments for the longest common sequence
' of words. Longest is defined by the number of characters in each
' matched word in the sequence.
'
' Make the comparison case insensitive with an optional third
' argument set to True.
'
' Return the sequence of words from String1 along with inclusive
' text (white space, ignored words and specified punctuation).
'
' Authored by Ted Dillard
'
' Spaces, Tabs and the WordBoundary characters delimit the words
' in the comparison; only whole word matches are returned.
'
' Punctuation Indifference: Two mechanisms regulate matching based on
' specified punctuation characters. The matched sequence in String1
' is returned with any inclusive punctuation. Spaces, tabs and the
' defined punctuations are not counted when comparing match lengths.
' Any punctuation not specified in one of these two constants is
' treated as a character of the word (like the apostrophe in "I'll")
'
' Ignored Words: A list of words to ignore in matching. These words'
' lengths are not counted towards the longest match. Theses words
' will only be returned if they are between two matched words
' in String1, in which case they will be returned even if not in
' String2, because they were ignored.
'
Option Explicit
'
' IgnoredWords is a String of space separated words to ignore. Punctuation
' not listed in WordBoundary or IgnoredPunctuation can be in the words.
Private Const IgnoredWords = "a an" ' "" empty string ok
'
' (Two consecutive double quotes inside a quoted string puts one
' double quote into the string value.)
'
' WordBoundary characters are word delimiters and ignored in matching.
Private Const wordBoundary = "(){}[]:;<>,.?!"""
'
' IgnoredPunctuation are deleted before parsing words (ignored in matching).
' Therefore "'.-" means "we're"="were" and "123.456.7890"="123-456-7890"
Private Const IgnoredPunctuation = "-"
'
' WhiteSpace characters are used as word boundaries along with spaces.
Private Const WhiteSpace = vbTab & vbNewLine & vbLf & vbCr & vbCrLf
'
'-------------------------------------------------------------------------
' END of User Configurable Settings - the rest is code
'
Private Enum WordList
matchOnWords ' list first =0 also missing Optional Long parameter value
NonDeletion
AllPartials
End Enum
Private Type LongestMatch
start As Long
items As Long
End Type
Public Function WordSequence(ByVal sentence As String, ByVal sentence2 _
As String, Optional caseInsensitive As Boolean) As String
Dim matchOnWords() As String
Dim matchInfo As LongestMatch
'Optional Booleans are False by default. If Case_Insensitive is True the
'search will not be case sensitive. The default search is case sensitive.
'To make default case insensitive, change parameter name to caseSensitve
'and edit If condition to "Not caseSensitive"
WordSequence = sentence
If caseInsensitive Then
sentence = LCase(sentence)
sentence2 = LCase(sentence2)
End If
matchOnWords = getWords(sentence)
matchInfo = getLongestMatch(matchOnWords, getWords(sentence2))
If matchInfo.start = -1 Or matchInfo.items = 0 Then
WordSequence = ""
Else
WordSequence = getMatchedString(sentence, WordSequence, matchOnWords, _
matchInfo.start, matchInfo.items)
End If
End Function
Private Function getMatchedString(ByVal sentence As String, _
ByVal original As String, ByRef matchOnWords() As String, _
ByVal start As Long, ByVal items As Long) As String
Dim allPartialWords() As String
Dim allWords() As String
Dim begun As Boolean, ignoredWord As Boolean, wordBoundary As Boolean
Dim w As Long, i As Long, j As Long
' word list where all specified punctuations are used as word boundaries,
' [1] to deal with punctuation in the actual word but not the match words
allPartialWords = getWords(sentence, AllPartials)
allWords = getWords(sentence, NonDeletion) 'deals with ignored words
begun = False
ignoredWord = False
wordBoundary = True
i = 0: j = 0
For w = 0 To UBound(allPartialWords)
' make the beginning of the sentence be the beginning location of the
' first occurrence in the sentence where the current iterative word
' list element (partial word) is located removing preceding spaces,
' tabs and punctuation characters defined in punctuation constants.
sentence = Mid(sentence, InStr(sentence, allPartialWords(w)))
If Not begun And i = start Then 'Beginning of words match
begun = True
' delete any characters at the beginning of the original sentence
' that have already been removed from the sentence variable
original = Mid(original, Len(original) - Len(sentence) + 1)
End If
' remove the current partial word from the beginning of the sentence
sentence = Mid(sentence, Len(allPartialWords(w)) + 1)
If wordBoundary Then
' is entirety of all_word in ignored word list
If InStr(" " & IgnoredWords & " ", " " & allWords(j) & " ") Then
ignoredWord = True
End If
End If
' also remove from the beginning of all_word & match_word, along with
' [1] preceding ignored characters inclusive to this matchOnWord.
allWords(j) = Mid(allWords(j), InStr(allWords(j), _
allPartialWords(w)) + Len(allPartialWords(w)))
' ignored words are not part of match_on_words' list
If Not ignoredWord Then
matchOnWords(i) = Mid(matchOnWords(i), InStr(matchOnWords(i), _
allPartialWords(w)) + Len(allPartialWords(w)))
End If
If allWords(j) = "" Then ' all_word is consumed
wordBoundary = True ' no longer in a word,
ignoredWord = False ' so can no longer be in an ignored word
j = j + 1
Else ' part of the word is consumed, next time through dont test the
wordBoundary = False ' remaing part against ignored word list
End If
If matchOnWords(i) = "" Then ' match_on_word is consumed
i = i + 1 ' advance match_on_word iterator to next matched word
If begun Then
items = items - 1 'consumed word, decrement matched items count
If items = 0 Then ' consumed all matched words.
' original already starts at beginning of match.
' sentence had all matched partial words removed.
' remove rest of sentence characters from return value.
getMatchedString = Mid(original, 1, _
Len(original) - Len(sentence))
Exit Function
End If
End If
End If
Next w
getMatchedString = ""
End Function
Private Function getLongestMatch(ByRef words1() As String, _
ByRef words2() As String) As LongestMatch
Dim largestCharCnt As Long
largestCharCnt = 0
getLongestMatch.start = -1
getLongestMatch.items = 0
Dim i1 As Long, i2 As Long, i As Long, l As Long
For i1 = 0 To UBound(words1)
For i2 = 0 To UBound(words2)
If words1(i1) = words2(i2) Then
l = Len(words1(i1))
i = 1
Do While i1 + i <= UBound(words1)
If i2 + i > UBound(words2) Then Exit Do
If words1(i1 + i) <> words2(i2 + i) Then Exit Do
l = l + Len(words1(i1 + i))
i = i + 1
Loop
If l > largestCharCnt Then
largestCharCnt = l
getLongestMatch.start = i1
getLongestMatch.items = i
End If
End If
Next i2
Next i1
End Function
Private Function getWords(ByVal sentence As String, _
Optional listType As WordList) As String()
sentence = replaceChars(sentence, WhiteSpace, " ")
sentence = replaceChars(sentence, wordBoundary, " ")
If listType = matchOnWords Or listType = NonDeletion Then
sentence = replaceChars(sentence, IgnoredPunctuation, "")
Else ' listType = AllPartials
sentence = replaceChars(sentence, IgnoredPunctuation, " ")
End If
If listType = matchOnWords Then
' start & end as well for space delimiter-brackets' match
sentence = " " & sentence & " "
Dim w As Variant
' only match whole word using space delimiter-brackets' match
For Each w In Split(IgnoredWords)
sentence = Replace(sentence, " " & w & " ", " ")
Next w
End If
Do While InStr(sentence, " ") <> 0
sentence = Replace(sentence, " ", " ")
Loop
sentence = Trim(sentence)
getWords = Split(sentence)
End Function
Private Function replaceChars(ByVal source As String, ByVal chars As String, _
ByVal replacement As String) As String
Dim c As Long
For c = 1 To Len(chars)
source = Replace(source, Mid(chars, c, 1), replacement)
Next c
replaceChars = source
End Function