Code: Select all
0: INPUTSTRING= 2013 12 04 11-2-2013 1-feb-2012 december 5, 2013 march 12,2013 5 maart 2014
2013-06-30 2013/06/30 6/12/2013 20130709 20131225 02022012 24122015
7 false look alikes 24122020 00022012 24132013 13022012 32122013 december 2013 april 99 2015
The code you actually need to include in your own script after testing is marked, and is usually only some 10-15 lines.
Code: Select all
// version 1.1
// Set IGNORESPACES to 1 to force the script interpreter to ignore spaces.
// If using IGNORESPACES, quote strings in {" ... "}
//Let>IGNORESPACES=1
//
// Test strings: inputCheck is the annotated test input, in which every date candidate is prefixed with an = symbol.
// inputString, derived from it below, is the string that is actually scanned.
// The = prefix is only used to check whether every date has been recognized:
// a date without a leading = is still recognized during testing, but the error processing reports it as an error.
//
// For testing purposes ONLY: dates in the test string should start with an = symbol.
// Each = symbol is replaced by a space, and the result is used as the actual test string.
//
let>inputCheck= =2013 12 04 =11-2-2013 =1-feb-2012 =December 5, 2013 =March 12,2016 =5 maart 2014 =2013-06-30 =2013/06/30 =6/12/2013 =20130709 =20131225 =02022012 =24122015 7 false look alikes =24122020 =00022012 =24132013 =13022012 =32122013 =december 2013 =april 99 2015
// derive the real-life input string to scan by replacing the = symbols with spaces
StringReplace>inputCheck,=,SPACE,inputString
// working copy of the annotated string; goFind replaces every recognized =date in it with =ok
let>_inputLeftOver=inputCheck
// 1 selects the separator patterns that use \s* (any number of spaces) around a separator
let>multipleLeadTrailSpaces=1
//
//
// =================================== actual start of initialization
// These regular expression patterns each describe one part of a date string. Some parts have alternative patterns, like patMon and patDay.
// Later on the parts are combined in the right order to form a complete regular expression that searches for a date string.
// For example, combining patDay patMon patYear (ignoring the separator patterns for a moment) forms a search pattern for 22 07 2013.
// A complete date pattern also contains separator patterns: most of these search for / - or space, with spaces around them allowed.
//
// Day patterns, e.g. 25 31 (0)2
// patDay: 10-29, 30-31, or a single digit 1-9 with any number of leading zeros
let>patDay=([12][0-9]|3[01]|0*[1-9])
// patDayStart: 10-31, 01-09, or one non-digit character followed by a single digit 1-9
// (the non-digit character is part of the match; goFind trims the match afterwards)
let>patDayStart=([12][0-9]|3[01]|0[1-9]|\D[1-9])
// patDay2Digits: exactly two digits, 01-31
let>patDay2Digits=([12][0-9]|3[01]|0[1-9])
// Month patterns, e.g. 8 11 09, or a name
// patMon: 10-12, or a single digit 1-9 with any number of leading zeros
let>patMon=(1[012]|0*[1-9])
// patMonStart: 10-12, 01-09, or one non-digit character followed by a single digit 1-9
let>patMonStart=(1[012]|0[1-9]|\D[1-9])
// patMon2Digits: exactly two digits, 01-12
let>patMon2Digits=(1[012]|0[1-9])
// patMonName: one or more ASCII letters; the pattern itself does not check that the word is a month name
let>patMonName=([A-Z]|[a-z])+
// patYear: 20 or an apostrophe, followed by 12-16, i.e. 2012-2016 or '12-'16; modify for a greater range of years
let>patYear=((20|')1[2-6])
//
// Separators between day, month and year.
// multipleLeadTrailSpaces=1 : any number of spaces is allowed before and after a separator (\s*)
// any other value           : at most one space is allowed before and after a separator (\s?)
// The strict branch previously used \s+, which REQUIRED whitespace on both sides of the separator
// (so 11-2-2013 could never match) and still accepted runs of spaces, the opposite of its intent.
if>multipleLeadTrailSpaces=1
// - or / or one whitespace character, all three surrounded by optional spaces
let>sepaInclSpace=(\s*[-/\s]\s*)
// - or / only, surrounded by optional spaces
let>sepaButNoSpace=(\s*[-/]\s*)
// separator for the USA notation "month day, year"
let>sepaMonDayCommaYear=(\s*,\s*)
else>
// - or / with at most one space on either side, or exactly one whitespace character
let>sepaInclSpace=(\s?[-/]\s?|\s)
// - or / only, with at most one space on either side
let>sepaButNoSpace=(\s?[-/]\s?)
// comma with at most one space on either side
let>sepaMonDayCommaYear=(\s?,\s?)
endif>
// =================================== end
//
// The following are relaxed, simple versions for day and month, in case the fitting above is too tight
//let>patMon=\d{1,2}
//let>patDay=\d{1,2}
//
// To make debugging and sorting of the vars easier, the output array indexes start above 10
let>datesFoundBase=10
// index of the last stored date; starts at the base and is incremented by goFind before each store
let>DF_numberOf=datesFoundBase
// sequence number of the pattern case being tried; incremented by goFind on every call
let>caseNum=0
// holds a literal comma, referenced as %comma% in the case 6 description
// NOTE(review): presumably needed because a bare comma would be read as a GoSub parameter separator - confirm
let>comma=,
//
//
//
// Each of the following pattern combinations detects one date format. Choose the ones that you want to cover.
// You could try the first one and, if it gives a result, take that as the date; if not, try the second one, etc.
// Or try all applicable ones and do something smart when more than one is found.
//
// x denotes a separator: - / or (multiple) space. The parameters of the GoSub would not be used in an actual app.
// An app would just use the global var pattern and the actual RegEx in the subroutine goFind.
// The first parameter (the format description) is stored by goFind in the DF_info vars via goFind_var_1;
// the "e.g." parameter is just comment, it is NOT used in the SRT.
// Fixed: the descriptions of case 7 (mmddyyyy) and case 8 (yyyymmdd) used to say ddmmyyyy,
// and the example of case 3 was missing a digit.
//
// =================================== actual start of forming full patterns and doing the data extractions from the test input string
// europe
// case 1
let>pattern=%patYear%%sepaInclSpace%%patMon%%sepaInclSpace%%patDay%
GoSub>goFind,yyyy x (m)m x (d)d yyyymmdd, e.g. 2013-12-31
// =================================== end; however, add more of these GoSubs if you want to support more formats
//
// case 2
let>pattern=%patDayStart%%sepaInclSpace%%patMon%%sepaInclSpace%%patYear%
GoSub>goFind,(d)d x (m)m x yyyy ddmmyyyy, e.g. 31/12/2013
//
// case 3
let>pattern=%patDay2Digits%%patMon2Digits%%patYear%
GoSub>goFind,ddmmyyyy, e.g. 01022013
//
// case 4
let>pattern=%patDayStart%%sepaInclSpace%%patMonName%%sepaInclSpace%%patYear%
GoSub>goFind,(d)d x mon x yyyy, e.g. 2 jan 2013
//
// USA
// case 5
let>pattern=%patMonStart%%sepaInclSpace%%patDay%%sepaInclSpace%%patYear%
GoSub>goFind,mm x dd x yyyy, e.g. 12-31-2013
//
// case 6
let>pattern=%patMonName%%sepaInclSpace%%patDay%%sepaMonDayCommaYear%%patYear%
GoSub>goFind,mon dd %comma% yyyy, e.g. January 31, 2013
//
// case 7
let>pattern=%patMon2Digits%%patDay2Digits%%patYear%
GoSub>goFind,mmddyyyy, e.g. 12312013
//
// case 8
// Military and computing
let>pattern=%patYear%%patMon2Digits%%patDay2Digits%
GoSub>goFind,yyyymmdd, e.g. 20130902
//
//
// turn the running array index into the number of dates found
let>DF_numberOf={%DF_numberOf%-%datesFoundBase%}
//
//
// =================================== actual start of extracting dates
// goFind: searches inputString with the current global var pattern and records every match.
// Parameter 1 (goFind_var_1) is a description of the format; it is only copied into the DF_info text.
// Results: DF_str_<n> holds each trimmed match, DF_info_<n> holds the matching ok/ERROR report line,
// DF_numberOf is the index of the last entry stored, caseNum counts the calls.
srt>goFind
let>caseNum=caseNum+1
RegEx>pattern,inputString,0,matchArray,numOfMatches,,,
// For regular usage the two lines above are all that is needed;
// the rest stores the dates found and checks whether all expected dates were found.
// =================================== end
// fixed first part of the report text: total number of matches and the case number
let>machtchInfoPart=%numOfMatches%*Case %caseNum%
// walk through the matches from the last one down to the first
while>numOfMatches>0
let>DF_numberOf=DF_numberOf+1
// strip leading/trailing blanks from the match and store it in the results array
Trim>matchArray_%numOfMatches%,curDate
let>DF_str_%DF_numberOf%=curDate
// the match counts as expected when it appears in the annotated check string directly after an = symbol
Position>=%curDate%,inputCheck,1,nPos,FALSE
let>foundDateOK=ERROR <%curDate%>
if>nPos>0
let>foundDateOK=ok %curDate%
endif>
// mark the date as handled in the left-over string, so unrecognized dates remain visible there
StringReplace>_inputLeftOver,=%curDate%,=ok,_inputLeftOver
// report line: ok/ERROR indicator, match count, case number, match index and format description
let>DF_info_%DF_numberOf%=%foundDateOK% %machtchInfoPart%/%numOfMatches%: %goFind_var_1%
let>numOfMatches=numOfMatches-1
endwhile>
END>goFind
label>done
// For testing purposes: check the MSCHED watch list, sorted, to see all dates caught in sequence.
// Var _inputLeftOver will contain all dates that were not recognized; recognized ones are replaced by =ok
// The trailing part of the test string contains 'false dates'; all of those should remain in _inputLeftOver!
// Also look at the DF_info vars. You will see some errors with the current test string.
// But overall a good result, I think. Perhaps the errors will be handled in a future version.
**BREAKPOINT**