"""Extract ``<input>`` records from a block of text with a regular expression.

A record is the text that directly follows an ``<input>`` marker and is made
of two upper-case words, the keyword ``STRONG`` or ``WEAK``, and three
integers, all separated by whitespace.  Running the script prints the list of
records found in the sample text ``s``.
"""
import re

# Sample text: two records followed by trailing text that is not a record.
s = (
    " <input>YOUNG J STRONG 309 1356 1994"
    " <input>FELDMAN DJ WEAK 15 297 1962"
    " Some other stuff "
)

# Raw string so that ``\s`` and ``\d`` reach the regex engine verbatim instead
# of relying on Python passing unknown string escapes through (deprecated).
# The lookbehind requires the ``<input>`` marker but keeps it out of the
# match; the trailing ``\s*`` sits outside the capture group, so the captured
# record carries no trailing whitespace.
RECORD_PATTERN = re.compile(
    r'(?<=<input>)([A-Z]+\s+[A-Z]+\s+(?:STRONG|WEAK)\s+\d+\s+\d+\s+\d+)\s*'
)

# The pattern has exactly one capture group, so findall() returns a flat list
# of the captured record strings.  print() with a single argument behaves the
# same on Python 2 and Python 3.
print(RECORD_PATTERN.findall(s))