It looks like you're trying to extract every string that appears after `<Compile Include="` and before the closing `">`. We can do that with a regular expression, but be aware that this approach will probably break on edge cases!
import re
def extract_files(filename):
    """Print every ``<Compile Include="...">`` path found in *filename*.

    The file is read in full and scanned with a regular expression; each
    matching path is printed on its own line, in the order it appears.

    Only paths made up exclusively of ASCII letters, dashes (-), dots (.)
    and backslashes are recognised. A path containing any other character
    (a digit, an underscore, a space, a forward slash, ...) is skipped
    entirely, because the pattern requires the closing double quote to
    follow the run of allowed characters directly.

    Args:
        filename: Path of the project file to scan.

    Raises:
        SystemExit: With exit status 1, after writing "no match" to
            stderr, when the file contains no matching path.
        OSError: If the file cannot be opened or read.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import sys`` being present.
    import sys

    with open(filename, 'r') as file:
        # read() has to be *called*: the bare attribute is a bound method,
        # and passing that to re.findall raises TypeError.
        text = file.read()

    # The lookbehind/lookahead keep the surrounding markup out of the
    # results. The value must be terminated by a double quote ("), NOT by
    # a single quote (').
    matches = re.findall(r'(?<=<Compile Include=")[-.A-Za-z\\]+(?=")', text)

    if not matches:
        # Finding nothing is reported as a failure: message on stderr and
        # a non-zero exit status.
        sys.stderr.write("no match\n")
        sys.exit(1)

    for match in matches:
        print(match)