nltk parses parentheses incorrectly

Question

If you know what you want to return as the tag value for the parentheses, then you can use a RegexpTagger to match the parentheses and fall back to the preferred tagger for everything else.

import nltk
from nltk.data import load
# Resource name handed to nltk.data.load to obtain the pickled tagger.
_POS_TAGGER = 'taggers/maxent_treebank_pos_tagger/english.pickle'
tagger = load(_POS_TAGGER)  # same tagger as using nltk.pos_tag

# Tokens matched by the parenthesis pattern are tagged '--'; any token the
# pattern does not match is passed on to the backoff tagger loaded above.
paren_patterns = [(r'\(|\)', '--')]
regexp_tagger = nltk.tag.RegexpTagger(paren_patterns, backoff=tagger)

# `text` is assumed to hold the input sentence; it is not defined in this
# snippet.
tokens = nltk.word_tokenize(text)
regexp_tagger.tag(tokens)

Result:

[(u'Developed', 'NNP'), (u'at', 'IN'), (u'the', 'DT'), (u'Vaccine', 'NNP'), (u'and', 'CC'), (u'Gene', 'NNP'), (u'Therapy', 'NNP'), (u'Institute', 'NNP'), (u'at', 'IN'), (u'the', 'DT'), (u'Oregon', 'NNP'), (u'Health', 'NNP'), (u'and', 'CC'), (u'Science', 'NNP'), (u'University', 'NNP'), (u'(', '--'), (u'OHSU', 'NNP'), (u')', '--'), (u',', ','), (u'the', 'DT'), (u'vaccine', 'NN'), (u'proved', 'VBD'), (u'successful', 'JJ'), (u'in', 'IN'), (u'about', 'IN'), (u'fifty', 'JJ'), (u'percent', 'NN'), (u'of', 'IN'), (u'the', 'DT'), (u'subjects', 'NNS'), (u'tested', 'VBD'), (u'and', 'CC'), (u'could', 'MD'), (u'lead', 'VB'), (u'to', 'TO'), (u'a', 'DT'), (u'human', 'NN'), (u'vaccine', 'NN'), (u'preventing', 'VBG'), (u'the', 'DT'), (u'onset', 'NN'), (u'of', 'IN'), (u'HIV/AIDS', 'NNS'), (u'and', 'CC'), (u'even', 'RB'), (u'cure', 'NN'), (u'patients', 'NNS'), (u'currently', 'RB'), (u'on', 'IN'), (u'anti-retroviral', 'JJ'), (u'drugs', 'NNS'), (u'.', '.')]