import re
import nltk
from nltk.tree import *
from nltk.corpus import ptb
# len(ptb.parsed_sents())
# Tally subject NPs that share their index with an empty element inside the
# verb phrase immediately to their right.
#
# For each subtree whose label starts with 'S', every child whose label
# begins with 'NP-SBJ-<n>' is examined. If the child's right-hand sibling is
# a subtree whose label starts with 'VP', and that sibling dominates a leaf
# '*-<n>' tagged '-NONE-', the subject is counted once. The total is printed
# when the whole corpus has been scanned.

# Compiled once up front: the pattern is invariant across all loops below.
_SUBJECT_INDEX_RE = re.compile(r'NP-SBJ-(\d+)')

count = 0
for t in ptb.parsed_sents():
    for st in t.subtrees():
        if not st.label().startswith('S'):
            continue
        for ith, child in enumerate(st):
            # Leaves are plain strings and carry no label to inspect.
            if isinstance(child, str):
                continue
            m = _SUBJECT_INDEX_RE.match(child.label())
            if not m:
                continue
            # The subject must be directly followed by another child.
            if ith + 1 >= len(st):
                continue
            sibling = st[ith + 1]
            # A leaf sibling has neither .label() nor .pos(); skip it rather
            # than raise AttributeError.
            if isinstance(sibling, str):
                continue
            if not sibling.label().startswith('VP'):
                continue
            trace = '*-' + m.group(1)
            # any() stops at the first hit, so each subject adds at most one
            # to the total no matter how many matching leaves the VP holds.
            if any(tag == '-NONE-' and word == trace
                   for word, tag in sibling.pos()):
                count += 1
print(count)
