import os
import stanza
from stanza.server import CoreNLPClient


# Show the installed Stanza version (echoed as the cell's value when run in a notebook).
stanza.__version__

'1.2.3'


# Sample passage (four sentences) analysed throughout the notebook.
# Built from explicit pieces so the space that precedes each of the first
# three line breaks stays visible and survives editors that strip trailing
# whitespace; those spaces count toward character offsets into the text.
en_text = (
    "\n"
    "To Sherlock Holmes she is always the woman. \n"
    "I have seldom heard him mention her under any other name. \n"
    "In his eyes she eclipses and predominates the whole of her sex. \n"
    "It was not that he felt any emotion akin to love for Irene Adler.\n"
)


# NOTE(review): the message below is printed, but the download call is commented
# out, so nothing is fetched here — presumably the English model was already
# downloaded on this machine; re-enable the call on a fresh install.
print("Downloading English model...")
#stanza.download('en')

Downloading English model...


# Announce the step, then create the English pipeline; no processor list is
# passed, so the library's default set of processors is used.
print("Building an English pipeline...")
en_nlp = stanza.Pipeline(lang='en')

2022-01-24 11:04:59 INFO: Loading these models for language: en (English):
=========================
| Processor | Package   |
-------------------------
| tokenize  | combined  |
| pos       | combined  |
| lemma     | combined  |
| depparse  | combined  |
| sentiment | sstplus   |
| ner       | ontonotes |
=========================

2022-01-24 11:04:59 INFO: Use device: cpu
2022-01-24 11:04:59 INFO: Loading: tokenize
2022-01-24 11:04:59 INFO: Loading: pos

Building an English pipeline...

2022-01-24 11:05:00 INFO: Loading: lemma
2022-01-24 11:05:00 INFO: Loading: depparse
2022-01-24 11:05:01 INFO: Loading: sentiment
2022-01-24 11:05:01 INFO: Loading: ner
2022-01-24 11:05:03 INFO: Done loading processors!


# Run the English pipeline on the sample text and print the type of the returned object
en_doc = en_nlp(en_text)
print(type(en_doc))

<class 'stanza.models.common.doc.Document'>


# Report how many sentences the pipeline split the text into
print("No. sentences:", len(en_doc.sentences))

No. sentences: 4


# For every sentence, print a numbered heading followed by one tab-separated
# row per word: text, lemma, POS tag, head index and dependency relation.
word_row = "{:12s}\t{:12s}\t{:6s}\t{:d}\t{:12s}"
for i, sent in enumerate(en_doc.sentences, start=1):
    print("[Sentence {}]".format(i))
    for word in sent.words:
        columns = (word.text, word.lemma, word.pos, word.head, word.deprel)
        print(word_row.format(*columns))
    print("")

[Sentence 1]
To          	to          	ADP   	2	case        
Sherlock    	Sherlock    	PROPN 	8	obl         
Holmes      	Holmes      	PROPN 	2	flat        
she         	she         	PRON  	8	nsubj       
is          	be          	AUX   	8	cop         
always      	always      	ADV   	8	advmod      
the         	the         	DET   	8	det         
woman       	woman       	NOUN  	0	root        
.           	.           	PUNCT 	8	punct       

[Sentence 2]
I           	I           	PRON  	4	nsubj       
have        	have        	AUX   	4	aux         
seldom      	seldom      	ADV   	4	advmod      
heard       	hear        	VERB  	0	root        
him         	he          	PRON  	4	obj         
mention     	mention     	VERB  	4	xcomp       
her         	she         	PRON  	6	obj         
under       	under       	ADP   	11	case        
any         	any         	DET   	11	det         
other       	other       	ADJ   	11	amod        
name        	name        	NOUN  	6	obl         
.           	.           	PUNCT 	4	punct       

[Sentence 3]
In          	in          	ADP   	3	case        
his         	he          	PRON  	3	nmod:poss   
eyes        	eye         	NOUN  	5	obl         
she         	she         	PRON  	5	nsubj       
eclipses    	eclipse     	VERB  	0	root        
and         	and         	CCONJ 	7	cc          
predominates	predominate 	VERB  	5	conj        
the         	the         	DET   	9	det         
whole       	whole       	NOUN  	7	obj         
of          	of          	ADP   	12	case        
her         	she         	PRON  	12	nmod:poss   
sex         	sex         	NOUN  	9	nmod        
.           	.           	PUNCT 	5	punct       

[Sentence 4]
It          	it          	PRON  	2	nsubj       
was         	be          	AUX   	0	root        
not         	not         	PART  	2	advmod      
that        	that        	SCONJ 	6	mark        
he          	he          	PRON  	6	nsubj       
felt        	feel        	VERB  	2	ccomp       
any         	any         	DET   	8	det         
emotion     	emotion     	NOUN  	6	obj         
akin        	akin        	ADJ   	8	amod        
to          	to          	PART  	11	mark        
love        	love        	VERB  	9	xcomp       
for         	for         	ADP   	13	case        
Irene       	Irene       	PROPN 	11	obl         
Adler       	Adler       	PROPN 	13	flat        
.           	.           	PUNCT 	2	punct


# List each entity found in the document with its type and character span.
print("Mention text\tType\tStart-End")
for ent in en_doc.ents:
    fields = (ent.text, ent.type, ent.start_char, ent.end_char)
    print("{}\t{}\t{}-{}".format(*fields))

Mention text	Type	Start-End
Sherlock Holmes	PERSON	4-19
Irene Adler	PERSON	223-234


# Name of the environment variable that points at the CoreNLP installation directory.
env_var = "CORENLP_HOME"
# Look the variable up; a plain subscript on os.environ raises KeyError when it is unset.
os.environ[env_var]

'C:\\Dev Projects\\Libraries\\stanford-corenlp-4.3.1'


# Examine the CoreNLP installation folder to make sure the installation is successful
!dir {os.environ[env_var]}

The system cannot find the path specified.


# Import client module
import time
from stanza.server import CoreNLPClient


# Configure a CoreNLPClient: eight annotators, memory='2G', and a server
# endpoint on local port 9001, with be_quiet turned off.
corenlp_annotators = ['tokenize', 'ssplit', 'pos', 'lemma', 'ner', 'parse', 'depparse', 'coref']
client = CoreNLPClient(
    annotators=corenlp_annotators,
    memory='2G',
    endpoint='http://localhost:9001',
    be_quiet=False,
    # properties='spanish',  # kept disabled, as in the original cell
)

2022-01-24 11:05:04 INFO: Writing properties to tmp file: corenlp_server-aaf3dd7c793a4b29.props


# Start the background server and wait for some time
client.start()
# Fixed 10-second pause after start(); NOTE(review): presumably to give the
# Java server time to come up — it does not check that the server is ready.
time.sleep(10)

2022-01-24 11:05:04 INFO: Starting server with command: java -Xmx2G -cp C:\Dev Projects\Libraries\stanford-corenlp-4.3.1\* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet False -serverProperties corenlp_server-aaf3dd7c793a4b29.props -annotators tokenize,ssplit,pos,lemma,ner,parse,depparse,coref -preload -outputFormat serialized


# Print background processes and look for java (tasklist/findstr are Windows commands).
# You should be able to see a StanfordCoreNLPServer java process running in the background
!tasklist | findstr java

java.exe                     12944 Console                    3      7,292 K
java.exe                     12192 Console                    3  1,024,520 K


# Annotate the English text through the CoreNLP client and print the type of the result
document = client.annotate(en_text)
print(type(document))

<class 'CoreNLP_pb2.Document'>


# Print a header row, then one row per token (word, lemma, POS tag, NER tag),
# grouped under a numbered heading for each sentence. The header and the data
# rows share a single column template.
token_row = "{:12s}\t{:12s}\t{:6s}\t{}"
print(token_row.format("Word", "Lemma", "POS", "NER"))

for i, sent in enumerate(document.sentence, start=1):
    print("[Sentence {}]".format(i))
    for t in sent.token:
        print(token_row.format(t.word, t.lemma, t.pos, t.ner))
    print("")

Word        	Lemma       	POS   	NER
[Sentence 1]
To          	to          	TO    	O
Sherlock    	Sherlock    	NNP   	PERSON
Holmes      	Holmes      	NNP   	PERSON
she         	she         	PRP   	O
is          	be          	VBZ   	O
always      	always      	RB    	O
the         	the         	DT    	O
woman       	woman       	NN    	O
.           	.           	.     	O

[Sentence 2]
I           	I           	PRP   	O
have        	have        	VBP   	O
seldom      	seldom      	RB    	O
heard       	hear        	VBN   	O
him         	he          	PRP   	O
mention     	mention     	VB    	O
her         	she         	PRP$  	O
under       	under       	IN    	O
any         	any         	DT    	O
other       	other       	JJ    	O
name        	name        	NN    	O
.           	.           	.     	O

[Sentence 3]
In          	in          	IN    	O
his         	he          	PRP$  	O
eyes        	eye         	NNS   	O
she         	she         	PRP   	O
eclipses    	eclipse     	VBZ   	O
and         	and         	CC    	O
predominates	predominate 	VBZ   	O
the         	the         	DT    	O
whole       	whole       	NN    	O
of          	of          	IN    	O
her         	she         	PRP$  	O
sex         	sex         	NN    	O
.           	.           	.     	O

[Sentence 4]
It          	it          	PRP   	O
was         	be          	VBD   	O
not         	not         	RB    	O
that        	that        	IN    	O
he          	he          	PRP   	O
felt        	feel        	VBD   	O
any         	any         	DT    	O
emotion     	emotion     	NN    	O
akin        	akin        	JJ    	O
to          	to          	TO    	O
love        	love        	VB    	O
for         	for         	IN    	O
Irene       	Irene       	NNP   	PERSON
Adler       	Adler       	NNP   	PERSON
.           	.           	.     	O


# Print a header, then the text and entity type of every mention attached to
# each sentence; header and rows share one column template.
mention_row = "{:30s}\t{}"
print(mention_row.format("Mention", "Type"))

for sent in document.sentence:
    for m in sent.mentions:
        print(mention_row.format(m.entityMentionText, m.entityType))

Mention                       	Type
Sherlock Holmes               	PERSON
she                           	PERSON
him                           	PERSON
her                           	PERSON
his                           	PERSON
she                           	PERSON
her                           	PERSON
Irene Adler                   	PERSON
he                            	PERSON


# Take the first sentence of the annotated document and print its
# constituency parse tree (the sentence's `parseTree` field).
sentence = document.sentence[0]
constituency_parse = sentence.parseTree
print(constituency_parse)

child {
  child {
    child {
      child {
        value: "To"
      }
      value: "TO"
      score: -4.007333278656006
    }
    child {
      child {
        child {
          value: "Sherlock"
        }
        value: "NNP"
        score: -11.400341033935547
      }
      child {
        child {
          value: "Holmes"
        }
        value: "NNP"
        score: -9.204214096069336
      }
      value: "NP"
      score: -23.002450942993164
    }
    value: "WHPP"
    score: -34.39811325073242
  }
  child {
    child {
      child {
        child {
          value: "she"
        }
        value: "PRP"
        score: -3.9035301208496094
      }
      value: "NP"
      score: -4.765097141265869
    }
    child {
      child {
        child {
          value: "is"
        }
        value: "VBZ"
        score: -0.14797931909561157
      }
      child {
        child {
          child {
            value: "always"
          }
          value: "RB"
          score: -4.51917028427124
        }
        value: "ADVP"
        score: -4.762180328369141
      }
      child {
        child {
          child {
            value: "the"
          }
          value: "DT"
          score: -0.5893322825431824
        }
        child {
          child {
            value: "woman"
          }
          value: "NN"
          score: -6.736452579498291
        }
        value: "NP"
        score: -9.268684387207031
      }
      value: "VP"
      score: -21.771156311035156
    }
    value: "S"
    score: -26.872772216796875
  }
  child {
    child {
      value: "."
    }
    value: "."
    score: -0.43762221932411194
  }
  value: "SBAR"
  score: -66.49877166748047
}
value: "ROOT"
score: -72.68036651611328


# Shut down the background CoreNLP server
client.stop()
# Pause 10 seconds, then list java processes again to check what is still running
time.sleep(10)
!tasklist | findstr java

java.exe                     12192 Console                    3  2,294,324 K

NLP - Using Stanza library

Stanza Text Processing

Step 1 - Downloading model

Step 2 - Creating pipeline

Step 3 - Accessing annotations

Stanford CoreNLP interface

Reference