# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def write():
    """Render the gallery page: spaCy-visualizer intro plus a dataset explorer."""
    # --- spaCy visualizer header -------------------------------------------
    st.sidebar.title("Interactive spaCy visualizer")
    st.sidebar.markdown(
        """
Process text with [spaCy](https://spacy.io) models and visualize named entities,
dependencies and more. Uses spaCy's built-in
[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.
"""
    )
    st.write("Author: [Ines Montani](https://gist.github.com/ines)")
    st.write(
        "Source: [Github](https://gist.github.com/ines/b320cb8441b590eedf19137599ce6685)"
    )
    spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
    model_load_state = st.info(f"Loading model '{spacy_model}'...")
    load_model(spacy_model)  # loading is the side effect we need here
    model_load_state.empty()

    # --- dataset explorer ---------------------------------------------------
    dataset = get_state(config)
    dataset.expand = True
    st.title(f"Dataset Explorer: {type(dataset).__name__}")
    picker = st.sidebar.selectbox(
        "Index selection method", ["Slider", "Number input", "Sample"]
    )
    if picker == "Slider":
        sample_idx = st.sidebar.slider("Index", 0, len(dataset) - 1, 0)
    elif picker == "Number input":
        sample_idx = st.sidebar.number_input("Index", 0, len(dataset) - 1, 0)
    else:  # "Sample": draw a random index on demand
        sample_idx = 0
        if st.sidebar.button("Sample"):
            sample_idx = np.random.choice(len(dataset))
        st.sidebar.text(f"Index: {sample_idx}")
    show_example(dataset, sample_idx, config)

    # --- config dump --------------------------------------------------------
    st.header("config")
    cfg_string = pp2mkdtable(config, jupyter_style=True)
    st.markdown(cfg_string)
@st.cache(allow_output_mutation=True)
def load_vectors(path):
    """Load a sense2vec vector table from *path*, cached across reruns."""
    vectors = Sense2Vec()
    return vectors.from_disk(path)
# sense2vec demo: sidebar controls plus vector loading.
st.sidebar.title("sense2vec")
st.sidebar.markdown(
    "Explore semantic similarities of multi-word phrases using "
    "[`sense2vec`](https://github.com/explosion/sense2vec/)."
)
word = st.sidebar.text_input("Word", DEFAULT_WORD)
sense_dropdown = st.sidebar.empty()  # filled in later once senses are known
n_similar = st.sidebar.slider(
    "Max number of similar entries", 1, 100, value=20, step=1
)
show_senses = st.sidebar.checkbox("Distinguish results by sense")
vectors_path = st.sidebar.selectbox("Vectors", SENSE2VEC_PATHS)
if not vectors_path:
    # No vector paths were passed on the command line: explain how to run.
    # (The closing ``` fence was missing in the original and the else-branch
    # was fused onto the string's closing line; both repaired here.)
    st.error(
        f"""
#### No vectors available
You can pass one or more paths to this
script on the command line. For example:
```bash
streamlit run {sys.argv[0]} /path/to/sense2vec /path/to/other_sense2vec
```
"""
    )
else:
    s2v = load_vectors(vectors_path)
def intro():
    """Landing page shown before any demo is selected."""
    st.sidebar.success("Select a demo above.")
    # NOTE(review): the markdown blurb was truncated mid-sentence in this
    # paste and its string was never closed; it is terminated here so the
    # module parses. Restore the full text from the upstream source.
    st.markdown(
        """
Streamlit is an open-source app framework built specifically for
"""
    )
# S&P-500 stocks explorer: company list, asset picker, and quote window.
components = load_data()
title = st.empty()
st.sidebar.title("Options")


def label(symbol):
    """Dropdown label for *symbol*: ticker plus company name."""
    row = components.loc[symbol]
    return f"{symbol} - {row.Security}"


if st.sidebar.checkbox("View companies list"):
    overview_columns = ["Security", "GICS Sector", "Date first added", "Founded"]
    st.dataframe(components[overview_columns])

st.sidebar.subheader("Select asset")
asset = st.sidebar.selectbox(
    "Click below to select a new asset",
    components.index.sort_values(),
    index=3,
    format_func=label,
)
title.title(components.loc[asset].Security)
if st.sidebar.checkbox("View company info", True):
    st.table(components.loc[asset])

data0 = load_quotes(asset)
data = data0.copy().dropna()
data.index.name = None
section = st.sidebar.slider(
    "Number of quotes",
    min_value=30,
    max_value=min([2000, data.shape[0]]),
    value=500,
    step=10,
)
data2 = data[-section:]["Adj Close"].to_frame("Adj Close")
sma = st.sidebar.checkbox("SMA")
# NOTE(review): this paste begins midway through an `options` dict literal;
# the head is reconstructed from the identical parser section later in this
# file (the collapse_punct/collapse_phrases/compact checkboxes).
options = {
    "collapse_punct": collapse_punct,
    "collapse_phrases": collapse_phrases,
    "compact": compact,
}
# Render each sentence separately when requested, otherwise the whole doc.
docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
for sent in docs:
    html = displacy.render(sent, options=options)
    # Double newlines seem to mess with the rendering
    html = html.replace("\n\n", "\n")
    if split_sents and len(docs) > 1:
        st.markdown(f"> {sent.text}")
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)

if "ner" in nlp.pipe_names:
    st.header("Named Entities")
    st.sidebar.header("Named Entities")
    label_set = nlp.get_pipe("ner").labels
    labels = st.sidebar.multiselect("Entity labels", label_set, label_set)
    html = displacy.render(doc, style="ent", options={"ents": labels})
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
    attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
    if "entity_linker" in nlp.pipe_names:
        attrs.append("kb_id_")
    data = [
        [str(getattr(ent, attr)) for attr in attrs]
        for ent in doc.ents
        if ent.label_ in labels
    ]
    df = pd.DataFrame(data, columns=attrs)
    st.dataframe(df)
# NOTE(review): this paste also begins midway through an `options` dict
# literal; head reconstructed to match the other copies of this section.
options = {
    "collapse_punct": collapse_punct,
    "collapse_phrases": collapse_phrases,
    "compact": compact,
}
docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
for sent in docs:
    html = displacy.render(sent, options=options)
    # Double newlines seem to mess with the rendering
    html = html.replace("\n\n", "\n")
    if split_sents and len(docs) > 1:
        st.markdown(f"> {sent.text}")
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)

if "ner" in nlp.pipe_names:
    st.header("Named Entities")
    st.sidebar.header("Named Entities")
    default_labels = ["PERSON", "ORG", "GPE", "LOC"]
    labels = st.sidebar.multiselect(
        "Entity labels", nlp.get_pipe("ner").labels, default_labels
    )
    html = displacy.render(doc, style="ent", options={"ents": labels})
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
    attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
    if "entity_linker" in nlp.pipe_names:
        attrs.append("kb_id_")
    data = [
        [str(getattr(ent, attr)) for attr in attrs]
        for ent in doc.ents
        if ent.label_ in labels
    ]
    # NOTE(review): the paste was cut off after building `data`; the display
    # below mirrors the identical entity table in the earlier copy of this
    # section — confirm against the upstream source.
    df = pd.DataFrame(data, columns=attrs)
    st.dataframe(df)
def process_text(model_name, text):
    """Run *text* through the spaCy pipeline named *model_name* and return the Doc.

    Presumably `load_model` caches pipelines so repeated calls are cheap —
    TODO confirm against its definition.
    """
    nlp = load_model(model_name)
    # Debug trace kept from the original (fixed `print (` spacing only);
    # consider switching to logging.
    print("model loaded!")
    return nlp(text)
# Interactive spaCy visualizer: sidebar intro, model selection, parse options.
st.sidebar.title("Interactive spaCy visualizer")
st.sidebar.markdown(
    """
Process text with [spaCy](https://spacy.io) models and visualize named entities,
dependencies and more. Uses spaCy's built-in
[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.
"""
)
spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
model_load_state = st.info(f"Loading model '{spacy_model}'...")
nlp = load_model(spacy_model)
model_load_state.empty()
text = st.text_area("Text to analyze", DEFAULT_TEXT)
doc = process_text(spacy_model, text)
if "parser" in nlp.pipe_names:
    st.header("Dependency Parse & Part-of-speech tags")
    st.sidebar.header("Dependency Parse")
    split_sents = st.sidebar.checkbox("Split sentences", value=True)
    collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
    collapse_phrases = st.sidebar.checkbox("Collapse phrases")
    compact = st.sidebar.checkbox("Compact mode")
    # NOTE(review): the original paste cut this dict off after its first key;
    # completed to match the identical section later in the file.
    options = {
        "collapse_punct": collapse_punct,
        "collapse_phrases": collapse_phrases,
        "compact": compact,
    }
# spaCy visualizer: source link, model load, and dependency-parse rendering.
st.write(
    "Source: [Github](https://gist.github.com/ines/b320cb8441b590eedf19137599ce6685)"
)
spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
model_load_state = st.info(f"Loading model '{spacy_model}'...")
nlp = load_model(spacy_model)
model_load_state.empty()

text = st.text_area("Text to analyze", DEFAULT_TEXT)
doc = process_text(spacy_model, text)

if "parser" in nlp.pipe_names:
    st.header("Dependency Parse & Part-of-speech tags")
    st.sidebar.header("Dependency Parse")
    split_sents = st.sidebar.checkbox("Split sentences", value=True)
    collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
    collapse_phrases = st.sidebar.checkbox("Collapse phrases")
    compact = st.sidebar.checkbox("Compact mode")
    options = dict(
        collapse_punct=collapse_punct,
        collapse_phrases=collapse_phrases,
        compact=compact,
    )
    # One doc per sentence when splitting, otherwise the whole document.
    docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
    show_sentence_headers = split_sents and len(docs) > 1
    for sent in docs:
        rendered = displacy.render(sent, options=options)
        # Double newlines seem to mess with the rendering
        rendered = rendered.replace("\n\n", "\n")
        if show_sentence_headers:
            st.markdown(f"> {sent.text}")
        st.write(HTML_WRAPPER.format(rendered), unsafe_allow_html=True)