# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def convert_to_tsv(relation_df):
    """Convert the DataFrame to a TSV file for PBG (PyTorch-BigGraph) to read.

    Each row is a triplet that defines one edge/relationship in the graph.
    Columns: start, label, end
      - start: id of the 'from' node
      - label: type of the relationship
      - end: id of the 'to' node

    Arguments:
        relation_df {[Dataframe]} -- Dataframe in above mentioned format

    Exits the process via ``sys.exit`` if the file cannot be written.
    """
    try:
        tsv_path = os.path.join(
            os.getcwd(),
            GLOBAL_CONFIG["PROJECT_NAME"],
            GLOBAL_CONFIG["DATA_DIRECTORY"],
            GLOBAL_CONFIG["TSV_FILE_NAME"] + ".tsv",
        )  # default myproject/data/graph.tsv
        logging.info(f"WRITING TSV FILE TO {tsv_path}")
        # PBG expects tab-separated triplets with no header and no index column
        relation_df[["start", "label", "end"]].to_csv(
            tsv_path, sep="\t", header=False, index=False
        )
    except Exception as e:
        # log at error level (was logging.info) so failures are visible in logs
        logging.error("error in converting to tsv")
        logging.error(e, exc_info=True)
        sys.exit(e)
for version in versions
] # embedding filenames stored are in format : embeddings_0_0.json for number of partitions
edge_filenames = [
f"graph_partitioned/edges{p}_{p1}.h5"
for p in partitions
for p1 in partitions
] # edge files are stored are in format : edges_0_0.json for number of partitions
meta_dict = dict(
entities=entities,
partitions=GLOBAL_CONFIG["NUM_PARTITIONS"],
entity_files=entity_filenames,
embedding_files=embedding_filenames,
edge_files=edge_filenames,
) # metadata for all these files
metadata_path = os.path.join(
os.getcwd(), GLOBAL_CONFIG["PROJECT_NAME"], "metadata.json"
) # save to myproject/metadata.json
with open(metadata_path, "w") as f:
json.dump(meta_dict, f)
f.close()
except Exception as e:
logging.info("""error in exporting meta data. """)
logging.info(e, exc_info=True)
def similarity_search(entity_id):
    # Look up an entity by id and run a nearest-neighbour search over the
    # trained embeddings for its partition.
    # NOTE(review): the try block opened below has no matching except/finally
    # in this view — the function appears truncated; confirm against the
    # original module before relying on its error handling.
    try:
        from embeoj.utils import load_config
        # Rebind module-level config/paths on every call so the search always
        # reflects the current GLOBAL_CONFIG on disk.
        global GLOBAL_CONFIG, DATA_DIRECTORY, CHECKPOINT_DIRECTORY
        GLOBAL_CONFIG = load_config("GLOBAL_CONFIG")
        DATA_DIRECTORY = os.path.join(
            os.getcwd(), GLOBAL_CONFIG["PROJECT_NAME"], GLOBAL_CONFIG["DATA_DIRECTORY"]
        )
        CHECKPOINT_DIRECTORY = os.path.join(
            os.getcwd(),
            GLOBAL_CONFIG["PROJECT_NAME"],
            GLOBAL_CONFIG["CHECKPOINT_DIRECTORY"],
        )
        create_indexes()  # create indexes if not present
        # find_entity_data presumably returns a mapping with the entity's
        # type, partition and row index — TODO confirm its exact schema
        entity_details = find_entity_data(entity_id)
        entity_type = entity_details["entity_type"]
        partition_number = entity_details["partition_number"]
        # find index of entity id
        query_index = entity_details["entity_index"]
        search_result, entity_file_list, neighbors = search_all(
            entity_type, partition_number, query_index
        )
def initialise_config():
    """Load GLOBAL_CONFIG and derive the standard project paths from it.

    Populates the module-level GLOBAL_CONFIG, FILENAMES, DATA_DIRECTORY and
    CHECKPOINT_DIRECTORY values used by the rest of the module.
    """
    from embeoj.utils import load_config

    global GLOBAL_CONFIG
    global DATA_DIRECTORY
    global CHECKPOINT_DIRECTORY
    global FILENAMES

    GLOBAL_CONFIG = load_config("GLOBAL_CONFIG")

    # All paths hang off <cwd>/<project name>; compute the prefix once.
    project_root = os.path.join(os.getcwd(), GLOBAL_CONFIG["PROJECT_NAME"])
    data_dir = os.path.join(project_root, GLOBAL_CONFIG["DATA_DIRECTORY"])

    # path to tsv file with train data
    FILENAMES = {
        "train": os.path.join(data_dir, GLOBAL_CONFIG["TSV_FILE_NAME"] + ".tsv")
    }
    DATA_DIRECTORY = data_dir
    CHECKPOINT_DIRECTORY = os.path.join(
        project_root, GLOBAL_CONFIG["CHECKPOINT_DIRECTORY"]
    )
def initialise_config():
    """Load the similarity-search and global configs into module globals.

    Sets SIMILARITY_SEARCH_CONFIG, GLOBAL_CONFIG, the data/checkpoint
    directories, the FAISS index parameters and the neighbour count used by
    the similarity-search routines.
    """
    from embeoj.utils import load_config

    global SIMILARITY_SEARCH_CONFIG
    global GLOBAL_CONFIG
    global DATA_DIRECTORY
    global CHECKPOINT_DIRECTORY
    global FAISS_INDEX_NAME
    global EMBEDDING_DIMENSIONS
    global NUM_CLUSTER
    global neighbors

    SIMILARITY_SEARCH_CONFIG = load_config("SIMILARITY_SEARCH_CONFIG")
    GLOBAL_CONFIG = load_config("GLOBAL_CONFIG")

    # Both directories live under <cwd>/<project name>.
    project_root = os.path.join(os.getcwd(), GLOBAL_CONFIG["PROJECT_NAME"])
    DATA_DIRECTORY = os.path.join(project_root, GLOBAL_CONFIG["DATA_DIRECTORY"])
    CHECKPOINT_DIRECTORY = os.path.join(
        project_root, GLOBAL_CONFIG["CHECKPOINT_DIRECTORY"]
    )

    FAISS_INDEX_NAME = SIMILARITY_SEARCH_CONFIG["FAISS_INDEX_NAME"]
    EMBEDDING_DIMENSIONS = GLOBAL_CONFIG["EMBEDDING_DIMENSIONS"]
    NUM_CLUSTER = SIMILARITY_SEARCH_CONFIG["NUM_CLUSTER"]
    # +1 because the query entity itself comes back as its own nearest neighbour
    neighbors = SIMILARITY_SEARCH_CONFIG["NEAREST_NEIGHBORS"] + 1
def initialise_config():
    """Load GLOBAL_CONFIG and derive all project paths used by this module.

    Populates the module-level GLOBAL_CONFIG, json_path, FILENAMES,
    DATA_DIRECTORY and CHECKPOINT_DIRECTORY values.
    """
    from embeoj.utils import load_config

    # Declare every global once up front (the original declared GLOBAL_CONFIG
    # twice and loaded the config twice — a merge artifact).
    global GLOBAL_CONFIG
    global json_path
    global DATA_DIRECTORY
    global CHECKPOINT_DIRECTORY
    global FILENAMES

    GLOBAL_CONFIG = load_config("GLOBAL_CONFIG")  # load once, not twice

    json_path = os.path.join(
        os.getcwd(),
        GLOBAL_CONFIG["PROJECT_NAME"],
        GLOBAL_CONFIG["DATA_DIRECTORY"],
        GLOBAL_CONFIG["JSON_EXPORT_FILE"] + ".json",
    )  # path to the json dump of the graph db
    FILENAMES = {
        "train": os.path.join(
            os.getcwd(),
            GLOBAL_CONFIG["PROJECT_NAME"],
            GLOBAL_CONFIG["DATA_DIRECTORY"],
            GLOBAL_CONFIG["TSV_FILE_NAME"] + ".tsv",
        )
    }  # path to tsv file with train data
    DATA_DIRECTORY = os.path.join(
        os.getcwd(), GLOBAL_CONFIG["PROJECT_NAME"], GLOBAL_CONFIG["DATA_DIRECTORY"]
    )
    CHECKPOINT_DIRECTORY = os.path.join(
        os.getcwd(),
        GLOBAL_CONFIG["PROJECT_NAME"],
        GLOBAL_CONFIG["CHECKPOINT_DIRECTORY"],
    )