Sure, thank you.
# Run Zingg's "recommend" phase for the "firstName" column and log the
# stopwords it writes under <zinggDir>/<modelId>/stopWords/firstName.
zinggDir = "/models"
modelId = "databricksdemotrial_120k"
input_file = "/febrl120k/test.csv"

try:
    # Stopwords recommendation phase
    # NOTE(review): `args` is not defined in this snippet; presumably it is the
    # Zingg Arguments object built earlier -- confirm it already carries
    # zinggDir / modelId / the input pipe, since they are not set on it here.
    options = ClientOptions([ClientOptions.PHASE, "recommend", "--column", "firstName"])
    args.setStopWordsCutoff(0.5)
    zingg = ZinggWithSpark(args, options)

    # Log the options generated
    print(options)
    options_dict = vars(options)
    # Stringify each attribute itself. The earlier `str(value.getOptionValue)`
    # formatted a method reference that was never called, and it raised
    # AttributeError for any attribute lacking that name, which aborted the
    # phase before initAndExecute() could run.
    formatted_options = {key: str(value) for key, value in options_dict.items()}
    LOG.debug(f"Zingg options generated for stopwords recommendation: {formatted_options}")

    zingg.initAndExecute()

    # Log the stopwords recommendations
    stopwordsForfname = spark.read.csv(f"{zinggDir}/{modelId}/stopWords/firstName")
    stopwordsForfname_list = stopwordsForfname.collect()
    LOG.info(f"Recommended stopwords for 'firstName': {stopwordsForfname_list}")
except Exception as e:
    # Log any errors that occur during the stopwords recommendation phase;
    # exc_info=True attaches the traceback to the first record.
    LOG.error("Error occurred during stopwords recommendation:", exc_info=True)
    LOG.error(f"Error details: {e}")
I'm running this on Databricks (just for context).