But I got an error. Can you tell me how to fix it? raise Py4JJavaError( py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext. : java.lang.UnsupportedOperationException: getSubject is not supported at java.base/javax.security.auth.Subject.getSubject(Subject.java:277) at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:577) at org.apache.spark.util.Utils$.$anonfun$getCurrentUserName$1(Utils.scala:2416) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2416) at org.apache.spark.SparkContext.<init>(SparkContext.scala:329) at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58) at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62) at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499) at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:483) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) at py4j.Gateway.invoke(Gateway.java:238) at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80) at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69) at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) at py4j.ClientServerConnection.run(ClientServerConnection.java:106) at java.base/java.lang.Thread.run(Thread.java:1447)
I managed to run the command ./scripts/zingg.sh --phase match --conf examples/febrl/config.json
?
What is this script? Could you show where the example is located?
"""Run one Zingg phase over the ``customers`` table of a local PostgreSQL database.

Usage::

    python <this script> <phase>

The phase name is read from the first command line argument. Records are
read from the ``customers`` table and results are written to the
``customers_unified`` table, both through JDBC pipes. Models are stored
under ``models/customer360``.
"""
import sys

import findspark

# findspark's documented usage is to call init() *before* pyspark is first
# imported, so that the Spark installation it locates ends up on sys.path.
# The zingg modules are expected to pull in pyspark, hence they are imported
# only after this call.
findspark.init()

from zingg.client import *  # noqa: E402,F401,F403
from zingg.pipes import *  # noqa: E402,F401,F403

# Connection settings shared by the input and the output pipe.
JDBC_URL = "jdbc:postgresql://localhost:5432/postgres"
JDBC_DRIVER = "org.postgresql.Driver"
# NOTE(review): credentials are hard-coded in the script; consider loading
# them from the environment or a secrets store before sharing this file.
JDBC_USER = "suchandra"
JDBC_PASSWORD = "1234"

# Every column is matched as a fuzzy string.
FIELD_NAMES = [
    "fname",
    "lname",
    "streetnumber",
    "street",
    "address",
    "locality",
    "areacode",
    "state",
    "dateofbirth",
    "ssn",
]


def _postgres_pipe(name, table):
    """Return a JDBC ``Pipe`` called *name* that points at *table*.

    The properties are added in the same order for every pipe: url,
    dbtable, driver, user, password.
    """
    pipe = Pipe(name, "jdbc")
    pipe.addProperty("url", JDBC_URL)
    pipe.addProperty("dbtable", table)
    pipe.addProperty("driver", JDBC_DRIVER)
    pipe.addProperty("user", JDBC_USER)
    pipe.addProperty("password", JDBC_PASSWORD)
    return pipe


# Phase name to be passed as a command line argument; fail with a usage
# message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: {} <phase>".format(sys.argv[0]))
phase_name = sys.argv[1]

# Build the arguments for Zingg.
args = Arguments()

# Set field definitions.
fieldDefs = [
    FieldDefinition(field_name, "string", MatchType.FUZZY)
    for field_name in FIELD_NAMES
]
args.setFieldDefinition(fieldDefs)

# Define the input pipe and add it to the arguments for the Zingg client.
customerDataStaging = _postgres_pipe("customerDataStaging", "customers")
args.setData(customerDataStaging)

# Define the output pipe and add it to the arguments for the Zingg client.
customerIdentitiesResolved = _postgres_pipe(
    "customerIdentitiesResolved", "customers_unified"
)
args.setOutput(customerIdentitiesResolved)

# Save the latest model in directory models/customer360.
args.setModelId("customer360")
# Store all models in directory models/.
args.setZinggDir("models")
# Number of partitions (this is a partition count, not a sample size).
args.setNumPartitions(4)
# Fraction of the total dataset from which data is selected for labelling.
args.setLabelDataSampleSize(0.5)

# Zingg execution for the given phase.
options = ClientOptions([ClientOptions.PHASE, phase_name])
zingg = Zingg(args, options)
zingg.initAndExecute()
Hi, could you help me with an error? I set everything up as described here for Zingg 0.4.0: https://docs.zingg.ai/latest/contributing/settingupzingg export JAVA_HOME=/opt/homebrew/opt/openjdk@17 export SPARK_HOME=/opt/spark export SPARK_MASTER=local[*] export MAVEN_HOME=/Users/vitaliy/work/projects/openmetadata-docker/zingg/apache-maven-3.8.8 export ZINGG_HOME=/Users/vitaliy/work/projects/openmetadata-docker/zingg-0.4.0//assembly/target Java version 17.0.14 Scala version 3.6.4 Spark version 3.5 py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext. : java.lang.UnsupportedOperationException: getSubject is not supported at java.base/javax.security.auth.Subject.getSubject(Subject.java:277) at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:577) at org.apache.spark.util.Utils$.$anonfun$getCurrentUserName$1(Utils.scala:2416) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2416) at org.apache.spark.SparkContext.<init>(SparkContext.scala:329) at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58) at java.base/jdk.internal.reflect.DirectConstructorHandleAccessor.newInstance(DirectConstructorHandleAccessor.java:62) at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499) at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:483) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) at py4j.Gateway.invoke(Gateway.java:238) at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80) at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69) at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182) at py4j.ClientServerConnection.run(ClientServerConnection.java:106) at java.base/java.lang.Thread.run(Thread.java:1447)