Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model doc generation #99

Merged
merged 10 commits into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
1 change: 0 additions & 1 deletion client/.settings/org.eclipse.core.resources.prefs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
11 changes: 11 additions & 0 deletions client/src/main/java/zingg/client/Arguments.java
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,17 @@ public String getZinggBaseModelDir() {
return zinggDir + "/" + modelId + "/model";
}

/**
 * Returns the directory used for generated model documentation.
 *
 * <p>This simply delegates to {@link #getZinggBaseModelDir()}, so the
 * documentation directory is the same as the base model directory.
 * The getter is excluded from JSON serialization.
 *
 * @return the documentation directory path
 */
@JsonIgnore
public String getZinggDocDir() {
return getZinggBaseModelDir();
}

/**
 * Returns the path of the generated HTML model document.
 *
 * <p>The file is named {@code model.html} and sits directly under the
 * {@code zinggDir/modelId} folder. The getter is excluded from JSON
 * serialization.
 *
 * @return the path of the model documentation file
 */
@JsonIgnore
public String getZinggDocFile() {
    final String modelRoot = zinggDir + "/" + modelId;
    return modelRoot + "/model.html";
}


/**
* Location for internal Zingg use.
*
Expand Down
1 change: 1 addition & 0 deletions client/src/main/java/zingg/client/Client.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public Client(Arguments args, ClientOptions options) throws ZinggClientException
setZingg(args, options);
}
catch (Exception e) {
e.printStackTrace();
throw new ZinggClientException("An error has occured while setting up the client" + e.getMessage());
}
}
Expand Down
3 changes: 2 additions & 1 deletion client/src/main/java/zingg/client/ZinggOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ public enum ZinggOptions {
TRAIN_MATCH("trainMatch"),
FIND_TRAINING_DATA("findTrainingData"),
LABEL("label"),
LINK("link");
LINK("link"),
GENERATE_DOCS("generateDocs");

private String value;

Expand Down
3 changes: 2 additions & 1 deletion client/src/main/java/zingg/client/pipe/Format.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ public enum Format implements Serializable{
XLSX("com.crealytics.spark.excel"),
PARQUET("PARQUET"),
AVRO("avro"),
SNOWFLAKE("net.snowflake.spark.snowflake");
SNOWFLAKE("net.snowflake.spark.snowflake"),
TEXT("text");

String type;
static Map<String, Format> map;
Expand Down
8 changes: 7 additions & 1 deletion client/src/test/java/zingg/client/TestClientOption.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
import org.junit.Test;

public class TestClientOption {


// Intentionally empty test. The other tests in this class are commented out
// below; presumably this keeps the test runner from failing on a class with
// no runnable tests (TODO confirm and re-enable the real tests).
@Test
public void dummy() {
//placeholder
}
/*

@Test
public void testParseArguments() {
Expand Down Expand Up @@ -51,4 +56,5 @@ public void testParseUnsupportedArgumentsLast() {
ClientOptions co = new ClientOptions();
co.parse(args);
}
*/
}
5 changes: 5 additions & 0 deletions client/src/test/java/zingg/client/input/TestToCSV.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@


public class TestToCSV {

// Intentionally empty test. The remainder of this class is commented out
// below; presumably this keeps the test runner from failing on a class with
// no runnable tests (TODO confirm and re-enable the real tests).
@Test
public void dummy() {
//placeholder
}

/*

Expand Down
5 changes: 5 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,10 @@
<artifactId>secondstring</artifactId>
<version>2021</version>
</dependency>
<dependency>
<groupId>org.freemarker</groupId>
<artifactId>freemarker</artifactId>
<version>2.3.31</version>
</dependency>
</dependencies>
</project>
107 changes: 107 additions & 0 deletions core/src/main/java/zingg/Documenter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package zingg;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;

import zingg.client.ZinggClientException;
import zingg.client.ZinggOptions;
import zingg.client.pipe.Pipe;
import zingg.client.util.ColName;
import zingg.client.util.ColValues;
import zingg.util.DSUtil;
import zingg.util.PipeUtil;
import zingg.util.RowAdapter;
import zingg.util.RowWrapper;
import freemarker.ext.rhino.RhinoWrapper;
import freemarker.template.*;
import java.util.*;
import java.io.*;

/**
 * Zingg phase that renders the marked (labelled) training data of a model
 * into an HTML document using a FreeMarker template.
 *
 * <p>The phase reads the marked training data pipe, builds a template data
 * model from it and writes the rendered result to
 * {@code args.getZinggDocFile()}.
 */
public class Documenter extends ZinggBase {

    protected static String name = "zingg.Documenter";
    public static final Log LOG = LogFactory.getLog(Documenter.class);

    /** Template file name, resolved from the classpath root. */
    private static final String TEMPLATE_NAME = "model.ftlh";

    /** Creates the documenter and registers it for the GENERATE_DOCS phase. */
    public Documenter() {
        setZinggOptions(ZinggOptions.GENERATE_DOCS);
    }

    /**
     * Reads the marked training records, assembles the template data model
     * and writes the HTML document.
     *
     * @throws ZinggClientException if reading the data or rendering the
     *         document fails; the message of the underlying exception is
     *         carried over and the full stack trace is logged
     */
    public void execute() throws ZinggClientException {
        try {
            LOG.info("Document generation in progress");
            // The records are collected exactly once below, so the dataset is
            // neither cached nor pre-scanned for distinct cluster ids.
            Dataset<Row> markedRecords = PipeUtil.read(spark, false, false,
                    PipeUtil.getTrainingDataMarkedPipe(args));
            String[] columnNames = markedRecords.columns();

            /* Data model handed to the template. */
            Map<String, Object> root = new HashMap<String, Object>();
            root.put("modelId", args.getModelId());
            root.put("clusters", markedRecords.collectAsList());
            root.put("numColumns", columnNames.length);
            root.put("columns", columnNames);
            root.put("fieldDefinitionCount", args.getFieldDefinition().size());
            buildAndWriteHTML(root);
        } catch (Exception e) {
            // Log through the class logger so the stack trace ends up in the
            // configured log output rather than raw stderr.
            LOG.error("Document generation failed", e);
            throw new ZinggClientException(e.getMessage());
        }
    }

    /**
     * Renders the model template with the given data model and writes the
     * result to the documentation file as UTF-8.
     *
     * @param root the template data model (variable name to value)
     * @throws Exception if the template cannot be loaded or processed, or the
     *         output file cannot be written
     */
    public void buildAndWriteHTML(Map<String, Object> root) throws Exception {
        /* FreeMarker configuration; templates are loaded from the classpath root. */
        Configuration cfg = new Configuration(Configuration.VERSION_2_3_29);
        cfg.setClassForTemplateLoading(this.getClass(), "/");
        cfg.setDefaultEncoding("UTF-8");
        cfg.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER);
        cfg.setLogTemplateExceptions(false);
        cfg.setWrapUncheckedExceptions(true);
        cfg.setFallbackOnNullLoopVariable(false);
        // RowWrapper lets the template iterate over Spark Row objects.
        cfg.setObjectWrapper(new RowWrapper(cfg.getIncompatibleImprovements()));

        // Load the template before opening the output so a missing template
        // does not leave an empty file behind.
        Template temp = cfg.getTemplate(TEMPLATE_NAME);

        String docFile = args.getZinggDocFile();
        // try-with-resources closes (and flushes) the writer even when template
        // processing throws; the charset is explicit so the output does not
        // depend on the platform default encoding.
        try (Writer out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(docFile), java.nio.charset.StandardCharsets.UTF_8))) {
            temp.process(root, out);
        }
        LOG.info("Written documentation at " + docFile);
    }

}
21 changes: 1 addition & 20 deletions core/src/main/java/zingg/Labeller.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import zingg.client.util.ColValues;
import zingg.util.DSUtil;
import zingg.util.PipeUtil;
import zingg.util.LabelMatchType;

public class Labeller extends ZinggBase {

Expand Down Expand Up @@ -198,24 +199,4 @@ void writeLabelledOutput(Dataset<Row> records) {
}
}

/**
 * Possible outcomes of a labelling decision, each pairing a prediction value
 * from {@link ColValues} with the text shown for it.
 */
enum LabelMatchType {
    UNDEFINED(ColValues.IS_NOT_KNOWN_PREDICTION, "ARE NOT KNOWN IF MATCH"),
    DO_NOT_MATCH(ColValues.IS_NOT_A_MATCH_PREDICTION, "DO NOT MATCH"),
    MATCH(ColValues.IS_MATCH_PREDICTION, "MATCH");

    /** Prediction value this constant stands for. */
    private Double value;
    /** Message associated with this constant. */
    public String msg;

    private LabelMatchType(Double value, String msg) {
        this.msg = msg;
        this.value = value;
    }

    /**
     * Looks up the constant whose prediction value equals the given one.
     *
     * @param value the prediction value to look up
     * @return the matching constant, or {@code null} when none matches
     */
    public static LabelMatchType get(double value) {
        final Double wanted = Double.valueOf(value);
        final LabelMatchType[] candidates = LabelMatchType.values();
        for (int i = 0; i < candidates.length; i++) {
            if (candidates[i].value.equals(wanted)) {
                return candidates[i];
            }
        }
        return null;
    }

}
1 change: 1 addition & 0 deletions core/src/main/java/zingg/ZFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public ZFactory() {}
zinggers.put(ZinggOptions.MATCH, Matcher.name);
zinggers.put(ZinggOptions.TRAIN_MATCH, TrainMatcher.name);
zinggers.put(ZinggOptions.LINK, Linker.name);
zinggers.put(ZinggOptions.GENERATE_DOCS, Documenter.name);
}

public IZingg get(ZinggOptions z) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
Expand Down
25 changes: 25 additions & 0 deletions core/src/main/java/zingg/util/LabelMatchType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package zingg.util;

import zingg.client.util.ColValues;

/**
 * Possible outcomes of a labelling decision, each pairing a prediction value
 * from {@link ColValues} with the text shown for it.
 */
public enum LabelMatchType {
    UNDEFINED(ColValues.IS_NOT_KNOWN_PREDICTION, "ARE NOT KNOWN IF MATCH"),
    DO_NOT_MATCH(ColValues.IS_NOT_A_MATCH_PREDICTION, "DO NOT MATCH"),
    MATCH(ColValues.IS_MATCH_PREDICTION, "MATCH");

    /** Prediction value this constant stands for. */
    private Double value;
    /** Message associated with this constant. */
    public String msg;

    private LabelMatchType(Double value, String msg) {
        this.msg = msg;
        this.value = value;
    }

    /**
     * Looks up the constant whose prediction value equals the given one.
     *
     * @param value the prediction value to look up
     * @return the matching constant, or {@code null} when none matches
     */
    public static LabelMatchType get(double value) {
        final Double wanted = Double.valueOf(value);
        final LabelMatchType[] candidates = LabelMatchType.values();
        for (int i = 0; i < candidates.length; i++) {
            if (candidates[i].value.equals(wanted)) {
                return candidates[i];
            }
        }
        return null;
    }

}
8 changes: 8 additions & 0 deletions core/src/main/java/zingg/util/PipeUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,14 @@ public static Pipe getTrainingDataMarkedPipe(Arguments args) {
p.setProp(FilePipe.LOCATION, args.getZinggTrainingDataMarkedDir());
return p;
}

/**
 * Builds a text-format pipe whose location is the model documentation file
 * given by {@code args.getZinggDocFile()}.
 *
 * @param args the arguments supplying the documentation file path
 * @return a new pipe configured with {@link Format#TEXT} and that location
 */
public static Pipe getModelDocumentationPipe(Arguments args) {
    final Pipe docPipe = new Pipe();
    docPipe.setFormat(Format.TEXT);
    final String docLocation = args.getZinggDocFile();
    docPipe.setProp(FilePipe.LOCATION, docLocation);
    return docPipe;
}


public static String getPipesAsString(Pipe[] pipes) {
return Arrays.stream(pipes)
Expand Down
37 changes: 37 additions & 0 deletions core/src/main/java/zingg/util/RowAdapter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package zingg.util;

import org.apache.spark.sql.Row;

import freemarker.template.ObjectWrapper;
import freemarker.template.TemplateModel;
import freemarker.template.AdapterTemplateModel;
import freemarker.template.TemplateModelException;
import freemarker.template.TemplateSequenceModel;
import freemarker.template.WrappingTemplateModel;

/**
 * Exposes a Spark {@link Row} to FreeMarker templates as a sequence, so that
 * a template can read the row's values by index and iterate over them.
 */
public class RowAdapter extends WrappingTemplateModel
        implements TemplateSequenceModel, AdapterTemplateModel {

    /** The row being adapted. */
    private final Row adaptedRow;

    /**
     * @param row the row to expose to templates
     * @param ow  the object wrapper used to wrap individual row values
     */
    public RowAdapter(Row row, ObjectWrapper ow) {
        super(ow);
        this.adaptedRow = row;
    }

    /** Number of values in the row (sequence length). */
    @Override
    public int size() throws TemplateModelException {
        return adaptedRow.size();
    }

    /** Returns the value at {@code index}, wrapped as a template model. */
    @Override
    public TemplateModel get(int index) throws TemplateModelException {
        final Object rawValue = adaptedRow.get(index);
        return wrap(rawValue);
    }

    /** Returns the underlying row regardless of the requested hint. */
    @Override
    public Object getAdaptedObject(Class hint) {
        return adaptedRow;
    }

}
26 changes: 26 additions & 0 deletions core/src/main/java/zingg/util/RowWrapper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package zingg.util;

import freemarker.template.Version;

import org.apache.spark.sql.Row;
import freemarker.template.DefaultObjectWrapper;
import freemarker.template.TemplateDateModel;
import freemarker.template.TemplateModel;
import freemarker.template.TemplateModelException;

/**
 * Object wrapper that wraps Spark {@link Row} objects in a
 * {@link RowAdapter}; every other unknown type is delegated to
 * {@link DefaultObjectWrapper}.
 */
public class RowWrapper extends DefaultObjectWrapper {

    /**
     * @param incompatibleImprovements the version setting passed on to
     *        {@link DefaultObjectWrapper}
     */
    public RowWrapper(Version incompatibleImprovements) {
        super(incompatibleImprovements);
    }

    /**
     * Wraps {@link Row} instances in a {@link RowAdapter} and defers to the
     * superclass for anything else.
     */
    @Override
    protected TemplateModel handleUnknownType(final Object obj) throws TemplateModelException {
        return (obj instanceof Row)
                ? new RowAdapter((Row) obj, this)
                : super.handleUnknownType(obj);
    }

}
Loading