Skip to content

Commit

Permalink
Merge pull request #147 from navinrathore/zFindLabeller
Browse files Browse the repository at this point in the history
Introduced new phase findLabel
  • Loading branch information
sonalgoyal authored Feb 25, 2022
2 parents 6833eea + 4684afd commit f1d3bf3
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 2 deletions.
2 changes: 1 addition & 1 deletion client/src/main/java/zingg/client/ClientOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ public final static String getHelp() {
s.append("zingg.sh --phase findTrainingData --conf config.json --zinggDir /location \n");
s.append("zingg.sh --phase label --conf config.json --zinggDir /location \n");
s.append("zingg.sh --phase trainMatch --conf config.json --email [email protected]\n");

s.append("zingg.sh --phase findAndLabel --conf config.json --zinggDir /location\n");
s.append("options\n");
for (Option o: optionMaster.values()) {
s.append("\t " + o.optionName + ":\t\t" + o.desc + "\n");
Expand Down
3 changes: 2 additions & 1 deletion client/src/main/java/zingg/client/ZinggOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ public enum ZinggOptions {
LABEL("label"),
LINK("link"),
GENERATE_DOCS("generateDocs"),
UPDATE_LABEL("updateLabel");
UPDATE_LABEL("updateLabel"),
FIND_AND_LABEL("findAndLabel");

private String value;

Expand Down
33 changes: 33 additions & 0 deletions core/src/main/java/zingg/FindAndLabeller.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package zingg;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import zingg.client.Arguments;
import zingg.client.ZinggClientException;
import zingg.client.ZinggOptions;

/**
 * Zingg phase registered under {@link ZinggOptions#FIND_AND_LABEL}.
 *
 * <p>It chains two steps in one invocation: a {@link TrainingDataFinder} is
 * executed first, and then the labelling logic inherited from {@link Labeller}
 * is run.
 */
public class FindAndLabeller extends Labeller {

    /** Class name under which this phase is looked up (see the ZFactory registration). */
    protected static String name = "zingg.FindAndLabeller";

    public static final Log LOG = LogFactory.getLog(FindAndLabeller.class);

    /** Delegate that performs the first half of the combined phase. */
    private TrainingDataFinder finder;

    /**
     * Marks this instance as the {@code FIND_AND_LABEL} phase and creates the
     * finder delegate.
     */
    public FindAndLabeller() {
        setZinggOptions(ZinggOptions.FIND_AND_LABEL);
        this.finder = new TrainingDataFinder();
    }

    /**
     * Initialises this labeller through the superclass and then passes this
     * instance to the finder's {@code copyContext}.
     * NOTE(review): presumably this lets the finder reuse the context set up by
     * {@code super.init} instead of initialising its own - confirm against
     * {@code copyContext}.
     *
     * @param args    the arguments forwarded to the superclass initialisation
     * @param license the license string forwarded to the superclass initialisation
     * @throws ZinggClientException if the superclass initialisation or the context copy fails
     */
    @Override
    public void init(Arguments args, String license) throws ZinggClientException {
        super.init(args, license);
        this.finder.copyContext(this);
    }

    /**
     * Runs the finder first and the inherited labelling step second.
     *
     * @throws ZinggClientException if either step fails; when the finder throws,
     *                              the labelling step is not reached
     */
    @Override
    public void execute() throws ZinggClientException {
        this.finder.execute();
        super.execute();
    }
}
1 change: 1 addition & 0 deletions core/src/main/java/zingg/ZFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public ZFactory() {}
zinggers.put(ZinggOptions.LINK, Linker.name);
zinggers.put(ZinggOptions.GENERATE_DOCS, Documenter.name);
zinggers.put(ZinggOptions.UPDATE_LABEL, LabelUpdater.name);
zinggers.put(ZinggOptions.FIND_AND_LABEL, FindAndLabeller.name);
}

public IZingg get(ZinggOptions z) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
Expand Down
1 change: 1 addition & 0 deletions docs/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* [**Creating training data**](setup/training/createTrainingData.md)
* [findTrainingData](setup/training/findTrainingData.md)
* [label](setup/training/label.md)
* [findAndLabel](setup/training/findAndLabel.md)
* [Using preexisting training data](setup/training/addOwnTrainingData.md)
* [Exporting labeled data as csv](setup/training/exportLabeledData.md)
* [Building and saving the model](setup/train.md)
Expand Down
14 changes: 14 additions & 0 deletions docs/setup/training/findAndLabel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---
layout: default
parent: Creating training data
title: Find training data and labelling
grand_parent: Step By Step Guide
nav_order: 2
---
## Finding training data and labelling it

This phase combines two phases, [findTrainingData](./findTrainingData.md) and [label](./label.md), into a single run. It helps experienced users speed up the process of creating training data.

`./zingg.sh --phase findAndLabel --conf config.json`

Note that this option works best for small datasets. If findTrainingData takes a long time on your data, you will have to wait for it to finish before the console is ready for labelling. For details, refer to the individual phases: [findTrainingData](./findTrainingData.md) and [label](./label.md)

0 comments on commit f1d3bf3

Please sign in to comment.