Demo entry 2557220

Format Conversion

   

Submitted by anonymous on Aug 29, 2015 at 16:43
Language: Java. Code size: 5.0 kB.

/**
 * Converts an ABSA review XML file into one JSON object per review.
 *
 * <p>For every {@code Review} element, each {@code sentence} is sent through
 * {@link GATEUtil#getGateDoc} and rendered as space-separated
 * {@code string/POS/root} token triples. The category-to-polarity pairs of the
 * sentence's {@code Opinion} elements are collected alongside. The result is
 * appended, one JSON object per line, to
 * {@code <outputPath>/feature/output.txt}.
 */
public class FormatconversionImpl implements Formatconversion {

	/**
	 * Command-line entry point.
	 *
	 * <p>The only accepted invocation is {@code -type absa}. Any other argument
	 * list prints the usage line to standard error and returns without doing
	 * any work.
	 *
	 * @param args command-line arguments; expected to be {@code {"-type", "absa"}}
	 */
	public static void main(String[] args) {
		// Bail out right after printing usage; falling through would index into
		// a too-short array or run the conversion with malformed arguments.
		if (args.length != 2 || !"-type".equals(args[0])) {
			printUsage();
			return;
		}
		if (!"absa".equals(args[1])) {
			printUsage();
			return;
		}

		final String absaDataPath = OC.DOMAINS_AOOM_PATH + File.separator + OC.HOTELS + "-absa" + File.separator + OC.DATA;
		final String inputPath = absaDataPath + File.separator + OC.INPUT + File.separator + "ABSA15_Hotels_Test.xml";

		final Formatconversion preprocessor = new FormatconversionImpl();
		preprocessor.parse(inputPath, absaDataPath, true);
	}

	/**
	 * Parses the review XML at {@code inputPath} and appends one JSON line per
	 * review to {@code <outputPath>/feature/output.txt}.
	 *
	 * <p>Each JSON object carries two keys: {@code "segments"} (the tagged
	 * sentences) and {@code "segmentLabels"} (one category-to-polarity map per
	 * sentence). A review without any {@code sentence} element is written as an
	 * empty object. The output file is opened in append mode, so repeated runs
	 * accumulate lines.
	 *
	 * <p>Any failure is reported via {@code printStackTrace()} and ends the
	 * conversion; no exception propagates to the caller. The writer is closed
	 * in every case, so output produced before a failure is flushed to disk.
	 *
	 * @param inputPath  path of the XML file to read
	 * @param outputPath directory that contains the {@code feature} sub-directory
	 * @param flag       currently unused by this implementation
	 */
	@SuppressWarnings("unchecked")
	public void parse(final String inputPath, final String outputPath, final boolean flag) {
		try {
			final BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputPath + File.separator + "feature" + File.separator + "output.txt", true), OC.UTF8));
			try {
				final File fXmlFile = new File(inputPath);
				final DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
				final DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
				final Document doc = dBuilder.parse(fXmlFile);
				doc.getDocumentElement().normalize();
				final NodeList reviews = doc.getElementsByTagName("Review");

				// The typed null keeps the constructor call unambiguous.
				final Word w = null;
				final GATEUtil parser = new GATEUtil(w);

				for (int i = 0, reviewCount = reviews.getLength(); i < reviewCount; i++) {
					final Node reviewNode = reviews.item(i);
					if (reviewNode.getNodeType() != Node.ELEMENT_NODE) {
						continue;
					}
					final Element reviewElement = (Element) reviewNode;

					// Progress output for every review whose id is a multiple of 100.
					final String rid = reviewElement.getAttribute("rid");
					if (Integer.parseInt(rid) % 100 == 0) {
						System.out.println("Review ID: " + rid);
					}

					final NodeList sentences = reviewElement.getElementsByTagName("sentence");
					final int sentenceCount = sentences.getLength();
					final JSONArray segments = new JSONArray();
					final List<Map<String, String>> segmentLabels = new LinkedList<Map<String, String>>();
					for (int j = 0; j < sentenceCount; j++) {
						final Element sentenceElement = (Element) sentences.item(j);
						final String taggedSentence = tagSentence(parser, sentenceElement.getTextContent());
						segmentLabels.add(polarityByCategory(sentenceElement));
						segments.add(taggedSentence);
					}

					final JSONObject taggedReview = new JSONObject();
					// Only populated when there was at least one sentence, so a
					// sentence-less review is still emitted as an empty object.
					if (sentenceCount > 0) {
						taggedReview.put("segmentLabels", segmentLabels);
						taggedReview.put("segments", segments);
					}
					bw.write(taggedReview.toString() + "\n");
				}
			} finally {
				// Close (and thereby flush) even when a review fails mid-way.
				bw.close();
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Normalizes one sentence, runs it through the GATE parser and renders its
	 * tokens as {@code string/POS/root} triples, each followed by a space.
	 *
	 * @param parser      parser used to obtain the {@code gate.Document}
	 * @param rawSentence sentence text as read from the XML
	 * @return the tagged sentence
	 * @throws Exception declared broadly because the project helpers called
	 *                   here may throw checked exceptions; the only caller
	 *                   handles {@code Exception}
	 */
	private static String tagSentence(final GATEUtil parser, final String rawSentence) throws Exception {
		String sentence = rawSentence.replaceAll(OC.NEW_LINE_PLUS, OC.NEW_LINE);
		sentence = sentence.replaceAll(OC.WHITESPACE_PLUS, OC.WHITESPACE);
		sentence = OxtractorUtil.trimLine(sentence);

		final gate.Document document = parser.getGateDoc(sentence);
		final AnnotationSet annotations = document.getAnnotations();
		final Map<String, List<Annotation>> annotationsByType = OxtractorUtil.populateAnnotationsByType(annotations);
		final List<Annotation> tokens = annotationsByType.get(OC.TOKEN);

		final StringBuilder tagged = new StringBuilder();
		for (final Annotation token : tokens) {
			tagged.append(token.getFeatures().get(OC.FEATURE_STRING) + "/" + OxtractorUtil.getPOS(token) + "/" + OxtractorUtil.getRoot(token));
			tagged.append(" ");
		}

		// NOTE(review): none of the values computed below are read afterwards.
		// The calls are retained only because the helpers' side effects are not
		// visible from this file; drop them once the helpers are confirmed to
		// be side-effect free.
		final List<Annotation> nounChunks = annotationsByType.get(OC.NOUN_CHUNK);
		final Map<Long, List<Annotation>> tokensByNounChunkIndex = OxtractorUtil.sortMap(OxtractorUtil.extractSubAnnotationsAsMap(nounChunks, tokens));
		final List<Annotation> lookups = OxtractorUtil.getAnnotations(OC.HOTELS, document);
		final List<Annotation> measurementLookups = OxtractorUtil.getAnnotations("measurement", document);
		final Map<Long, List<Annotation>> lookupsByNounChunkIndex = OxtractorUtil.sortMap(OxtractorUtil.extractSubAnnotationsAsMap(nounChunks, lookups));
		final Map<Long, List<Annotation>> measurementsByNounChunkIndex = OxtractorUtil.sortMap(OxtractorUtil.extractSubAnnotationsAsMap(nounChunks, measurementLookups));

		return tagged.toString();
	}

	/**
	 * Collects the {@code category} to {@code polarity} attribute pairs of all
	 * {@code Opinion} elements below the given sentence element. When a
	 * category occurs more than once, the last polarity wins.
	 *
	 * @param sentenceElement the {@code sentence} element to inspect
	 * @return a new map from opinion category to polarity; empty when the
	 *         sentence has no opinions
	 */
	private static Map<String, String> polarityByCategory(final Element sentenceElement) {
		final NodeList opinions = sentenceElement.getElementsByTagName("Opinion");
		final Map<String, String> sentimentPolarityByCategory = new HashMap<String, String>();
		for (int o = 0, opinionCount = opinions.getLength(); o < opinionCount; o++) {
			final Node opinionNode = opinions.item(o);
			if (opinionNode.getNodeType() != Node.ELEMENT_NODE) {
				continue;
			}
			final Element opinionElement = (Element) opinionNode;
			sentimentPolarityByCategory.put(opinionElement.getAttribute("category"), opinionElement.getAttribute("polarity"));
		}
		return sentimentPolarityByCategory;
	}

	/**
	 * Prints the expected command-line arguments to standard error.
	 */
	static final void printUsage() {
		// "absa" is the only -type value main() acts on.
		System.err.println("Usage: -type absa");
	}

}

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).