package com.company;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.entity.mime.MultipartEntityBuilder;
import org.apache.hc.client5.http.entity.mime.StringBody;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class ShinTest5 {
public static void main(String[] args) throws IOException, SAXException,
ParserConfigurationException {
String input = "“당근”, ‘양파sdfgs’, 오4535이’s, 2345수dfgsv박!@#%@$^, /참$%^외^&*/";
String[] modifdStr = getCSVArray(input);
// comma 기준으로 배열 생성
for (int j = 0; j < modifdStr.length; j++) {
String content = getAPI(modifdStr, j);
// 인덱스 순으로 API 참조한 String 결과
String result = parseXML(content);
// XML로 변환
mkTxtFile(result, j);
// 텍스트 파일로 만들기
}
}
public static String[] getCSVArray(String input) {
// Comma 분리
String[] str = input.split(",");
String[] modifdStr = new String[str.length];
int i = 0;
// 특수문자와 공백 제거
for (String e : str) {
modifdStr[i] = StringReplace(e);
modifdStr[i] = modifdStr[i].trim();
i++;
}
for (String e : modifdStr) {
System.out.println(e);
}
return modifdStr;
}
public static String StringReplace(String str) {
// 한글이 아닌 값 삭제
String match = "[^\uAC00-\uD7A3\\s]";
str = str.replaceAll(match, "");
return str;
}
public static String StringReplace2(String str) {
// 한글, 점이 아닌 값 삭제
String match = "[^\uAC00-\uD7A3.\\s]";
str = str.replaceAll(match, "");
return str;
}
private static String getAPI(String[] modifdStr, int j) throws IOException {
// 국립 국어원 API 참조하여 XML결과 String형식 가져오기
String content = "";
CloseableHttpClient httpclient = HttpClients.createDefault();
HttpPost httppost = new HttpPost("https://stdict.korean.go.kr/api/search.do");
StringBody key = new StringBody("14F296AB6ADF1D06A38A8F1B1BF75207",
ContentType.TEXT_PLAIN);
StringBody q = new StringBody(modifdStr[j],
ContentType.TEXT_PLAIN.withCharset("UTF-8"));
HttpEntity reqEntity = MultipartEntityBuilder.create()
.addPart("key", key)
.addPart("q", q)
.build();
httppost.setEntity(reqEntity);
CloseableHttpResponse response = httpclient.execute(httppost);
try {
System.out.println("----------------------------------------");
System.out.println("Response Code : " + response.getCode());
HttpEntity resEntity = response.getEntity();
if (resEntity != null) {
String charset = "UTF-8";
content = EntityUtils.toString(response.getEntity(), charset);
}
EntityUtils.consume(resEntity);
} catch (ParseException parseException) {
parseException.printStackTrace();
} finally {
response.close();
}
return content;
}
public static String parseXML(String content) throws ParserConfigurationException,
IOException, SAXException {
// XML에서 word, pos, definition, type 순으로 CSV 형식 변환
DocumentBuilderFactory dbFactoty = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactoty.newDocumentBuilder();
InputStream inptSrc = new ByteArrayInputStream(content.getBytes());
Document doc = dBuilder.parse(inptSrc);
String result = "word,pos,definition,type";
NodeList nList = doc.getElementsByTagName("item");
for (int temp = 0; temp < nList.getLength(); temp++) {
Node nNode = nList.item(temp);
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) nNode;
result = result + "\n" + (getTagValue("word", eElement).trim() + "," +
getTagValue("pos", eElement).trim() + "," +
StringReplace2(getTagValue("definition", eElement).trim()) + "," +
getTagValue("type", eElement).trim());
} // if end
} // for end
return result;
}
private static String getTagValue(String tag, Element eElement) {
// XML에서 해당하는 태그 값 가져오기
NodeList nlList = eElement.getElementsByTagName(tag).item(0).getChildNodes();
Node nValue = (Node) nlList.item(0);
if (nValue == null)
return null;
return nValue.getNodeValue();
}
public static void mkTxtFile(String result, int j) throws IOException {
// 지정 경로로 텍스트 파일 만들기
BufferedOutputStream bs = null;
try {
bs = new BufferedOutputStream(new FileOutputStream("/home/ehdrud1129/다운로드/
20200624_국립국어원_결과" + (j + 1) + "_노동경.csv"));
bs.write(result.getBytes());
} catch (Exception e) {
e.getStackTrace();
// TODO: handle exception
} finally {
bs.close();
}
}
}