본문 바로가기

Programming/Java

국립국어원_HTTP적용 2020. 06 .24

package com.company;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.hc.client5.http.classic.methods.HttpPost;
import org.apache.hc.client5.http.entity.mime.MultipartEntityBuilder;
import org.apache.hc.client5.http.entity.mime.StringBody;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.ContentType;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.ParseException;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class ShinTest5 {
    public static void main(String[] args) throws IOException, SAXException, 
    		ParserConfigurationException {

        String input = "“당근”, ‘양파sdfgs’, 오4535이’s, 2345수dfgsv박!@#%@$^, /참$%^외^&*/";

        String[] modifdStr = getCSVArray(input);
        // comma 기준으로 배열 생성

        for (int j = 0; j < modifdStr.length; j++) {
            String content = getAPI(modifdStr, j);
            // 인덱스 순으로 API 참조한 String 결과
            String result = parseXML(content);
            // XML로 변환
            mkTxtFile(result, j);
            // 텍스트 파일로 만들기
        }

    }


    public static String[] getCSVArray(String input) {
    // Comma 분리
        String[] str = input.split(",");
        String[] modifdStr = new String[str.length];

        int i = 0;	
        // 특수문자와 공백 제거
        for (String e : str) {
            modifdStr[i] = StringReplace(e);
            modifdStr[i] = modifdStr[i].trim();
            i++;
        }

        for (String e : modifdStr) {
            System.out.println(e);
        }
        return modifdStr;
    }

    public static String StringReplace(String str) {
    // 한글이 아닌 값 삭제
        String match = "[^\uAC00-\uD7A3\\s]";
        str = str.replaceAll(match, "");
        return str;
    }

    public static String StringReplace2(String str) {
    // 한글, 점이 아닌 값 삭제
        String match = "[^\uAC00-\uD7A3.\\s]";
        str = str.replaceAll(match, "");
        return str;
    }

    private static String getAPI(String[] modifdStr, int j) throws IOException {
    // 국립 국어원 API 참조하여 XML결과 String형식 가져오기

        String content = "";
        CloseableHttpClient httpclient = HttpClients.createDefault();

        HttpPost httppost = new HttpPost("https://stdict.korean.go.kr/api/search.do");

        StringBody key = new StringBody("14F296AB6ADF1D06A38A8F1B1BF75207",
        		ContentType.TEXT_PLAIN);
        StringBody q = new StringBody(modifdStr[j], 
        		ContentType.TEXT_PLAIN.withCharset("UTF-8"));

        HttpEntity reqEntity = MultipartEntityBuilder.create()
                .addPart("key", key)
                .addPart("q", q)
                .build();

        httppost.setEntity(reqEntity);
        CloseableHttpResponse response = httpclient.execute(httppost);

        try {
            System.out.println("----------------------------------------");
            System.out.println("Response Code : " + response.getCode());
            HttpEntity resEntity = response.getEntity();
            if (resEntity != null) {
                String charset = "UTF-8";
                content = EntityUtils.toString(response.getEntity(), charset);
            }
            EntityUtils.consume(resEntity);
        } catch (ParseException parseException) {
            parseException.printStackTrace();
        } finally {
            response.close();
        }

        return content;
    }

    public static String parseXML(String content) throws ParserConfigurationException,
    		IOException, SAXException {
            // XML에서 word, pos, definition, type 순으로 CSV 형식 변환
        DocumentBuilderFactory dbFactoty = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactoty.newDocumentBuilder();
        InputStream inptSrc = new ByteArrayInputStream(content.getBytes());
        Document doc = dBuilder.parse(inptSrc);

        String result = "word,pos,definition,type";
        NodeList nList = doc.getElementsByTagName("item");
        for (int temp = 0; temp < nList.getLength(); temp++) {
            Node nNode = nList.item(temp);
            if (nNode.getNodeType() == Node.ELEMENT_NODE) {

                Element eElement = (Element) nNode;
                result = result + "\n" + (getTagValue("word", eElement).trim() + "," + 
                		getTagValue("pos", eElement).trim() + "," + 
                        StringReplace2(getTagValue("definition", eElement).trim()) + "," + 
                        getTagValue("type", eElement).trim());
            }    // if end
        }    // for end

        return result;
    }

    private static String getTagValue(String tag, Element eElement) {
    // XML에서 해당하는 태그 값 가져오기
        NodeList nlList = eElement.getElementsByTagName(tag).item(0).getChildNodes();
        Node nValue = (Node) nlList.item(0);
        if (nValue == null)
            return null;
        return nValue.getNodeValue();
    }

    public static void mkTxtFile(String result, int j) throws IOException {
    // 지정 경로로 텍스트 파일 만들기
        BufferedOutputStream bs = null;
        try {
            bs = new BufferedOutputStream(new FileOutputStream("/home/ehdrud1129/다운로드/
            		20200624_국립국어원_결과" + (j + 1) + "_노동경.csv"));
            bs.write(result.getBytes());
        } catch (Exception e) {
            e.getStackTrace();
            // TODO: handle exception
        } finally {
            bs.close();
        }
    }
}

'Programming > Java' 카테고리의 다른 글

Java 기초 상식 정리  (0) 2023.04.23
JSON 파싱 2020. 06 .24  (0) 2020.06.29
HttpClient 또오해영  (0) 2020.06.29
우리말샘 API 2020. 06 .24  (0) 2020.06.29
국립국어원 API 2020. 06 .24  (0) 2020.06.29