Skip to content

Commit

Permalink
Update to support Programs
Browse files Browse the repository at this point in the history
  • Loading branch information
matallen committed Mar 6, 2019
1 parent 1f3106f commit de187d8
Showing 1 changed file with 45 additions and 170 deletions.
215 changes: 45 additions & 170 deletions src/main/java/com/redhat/sso/Controller2.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ public class Controller2{
private static final Logger log=Logger.getLogger(Controller2.class);

/**
 * Manual test entry point: loads the credentials from {@code credentials.txt} in the working
 * directory, publishes them as the {@code username} and {@code password} system properties,
 * runs a search and prints the resulting offerings as JSON on standard output.
 *
 * The credentials file must hold the username on its first line and the password on its second.
 *
 * @param asd command line arguments (unused)
 * @throws IOException if the credentials file cannot be read or has fewer than two lines
 */
public static void main(String[] asd) throws JsonGenerationException, JsonMappingException, IOException{
  // Read the file once and always close the stream; the previous code opened it twice and never closed either stream.
  String[] credentials;
  FileInputStream in=new FileInputStream(new File("credentials.txt"));
  try{
    // "\\r?\\n" so that a file saved with Windows line endings does not leave a trailing '\r' on each value
    credentials=IOUtils.toString(in).split("\\r?\\n");
  }finally{
    in.close();
  }
  if (credentials.length<2){
    // fail with a descriptive message rather than an ArrayIndexOutOfBoundsException
    throw new IOException("credentials.txt must contain the username on line 1 and the password on line 2");
  }
  System.setProperty("username", credentials[0]);
  System.setProperty("password", credentials[1]);
  List<Offering> result=new Controller2().search("sso_searchable", "tags,subject,content", "offering_");
  System.out.println(com.redhat.sso.utils.Json.newObjectMapper(true).writeValueAsString(result));
}
Expand Down Expand Up @@ -212,6 +212,22 @@ private List<Offering> aggregateIntoOfferings(List<Document> alldocuments, Strin
// Offerings whose type is "program" (compared case-insensitively) are filled in from the overview document.
if ("program".equalsIgnoreCase(o.type)){
// get data from sales kit landing pages

// Title: the portion of the overview name to the left of "-", trimmed and reduced to plain text by Jsoup.
// NOTE(review): StrParse.leftOf is not visible here - presumably it cuts at the first "-"; confirm.
o.offering=Jsoup.parse(StrParse.get(overview.name).leftOf("-").trim()).text();
// Description: the text captured inside <span class="description">...</span> of the overview's description HTML.
o.description=extractDescription4("<span[\\W]*?class=\"description\"[\\W]*?>(.*?)</span>", overview, overview.description);

// NOT YET IMPLEMENTED DUE TO IMMATURITY OF THE MOJO SALES KIT PAGE FOR PROGRAMS
// o.related;
// o.relatedProducts;

// Look for an href that follows class="url" in the description HTML. When one is found, add a
// "Sales Kit" document pointing at that link; otherwise fall back to adding the overview document itself.
String url=RegExHelper.extract(overview.description, "class=\"url\".*?href=\"(.*?)\"");
if (null!=url){
o.documents.add(new Document(null, "Sales Kit", null, null, url, null));
}else{
o.documents.add(overview);
}
// overview.name="Sales Kit";
// Blank the overview's description now that it has been mined. In the else branch above the same
// object was added to o.documents, so the entry in that list ends up with an empty description too.
overview.description="";

}

// ################
Expand Down Expand Up @@ -463,6 +479,33 @@ private String extract(Document src, String descriptionHtml, String[] tokensInOr

return result;
}

/**
 * Extracts the first capture group of {@code groupRegEx} from {@code descriptionHtml} and
 * returns it as plain text (markup stripped by Jsoup).
 *
 * @param groupRegEx regular expression whose capture group 1 holds the wanted HTML fragment
 * @param src the document the HTML belongs to (currently unused, kept for signature parity with the sibling extractors)
 * @param descriptionHtml the HTML to search, may be null
 * @return the plain text of the captured fragment, or an empty string when there is no HTML or no captured fragment
 */
private String extractDescription4(String groupRegEx, Document src, String descriptionHtml){
  if (null==descriptionHtml){
    return "";
  }
  String fragment=RegExHelper.extract(descriptionHtml, groupRegEx, 1);
  // NOTE(review): the two-argument RegExHelper.extract is null-checked by its caller in this file, so this
  // overload presumably returns null as well when nothing matches - confirm. Passing null on to Jsoup.parse would fail.
  if (null==fragment){
    return "";
  }
  return Jsoup.parse(fragment).text();
}

/**
 * Extracts the plain text found between the end of the tag containing {@code keyStart} and the
 * next occurrence of {@code keyEnd}.
 *
 * The search locates {@code keyStart}, skips forward to the next {@code '>'} (the end of the
 * opening tag), takes everything up to {@code keyEnd} and strips the markup with Jsoup.
 *
 * @param keyStart marker identifying the opening tag, e.g. a class attribute
 * @param keyEnd marker that terminates the section, e.g. a closing tag
 * @param src the document the HTML belongs to (currently unused)
 * @param descriptionHtml the HTML to search, may be null
 * @return the plain text of the section, or an empty string when the HTML is null or any of the markers is missing
 */
private String extractDescription3(String keyStart, String keyEnd, Document src, String descriptionHtml){
  if (null==descriptionHtml){
    return "";
  }
  int keyIndex=descriptionHtml.indexOf(keyStart);
  if (keyIndex<0){
    // without this check the search below would start at index 0 and pick up an unrelated tag
    return "";
  }
  int tagClose=descriptionHtml.indexOf(">", keyIndex+1);
  if (tagClose<0){
    return "";
  }
  int sectionEnd=descriptionHtml.indexOf(keyEnd, tagClose);
  if (sectionEnd<=tagClose){
    // keyEnd is missing (-1) or starts on the '>' itself; either way substring(tagClose+1, sectionEnd) would throw
    return "";
  }
  String section=descriptionHtml.substring(tagClose+1, sectionEnd);
  return Jsoup.parse(section).text();
}

private String extractDescription2(Document src, String descriptionHtml){
// find span with class="description", extract all text within that span
Expand Down Expand Up @@ -523,100 +566,6 @@ private String arrayToString(String[] a){
sb.append(s).append(", ");
return sb.substring(0, sb.length()>2?sb.length()-2:0);
}
// private List<String> extractHtmlList(Document src, String html, String[] tokensInOrder){
// String token=null;
// int iDesc=-1;
// for(String t:tokensInOrder){
// if ((iDesc=html.indexOf(t))>=0){
// token=t;
// break;
// }
// }
//
//// int iDesc=descriptionHtml.indexOf(token);
// if (iDesc<0){
// log.error("Unable to find any \""+arrayToString(tokensInOrder)+"\" in document: "+src.getUrl());
// return Arrays.asList("MISSING: \""+token+"\""); //abort early if the header token is not in the document
// }
//
// int ulStart=html.indexOf("ul", iDesc);
// int ulEnd=html.indexOf("/ul", ulStart);
//
// String ul=html.substring(ulStart, ulEnd);
// // now just split by <li>
//
// List<String> result=new ArrayList<String>();
//
// //loop
// int end=0;
// int start=ul.indexOf("<li", end);
// while (start>0){
// end=ul.indexOf("</li>", start);
// String li=ul.substring(start, end);
// String item;
// if (li.indexOf("<a ")>=0){
// int hrefStart=li.indexOf("href=")+"href=".length()+1;
// String url=li.substring(hrefStart, li.indexOf("\"", hrefStart));
// li=li.replaceAll("&nbsp;", " ");
// item="<a href=\""+url+"\">"+Jsoup.parse(li).text().toString().trim()+"</a>";
// }else{
// item=Jsoup.parse(li).text().toString().trim();
// }
//// li=li.substring(li.indexOf("<a "), li.indexOf("</a>")+"</a>".length()); // strip everything except for a link if it exists
//// String item=Jsoup.parse(li).text().toString().trim();
// result.add(item);
// start=ul.indexOf("<li", end);
// }
//
// return result;
// }

// private List<Solution> extractSolutions(Document src, String descriptionHtml, String[] tokensInOrder){
// String token=null;
// int iDesc=-1;
// for(String t:tokensInOrder){
// if ((iDesc=descriptionHtml.indexOf(t))>=0){
// token=t;
// break;
// }
// }
//
//// int iDesc=descriptionHtml.indexOf(token);
// if (iDesc<0){
// log.error("Unable to find \""+arrayToString(tokensInOrder)+"\" in document: "+src.getUrl());
// return Arrays.asList(new Solution("MISSING: \""+token+"\"", null)); //abort early if the header token is not in the document
// }
//
// int ulStart=descriptionHtml.indexOf("ul", iDesc);
// int ulEnd=descriptionHtml.indexOf("/ul", ulStart);
//
// String ul=descriptionHtml.substring(ulStart, ulEnd);
// // now just split by <li>
//
// List<Solution> result=new ArrayList<Solution>();
//
// //loop
// int end=0;
// int start=ul.indexOf("<li", end);
// while (start>0){
// end=ul.indexOf("</li>", start);
// String li=ul.substring(start, end);
// String name="";
// String url=null;
// if (li.indexOf("<a ")>=0){
// int hrefStart=li.indexOf("href=")+"href=".length()+1;
// url=li.substring(hrefStart, li.indexOf("\"", hrefStart));
//// item="<a href=\""+url+"\">"+Jsoup.parse(li).text().toString().trim()+"</a>";
// }
// name=Jsoup.parse(li).text().toString().trim();
//// li=li.substring(li.indexOf("<a "), li.indexOf("</a>")+"</a>".length()); // strip everything except for a link if it exists
//// String item=Jsoup.parse(li).text().toString().trim();
// result.add(new Solution(name, url));
// start=ul.indexOf("<li", end);
// }
//
// return result;
// }


// Per-instance map from a String key to a compiled Pattern. Presumably keyed by the regex source so that
// each expression is compiled only once - confirm against the code that reads and fills it (not shown here).
private Map<String, Pattern> regexCache=new HashMap<String, Pattern>();
Expand Down Expand Up @@ -724,80 +673,6 @@ private String truncateBefore(String input, Integer length){
return input;
}

// private List<String> extractListFromSection(String matcher, String[] tokensInOrder, Document src){
//
// }

// private List<Document> genericExtractListFromSection(String html, String[] tokensInOrder){
// return genericExtractListFromSection("<[Hh]\\d>(.+?)</[Hh]\\d>", html, tokensInOrder);
// }

// private List<String> extractSectionListToStrings(String matcher, String html, String[] tokensInOrder){
// Matcher m=Pattern.compile(matcher).matcher(html);
// int sectStart=-1;
// int sectEnd=html.length();
// List<String> tokens=Arrays.asList(tokensInOrder);
//
// while(m.find()){
// String headerTitle=Jsoup.parse(m.group(1)).text().trim();
// if (tokens.contains(headerTitle)){ // you've found a matching header
// sectStart=m.start();
// if (m.find())
// sectEnd=m.start();
// }
// }
//
// if (sectStart<0) return new ArrayList<String>();
//
// // cut just the section we're interested in (the h1 to next start of h1 or end of doc if there is no more sections)
// String htmlSubsection=html.substring(sectStart, sectEnd);
//
// int ulStart=-1;
// ulStart=indexOf(htmlSubsection, "<[UL|ul].*>");
//
// if (ulStart<0) return new ArrayList<String>(); // exit early if we cant find a list
//
// int ulEnd=indexOf(htmlSubsection, "</[Uu][Ll].*>");
//
// String ul=htmlSubsection.substring(ulStart, ulEnd+"</ul>".length());// cut the list section so we can parse it easier
//
// List<String> result=new ArrayList<String>();
//
// Matcher m2=LI_ITERATOR_REGEX.matcher(ul);
// while (m2.find()){
// String item=m2.group(1);
// String name="";
// String url=null;
// if (item.indexOf("<a ")>=0){
// int hrefStart=item.indexOf("href=")+"href=".length()+1;
// url=item.substring(hrefStart, item.indexOf("\"", hrefStart));
// }
// name=Jsoup.parse(item).text().toString().trim();
// name=Jsoup.parse(item).text().toString()
// .replaceAll("&nbsp;", "")
// .trim();
//
//// // check name is not too long, if it is then truncate it
//// if (name.length()>30){
//// int to=name.indexOf(" ", 30); // next space after 30 chars
//// if (to<0){
//// to=name.length();// if there's no space after 30 chars then just go to the end
//// }else
//// name=name.substring(0, to)+"...";
//// }
//
// String id=null;
// String description=null;
// List<Object> tags=null;
//// log.debug("Overview ("+o.offering+"):: Adding (Other Materials) document -> ("+url+")"+name);
//
// result.add("<a href='"+url+"'>"+name+"</a>");
//
//// result.add(new Document(id, name, description, url, tags));
// }
//
// return result;
// }

/**
* Looks through each header matching the "matcher" regex and tries to find the "tokens" (potential header titles). If it finds one then it looks for the next <ul> and parses that list only
Expand Down

0 comments on commit de187d8

Please sign in to comment.