I have created a program to do something as follows:
I have 22000 authors separated into 22000 files. Each file contains set of keywords (can be one or hundreds).
For example AUTHOR-22945.txt consists of keywords as follows:
algorithm
problem
computational
solution
general
application
base
And then, I have 100 tables in mysql (topic0 until topic99). Each table consists of keywords and value. Each topic table contains 20 lines.
For example:
keywords | value
us 0.021263
base 0.019618
within 0.014545
new 0.010844
apply 0.010296
process 0.010158
propose 0.009199
provide 0.008239
analysis 0.007005
three 0.006594
approach 0.006183
domain 0.005634
structural 0.004126
adapt 0.003989
aspect 0.003989
product 0.003441
efficient 0.003441
information 0.003167
associate 0.003167
class 0.00303
I tried to create a program that will produce set of value for each author taken from all 100 topics. I want to get the average value of an author from topic0 to topic99. So if a keyword from author file exists in topic0, I will take the values and sum it. And so on until topic99. Therefore, an author can have an output that consists of sum of each topic [t0, t1, t2, ..., t99] which will be for example [0, 0.3452, 0.2343, .... , 0.08342].
Currently what my program do is quiet messy way. When I tested it with one author data, it worked and produced the correct number. But when I input 22000 authors all for it to produce the output, it took more than 5 days to do that. And the result was wrong when I took samples and checked them randomly.
I want to ask you what would be the right algorithm or tools or logical way to do this that would produce an accurate output and also not so long compiling time to do it? FYI, I also have all the authors data and keywords for each author in mysql table. Would it be better using mysql innerjoin or java in this situation?
Thank you very much.
FYI, my current messy code is as follows:
package counttopicscore;
import java.sql.*;
import java.io.*;
import java.util.Arrays;
public class CountTopicScore{
public static void main(String[] args) {
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
String query0 = "SELECT DISTINCT author_key FROM authorkeywords";
Statement st0 = conn.createStatement();
ResultSet rs0 = st0.executeQuery(query0);
while(rs0.next())
{
String authorkey = rs0.getString("author_key");
String query = "SELECT keywords FROM authorkeywords WHERE author_key ='"+ authorkey +"'"+";";
Statement st = conn.createStatement();
ResultSet rs = st.executeQuery(query);
double Value0, Value1, Value2, Value3, Value4, Value5, Value6, Value7, Value8, Value9, Value10,
Value11, Value12, Value13, Value14, Value15, Value16, Value17, Value18, Value19, Value20,
Value21, Value22, Value23, Value24, Value25, Value26, Value27, Value28, Value29, Value30,
Value31, Value32, Value33, Value34, Value35, Value36, Value37, Value38, Value39, Value40,
Value41, Value42, Value43, Value44, Value45, Value46, Value47, Value48, Value49, Value50,
Value51, Value52, Value53, Value54, Value55, Value56, Value57, Value58, Value59, Value60,
Value61, Value62, Value63, Value64, Value65, Value66, Value67, Value68, Value69, Value70,
Value71, Value72, Value73, Value74, Value75, Value76, Value77, Value78, Value79, Value80,
Value81, Value82, Value83, Value84, Value85, Value86, Value87, Value88, Value89, Value90,
Value91, Value92, Value93, Value94, Value95, Value96, Value97, Value98, Value99;
double topic0Value = 0.000000, topic1Value = 0.000000, topic2Value = 0.000000, topic3Value = 0.000000,
topic4Value = 0.000000, topic5Value = 0.000000, topic6Value = 0.000000, topic7Value = 0.000000,
topic8Value = 0.000000, topic9Value = 0.000000, topic10Value = 0.000000, topic11Value = 0.000000,
topic12Value = 0.000000, topic13Value = 0.000000;
double topic14Value = 0.000000;
double topic15Value = 0.000000;
double topic16Value = 0.000000;
double topic17Value = 0.000000;
double topic18Value = 0.000000;
double topic19Value = 0.000000;
double topic20Value = 0.000000;
double topic21Value = 0.000000;
double topic22Value = 0.000000;
double topic23Value = 0.000000;
double topic24Value = 0.000000;
double topic25Value = 0.000000;
double topic26Value = 0.000000;
double topic27Value = 0.000000;
double topic28Value = 0.000000;
double topic29Value = 0.000000;
double topic30Value = 0.000000;
double topic31Value = 0.000000;
double topic32Value = 0.000000;
double topic33Value = 0.000000;
double topic34Value = 0.000000;
double topic35Value = 0.000000;
double topic36Value = 0.000000;
double topic37Value = 0.000000;
double topic38Value = 0.000000;
double topic39Value = 0.000000;
double topic40Value = 0.000000;
double topic41Value = 0.000000;
double topic42Value = 0.000000;
double topic43Value = 0.000000;
double topic44Value = 0.000000;
double topic45Value = 0.000000;
double topic46Value = 0.000000;
double topic47Value = 0.000000;
double topic48Value = 0.000000;
double topic49Value = 0.000000;
double topic50Value = 0.000000;
double topic51Value = 0.000000;
double topic52Value = 0.000000;
double topic53Value = 0.000000;
double topic54Value = 0.000000;
double topic55Value = 0.000000;
double topic56Value = 0.000000;
double topic57Value = 0.000000;
double topic58Value = 0.000000;
double topic59Value = 0.000000;
double topic60Value = 0.000000;
double topic61Value = 0.000000;
double topic62Value = 0.000000;
double topic63Value = 0.000000;
double topic64Value = 0.000000;
double topic65Value = 0.000000;
double topic66Value = 0.000000;
double topic67Value = 0.000000;
double topic68Value = 0.000000;
double topic69Value = 0.000000;
double topic70Value = 0.000000;
double topic71Value = 0.000000;
double topic72Value = 0.000000;
double topic73Value = 0.000000;
double topic74Value = 0.000000;
double topic75Value = 0.000000;
double topic76Value = 0.000000;
double topic77Value = 0.000000;
double topic78Value = 0.000000;
double topic79Value = 0.000000;
double topic80Value = 0.000000;
double topic81Value = 0.000000;
double topic82Value = 0.000000;
double topic83Value = 0.000000;
double topic84Value = 0.000000;
double topic85Value = 0.000000;
double topic86Value = 0.000000;
double topic87Value = 0.000000;
double topic88Value = 0.000000;
double topic89Value = 0.000000;
double topic90Value = 0.000000;
double topic91Value = 0.000000;
double topic92Value = 0.000000;
double topic93Value = 0.000000;
double topic94Value = 0.000000;
double topic95Value = 0.000000;
double topic96Value = 0.000000;
double topic97Value = 0.000000;
double topic98Value = 0.000000;
double topic99Value = 0.000000;
while (rs.next())
{
String keyword = rs.getString("keywords");
Value0 = findTopic0(keyword);
Value1 = findTopic1(keyword);
Value2 = findTopic2(keyword);
Value3 = findTopic3(keyword);
Value4 = findTopic4(keyword);
Value5 = findTopic5(keyword);
Value6 = findTopic6(keyword);
Value7 = findTopic7(keyword);
Value8 = findTopic8(keyword);
Value9 = findTopic9(keyword);
Value10 = findTopic10(keyword);
Value11 = findTopic11(keyword);
Value12 = findTopic12(keyword);
Value13 = findTopic13(keyword);
Value14 = findTopic14(keyword);
Value15 = findTopic15(keyword);
Value16 = findTopic16(keyword);
Value17 = findTopic17(keyword);
Value18 = findTopic18(keyword);
Value19 = findTopic19(keyword);
Value20 = findTopic20(keyword);
Value21 = findTopic21(keyword);
Value22 = findTopic22(keyword);
Value23 = findTopic23(keyword);
Value24 = findTopic24(keyword);
Value25 = findTopic25(keyword);
Value26 = findTopic26(keyword);
Value27 = findTopic27(keyword);
Value28 = findTopic28(keyword);
Value29 = findTopic29(keyword);
Value30 = findTopic30(keyword);
Value31 = findTopic31(keyword);
Value32 = findTopic32(keyword);
Value33 = findTopic33(keyword);
Value34 = findTopic34(keyword);
Value35 = findTopic35(keyword);
Value36 = findTopic36(keyword);
Value37 = findTopic37(keyword);
Value38 = findTopic38(keyword);
Value39 = findTopic39(keyword);
Value40 = findTopic40(keyword);
Value41 = findTopic41(keyword);
Value42 = findTopic42(keyword);
Value43 = findTopic43(keyword);
Value44 = findTopic44(keyword);
Value45 = findTopic45(keyword);
Value46 = findTopic46(keyword);
Value47 = findTopic47(keyword);
Value48 = findTopic48(keyword);
Value49 = findTopic49(keyword);
Value50 = findTopic50(keyword);
Value51 = findTopic51(keyword);
Value52 = findTopic52(keyword);
Value53 = findTopic53(keyword);
Value54 = findTopic54(keyword);
Value55 = findTopic55(keyword);
Value56 = findTopic56(keyword);
Value57 = findTopic57(keyword);
Value58 = findTopic58(keyword);
Value59 = findTopic59(keyword);
Value60 = findTopic60(keyword);
Value61 = findTopic61(keyword);
Value62 = findTopic62(keyword);
Value63 = findTopic63(keyword);
Value64 = findTopic64(keyword);
Value65 = findTopic65(keyword);
Value66 = findTopic66(keyword);
Value67 = findTopic67(keyword);
Value68 = findTopic68(keyword);
Value69 = findTopic69(keyword);
Value70 = findTopic70(keyword);
Value71 = findTopic71(keyword);
Value72 = findTopic72(keyword);
Value73 = findTopic73(keyword);
Value74 = findTopic74(keyword);
Value75 = findTopic75(keyword);
Value76 = findTopic76(keyword);
Value77 = findTopic77(keyword);
Value78 = findTopic78(keyword);
Value79 = findTopic79(keyword);
Value80 = findTopic80(keyword);
Value81 = findTopic81(keyword);
Value82 = findTopic82(keyword);
Value83 = findTopic83(keyword);
Value84 = findTopic84(keyword);
Value85 = findTopic85(keyword);
Value86 = findTopic86(keyword);
Value87 = findTopic87(keyword);
Value88 = findTopic88(keyword);
Value89 = findTopic89(keyword);
Value90 = findTopic90(keyword);
Value91 = findTopic91(keyword);
Value92 = findTopic92(keyword);
Value93 = findTopic93(keyword);
Value94 = findTopic94(keyword);
Value95 = findTopic95(keyword);
Value96 = findTopic96(keyword);
Value97 = findTopic97(keyword);
Value98 = findTopic98(keyword);
Value99 = findTopic99(keyword);
topic0Value = topic0Value + Value0;
topic1Value = topic1Value + Value1;
topic2Value = topic2Value + Value2;
topic3Value = topic3Value + Value3;
topic4Value = topic4Value + Value4;
topic5Value = topic5Value + Value5;
topic6Value = topic6Value + Value6;
topic7Value = topic7Value + Value7;
topic8Value = topic8Value + Value8;
topic9Value = topic9Value + Value9;
topic10Value = topic10Value + Value10;
topic11Value = topic11Value + Value11;
topic12Value = topic12Value + Value12;
topic13Value = topic13Value + Value13;
topic14Value = topic14Value + Value14;
topic15Value = topic15Value + Value15;
topic16Value = topic16Value + Value16;
topic17Value = topic17Value + Value17;
topic18Value = topic18Value + Value18;
topic19Value = topic19Value + Value19;
topic20Value = topic20Value + Value20;
topic21Value = topic21Value + Value21;
topic22Value = topic22Value + Value22;
topic23Value = topic23Value + Value23;
topic24Value = topic24Value + Value24;
topic25Value = topic25Value + Value25;
topic26Value = topic26Value + Value26;
topic27Value = topic27Value + Value27;
topic28Value = topic28Value + Value28;
topic29Value = topic29Value + Value29;
topic30Value = topic30Value + Value30;
topic31Value = topic31Value + Value31;
topic32Value = topic32Value + Value32;
topic33Value = topic33Value + Value33;
topic34Value = topic34Value + Value34;
topic35Value = topic35Value + Value35;
topic36Value = topic36Value + Value36;
topic37Value = topic37Value + Value37;
topic38Value = topic38Value + Value38;
topic39Value = topic39Value + Value39;
topic40Value = topic40Value + Value40;
topic41Value = topic41Value + Value41;
topic42Value = topic42Value + Value42;
topic43Value = topic43Value + Value43;
topic44Value = topic44Value + Value44;
topic45Value = topic45Value + Value45;
topic46Value = topic46Value + Value46;
topic47Value = topic47Value + Value47;
topic48Value = topic48Value + Value48;
topic49Value = topic49Value + Value49;
topic50Value = topic50Value + Value50;
topic51Value = topic51Value + Value51;
topic52Value = topic52Value + Value52;
topic53Value = topic53Value + Value53;
topic54Value = topic54Value + Value54;
topic55Value = topic55Value + Value55;
topic56Value = topic56Value + Value56;
topic57Value = topic57Value + Value57;
topic58Value = topic58Value + Value58;
topic59Value = topic59Value + Value59;
topic60Value = topic60Value + Value60;
topic61Value = topic61Value + Value61;
topic62Value = topic62Value + Value62;
topic63Value = topic63Value + Value63;
topic64Value = topic64Value + Value64;
topic65Value = topic65Value + Value65;
topic66Value = topic66Value + Value66;
topic67Value = topic67Value + Value67;
topic68Value = topic68Value + Value68;
topic69Value = topic69Value + Value69;
topic70Value = topic70Value + Value70;
topic71Value = topic71Value + Value71;
topic72Value = topic72Value + Value72;
topic73Value = topic73Value + Value73;
topic74Value = topic74Value + Value74;
topic75Value = topic75Value + Value75;
topic76Value = topic76Value + Value76;
topic77Value = topic77Value + Value77;
topic78Value = topic78Value + Value78;
topic79Value = topic79Value + Value79;
topic80Value = topic80Value + Value80;
topic81Value = topic81Value + Value81;
topic82Value = topic82Value + Value82;
topic83Value = topic83Value + Value83;
topic84Value = topic84Value + Value84;
topic85Value = topic85Value + Value85;
topic86Value = topic86Value + Value86;
topic87Value = topic87Value + Value87;
topic88Value = topic88Value + Value88;
topic89Value = topic89Value + Value89;
topic90Value = topic90Value + Value90;
topic91Value = topic91Value + Value91;
topic92Value = topic92Value + Value92;
topic93Value = topic93Value + Value93;
topic94Value = topic94Value + Value94;
topic95Value = topic95Value + Value95;
topic96Value = topic96Value + Value96;
topic97Value = topic97Value + Value97;
topic98Value = topic98Value + Value98;
topic99Value = topic99Value + Value99;
}
st.close();
double[] score = new double[100];
score[0]=topic0Value;
score[1]=topic1Value;
score[2]=topic2Value;
score[3]=topic3Value;
score[4]=topic4Value;
score[5]=topic5Value;
score[6]=topic6Value;
score[7]=topic7Value;
score[8]=topic8Value;
score[9]=topic9Value;
score[10]=topic10Value;
score[11]=topic11Value;
score[12]=topic12Value;
score[13]=topic13Value;
score[14]=topic14Value;
score[15]=topic15Value;
score[16]=topic16Value;
score[17]=topic17Value;
score[18]=topic18Value;
score[19]=topic19Value;
score[20]=topic20Value;
score[21]=topic21Value;
score[22]=topic22Value;
score[23]=topic23Value;
score[24]=topic24Value;
score[25]=topic25Value;
score[26]=topic26Value;
score[27]=topic27Value;
score[28]=topic28Value;
score[29]=topic29Value;
score[30]=topic30Value;
score[31]=topic31Value;
score[32]=topic32Value;
score[33]=topic33Value;
score[34]=topic34Value;
score[35]=topic35Value;
score[36]=topic36Value;
score[37]=topic37Value;
score[38]=topic38Value;
score[39]=topic39Value;
score[40]=topic40Value;
score[41]=topic41Value;
score[42]=topic42Value;
score[43]=topic43Value;
score[44]=topic44Value;
score[45]=topic45Value;
score[46]=topic46Value;
score[47]=topic47Value;
score[48]=topic48Value;
score[49]=topic49Value;
score[50]=topic50Value;
score[51]=topic51Value;
score[52]=topic52Value;
score[53]=topic53Value;
score[54]=topic54Value;
score[55]=topic55Value;
score[56]=topic56Value;
score[57]=topic57Value;
score[58]=topic58Value;
score[59]=topic59Value;
score[60]=topic60Value;
score[61]=topic61Value;
score[62]=topic62Value;
score[63]=topic63Value;
score[64]=topic64Value;
score[65]=topic65Value;
score[66]=topic66Value;
score[67]=topic67Value;
score[68]=topic68Value;
score[69]=topic69Value;
score[70]=topic70Value;
score[71]=topic71Value;
score[72]=topic72Value;
score[73]=topic73Value;
score[74]=topic74Value;
score[75]=topic75Value;
score[76]=topic76Value;
score[77]=topic77Value;
score[78]=topic78Value;
score[79]=topic79Value;
score[80]=topic80Value;
score[81]=topic81Value;
score[82]=topic82Value;
score[83]=topic83Value;
score[84]=topic84Value;
score[85]=topic85Value;
score[86]=topic86Value;
score[87]=topic87Value;
score[88]=topic88Value;
score[89]=topic89Value;
score[90]=topic90Value;
score[91]=topic91Value;
score[92]=topic92Value;
score[93]=topic93Value;
score[94]=topic94Value;
score[95]=topic95Value;
score[96]=topic96Value;
score[97]=topic97Value;
score[98]=topic98Value;
score[99]=topic99Value;
PrintWriter pr = new PrintWriter("E:/scoring/"+authorkey+".txt");
for (int j=0; j<100 ; j++)
{
pr.println(score[j]);
}
pr.close();
}
}
catch (Exception e){
System.err.println("Got an exception! ");
System.err.println(e.getMessage());
}
}
private static double findTopic0(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic0 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
private static double findTopic1(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic1 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
private static double findTopic2(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic2 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
private static double findTopic3(String keyword) {
double value = 0;
try{
String myDriver = "org.gjt.mm.mysql.Driver";
String myUrl = "jdbc:mysql://localhost/arnetminer";
Class.forName(myDriver);
Connection conn = DriverManager.getConnection(myUrl, "root", "1234");
Statement st = conn.createStatement();
String queryToTopicTable = ("SELECT value FROM topic3 where word="+"'"+keyword+"'"+";");
ResultSet valueResult = st.executeQuery(queryToTopicTable);
while (valueResult.next())
{
value = valueResult.getDouble("value");
}
st.close();
}
catch (Exception e){
System.err.println("exception");
System.err.println(e.getMessage()); }
return value;
}
// .... until topic99
}