|
21 | 21 | #include "access/transam.h" |
22 | 22 | #include "access/xlog.h" |
23 | 23 | #include "libpq-int.h" |
| 24 | +#include "access/xlog.h" |
24 | 25 | #include "mb/pg_wchar.h" |
25 | 26 | #include "miscadmin.h" |
26 | 27 | #include "pgstat.h" |
|
30 | 31 | #include "utils/memutils.h" |
31 | 32 | #include "utils/syscache.h" |
32 | 33 |
|
33 | | - |
34 | 34 | /* |
35 | 35 | * Connection cache hash table entry |
36 | 36 | * |
@@ -103,6 +103,55 @@ static bool pgfdw_exec_cleanup_query(PGconn *conn, const char *query, |
103 | 103 | static bool pgfdw_get_cleanup_result(PGconn *conn, TimestampTz endtime, |
104 | 104 | PGresult **result); |
105 | 105 |
|
| 106 | +static int DistributedTransactionCount; |
| 107 | +static int DistributedTransactionParticipantsCount; |
| 108 | +static char* DistributedTransactionGid; |
| 109 | + |
| 110 | +/* Parallel send of sql statement to all paritcipants nodes |
| 111 | + * and wait status |
| 112 | + */ |
| 113 | +static bool |
| 114 | +BroadcastStatement(char const * sql, unsigned expectedStatus) |
| 115 | +{ |
| 116 | + HASH_SEQ_STATUS scan; |
| 117 | + ConnCacheEntry *entry; |
| 118 | + bool allOk = true; |
| 119 | + |
| 120 | + hash_seq_init(&scan, ConnectionHash); |
| 121 | + while ((entry = (ConnCacheEntry *) hash_seq_search(&scan))) |
| 122 | + { |
| 123 | + if (entry->xact_depth > 0) |
| 124 | + { |
| 125 | + do_sql_send_command(entry->conn, sql); |
| 126 | + } |
| 127 | + } |
| 128 | + |
| 129 | + hash_seq_init(&scan, ConnectionHash); |
| 130 | + while ((entry = (ConnCacheEntry *) hash_seq_search(&scan))) |
| 131 | + { |
| 132 | + if (entry->xact_depth > 0) |
| 133 | + { |
| 134 | + PGresult *result = PQgetResult(entry->conn); |
| 135 | + |
| 136 | + if (PQresultStatus(result) != expectedStatus) |
| 137 | + { |
| 138 | + elog(WARNING, "Failed command %s: status=%d, expected status=%d", sql, PQresultStatus(result), expectedStatus); |
| 139 | + pgfdw_report_error(ERROR, result, entry->conn, true, sql); |
| 140 | + allOk = false; |
| 141 | + } |
| 142 | + PQclear(result); |
| 143 | + PQgetResult(entry->conn); /* consume NULL result */ |
| 144 | + } |
| 145 | + } |
| 146 | + return allOk; |
| 147 | +} |
| 148 | + |
| 149 | +static bool |
| 150 | +BroadcastCommand(char const * sql) |
| 151 | +{ |
| 152 | + return BroadcastStatement(sql, PGRES_COMMAND_OK); |
| 153 | +} |
| 154 | + |
106 | 155 |
|
107 | 156 | /* |
108 | 157 | * Get a PGconn which can be used to execute queries on the remote PostgreSQL |
@@ -457,31 +506,23 @@ begin_remote_xact(ConnCacheEntry *entry) |
457 | 506 | /* Start main transaction if we haven't yet */ |
458 | 507 | if (entry->xact_depth <= 0) |
459 | 508 | { |
460 | | - TransactionId gxid = GetTransactionManager()->GetGlobalTransactionId(); |
461 | 509 | const char *sql; |
462 | 510 |
|
463 | 511 | elog(DEBUG3, "starting remote transaction on connection %p", |
464 | 512 | entry->conn); |
465 | 513 |
|
466 | | - // XXXX? |
467 | | - // |
468 | | - // if (UseTsDtmTransactions && TransactionIdIsValid(gxid)) |
469 | | - // { |
470 | | - // char stmt[64]; |
471 | | - // snprintf(stmt, sizeof(stmt), "select public.dtm_join_transaction(%d)", gxid); |
472 | | - // res = PQexec(entry->conn, stmt); |
473 | | - // PQclear(res); |
474 | | - // } |
475 | | - |
476 | 514 | if (IsolationIsSerializable()) |
477 | 515 | sql = "START TRANSACTION ISOLATION LEVEL SERIALIZABLE"; |
478 | | - else |
| 516 | + else if (UseRepeatableRead) |
479 | 517 | sql = "START TRANSACTION ISOLATION LEVEL REPEATABLE READ"; |
| 518 | + else |
| 519 | + sql = "START TRANSACTION"; |
480 | 520 | entry->changing_xact_state = true; |
481 | 521 | do_sql_command(entry->conn, sql); |
482 | 522 | entry->xact_depth = 1; |
483 | 523 | entry->changing_xact_state = false; |
484 | 524 |
|
| 525 | + |
485 | 526 | if (UseTsDtmTransactions) |
486 | 527 | { |
487 | 528 | if (currentConnection == NULL) |
@@ -516,6 +557,8 @@ begin_remote_xact(ConnCacheEntry *entry) |
516 | 557 | PQclear(res); |
517 | 558 | } |
518 | 559 | } |
| 560 | + |
| 561 | + DistributedTransactionParticipantsCount += 1; |
519 | 562 | } |
520 | 563 |
|
521 | 564 | /* |
@@ -805,55 +848,19 @@ pgfdw_xact_callback(XactEvent event, void *arg) |
805 | 848 | { |
806 | 849 | HASH_SEQ_STATUS scan; |
807 | 850 | ConnCacheEntry *entry; |
808 | | - |
809 | | - // /* Do nothing for this events */ |
810 | | - // switch (event) |
811 | | - // { |
812 | | - // case XACT_EVENT_START: |
813 | | - // case XACT_EVENT_COMMIT_PREPARED: |
814 | | - // case XACT_EVENT_ABORT_PREPARED: |
815 | | - // return; |
816 | | - // default: |
817 | | - // break; |
818 | | - // } |
| 851 | + bool two_phase_commit; |
819 | 852 |
|
820 | 853 | /* Quick exit if no connections were touched in this transaction. */ |
821 | 854 | if (!xact_got_connection) |
822 | 855 | return; |
823 | 856 |
|
824 | | - if (currentGlobalTransactionId != 0) |
825 | | - { |
826 | | - switch (event) |
827 | | - { |
828 | | - case XACT_EVENT_PARALLEL_PRE_COMMIT: |
829 | | - case XACT_EVENT_PRE_COMMIT: |
830 | | - { |
831 | | - csn_t maxCSN = 0; |
832 | | - |
833 | | - if (!RunDtmCommand(psprintf("PREPARE TRANSACTION '%d.%d'", |
834 | | - MyProcPid, currentLocalTransactionId)) || |
835 | | - !RunDtmFunction(psprintf("SELECT pg_global_snaphot_begin_prepare('%d.%d')", |
836 | | - MyProcPid, currentLocalTransactionId)) || |
837 | | - !RunDtmStatement(psprintf("SELECT pg_global_snaphot_prepare('%d.%d',0)", |
838 | | - MyProcPid, currentLocalTransactionId), PGRES_TUPLES_OK, DtmMaxCSN, &maxCSN) || |
839 | | - !RunDtmFunction(psprintf("SELECT pg_global_snaphot_end_prepare('%d.%d',%lld)", |
840 | | - MyProcPid, currentLocalTransactionId, maxCSN)) || |
841 | | - !RunDtmCommand(psprintf("COMMIT PREPARED '%d.%d'", |
842 | | - MyProcPid, currentLocalTransactionId))) |
843 | | - { |
844 | | - RunDtmCommand(psprintf("ROLLBACK PREPARED '%d.%d'", |
845 | | - MyProcPid, currentLocalTransactionId)); |
846 | | - ereport(ERROR, |
847 | | - (errcode(ERRCODE_TRANSACTION_ROLLBACK), |
848 | | - errmsg("transaction was aborted at one of the shards"))); |
849 | | - break; |
850 | | - } |
851 | | - return; |
852 | | - } |
853 | | - default: |
854 | | - break; |
855 | | - } |
856 | | - } |
| 857 | + /***********************************************************************************************/ |
| 858 | + |
| 859 | + /* Check if we need to perform 2PC commit: number of paritcipants should be greater than 1 */ |
| 860 | + two_phase_commit = Use2PC |
| 861 | + && (TransactionIdIsValid(GetCurrentTransactionIdIfAny()) + DistributedTransactionParticipantsCount) > 1; |
| 862 | + |
| 863 | + /***********************************************************************************************/ |
857 | 864 |
|
858 | 865 | /* |
859 | 866 | * Scan all connection cache entries to find open remote transactions, and |
@@ -891,10 +898,9 @@ pgfdw_xact_callback(XactEvent event, void *arg) |
891 | 898 | * we can't issue any more commands against it. |
892 | 899 | */ |
893 | 900 | pgfdw_reject_incomplete_xact_state_change(entry); |
894 | | - |
895 | | - /* Commit all remote transactions during pre-commit */ |
896 | | - if (!currentGlobalTransactionId) |
| 901 | + if (!two_phase_commit && !currentGlobalTransactionId) |
897 | 902 | { |
| 903 | + /* Commit all remote transactions during pre-commit */ |
898 | 904 | entry->changing_xact_state = true; |
899 | 905 | do_sql_command(entry->conn, "COMMIT TRANSACTION"); |
900 | 906 | entry->changing_xact_state = false; |
@@ -922,6 +928,13 @@ pgfdw_xact_callback(XactEvent event, void *arg) |
922 | 928 | } |
923 | 929 | entry->have_prep_stmt = false; |
924 | 930 | entry->have_error = false; |
| 931 | + |
| 932 | + if (two_phase_commit) |
| 933 | + { |
| 934 | + /* Do not reset xact_depth and break connection: we still need them for second phase |
| 935 | + */ |
| 936 | + continue; |
| 937 | + } |
925 | 938 | break; |
926 | 939 | case XACT_EVENT_PRE_PREPARE: |
927 | 940 |
|
@@ -1025,21 +1038,77 @@ pgfdw_xact_callback(XactEvent event, void *arg) |
1025 | 1038 | disconnect_pg_server(entry); |
1026 | 1039 | } |
1027 | 1040 | } |
1028 | | - // if (event != XACT_EVENT_PARALLEL_PRE_COMMIT && event != XACT_EVENT_PRE_COMMIT) |
1029 | | - // { |
1030 | | - /* |
1031 | | - * Regardless of the event type, we can now mark ourselves as out of the |
1032 | | - * transaction. (Note: if we are here during PRE_COMMIT or PRE_PREPARE, |
1033 | | - * this saves a useless scan of the hashtable during COMMIT or PREPARE.) |
1034 | | - */ |
1035 | | - xact_got_connection = false; |
1036 | 1041 |
|
1037 | | - /* Also reset cursor numbering for next transaction */ |
1038 | | - cursor_number = 0; |
| 1042 | + /***********************************************************************************************/ |
| 1043 | + |
| 1044 | + /* |
| 1045 | + * In case of 2PC broadcast PREPARE TRANSACTION statement. |
| 1046 | + * We are using BroadcasrCommand instead of sending them in the connection |
| 1047 | + * iterator above because we want to process them in parallel |
| 1048 | + */ |
| 1049 | + |
| 1050 | + if (currentGlobalTransactionId != 0 && |
| 1051 | + (event == XACT_EVENT_PARALLEL_PRE_COMMIT || event == XACT_EVENT_PRE_COMMIT)) |
| 1052 | + { |
| 1053 | + csn_t maxCSN = 0; |
| 1054 | + |
| 1055 | + if (!RunDtmCommand(psprintf("PREPARE TRANSACTION '%d.%d'", |
| 1056 | + MyProcPid, currentLocalTransactionId)) || |
| 1057 | + !RunDtmFunction(psprintf("SELECT pg_global_snaphot_begin_prepare('%d.%d')", |
| 1058 | + MyProcPid, currentLocalTransactionId)) || |
| 1059 | + !RunDtmStatement(psprintf("SELECT pg_global_snaphot_prepare('%d.%d',0)", |
| 1060 | + MyProcPid, currentLocalTransactionId), PGRES_TUPLES_OK, DtmMaxCSN, &maxCSN) || |
| 1061 | + !RunDtmFunction(psprintf("SELECT pg_global_snaphot_end_prepare('%d.%d',%lld)", |
| 1062 | + MyProcPid, currentLocalTransactionId, maxCSN)) || |
| 1063 | + !RunDtmCommand(psprintf("COMMIT PREPARED '%d.%d'", |
| 1064 | + MyProcPid, currentLocalTransactionId))) |
| 1065 | + { |
| 1066 | + RunDtmCommand(psprintf("ROLLBACK PREPARED '%d.%d'", |
| 1067 | + MyProcPid, currentLocalTransactionId)); |
| 1068 | + ereport(ERROR, |
| 1069 | + (errcode(ERRCODE_TRANSACTION_ROLLBACK), |
| 1070 | + errmsg("transaction was aborted at one of the shards"))); |
| 1071 | + } |
| 1072 | + return; |
| 1073 | + } |
| 1074 | + |
| 1075 | + else if (two_phase_commit && |
| 1076 | + (event == XACT_EVENT_PARALLEL_PRE_COMMIT || event == XACT_EVENT_PRE_COMMIT)) |
| 1077 | + { |
| 1078 | + DistributedTransactionGid = psprintf("%d:%d:%lld:%lld:%d", |
| 1079 | + MyProcPid, |
| 1080 | + ++DistributedTransactionCount, |
| 1081 | + (long long)GetSystemIdentifier(), |
| 1082 | + (long long)GetCurrentTransactionId(), |
| 1083 | + DistributedTransactionParticipantsCount); |
| 1084 | + if (!BroadcastCommand(psprintf("PREPARE TRANSACTION '%s'", DistributedTransactionGid)) || |
| 1085 | + !BroadcastCommand(psprintf("COMMIT PREPARED '%s'", DistributedTransactionGid))) |
| 1086 | + { |
| 1087 | + BroadcastCommand(psprintf("ROLLBACK PREPARED '%s'", DistributedTransactionGid)); |
| 1088 | + ereport(ERROR, |
| 1089 | + (errcode(ERRCODE_TRANSACTION_ROLLBACK), |
| 1090 | + errmsg("Transaction %s was aborted at one of participants", DistributedTransactionGid))); |
| 1091 | + } |
| 1092 | + return; |
| 1093 | + } |
| 1094 | + |
| 1095 | + |
| 1096 | + /***********************************************************************************************/ |
| 1097 | + |
| 1098 | + /* |
| 1099 | + * Regardless of the event type, we can now mark ourselves as out of the |
| 1100 | + * transaction. (Note: if we are here during PRE_COMMIT or PRE_PREPARE, |
| 1101 | + * this saves a useless scan of the hashtable during COMMIT or PREPARE.) |
| 1102 | + */ |
| 1103 | + xact_got_connection = false; |
| 1104 | + |
| 1105 | + /* Also reset cursor numbering for next transaction */ |
| 1106 | + cursor_number = 0; |
1039 | 1107 |
|
1040 | | - currentGlobalTransactionId = 0; |
1041 | | - currentConnection = NULL; |
1042 | | - // } |
| 1108 | + DistributedTransactionParticipantsCount = 0; |
| 1109 | + DistributedTransactionGid = NULL; |
| 1110 | + currentGlobalTransactionId = 0; |
| 1111 | + currentConnection = NULL; |
1043 | 1112 | } |
1044 | 1113 |
|
1045 | 1114 | /* |
|
0 commit comments